xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision 2983445f)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84 
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_zone.h>
88 #include <vm/vm_object.h>
89 
90 #include <sys/buf2.h>
91 
92 #include <sys/thread2.h>
93 
94 #include "nfsproto.h"
95 #include "rpcv2.h"
96 #include "nfs.h"
97 #include "xdr_subs.h"
98 #include "nfsm_subs.h"
99 
/*
 * Debug trace hook.  With NFSRV_DEBUG defined this expands to kprintf
 * applied to the parenthesised argument list supplied by the caller
 * (hence the double parentheses at call sites); otherwise it expands to
 * nothing.
 */
#ifdef NFSRV_DEBUG
#define nfsdbprintf(info)	kprintf info
#else
#define nfsdbprintf(info)
#endif

/*
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably an upper bound used by the commit service -- confirm.
 */
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Tunables for the read sequential-access heuristic used by nfsrv_read():
 *   NUM_HEURISTIC - number of slots in the nfsheur[] table
 *   NHUSE_INIT    - nh_use value given to a slot when it is (re)assigned
 *   NHUSE_INC     - amount added to nh_use on each read through a slot
 *   NHUSE_MAX     - ceiling applied to nh_use
 */
#define NUM_HEURISTIC		1017
#define NHUSE_INIT		64
#define NHUSE_INC		16
#define NHUSE_MAX		2048

/*
 * Per-vnode read heuristic slots.  nfsrv_read() hashes the vnode address
 * into this table, probes a bounded number of slots, and uses the slot to
 * track whether successive reads are sequential.
 */
static struct nfsheur {
    struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
    off_t nh_nextr;		/* next offset for sequential detection */
    int nh_use;			/* use count for selection */
    int nh_seqcount;		/* heuristic */
} nfsheur[NUM_HEURISTIC];

/*
 * nfstype lookup tables for protocol versions 3 and 2.
 * NOTE(review): presumably indexed by vnode type; the users are outside
 * the part of the file reviewed here -- confirm.
 */
nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
		      NFFIFO, NFNON };
#ifndef NFS_NOSERVER
nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
		      NFCHR, NFNON };

/*
 * NOTE(review): write-gathering delays, judging by the names and the
 * NFS_GATHERDELAY initializer; not referenced in the reviewed part of
 * the file -- confirm units and use.
 */
int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
int nfsrvw_procrastinate_v3 = 0;

/* NOTE(review): not referenced in the reviewed part of the file. */
static struct timespec	nfsver;

SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.async: when non-zero, nfsrv_write() treats NFSv2 writes as
 * NFSV3WRITE_UNSTABLE instead of the default NFSV3WRITE_FILESYNC.
 */
int nfs_async;
SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
    "Enable unstable and fast writes");
/* Commit statistics exported read/write through sysctl. */
static int nfs_commit_blks;
static int nfs_commit_miss;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
    "Number of committed blocks");
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
    "Number of nfs blocks committed from dirty buffers");

/*
 * Forward declarations of file-local helpers.  nfsrv_access() is the
 * permission check used by the services below; it returns 0 when the
 * requested access is permitted.
 */
static int nfsrv_access (struct mount *, struct vnode *, int,
			struct ucred *, int, struct thread *, int);
static void nfsrvw_coalesce (struct nfsrv_descript *,
		struct nfsrv_descript *);
147 
148 /*
149  * nfs v3 access service
150  */
151 int
152 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
153 	      struct thread *td, struct mbuf **mrq)
154 {
155 	struct sockaddr *nam = nfsd->nd_nam;
156 	struct ucred *cred = &nfsd->nd_cr;
157 	struct vnode *vp = NULL;
158 	struct mount *mp = NULL;
159 	nfsfh_t nfh;
160 	fhandle_t *fhp;
161 	int error = 0, rdonly, getret;
162 	struct vattr vattr, *vap = &vattr;
163 	u_long testmode, nfsmode;
164 	struct nfsm_info info;
165 	u_int32_t *tl;
166 
167 	info.dpos = nfsd->nd_dpos;
168 	info.md = nfsd->nd_md;
169 	info.mrep = nfsd->nd_mrep;
170 	info.mreq = NULL;
171 
172 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
173 	fhp = &nfh.fh_generic;
174 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
175 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
176 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
177 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
178 	if (error) {
179 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
180 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
181 		error = 0;
182 		goto nfsmout;
183 	}
184 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
185 	if ((nfsmode & NFSV3ACCESS_READ) &&
186 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
187 		nfsmode &= ~NFSV3ACCESS_READ;
188 	if (vp->v_type == VDIR)
189 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
190 			NFSV3ACCESS_DELETE);
191 	else
192 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
193 	if ((nfsmode & testmode) &&
194 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
195 		nfsmode &= ~testmode;
196 	if (vp->v_type == VDIR)
197 		testmode = NFSV3ACCESS_LOOKUP;
198 	else
199 		testmode = NFSV3ACCESS_EXECUTE;
200 	if ((nfsmode & testmode) &&
201 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
202 		nfsmode &= ~testmode;
203 	getret = VOP_GETATTR(vp, vap);
204 	vput(vp);
205 	vp = NULL;
206 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
207 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
208 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
209 	tl = nfsm_build(&info, NFSX_UNSIGNED);
210 	*tl = txdr_unsigned(nfsmode);
211 nfsmout:
212 	*mrq = info.mreq;
213 	if (vp)
214 		vput(vp);
215 	return(error);
216 }
217 
218 /*
219  * nfs getattr service
220  */
221 int
222 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
223 	      struct thread *td, struct mbuf **mrq)
224 {
225 	struct sockaddr *nam = nfsd->nd_nam;
226 	struct ucred *cred = &nfsd->nd_cr;
227 	struct nfs_fattr *fp;
228 	struct vattr va;
229 	struct vattr *vap = &va;
230 	struct vnode *vp = NULL;
231 	struct mount *mp = NULL;
232 	nfsfh_t nfh;
233 	fhandle_t *fhp;
234 	int error = 0, rdonly;
235 	struct nfsm_info info;
236 
237 	info.mrep = nfsd->nd_mrep;
238 	info.md = nfsd->nd_md;
239 	info.dpos = nfsd->nd_dpos;
240 	info.mreq = NULL;
241 
242 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
243 	fhp = &nfh.fh_generic;
244 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
245 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
246 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
247 	if (error) {
248 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
249 		error = 0;
250 		goto nfsmout;
251 	}
252 	error = VOP_GETATTR(vp, vap);
253 	vput(vp);
254 	vp = NULL;
255 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
256 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
257 	if (error) {
258 		error = 0;
259 		goto nfsmout;
260 	}
261 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
262 	nfsm_srvfattr(nfsd, vap, fp);
263 	/* fall through */
264 
265 nfsmout:
266 	*mrq = info.mreq;
267 	if (vp)
268 		vput(vp);
269 	return(error);
270 }
271 
272 /*
273  * nfs setattr service
274  */
275 int
276 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
277 	      struct thread *td, struct mbuf **mrq)
278 {
279 	struct sockaddr *nam = nfsd->nd_nam;
280 	struct ucred *cred = &nfsd->nd_cr;
281 	struct vattr va, preat;
282 	struct vattr *vap = &va;
283 	struct nfsv2_sattr *sp;
284 	struct nfs_fattr *fp;
285 	struct vnode *vp = NULL;
286 	struct mount *mp = NULL;
287 	nfsfh_t nfh;
288 	fhandle_t *fhp;
289 	u_int32_t *tl;
290 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
291 	int gcheck = 0;
292 	struct timespec guard;
293 	struct nfsm_info info;
294 
295 	info.mrep = nfsd->nd_mrep;
296 	info.mreq = NULL;
297 	info.md = nfsd->nd_md;
298 	info.dpos = nfsd->nd_dpos;
299 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
300 
301 	guard.tv_sec = 0;	/* fix compiler warning */
302 	guard.tv_nsec = 0;
303 
304 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
305 	fhp = &nfh.fh_generic;
306 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
307 	VATTR_NULL(vap);
308 	if (info.v3) {
309 		ERROROUT(nfsm_srvsattr(&info, vap));
310 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
311 		gcheck = fxdr_unsigned(int, *tl);
312 		if (gcheck) {
313 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
314 			fxdr_nfsv3time(tl, &guard);
315 		}
316 	} else {
317 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
318 		/*
319 		 * Nah nah nah nah na nah
320 		 * There is a bug in the Sun client that puts 0xffff in the mode
321 		 * field of sattr when it should put in 0xffffffff. The u_short
322 		 * doesn't sign extend.
323 		 * --> check the low order 2 bytes for 0xffff
324 		 */
325 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
326 			vap->va_mode = nfstov_mode(sp->sa_mode);
327 		if (sp->sa_uid != nfs_xdrneg1)
328 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
329 		if (sp->sa_gid != nfs_xdrneg1)
330 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
331 		if (sp->sa_size != nfs_xdrneg1)
332 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
333 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
334 #ifdef notyet
335 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
336 #else
337 			vap->va_atime.tv_sec =
338 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
339 			vap->va_atime.tv_nsec = 0;
340 #endif
341 		}
342 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
343 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
344 
345 	}
346 
347 	/*
348 	 * Now that we have all the fields, lets do it.
349 	 */
350 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
351 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
352 	if (error) {
353 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
354 				      2 * NFSX_UNSIGNED, &error));
355 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
356 				 postat_ret, vap);
357 		error = 0;
358 		goto nfsmout;
359 	}
360 
361 	/*
362 	 * vp now an active resource, pay careful attention to cleanup
363 	 */
364 
365 	if (info.v3) {
366 		error = preat_ret = VOP_GETATTR(vp, &preat);
367 		if (!error && gcheck &&
368 			(preat.va_ctime.tv_sec != guard.tv_sec ||
369 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
370 			error = NFSERR_NOT_SYNC;
371 		if (error) {
372 			vput(vp);
373 			vp = NULL;
374 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
375 					      NFSX_WCCDATA(info.v3), &error));
376 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
377 					 postat_ret, vap);
378 			error = 0;
379 			goto nfsmout;
380 		}
381 	}
382 
383 	/*
384 	 * If the size is being changed write acces is required, otherwise
385 	 * just check for a read only file system.
386 	 */
387 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
388 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
389 			error = EROFS;
390 			goto out;
391 		}
392 	} else {
393 		if (vp->v_type == VDIR) {
394 			error = EISDIR;
395 			goto out;
396 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
397 			    td, 0)) != 0){
398 			goto out;
399 		}
400 	}
401 	error = VOP_SETATTR(vp, vap, cred);
402 	postat_ret = VOP_GETATTR(vp, vap);
403 	if (!error)
404 		error = postat_ret;
405 out:
406 	vput(vp);
407 	vp = NULL;
408 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
409 		   NFSX_WCCORFATTR(info.v3), &error));
410 	if (info.v3) {
411 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
412 				 postat_ret, vap);
413 		error = 0;
414 		goto nfsmout;
415 	} else {
416 		fp = nfsm_build(&info, NFSX_V2FATTR);
417 		nfsm_srvfattr(nfsd, vap, fp);
418 	}
419 	/* fall through */
420 
421 nfsmout:
422 	*mrq = info.mreq;
423 	if (vp)
424 		vput(vp);
425 	return(error);
426 }
427 
428 /*
429  * nfs lookup rpc
430  */
431 int
432 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
433 	     struct thread *td, struct mbuf **mrq)
434 {
435 	struct sockaddr *nam = nfsd->nd_nam;
436 	struct ucred *cred = &nfsd->nd_cr;
437 	struct nfs_fattr *fp;
438 	struct nlookupdata nd;
439 	struct vnode *vp;
440 	struct vnode *dirp;
441 	struct nchandle nch;
442 	nfsfh_t nfh;
443 	fhandle_t *fhp;
444 	int error = 0, len, dirattr_ret = 1;
445 	int pubflag;
446 	struct vattr va, dirattr, *vap = &va;
447 	struct nfsm_info info;
448 
449 	info.mrep = nfsd->nd_mrep;
450 	info.mreq = NULL;
451 	info.md = nfsd->nd_md;
452 	info.dpos = nfsd->nd_dpos;
453 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
454 
455 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
456 	nlookup_zero(&nd);
457 	dirp = NULL;
458 	vp = NULL;
459 
460 	fhp = &nfh.fh_generic;
461 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
462 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
463 
464 	pubflag = nfs_ispublicfh(fhp);
465 
466 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
467 		fhp, len, slp, nam, &info.md, &info.dpos,
468 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
469 
470 	/*
471 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
472 	 * structure in case macros jump to nfsmout.
473 	 */
474 
475 	if (error) {
476 		if (dirp) {
477 			if (info.v3)
478 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
479 			vrele(dirp);
480 			dirp = NULL;
481 		}
482 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
483 				      NFSX_POSTOPATTR(info.v3), &error));
484 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
485 		error = 0;
486 		goto nfsmout;
487 	}
488 
489 	/*
490 	 * Locate index file for public filehandle
491 	 *
492 	 * error is 0 on entry and 0 on exit from this block.
493 	 */
494 
495 	if (pubflag) {
496 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
497 			/*
498 			 * Setup call to lookup() to see if we can find
499 			 * the index file. Arguably, this doesn't belong
500 			 * in a kernel.. Ugh.  If an error occurs, do not
501 			 * try to install an index file and then clear the
502 			 * error.
503 			 *
504 			 * When we replace nd with ind and redirect ndp,
505 			 * maintenance of ni_startdir and ni_vp shift to
506 			 * ind and we have to clean them up in the old nd.
507 			 * However, the cnd resource continues to be maintained
508 			 * via the original nd.  Confused?  You aren't alone!
509 			 */
510 			vn_unlock(vp);
511 			cache_copy(&nd.nl_nch, &nch);
512 			nlookup_done(&nd);
513 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
514 						UIO_SYSSPACE, 0, cred, &nch);
515 			cache_drop(&nch);
516 			if (error == 0)
517 				error = nlookup(&nd);
518 
519 			if (error == 0) {
520 				/*
521 				 * Found an index file. Get rid of
522 				 * the old references.  transfer vp and
523 				 * load up the new vp.  Fortunately we do
524 				 * not have to deal with dvp, that would be
525 				 * a huge mess.
526 				 */
527 				if (dirp)
528 					vrele(dirp);
529 				dirp = vp;
530 				vp = NULL;
531 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
532 							LK_EXCLUSIVE, &vp);
533 				KKASSERT(error == 0);
534 			}
535 			error = 0;
536 		}
537 		/*
538 		 * If the public filehandle was used, check that this lookup
539 		 * didn't result in a filehandle outside the publicly exported
540 		 * filesystem.  We clear the poor vp here to avoid lockups due
541 		 * to NFS I/O.
542 		 */
543 
544 		if (vp->v_mount != nfs_pub.np_mount) {
545 			vput(vp);
546 			vp = NULL;
547 			error = EPERM;
548 		}
549 	}
550 
551 	if (dirp) {
552 		if (info.v3)
553 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
554 		vrele(dirp);
555 		dirp = NULL;
556 	}
557 
558 	/*
559 	 * Resources at this point:
560 	 *	ndp->ni_vp	may not be NULL
561 	 *
562 	 */
563 
564 	if (error) {
565 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
566 				      NFSX_POSTOPATTR(info.v3), &error));
567 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
568 		error = 0;
569 		goto nfsmout;
570 	}
571 
572 	/*
573 	 * Clear out some resources prior to potentially blocking.  This
574 	 * is not as critical as ni_dvp resources in other routines, but
575 	 * it helps.
576 	 */
577 	nlookup_done(&nd);
578 
579 	/*
580 	 * Get underlying attribute, then release remaining resources ( for
581 	 * the same potential blocking reason ) and reply.
582 	 */
583 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
584 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
585 	if (!error)
586 		error = VOP_GETATTR(vp, vap);
587 
588 	vput(vp);
589 	vp = NULL;
590 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
591 			      NFSX_SRVFH(info.v3) +
592 			      NFSX_POSTOPORFATTR(info.v3) +
593 			      NFSX_POSTOPATTR(info.v3),
594 			      &error));
595 	if (error) {
596 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
597 		error = 0;
598 		goto nfsmout;
599 	}
600 	nfsm_srvfhtom(&info, fhp);
601 	if (info.v3) {
602 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
603 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
604 	} else {
605 		fp = nfsm_build(&info, NFSX_V2FATTR);
606 		nfsm_srvfattr(nfsd, vap, fp);
607 	}
608 
609 nfsmout:
610 	*mrq = info.mreq;
611 	if (dirp)
612 		vrele(dirp);
613 	nlookup_done(&nd);		/* may be called twice */
614 	if (vp)
615 		vput(vp);
616 	return (error);
617 }
618 
619 /*
620  * nfs readlink service
621  */
622 int
623 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
624 	       struct thread *td, struct mbuf **mrq)
625 {
626 	struct sockaddr *nam = nfsd->nd_nam;
627 	struct ucred *cred = &nfsd->nd_cr;
628 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
629 	struct iovec *ivp = iv;
630 	u_int32_t *tl;
631 	int error = 0, rdonly, i, tlen, len, getret;
632 	struct mbuf *mp1, *mp2, *mp3;
633 	struct vnode *vp = NULL;
634 	struct mount *mp = NULL;
635 	struct vattr attr;
636 	nfsfh_t nfh;
637 	fhandle_t *fhp;
638 	struct uio io, *uiop = &io;
639 	struct nfsm_info info;
640 
641 	info.mrep = nfsd->nd_mrep;
642 	info.mreq = NULL;
643 	info.md = nfsd->nd_md;
644 	info.dpos = nfsd->nd_dpos;
645 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
646 
647 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
648 #ifndef nolint
649 	mp2 = NULL;
650 #endif
651 	mp3 = NULL;
652 	fhp = &nfh.fh_generic;
653 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
654 	len = 0;
655 	i = 0;
656 	while (len < NFS_MAXPATHLEN) {
657 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
658 		mp1->m_len = MCLBYTES;
659 		if (len == 0)
660 			mp3 = mp2 = mp1;
661 		else {
662 			mp2->m_next = mp1;
663 			mp2 = mp1;
664 		}
665 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
666 			mp1->m_len = NFS_MAXPATHLEN-len;
667 			len = NFS_MAXPATHLEN;
668 		} else
669 			len += mp1->m_len;
670 		ivp->iov_base = mtod(mp1, caddr_t);
671 		ivp->iov_len = mp1->m_len;
672 		i++;
673 		ivp++;
674 	}
675 	uiop->uio_iov = iv;
676 	uiop->uio_iovcnt = i;
677 	uiop->uio_offset = 0;
678 	uiop->uio_resid = len;
679 	uiop->uio_rw = UIO_READ;
680 	uiop->uio_segflg = UIO_SYSSPACE;
681 	uiop->uio_td = NULL;
682 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
683 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
684 	if (error) {
685 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
686 				      2 * NFSX_UNSIGNED, &error));
687 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
688 		error = 0;
689 		goto nfsmout;
690 	}
691 	if (vp->v_type != VLNK) {
692 		if (info.v3)
693 			error = EINVAL;
694 		else
695 			error = ENXIO;
696 		goto out;
697 	}
698 	error = VOP_READLINK(vp, uiop, cred);
699 out:
700 	getret = VOP_GETATTR(vp, &attr);
701 	vput(vp);
702 	vp = NULL;
703 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
704 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
705 			     &error));
706 	if (info.v3) {
707 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
708 		if (error) {
709 			error = 0;
710 			goto nfsmout;
711 		}
712 	}
713 	if (uiop->uio_resid > 0) {
714 		len -= uiop->uio_resid;
715 		tlen = nfsm_rndup(len);
716 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
717 	}
718 	tl = nfsm_build(&info, NFSX_UNSIGNED);
719 	*tl = txdr_unsigned(len);
720 	info.mb->m_next = mp3;
721 	mp3 = NULL;
722 nfsmout:
723 	*mrq = info.mreq;
724 	if (mp3)
725 		m_freem(mp3);
726 	if (vp)
727 		vput(vp);
728 	return(error);
729 }
730 
731 /*
732  * nfs read service
733  */
734 int
735 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
736 	   struct thread *td, struct mbuf **mrq)
737 {
738 	struct nfsm_info info;
739 	struct sockaddr *nam = nfsd->nd_nam;
740 	struct ucred *cred = &nfsd->nd_cr;
741 	struct iovec *iv;
742 	struct iovec *iv2;
743 	struct mbuf *m;
744 	struct nfs_fattr *fp;
745 	u_int32_t *tl;
746 	int i;
747 	int reqlen;
748 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
749 	struct mbuf *m2;
750 	struct vnode *vp = NULL;
751 	struct mount *mp = NULL;
752 	nfsfh_t nfh;
753 	fhandle_t *fhp;
754 	struct uio io, *uiop = &io;
755 	struct vattr va, *vap = &va;
756 	struct nfsheur *nh;
757 	off_t off;
758 	int ioflag = 0;
759 
760 	info.mrep = nfsd->nd_mrep;
761 	info.mreq = NULL;
762 	info.md = nfsd->nd_md;
763 	info.dpos = nfsd->nd_dpos;
764 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
765 
766 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
767 	fhp = &nfh.fh_generic;
768 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
769 	if (info.v3) {
770 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
771 		off = fxdr_hyper(tl);
772 	} else {
773 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
774 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
775 	}
776 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
777 					    NFS_SRVMAXDATA(nfsd), &error));
778 
779 	/*
780 	 * Reference vp.  If an error occurs, vp will be invalid, but we
781 	 * have to NULL it just in case.  The macros might goto nfsmout
782 	 * as well.
783 	 */
784 
785 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
786 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
787 	if (error) {
788 		vp = NULL;
789 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
790 				      2 * NFSX_UNSIGNED, &error));
791 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
792 		error = 0;
793 		goto nfsmout;
794 	}
795 
796 	if (vp->v_type != VREG) {
797 		if (info.v3)
798 			error = EINVAL;
799 		else
800 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
801 	}
802 	if (!error) {
803 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
804 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
805 	}
806 	getret = VOP_GETATTR(vp, vap);
807 	if (!error)
808 		error = getret;
809 	if (error) {
810 		vput(vp);
811 		vp = NULL;
812 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
813 				      NFSX_POSTOPATTR(info.v3), &error));
814 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
815 		error = 0;
816 		goto nfsmout;
817 	}
818 
819 	/*
820 	 * Calculate byte count to read
821 	 */
822 
823 	if (off >= vap->va_size)
824 		cnt = 0;
825 	else if ((off + reqlen) > vap->va_size)
826 		cnt = vap->va_size - off;
827 	else
828 		cnt = reqlen;
829 
830 	/*
831 	 * Calculate seqcount for heuristic
832 	 */
833 
834 	{
835 		int hi;
836 		int try = 32;
837 
838 		/*
839 		 * Locate best candidate
840 		 */
841 
842 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
843 		nh = &nfsheur[hi];
844 
845 		while (try--) {
846 			if (nfsheur[hi].nh_vp == vp) {
847 				nh = &nfsheur[hi];
848 				break;
849 			}
850 			if (nfsheur[hi].nh_use > 0)
851 				--nfsheur[hi].nh_use;
852 			hi = (hi + 1) % NUM_HEURISTIC;
853 			if (nfsheur[hi].nh_use < nh->nh_use)
854 				nh = &nfsheur[hi];
855 		}
856 
857 		if (nh->nh_vp != vp) {
858 			nh->nh_vp = vp;
859 			nh->nh_nextr = off;
860 			nh->nh_use = NHUSE_INIT;
861 			if (off == 0)
862 				nh->nh_seqcount = 4;
863 			else
864 				nh->nh_seqcount = 1;
865 		}
866 
867 		/*
868 		 * Calculate heuristic
869 		 */
870 
871 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
872 			if (++nh->nh_seqcount > IO_SEQMAX)
873 				nh->nh_seqcount = IO_SEQMAX;
874 		} else if (nh->nh_seqcount > 1) {
875 			nh->nh_seqcount = 1;
876 		} else {
877 			nh->nh_seqcount = 0;
878 		}
879 		nh->nh_use += NHUSE_INC;
880 		if (nh->nh_use > NHUSE_MAX)
881 			nh->nh_use = NHUSE_MAX;
882 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
883         }
884 
885 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
886 			      NFSX_POSTOPORFATTR(info.v3) +
887 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
888 			      &error));
889 	if (info.v3) {
890 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
891 		*tl++ = nfs_true;
892 		fp = (struct nfs_fattr *)tl;
893 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
894 	} else {
895 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
896 		fp = (struct nfs_fattr *)tl;
897 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
898 	}
899 	len = left = nfsm_rndup(cnt);
900 	if (cnt > 0) {
901 		/*
902 		 * Generate the mbuf list with the uio_iov ref. to it.
903 		 */
904 		i = 0;
905 		m = m2 = info.mb;
906 		while (left > 0) {
907 			siz = min(M_TRAILINGSPACE(m), left);
908 			if (siz > 0) {
909 				left -= siz;
910 				i++;
911 			}
912 			if (left > 0) {
913 				m = m_getcl(MB_WAIT, MT_DATA, 0);
914 				m->m_len = 0;
915 				m2->m_next = m;
916 				m2 = m;
917 			}
918 		}
919 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
920 		       M_TEMP, M_WAITOK);
921 		uiop->uio_iov = iv2 = iv;
922 		m = info.mb;
923 		left = len;
924 		i = 0;
925 		while (left > 0) {
926 			if (m == NULL)
927 				panic("nfsrv_read iov");
928 			siz = min(M_TRAILINGSPACE(m), left);
929 			if (siz > 0) {
930 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
931 				iv->iov_len = siz;
932 				m->m_len += siz;
933 				left -= siz;
934 				iv++;
935 				i++;
936 			}
937 			m = m->m_next;
938 		}
939 		uiop->uio_iovcnt = i;
940 		uiop->uio_offset = off;
941 		uiop->uio_resid = len;
942 		uiop->uio_rw = UIO_READ;
943 		uiop->uio_segflg = UIO_SYSSPACE;
944 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
945 		off = uiop->uio_offset;
946 		nh->nh_nextr = off;
947 		FREE((caddr_t)iv2, M_TEMP);
948 		if (error || (getret = VOP_GETATTR(vp, vap))) {
949 			if (!error)
950 				error = getret;
951 			m_freem(info.mreq);
952 			info.mreq = NULL;
953 			vput(vp);
954 			vp = NULL;
955 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
956 					      NFSX_POSTOPATTR(info.v3),
957 					      &error));
958 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
959 			error = 0;
960 			goto nfsmout;
961 		}
962 	} else {
963 		uiop->uio_resid = 0;
964 	}
965 	vput(vp);
966 	vp = NULL;
967 	nfsm_srvfattr(nfsd, vap, fp);
968 	tlen = len - uiop->uio_resid;
969 	cnt = cnt < tlen ? cnt : tlen;
970 	tlen = nfsm_rndup(cnt);
971 	if (len != tlen || tlen != cnt)
972 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
973 	if (info.v3) {
974 		*tl++ = txdr_unsigned(cnt);
975 		if (len < reqlen)
976 			*tl++ = nfs_true;
977 		else
978 			*tl++ = nfs_false;
979 	}
980 	*tl = txdr_unsigned(cnt);
981 nfsmout:
982 	*mrq = info.mreq;
983 	if (vp)
984 		vput(vp);
985 	return(error);
986 }
987 
988 /*
989  * nfs write service
990  */
991 int
992 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
993 	    struct thread *td, struct mbuf **mrq)
994 {
995 	struct sockaddr *nam = nfsd->nd_nam;
996 	struct ucred *cred = &nfsd->nd_cr;
997 	struct iovec *ivp;
998 	int i, cnt;
999 	struct mbuf *mp1;
1000 	struct nfs_fattr *fp;
1001 	struct iovec *iv;
1002 	struct vattr va, forat;
1003 	struct vattr *vap = &va;
1004 	u_int32_t *tl;
1005 	int error = 0, rdonly, len, forat_ret = 1;
1006 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1007 	int stable = NFSV3WRITE_FILESYNC;
1008 	struct vnode *vp = NULL;
1009 	struct mount *mp = NULL;
1010 	nfsfh_t nfh;
1011 	fhandle_t *fhp;
1012 	struct uio io, *uiop = &io;
1013 	struct nfsm_info info;
1014 	off_t off;
1015 
1016 	info.mrep = nfsd->nd_mrep;
1017 	info.mreq = NULL;
1018 	info.md = nfsd->nd_md;
1019 	info.dpos = nfsd->nd_dpos;
1020 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1021 
1022 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1023 	if (info.mrep == NULL) {
1024 		error = 0;
1025 		goto nfsmout;
1026 	}
1027 	fhp = &nfh.fh_generic;
1028 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1029 	if (info.v3) {
1030 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1031 		off = fxdr_hyper(tl);
1032 		tl += 3;
1033 		stable = fxdr_unsigned(int, *tl++);
1034 	} else {
1035 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1036 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1037 		tl += 2;
1038 		if (nfs_async)
1039 	    		stable = NFSV3WRITE_UNSTABLE;
1040 	}
1041 	retlen = len = fxdr_unsigned(int32_t, *tl);
1042 	cnt = i = 0;
1043 
1044 	/*
1045 	 * For NFS Version 2, it is not obvious what a write of zero length
1046 	 * should do, but I might as well be consistent with Version 3,
1047 	 * which is to return ok so long as there are no permission problems.
1048 	 */
1049 	if (len > 0) {
1050 	    zeroing = 1;
1051 	    mp1 = info.mrep;
1052 	    while (mp1) {
1053 		if (mp1 == info.md) {
1054 			zeroing = 0;
1055 			adjust = info.dpos - mtod(mp1, caddr_t);
1056 			mp1->m_len -= adjust;
1057 			if (mp1->m_len > 0 && adjust > 0)
1058 				mp1->m_data += adjust;
1059 		}
1060 		if (zeroing)
1061 			mp1->m_len = 0;
1062 		else if (mp1->m_len > 0) {
1063 			i += mp1->m_len;
1064 			if (i > len) {
1065 				mp1->m_len -= (i - len);
1066 				zeroing	= 1;
1067 			}
1068 			if (mp1->m_len > 0)
1069 				cnt++;
1070 		}
1071 		mp1 = mp1->m_next;
1072 	    }
1073 	}
1074 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1075 		error = EIO;
1076 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1077 				      2 * NFSX_UNSIGNED, &error));
1078 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1079 				 aftat_ret, vap);
1080 		error = 0;
1081 		goto nfsmout;
1082 	}
1083 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1084 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1085 	if (error) {
1086 		vp = NULL;
1087 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1088 				      2 * NFSX_UNSIGNED, &error));
1089 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1090 				 aftat_ret, vap);
1091 		error = 0;
1092 		goto nfsmout;
1093 	}
1094 	if (info.v3)
1095 		forat_ret = VOP_GETATTR(vp, &forat);
1096 	if (vp->v_type != VREG) {
1097 		if (info.v3)
1098 			error = EINVAL;
1099 		else
1100 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1101 	}
1102 	if (!error) {
1103 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1104 	}
1105 	if (error) {
1106 		vput(vp);
1107 		vp = NULL;
1108 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1109 				      NFSX_WCCDATA(info.v3), &error));
1110 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1111 				 aftat_ret, vap);
1112 		error = 0;
1113 		goto nfsmout;
1114 	}
1115 
1116 	if (len > 0) {
1117 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1118 		M_WAITOK);
1119 	    uiop->uio_iov = iv = ivp;
1120 	    uiop->uio_iovcnt = cnt;
1121 	    mp1 = info.mrep;
1122 	    while (mp1) {
1123 		if (mp1->m_len > 0) {
1124 			ivp->iov_base = mtod(mp1, caddr_t);
1125 			ivp->iov_len = mp1->m_len;
1126 			ivp++;
1127 		}
1128 		mp1 = mp1->m_next;
1129 	    }
1130 
1131 	    /*
1132 	     * XXX
1133 	     * The IO_METASYNC flag indicates that all metadata (and not just
1134 	     * enough to ensure data integrity) mus be written to stable storage
1135 	     * synchronously.
1136 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1137 	     */
1138 	    if (stable == NFSV3WRITE_UNSTABLE)
1139 		ioflags = IO_NODELOCKED;
1140 	    else if (stable == NFSV3WRITE_DATASYNC)
1141 		ioflags = (IO_SYNC | IO_NODELOCKED);
1142 	    else
1143 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1144 	    uiop->uio_resid = len;
1145 	    uiop->uio_rw = UIO_WRITE;
1146 	    uiop->uio_segflg = UIO_SYSSPACE;
1147 	    uiop->uio_td = NULL;
1148 	    uiop->uio_offset = off;
1149 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1150 	    nfsstats.srvvop_writes++;
1151 	    FREE((caddr_t)iv, M_TEMP);
1152 	}
1153 	aftat_ret = VOP_GETATTR(vp, vap);
1154 	vput(vp);
1155 	vp = NULL;
1156 	if (!error)
1157 		error = aftat_ret;
1158 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1159 			      NFSX_PREOPATTR(info.v3) +
1160 			      NFSX_POSTOPORFATTR(info.v3) +
1161 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1162 			      &error));
1163 	if (info.v3) {
1164 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1165 				 aftat_ret, vap);
1166 		if (error) {
1167 			error = 0;
1168 			goto nfsmout;
1169 		}
1170 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1171 		*tl++ = txdr_unsigned(retlen);
1172 		/*
1173 		 * If nfs_async is set, then pretend the write was FILESYNC.
1174 		 */
1175 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1176 			*tl++ = txdr_unsigned(stable);
1177 		else
1178 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1179 		/*
1180 		 * Actually, there is no need to txdr these fields,
1181 		 * but it may make the values more human readable,
1182 		 * for debugging purposes.
1183 		 */
1184 		if (nfsver.tv_sec == 0)
1185 			nfsver = boottime;
1186 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1187 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1188 	} else {
1189 		fp = nfsm_build(&info, NFSX_V2FATTR);
1190 		nfsm_srvfattr(nfsd, vap, fp);
1191 	}
1192 nfsmout:
1193 	*mrq = info.mreq;
1194 	if (vp)
1195 		vput(vp);
1196 	return(error);
1197 }
1198 
1199 /*
1200  * NFS write service with write gathering support. Called when
1201  * nfsrvw_procrastinate > 0.
1202  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1203  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1204  * Jan. 1994.
1205  */
1206 int
1207 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1208 		  struct thread *td, struct mbuf **mrq)
1209 {
1210 	struct iovec *ivp;
1211 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1212 	struct nfs_fattr *fp;
1213 	int i;
1214 	struct iovec *iov;
1215 	struct nfsrvw_delayhash *wpp;
1216 	struct ucred *cred;
1217 	struct vattr va, forat;
1218 	u_int32_t *tl;
1219 	int error = 0, rdonly, len, forat_ret = 1;
1220 	int ioflags, aftat_ret = 1, adjust, zeroing;
1221 	struct mbuf *mp1;
1222 	struct vnode *vp = NULL;
1223 	struct mount *mp = NULL;
1224 	struct uio io, *uiop = &io;
1225 	u_quad_t cur_usec;
1226 	struct nfsm_info info;
1227 
1228 	info.mreq = NULL;
1229 
1230 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1231 #ifndef nolint
1232 	i = 0;
1233 	len = 0;
1234 #endif
1235 	if (*ndp) {
1236 	    nfsd = *ndp;
1237 	    *ndp = NULL;
1238 	    info.mrep = nfsd->nd_mrep;
1239 	    info.mreq = NULL;
1240 	    info.md = nfsd->nd_md;
1241 	    info.dpos = nfsd->nd_dpos;
1242 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1243 	    cred = &nfsd->nd_cr;
1244 	    LIST_INIT(&nfsd->nd_coalesce);
1245 	    nfsd->nd_mreq = NULL;
1246 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1247 	    cur_usec = nfs_curusec();
1248 	    nfsd->nd_time = cur_usec +
1249 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1250 
1251 	    /*
1252 	     * Now, get the write header..
1253 	     */
1254 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1255 	    if (info.v3) {
1256 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1257 		nfsd->nd_off = fxdr_hyper(tl);
1258 		tl += 3;
1259 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1260 	    } else {
1261 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1262 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1263 		tl += 2;
1264 		if (nfs_async)
1265 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1266 	    }
1267 	    len = fxdr_unsigned(int32_t, *tl);
1268 	    nfsd->nd_len = len;
1269 	    nfsd->nd_eoff = nfsd->nd_off + len;
1270 
1271 	    /*
1272 	     * Trim the header out of the mbuf list and trim off any trailing
1273 	     * junk so that the mbuf list has only the write data.
1274 	     */
1275 	    zeroing = 1;
1276 	    i = 0;
1277 	    mp1 = info.mrep;
1278 	    while (mp1) {
1279 		if (mp1 == info.md) {
1280 		    zeroing = 0;
1281 		    adjust = info.dpos - mtod(mp1, caddr_t);
1282 		    mp1->m_len -= adjust;
1283 		    if (mp1->m_len > 0 && adjust > 0)
1284 			mp1->m_data += adjust;
1285 		}
1286 		if (zeroing)
1287 		    mp1->m_len = 0;
1288 		else {
1289 		    i += mp1->m_len;
1290 		    if (i > len) {
1291 			mp1->m_len -= (i - len);
1292 			zeroing = 1;
1293 		    }
1294 		}
1295 		mp1 = mp1->m_next;
1296 	    }
1297 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1298 nfsmout:
1299 		m_freem(info.mrep);
1300 		info.mrep = NULL;
1301 		error = EIO;
1302 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1303 		if (info.v3) {
1304 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1305 				     aftat_ret, &va);
1306 		}
1307 		nfsd->nd_mreq = info.mreq;
1308 		nfsd->nd_mrep = NULL;
1309 		nfsd->nd_time = 0;
1310 	    }
1311 
1312 	    /*
1313 	     * Add this entry to the hash and time queues.
1314 	     */
1315 	    owp = NULL;
1316 	    wp = slp->ns_tq.lh_first;
1317 	    while (wp && wp->nd_time < nfsd->nd_time) {
1318 		owp = wp;
1319 		wp = wp->nd_tq.le_next;
1320 	    }
1321 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1322 	    if (owp) {
1323 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1324 	    } else {
1325 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1326 	    }
1327 	    if (nfsd->nd_mrep) {
1328 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1329 		owp = NULL;
1330 		wp = wpp->lh_first;
1331 		while (wp &&
1332 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1333 		    owp = wp;
1334 		    wp = wp->nd_hash.le_next;
1335 		}
1336 		while (wp && wp->nd_off < nfsd->nd_off &&
1337 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1338 		    owp = wp;
1339 		    wp = wp->nd_hash.le_next;
1340 		}
1341 		if (owp) {
1342 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1343 
1344 		    /*
1345 		     * Search the hash list for overlapping entries and
1346 		     * coalesce.
1347 		     */
1348 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1349 			wp = nfsd->nd_hash.le_next;
1350 			if (NFSW_SAMECRED(owp, nfsd))
1351 			    nfsrvw_coalesce(owp, nfsd);
1352 		    }
1353 		} else {
1354 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1355 		}
1356 	    }
1357 	}
1358 
1359 	/*
1360 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1361 	 * and generate the associated reply mbuf list(s).
1362 	 */
1363 loop1:
1364 	cur_usec = nfs_curusec();
1365 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1366 		owp = nfsd->nd_tq.le_next;
1367 		if (nfsd->nd_time > cur_usec)
1368 		    break;
1369 		if (nfsd->nd_mreq)
1370 		    continue;
1371 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1372 		LIST_REMOVE(nfsd, nd_tq);
1373 		LIST_REMOVE(nfsd, nd_hash);
1374 		info.mrep = nfsd->nd_mrep;
1375 		info.mreq = NULL;
1376 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1377 		nfsd->nd_mrep = NULL;
1378 		cred = &nfsd->nd_cr;
1379 		forat_ret = aftat_ret = 1;
1380 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1381 				     nfsd->nd_nam, &rdonly,
1382 				     (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1383 		if (!error) {
1384 		    if (info.v3)
1385 			forat_ret = VOP_GETATTR(vp, &forat);
1386 		    if (vp->v_type != VREG) {
1387 			if (info.v3)
1388 			    error = EINVAL;
1389 			else
1390 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1391 		    }
1392 		} else {
1393 		    vp = NULL;
1394 		}
1395 		if (!error) {
1396 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1397 		}
1398 
1399 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1400 		    ioflags = IO_NODELOCKED;
1401 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1402 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1403 		else
1404 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1405 		uiop->uio_rw = UIO_WRITE;
1406 		uiop->uio_segflg = UIO_SYSSPACE;
1407 		uiop->uio_td = NULL;
1408 		uiop->uio_offset = nfsd->nd_off;
1409 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1410 		if (uiop->uio_resid > 0) {
1411 		    mp1 = info.mrep;
1412 		    i = 0;
1413 		    while (mp1) {
1414 			if (mp1->m_len > 0)
1415 			    i++;
1416 			mp1 = mp1->m_next;
1417 		    }
1418 		    uiop->uio_iovcnt = i;
1419 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1420 			M_TEMP, M_WAITOK);
1421 		    uiop->uio_iov = ivp = iov;
1422 		    mp1 = info.mrep;
1423 		    while (mp1) {
1424 			if (mp1->m_len > 0) {
1425 			    ivp->iov_base = mtod(mp1, caddr_t);
1426 			    ivp->iov_len = mp1->m_len;
1427 			    ivp++;
1428 			}
1429 			mp1 = mp1->m_next;
1430 		    }
1431 		    if (!error) {
1432 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1433 			nfsstats.srvvop_writes++;
1434 		    }
1435 		    FREE((caddr_t)iov, M_TEMP);
1436 		}
1437 		m_freem(info.mrep);
1438 		info.mrep = NULL;
1439 		if (vp) {
1440 		    aftat_ret = VOP_GETATTR(vp, &va);
1441 		    vput(vp);
1442 		    vp = NULL;
1443 		}
1444 
1445 		/*
1446 		 * Loop around generating replies for all write rpcs that have
1447 		 * now been completed.
1448 		 */
1449 		swp = nfsd;
1450 		do {
1451 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1452 		    if (error) {
1453 			nfsm_writereply(&info, nfsd, slp, error,
1454 					NFSX_WCCDATA(info.v3));
1455 			if (info.v3) {
1456 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1457 					     aftat_ret, &va);
1458 			}
1459 		    } else {
1460 			nfsm_writereply(&info, nfsd, slp, error,
1461 					NFSX_PREOPATTR(info.v3) +
1462 					NFSX_POSTOPORFATTR(info.v3) +
1463 					2 * NFSX_UNSIGNED +
1464 					NFSX_WRITEVERF(info.v3));
1465 			if (info.v3) {
1466 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1467 					     aftat_ret, &va);
1468 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1469 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1470 			    *tl++ = txdr_unsigned(swp->nd_stable);
1471 			    /*
1472 			     * Actually, there is no need to txdr these fields,
1473 			     * but it may make the values more human readable,
1474 			     * for debugging purposes.
1475 			     */
1476 			    if (nfsver.tv_sec == 0)
1477 				    nfsver = boottime;
1478 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1479 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1480 			} else {
1481 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1482 			    nfsm_srvfattr(nfsd, &va, fp);
1483 			}
1484 		    }
1485 		    nfsd->nd_mreq = info.mreq;
1486 		    if (nfsd->nd_mrep)
1487 			panic("nfsrv_write: nd_mrep not free");
1488 
1489 		    /*
1490 		     * Done. Put it at the head of the timer queue so that
1491 		     * the final phase can return the reply.
1492 		     */
1493 		    if (nfsd != swp) {
1494 			nfsd->nd_time = 0;
1495 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1496 		    }
1497 		    nfsd = swp->nd_coalesce.lh_first;
1498 		    if (nfsd) {
1499 			LIST_REMOVE(nfsd, nd_tq);
1500 		    }
1501 		} while (nfsd);
1502 		swp->nd_time = 0;
1503 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1504 		goto loop1;
1505 	}
1506 
1507 	/*
1508 	 * Search for a reply to return.
1509 	 */
1510 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1511 		if (nfsd->nd_mreq) {
1512 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1513 		    LIST_REMOVE(nfsd, nd_tq);
1514 		    break;
1515 		}
1516 	}
1517 	if (nfsd) {
1518 		*ndp = nfsd;
1519 		*mrq = nfsd->nd_mreq;
1520 	} else {
1521 		*ndp = NULL;
1522 		*mrq = NULL;
1523 	}
1524 	return (0);
1525 }
1526 
1527 /*
1528  * Coalesce the write request nfsd into owp. To do this we must:
1529  * - remove nfsd from the queues
1530  * - merge nfsd->nd_mrep into owp->nd_mrep
1531  * - update the nd_eoff and nd_stable for owp
1532  * - put nfsd on owp's nd_coalesce list
1533  * NB: Must be called at splsoftclock().
1534  */
1535 static void
1536 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1537 {
1538         int overlap;
1539         struct mbuf *mp1;
1540 	struct nfsrv_descript *p;
1541 
1542 	NFS_DPF(WG, ("C%03x-%03x",
1543 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1544         LIST_REMOVE(nfsd, nd_hash);
1545         LIST_REMOVE(nfsd, nd_tq);
1546         if (owp->nd_eoff < nfsd->nd_eoff) {
1547             overlap = owp->nd_eoff - nfsd->nd_off;
1548             if (overlap < 0)
1549                 panic("nfsrv_coalesce: bad off");
1550             if (overlap > 0)
1551                 m_adj(nfsd->nd_mrep, overlap);
1552             mp1 = owp->nd_mrep;
1553             while (mp1->m_next)
1554                 mp1 = mp1->m_next;
1555             mp1->m_next = nfsd->nd_mrep;
1556             owp->nd_eoff = nfsd->nd_eoff;
1557         } else
1558             m_freem(nfsd->nd_mrep);
1559         nfsd->nd_mrep = NULL;
1560         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1561             owp->nd_stable = NFSV3WRITE_FILESYNC;
1562         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1563             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1564             owp->nd_stable = NFSV3WRITE_DATASYNC;
1565         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1566 
1567 	/*
1568 	 * If nfsd had anything else coalesced into it, transfer them
1569 	 * to owp, otherwise their replies will never get sent.
1570 	 */
1571 	for (p = nfsd->nd_coalesce.lh_first; p;
1572 	     p = nfsd->nd_coalesce.lh_first) {
1573 	    LIST_REMOVE(p, nd_tq);
1574 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1575 	}
1576 }
1577 
1578 /*
1579  * nfs create service
1580  * now does a truncate to 0 length via. setattr if it already exists
1581  */
1582 int
1583 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1584 	     struct thread *td, struct mbuf **mrq)
1585 {
1586 	struct sockaddr *nam = nfsd->nd_nam;
1587 	struct ucred *cred = &nfsd->nd_cr;
1588 	struct nfs_fattr *fp;
1589 	struct vattr va, dirfor, diraft;
1590 	struct vattr *vap = &va;
1591 	struct nfsv2_sattr *sp;
1592 	u_int32_t *tl;
1593 	struct nlookupdata nd;
1594 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1595 	udev_t rdev = NOUDEV;
1596 	caddr_t cp;
1597 	int how, exclusive_flag = 0;
1598 	struct vnode *dirp;
1599 	struct vnode *dvp;
1600 	struct vnode *vp;
1601 	struct mount *mp;
1602 	nfsfh_t nfh;
1603 	fhandle_t *fhp;
1604 	u_quad_t tempsize;
1605 	u_char cverf[NFSX_V3CREATEVERF];
1606 	struct nfsm_info info;
1607 
1608 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1609 	nlookup_zero(&nd);
1610 	dirp = NULL;
1611 	dvp = NULL;
1612 	vp = NULL;
1613 
1614 	info.mrep = nfsd->nd_mrep;
1615 	info.mreq = NULL;
1616 	info.md = nfsd->nd_md;
1617 	info.dpos = nfsd->nd_dpos;
1618 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1619 
1620 	fhp = &nfh.fh_generic;
1621 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1622 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1623 
1624 	/*
1625 	 * Call namei and do initial cleanup to get a few things
1626 	 * out of the way.  If we get an initial error we cleanup
1627 	 * and return here to avoid special-casing the invalid nd
1628 	 * structure through the rest of the case.  dirp may be
1629 	 * set even if an error occurs, but the nd structure will not
1630 	 * be valid at all if an error occurs so we have to invalidate it
1631 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1632 	 */
1633 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1634 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1635 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1636 	mp = vfs_getvfs(&fhp->fh_fsid);
1637 
1638 	if (dirp) {
1639 		if (info.v3) {
1640 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1641 		} else {
1642 			vrele(dirp);
1643 			dirp = NULL;
1644 		}
1645 	}
1646 	if (error) {
1647 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1648 				      NFSX_WCCDATA(info.v3), &error));
1649 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1650 				 diraft_ret, &diraft);
1651 		error = 0;
1652 		goto nfsmout;
1653 	}
1654 
1655 	/*
1656 	 * No error.  Continue.  State:
1657 	 *
1658 	 *	dirp 		may be valid
1659 	 *	vp		may be valid or NULL if the target does not
1660 	 *			exist.
1661 	 *	dvp		is valid
1662 	 *
1663 	 * The error state is set through the code and we may also do some
1664 	 * opportunistic releasing of vnodes to avoid holding locks through
1665 	 * NFS I/O.  The cleanup at the end is a catch-all
1666 	 */
1667 
1668 	VATTR_NULL(vap);
1669 	if (info.v3) {
1670 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1671 		how = fxdr_unsigned(int, *tl);
1672 		switch (how) {
1673 		case NFSV3CREATE_GUARDED:
1674 			if (vp) {
1675 				error = EEXIST;
1676 				break;
1677 			}
1678 			/* fall through */
1679 		case NFSV3CREATE_UNCHECKED:
1680 			ERROROUT(nfsm_srvsattr(&info, vap));
1681 			break;
1682 		case NFSV3CREATE_EXCLUSIVE:
1683 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1684 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1685 			exclusive_flag = 1;
1686 			break;
1687 		};
1688 		vap->va_type = VREG;
1689 	} else {
1690 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1691 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1692 		if (vap->va_type == VNON)
1693 			vap->va_type = VREG;
1694 		vap->va_mode = nfstov_mode(sp->sa_mode);
1695 		switch (vap->va_type) {
1696 		case VREG:
1697 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1698 			if (tsize != -1)
1699 				vap->va_size = (u_quad_t)tsize;
1700 			break;
1701 		case VCHR:
1702 		case VBLK:
1703 		case VFIFO:
1704 			rdev = fxdr_unsigned(long, sp->sa_size);
1705 			break;
1706 		default:
1707 			break;
1708 		};
1709 	}
1710 
1711 	/*
1712 	 * Iff doesn't exist, create it
1713 	 * otherwise just truncate to 0 length
1714 	 *   should I set the mode too ?
1715 	 *
1716 	 * The only possible error we can have at this point is EEXIST.
1717 	 * nd.ni_vp will also be non-NULL in that case.
1718 	 */
1719 	if (vp == NULL) {
1720 		if (vap->va_mode == (mode_t)VNOVAL)
1721 			vap->va_mode = 0;
1722 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1723 			vn_unlock(dvp);
1724 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1725 					    nd.nl_cred, vap);
1726 			vrele(dvp);
1727 			dvp = NULL;
1728 			if (error == 0) {
1729 				if (exclusive_flag) {
1730 					exclusive_flag = 0;
1731 					VATTR_NULL(vap);
1732 					bcopy(cverf, (caddr_t)&vap->va_atime,
1733 						NFSX_V3CREATEVERF);
1734 					error = VOP_SETATTR(vp, vap, cred);
1735 				}
1736 			}
1737 		} else if (
1738 			vap->va_type == VCHR ||
1739 			vap->va_type == VBLK ||
1740 			vap->va_type == VFIFO
1741 		) {
1742 			/*
1743 			 * Handle SysV FIFO node special cases.  All other
1744 			 * devices require super user to access.
1745 			 */
1746 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1747 				vap->va_type = VFIFO;
1748                         if (vap->va_type != VFIFO &&
1749                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1750 				goto nfsmreply0;
1751                         }
1752 			vap->va_rmajor = umajor(rdev);
1753 			vap->va_rminor = uminor(rdev);
1754 
1755 			vn_unlock(dvp);
1756 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1757 			vrele(dvp);
1758 			dvp = NULL;
1759 			if (error)
1760 				goto nfsmreply0;
1761 #if 0
1762 			/*
1763 			 * XXX what is this junk supposed to do ?
1764 			 */
1765 
1766 			vput(vp);
1767 			vp = NULL;
1768 
1769 			/*
1770 			 * release dvp prior to lookup
1771 			 */
1772 			vput(dvp);
1773 			dvp = NULL;
1774 
1775 			/*
1776 			 * Setup for lookup.
1777 			 *
1778 			 * Even though LOCKPARENT was cleared, ni_dvp may
1779 			 * be garbage.
1780 			 */
1781 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1782 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1783 			nd.ni_cnd.cn_td = td;
1784 			nd.ni_cnd.cn_cred = cred;
1785 
1786 			error = lookup(&nd);
1787 			nd.ni_dvp = NULL;
1788 
1789 			if (error != 0) {
1790 				NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1791 						      0, &error));
1792 				/* fall through on certain errors */
1793 			}
1794 			nfsrv_object_create(nd.ni_vp);
1795 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1796 				error = EINVAL;
1797 				goto nfsmreply0;
1798 			}
1799 #endif
1800 		} else {
1801 			error = ENXIO;
1802 		}
1803 	} else {
1804 		if (vap->va_size != -1) {
1805 			error = nfsrv_access(mp, vp, VWRITE, cred,
1806 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1807 			if (!error) {
1808 				tempsize = vap->va_size;
1809 				VATTR_NULL(vap);
1810 				vap->va_size = tempsize;
1811 				error = VOP_SETATTR(vp, vap, cred);
1812 			}
1813 		}
1814 	}
1815 
1816 	if (!error) {
1817 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1818 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1819 		if (!error)
1820 			error = VOP_GETATTR(vp, vap);
1821 	}
1822 	if (info.v3) {
1823 		if (exclusive_flag && !error &&
1824 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1825 			error = EEXIST;
1826 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1827 		vrele(dirp);
1828 		dirp = NULL;
1829 	}
1830 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1831 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1832 			      NFSX_WCCDATA(info.v3),
1833 			      &error));
1834 	if (info.v3) {
1835 		if (!error) {
1836 			nfsm_srvpostop_fh(&info, fhp);
1837 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1838 		}
1839 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1840 				 diraft_ret, &diraft);
1841 		error = 0;
1842 	} else {
1843 		nfsm_srvfhtom(&info, fhp);
1844 		fp = nfsm_build(&info, NFSX_V2FATTR);
1845 		nfsm_srvfattr(nfsd, vap, fp);
1846 	}
1847 	goto nfsmout;
1848 
1849 nfsmreply0:
1850 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1851 	error = 0;
1852 	/* fall through */
1853 
1854 nfsmout:
1855 	*mrq = info.mreq;
1856 	if (dirp)
1857 		vrele(dirp);
1858 	nlookup_done(&nd);
1859 	if (dvp) {
1860 		if (dvp == vp)
1861 			vrele(dvp);
1862 		else
1863 			vput(dvp);
1864 	}
1865 	if (vp)
1866 		vput(vp);
1867 	return (error);
1868 }
1869 
1870 /*
1871  * nfs v3 mknod service
1872  */
1873 int
1874 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1875 	    struct thread *td, struct mbuf **mrq)
1876 {
1877 	struct sockaddr *nam = nfsd->nd_nam;
1878 	struct ucred *cred = &nfsd->nd_cr;
1879 	struct vattr va, dirfor, diraft;
1880 	struct vattr *vap = &va;
1881 	u_int32_t *tl;
1882 	struct nlookupdata nd;
1883 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1884 	enum vtype vtyp;
1885 	struct vnode *dirp;
1886 	struct vnode *dvp;
1887 	struct vnode *vp;
1888 	nfsfh_t nfh;
1889 	fhandle_t *fhp;
1890 	struct nfsm_info info;
1891 
1892 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1893 	nlookup_zero(&nd);
1894 	dirp = NULL;
1895 	dvp = NULL;
1896 	vp = NULL;
1897 
1898 	info.mrep = nfsd->nd_mrep;
1899 	info.mreq = NULL;
1900 	info.md = nfsd->nd_md;
1901 	info.dpos = nfsd->nd_dpos;
1902 
1903 	fhp = &nfh.fh_generic;
1904 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1905 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1906 
1907 	/*
1908 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1909 	 * is not valid.  However, nfsm_*() routines may still jump to
1910 	 * nfsmout.
1911 	 */
1912 
1913 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1914 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1915 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1916 	if (dirp)
1917 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1918 	if (error) {
1919 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1920 			   NFSX_WCCDATA(1), &error));
1921 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1922 				 diraft_ret, &diraft);
1923 		error = 0;
1924 		goto nfsmout;
1925 	}
1926 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1927 	vtyp = nfsv3tov_type(*tl);
1928 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1929 		error = NFSERR_BADTYPE;
1930 		goto out;
1931 	}
1932 	VATTR_NULL(vap);
1933 	ERROROUT(nfsm_srvsattr(&info, vap));
1934 	if (vtyp == VCHR || vtyp == VBLK) {
1935 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1936 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1937 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1938 	}
1939 
1940 	/*
1941 	 * Iff doesn't exist, create it.
1942 	 */
1943 	if (vp) {
1944 		error = EEXIST;
1945 		goto out;
1946 	}
1947 	vap->va_type = vtyp;
1948 	if (vap->va_mode == (mode_t)VNOVAL)
1949 		vap->va_mode = 0;
1950 	if (vtyp == VSOCK) {
1951 		vn_unlock(dvp);
1952 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1953 		vrele(dvp);
1954 		dvp = NULL;
1955 	} else {
1956 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1957 			goto out;
1958 
1959 		vn_unlock(dvp);
1960 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1961 		vrele(dvp);
1962 		dvp = NULL;
1963 		if (error)
1964 			goto out;
1965 	}
1966 
1967 	/*
1968 	 * send response, cleanup, return.
1969 	 */
1970 out:
1971 	nlookup_done(&nd);
1972 	if (dvp) {
1973 		if (dvp == vp)
1974 			vrele(dvp);
1975 		else
1976 			vput(dvp);
1977 		dvp = NULL;
1978 	}
1979 	if (!error) {
1980 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1981 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1982 		if (!error)
1983 			error = VOP_GETATTR(vp, vap);
1984 	}
1985 	if (vp) {
1986 		vput(vp);
1987 		vp = NULL;
1988 	}
1989 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1990 	if (dirp) {
1991 		vrele(dirp);
1992 		dirp = NULL;
1993 	}
1994 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1995 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1996 			      NFSX_WCCDATA(1), &error));
1997 	if (!error) {
1998 		nfsm_srvpostop_fh(&info, fhp);
1999 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2000 	}
2001 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2002 			 diraft_ret, &diraft);
2003 	*mrq = info.mreq;
2004 	return (0);
2005 nfsmout:
2006 	*mrq = info.mreq;
2007 	if (dirp)
2008 		vrele(dirp);
2009 	nlookup_done(&nd);
2010 	if (dvp) {
2011 		if (dvp == vp)
2012 			vrele(dvp);
2013 		else
2014 			vput(dvp);
2015 	}
2016 	if (vp)
2017 		vput(vp);
2018 	return (error);
2019 }
2020 
2021 /*
2022  * nfs remove service
2023  */
2024 int
2025 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2026 	     struct thread *td, struct mbuf **mrq)
2027 {
2028 	struct sockaddr *nam = nfsd->nd_nam;
2029 	struct ucred *cred = &nfsd->nd_cr;
2030 	struct nlookupdata nd;
2031 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2032 	struct vnode *dirp;
2033 	struct vnode *dvp;
2034 	struct vnode *vp;
2035 	struct vattr dirfor, diraft;
2036 	nfsfh_t nfh;
2037 	fhandle_t *fhp;
2038 	struct nfsm_info info;
2039 
2040 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2041 	nlookup_zero(&nd);
2042 	dirp = NULL;
2043 	dvp = NULL;
2044 	vp = NULL;
2045 
2046 	info.mrep = nfsd->nd_mrep;
2047 	info.mreq = NULL;
2048 	info.md = nfsd->nd_md;
2049 	info.dpos = nfsd->nd_dpos;
2050 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2051 
2052 	fhp = &nfh.fh_generic;
2053 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2054 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2055 
2056 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2057 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2058 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2059 	if (dirp) {
2060 		if (info.v3)
2061 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2062 	}
2063 	if (error == 0) {
2064 		if (vp->v_type == VDIR) {
2065 			error = EPERM;		/* POSIX */
2066 			goto out;
2067 		}
2068 		/*
2069 		 * The root of a mounted filesystem cannot be deleted.
2070 		 */
2071 		if (vp->v_flag & VROOT) {
2072 			error = EBUSY;
2073 			goto out;
2074 		}
2075 out:
2076 		if (!error) {
2077 			if (dvp != vp)
2078 				vn_unlock(dvp);
2079 			if (vp) {
2080 				vput(vp);
2081 				vp = NULL;
2082 			}
2083 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2084 			vrele(dvp);
2085 			dvp = NULL;
2086 		}
2087 	}
2088 	if (dirp && info.v3)
2089 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2090 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2091 	if (info.v3) {
2092 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2093 				 diraft_ret, &diraft);
2094 		error = 0;
2095 	}
2096 nfsmout:
2097 	*mrq = info.mreq;
2098 	nlookup_done(&nd);
2099 	if (dirp)
2100 		vrele(dirp);
2101 	if (dvp) {
2102 		if (dvp == vp)
2103 			vrele(dvp);
2104 		else
2105 			vput(dvp);
2106 	}
2107 	if (vp)
2108 		vput(vp);
2109 	return(error);
2110 }
2111 
2112 /*
2113  * nfs rename service
      *
      * Decodes the source directory handle + name and the target directory
      * handle + name from the request, resolves both with nfs_namei()
      * (NLC_RENAME_SRC and NLC_RENAME_DST), runs the topology checks below
      * and, if they pass, issues VOP_NRENAME().
      *
      * nfsd - request descriptor (supplies the request mbufs, credentials,
      *	  peer address and the ND_NFSV3 / ND_KERBAUTH flags)
      * slp  - server socket, passed through to nfs_namei() and nfsm_reply()
      * td   - thread, passed through to nfs_namei()
      * mrq  - out: set to info.mreq (the reply mbuf chain, NULL if none
      *	  was built)
      *
      * For v3 the reply carries wcc data for both the source and the target
      * directory.  On every path that builds a reply, error is cleared before
      * nfsmout, so the operation status travels in the reply rather than in
      * the return value.
2114  */
2115 int
2116 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2117 	     struct thread *td, struct mbuf **mrq)
2118 {
2119 	struct sockaddr *nam = nfsd->nd_nam;
2120 	struct ucred *cred = &nfsd->nd_cr;
2121 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2122 	int tdirfor_ret = 1, tdiraft_ret = 1;
2123 	struct nlookupdata fromnd, tond;
2124 	struct vnode *fvp, *fdirp, *fdvp;
2125 	struct vnode *tvp, *tdirp, *tdvp;
2126 	struct namecache *ncp;
2127 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2128 	nfsfh_t fnfh, tnfh;
2129 	fhandle_t *ffhp, *tfhp;
2130 	uid_t saved_uid;
2131 	struct nfsm_info info;
2132 
2133 	info.mrep = nfsd->nd_mrep;
2134 	info.mreq = NULL;
2135 	info.md = nfsd->nd_md;
2136 	info.dpos = nfsd->nd_dpos;
2137 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2138 
2139 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2140 #ifndef nolint
2141 	fvp = NULL;
2142 #endif
2143 	ffhp = &fnfh.fh_generic;
2144 	tfhp = &tnfh.fh_generic;
2145 
2146 	/*
2147 	 * Clear fields in case goto nfsmout occurs from macro.
2148 	 */
2149 
2150 	nlookup_zero(&fromnd);
2151 	nlookup_zero(&tond);
2152 	fdirp = NULL;
2153 	tdirp = NULL;
2154 
2155 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2156 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2157 
2158 	/*
2159 	 * Remember our original uid so that we can reset cr_uid before
2160 	 * the second nfs_namei() call, in case it is remapped.
2161 	 */
2162 	saved_uid = cred->cr_uid;
2163 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2164 			  NULL, NULL,
2165 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2166 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2167 	if (fdirp) {
2168 		if (info.v3)
2169 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2170 	}
2171 	if (error) {
2172 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2173 				      2 * NFSX_WCCDATA(info.v3), &error));
2174 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2175 				 fdiraft_ret, &fdiraft);
2176 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2177 				 tdiraft_ret, &tdiraft);
2178 		error = 0;
2179 		goto nfsmout;
2180 	}
2181 
2182 	/*
2183 	 * We have to unlock the from ncp before we can safely lookup
2184 	 * the target ncp.
2185 	 */
2186 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2187 	cache_unlock(&fromnd.nl_nch);
2188 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2189 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2190 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2191 	cred->cr_uid = saved_uid;
2192 
2193 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2194 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2195 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2196 	if (tdirp) {
2197 		if (info.v3)
2198 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2199 	}
2200 	if (error)
2201 		goto out1;
2202 
2203 	/*
2204 	 * relock the source
     	 *
     	 * Try a non-blocking lock first.  If that fails, both entries are
     	 * locked in an order chosen by comparing the ncp addresses, which
     	 * may require dropping and re-taking the target lock.
2205 	 */
2206 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2207 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2208 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2209 		cache_lock(&fromnd.nl_nch);
2210 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2211 	} else {
2212 		cache_unlock(&tond.nl_nch);
2213 		cache_lock(&fromnd.nl_nch);
2214 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2215 		cache_lock(&tond.nl_nch);
2216 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2217 	}
2218 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2219 
2220 	fvp = fromnd.nl_nch.ncp->nc_vp;
2221 	tvp = tond.nl_nch.ncp->nc_vp;
2222 
2223 	/*
2224 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2225 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2226 	 * point).  If we get through the checks these will be guaranteed
2227 	 * to be non-NULL.
2228 	 *
2229 	 * Holding the children ncp's should be sufficient to prevent
2230 	 * fdvp and tdvp ripouts.
2231 	 */
2232 	if (fromnd.nl_nch.ncp->nc_parent)
2233 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2234 	else
2235 		fdvp = NULL;
2236 	if (tond.nl_nch.ncp->nc_parent)
2237 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2238 	else
2239 		tdvp = NULL;
2240 
     	/*
     	 * The source ncp was unlocked while the target was being looked
     	 * up, so the source may have been removed in the meantime and the
     	 * relocked entry may no longer have a vnode.  fvp is dereferenced
     	 * unconditionally by the checks below, so bail out with ENOENT
     	 * instead of following a NULL pointer.
     	 */
     	if (fvp == NULL) {
     		error = ENOENT;
     		goto out;
     	}
     
2241 	if (tvp != NULL) {
2242 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2243 			if (info.v3)
2244 				error = EEXIST;
2245 			else
2246 				error = EISDIR;
2247 			goto out;
2248 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2249 			if (info.v3)
2250 				error = EEXIST;
2251 			else
2252 				error = ENOTDIR;
2253 			goto out;
2254 		}
2255 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2256 			if (info.v3)
2257 				error = EXDEV;
2258 			else
2259 				error = ENOTEMPTY;
2260 			goto out;
2261 		}
2262 	}
2263 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2264 		if (info.v3)
2265 			error = EXDEV;
2266 		else
2267 			error = ENOTEMPTY;
2268 		goto out;
2269 	}
2270 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2271 		if (info.v3)
2272 			error = EXDEV;
2273 		else
2274 			error = ENOTEMPTY;
2275 		goto out;
2276 	}
     	/*
     	 * Source is the directory the target lives in.  No goto here:
     	 * the error set below suppresses the ancestor walk and reaches
     	 * "out" by falling through.
     	 */
2277 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2278 		if (info.v3)
2279 			error = EINVAL;
2280 		else
2281 			error = ENOTEMPTY;
2282 	}
2283 
2284 	/*
2285 	 * You cannot rename a source into itself or a subdirectory of itself.
2286 	 * We check this by traversing the target directory upwards looking
2287 	 * for a match against the source.
2288 	 */
2289 	if (error == 0) {
2290 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2291 			if (fromnd.nl_nch.ncp == ncp) {
2292 				error = EINVAL;
2293 				break;
2294 			}
2295 		}
2296 	}
2297 
2298 	/*
2299 	 * If source is the same as the destination (that is the
2300 	 * same vnode with the same name in the same directory),
2301 	 * then there is nothing to do.
     	 *
     	 * This overrides the EINVAL that the walk above sets for the
     	 * identical-ncp case; -1 is turned into success at "out".
2302 	 */
2303 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2304 		error = -1;
2305 out:
2306 	if (!error) {
2307 		/*
2308 		 * The VOP_NRENAME function releases all vnode references &
2309 		 * locks prior to returning so we need to clear the pointers
2310 		 * to bypass cleanup code later on.
2311 		 */
2312 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2313 				    fdvp, tdvp, tond.nl_cred);
2314 	} else {
2315 		if (error == -1)
2316 			error = 0;
2317 	}
2318 	/* fall through */
2319 
2320 out1:
2321 	if (fdirp)
2322 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2323 	if (tdirp)
2324 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2325 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2326 			      2 * NFSX_WCCDATA(info.v3), &error));
2327 	if (info.v3) {
2328 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2329 				 fdiraft_ret, &fdiraft);
2330 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2331 				 tdiraft_ret, &tdiraft);
2332 	}
2333 	error = 0;
2334 	/* fall through */
2335 
2336 nfsmout:
2337 	*mrq = info.mreq;
2338 	if (tdirp)
2339 		vrele(tdirp);
2340 	nlookup_done(&tond);
2341 	if (fdirp)
2342 		vrele(fdirp);
2343 	nlookup_done(&fromnd);
2344 	return (error);
2345 }
2346 
2347 /*
2348  * nfs link service
      *
      * Decodes two file handles and a name from the request: fhp identifies
      * the existing object (resolved to xp via nfsrv_fhtovp()) and dfhp
      * identifies the directory in which the new name is looked up with
      * nfs_namei(NLC_CREATE).  If the checks pass, VOP_NLINK() creates the
      * link.
      *
      * Errors chosen here: EPERM when xp is a directory, EEXIST when the new
      * name already resolves to a vnode, EXDEV when xp and the target
      * directory are on different mounts.
      *
      * For v3 the reply carries post-op attributes of xp followed by wcc data
      * for the directory.  *mrq is set to info.mreq on every exit path.
2349  */
2350 int
2351 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2352 	   struct thread *td, struct mbuf **mrq)
2353 {
2354 	struct sockaddr *nam = nfsd->nd_nam;
2355 	struct ucred *cred = &nfsd->nd_cr;
2356 	struct nlookupdata nd;
2357 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2358 	int getret = 1;
2359 	struct vnode *dirp;
2360 	struct vnode *dvp;
2361 	struct vnode *vp;
2362 	struct vnode *xp;
2363 	struct mount *mp;	/* only ever set to NULL in this function */
2364 	struct mount *xmp;
2365 	struct vattr dirfor, diraft, at;
2366 	nfsfh_t nfh, dnfh;
2367 	fhandle_t *fhp, *dfhp;
2368 	struct nfsm_info info;
2369 
2370 	info.mrep = nfsd->nd_mrep;
2371 	info.mreq = NULL;
2372 	info.md = nfsd->nd_md;
2373 	info.dpos = nfsd->nd_dpos;
2374 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2375 
2376 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
     	/* Clear everything the nfsmout cleanup inspects before any macro can jump there. */
2377 	nlookup_zero(&nd);
2378 	dirp = dvp = vp = xp = NULL;
2379 	mp = xmp = NULL;
2380 
2381 	fhp = &nfh.fh_generic;
2382 	dfhp = &dnfh.fh_generic;
2383 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2384 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2385 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2386 
     	/*
     	 * Resolve the existing object.  NOTE(review): xp is released with
     	 * vrele() rather than vput() below, so the FALSE argument presumably
     	 * requests an unlocked, referenced vnode -- confirm against
     	 * nfsrv_fhtovp().
     	 */
2387 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2388 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2389 	if (error) {
2390 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2391 				      NFSX_POSTOPATTR(info.v3) +
2392 				      NFSX_WCCDATA(info.v3),
2393 				      &error));
2394 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2395 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2396 				 diraft_ret, &diraft);
2397 		xp = NULL;
2398 		error = 0;
2399 		goto nfsmout;
2400 	}
2401 	if (xp->v_type == VDIR) {
2402 		error = EPERM;		/* POSIX */
2403 		goto out1;
2404 	}
2405 
2406 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2407 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2408 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
     	/* Pre-operation directory attributes for the v3 wcc data. */
2409 	if (dirp) {
2410 		if (info.v3)
2411 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2412 	}
2413 	if (error)
2414 		goto out1;
2415 
2416 	if (vp != NULL) {
2417 		error = EEXIST;
2418 		goto out;
2419 	}
2420 	if (xp->v_mount != dvp->v_mount)
2421 		error = EXDEV;
2422 out:
2423 	if (!error) {
     		/*
     		 * dvp is unlocked before the VOP and only its reference is
     		 * dropped afterwards; clearing dvp keeps nfsmout from
     		 * releasing it a second time.
     		 */
2424 		vn_unlock(dvp);
2425 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2426 		vrele(dvp);
2427 		dvp = NULL;
2428 	}
2429 	/* fall through */
2430 
2431 out1:
2432 	if (info.v3)
2433 		getret = VOP_GETATTR(xp, &at);
2434 	if (dirp)
2435 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2436 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2437 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2438 			      &error));
2439 	if (info.v3) {
2440 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2441 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2442 				 diraft_ret, &diraft);
2443 		error = 0;
2444 	}
2445 	/* fall through */
2446 
2447 nfsmout:
2448 	*mrq = info.mreq;
2449 	nlookup_done(&nd);
2450 	if (dirp)
2451 		vrele(dirp);
2452 	if (xp)
2453 		vrele(xp);
     	/* dvp must be compared with vp before vp is released below. */
2454 	if (dvp) {
2455 		if (dvp == vp)
2456 			vrele(dvp);
2457 		else
2458 			vput(dvp);
2459 	}
2460 	if (vp)
2461 		vput(vp);
2462 	return(error);
2463 }
2464 
2465 /*
2466  * nfs symbolic link service
      *
      * Decodes the directory handle and new name, looks the name up with
      * nfs_namei(NLC_CREATE), then decodes the attributes and the link target
      * path (copied into a temporary M_TEMP buffer) and issues VOP_NSYMLINK().
      * Fails with EEXIST when the name already resolves to a vnode.
      *
      * For v3 a successful reply carries the new file handle and its
      * attributes, followed by wcc data for the directory; on failure only
      * the wcc data is sent.  *mrq is set to info.mreq on every exit path.
      *
      * Cleanup contract: the decode macros may jump straight to nfsmout, so
      * every resource acquired in this function (dvp, vp, dirp, pathcp) is
      * released there if it is still held.  The "out" path clears each
      * pointer after releasing it so that nfsmout does not release it again.
2467  */
2468 int
2469 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2470 	      struct thread *td, struct mbuf **mrq)
2471 {
2472 	struct sockaddr *nam = nfsd->nd_nam;
2473 	struct ucred *cred = &nfsd->nd_cr;
2474 	struct vattr va, dirfor, diraft;
2475 	struct nlookupdata nd;
2476 	struct vattr *vap = &va;
2477 	struct nfsv2_sattr *sp;
2478 	char *pathcp = NULL;
2479 	struct uio io;
2480 	struct iovec iv;
2481 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2482 	struct vnode *dirp;
2483 	struct vnode *vp;
2484 	struct vnode *dvp;
2485 	nfsfh_t nfh;
2486 	fhandle_t *fhp;
2487 	struct nfsm_info info;
2488 
2489 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2490 	nlookup_zero(&nd);
2491 	dirp = NULL;
2492 	dvp = NULL;
2493 	vp = NULL;
2494 
2495 	info.mrep = nfsd->nd_mrep;
2496 	info.mreq =  NULL;
2497 	info.md = nfsd->nd_md;
2498 	info.dpos = nfsd->nd_dpos;
2499 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2500 
2501 	fhp = &nfh.fh_generic;
2502 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2503 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2504 
2505 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2506 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2507 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2508 	if (dirp) {
2509 		if (info.v3)
2510 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2511 	}
2512 	if (error)
2513 		goto out;
2514 
     	/*
     	 * From here on dvp is held.  Any decode macro below that jumps to
     	 * nfsmout relies on the dvp release there.
     	 */
2515 	VATTR_NULL(vap);
2516 	if (info.v3) {
2517 		ERROROUT(nfsm_srvsattr(&info, vap));
2518 	}
2519 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
     	/* One extra byte for the NUL terminator stored below. */
2520 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2521 	iv.iov_base = pathcp;
2522 	iv.iov_len = len2;
2523 	io.uio_resid = len2;
2524 	io.uio_offset = 0;
2525 	io.uio_iov = &iv;
2526 	io.uio_iovcnt = 1;
2527 	io.uio_segflg = UIO_SYSSPACE;
2528 	io.uio_rw = UIO_READ;
2529 	io.uio_td = NULL;
2530 	ERROROUT(nfsm_mtouio(&info, &io, len2));
     	/* v2 sends the attributes after the path; only the mode is used. */
2531 	if (info.v3 == 0) {
2532 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2533 		vap->va_mode = nfstov_mode(sp->sa_mode);
2534 	}
2535 	*(pathcp + len2) = '\0';
2536 	if (vp) {
2537 		error = EEXIST;
2538 		goto out;
2539 	}
2540 
2541 	if (vap->va_mode == (mode_t)VNOVAL)
2542 		vap->va_mode = 0;
2543 	if (dvp != vp)
2544 		vn_unlock(dvp);
2545 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2546 	vrele(dvp);
2547 	dvp = NULL;
2548 	if (error == 0) {
2549 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2550 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2551 		if (!error)
2552 			error = VOP_GETATTR(vp, vap);
2553 	}
2554 
2555 out:
2556 	if (dvp) {
2557 		if (dvp == vp)
2558 			vrele(dvp);
2559 		else
2560 			vput(dvp);
     		/* Released; keep nfsmout from releasing it again. */
     		dvp = NULL;
2561 	}
2562 	if (vp) {
2563 		vput(vp);
2564 		vp = NULL;
2565 	}
2566 	if (pathcp) {
2567 		FREE(pathcp, M_TEMP);
2568 		pathcp = NULL;
2569 	}
2570 	if (dirp) {
2571 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2572 		vrele(dirp);
2573 		dirp = NULL;
2574 	}
2575 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2576 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2577 			      NFSX_WCCDATA(info.v3),
2578 			      &error));
2579 	if (info.v3) {
2580 		if (!error) {
2581 			nfsm_srvpostop_fh(&info, fhp);
2582 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2583 		}
2584 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2585 				 diraft_ret, &diraft);
2586 	}
2587 	error = 0;
2588 	/* fall through */
2589 
2590 nfsmout:
2591 	*mrq = info.mreq;
2592 	nlookup_done(&nd);
     	/*
     	 * Reached directly when request decoding fails after nfs_namei()
     	 * succeeded: dvp is still held then and must be released here,
     	 * in the same way the other directory-modifying services do it.
     	 * The dvp == vp comparison has to happen before vp is released.
     	 */
     	if (dvp) {
     		if (dvp == vp)
     			vrele(dvp);
     		else
     			vput(dvp);
     	}
2593 	if (vp)
2594 		vput(vp);
2595 	if (dirp)
2596 		vrele(dirp);
2597 	if (pathcp)
2598 		FREE(pathcp, M_TEMP);
2599 	return (error);
2600 }
2601 
2602 /*
2603  * nfs mkdir service
      *
      * Decodes the parent directory handle and the new name, looks the name
      * up with nfs_namei(NLC_CREATE), decodes the requested attributes (full
      * sattr for v3, only the mode word for v2) and issues VOP_NMKDIR().
      * Fails with EEXIST when the name already resolves to a vnode.
      *
      * Reply: for v3, on success the new handle and attributes followed by
      * directory wcc data, on failure only the wcc data; for v2 the new
      * handle and a v2 fattr.  *mrq is set to info.mreq on every exit path.
2604  */
2605 int
2606 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2607 	    struct thread *td, struct mbuf **mrq)
2608 {
2609 	struct sockaddr *nam = nfsd->nd_nam;
2610 	struct ucred *cred = &nfsd->nd_cr;
2611 	struct vattr va, dirfor, diraft;
2612 	struct vattr *vap = &va;
2613 	struct nfs_fattr *fp;
2614 	struct nlookupdata nd;
2615 	u_int32_t *tl;
2616 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2617 	struct vnode *dirp;
2618 	struct vnode *dvp;
2619 	struct vnode *vp;
2620 	nfsfh_t nfh;
2621 	fhandle_t *fhp;
2622 	struct nfsm_info info;
2623 
2624 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
     	/* Clear everything the nfsmout cleanup inspects before any macro can jump there. */
2625 	nlookup_zero(&nd);
2626 	dirp = NULL;
2627 	dvp = NULL;
2628 	vp = NULL;
2629 
2630 	info.dpos = nfsd->nd_dpos;
2631 	info.mrep = nfsd->nd_mrep;
2632 	info.mreq =  NULL;
2633 	info.md = nfsd->nd_md;
2634 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2635 
2636 	fhp = &nfh.fh_generic;
2637 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2638 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2639 
2640 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2641 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2642 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
     	/* Pre-operation directory attributes for the v3 wcc data. */
2643 	if (dirp) {
2644 		if (info.v3)
2645 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2646 	}
2647 	if (error) {
2648 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2649 				      NFSX_WCCDATA(info.v3), &error));
2650 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2651 				 diraft_ret, &diraft);
2652 		error = 0;
2653 		goto nfsmout;
2654 	}
2655 	VATTR_NULL(vap);
2656 	if (info.v3) {
2657 		ERROROUT(nfsm_srvsattr(&info, vap));
2658 	} else {
2659 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2660 		vap->va_mode = nfstov_mode(*tl++);
2661 	}
2662 
2663 	/*
2664 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2665 	 * nd.ni_vp, if it exists, is referenced but not locked.
     	 *
     	 * NOTE(review): nd.ni_dvp / nd.ni_vp do not appear in this function;
     	 * the vnodes returned by nfs_namei() are the locals dvp and vp.  The
     	 * cleanup at nfsmout uses vput() on vp, so the "not locked" claim
     	 * above may be stale -- confirm against nfs_namei().
2666 	 */
2667 
2668 	vap->va_type = VDIR;
2669 	if (vp != NULL) {
2670 		error = EEXIST;
2671 		goto out;
2672 	}
2673 
2674 	/*
2675 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2676 	 * component is freed by the VOP call.  This will fill-in
2677 	 * nd.ni_vp, reference, and exclusively lock it.
     	 *
     	 * NOTE(review): SAVESTART is not referenced anywhere in this
     	 * function; the new vnode is returned through the local vp.
2678 	 */
2679 	if (vap->va_mode == (mode_t)VNOVAL)
2680 		vap->va_mode = 0;
     	/*
     	 * dvp is unlocked before the VOP and only its reference is dropped
     	 * afterwards; clearing dvp keeps nfsmout from releasing it again.
     	 */
2681 	vn_unlock(dvp);
2682 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2683 	vrele(dvp);
2684 	dvp = NULL;
2685 
     	/* Build the new directory's file handle and fetch its attributes for the reply. */
2686 	if (error == 0) {
2687 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2688 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2689 		if (error == 0)
2690 			error = VOP_GETATTR(vp, vap);
2691 	}
2692 out:
2693 	if (dirp)
2694 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2695 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2696 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2697 			      NFSX_WCCDATA(info.v3),
2698 			      &error));
2699 	if (info.v3) {
2700 		if (!error) {
2701 			nfsm_srvpostop_fh(&info, fhp);
2702 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2703 		}
2704 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2705 				 diraft_ret, &diraft);
2706 	} else {
2707 		nfsm_srvfhtom(&info, fhp);
2708 		fp = nfsm_build(&info, NFSX_V2FATTR);
2709 		nfsm_srvfattr(nfsd, vap, fp);
2710 	}
2711 	error = 0;
2712 	/* fall through */
2713 
2714 nfsmout:
2715 	*mrq = info.mreq;
2716 	nlookup_done(&nd);
2717 	if (dirp)
2718 		vrele(dirp);
     	/* dvp must be compared with vp before vp is released below. */
2719 	if (dvp) {
2720 		if (dvp == vp)
2721 			vrele(dvp);
2722 		else
2723 			vput(dvp);
2724 	}
2725 	if (vp)
2726 		vput(vp);
2727 	return (error);
2728 }
2729 
2730 /*
2731  * nfs rmdir service
      *
      * Decodes the parent directory handle and the name, looks the name up
      * with nfs_namei(NLC_DELETE) and issues VOP_NRMDIR().  Fails with
      * ENOTDIR when the target is not a directory and with EBUSY when it is
      * flagged VROOT.
      *
      * For v3 the reply carries wcc data for the parent directory.  *mrq is
      * set to info.mreq on every exit path.
2732  */
2733 int
2734 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2735 	    struct thread *td, struct mbuf **mrq)
2736 {
2737 	struct sockaddr *nam = nfsd->nd_nam;
2738 	struct ucred *cred = &nfsd->nd_cr;
2739 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2740 	struct vnode *dirp;
2741 	struct vnode *dvp;
2742 	struct vnode *vp;
2743 	struct vattr dirfor, diraft;
2744 	nfsfh_t nfh;
2745 	fhandle_t *fhp;
2746 	struct nlookupdata nd;
2747 	struct nfsm_info info;
2748 
2749 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
     	/* Clear everything the nfsmout cleanup inspects before any macro can jump there. */
2750 	nlookup_zero(&nd);
2751 	dirp = NULL;
2752 	dvp = NULL;
2753 	vp = NULL;
2754 
2755 	info.mrep = nfsd->nd_mrep;
2756 	info.mreq = NULL;
2757 	info.md = nfsd->nd_md;
2758 	info.dpos = nfsd->nd_dpos;
2759 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2760 
2761 	fhp = &nfh.fh_generic;
2762 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2763 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2764 
2765 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2766 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2767 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
     	/* Pre-operation directory attributes for the v3 wcc data. */
2768 	if (dirp) {
2769 		if (info.v3)
2770 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2771 	}
2772 	if (error) {
2773 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2774 				      NFSX_WCCDATA(info.v3), &error));
2775 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2776 				 diraft_ret, &diraft);
2777 		error = 0;
2778 		goto nfsmout;
2779 	}
2780 	if (vp->v_type != VDIR) {
2781 		error = ENOTDIR;
2782 		goto out;
2783 	}
2784 
2785 	/*
2786 	 * The root of a mounted filesystem cannot be deleted.
2787 	 */
2788 	if (vp->v_flag & VROOT)
2789 		error = EBUSY;
2790 out:
2791 	/*
2792 	 * Issue or abort op.  Since SAVESTART is not set, path name
2793 	 * component is freed by the VOP after either.
     	 *
     	 * NOTE(review): SAVESTART is not referenced anywhere in this
     	 * function.  On the abort path dvp and vp stay held and are
     	 * released at nfsmout.
2794 	 */
2795 	if (!error) {
     		/*
     		 * Drop the target vnode and the directory lock before the
     		 * VOP; only the dvp reference is dropped afterwards.
     		 */
2796 		if (dvp != vp)
2797 			vn_unlock(dvp);
2798 		vput(vp);
2799 		vp = NULL;
2800 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2801 		vrele(dvp);
2802 		dvp = NULL;
2803 	}
     	/*
     	 * NOTE(review): nlookup_done() is called again at nfsmout on the
     	 * fall-through path; presumably a second call is harmless --
     	 * confirm against nlookup_done().
     	 */
2804 	nlookup_done(&nd);
2805 
2806 	if (dirp)
2807 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2808 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2809 	if (info.v3) {
2810 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2811 				 diraft_ret, &diraft);
2812 		error = 0;
2813 	}
2814 	/* fall through */
2815 
2816 nfsmout:
2817 	*mrq = info.mreq;
     	/* dvp must be compared with vp before vp is released below. */
2818 	if (dvp) {
2819 		if (dvp == vp)
2820 			vrele(dvp);
2821 		else
2822 			vput(dvp);
2823 	}
2824 	nlookup_done(&nd);
2825 	if (dirp)
2826 		vrele(dirp);
2827 	if (vp)
2828 		vput(vp);
2829 	return(error);
2830 }
2831 
2832 /*
2833  * nfs readdir service
2834  * - mallocs what it thinks is enough to read
2835  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2836  * - calls VOP_READDIR()
2837  * - loops around building the reply
2838  *	if the output generated exceeds count break out of loop
2839  *	The nfsm_clget macro is used here so that the reply will be packed
2840  *	tightly in mbuf clusters.
2841  * - it only knows that it has encountered eof when the VOP_READDIR()
2842  *	reads nothing
2843  * - as such one readdir rpc will return eof false although you are there
2844  *	and then the next will return eof
2845  * - it trims out records with d_fileno == 0
2846  *	this doesn't matter for Unix clients, but such records might
2847  *	confuse clients running other operating systems.
2848  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2849  *	than requested, but this may not apply to all filesystems. For
2850  *	example, client NFS does not { although it is never remote mounted
2851  *	anyhow }
2852  *     The alternate call nfsrv_readdirplus() does lookups as well.
2853  * PS: The NFS protocol spec. does not clarify what the "count" byte
2854  *	argument is a count of: just the name strings and file id's, or
2855  *	the entire reply rpc, or something else.
2856  *	I tried just file name and id sizes and it confused the Sun client,
2857  *	so I am using the full rpc size now. The "paranoia.." comment refers
2858  *	to including the status longwords that are not a part of the dir.
2859  *	"entry" structures, but are in the rpc.
2860  */
     /*
      * Layout of 32-bit words declared as a local (fl) in nfsrv_readdirplus(),
      * where fl_nfh is also accessed through a fhandle_t pointer.
      * NOTE(review): the field names suggest this is the per-entry trailer of
      * a v3 READDIRPLUS reply (offset, optional attributes, optional file
      * handle) -- confirm against the body of nfsrv_readdirplus().
      */
2861 struct flrep {
2862 	nfsuint64	fl_off;
2863 	u_int32_t	fl_postopok;
2864 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];	/* NFSX_V3FATTR bytes */
2865 	u_int32_t	fl_fhok;
2866 	u_int32_t	fl_fhsize;
2867 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];	/* NFSX_V3FH bytes */
2868 };
2869 
     /*
      * READDIR handler for v2 and v3.
      *
      * Request decoding: v3 supplies a 64-bit cookie, a 64-bit cookie
      * verifier and a count (5 words); v2 supplies a 32-bit cookie and a
      * count (2 words).  The read size is the count rounded up to a multiple
      * of DIRBLKSIZ; both are clamped to NFS_SRVMAXDATA(nfsd).
      *
      * The directory is read with VOP_READDIR() into a temporary buffer, the
      * entries at or before the requested cookie are skipped, and the rest
      * are packed into the reply until the count would be exceeded.
      *
      * *mrq is set to info.mreq on every exit path.  Every path that builds
      * a reply clears error before returning.
      */
2870 int
2871 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2872 	      struct thread *td, struct mbuf **mrq)
2873 {
2874 	struct sockaddr *nam = nfsd->nd_nam;
2875 	struct ucred *cred = &nfsd->nd_cr;
2876 	char *bp, *be;
2877 	struct dirent *dp;
2878 	caddr_t cp;
2879 	u_int32_t *tl;
2880 	struct mbuf *mp1, *mp2;
2881 	char *cpos, *cend, *rbuf;
2882 	struct vnode *vp = NULL;
2883 	struct mount *mp = NULL;
2884 	struct vattr at;
2885 	nfsfh_t nfh;
2886 	fhandle_t *fhp;
2887 	struct uio io;
2888 	struct iovec iv;
2889 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2890 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2891 	u_quad_t off, toff, verf;
2892 	off_t *cookies = NULL, *cookiep;
2893 	struct nfsm_info info;
2894 
2895 	info.mrep = nfsd->nd_mrep;
2896 	info.mreq = NULL;
2897 	info.md = nfsd->nd_md;
2898 	info.dpos = nfsd->nd_dpos;
2899 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2900 
2901 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2902 	fhp = &nfh.fh_generic;
2903 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2904 	if (info.v3) {
2905 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2906 		toff = fxdr_hyper(tl);
2907 		tl += 2;
2908 		verf = fxdr_hyper(tl);
2909 		tl += 2;
2910 	} else {
2911 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2912 		toff = fxdr_unsigned(u_quad_t, *tl++);
2913 		verf = 0;	/* shut up gcc */
2914 	}
     	/* tl now points at the count word in both protocol versions. */
2915 	off = toff;
2916 	cnt = fxdr_unsigned(int, *tl);
2917 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2918 	xfer = NFS_SRVMAXDATA(nfsd);
     	/* The unsigned casts make a negative client-supplied count clamp too. */
2919 	if ((unsigned)cnt > xfer)
2920 		cnt = xfer;
2921 	if ((unsigned)siz > xfer)
2922 		siz = xfer;
     	/* fullsiz keeps the buffer size; siz is reduced to the bytes actually read. */
2923 	fullsiz = siz;
2924 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2925 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2926 	if (!error && vp->v_type != VDIR) {
2927 		error = ENOTDIR;
2928 		vput(vp);
2929 		vp = NULL;
2930 	}
2931 	if (error) {
2932 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2933 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2934 		error = 0;
2935 		goto nfsmout;
2936 	}
2937 
2938 	/*
2939 	 * Obtain lock on vnode for this section of the code
     	 *
     	 * NOTE(review): no lock is taken here; vp is released with vput()
     	 * in this section and unlocked with vn_unlock() at its end, so it
     	 * presumably came back locked from nfsrv_fhtovp() -- confirm.
2940 	 */
2941 
2942 	if (info.v3) {
2943 		error = getret = VOP_GETATTR(vp, &at);
2944 #if 0
2945 		/*
2946 		 * XXX This check may be too strict for Solaris 2.5 clients.
2947 		 */
2948 		if (!error && toff && verf && verf != at.va_filerev)
2949 			error = NFSERR_BAD_COOKIE;
2950 #endif
2951 	}
2952 	if (!error)
2953 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2954 	if (error) {
2955 		vput(vp);
2956 		vp = NULL;
2957 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2958 				      NFSX_POSTOPATTR(info.v3), &error));
2959 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2960 		error = 0;
2961 		goto nfsmout;
2962 	}
     	/* From here on vp is only referenced, so it is released with vrele(). */
2963 	vn_unlock(vp);
2964 
2965 	/*
2966 	 * end section.  Allocate rbuf and continue
2967 	 */
2968 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2969 again:
     	/* (Re)read a full buffer starting at directory offset off. */
2970 	iv.iov_base = rbuf;
2971 	iv.iov_len = fullsiz;
2972 	io.uio_iov = &iv;
2973 	io.uio_iovcnt = 1;
2974 	io.uio_offset = (off_t)off;
2975 	io.uio_resid = fullsiz;
2976 	io.uio_segflg = UIO_SYSSPACE;
2977 	io.uio_rw = UIO_READ;
2978 	io.uio_td = NULL;
2979 	eofflag = 0;
     	/* Drop the cookie array left over from a previous pass. */
2980 	if (cookies) {
2981 		kfree((caddr_t)cookies, M_TEMP);
2982 		cookies = NULL;
2983 	}
2984 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2985 	off = (off_t)io.uio_offset;
     	/* A successful read that returned no cookie array is reported as NFSERR_PERM. */
2986 	if (!cookies && !error)
2987 		error = NFSERR_PERM;
2988 	if (info.v3) {
2989 		getret = VOP_GETATTR(vp, &at);
2990 		if (!error)
2991 			error = getret;
2992 	}
2993 	if (error) {
2994 		vrele(vp);
2995 		vp = NULL;
2996 		kfree((caddr_t)rbuf, M_TEMP);
2997 		if (cookies)
2998 			kfree((caddr_t)cookies, M_TEMP);
2999 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3000 				      NFSX_POSTOPATTR(info.v3), &error));
3001 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3002 		error = 0;
3003 		goto nfsmout;
3004 	}
3005 	if (io.uio_resid) {
3006 		siz -= io.uio_resid;
3007 
3008 		/*
3009 		 * If nothing read, return eof
3010 		 * rpc reply
3011 		 */
3012 		if (siz == 0) {
3013 			vrele(vp);
3014 			vp = NULL;
3015 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3016 					      NFSX_POSTOPATTR(info.v3) +
3017 					      NFSX_COOKIEVERF(info.v3) +
3018 					      2 * NFSX_UNSIGNED,
3019 					      &error));
3020 			if (info.v3) {
3021 				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3022 				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3023 				txdr_hyper(at.va_filerev, tl);
3024 				tl += 2;
3025 			} else
3026 				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
     			/* Empty entry list: "no more entries" followed by eof = true. */
3027 			*tl++ = nfs_false;
3028 			*tl = nfs_true;
3029 			FREE((caddr_t)rbuf, M_TEMP);
3030 			FREE((caddr_t)cookies, M_TEMP);
3031 			error = 0;
3032 			goto nfsmout;
3033 		}
3034 	}
3035 
3036 	/*
3037 	 * Check for degenerate cases of nothing useful read.
3038 	 * If so go try again
3039 	 */
3040 	cpos = rbuf;
3041 	cend = rbuf + siz;
3042 	dp = (struct dirent *)cpos;
3043 	cookiep = cookies;
3044 	/*
3045 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3046 	 * directory offset up to a block boundary, so it is necessary to
3047 	 * skip over the records that precede the requested offset. This
3048 	 * requires the assumption that file offset cookies monotonically
3049 	 * increase.
3050 	 */
3051 	while (cpos < cend && ncookies > 0 &&
3052 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3053 		 ((u_quad_t)(*cookiep)) <= toff)) {
3054 		dp = _DIRENT_NEXT(dp);
3055 		cpos = (char *)dp;
3056 		cookiep++;
3057 		ncookies--;
3058 	}
     	/*
     	 * Everything in this buffer was skipped: continue from where the
     	 * read stopped.
     	 * NOTE(review): if the client sends count == 0, siz and fullsiz are
     	 * 0 and cpos >= cend always holds; confirm VOP_READDIR() fails for
     	 * a zero-length read, otherwise this loops forever.
     	 */
3059 	if (cpos >= cend || ncookies == 0) {
3060 		toff = off;
3061 		siz = fullsiz;
3062 		goto again;
3063 	}
3064 
3065 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3066 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3067 			      NFSX_POSTOPATTR(info.v3) +
3068 			      NFSX_COOKIEVERF(info.v3) + siz,
3069 			      &error));
3070 	if (info.v3) {
3071 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3072 		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3073 		txdr_hyper(at.va_filerev, tl);
3074 	}
     	/*
     	 * bp/be are the write cursor and end of space in the current reply
     	 * mbuf; they are handed to nfsm_clget() together with mp1/mp2
     	 * before each word is stored.
     	 */
3075 	mp1 = mp2 = info.mb;
3076 	bp = info.bpos;
3077 	be = bp + M_TRAILINGSPACE(mp1);
3078 
3079 	/* Loop through the records and build reply */
3080 	while (cpos < cend && ncookies > 0) {
3081 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3082 			nlen = dp->d_namlen;
3083 			rem = nfsm_rndup(nlen) - nlen;
3084 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3085 			if (info.v3)
3086 				len += 2 * NFSX_UNSIGNED;
     			/* Entry would exceed the client's count: stop, and do not report eof. */
3087 			if (len > cnt) {
3088 				eofflag = 0;
3089 				break;
3090 			}
3091 			/*
3092 			 * Build the directory record xdr from
3093 			 * the dirent entry.
3094 			 */
3095 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3096 			*tl = nfs_true;
3097 			bp += NFSX_UNSIGNED;
     			/* v3 file ids are 64 bits: emit the high word first. */
3098 			if (info.v3) {
3099 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3100 				*tl = txdr_unsigned(dp->d_ino >> 32);
3101 				bp += NFSX_UNSIGNED;
3102 			}
3103 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3104 			*tl = txdr_unsigned(dp->d_ino);
3105 			bp += NFSX_UNSIGNED;
3106 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3107 			*tl = txdr_unsigned(nlen);
3108 			bp += NFSX_UNSIGNED;
3109 
3110 			/* And loop around copying the name */
3111 			xfer = nlen;
3112 			cp = dp->d_name;
3113 			while (xfer > 0) {
3114 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
     				/* Copy only what fits before be; the rest goes in the next pass. */
3115 				if ((bp+xfer) > be)
3116 					tsiz = be-bp;
3117 				else
3118 					tsiz = xfer;
3119 				bcopy(cp, bp, tsiz);
3120 				bp += tsiz;
3121 				xfer -= tsiz;
3122 				if (xfer > 0)
3123 					cp += tsiz;
3124 			}
3125 			/* And null pad to a int32_t boundary */
3126 			for (i = 0; i < rem; i++)
3127 				*bp++ = '\0';
3128 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3129 
3130 			/* Finish off the record */
     			/* The cookie is 64 bits for v3 (high word first), 32 bits for v2. */
3131 			if (info.v3) {
3132 				*tl = txdr_unsigned(*cookiep >> 32);
3133 				bp += NFSX_UNSIGNED;
3134 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3135 			}
3136 			*tl = txdr_unsigned(*cookiep);
3137 			bp += NFSX_UNSIGNED;
3138 		}
3139 		dp = _DIRENT_NEXT(dp);
3140 		cpos = (char *)dp;
3141 		cookiep++;
3142 		ncookies--;
3143 	}
3144 	vrele(vp);
3145 	vp = NULL;
     	/* Terminate the entry list, then append the eof flag. */
3146 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3147 	*tl = nfs_false;
3148 	bp += NFSX_UNSIGNED;
3149 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3150 	if (eofflag)
3151 		*tl = nfs_true;
3152 	else
3153 		*tl = nfs_false;
3154 	bp += NFSX_UNSIGNED;
     	/*
     	 * Fix up the length of the last mbuf written: a later mbuf gets its
     	 * length set from its data start, while the original reply mbuf is
     	 * extended by the bytes appended since info.bpos.
     	 */
3155 	if (mp1 != info.mb) {
3156 		if (bp < be)
3157 			mp1->m_len = bp - mtod(mp1, caddr_t);
3158 	} else
3159 		mp1->m_len += bp - info.bpos;
3160 	FREE((caddr_t)rbuf, M_TEMP);
3161 	FREE((caddr_t)cookies, M_TEMP);
3162 
3163 nfsmout:
3164 	*mrq = info.mreq;
3165 	if (vp)
3166 		vrele(vp);
3167 	return(error);
3168 }
3169 
3170 int
3171 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3172 		  struct thread *td, struct mbuf **mrq)
3173 {
3174 	struct sockaddr *nam = nfsd->nd_nam;
3175 	struct ucred *cred = &nfsd->nd_cr;
3176 	char *bp, *be;
3177 	struct dirent *dp;
3178 	caddr_t cp;
3179 	u_int32_t *tl;
3180 	struct mbuf *mp1, *mp2;
3181 	char *cpos, *cend, *rbuf;
3182 	struct vnode *vp = NULL, *nvp;
3183 	struct mount *mp = NULL;
3184 	struct flrep fl;
3185 	nfsfh_t nfh;
3186 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3187 	struct uio io;
3188 	struct iovec iv;
3189 	struct vattr va, at, *vap = &va;
3190 	struct nfs_fattr *fp;
3191 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3192 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3193 	u_quad_t off, toff, verf;
3194 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3195 	struct nfsm_info info;
3196 
3197 	info.mrep = nfsd->nd_mrep;
3198 	info.mreq = NULL;
3199 	info.md = nfsd->nd_md;
3200 	info.dpos = nfsd->nd_dpos;
3201 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3202 
3203 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3204 	fhp = &nfh.fh_generic;
3205 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3206 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3207 	toff = fxdr_hyper(tl);
3208 	tl += 2;
3209 	verf = fxdr_hyper(tl);
3210 	tl += 2;
3211 	siz = fxdr_unsigned(int, *tl++);
3212 	cnt = fxdr_unsigned(int, *tl);
3213 	off = toff;
3214 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3215 	xfer = NFS_SRVMAXDATA(nfsd);
3216 	if ((unsigned)cnt > xfer)
3217 		cnt = xfer;
3218 	if ((unsigned)siz > xfer)
3219 		siz = xfer;
3220 	fullsiz = siz;
3221 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3222 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3223 	if (!error && vp->v_type != VDIR) {
3224 		error = ENOTDIR;
3225 		vput(vp);
3226 		vp = NULL;
3227 	}
3228 	if (error) {
3229 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3230 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3231 		error = 0;
3232 		goto nfsmout;
3233 	}
3234 	error = getret = VOP_GETATTR(vp, &at);
3235 #if 0
3236 	/*
3237 	 * XXX This check may be too strict for Solaris 2.5 clients.
3238 	 */
3239 	if (!error && toff && verf && verf != at.va_filerev)
3240 		error = NFSERR_BAD_COOKIE;
3241 #endif
3242 	if (!error) {
3243 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3244 	}
3245 	if (error) {
3246 		vput(vp);
3247 		vp = NULL;
3248 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3249 				      NFSX_V3POSTOPATTR, &error));
3250 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3251 		error = 0;
3252 		goto nfsmout;
3253 	}
3254 	vn_unlock(vp);
3255 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3256 again:
3257 	iv.iov_base = rbuf;
3258 	iv.iov_len = fullsiz;
3259 	io.uio_iov = &iv;
3260 	io.uio_iovcnt = 1;
3261 	io.uio_offset = (off_t)off;
3262 	io.uio_resid = fullsiz;
3263 	io.uio_segflg = UIO_SYSSPACE;
3264 	io.uio_rw = UIO_READ;
3265 	io.uio_td = NULL;
3266 	eofflag = 0;
3267 	if (cookies) {
3268 		kfree((caddr_t)cookies, M_TEMP);
3269 		cookies = NULL;
3270 	}
3271 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3272 	off = (u_quad_t)io.uio_offset;
3273 	getret = VOP_GETATTR(vp, &at);
3274 	if (!cookies && !error)
3275 		error = NFSERR_PERM;
3276 	if (!error)
3277 		error = getret;
3278 	if (error) {
3279 		vrele(vp);
3280 		vp = NULL;
3281 		if (cookies)
3282 			kfree((caddr_t)cookies, M_TEMP);
3283 		kfree((caddr_t)rbuf, M_TEMP);
3284 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3285 				      NFSX_V3POSTOPATTR, &error));
3286 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3287 		error = 0;
3288 		goto nfsmout;
3289 	}
3290 	if (io.uio_resid) {
3291 		siz -= io.uio_resid;
3292 
3293 		/*
3294 		 * If nothing read, return eof
3295 		 * rpc reply
3296 		 */
3297 		if (siz == 0) {
3298 			vrele(vp);
3299 			vp = NULL;
3300 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3301 					      NFSX_V3POSTOPATTR +
3302 					      NFSX_V3COOKIEVERF +
3303 					      2 * NFSX_UNSIGNED,
3304 					      &error));
3305 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3306 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3307 			txdr_hyper(at.va_filerev, tl);
3308 			tl += 2;
3309 			*tl++ = nfs_false;
3310 			*tl = nfs_true;
3311 			FREE((caddr_t)cookies, M_TEMP);
3312 			FREE((caddr_t)rbuf, M_TEMP);
3313 			error = 0;
3314 			goto nfsmout;
3315 		}
3316 	}
3317 
3318 	/*
3319 	 * Check for degenerate cases of nothing useful read.
3320 	 * If so go try again
3321 	 */
3322 	cpos = rbuf;
3323 	cend = rbuf + siz;
3324 	dp = (struct dirent *)cpos;
3325 	cookiep = cookies;
3326 	/*
3327 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3328 	 * directory offset up to a block boundary, so it is necessary to
3329 	 * skip over the records that preceed the requested offset. This
3330 	 * requires the assumption that file offset cookies monotonically
3331 	 * increase.
3332 	 */
3333 	while (cpos < cend && ncookies > 0 &&
3334 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3335 		 ((u_quad_t)(*cookiep)) <= toff)) {
3336 		dp = _DIRENT_NEXT(dp);
3337 		cpos = (char *)dp;
3338 		cookiep++;
3339 		ncookies--;
3340 	}
3341 	if (cpos >= cend || ncookies == 0) {
3342 		toff = off;
3343 		siz = fullsiz;
3344 		goto again;
3345 	}
3346 
3347 	/*
3348 	 * Probe one of the directory entries to see if the filesystem
3349 	 * supports VGET.
3350 	 */
3351 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3352 		error = NFSERR_NOTSUPP;
3353 		vrele(vp);
3354 		vp = NULL;
3355 		kfree((caddr_t)cookies, M_TEMP);
3356 		kfree((caddr_t)rbuf, M_TEMP);
3357 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3358 				      NFSX_V3POSTOPATTR, &error));
3359 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3360 		error = 0;
3361 		goto nfsmout;
3362 	}
3363 	if (nvp) {
3364 		vput(nvp);
3365 		nvp = NULL;
3366 	}
3367 
3368 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3369 			2 * NFSX_UNSIGNED;
3370 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3371 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3372 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3373 	txdr_hyper(at.va_filerev, tl);
3374 	mp1 = mp2 = info.mb;
3375 	bp = info.bpos;
3376 	be = bp + M_TRAILINGSPACE(mp1);
3377 
3378 	/* Loop through the records and build reply */
3379 	while (cpos < cend && ncookies > 0) {
3380 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3381 			nlen = dp->d_namlen;
3382 			rem = nfsm_rndup(nlen) - nlen;
3383 
3384 			/*
3385 			 * For readdir_and_lookup get the vnode using
3386 			 * the file number.
3387 			 */
3388 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3389 				goto invalid;
3390 			bzero((caddr_t)nfhp, NFSX_V3FH);
3391 			nfhp->fh_fsid = fhp->fh_fsid;
3392 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3393 				vput(nvp);
3394 				nvp = NULL;
3395 				goto invalid;
3396 			}
3397 			if (VOP_GETATTR(nvp, vap)) {
3398 				vput(nvp);
3399 				nvp = NULL;
3400 				goto invalid;
3401 			}
3402 			vput(nvp);
3403 			nvp = NULL;
3404 
3405 			/*
3406 			 * If either the dircount or maxcount will be
3407 			 * exceeded, get out now. Both of these lengths
3408 			 * are calculated conservatively, including all
3409 			 * XDR overheads.
3410 			 */
3411 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3412 				NFSX_V3POSTOPATTR);
3413 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3414 			if (len > cnt || dirlen > fullsiz) {
3415 				eofflag = 0;
3416 				break;
3417 			}
3418 
3419 			/*
3420 			 * Build the directory record xdr from
3421 			 * the dirent entry.
3422 			 */
3423 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3424 			nfsm_srvfattr(nfsd, vap, fp);
3425 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3426 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3427 			fl.fl_postopok = nfs_true;
3428 			fl.fl_fhok = nfs_true;
3429 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3430 
3431 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3432 			*tl = nfs_true;
3433 			bp += NFSX_UNSIGNED;
3434 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3435 			*tl = txdr_unsigned(dp->d_ino >> 32);
3436 			bp += NFSX_UNSIGNED;
3437 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3438 			*tl = txdr_unsigned(dp->d_ino);
3439 			bp += NFSX_UNSIGNED;
3440 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3441 			*tl = txdr_unsigned(nlen);
3442 			bp += NFSX_UNSIGNED;
3443 
3444 			/* And loop around copying the name */
3445 			xfer = nlen;
3446 			cp = dp->d_name;
3447 			while (xfer > 0) {
3448 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3449 				if ((bp + xfer) > be)
3450 					tsiz = be - bp;
3451 				else
3452 					tsiz = xfer;
3453 				bcopy(cp, bp, tsiz);
3454 				bp += tsiz;
3455 				xfer -= tsiz;
3456 				cp += tsiz;
3457 			}
3458 			/* And null pad to a int32_t boundary */
3459 			for (i = 0; i < rem; i++)
3460 				*bp++ = '\0';
3461 
3462 			/*
3463 			 * Now copy the flrep structure out.
3464 			 */
3465 			xfer = sizeof (struct flrep);
3466 			cp = (caddr_t)&fl;
3467 			while (xfer > 0) {
3468 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3469 				if ((bp + xfer) > be)
3470 					tsiz = be - bp;
3471 				else
3472 					tsiz = xfer;
3473 				bcopy(cp, bp, tsiz);
3474 				bp += tsiz;
3475 				xfer -= tsiz;
3476 				cp += tsiz;
3477 			}
3478 		}
3479 invalid:
3480 		dp = _DIRENT_NEXT(dp);
3481 		cpos = (char *)dp;
3482 		cookiep++;
3483 		ncookies--;
3484 	}
3485 	vrele(vp);
3486 	vp = NULL;
3487 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3488 	*tl = nfs_false;
3489 	bp += NFSX_UNSIGNED;
3490 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3491 	if (eofflag)
3492 		*tl = nfs_true;
3493 	else
3494 		*tl = nfs_false;
3495 	bp += NFSX_UNSIGNED;
3496 	if (mp1 != info.mb) {
3497 		if (bp < be)
3498 			mp1->m_len = bp - mtod(mp1, caddr_t);
3499 	} else
3500 		mp1->m_len += bp - info.bpos;
3501 	FREE((caddr_t)cookies, M_TEMP);
3502 	FREE((caddr_t)rbuf, M_TEMP);
3503 nfsmout:
3504 	*mrq = info.mreq;
3505 	if (vp)
3506 		vrele(vp);
3507 	return(error);
3508 }
3509 
3510 /*
3511  * nfs commit service
3512  */
3513 int
3514 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3515 	     struct thread *td, struct mbuf **mrq)
3516 {
3517 	struct sockaddr *nam = nfsd->nd_nam;
3518 	struct ucred *cred = &nfsd->nd_cr;
3519 	struct vattr bfor, aft;
3520 	struct vnode *vp = NULL;
3521 	struct mount *mp = NULL;
3522 	nfsfh_t nfh;
3523 	fhandle_t *fhp;
3524 	u_int32_t *tl;
3525 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3526 	u_quad_t off;
3527 	struct nfsm_info info;
3528 
3529 	info.mrep = nfsd->nd_mrep;
3530 	info.mreq = NULL;
3531 	info.md = nfsd->nd_md;
3532 	info.dpos = nfsd->nd_dpos;
3533 
3534 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3535 	fhp = &nfh.fh_generic;
3536 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3537 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3538 
3539 	/*
3540 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3541 	 * count parameters, so these arguments are useless (someday maybe).
3542 	 */
3543 	off = fxdr_hyper(tl);
3544 	tl += 2;
3545 	cnt = fxdr_unsigned(int, *tl);
3546 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3547 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3548 	if (error) {
3549 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3550 				      2 * NFSX_UNSIGNED, &error));
3551 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3552 				 aft_ret, &aft);
3553 		error = 0;
3554 		goto nfsmout;
3555 	}
3556 	for_ret = VOP_GETATTR(vp, &bfor);
3557 
3558 	if (cnt > MAX_COMMIT_COUNT) {
3559 		/*
3560 		 * Give up and do the whole thing
3561 		 */
3562 		if (vp->v_object &&
3563 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3564 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3565 		}
3566 		error = VOP_FSYNC(vp, MNT_WAIT, 0);
3567 	} else {
3568 		/*
3569 		 * Locate and synchronously write any buffers that fall
3570 		 * into the requested range.  Note:  we are assuming that
3571 		 * f_iosize is a power of 2.
3572 		 */
3573 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3574 		int iomask = iosize - 1;
3575 		off_t loffset;
3576 
3577 		/*
3578 		 * Align to iosize boundry, super-align to page boundry.
3579 		 */
3580 		if (off & iomask) {
3581 			cnt += off & iomask;
3582 			off &= ~(u_quad_t)iomask;
3583 		}
3584 		if (off & PAGE_MASK) {
3585 			cnt += off & PAGE_MASK;
3586 			off &= ~(u_quad_t)PAGE_MASK;
3587 		}
3588 		loffset = off;
3589 
3590 		if (vp->v_object &&
3591 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3592 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3593 			    (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3594 		}
3595 
3596 		crit_enter();
3597 		while (cnt > 0) {
3598 			struct buf *bp;
3599 
3600 			/*
3601 			 * If we have a buffer and it is marked B_DELWRI we
3602 			 * have to lock and write it.  Otherwise the prior
3603 			 * write is assumed to have already been committed.
3604 			 *
3605 			 * WARNING: FINDBLK_TEST buffers represent stable
3606 			 *	    storage but not necessarily stable
3607 			 *	    content.  It is ok in this case.
3608 			 */
3609 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3610 				if (bp->b_flags & B_DELWRI)
3611 					bp = findblk(vp, loffset, 0);
3612 				else
3613 					bp = NULL;
3614 			}
3615 			if (bp) {
3616 				if (bp->b_flags & B_DELWRI) {
3617 					bremfree(bp);
3618 					bwrite(bp);
3619 					++nfs_commit_miss;
3620 				} else {
3621 					BUF_UNLOCK(bp);
3622 				}
3623 			}
3624 			++nfs_commit_blks;
3625 			if (cnt < iosize)
3626 				break;
3627 			cnt -= iosize;
3628 			loffset += iosize;
3629 		}
3630 		crit_exit();
3631 	}
3632 
3633 	aft_ret = VOP_GETATTR(vp, &aft);
3634 	vput(vp);
3635 	vp = NULL;
3636 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3637 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3638 			      &error));
3639 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3640 			 aft_ret, &aft);
3641 	if (!error) {
3642 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3643 		if (nfsver.tv_sec == 0)
3644 			nfsver = boottime;
3645 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3646 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3647 	} else {
3648 		error = 0;
3649 	}
3650 nfsmout:
3651 	*mrq = info.mreq;
3652 	if (vp)
3653 		vput(vp);
3654 	return(error);
3655 }
3656 
3657 /*
3658  * nfs statfs service
3659  */
3660 int
3661 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3662 	     struct thread *td, struct mbuf **mrq)
3663 {
3664 	struct sockaddr *nam = nfsd->nd_nam;
3665 	struct ucred *cred = &nfsd->nd_cr;
3666 	struct statfs *sf;
3667 	struct nfs_statfs *sfp;
3668 	int error = 0, rdonly, getret = 1;
3669 	struct vnode *vp = NULL;
3670 	struct mount *mp = NULL;
3671 	struct vattr at;
3672 	nfsfh_t nfh;
3673 	fhandle_t *fhp;
3674 	struct statfs statfs;
3675 	u_quad_t tval;
3676 	struct nfsm_info info;
3677 
3678 	info.mrep = nfsd->nd_mrep;
3679 	info.mreq = NULL;
3680 	info.md = nfsd->nd_md;
3681 	info.dpos = nfsd->nd_dpos;
3682 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3683 
3684 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3685 	fhp = &nfh.fh_generic;
3686 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3687 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3688 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3689 	if (error) {
3690 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3691 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3692 		error = 0;
3693 		goto nfsmout;
3694 	}
3695 	sf = &statfs;
3696 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3697 	getret = VOP_GETATTR(vp, &at);
3698 	vput(vp);
3699 	vp = NULL;
3700 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3701 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3702 			      &error));
3703 	if (info.v3)
3704 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3705 	if (error) {
3706 		error = 0;
3707 		goto nfsmout;
3708 	}
3709 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3710 	if (info.v3) {
3711 		tval = (u_quad_t)sf->f_blocks;
3712 		tval *= (u_quad_t)sf->f_bsize;
3713 		txdr_hyper(tval, &sfp->sf_tbytes);
3714 		tval = (u_quad_t)sf->f_bfree;
3715 		tval *= (u_quad_t)sf->f_bsize;
3716 		txdr_hyper(tval, &sfp->sf_fbytes);
3717 		tval = (u_quad_t)sf->f_bavail;
3718 		tval *= (u_quad_t)sf->f_bsize;
3719 		txdr_hyper(tval, &sfp->sf_abytes);
3720 		sfp->sf_tfiles.nfsuquad[0] = 0;
3721 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3722 		sfp->sf_ffiles.nfsuquad[0] = 0;
3723 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3724 		sfp->sf_afiles.nfsuquad[0] = 0;
3725 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3726 		sfp->sf_invarsec = 0;
3727 	} else {
3728 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3729 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3730 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3731 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3732 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3733 	}
3734 nfsmout:
3735 	*mrq = info.mreq;
3736 	if (vp)
3737 		vput(vp);
3738 	return(error);
3739 }
3740 
3741 /*
3742  * nfs fsinfo service
3743  */
3744 int
3745 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3746 	     struct thread *td, struct mbuf **mrq)
3747 {
3748 	struct sockaddr *nam = nfsd->nd_nam;
3749 	struct ucred *cred = &nfsd->nd_cr;
3750 	struct nfsv3_fsinfo *sip;
3751 	int error = 0, rdonly, getret = 1, pref;
3752 	struct vnode *vp = NULL;
3753 	struct mount *mp = NULL;
3754 	struct vattr at;
3755 	nfsfh_t nfh;
3756 	fhandle_t *fhp;
3757 	u_quad_t maxfsize;
3758 	struct statfs sb;
3759 	struct nfsm_info info;
3760 
3761 	info.mrep = nfsd->nd_mrep;
3762 	info.mreq = NULL;
3763 	info.md = nfsd->nd_md;
3764 	info.dpos = nfsd->nd_dpos;
3765 
3766 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3767 	fhp = &nfh.fh_generic;
3768 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3769 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3770 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3771 	if (error) {
3772 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3773 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3774 		error = 0;
3775 		goto nfsmout;
3776 	}
3777 
3778 	/* XXX Try to make a guess on the max file size. */
3779 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3780 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3781 
3782 	getret = VOP_GETATTR(vp, &at);
3783 	vput(vp);
3784 	vp = NULL;
3785 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3786 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3787 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3788 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3789 
3790 	/*
3791 	 * XXX
3792 	 * There should be file system VFS OP(s) to get this information.
3793 	 * For now, assume ufs.
3794 	 */
3795 	if (slp->ns_so->so_type == SOCK_DGRAM)
3796 		pref = NFS_MAXDGRAMDATA;
3797 	else
3798 		pref = NFS_MAXDATA;
3799 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3800 	sip->fs_rtpref = txdr_unsigned(pref);
3801 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3802 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3803 	sip->fs_wtpref = txdr_unsigned(pref);
3804 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3805 	sip->fs_dtpref = txdr_unsigned(pref);
3806 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3807 	sip->fs_timedelta.nfsv3_sec = 0;
3808 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3809 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3810 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3811 		NFSV3FSINFO_CANSETTIME);
3812 nfsmout:
3813 	*mrq = info.mreq;
3814 	if (vp)
3815 		vput(vp);
3816 	return(error);
3817 }
3818 
3819 /*
3820  * nfs pathconf service
3821  */
3822 int
3823 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3824 	       struct thread *td, struct mbuf **mrq)
3825 {
3826 	struct sockaddr *nam = nfsd->nd_nam;
3827 	struct ucred *cred = &nfsd->nd_cr;
3828 	struct nfsv3_pathconf *pc;
3829 	int error = 0, rdonly, getret = 1;
3830 	register_t linkmax, namemax, chownres, notrunc;
3831 	struct vnode *vp = NULL;
3832 	struct mount *mp = NULL;
3833 	struct vattr at;
3834 	nfsfh_t nfh;
3835 	fhandle_t *fhp;
3836 	struct nfsm_info info;
3837 
3838 	info.mrep = nfsd->nd_mrep;
3839 	info.mreq = NULL;
3840 	info.md = nfsd->nd_md;
3841 	info.dpos = nfsd->nd_dpos;
3842 
3843 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3844 	fhp = &nfh.fh_generic;
3845 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3846 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3847 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3848 	if (error) {
3849 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3850 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3851 		error = 0;
3852 		goto nfsmout;
3853 	}
3854 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3855 	if (!error)
3856 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3857 	if (!error)
3858 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3859 	if (!error)
3860 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3861 	getret = VOP_GETATTR(vp, &at);
3862 	vput(vp);
3863 	vp = NULL;
3864 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3865 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3866 			      &error));
3867 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3868 	if (error) {
3869 		error = 0;
3870 		goto nfsmout;
3871 	}
3872 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3873 
3874 	pc->pc_linkmax = txdr_unsigned(linkmax);
3875 	pc->pc_namemax = txdr_unsigned(namemax);
3876 	pc->pc_notrunc = txdr_unsigned(notrunc);
3877 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3878 
3879 	/*
3880 	 * These should probably be supported by VOP_PATHCONF(), but
3881 	 * until msdosfs is exportable (why would you want to?), the
3882 	 * Unix defaults should be ok.
3883 	 */
3884 	pc->pc_caseinsensitive = nfs_false;
3885 	pc->pc_casepreserving = nfs_true;
3886 nfsmout:
3887 	*mrq = info.mreq;
3888 	if (vp)
3889 		vput(vp);
3890 	return(error);
3891 }
3892 
3893 /*
3894  * Null operation, used by clients to ping server
3895  */
3896 /* ARGSUSED */
3897 int
3898 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3899 	   struct thread *td, struct mbuf **mrq)
3900 {
3901 	struct nfsm_info info;
3902 	int error = NFSERR_RETVOID;
3903 
3904 	info.mrep = nfsd->nd_mrep;
3905 	info.mreq = NULL;
3906 
3907 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3908 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3909 nfsmout:
3910 	*mrq = info.mreq;
3911 	return (error);
3912 }
3913 
3914 /*
3915  * No operation, used for obsolete procedures
3916  */
3917 /* ARGSUSED */
3918 int
3919 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3920 	   struct thread *td, struct mbuf **mrq)
3921 {
3922 	struct nfsm_info info;
3923 	int error;
3924 
3925 	info.mrep = nfsd->nd_mrep;
3926 	info.mreq = NULL;
3927 
3928 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3929 	if (nfsd->nd_repstat)
3930 		error = nfsd->nd_repstat;
3931 	else
3932 		error = EPROCUNAVAIL;
3933 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3934 	error = 0;
3935 nfsmout:
3936 	*mrq = info.mreq;
3937 	return (error);
3938 }
3939 
3940 /*
3941  * Perform access checking for vnodes obtained from file handles that would
3942  * refer to files already opened by a Unix client. You cannot just use
3943  * vn_writechk() and VOP_ACCESS() for two reasons.
3944  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3945  * 2 - The owner is to be given access irrespective of mode bits for some
3946  *     operations, so that processes that chmod after opening a file don't
3947  *     break. I don't like this because it opens a security hole, but since
3948  *     the nfs server opens a security hole the size of a barn door anyhow,
3949  *     what the heck.
3950  *
3951  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3952  * will return EPERM instead of EACCESS. EPERM is always an error.
3953  */
3954 static int
3955 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3956 	     int rdonly, struct thread *td, int override)
3957 {
3958 	struct vattr vattr;
3959 	int error;
3960 
3961 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3962 	if (flags & VWRITE) {
3963 		/* Just vn_writechk() changed to check rdonly */
3964 		/*
3965 		 * Disallow write attempts on read-only file systems;
3966 		 * unless the file is a socket or a block or character
3967 		 * device resident on the file system.
3968 		 */
3969 		if (rdonly ||
3970 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3971 			switch (vp->v_type) {
3972 			case VREG:
3973 			case VDIR:
3974 			case VLNK:
3975 				return (EROFS);
3976 			default:
3977 				break;
3978 			}
3979 		}
3980 		/*
3981 		 * If there's shared text associated with
3982 		 * the inode, we can't allow writing.
3983 		 */
3984 		if (vp->v_flag & VTEXT)
3985 			return (ETXTBSY);
3986 	}
3987 	error = VOP_GETATTR(vp, &vattr);
3988 	if (error)
3989 		return (error);
3990 	error = VOP_ACCESS(vp, flags, cred);	/* XXX ruid/rgid vs uid/gid */
3991 	/*
3992 	 * Allow certain operations for the owner (reads and writes
3993 	 * on files that are already open).
3994 	 */
3995 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3996 		error = 0;
3997 	return error;
3998 }
3999 #endif /* NFS_NOSERVER */
4000 
4001