xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision bcb3e04d)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (perhaps surprisingly, many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84 
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_zone.h>
88 #include <vm/vm_object.h>
89 
90 #include <sys/buf2.h>
91 
92 #include <sys/thread2.h>
93 
94 #include "nfsproto.h"
95 #include "rpcv2.h"
96 #include "nfs.h"
97 #include "xdr_subs.h"
98 #include "nfsm_subs.h"
99 
100 #ifdef NFSRV_DEBUG
/*
 * Debug trace hook.  With NFSRV_DEBUG defined the (parenthesized) argument
 * list is handed straight to kprintf; otherwise the call expands to nothing.
 * Callers therefore use double parentheses:
 *	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 */
101 #define nfsdbprintf(info)	kprintf info
102 #else
103 #define nfsdbprintf(info)
104 #endif
105 
/*
 * Not referenced in the visible part of this file; presumably an upper
 * bound applied to commit requests -- TODO confirm against the user.
 */
106 #define MAX_COMMIT_COUNT	(1024 * 1024)
107 
/*
 * Parameters of the sequential-read heuristic table consulted by
 * nfsrv_read():
 *	NUM_HEURISTIC	number of slots in nfsheur[]
 *	NHUSE_INIT	use count given to a slot when it is (re)claimed
 *	NHUSE_INC	amount added to the use count on every read
 *	NHUSE_MAX	ceiling for the use count
 */
108 #define NUM_HEURISTIC		1017
109 #define NHUSE_INIT		64
110 #define NHUSE_INC		16
111 #define NHUSE_MAX		2048
112 
/*
 * One slot of the read heuristic.  nfsrv_read() hashes the vnode pointer
 * into this table, ages nh_use of the slots it probes, and feeds
 * nh_seqcount (shifted by IO_SEQSHIFT) into the VOP_READ io flags.
 * nh_nextr is set to the uio offset reached by the previous read so the
 * next request can be recognised as sequential.
 */
113 static struct nfsheur {
114     struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
115     off_t nh_nextr;		/* next offset for sequential detection */
116     int nh_use;			/* use count for selection */
117     int nh_seqcount;		/* heuristic */
118 } nfsheur[NUM_HEURISTIC];
119 
/*
 * NFS file type tables, nine entries each.  Presumably indexed by the
 * vnode type (enum vtype) to obtain the on-the-wire type for the
 * respective protocol version -- TODO confirm against the users, which
 * are outside the visible part of this file.
 */
120 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
121 		      NFFIFO, NFNON };
/* The matching #endif is outside the visible part of this file. */
122 #ifndef NFS_NOSERVER
123 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
124 		      NFCHR, NFNON };
125 
/*
 * Write-gathering delays; not referenced in the visible part of this
 * file.  NOTE(review): units look like microseconds given the "* 1000"
 * scaling of NFS_GATHERDELAY -- confirm.
 */
126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
127 int nfsrvw_procrastinate_v3 = 0;
128 
/* Not referenced in the visible part of this file. */
129 static struct timespec	nfsver;
130 
131 SYSCTL_DECL(_vfs_nfs);
132 
/*
 * vfs.nfs.async: when non-zero, nfsrv_write() treats NFSv2 writes as
 * NFSV3WRITE_UNSTABLE instead of the default NFSV3WRITE_FILESYNC.
 */
133 int nfs_async;
134 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
/*
 * vfs.nfs.commit_blks / vfs.nfs.commit_miss: read-write integer knobs
 * exported through sysctl; they are updated outside the visible part of
 * this file.
 */
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
139 
/*
 * Forward declarations of file-local helpers (defined further down, outside
 * the visible part of this file).  nfsrv_access() is the permission check
 * used by the request handlers below; it returns non-zero when the
 * requested VREAD/VWRITE/VEXEC access is denied.
 */
140 static int nfsrv_access (struct mount *, struct vnode *, int,
141 			struct ucred *, int, struct thread *, int);
142 static void nfsrvw_coalesce (struct nfsrv_descript *,
143 		struct nfsrv_descript *);
144 
145 /*
146  * nfs v3 access service
 *
 * Decodes a file handle and a 32-bit access mask from the request in
 * nfsd, resolves the handle to a vnode, and clears from the mask every
 * access class the caller's credentials do not grant:
 *	- NFSV3ACCESS_READ if the VREAD check fails
 *	- NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND (plus NFSV3ACCESS_DELETE
 *	  for directories) if the VWRITE check fails
 *	- NFSV3ACCESS_LOOKUP (directories) or NFSV3ACCESS_EXECUTE (anything
 *	  else) if the VEXEC check fails
 * The reply carries the post-op attributes followed by the resulting mask.
 *
 * nfsd	parsed request descriptor (mbufs, credentials, flags)
 * slp		server socket the request arrived on
 * td		calling thread, passed through to nfsrv_access()
 * mrq		out: receives the reply mbuf chain (info.mreq), may be NULL
 *
 * Returns 0 once a reply (success or NFS error) has been built, or the
 * error left behind by one of the decoding wrappers.  The *OUT() wrappers
 * are defined outside this file; per the file header they can jump to
 * nfsmout, so vp is kept NULL whenever no reference is held.
147  */
148 int
149 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
150 	      struct thread *td, struct mbuf **mrq)
151 {
152 	struct sockaddr *nam = nfsd->nd_nam;
153 	struct ucred *cred = &nfsd->nd_cr;
154 	struct vnode *vp = NULL;
155 	struct mount *mp = NULL;
156 	nfsfh_t nfh;
157 	fhandle_t *fhp;
158 	int error = 0, rdonly, getret;
159 	struct vattr vattr, *vap = &vattr;
160 	u_long testmode, nfsmode;
161 	struct nfsm_info info;
162 	u_int32_t *tl;
163 
164 	info.dpos = nfsd->nd_dpos;
165 	info.md = nfsd->nd_md;
166 	info.mrep = nfsd->nd_mrep;
167 	info.mreq = NULL;
	/*
	 * Initialise the version flag exactly like the other handlers in
	 * this file do; &info is handed to the nfsm_*() helpers below and
	 * must not carry stack garbage in any field they may look at.
	 */
	info.v3 = (nfsd->nd_flag & ND_NFSV3);
168 
169 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
170 	fhp = &nfh.fh_generic;
171 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/* tl now points at the requested access mask (decoded further down) */
172 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
173 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
174 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
175 	if (error) {
		/* bad handle: status plus "no attributes" and we are done */
176 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
177 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
178 		error = 0;
179 		goto nfsmout;
180 	}
181 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
	/* each failed nfsrv_access() check strips its bits from the mask */
182 	if ((nfsmode & NFSV3ACCESS_READ) &&
183 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
184 		nfsmode &= ~NFSV3ACCESS_READ;
185 	if (vp->v_type == VDIR)
186 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
187 			NFSV3ACCESS_DELETE);
188 	else
189 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
190 	if ((nfsmode & testmode) &&
191 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
192 		nfsmode &= ~testmode;
193 	if (vp->v_type == VDIR)
194 		testmode = NFSV3ACCESS_LOOKUP;
195 	else
196 		testmode = NFSV3ACCESS_EXECUTE;
197 	if ((nfsmode & testmode) &&
198 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
199 		nfsmode &= ~testmode;
	/* fetch attributes, then drop vp before building the reply */
200 	getret = VOP_GETATTR(vp, vap);
201 	vput(vp);
202 	vp = NULL;
203 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
204 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
205 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
206 	tl = nfsm_build(&info, NFSX_UNSIGNED);
207 	*tl = txdr_unsigned(nfsmode);
208 nfsmout:
209 	*mrq = info.mreq;
210 	if (vp)
211 		vput(vp);
212 	return(error);
213 }
214 
215 /*
216  * nfs getattr service
 *
 * Decodes a file handle from the request in nfsd, resolves it to a vnode,
 * fetches the vnode attributes with VOP_GETATTR() and encodes them as an
 * nfs_fattr of the size selected by the request's ND_NFSV3 flag.
 *
 * nfsd	parsed request descriptor
 * slp		server socket the request arrived on
 * td		calling thread (unused here)
 * mrq		out: receives the reply mbuf chain (info.mreq), may be NULL
 *
 * Returns 0 once a reply (success or NFS error) has been built, or the
 * error left behind by one of the decoding wrappers.  The *OUT() wrappers
 * are defined outside this file; per the file header they can jump to
 * nfsmout.
217  */
218 int
219 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
220 	      struct thread *td, struct mbuf **mrq)
221 {
222 	struct sockaddr *nam = nfsd->nd_nam;
223 	struct ucred *cred = &nfsd->nd_cr;
224 	struct nfs_fattr *fp;
225 	struct vattr va;
226 	struct vattr *vap = &va;
227 	struct vnode *vp = NULL;
228 	struct mount *mp = NULL;
229 	nfsfh_t nfh;
230 	fhandle_t *fhp;
231 	int error = 0, rdonly;
232 	struct nfsm_info info;
233 
234 	info.mrep = nfsd->nd_mrep;
235 	info.md = nfsd->nd_md;
236 	info.dpos = nfsd->nd_dpos;
237 	info.mreq = NULL;
	/*
	 * Initialise the version flag exactly like the other handlers in
	 * this file do; &info is handed to the nfsm_*() helpers below and
	 * must not carry stack garbage in any field they may look at.
	 */
	info.v3 = (nfsd->nd_flag & ND_NFSV3);
238 
239 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
240 	fhp = &nfh.fh_generic;
241 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
242 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
243 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
244 	if (error) {
		/* bad handle: a status-only reply */
245 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
246 		error = 0;
247 		goto nfsmout;
248 	}
	/* fetch attributes, then drop vp before building the reply */
249 	error = VOP_GETATTR(vp, vap);
250 	vput(vp);
251 	vp = NULL;
252 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
253 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
254 	if (error) {
		/* the error status has been encoded; nothing else to add */
255 		error = 0;
256 		goto nfsmout;
257 	}
258 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259 	nfsm_srvfattr(nfsd, vap, fp);
260 	/* fall through */
261 
262 nfsmout:
263 	*mrq = info.mreq;
264 	if (vp)
265 		vput(vp);
266 	return(error);
267 }
268 
269 /*
270  * nfs setattr service
 *
 * Decodes a file handle and the attributes to set (NFSv3 sattr3 plus an
 * optional ctime guard, or the fixed NFSv2 sattr structure), resolves the
 * handle to a vnode, applies the attributes with VOP_SETATTR() and
 * replies with wcc data (v3) or the resulting attributes (v2).
 *
 * nfsd	parsed request descriptor
 * slp		server socket the request arrived on
 * td		calling thread, passed through to nfsrv_access()
 * mrq		out: receives the reply mbuf chain (info.mreq), may be NULL
 *
 * Returns 0 once a v3 reply has been built; for v2 the value of error
 * after nfsm_reply() is returned.  The *OUT() wrappers are defined outside
 * this file; per the file header they can jump to nfsmout, so vp is kept
 * NULL whenever no reference is held.
271  */
272 int
273 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
274 	      struct thread *td, struct mbuf **mrq)
275 {
276 	struct sockaddr *nam = nfsd->nd_nam;
277 	struct ucred *cred = &nfsd->nd_cr;
278 	struct vattr va, preat;
279 	struct vattr *vap = &va;
280 	struct nfsv2_sattr *sp;
281 	struct nfs_fattr *fp;
282 	struct vnode *vp = NULL;
283 	struct mount *mp = NULL;
284 	nfsfh_t nfh;
285 	fhandle_t *fhp;
286 	u_int32_t *tl;
	/* the *_ret values start at 1, i.e. "attributes not available" */
287 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
288 	int gcheck = 0;
289 	struct timespec guard;
290 	struct nfsm_info info;
291 
292 	info.mrep = nfsd->nd_mrep;
293 	info.mreq = NULL;
294 	info.md = nfsd->nd_md;
295 	info.dpos = nfsd->nd_dpos;
296 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
297 
298 	guard.tv_sec = 0;	/* fix compiler warning */
299 	guard.tv_nsec = 0;
300 
301 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
302 	fhp = &nfh.fh_generic;
303 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/* start from "nothing to change"; only decoded fields get filled in */
304 	VATTR_NULL(vap);
305 	if (info.v3) {
306 		ERROROUT(nfsm_srvsattr(&info, vap));
		/* guard flag, followed by the guard time when the flag is set */
307 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
308 		gcheck = fxdr_unsigned(int, *tl);
309 		if (gcheck) {
310 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
311 			fxdr_nfsv3time(tl, &guard);
312 		}
313 	} else {
314 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
315 		/*
316 		 * Nah nah nah nah na nah
317 		 * There is a bug in the Sun client that puts 0xffff in the mode
318 		 * field of sattr when it should put in 0xffffffff. The u_short
319 		 * doesn't sign extend.
320 		 * --> check the low order 2 bytes for 0xffff
321 		 */
322 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
323 			vap->va_mode = nfstov_mode(sp->sa_mode);
		/* a field equal to nfs_xdrneg1 is left untouched */
324 		if (sp->sa_uid != nfs_xdrneg1)
325 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
326 		if (sp->sa_gid != nfs_xdrneg1)
327 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
328 		if (sp->sa_size != nfs_xdrneg1)
329 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
330 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
331 #ifdef notyet
332 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
333 #else
			/* only the seconds of the access time are taken */
334 			vap->va_atime.tv_sec =
335 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
336 			vap->va_atime.tv_nsec = 0;
337 #endif
338 		}
339 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
340 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
341 
342 	}
343 
344 	/*
345 	 * Now that we have all the fields, lets do it.
346 	 */
347 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
348 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
349 	if (error) {
		/*
		 * Bad handle.  preat_ret and postat_ret are still 1 here, so
		 * the (uninitialized) preat and the request vap are passed
		 * along flagged as unavailable.
		 */
350 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
351 				      2 * NFSX_UNSIGNED, &error));
352 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
353 				 postat_ret, vap);
354 		error = 0;
355 		goto nfsmout;
356 	}
357 
358 	/*
359 	 * vp now an active resource, pay careful attention to cleanup
360 	 */
361 
362 	if (info.v3) {
		/*
		 * Pre-op attributes for the wcc data.  When the client sent
		 * a guard, the file's ctime must match it exactly, otherwise
		 * the request fails with NFSERR_NOT_SYNC.
		 */
363 		error = preat_ret = VOP_GETATTR(vp, &preat);
364 		if (!error && gcheck &&
365 			(preat.va_ctime.tv_sec != guard.tv_sec ||
366 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
367 			error = NFSERR_NOT_SYNC;
368 		if (error) {
369 			vput(vp);
370 			vp = NULL;
371 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
372 					      NFSX_WCCDATA(info.v3), &error));
373 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
374 					 postat_ret, vap);
375 			error = 0;
376 			goto nfsmout;
377 		}
378 	}
379 
380 	/*
381 	 * If the size is being changed write access is required, otherwise
382 	 * just check for a read only file system.
383 	 */
	/* an all-ones va_size means "size not being changed" */
384 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
385 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
386 			error = EROFS;
387 			goto out;
388 		}
389 	} else {
390 		if (vp->v_type == VDIR) {
391 			error = EISDIR;
392 			goto out;
393 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
394 			    td, 0)) != 0){
395 			goto out;
396 		}
397 	}
398 	error = VOP_SETATTR(vp, vap, cred);
	/* vap is reused to receive the post-op attributes */
399 	postat_ret = VOP_GETATTR(vp, vap);
400 	if (!error)
401 		error = postat_ret;
402 out:
403 	vput(vp);
404 	vp = NULL;
405 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
406 		   NFSX_WCCORFATTR(info.v3), &error));
407 	if (info.v3) {
408 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
409 				 postat_ret, vap);
410 		error = 0;
411 		goto nfsmout;
412 	} else {
413 		fp = nfsm_build(&info, NFSX_V2FATTR);
414 		nfsm_srvfattr(nfsd, vap, fp);
415 	}
416 	/* fall through */
417 
418 nfsmout:
419 	*mrq = info.mreq;
420 	if (vp)
421 		vput(vp);
422 	return(error);
423 }
424 
425 /*
426  * nfs lookup rpc
 *
 * Decodes a directory file handle and a name, resolves the name with
 * nfs_namei(), optionally substitutes the configured index file when the
 * public file handle was used and the result is a directory, and replies
 * with the file handle and attributes of the object found (plus the
 * directory's post-op attributes for v3).
 *
 * nfsd	parsed request descriptor
 * slp		server socket the request arrived on
 * td		calling thread, passed through to nfs_namei()
 * mrq		out: receives the reply mbuf chain (info.mreq), may be NULL
 *
 * Returns 0 once a reply (success or NFS error) has been built, or the
 * error left behind by one of the decoding wrappers.  The *OUT() wrappers
 * are defined outside this file; per the file header they can jump to
 * nfsmout, which releases whatever of dirp / nd / vp is still held.
427  */
428 int
429 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
430 	     struct thread *td, struct mbuf **mrq)
431 {
432 	struct sockaddr *nam = nfsd->nd_nam;
433 	struct ucred *cred = &nfsd->nd_cr;
434 	struct nfs_fattr *fp;
435 	struct nlookupdata nd;
436 	struct vnode *vp;
437 	struct vnode *dirp;
438 	struct nchandle nch;
439 	nfsfh_t nfh;
440 	fhandle_t *fhp;
441 	int error = 0, len, dirattr_ret = 1;
442 	int pubflag;
443 	struct vattr va, dirattr, *vap = &va;
444 	struct nfsm_info info;
445 
446 	info.mrep = nfsd->nd_mrep;
447 	info.mreq = NULL;
448 	info.md = nfsd->nd_md;
449 	info.dpos = nfsd->nd_dpos;
450 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
451 
452 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* make nd/dirp/vp safe for the cleanup at nfsmout from here on */
453 	nlookup_zero(&nd);
454 	dirp = NULL;
455 	vp = NULL;
456 
457 	fhp = &nfh.fh_generic;
458 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
459 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
460 
461 	pubflag = nfs_ispublicfh(fhp);
462 
463 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
464 		fhp, len, slp, nam, &info.md, &info.dpos,
465 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
466 
467 	/*
468 	 * namei failure, only dirp to cleanup.  Clear out garbage from
469 	 * structure in case macros jump to nfsmout.
470 	 */
471 
472 	if (error) {
473 		if (dirp) {
474 			if (info.v3)
475 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
476 			vrele(dirp);
477 			dirp = NULL;
478 		}
479 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
480 				      NFSX_POSTOPATTR(info.v3), &error));
481 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
482 		error = 0;
483 		goto nfsmout;
484 	}
485 
486 	/*
487 	 * Locate index file for public filehandle
488 	 *
489 	 * error is 0 on entry to this block.  On exit it is 0 with vp
	 * locked, or non-zero with vp == NULL (the directory could not be
	 * relocked, or the result lies outside the public filesystem).
490 	 */
491 
492 	if (pubflag) {
493 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
494 			/*
495 			 * Setup call to lookup() to see if we can find
496 			 * the index file. Arguably, this doesn't belong
497 			 * in a kernel.. Ugh.  If an error occurs, do not
498 			 * try to install an index file and then clear the
499 			 * error.
500 			 *
			 * The directory vp is unlocked for the duration
			 * of the second lookup, and nd is torn down and
			 * re-initialised relative to a copy of its
			 * namecache handle.
506 			 */
507 			vn_unlock(vp);
508 			cache_copy(&nd.nl_nch, &nch);
509 			nlookup_done(&nd);
510 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
511 						UIO_SYSSPACE, 0, cred, &nch);
512 			cache_drop(&nch);
513 			if (error == 0)
514 				error = nlookup(&nd);
515 
516 			if (error == 0) {
517 				/*
518 				 * Found an index file. Get rid of
519 				 * the old references.  transfer vp and
520 				 * load up the new vp.  Fortunately we do
521 				 * not have to deal with dvp, that would be
522 				 * a huge mess.
				 *
				 * The old (unlocked, still referenced) vp
				 * becomes dirp and is vrele()'d below.
523 				 */
524 				if (dirp)
525 					vrele(dirp);
526 				dirp = vp;
527 				vp = NULL;
528 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
529 							LK_EXCLUSIVE, &vp);
530 				KKASSERT(error == 0);
			} else {
				/*
				 * No index file.  We keep answering for
				 * the directory itself, so take back the
				 * lock dropped above: everything below
				 * (VFS_VPTOFH, VOP_GETATTR, vput) expects
				 * a locked vp, and vput() would otherwise
				 * release a lock we do not hold.  If the
				 * relock fails only the reference is left
				 * to drop, and the error is reported.
				 */
				error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				if (error) {
					vrele(vp);
					vp = NULL;
				}
531 			}
533 		}
534 		/*
535 		 * If the public filehandle was used, check that this lookup
536 		 * didn't result in a filehandle outside the publicly exported
537 		 * filesystem.  We clear the poor vp here to avoid lockups due
538 		 * to NFS I/O.
539 		 */
540 
541 		if (vp != NULL && vp->v_mount != nfs_pub.np_mount) {
542 			vput(vp);
543 			vp = NULL;
544 			error = EPERM;
545 		}
546 	}
547 
548 	if (dirp) {
549 		if (info.v3)
550 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
551 		vrele(dirp);
552 		dirp = NULL;
553 	}
554 
555 	/*
556 	 * Resources at this point:
557 	 *	vp	locked and referenced when error == 0, NULL otherwise
	 *	dirp	NULL
	 *	nd	still initialised (released below or at nfsmout)
559 	 */
560 
561 	if (error) {
562 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
563 				      NFSX_POSTOPATTR(info.v3), &error));
564 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
565 		error = 0;
566 		goto nfsmout;
567 	}
568 
569 	/*
570 	 * Clear out some resources prior to potentially blocking.  This
571 	 * is not as critical as ni_dvp resources in other routines, but
572 	 * it helps.
573 	 */
574 	nlookup_done(&nd);
575 
576 	/*
577 	 * Get underlying attribute, then release remaining resources ( for
578 	 * the same potential blocking reason ) and reply.
579 	 */
	/* fhp is reused: its fid is overwritten with that of the result */
580 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
581 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
582 	if (!error)
583 		error = VOP_GETATTR(vp, vap);
584 
585 	vput(vp);
586 	vp = NULL;
587 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
588 			      NFSX_SRVFH(info.v3) +
589 			      NFSX_POSTOPORFATTR(info.v3) +
590 			      NFSX_POSTOPATTR(info.v3),
591 			      &error));
592 	if (error) {
593 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
594 		error = 0;
595 		goto nfsmout;
596 	}
597 	nfsm_srvfhtom(&info, fhp);
598 	if (info.v3) {
		/* object attributes first, then those of the directory */
599 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
600 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
601 	} else {
602 		fp = nfsm_build(&info, NFSX_V2FATTR);
603 		nfsm_srvfattr(nfsd, vap, fp);
604 	}
605 
606 nfsmout:
607 	*mrq = info.mreq;
608 	if (dirp)
609 		vrele(dirp);
610 	nlookup_done(&nd);		/* may be called twice */
611 	if (vp)
612 		vput(vp);
613 	return (error);
614 }
615 
616 /*
617  * nfs readlink service
 *
 * Decodes a file handle, pre-allocates an mbuf cluster chain able to hold
 * NFS_MAXPATHLEN bytes, reads the symlink target into it with
 * VOP_READLINK(), trims the chain to the (XDR-rounded) length actually
 * read and appends it to the reply after the length word.
 *
 * nfsd	parsed request descriptor
 * slp		server socket the request arrived on
 * td		calling thread (unused here)
 * mrq		out: receives the reply mbuf chain (info.mreq), may be NULL
 *
 * Returns 0 once a v3 error reply has been built; otherwise the value of
 * error after nfsm_reply() is returned.  The *OUT() wrappers are defined
 * outside this file; per the file header they can jump to nfsmout, which
 * frees the data chain (mp3) if it has not been handed to the reply yet.
618  */
619 int
620 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
621 	       struct thread *td, struct mbuf **mrq)
622 {
623 	struct sockaddr *nam = nfsd->nd_nam;
624 	struct ucred *cred = &nfsd->nd_cr;
625 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
626 	struct iovec *ivp = iv;
627 	u_int32_t *tl;
628 	int error = 0, rdonly, i, tlen, len, getret;
	/* mp1: mbuf being added, mp2: tail of the chain, mp3: its head */
629 	struct mbuf *mp1, *mp2, *mp3;
630 	struct vnode *vp = NULL;
631 	struct mount *mp = NULL;
632 	struct vattr attr;
633 	nfsfh_t nfh;
634 	fhandle_t *fhp;
635 	struct uio io, *uiop = &io;
636 	struct nfsm_info info;
637 
638 	info.mrep = nfsd->nd_mrep;
639 	info.mreq = NULL;
640 	info.md = nfsd->nd_md;
641 	info.dpos = nfsd->nd_dpos;
642 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
643 
644 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
645 #ifndef nolint
646 	mp2 = NULL;
647 #endif
648 	mp3 = NULL;
649 	fhp = &nfh.fh_generic;
650 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/*
	 * Build the receive buffer: chain clusters until NFS_MAXPATHLEN
	 * bytes are covered, shortening the last one, and describe each
	 * mbuf with one iovec.
	 */
651 	len = 0;
652 	i = 0;
653 	while (len < NFS_MAXPATHLEN) {
654 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
655 		mp1->m_len = MCLBYTES;
656 		if (len == 0)
657 			mp3 = mp2 = mp1;
658 		else {
659 			mp2->m_next = mp1;
660 			mp2 = mp1;
661 		}
662 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
663 			mp1->m_len = NFS_MAXPATHLEN-len;
664 			len = NFS_MAXPATHLEN;
665 		} else
666 			len += mp1->m_len;
667 		ivp->iov_base = mtod(mp1, caddr_t);
668 		ivp->iov_len = mp1->m_len;
669 		i++;
670 		ivp++;
671 	}
672 	uiop->uio_iov = iv;
673 	uiop->uio_iovcnt = i;
674 	uiop->uio_offset = 0;
675 	uiop->uio_resid = len;
676 	uiop->uio_rw = UIO_READ;
677 	uiop->uio_segflg = UIO_SYSSPACE;
678 	uiop->uio_td = NULL;
679 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
680 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
681 	if (error) {
		/* bad handle: status plus "no attributes"; mp3 freed at nfsmout */
682 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
683 				      2 * NFSX_UNSIGNED, &error));
684 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
685 		error = 0;
686 		goto nfsmout;
687 	}
688 	if (vp->v_type != VLNK) {
		/* not a symlink: EINVAL for v3, ENXIO for v2 */
689 		if (info.v3)
690 			error = EINVAL;
691 		else
692 			error = ENXIO;
693 		goto out;
694 	}
695 	error = VOP_READLINK(vp, uiop, cred);
696 out:
	/* fetch attributes, then drop vp before building the reply */
697 	getret = VOP_GETATTR(vp, &attr);
698 	vput(vp);
699 	vp = NULL;
700 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
701 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
702 			     &error));
703 	if (info.v3) {
704 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
705 		if (error) {
706 			error = 0;
707 			goto nfsmout;
708 		}
709 	}
	/*
	 * uio_resid is what VOP_READLINK() left unused; shrink len to the
	 * bytes read and trim the chain to the XDR-rounded length.
	 */
710 	if (uiop->uio_resid > 0) {
711 		len -= uiop->uio_resid;
712 		tlen = nfsm_rndup(len);
713 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
714 	}
715 	tl = nfsm_build(&info, NFSX_UNSIGNED);
716 	*tl = txdr_unsigned(len);
	/* hand the data chain to the reply; it must no longer be freed here */
717 	info.mb->m_next = mp3;
718 	mp3 = NULL;
719 nfsmout:
720 	*mrq = info.mreq;
721 	if (mp3)
722 		m_freem(mp3);
723 	if (vp)
724 		vput(vp);
725 	return(error);
726 }
727 
728 /*
729  * nfs read service
 *
 * Decodes a file handle, an offset (64 bits for v3, 32 bits for v2) and a
 * byte count bounded by NFS_SRVMAXDATA(nfsd), checks access, clips the
 * count to the file size, updates the per-vnode sequential-read heuristic
 * and reads the data straight into the reply mbuf chain with VOP_READ().
 *
 * nfsd	parsed request descriptor
 * slp		server socket the request arrived on
 * td		calling thread, passed through to nfsrv_access()
 * mrq		out: receives the reply mbuf chain (info.mreq), may be NULL
 *
 * Returns 0 once a reply (success or NFS error) has been built, or the
 * error left behind by one of the decoding wrappers.  The *OUT() wrappers
 * are defined outside this file; per the file header they can jump to
 * nfsmout, so vp is kept NULL whenever no reference is held.
730  */
731 int
732 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
733 	   struct thread *td, struct mbuf **mrq)
734 {
735 	struct nfsm_info info;
736 	struct sockaddr *nam = nfsd->nd_nam;
737 	struct ucred *cred = &nfsd->nd_cr;
738 	struct iovec *iv;
739 	struct iovec *iv2;
740 	struct mbuf *m;
741 	struct nfs_fattr *fp;
742 	u_int32_t *tl;
743 	int i;
744 	int reqlen;
745 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
746 	struct mbuf *m2;
747 	struct vnode *vp = NULL;
748 	struct mount *mp = NULL;
749 	nfsfh_t nfh;
750 	fhandle_t *fhp;
751 	struct uio io, *uiop = &io;
752 	struct vattr va, *vap = &va;
753 	struct nfsheur *nh;
754 	off_t off;
755 	int ioflag = 0;
756 
757 	info.mrep = nfsd->nd_mrep;
758 	info.mreq = NULL;
759 	info.md = nfsd->nd_md;
760 	info.dpos = nfsd->nd_dpos;
761 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
762 
763 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
764 	fhp = &nfh.fh_generic;
765 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
766 	if (info.v3) {
767 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
768 		off = fxdr_hyper(tl);
769 	} else {
770 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
771 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
772 	}
773 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
774 					    NFS_SRVMAXDATA(nfsd), &error));
775 
776 	/*
777 	 * Reference vp.  If an error occurs, vp will be invalid, but we
778 	 * have to NULL it just in case.  The macros might goto nfsmout
779 	 * as well.
780 	 */
781 
782 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
783 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
784 	if (error) {
785 		vp = NULL;
786 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
787 				      2 * NFSX_UNSIGNED, &error));
788 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
789 		error = 0;
790 		goto nfsmout;
791 	}
792 
793 	if (vp->v_type != VREG) {
794 		if (info.v3)
795 			error = EINVAL;
796 		else
797 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
798 	}
799 	if (!error) {
	    /* read permission, or failing that execute permission, suffices */
800 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
801 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
802 	}
803 	getret = VOP_GETATTR(vp, vap);
804 	if (!error)
805 		error = getret;
806 	if (error) {
807 		vput(vp);
808 		vp = NULL;
809 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
810 				      NFSX_POSTOPATTR(info.v3), &error));
811 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
812 		error = 0;
813 		goto nfsmout;
814 	}
815 
816 	/*
817 	 * Calculate byte count to read
818 	 */
819 
820 	if (off >= vap->va_size)
821 		cnt = 0;
822 	else if ((off + reqlen) > vap->va_size)
823 		cnt = vap->va_size - off;
824 	else
825 		cnt = reqlen;
826 
827 	/*
828 	 * Calculate seqcount for heuristic
829 	 */
830 
831 	{
832 		int hi;
833 		int try = 32;
834 
835 		/*
836 		 * Locate best candidate
		 *
		 * Probe up to 32 consecutive slots starting at the hash
		 * of the vnode pointer.  Stop on an exact match;
		 * otherwise age each slot passed and remember the one
		 * with the lowest use count as the replacement victim.
837 		 */
838 
839 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
840 		nh = &nfsheur[hi];
841 
842 		while (try--) {
843 			if (nfsheur[hi].nh_vp == vp) {
844 				nh = &nfsheur[hi];
845 				break;
846 			}
847 			if (nfsheur[hi].nh_use > 0)
848 				--nfsheur[hi].nh_use;
849 			hi = (hi + 1) % NUM_HEURISTIC;
850 			if (nfsheur[hi].nh_use < nh->nh_use)
851 				nh = &nfsheur[hi];
852 		}
853 
		/* no match: take over the chosen slot for this vnode */
854 		if (nh->nh_vp != vp) {
855 			nh->nh_vp = vp;
856 			nh->nh_nextr = off;
857 			nh->nh_use = NHUSE_INIT;
858 			if (off == 0)
859 				nh->nh_seqcount = 4;
860 			else
861 				nh->nh_seqcount = 1;
862 		}
863 
864 		/*
865 		 * Calculate heuristic
		 *
		 * A read continuing where the previous one ended (or one
		 * restarting at offset 0) raises the sequential count up
		 * to IO_SEQMAX; anything else drops it to 1 and then 0.
866 		 */
867 
868 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
869 			if (++nh->nh_seqcount > IO_SEQMAX)
870 				nh->nh_seqcount = IO_SEQMAX;
871 		} else if (nh->nh_seqcount > 1) {
872 			nh->nh_seqcount = 1;
873 		} else {
874 			nh->nh_seqcount = 0;
875 		}
876 		nh->nh_use += NHUSE_INC;
877 		if (nh->nh_use > NHUSE_MAX)
878 			nh->nh_use = NHUSE_MAX;
879 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
880         }
881 
882 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
883 			      NFSX_POSTOPORFATTR(info.v3) +
884 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
885 			      &error));
	/*
	 * Reserve the fixed part of the reply.  fp marks where the
	 * attributes go, tl is left pointing at the count word(s) that are
	 * filled in once the actual read length is known.
	 */
886 	if (info.v3) {
887 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
888 		*tl++ = nfs_true;
889 		fp = (struct nfs_fattr *)tl;
890 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
891 	} else {
892 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
893 		fp = (struct nfs_fattr *)tl;
894 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
895 	}
896 	len = left = nfsm_rndup(cnt);
897 	if (cnt > 0) {
898 		/*
899 		 * Generate the mbuf list with the uio_iov ref. to it.
		 *
		 * First pass: extend the reply chain until it has room
		 * for len bytes and count the mbufs that will take data.
900 		 */
901 		i = 0;
902 		m = m2 = info.mb;
903 		while (left > 0) {
904 			siz = min(M_TRAILINGSPACE(m), left);
905 			if (siz > 0) {
906 				left -= siz;
907 				i++;
908 			}
909 			if (left > 0) {
910 				m = m_getcl(MB_WAIT, MT_DATA, 0);
911 				m->m_len = 0;
912 				m2->m_next = m;
913 				m2 = m;
914 			}
915 		}
916 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
917 		       M_TEMP, M_WAITOK);
918 		uiop->uio_iov = iv2 = iv;
		/* second pass: point one iovec at the free space of each mbuf */
919 		m = info.mb;
920 		left = len;
921 		i = 0;
922 		while (left > 0) {
923 			if (m == NULL)
924 				panic("nfsrv_read iov");
925 			siz = min(M_TRAILINGSPACE(m), left);
926 			if (siz > 0) {
927 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
928 				iv->iov_len = siz;
929 				m->m_len += siz;
930 				left -= siz;
931 				iv++;
932 				i++;
933 			}
934 			m = m->m_next;
935 		}
936 		uiop->uio_iovcnt = i;
937 		uiop->uio_offset = off;
938 		uiop->uio_resid = len;
939 		uiop->uio_rw = UIO_READ;
940 		uiop->uio_segflg = UIO_SYSSPACE;
		/*
		 * io lives on the stack; clear uio_td like the readlink and
		 * write handlers do so VOP_READ() never sees an
		 * uninitialized thread pointer.
		 */
		uiop->uio_td = NULL;
941 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
		/* remember where this read ended for the next heuristic pass */
942 		off = uiop->uio_offset;
943 		nh->nh_nextr = off;
944 		FREE((caddr_t)iv2, M_TEMP);
945 		if (error || (getret = VOP_GETATTR(vp, vap))) {
946 			if (!error)
947 				error = getret;
			/* throw the half-built reply away and send an error */
948 			m_freem(info.mreq);
949 			info.mreq = NULL;
950 			vput(vp);
951 			vp = NULL;
952 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
953 					      NFSX_POSTOPATTR(info.v3),
954 					      &error));
955 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
956 			error = 0;
957 			goto nfsmout;
958 		}
959 	} else {
960 		uiop->uio_resid = 0;
961 	}
962 	vput(vp);
963 	vp = NULL;
964 	nfsm_srvfattr(nfsd, vap, fp);
	/*
	 * tlen is what VOP_READ() actually delivered; never report more
	 * than that, then trim the chain back to the XDR-rounded size.
	 */
965 	tlen = len - uiop->uio_resid;
966 	cnt = cnt < tlen ? cnt : tlen;
967 	tlen = nfsm_rndup(cnt);
968 	if (len != tlen || tlen != cnt)
969 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
970 	if (info.v3) {
971 		*tl++ = txdr_unsigned(cnt);
		/*
		 * eof: fewer bytes than requested were returned, i.e. the
		 * read was cut short by the end of the file.  The test uses
		 * the byte count, not the XDR-rounded len, because rounding
		 * can hide a shortfall of up to three bytes.
		 */
972 		if (cnt < reqlen)
973 			*tl++ = nfs_true;
974 		else
975 			*tl++ = nfs_false;
976 	}
977 	*tl = txdr_unsigned(cnt);
978 nfsmout:
979 	*mrq = info.mreq;
980 	if (vp)
981 		vput(vp);
982 	return(error);
983 }
984 
985 /*
986  * nfs write service
987  */
988 int
989 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
990 	    struct thread *td, struct mbuf **mrq)
991 {
992 	struct sockaddr *nam = nfsd->nd_nam;
993 	struct ucred *cred = &nfsd->nd_cr;
994 	struct iovec *ivp;
995 	int i, cnt;
996 	struct mbuf *mp1;
997 	struct nfs_fattr *fp;
998 	struct iovec *iv;
999 	struct vattr va, forat;
1000 	struct vattr *vap = &va;
1001 	u_int32_t *tl;
1002 	int error = 0, rdonly, len, forat_ret = 1;
1003 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1004 	int stable = NFSV3WRITE_FILESYNC;
1005 	struct vnode *vp = NULL;
1006 	struct mount *mp = NULL;
1007 	nfsfh_t nfh;
1008 	fhandle_t *fhp;
1009 	struct uio io, *uiop = &io;
1010 	struct nfsm_info info;
1011 	off_t off;
1012 
1013 	info.mrep = nfsd->nd_mrep;
1014 	info.mreq = NULL;
1015 	info.md = nfsd->nd_md;
1016 	info.dpos = nfsd->nd_dpos;
1017 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1018 
1019 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1020 	if (info.mrep == NULL) {
1021 		error = 0;
1022 		goto nfsmout;
1023 	}
1024 	fhp = &nfh.fh_generic;
1025 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1026 	if (info.v3) {
1027 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1028 		off = fxdr_hyper(tl);
1029 		tl += 3;
1030 		stable = fxdr_unsigned(int, *tl++);
1031 	} else {
1032 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1033 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1034 		tl += 2;
1035 		if (nfs_async)
1036 	    		stable = NFSV3WRITE_UNSTABLE;
1037 	}
1038 	retlen = len = fxdr_unsigned(int32_t, *tl);
1039 	cnt = i = 0;
1040 
1041 	/*
1042 	 * For NFS Version 2, it is not obvious what a write of zero length
1043 	 * should do, but I might as well be consistent with Version 3,
1044 	 * which is to return ok so long as there are no permission problems.
1045 	 */
1046 	if (len > 0) {
1047 	    zeroing = 1;
1048 	    mp1 = info.mrep;
1049 	    while (mp1) {
1050 		if (mp1 == info.md) {
1051 			zeroing = 0;
1052 			adjust = info.dpos - mtod(mp1, caddr_t);
1053 			mp1->m_len -= adjust;
1054 			if (mp1->m_len > 0 && adjust > 0)
1055 				mp1->m_data += adjust;
1056 		}
1057 		if (zeroing)
1058 			mp1->m_len = 0;
1059 		else if (mp1->m_len > 0) {
1060 			i += mp1->m_len;
1061 			if (i > len) {
1062 				mp1->m_len -= (i - len);
1063 				zeroing	= 1;
1064 			}
1065 			if (mp1->m_len > 0)
1066 				cnt++;
1067 		}
1068 		mp1 = mp1->m_next;
1069 	    }
1070 	}
1071 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1072 		error = EIO;
1073 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1074 				      2 * NFSX_UNSIGNED, &error));
1075 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1076 				 aftat_ret, vap);
1077 		error = 0;
1078 		goto nfsmout;
1079 	}
1080 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1081 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1082 	if (error) {
1083 		vp = NULL;
1084 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1085 				      2 * NFSX_UNSIGNED, &error));
1086 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1087 				 aftat_ret, vap);
1088 		error = 0;
1089 		goto nfsmout;
1090 	}
1091 	if (info.v3)
1092 		forat_ret = VOP_GETATTR(vp, &forat);
1093 	if (vp->v_type != VREG) {
1094 		if (info.v3)
1095 			error = EINVAL;
1096 		else
1097 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1098 	}
1099 	if (!error) {
1100 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1101 	}
1102 	if (error) {
1103 		vput(vp);
1104 		vp = NULL;
1105 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1106 				      NFSX_WCCDATA(info.v3), &error));
1107 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1108 				 aftat_ret, vap);
1109 		error = 0;
1110 		goto nfsmout;
1111 	}
1112 
1113 	if (len > 0) {
1114 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1115 		M_WAITOK);
1116 	    uiop->uio_iov = iv = ivp;
1117 	    uiop->uio_iovcnt = cnt;
1118 	    mp1 = info.mrep;
1119 	    while (mp1) {
1120 		if (mp1->m_len > 0) {
1121 			ivp->iov_base = mtod(mp1, caddr_t);
1122 			ivp->iov_len = mp1->m_len;
1123 			ivp++;
1124 		}
1125 		mp1 = mp1->m_next;
1126 	    }
1127 
1128 	    /*
1129 	     * XXX
1130 	     * The IO_METASYNC flag indicates that all metadata (and not just
1131 	     * enough to ensure data integrity) must be written to stable storage
1132 	     * synchronously.
1133 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1134 	     */
1135 	    if (stable == NFSV3WRITE_UNSTABLE)
1136 		ioflags = IO_NODELOCKED;
1137 	    else if (stable == NFSV3WRITE_DATASYNC)
1138 		ioflags = (IO_SYNC | IO_NODELOCKED);
1139 	    else
1140 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1141 	    uiop->uio_resid = len;
1142 	    uiop->uio_rw = UIO_WRITE;
1143 	    uiop->uio_segflg = UIO_SYSSPACE;
1144 	    uiop->uio_td = NULL;
1145 	    uiop->uio_offset = off;
1146 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1147 	    nfsstats.srvvop_writes++;
1148 	    FREE((caddr_t)iv, M_TEMP);
1149 	}
1150 	aftat_ret = VOP_GETATTR(vp, vap);
1151 	vput(vp);
1152 	vp = NULL;
1153 	if (!error)
1154 		error = aftat_ret;
1155 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1156 			      NFSX_PREOPATTR(info.v3) +
1157 			      NFSX_POSTOPORFATTR(info.v3) +
1158 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1159 			      &error));
1160 	if (info.v3) {
1161 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1162 				 aftat_ret, vap);
1163 		if (error) {
1164 			error = 0;
1165 			goto nfsmout;
1166 		}
1167 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1168 		*tl++ = txdr_unsigned(retlen);
1169 		/*
1170 		 * If nfs_async is set, then pretend the write was FILESYNC.
1171 		 */
1172 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1173 			*tl++ = txdr_unsigned(stable);
1174 		else
1175 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1176 		/*
1177 		 * Actually, there is no need to txdr these fields,
1178 		 * but it may make the values more human readable,
1179 		 * for debugging purposes.
1180 		 */
1181 		if (nfsver.tv_sec == 0)
1182 			nfsver = boottime;
1183 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1184 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1185 	} else {
1186 		fp = nfsm_build(&info, NFSX_V2FATTR);
1187 		nfsm_srvfattr(nfsd, vap, fp);
1188 	}
1189 nfsmout:
1190 	*mrq = info.mreq;
1191 	if (vp)
1192 		vput(vp);
1193 	return(error);
1194 }
1195 
1196 /*
1197  * NFS write service with write gathering support. Called when
1198  * nfsrvw_procrastinate > 0.
1199  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1200  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1201  * Jan. 1994.
 *
 * The function works in three phases:
 *
 * 1. If *ndp is non-NULL, the request is taken over (*ndp is cleared), its
 *    write header is parsed, the mbuf chain is trimmed down to the write
 *    data, and the descriptor is queued on slp->ns_tq (ordered by nd_time)
 *    and on a per-file-handle hash chain, where it may be coalesced with
 *    contiguous queued writes.
 * 2. Every queued descriptor whose nd_time has passed and which has no
 *    reply yet is written with VOP_WRITE(); replies are built for it and
 *    for every request coalesced into it.
 * 3. The first queued descriptor that has a reply (nd_mreq) is removed from
 *    the time queue and handed back through *ndp / *mrq; both are set to
 *    NULL when no reply is ready.
 *
 * The function itself always returns 0; per-request errors are encoded in
 * the reply built for that request.
1202  */
1203 int
1204 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1205 		  struct thread *td, struct mbuf **mrq)
1206 {
1207 	struct iovec *ivp;
1208 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1209 	struct nfs_fattr *fp;
1210 	int i;
1211 	struct iovec *iov;
1212 	struct nfsrvw_delayhash *wpp;
1213 	struct ucred *cred;
1214 	struct vattr va, forat;
1215 	u_int32_t *tl;
1216 	int error = 0, rdonly, len, forat_ret = 1;
1217 	int ioflags, aftat_ret = 1, adjust, zeroing;
1218 	struct mbuf *mp1;
1219 	struct vnode *vp = NULL;
1220 	struct mount *mp = NULL;
1221 	struct uio io, *uiop = &io;
1222 	u_quad_t cur_usec;
1223 	struct nfsm_info info;
1224 
1225 	info.mreq = NULL;
1226 
1227 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1228 #ifndef nolint
1229 	i = 0;
1230 	len = 0;
1231 #endif
	/*
	 * Phase 1: a new request was passed in.  Take it over (clearing
	 * *ndp), parse the write header and queue it.
	 */
1232 	if (*ndp) {
1233 	    nfsd = *ndp;
1234 	    *ndp = NULL;
1235 	    info.mrep = nfsd->nd_mrep;
1236 	    info.mreq = NULL;
1237 	    info.md = nfsd->nd_md;
1238 	    info.dpos = nfsd->nd_dpos;
1239 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1240 	    cred = &nfsd->nd_cr;
1241 	    LIST_INIT(&nfsd->nd_coalesce);
1242 	    nfsd->nd_mreq = NULL;
1243 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
	    /*
	     * nd_time is the time at which this write becomes due: now
	     * plus the protocol-version specific procrastination delay.
	     */
1244 	    cur_usec = nfs_curusec();
1245 	    nfsd->nd_time = cur_usec +
1246 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1247 
1248 	    /*
1249 	     * Now, get the write header..
1250 	     */
1251 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1252 	    if (info.v3) {
1253 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1254 		nfsd->nd_off = fxdr_hyper(tl);
1255 		tl += 3;
1256 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1257 	    } else {
1258 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1259 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1260 		tl += 2;
1261 		if (nfs_async)
1262 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1263 	    }
1264 	    len = fxdr_unsigned(int32_t, *tl);
1265 	    nfsd->nd_len = len;
1266 	    nfsd->nd_eoff = nfsd->nd_off + len;
1267 
1268 	    /*
1269 	     * Trim the header out of the mbuf list and trim off any trailing
1270 	     * junk so that the mbuf list has only the write data.
1271 	     */
1272 	    zeroing = 1;
1273 	    i = 0;
1274 	    mp1 = info.mrep;
1275 	    while (mp1) {
1276 		if (mp1 == info.md) {
1277 		    zeroing = 0;
1278 		    adjust = info.dpos - mtod(mp1, caddr_t);
1279 		    mp1->m_len -= adjust;
1280 		    if (mp1->m_len > 0 && adjust > 0)
1281 			mp1->m_data += adjust;
1282 		}
1283 		if (zeroing)
1284 		    mp1->m_len = 0;
1285 		else {
1286 		    i += mp1->m_len;
1287 		    if (i > len) {
1288 			mp1->m_len -= (i - len);
1289 			zeroing = 1;
1290 		    }
1291 		}
1292 		mp1 = mp1->m_next;
1293 	    }
	    /*
	     * Reject a bad length or a request carrying less data than it
	     * announced (i is the number of data bytes found above).  The
	     * nfsmout label inside this body is presumably also where the
	     * NEGREPLYOUT/NULLOUT macros above jump on a parse failure
	     * (their definitions are not visible here -- confirm).  The
	     * request data is dropped, an EIO reply is built, and nd_time
	     * is zeroed so the entry sorts to the front of the time queue.
	     */
1294 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1295 nfsmout:
1296 		m_freem(info.mrep);
1297 		info.mrep = NULL;
1298 		error = EIO;
1299 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1300 		if (info.v3) {
1301 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1302 				     aftat_ret, &va);
1303 		}
1304 		nfsd->nd_mreq = info.mreq;
1305 		nfsd->nd_mrep = NULL;
1306 		nfsd->nd_time = 0;
1307 	    }
1308 
1309 	    /*
1310 	     * Add this entry to the hash and time queues.
1311 	     */
	    /* The time queue is kept in ascending nd_time order. */
1312 	    owp = NULL;
1313 	    wp = slp->ns_tq.lh_first;
1314 	    while (wp && wp->nd_time < nfsd->nd_time) {
1315 		owp = wp;
1316 		wp = wp->nd_tq.le_next;
1317 	    }
1318 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1319 	    if (owp) {
1320 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1321 	    } else {
1322 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1323 	    }
	    /*
	     * Only requests that still hold write data (nd_mrep was not
	     * cleared by the error path above) go on the hash chain.  The
	     * first loop skips entries for other file handles, the second
	     * skips entries for the same handle with a lower offset, so
	     * owp ends up as the entry to insert after (or NULL).
	     */
1324 	    if (nfsd->nd_mrep) {
1325 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1326 		owp = NULL;
1327 		wp = wpp->lh_first;
1328 		while (wp &&
1329 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1330 		    owp = wp;
1331 		    wp = wp->nd_hash.le_next;
1332 		}
1333 		while (wp && wp->nd_off < nfsd->nd_off &&
1334 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1335 		    owp = wp;
1336 		    wp = wp->nd_hash.le_next;
1337 		}
1338 		if (owp) {
1339 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1340 
1341 		    /*
1342 		     * Search the hash list for overlapping entries and
1343 		     * coalesce.
1344 		     */
		    /*
		     * The successor is fetched into wp before coalescing
		     * because nfsrvw_coalesce() unlinks nfsd from the
		     * hash list.
		     */
1345 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1346 			wp = nfsd->nd_hash.le_next;
1347 			if (NFSW_SAMECRED(owp, nfsd))
1348 			    nfsrvw_coalesce(owp, nfsd);
1349 		    }
1350 		} else {
1351 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1352 		}
1353 	    }
1354 	}
1355 
1356 	/*
1357 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1358 	 * and generate the associated reply mbuf list(s).
1359 	 */
1360 loop1:
1361 	cur_usec = nfs_curusec();
1362 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1363 		owp = nfsd->nd_tq.le_next;
		/* Stop at the first entry that is not yet due. */
1364 		if (nfsd->nd_time > cur_usec)
1365 		    break;
		/* Already has a reply; the final phase will return it. */
1366 		if (nfsd->nd_mreq)
1367 		    continue;
1368 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1369 		LIST_REMOVE(nfsd, nd_tq);
1370 		LIST_REMOVE(nfsd, nd_hash);
1371 		info.mrep = nfsd->nd_mrep;
1372 		info.mreq = NULL;
1373 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1374 		nfsd->nd_mrep = NULL;
1375 		cred = &nfsd->nd_cr;
1376 		forat_ret = aftat_ret = 1;
1377 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1378 				     nfsd->nd_nam, &rdonly,
1379 				     (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1380 		if (!error) {
1381 		    if (info.v3)
1382 			forat_ret = VOP_GETATTR(vp, &forat);
1383 		    if (vp->v_type != VREG) {
1384 			if (info.v3)
1385 			    error = EINVAL;
1386 			else
1387 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1388 		    }
1389 		} else {
1390 		    vp = NULL;
1391 		}
1392 		if (!error) {
1393 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1394 		}
1395 
		/* Map the requested stability level to VOP_WRITE() I/O flags. */
1396 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1397 		    ioflags = IO_NODELOCKED;
1398 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1399 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1400 		else
1401 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1402 		uiop->uio_rw = UIO_WRITE;
1403 		uiop->uio_segflg = UIO_SYSSPACE;
1404 		uiop->uio_td = NULL;
1405 		uiop->uio_offset = nfsd->nd_off;
1406 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
		/*
		 * Build an iovec array with one entry per non-empty mbuf
		 * in the data chain.  The array is built even when an
		 * error is pending; only the write itself is skipped.
		 */
1407 		if (uiop->uio_resid > 0) {
1408 		    mp1 = info.mrep;
1409 		    i = 0;
1410 		    while (mp1) {
1411 			if (mp1->m_len > 0)
1412 			    i++;
1413 			mp1 = mp1->m_next;
1414 		    }
1415 		    uiop->uio_iovcnt = i;
1416 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1417 			M_TEMP, M_WAITOK);
1418 		    uiop->uio_iov = ivp = iov;
1419 		    mp1 = info.mrep;
1420 		    while (mp1) {
1421 			if (mp1->m_len > 0) {
1422 			    ivp->iov_base = mtod(mp1, caddr_t);
1423 			    ivp->iov_len = mp1->m_len;
1424 			    ivp++;
1425 			}
1426 			mp1 = mp1->m_next;
1427 		    }
1428 		    if (!error) {
1429 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1430 			nfsstats.srvvop_writes++;
1431 		    }
1432 		    FREE((caddr_t)iov, M_TEMP);
1433 		}
1434 		m_freem(info.mrep);
1435 		info.mrep = NULL;
1436 		if (vp) {
1437 		    aftat_ret = VOP_GETATTR(vp, &va);
1438 		    vput(vp);
1439 		    vp = NULL;
1440 		}
1441 
1442 		/*
1443 		 * Loop around generating replies for all write rpcs that have
1444 		 * now been completed.
1445 		 */
		/*
		 * swp remembers the request that was actually written; the
		 * loop replies to it first and then to each request taken
		 * off its nd_coalesce list.  All replies share the same
		 * error and attribute results.
		 */
1446 		swp = nfsd;
1447 		do {
1448 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1449 		    if (error) {
1450 			nfsm_writereply(&info, nfsd, slp, error,
1451 					NFSX_WCCDATA(info.v3));
1452 			if (info.v3) {
1453 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1454 					     aftat_ret, &va);
1455 			}
1456 		    } else {
1457 			nfsm_writereply(&info, nfsd, slp, error,
1458 					NFSX_PREOPATTR(info.v3) +
1459 					NFSX_POSTOPORFATTR(info.v3) +
1460 					2 * NFSX_UNSIGNED +
1461 					NFSX_WRITEVERF(info.v3));
1462 			if (info.v3) {
1463 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1464 					     aftat_ret, &va);
1465 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1466 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1467 			    *tl++ = txdr_unsigned(swp->nd_stable);
1468 			    /*
1469 			     * Actually, there is no need to txdr these fields,
1470 			     * but it may make the values more human readable,
1471 			     * for debugging purposes.
1472 			     */
1473 			    if (nfsver.tv_sec == 0)
1474 				    nfsver = boottime;
1475 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1476 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1477 			} else {
1478 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1479 			    nfsm_srvfattr(nfsd, &va, fp);
1480 			}
1481 		    }
1482 		    nfsd->nd_mreq = info.mreq;
1483 		    if (nfsd->nd_mrep)
1484 			panic("nfsrv_write: nd_mrep not free");
1485 
1486 		    /*
1487 		     * Done. Put it at the head of the timer queue so that
1488 		     * the final phase can return the reply.
1489 		     */
1490 		    if (nfsd != swp) {
1491 			nfsd->nd_time = 0;
1492 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1493 		    }
1494 		    nfsd = swp->nd_coalesce.lh_first;
1495 		    if (nfsd) {
1496 			LIST_REMOVE(nfsd, nd_tq);
1497 		    }
1498 		} while (nfsd);
1499 		swp->nd_time = 0;
1500 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
		/* The time queue was modified above; rescan from the start. */
1501 		goto loop1;
1502 	}
1503 
1504 	/*
1505 	 * Search for a reply to return.
1506 	 */
1507 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1508 		if (nfsd->nd_mreq) {
1509 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1510 		    LIST_REMOVE(nfsd, nd_tq);
1511 		    break;
1512 		}
1513 	}
	/* nfsd is NULL here when the loop above found no finished request. */
1514 	if (nfsd) {
1515 		*ndp = nfsd;
1516 		*mrq = nfsd->nd_mreq;
1517 	} else {
1518 		*ndp = NULL;
1519 		*mrq = NULL;
1520 	}
1521 	return (0);
1522 }
1523 
1524 /*
1525  * Coalesce the write request nfsd into owp. To do this we must:
1526  * - remove nfsd from the queues
1527  * - merge nfsd->nd_mrep into owp->nd_mrep
1528  * - update the nd_eoff and nd_stable for owp
1529  * - put nfsd on owp's nd_coalesce list
1530  * NB: Must be called at splsoftclock().
1531  */
1532 static void
1533 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1534 {
1535         int overlap;
1536         struct mbuf *mp1;
1537 	struct nfsrv_descript *p;
1538 
1539 	NFS_DPF(WG, ("C%03x-%03x",
1540 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1541         LIST_REMOVE(nfsd, nd_hash);
1542         LIST_REMOVE(nfsd, nd_tq);
1543         if (owp->nd_eoff < nfsd->nd_eoff) {
1544             overlap = owp->nd_eoff - nfsd->nd_off;
1545             if (overlap < 0)
1546                 panic("nfsrv_coalesce: bad off");
1547             if (overlap > 0)
1548                 m_adj(nfsd->nd_mrep, overlap);
1549             mp1 = owp->nd_mrep;
1550             while (mp1->m_next)
1551                 mp1 = mp1->m_next;
1552             mp1->m_next = nfsd->nd_mrep;
1553             owp->nd_eoff = nfsd->nd_eoff;
1554         } else
1555             m_freem(nfsd->nd_mrep);
1556         nfsd->nd_mrep = NULL;
1557         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1558             owp->nd_stable = NFSV3WRITE_FILESYNC;
1559         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1560             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1561             owp->nd_stable = NFSV3WRITE_DATASYNC;
1562         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1563 
1564 	/*
1565 	 * If nfsd had anything else coalesced into it, transfer them
1566 	 * to owp, otherwise their replies will never get sent.
1567 	 */
1568 	for (p = nfsd->nd_coalesce.lh_first; p;
1569 	     p = nfsd->nd_coalesce.lh_first) {
1570 	    LIST_REMOVE(p, nd_tq);
1571 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1572 	}
1573 }
1574 
1575 /*
1576  * nfs create service
1577  * now does a truncate to 0 length via. setattr if it already exists
1578  */
1579 int
1580 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1581 	     struct thread *td, struct mbuf **mrq)
1582 {
1583 	struct sockaddr *nam = nfsd->nd_nam;
1584 	struct ucred *cred = &nfsd->nd_cr;
1585 	struct nfs_fattr *fp;
1586 	struct vattr va, dirfor, diraft;
1587 	struct vattr *vap = &va;
1588 	struct nfsv2_sattr *sp;
1589 	u_int32_t *tl;
1590 	struct nlookupdata nd;
1591 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1592 	udev_t rdev = NOUDEV;
1593 	caddr_t cp;
1594 	int how, exclusive_flag = 0;
1595 	struct vnode *dirp;
1596 	struct vnode *dvp;
1597 	struct vnode *vp;
1598 	struct mount *mp;
1599 	nfsfh_t nfh;
1600 	fhandle_t *fhp;
1601 	u_quad_t tempsize;
1602 	u_char cverf[NFSX_V3CREATEVERF];
1603 	struct nfsm_info info;
1604 
1605 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1606 	nlookup_zero(&nd);
1607 	dirp = NULL;
1608 	dvp = NULL;
1609 	vp = NULL;
1610 
1611 	info.mrep = nfsd->nd_mrep;
1612 	info.mreq = NULL;
1613 	info.md = nfsd->nd_md;
1614 	info.dpos = nfsd->nd_dpos;
1615 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1616 
1617 	fhp = &nfh.fh_generic;
1618 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1619 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1620 
1621 	/*
1622 	 * Call namei and do initial cleanup to get a few things
1623 	 * out of the way.  If we get an initial error we cleanup
1624 	 * and return here to avoid special-casing the invalid nd
1625 	 * structure through the rest of the case.  dirp may be
1626 	 * set even if an error occurs, but the nd structure will not
1627 	 * be valid at all if an error occurs so we have to invalidate it
1628 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1629 	 */
1630 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1631 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1632 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1633 	mp = vfs_getvfs(&fhp->fh_fsid);
1634 
1635 	if (dirp) {
1636 		if (info.v3) {
1637 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1638 		} else {
1639 			vrele(dirp);
1640 			dirp = NULL;
1641 		}
1642 	}
1643 	if (error) {
1644 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1645 				      NFSX_WCCDATA(info.v3), &error));
1646 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1647 				 diraft_ret, &diraft);
1648 		error = 0;
1649 		goto nfsmout;
1650 	}
1651 
1652 	/*
1653 	 * No error.  Continue.  State:
1654 	 *
1655 	 *	dirp 		may be valid
1656 	 *	vp		may be valid or NULL if the target does not
1657 	 *			exist.
1658 	 *	dvp		is valid
1659 	 *
1660 	 * The error state is set through the code and we may also do some
1661 	 * opportunistic releasing of vnodes to avoid holding locks through
1662 	 * NFS I/O.  The cleanup at the end is a catch-all
1663 	 */
1664 
1665 	VATTR_NULL(vap);
1666 	if (info.v3) {
1667 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1668 		how = fxdr_unsigned(int, *tl);
1669 		switch (how) {
1670 		case NFSV3CREATE_GUARDED:
1671 			if (vp) {
1672 				error = EEXIST;
1673 				break;
1674 			}
1675 			/* fall through */
1676 		case NFSV3CREATE_UNCHECKED:
1677 			ERROROUT(nfsm_srvsattr(&info, vap));
1678 			break;
1679 		case NFSV3CREATE_EXCLUSIVE:
1680 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1681 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1682 			exclusive_flag = 1;
1683 			break;
1684 		};
1685 		vap->va_type = VREG;
1686 	} else {
1687 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1688 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1689 		if (vap->va_type == VNON)
1690 			vap->va_type = VREG;
1691 		vap->va_mode = nfstov_mode(sp->sa_mode);
1692 		switch (vap->va_type) {
1693 		case VREG:
1694 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1695 			if (tsize != -1)
1696 				vap->va_size = (u_quad_t)tsize;
1697 			break;
1698 		case VCHR:
1699 		case VBLK:
1700 		case VFIFO:
1701 			rdev = fxdr_unsigned(long, sp->sa_size);
1702 			break;
1703 		default:
1704 			break;
1705 		};
1706 	}
1707 
1708 	/*
1709 	 * Iff doesn't exist, create it
1710 	 * otherwise just truncate to 0 length
1711 	 *   should I set the mode too ?
1712 	 *
1713 	 * The only possible error we can have at this point is EEXIST.
1714 	 * nd.ni_vp will also be non-NULL in that case.
1715 	 */
1716 	if (vp == NULL) {
1717 		if (vap->va_mode == (mode_t)VNOVAL)
1718 			vap->va_mode = 0;
1719 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1720 			vn_unlock(dvp);
1721 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1722 					    nd.nl_cred, vap);
1723 			vrele(dvp);
1724 			dvp = NULL;
1725 			if (error == 0) {
1726 				if (exclusive_flag) {
1727 					exclusive_flag = 0;
1728 					VATTR_NULL(vap);
1729 					bcopy(cverf, (caddr_t)&vap->va_atime,
1730 						NFSX_V3CREATEVERF);
1731 					error = VOP_SETATTR(vp, vap, cred);
1732 				}
1733 			}
1734 		} else if (
1735 			vap->va_type == VCHR ||
1736 			vap->va_type == VBLK ||
1737 			vap->va_type == VFIFO
1738 		) {
1739 			/*
1740 			 * Handle SysV FIFO node special cases.  All other
1741 			 * devices require super user to access.
1742 			 */
1743 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1744 				vap->va_type = VFIFO;
1745                         if (vap->va_type != VFIFO &&
1746                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1747 				goto nfsmreply0;
1748                         }
1749 			vap->va_rmajor = umajor(rdev);
1750 			vap->va_rminor = uminor(rdev);
1751 
1752 			vn_unlock(dvp);
1753 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1754 			vrele(dvp);
1755 			dvp = NULL;
1756 			if (error)
1757 				goto nfsmreply0;
1758 #if 0
1759 			/*
1760 			 * XXX what is this junk supposed to do ?
1761 			 */
1762 
1763 			vput(vp);
1764 			vp = NULL;
1765 
1766 			/*
1767 			 * release dvp prior to lookup
1768 			 */
1769 			vput(dvp);
1770 			dvp = NULL;
1771 
1772 			/*
1773 			 * Setup for lookup.
1774 			 *
1775 			 * Even though LOCKPARENT was cleared, ni_dvp may
1776 			 * be garbage.
1777 			 */
1778 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1779 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1780 			nd.ni_cnd.cn_td = td;
1781 			nd.ni_cnd.cn_cred = cred;
1782 
1783 			error = lookup(&nd);
1784 			nd.ni_dvp = NULL;
1785 
1786 			if (error != 0) {
1787 				NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1788 						      0, &error));
1789 				/* fall through on certain errors */
1790 			}
1791 			nfsrv_object_create(nd.ni_vp);
1792 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1793 				error = EINVAL;
1794 				goto nfsmreply0;
1795 			}
1796 #endif
1797 		} else {
1798 			error = ENXIO;
1799 		}
1800 	} else {
1801 		if (vap->va_size != -1) {
1802 			error = nfsrv_access(mp, vp, VWRITE, cred,
1803 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1804 			if (!error) {
1805 				tempsize = vap->va_size;
1806 				VATTR_NULL(vap);
1807 				vap->va_size = tempsize;
1808 				error = VOP_SETATTR(vp, vap, cred);
1809 			}
1810 		}
1811 	}
1812 
1813 	if (!error) {
1814 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1815 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1816 		if (!error)
1817 			error = VOP_GETATTR(vp, vap);
1818 	}
1819 	if (info.v3) {
1820 		if (exclusive_flag && !error &&
1821 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1822 			error = EEXIST;
1823 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1824 		vrele(dirp);
1825 		dirp = NULL;
1826 	}
1827 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1828 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1829 			      NFSX_WCCDATA(info.v3),
1830 			      &error));
1831 	if (info.v3) {
1832 		if (!error) {
1833 			nfsm_srvpostop_fh(&info, fhp);
1834 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1835 		}
1836 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1837 				 diraft_ret, &diraft);
1838 		error = 0;
1839 	} else {
1840 		nfsm_srvfhtom(&info, fhp);
1841 		fp = nfsm_build(&info, NFSX_V2FATTR);
1842 		nfsm_srvfattr(nfsd, vap, fp);
1843 	}
1844 	goto nfsmout;
1845 
1846 nfsmreply0:
1847 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1848 	error = 0;
1849 	/* fall through */
1850 
1851 nfsmout:
1852 	*mrq = info.mreq;
1853 	if (dirp)
1854 		vrele(dirp);
1855 	nlookup_done(&nd);
1856 	if (dvp) {
1857 		if (dvp == vp)
1858 			vrele(dvp);
1859 		else
1860 			vput(dvp);
1861 	}
1862 	if (vp)
1863 		vput(vp);
1864 	return (error);
1865 }
1866 
1867 /*
1868  * nfs v3 mknod service
1869  */
1870 int
1871 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1872 	    struct thread *td, struct mbuf **mrq)
1873 {
1874 	struct sockaddr *nam = nfsd->nd_nam;
1875 	struct ucred *cred = &nfsd->nd_cr;
1876 	struct vattr va, dirfor, diraft;
1877 	struct vattr *vap = &va;
1878 	u_int32_t *tl;
1879 	struct nlookupdata nd;
1880 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1881 	enum vtype vtyp;
1882 	struct vnode *dirp;
1883 	struct vnode *dvp;
1884 	struct vnode *vp;
1885 	nfsfh_t nfh;
1886 	fhandle_t *fhp;
1887 	struct nfsm_info info;
1888 
1889 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1890 	nlookup_zero(&nd);
1891 	dirp = NULL;
1892 	dvp = NULL;
1893 	vp = NULL;
1894 
1895 	info.mrep = nfsd->nd_mrep;
1896 	info.mreq = NULL;
1897 	info.md = nfsd->nd_md;
1898 	info.dpos = nfsd->nd_dpos;
1899 
1900 	fhp = &nfh.fh_generic;
1901 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1902 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1903 
1904 	/*
1905 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1906 	 * is not valid.  However, nfsm_*() routines may still jump to
1907 	 * nfsmout.
1908 	 */
1909 
1910 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1911 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1912 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1913 	if (dirp)
1914 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1915 	if (error) {
1916 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1917 			   NFSX_WCCDATA(1), &error));
1918 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1919 				 diraft_ret, &diraft);
1920 		error = 0;
1921 		goto nfsmout;
1922 	}
1923 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1924 	vtyp = nfsv3tov_type(*tl);
1925 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1926 		error = NFSERR_BADTYPE;
1927 		goto out;
1928 	}
1929 	VATTR_NULL(vap);
1930 	ERROROUT(nfsm_srvsattr(&info, vap));
1931 	if (vtyp == VCHR || vtyp == VBLK) {
1932 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1933 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1934 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1935 	}
1936 
1937 	/*
1938 	 * Iff doesn't exist, create it.
1939 	 */
1940 	if (vp) {
1941 		error = EEXIST;
1942 		goto out;
1943 	}
1944 	vap->va_type = vtyp;
1945 	if (vap->va_mode == (mode_t)VNOVAL)
1946 		vap->va_mode = 0;
1947 	if (vtyp == VSOCK) {
1948 		vn_unlock(dvp);
1949 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1950 		vrele(dvp);
1951 		dvp = NULL;
1952 	} else {
1953 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1954 			goto out;
1955 
1956 		vn_unlock(dvp);
1957 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1958 		vrele(dvp);
1959 		dvp = NULL;
1960 		if (error)
1961 			goto out;
1962 	}
1963 
1964 	/*
1965 	 * send response, cleanup, return.
1966 	 */
1967 out:
1968 	nlookup_done(&nd);
1969 	if (dvp) {
1970 		if (dvp == vp)
1971 			vrele(dvp);
1972 		else
1973 			vput(dvp);
1974 		dvp = NULL;
1975 	}
1976 	if (!error) {
1977 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1978 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1979 		if (!error)
1980 			error = VOP_GETATTR(vp, vap);
1981 	}
1982 	if (vp) {
1983 		vput(vp);
1984 		vp = NULL;
1985 	}
1986 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1987 	if (dirp) {
1988 		vrele(dirp);
1989 		dirp = NULL;
1990 	}
1991 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1992 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1993 			      NFSX_WCCDATA(1), &error));
1994 	if (!error) {
1995 		nfsm_srvpostop_fh(&info, fhp);
1996 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1997 	}
1998 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1999 			 diraft_ret, &diraft);
2000 	*mrq = info.mreq;
2001 	return (0);
2002 nfsmout:
2003 	*mrq = info.mreq;
2004 	if (dirp)
2005 		vrele(dirp);
2006 	nlookup_done(&nd);
2007 	if (dvp) {
2008 		if (dvp == vp)
2009 			vrele(dvp);
2010 		else
2011 			vput(dvp);
2012 	}
2013 	if (vp)
2014 		vput(vp);
2015 	return (error);
2016 }
2017 
2018 /*
2019  * nfs remove service
2020  */
2021 int
2022 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2023 	     struct thread *td, struct mbuf **mrq)
2024 {
2025 	struct sockaddr *nam = nfsd->nd_nam;
2026 	struct ucred *cred = &nfsd->nd_cr;
2027 	struct nlookupdata nd;
2028 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2029 	struct vnode *dirp;
2030 	struct vnode *dvp;
2031 	struct vnode *vp;
2032 	struct vattr dirfor, diraft;
2033 	nfsfh_t nfh;
2034 	fhandle_t *fhp;
2035 	struct nfsm_info info;
2036 
2037 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2038 	nlookup_zero(&nd);
2039 	dirp = NULL;
2040 	dvp = NULL;
2041 	vp = NULL;
2042 
2043 	info.mrep = nfsd->nd_mrep;
2044 	info.mreq = NULL;
2045 	info.md = nfsd->nd_md;
2046 	info.dpos = nfsd->nd_dpos;
2047 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2048 
2049 	fhp = &nfh.fh_generic;
2050 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2051 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2052 
2053 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2054 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2055 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2056 	if (dirp) {
2057 		if (info.v3)
2058 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2059 	}
2060 	if (error == 0) {
2061 		if (vp->v_type == VDIR) {
2062 			error = EPERM;		/* POSIX */
2063 			goto out;
2064 		}
2065 		/*
2066 		 * The root of a mounted filesystem cannot be deleted.
2067 		 */
2068 		if (vp->v_flag & VROOT) {
2069 			error = EBUSY;
2070 			goto out;
2071 		}
2072 out:
2073 		if (!error) {
2074 			if (dvp != vp)
2075 				vn_unlock(dvp);
2076 			if (vp) {
2077 				vput(vp);
2078 				vp = NULL;
2079 			}
2080 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2081 			vrele(dvp);
2082 			dvp = NULL;
2083 		}
2084 	}
2085 	if (dirp && info.v3)
2086 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2087 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2088 	if (info.v3) {
2089 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2090 				 diraft_ret, &diraft);
2091 		error = 0;
2092 	}
2093 nfsmout:
2094 	*mrq = info.mreq;
2095 	nlookup_done(&nd);
2096 	if (dirp)
2097 		vrele(dirp);
2098 	if (dvp) {
2099 		if (dvp == vp)
2100 			vrele(dvp);
2101 		else
2102 			vput(dvp);
2103 	}
2104 	if (vp)
2105 		vput(vp);
2106 	return(error);
2107 }
2108 
2109 /*
2110  * nfs rename service
2111  */
2112 int
2113 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2114 	     struct thread *td, struct mbuf **mrq)
2115 {
2116 	struct sockaddr *nam = nfsd->nd_nam;
2117 	struct ucred *cred = &nfsd->nd_cr;
2118 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2119 	int tdirfor_ret = 1, tdiraft_ret = 1;
2120 	struct nlookupdata fromnd, tond;
2121 	struct vnode *fvp, *fdirp, *fdvp;
2122 	struct vnode *tvp, *tdirp, *tdvp;
2123 	struct namecache *ncp;
2124 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2125 	nfsfh_t fnfh, tnfh;
2126 	fhandle_t *ffhp, *tfhp;
2127 	uid_t saved_uid;
2128 	struct nfsm_info info;
2129 
2130 	info.mrep = nfsd->nd_mrep;
2131 	info.mreq = NULL;
2132 	info.md = nfsd->nd_md;
2133 	info.dpos = nfsd->nd_dpos;
2134 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2135 
2136 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2137 #ifndef nolint
2138 	fvp = NULL;
2139 #endif
2140 	ffhp = &fnfh.fh_generic;
2141 	tfhp = &tnfh.fh_generic;
2142 
2143 	/*
2144 	 * Clear fields incase goto nfsmout occurs from macro.
2145 	 */
2146 
2147 	nlookup_zero(&fromnd);
2148 	nlookup_zero(&tond);
2149 	fdirp = NULL;
2150 	tdirp = NULL;
2151 
2152 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2153 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2154 
2155 	/*
2156 	 * Remember our original uid so that we can reset cr_uid before
2157 	 * the second nfs_namei() call, in case it is remapped.
2158 	 */
2159 	saved_uid = cred->cr_uid;
2160 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2161 			  NULL, NULL,
2162 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2163 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2164 	if (fdirp) {
2165 		if (info.v3)
2166 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2167 	}
2168 	if (error) {
2169 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2170 				      2 * NFSX_WCCDATA(info.v3), &error));
2171 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2172 				 fdiraft_ret, &fdiraft);
2173 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2174 				 tdiraft_ret, &tdiraft);
2175 		error = 0;
2176 		goto nfsmout;
2177 	}
2178 
2179 	/*
2180 	 * We have to unlock the from ncp before we can safely lookup
2181 	 * the target ncp.
2182 	 */
2183 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2184 	cache_unlock(&fromnd.nl_nch);
2185 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2186 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2187 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2188 	cred->cr_uid = saved_uid;
2189 
2190 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2191 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2192 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2193 	if (tdirp) {
2194 		if (info.v3)
2195 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2196 	}
2197 	if (error)
2198 		goto out1;
2199 
2200 	/*
2201 	 * relock the source
2202 	 */
2203 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2204 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2205 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2206 		cache_lock(&fromnd.nl_nch);
2207 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2208 	} else {
2209 		cache_unlock(&tond.nl_nch);
2210 		cache_lock(&fromnd.nl_nch);
2211 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2212 		cache_lock(&tond.nl_nch);
2213 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2214 	}
2215 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2216 
2217 	fvp = fromnd.nl_nch.ncp->nc_vp;
2218 	tvp = tond.nl_nch.ncp->nc_vp;
2219 
2220 	/*
2221 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2222 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2223 	 * point).  If we get through the checks these will be guarenteed
2224 	 * to be non-NULL.
2225 	 *
2226 	 * Holding the children ncp's should be sufficient to prevent
2227 	 * fdvp and tdvp ripouts.
2228 	 */
2229 	if (fromnd.nl_nch.ncp->nc_parent)
2230 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2231 	else
2232 		fdvp = NULL;
2233 	if (tond.nl_nch.ncp->nc_parent)
2234 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2235 	else
2236 		tdvp = NULL;
2237 
2238 	if (tvp != NULL) {
2239 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2240 			if (info.v3)
2241 				error = EEXIST;
2242 			else
2243 				error = EISDIR;
2244 			goto out;
2245 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2246 			if (info.v3)
2247 				error = EEXIST;
2248 			else
2249 				error = ENOTDIR;
2250 			goto out;
2251 		}
2252 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2253 			if (info.v3)
2254 				error = EXDEV;
2255 			else
2256 				error = ENOTEMPTY;
2257 			goto out;
2258 		}
2259 	}
2260 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2261 		if (info.v3)
2262 			error = EXDEV;
2263 		else
2264 			error = ENOTEMPTY;
2265 		goto out;
2266 	}
2267 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2268 		if (info.v3)
2269 			error = EXDEV;
2270 		else
2271 			error = ENOTEMPTY;
2272 		goto out;
2273 	}
2274 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2275 		if (info.v3)
2276 			error = EINVAL;
2277 		else
2278 			error = ENOTEMPTY;
2279 	}
2280 
2281 	/*
2282 	 * You cannot rename a source into itself or a subdirectory of itself.
2283 	 * We check this by travsering the target directory upwards looking
2284 	 * for a match against the source.
2285 	 */
2286 	if (error == 0) {
2287 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2288 			if (fromnd.nl_nch.ncp == ncp) {
2289 				error = EINVAL;
2290 				break;
2291 			}
2292 		}
2293 	}
2294 
2295 	/*
2296 	 * If source is the same as the destination (that is the
2297 	 * same vnode with the same name in the same directory),
2298 	 * then there is nothing to do.
2299 	 */
2300 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2301 		error = -1;
2302 out:
2303 	if (!error) {
2304 		/*
2305 		 * The VOP_NRENAME function releases all vnode references &
2306 		 * locks prior to returning so we need to clear the pointers
2307 		 * to bypass cleanup code later on.
2308 		 */
2309 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2310 				    fdvp, tdvp, tond.nl_cred);
2311 	} else {
2312 		if (error == -1)
2313 			error = 0;
2314 	}
2315 	/* fall through */
2316 
2317 out1:
2318 	if (fdirp)
2319 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2320 	if (tdirp)
2321 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2322 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2323 			      2 * NFSX_WCCDATA(info.v3), &error));
2324 	if (info.v3) {
2325 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2326 				 fdiraft_ret, &fdiraft);
2327 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2328 				 tdiraft_ret, &tdiraft);
2329 	}
2330 	error = 0;
2331 	/* fall through */
2332 
2333 nfsmout:
2334 	*mrq = info.mreq;
2335 	if (tdirp)
2336 		vrele(tdirp);
2337 	nlookup_done(&tond);
2338 	if (fdirp)
2339 		vrele(fdirp);
2340 	nlookup_done(&fromnd);
2341 	return (error);
2342 }
2343 
2344 /*
2345  * nfs link service
 *
 * Decodes the file handle of the existing object followed by the target
 * directory handle and name, resolves the existing object with
 * nfsrv_fhtovp(), looks the new name up with nfs_namei(NLC_CREATE) and
 * issues VOP_NLINK().  Directories are refused with EPERM, an existing
 * target name with EEXIST and a target on a different mount with EXDEV.
 * For NFSv3 the reply carries the post-op attributes of the linked object
 * and wcc data for the target directory.
 *
 * The reply mbuf chain is handed back through *mrq.
 * NOTE(review): the NEG*OUT macros are defined elsewhere; they appear to
 * jump to nfsmout - confirm against their definitions.
2346  */
2347 int
2348 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2349 	   struct thread *td, struct mbuf **mrq)
2350 {
2351 	struct sockaddr *nam = nfsd->nd_nam;
2352 	struct ucred *cred = &nfsd->nd_cr;
2353 	struct nlookupdata nd;
2354 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2355 	int getret = 1;
2356 	struct vnode *dirp;
2357 	struct vnode *dvp;
2358 	struct vnode *vp;
2359 	struct vnode *xp;
2360 	struct mount *mp;
2361 	struct mount *xmp;
2362 	struct vattr dirfor, diraft, at;
2363 	nfsfh_t nfh, dnfh;
2364 	fhandle_t *fhp, *dfhp;
2365 	struct nfsm_info info;
2366 
2367 	info.mrep = nfsd->nd_mrep;
2368 	info.mreq = NULL;
2369 	info.md = nfsd->nd_md;
2370 	info.dpos = nfsd->nd_dpos;
2371 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2372 
2373 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything the nfsmout cleanup inspects before any macro can jump there. */
2374 	nlookup_zero(&nd);
2375 	dirp = dvp = vp = xp = NULL;
	/* mp is only ever set to NULL in this function; xmp is filled in by nfsrv_fhtovp(). */
2376 	mp = xmp = NULL;
2377 
2378 	fhp = &nfh.fh_generic;
2379 	dfhp = &dnfh.fh_generic;
	/* Request layout: handle of the existing object, target directory handle, name length. */
2380 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2381 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2382 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2383 
	/* Resolve the existing object; xp is dropped with vrele() at nfsmout. */
2384 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2385 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2386 	if (error) {
		/*
		 * No vnode to report on: getret, dirfor_ret and diraft_ret
		 * still hold their initial value of 1 here.
		 */
2387 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2388 				      NFSX_POSTOPATTR(info.v3) +
2389 				      NFSX_WCCDATA(info.v3),
2390 				      &error));
2391 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2392 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2393 				 diraft_ret, &diraft);
2394 		xp = NULL;
2395 		error = 0;
2396 		goto nfsmout;
2397 	}
2398 	if (xp->v_type == VDIR) {
2399 		error = EPERM;		/* POSIX */
2400 		goto out1;
2401 	}
2402 
	/* Look up the new name in the target directory. */
2403 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2404 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2405 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2406 	if (dirp) {
		/* Pre-operation directory attributes for the v3 wcc data. */
2407 		if (info.v3)
2408 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2409 	}
2410 	if (error)
2411 		goto out1;
2412 
	/* The target name must not exist yet. */
2413 	if (vp != NULL) {
2414 		error = EEXIST;
2415 		goto out;
2416 	}
	/* Hard links cannot cross mounts. */
2417 	if (xp->v_mount != dvp->v_mount)
2418 		error = EXDEV;
2419 out:
2420 	if (!error) {
		/*
		 * dvp is unlocked before the VOP and released right after;
		 * clearing it keeps the nfsmout cleanup from touching it again.
		 */
2421 		vn_unlock(dvp);
2422 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2423 		vrele(dvp);
2424 		dvp = NULL;
2425 	}
2426 	/* fall through */
2427 
2428 out1:
	/* Post-operation attributes of the linked object and of the directory. */
2429 	if (info.v3)
2430 		getret = VOP_GETATTR(xp, &at);
2431 	if (dirp)
2432 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2433 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2434 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2435 			      &error));
2436 	if (info.v3) {
2437 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2438 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2439 				 diraft_ret, &diraft);
2440 		error = 0;
2441 	}
2442 	/* fall through */
2443 
2444 nfsmout:
	/* Common exit: hand back the reply and drop whatever is still held. */
2445 	*mrq = info.mreq;
2446 	nlookup_done(&nd);
2447 	if (dirp)
2448 		vrele(dirp);
2449 	if (xp)
2450 		vrele(xp);
2451 	if (dvp) {
2452 		if (dvp == vp)
2453 			vrele(dvp);
2454 		else
2455 			vput(dvp);
2456 	}
2457 	if (vp)
2458 		vput(vp);
2459 	return(error);
2460 }
2461 
2462 /*
2463  * nfs symbolic link service
2464  */
2465 int
2466 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2467 	      struct thread *td, struct mbuf **mrq)
2468 {
2469 	struct sockaddr *nam = nfsd->nd_nam;
2470 	struct ucred *cred = &nfsd->nd_cr;
2471 	struct vattr va, dirfor, diraft;
2472 	struct nlookupdata nd;
2473 	struct vattr *vap = &va;
2474 	struct nfsv2_sattr *sp;
2475 	char *pathcp = NULL;
2476 	struct uio io;
2477 	struct iovec iv;
2478 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2479 	struct vnode *dirp;
2480 	struct vnode *vp;
2481 	struct vnode *dvp;
2482 	nfsfh_t nfh;
2483 	fhandle_t *fhp;
2484 	struct nfsm_info info;
2485 
2486 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2487 	nlookup_zero(&nd);
2488 	dirp = NULL;
2489 	dvp = NULL;
2490 	vp = NULL;
2491 
2492 	info.mrep = nfsd->nd_mrep;
2493 	info.mreq =  NULL;
2494 	info.md = nfsd->nd_md;
2495 	info.dpos = nfsd->nd_dpos;
2496 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2497 
2498 	fhp = &nfh.fh_generic;
2499 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2500 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2501 
2502 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2503 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2504 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2505 	if (dirp) {
2506 		if (info.v3)
2507 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2508 	}
2509 	if (error)
2510 		goto out;
2511 
2512 	VATTR_NULL(vap);
2513 	if (info.v3) {
2514 		ERROROUT(nfsm_srvsattr(&info, vap));
2515 	}
2516 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2517 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2518 	iv.iov_base = pathcp;
2519 	iv.iov_len = len2;
2520 	io.uio_resid = len2;
2521 	io.uio_offset = 0;
2522 	io.uio_iov = &iv;
2523 	io.uio_iovcnt = 1;
2524 	io.uio_segflg = UIO_SYSSPACE;
2525 	io.uio_rw = UIO_READ;
2526 	io.uio_td = NULL;
2527 	ERROROUT(nfsm_mtouio(&info, &io, len2));
2528 	if (info.v3 == 0) {
2529 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2530 		vap->va_mode = nfstov_mode(sp->sa_mode);
2531 	}
2532 	*(pathcp + len2) = '\0';
2533 	if (vp) {
2534 		error = EEXIST;
2535 		goto out;
2536 	}
2537 
2538 	if (vap->va_mode == (mode_t)VNOVAL)
2539 		vap->va_mode = 0;
2540 	if (dvp != vp)
2541 		vn_unlock(dvp);
2542 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2543 	vrele(dvp);
2544 	dvp = NULL;
2545 	if (error == 0) {
2546 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2547 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2548 		if (!error)
2549 			error = VOP_GETATTR(vp, vap);
2550 	}
2551 
2552 out:
2553 	if (dvp) {
2554 		if (dvp == vp)
2555 			vrele(dvp);
2556 		else
2557 			vput(dvp);
2558 	}
2559 	if (vp) {
2560 		vput(vp);
2561 		vp = NULL;
2562 	}
2563 	if (pathcp) {
2564 		FREE(pathcp, M_TEMP);
2565 		pathcp = NULL;
2566 	}
2567 	if (dirp) {
2568 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2569 		vrele(dirp);
2570 		dirp = NULL;
2571 	}
2572 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2573 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2574 			      NFSX_WCCDATA(info.v3),
2575 			      &error));
2576 	if (info.v3) {
2577 		if (!error) {
2578 			nfsm_srvpostop_fh(&info, fhp);
2579 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2580 		}
2581 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2582 				 diraft_ret, &diraft);
2583 	}
2584 	error = 0;
2585 	/* fall through */
2586 
2587 nfsmout:
2588 	*mrq = info.mreq;
2589 	nlookup_done(&nd);
2590 	if (vp)
2591 		vput(vp);
2592 	if (dirp)
2593 		vrele(dirp);
2594 	if (pathcp)
2595 		FREE(pathcp, M_TEMP);
2596 	return (error);
2597 }
2598 
2599 /*
2600  * nfs mkdir service
 *
 * Decodes the parent directory handle and the new name, looks the name up
 * with nfs_namei(NLC_CREATE), decodes the requested attributes and issues
 * VOP_NMKDIR().  An existing name is refused with EEXIST.  The NFSv3
 * reply carries the new handle and attributes on success plus wcc data
 * for the parent; the NFSv2 reply carries the handle and a fattr.
 *
 * The reply mbuf chain is handed back through *mrq.
 * NOTE(review): the *OUT macros are defined elsewhere; they appear to
 * jump to nfsmout - confirm against their definitions.
2601  */
2602 int
2603 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2604 	    struct thread *td, struct mbuf **mrq)
2605 {
2606 	struct sockaddr *nam = nfsd->nd_nam;
2607 	struct ucred *cred = &nfsd->nd_cr;
2608 	struct vattr va, dirfor, diraft;
2609 	struct vattr *vap = &va;
2610 	struct nfs_fattr *fp;
2611 	struct nlookupdata nd;
2612 	u_int32_t *tl;
2613 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2614 	struct vnode *dirp;
2615 	struct vnode *dvp;
2616 	struct vnode *vp;
2617 	nfsfh_t nfh;
2618 	fhandle_t *fhp;
2619 	struct nfsm_info info;
2620 
2621 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything the nfsmout cleanup inspects before any macro can jump there. */
2622 	nlookup_zero(&nd);
2623 	dirp = NULL;
2624 	dvp = NULL;
2625 	vp = NULL;
2626 
2627 	info.dpos = nfsd->nd_dpos;
2628 	info.mrep = nfsd->nd_mrep;
2629 	info.mreq =  NULL;
2630 	info.md = nfsd->nd_md;
2631 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2632 
2633 	fhp = &nfh.fh_generic;
2634 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2635 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2636 
2637 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2638 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2639 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2640 	if (dirp) {
		/* Pre-operation directory attributes for the v3 wcc data. */
2641 		if (info.v3)
2642 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2643 	}
2644 	if (error) {
		/* Lookup failed: reply with wcc data only (diraft_ret is still 1). */
2645 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2646 				      NFSX_WCCDATA(info.v3), &error));
2647 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2648 				 diraft_ret, &diraft);
2649 		error = 0;
2650 		goto nfsmout;
2651 	}
	/* v3 sends a full sattr; v2 only has its mode word decoded here. */
2652 	VATTR_NULL(vap);
2653 	if (info.v3) {
2654 		ERROROUT(nfsm_srvsattr(&info, vap));
2655 	} else {
2656 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2657 		vap->va_mode = nfstov_mode(*tl++);
2658 	}
2659 
2660 	/*
2661 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2662 	 * nd.ni_vp, if it exists, is referenced but not locked.
	 *
	 * NOTE(review): this wording refers to nd.ni_dvp / nd.ni_vp, which
	 * this function does not use; the corresponding locals here are
	 * dvp and vp.  Confirm their lock state against nfs_namei().
2663 	 */
2664 
2665 	vap->va_type = VDIR;
2666 	if (vp != NULL) {
2667 		error = EEXIST;
2668 		goto out;
2669 	}
2670 
2671 	/*
2672 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2673 	 * component is freed by the VOP call.  This will fill-in
2674 	 * nd.ni_vp, reference, and exclusively lock it.
	 *
	 * NOTE(review): SAVESTART and nd.ni_vp do not appear in this
	 * function; the new vnode comes back through vp and is later
	 * released with vput().
2675 	 */
2676 	if (vap->va_mode == (mode_t)VNOVAL)
2677 		vap->va_mode = 0;
	/* dvp is unlocked for the VOP, released and cleared so nfsmout skips it. */
2678 	vn_unlock(dvp);
2679 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2680 	vrele(dvp);
2681 	dvp = NULL;
2682 
2683 	if (error == 0) {
		/* Build the file handle and fetch attributes of the new directory. */
2684 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2685 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2686 		if (error == 0)
2687 			error = VOP_GETATTR(vp, vap);
2688 	}
2689 out:
2690 	if (dirp)
2691 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2692 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2693 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2694 			      NFSX_WCCDATA(info.v3),
2695 			      &error));
2696 	if (info.v3) {
2697 		if (!error) {
2698 			nfsm_srvpostop_fh(&info, fhp);
2699 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2700 		}
2701 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2702 				 diraft_ret, &diraft);
2703 	} else {
2704 		nfsm_srvfhtom(&info, fhp);
2705 		fp = nfsm_build(&info, NFSX_V2FATTR);
2706 		nfsm_srvfattr(nfsd, vap, fp);
2707 	}
2708 	error = 0;
2709 	/* fall through */
2710 
2711 nfsmout:
	/* Common exit: hand back the reply and drop whatever is still held. */
2712 	*mrq = info.mreq;
2713 	nlookup_done(&nd);
2714 	if (dirp)
2715 		vrele(dirp);
2716 	if (dvp) {
2717 		if (dvp == vp)
2718 			vrele(dvp);
2719 		else
2720 			vput(dvp);
2721 	}
2722 	if (vp)
2723 		vput(vp);
2724 	return (error);
2725 }
2726 
2727 /*
2728  * nfs rmdir service
 *
 * Decodes the parent directory handle and name, looks the name up with
 * nfs_namei(NLC_DELETE) and issues VOP_NRMDIR().  A non-directory is
 * refused with ENOTDIR and a vnode flagged VROOT with EBUSY.  For NFSv3
 * the reply carries wcc data for the parent directory.
 *
 * The reply mbuf chain is handed back through *mrq.
 * NOTE(review): the NEG*OUT macros are defined elsewhere; they appear to
 * jump to nfsmout - confirm against their definitions.
2729  */
2730 int
2731 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2732 	    struct thread *td, struct mbuf **mrq)
2733 {
2734 	struct sockaddr *nam = nfsd->nd_nam;
2735 	struct ucred *cred = &nfsd->nd_cr;
2736 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2737 	struct vnode *dirp;
2738 	struct vnode *dvp;
2739 	struct vnode *vp;
2740 	struct vattr dirfor, diraft;
2741 	nfsfh_t nfh;
2742 	fhandle_t *fhp;
2743 	struct nlookupdata nd;
2744 	struct nfsm_info info;
2745 
2746 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything the nfsmout cleanup inspects before any macro can jump there. */
2747 	nlookup_zero(&nd);
2748 	dirp = NULL;
2749 	dvp = NULL;
2750 	vp = NULL;
2751 
2752 	info.mrep = nfsd->nd_mrep;
2753 	info.mreq = NULL;
2754 	info.md = nfsd->nd_md;
2755 	info.dpos = nfsd->nd_dpos;
2756 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2757 
2758 	fhp = &nfh.fh_generic;
2759 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2760 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2761 
2762 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2763 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2764 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2765 	if (dirp) {
		/* Pre-operation directory attributes for the v3 wcc data. */
2766 		if (info.v3)
2767 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2768 	}
2769 	if (error) {
		/* Lookup failed: reply with wcc data only (diraft_ret is still 1). */
2770 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2771 				      NFSX_WCCDATA(info.v3), &error));
2772 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2773 				 diraft_ret, &diraft);
2774 		error = 0;
2775 		goto nfsmout;
2776 	}
2777 	if (vp->v_type != VDIR) {
2778 		error = ENOTDIR;
2779 		goto out;
2780 	}
2781 
2782 	/*
2783 	 * The root of a mounted filesystem cannot be deleted.
2784 	 */
2785 	if (vp->v_flag & VROOT)
2786 		error = EBUSY;
2787 out:
2788 	/*
2789 	 * Issue or abort op.  Since SAVESTART is not set, path name
2790 	 * component is freed by the VOP after either.
	 *
	 * NOTE(review): SAVESTART does not appear in this function; on the
	 * abort path nothing is released here and dvp/vp are left for the
	 * nfsmout cleanup.
2791 	 */
2792 	if (!error) {
		/*
		 * Drop the lock on dvp and release vp before the VOP, then
		 * release dvp; both are cleared so nfsmout skips them.
		 */
2793 		if (dvp != vp)
2794 			vn_unlock(dvp);
2795 		vput(vp);
2796 		vp = NULL;
2797 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2798 		vrele(dvp);
2799 		dvp = NULL;
2800 	}
	/*
	 * NOTE(review): nlookup_done(&nd) is called again at nfsmout;
	 * presumably a second call on an already-finished nd is harmless -
	 * confirm.
	 */
2801 	nlookup_done(&nd);
2802 
2803 	if (dirp)
2804 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2805 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2806 	if (info.v3) {
2807 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2808 				 diraft_ret, &diraft);
2809 		error = 0;
2810 	}
2811 	/* fall through */
2812 
2813 nfsmout:
	/* Common exit: hand back the reply and drop whatever is still held. */
2814 	*mrq = info.mreq;
2815 	if (dvp) {
2816 		if (dvp == vp)
2817 			vrele(dvp);
2818 		else
2819 			vput(dvp);
2820 	}
2821 	nlookup_done(&nd);
2822 	if (dirp)
2823 		vrele(dirp);
2824 	if (vp)
2825 		vput(vp);
2826 	return(error);
2827 }
2828 
2829 /*
2830  * nfs readdir service
2831  * - mallocs what it thinks is enough to read
2832  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2833  * - calls VOP_READDIR()
2834  * - loops around building the reply
2835  *	if the output generated exceeds count break out of loop
2836  *	The nfsm_clget macro is used here so that the reply will be packed
2837  *	tightly in mbuf clusters.
2838  * - it only knows that it has encountered eof when the VOP_READDIR()
2839  *	reads nothing
2840  * - as such one readdir rpc will return eof false although you are there
2841  *	and then the next will return eof
2842  * - it trims out records with d_fileno == 0
2843  *	this doesn't matter for Unix clients, but such records might confuse
2844  *	clients for other operating systems.
2845  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2846  *	than requested, but this may not apply to all filesystems. For
2847  *	example, client NFS does not { although it is never remote mounted
2848  *	anyhow }
2849  *     The alternate call nfsrv_readdirplus() does lookups as well.
2850  * PS: The NFS protocol spec. does not clarify what the "count" byte
2851  *	argument is a count of.. just name strings and file id's or the
2852  *	entire reply rpc or ...
2853  *	I tried just file name and id sizes and it confused the Sun client,
2854  *	so I am using the full rpc size now. The "paranoia.." comment refers
2855  *	to including the status longwords that are not a part of the dir.
2856  *	"entry" structures, but are in the rpc.
2857  */
/*
 * Fixed-layout scratch record declared as a local in nfsrv_readdirplus(),
 * which also uses fl_nfh as storage for a fhandle_t.
 * NOTE(review): the field names suggest the per-entry tail of a
 * READDIRPLUS reply (cookie, post-op attribute flag and fattr, file
 * handle flag, size and handle) - confirm against the code that fills
 * it in.
 */
2858 struct flrep {
2859 	nfsuint64	fl_off;
2860 	u_int32_t	fl_postopok;
2861 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2862 	u_int32_t	fl_fhok;
2863 	u_int32_t	fl_fhsize;
2864 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2865 };
2866 
/*
 * READDIR handler for NFSv2 and NFSv3.
 *
 * Decodes the directory handle, starting cookie (plus cookie verifier
 * for v3) and byte count, reads directory entries with VOP_READDIR()
 * into a temporary buffer and encodes them into the reply until the
 * encoded length would exceed the requested count.
 *
 * The reply mbuf chain is handed back through *mrq.
 * NOTE(review): the *OUT macros are defined elsewhere; they appear to
 * jump to nfsmout - confirm against their definitions.
 */
2867 int
2868 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2869 	      struct thread *td, struct mbuf **mrq)
2870 {
2871 	struct sockaddr *nam = nfsd->nd_nam;
2872 	struct ucred *cred = &nfsd->nd_cr;
2873 	char *bp, *be;
2874 	struct dirent *dp;
2875 	caddr_t cp;
2876 	u_int32_t *tl;
2877 	struct mbuf *mp1, *mp2;
2878 	char *cpos, *cend, *rbuf;
2879 	struct vnode *vp = NULL;
2880 	struct mount *mp = NULL;
2881 	struct vattr at;
2882 	nfsfh_t nfh;
2883 	fhandle_t *fhp;
2884 	struct uio io;
2885 	struct iovec iv;
2886 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2887 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2888 	u_quad_t off, toff, verf;
2889 	off_t *cookies = NULL, *cookiep;
2890 	struct nfsm_info info;
2891 
2892 	info.mrep = nfsd->nd_mrep;
2893 	info.mreq = NULL;
2894 	info.md = nfsd->nd_md;
2895 	info.dpos = nfsd->nd_dpos;
2896 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2897 
2898 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2899 	fhp = &nfh.fh_generic;
2900 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/*
	 * v3 arguments: 64-bit cookie, 64-bit verifier, count (5 words).
	 * v2 arguments: 32-bit cookie, count (2 words).
	 * In both cases tl is left pointing at the count word.
	 */
2901 	if (info.v3) {
2902 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2903 		toff = fxdr_hyper(tl);
2904 		tl += 2;
2905 		verf = fxdr_hyper(tl);
2906 		tl += 2;
2907 	} else {
2908 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2909 		toff = fxdr_unsigned(u_quad_t, *tl++);
2910 		verf = 0;	/* shut up gcc */
2911 	}
2912 	off = toff;
	/*
	 * siz is cnt rounded up to a multiple of DIRBLKSIZ; both are then
	 * clamped to NFS_SRVMAXDATA(nfsd).  fullsiz remembers the read size
	 * because siz is reduced after a short read.
	 */
2913 	cnt = fxdr_unsigned(int, *tl);
2914 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2915 	xfer = NFS_SRVMAXDATA(nfsd);
2916 	if ((unsigned)cnt > xfer)
2917 		cnt = xfer;
2918 	if ((unsigned)siz > xfer)
2919 		siz = xfer;
2920 	fullsiz = siz;
2921 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2922 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2923 	if (!error && vp->v_type != VDIR) {
2924 		error = ENOTDIR;
2925 		vput(vp);
2926 		vp = NULL;
2927 	}
2928 	if (error) {
2929 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2930 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2931 		error = 0;
2932 		goto nfsmout;
2933 	}
2934 
2935 	/*
2936 	 * Obtain lock on vnode for this section of the code
	 *
	 * NOTE(review): no lock call appears here; vp is released with
	 * vput() on the error path and vn_unlock()ed below, so it was
	 * presumably returned locked by nfsrv_fhtovp() - confirm.
2937 	 */
2938 
2939 	if (info.v3) {
2940 		error = getret = VOP_GETATTR(vp, &at);
2941 #if 0
2942 		/*
2943 		 * XXX This check may be too strict for Solaris 2.5 clients.
2944 		 */
2945 		if (!error && toff && verf && verf != at.va_filerev)
2946 			error = NFSERR_BAD_COOKIE;
2947 #endif
2948 	}
2949 	if (!error)
2950 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2951 	if (error) {
2952 		vput(vp);
2953 		vp = NULL;
2954 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2955 				      NFSX_POSTOPATTR(info.v3), &error));
2956 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2957 		error = 0;
2958 		goto nfsmout;
2959 	}
	/* From here on vp is only referenced; later releases use vrele(). */
2960 	vn_unlock(vp);
2961 
2962 	/*
2963 	 * end section.  Allocate rbuf and continue
2964 	 */
2965 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2966 again:
	/*
	 * (Re)read up to fullsiz bytes of directory data starting at off.
	 * Any cookie array left over from a previous pass is freed first.
	 */
2967 	iv.iov_base = rbuf;
2968 	iv.iov_len = fullsiz;
2969 	io.uio_iov = &iv;
2970 	io.uio_iovcnt = 1;
2971 	io.uio_offset = (off_t)off;
2972 	io.uio_resid = fullsiz;
2973 	io.uio_segflg = UIO_SYSSPACE;
2974 	io.uio_rw = UIO_READ;
2975 	io.uio_td = NULL;
2976 	eofflag = 0;
2977 	if (cookies) {
2978 		kfree((caddr_t)cookies, M_TEMP);
2979 		cookies = NULL;
2980 	}
2981 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2982 	off = (off_t)io.uio_offset;
	/* A successful read that produced no cookie array is reported as NFSERR_PERM. */
2983 	if (!cookies && !error)
2984 		error = NFSERR_PERM;
2985 	if (info.v3) {
2986 		getret = VOP_GETATTR(vp, &at);
2987 		if (!error)
2988 			error = getret;
2989 	}
2990 	if (error) {
2991 		vrele(vp);
2992 		vp = NULL;
2993 		kfree((caddr_t)rbuf, M_TEMP);
2994 		if (cookies)
2995 			kfree((caddr_t)cookies, M_TEMP);
2996 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2997 				      NFSX_POSTOPATTR(info.v3), &error));
2998 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2999 		error = 0;
3000 		goto nfsmout;
3001 	}
3002 	if (io.uio_resid) {
		/* Short read: siz becomes the number of bytes actually read. */
3003 		siz -= io.uio_resid;
3004 
3005 		/*
3006 		 * If nothing read, return eof
3007 		 * rpc reply
3008 		 */
3009 		if (siz == 0) {
3010 			vrele(vp);
3011 			vp = NULL;
3012 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3013 					      NFSX_POSTOPATTR(info.v3) +
3014 					      NFSX_COOKIEVERF(info.v3) +
3015 					      2 * NFSX_UNSIGNED,
3016 					      &error));
3017 			if (info.v3) {
3018 				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3019 				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3020 				txdr_hyper(at.va_filerev, tl);
3021 				tl += 2;
3022 			} else
3023 				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
			/* Empty entry list (nfs_false) followed by eof = nfs_true. */
3024 			*tl++ = nfs_false;
3025 			*tl = nfs_true;
3026 			FREE((caddr_t)rbuf, M_TEMP);
3027 			FREE((caddr_t)cookies, M_TEMP);
3028 			error = 0;
3029 			goto nfsmout;
3030 		}
3031 	}
3032 
3033 	/*
3034 	 * Check for degenerate cases of nothing useful read.
3035 	 * If so go try again
3036 	 */
3037 	cpos = rbuf;
3038 	cend = rbuf + siz;
3039 	dp = (struct dirent *)cpos;
3040 	cookiep = cookies;
3041 	/*
3042 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3043 	 * directory offset up to a block boundary, so it is necessary to
3044 	 * skip over the records that precede the requested offset. This
3045 	 * requires the assumption that file offset cookies monotonically
3046 	 * increase.
3047 	 */
3048 	while (cpos < cend && ncookies > 0 &&
3049 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3050 		 ((u_quad_t)(*cookiep)) <= toff)) {
3051 		dp = _DIRENT_NEXT(dp);
3052 		cpos = (char *)dp;
3053 		cookiep++;
3054 		ncookies--;
3055 	}
	/* Everything read was skipped: continue reading from the new offset. */
3056 	if (cpos >= cend || ncookies == 0) {
3057 		toff = off;
3058 		siz = fullsiz;
3059 		goto again;
3060 	}
3061 
3062 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3063 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3064 			      NFSX_POSTOPATTR(info.v3) +
3065 			      NFSX_COOKIEVERF(info.v3) + siz,
3066 			      &error));
3067 	if (info.v3) {
3068 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3069 		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3070 		txdr_hyper(at.va_filerev, tl);
3071 	}
	/*
	 * bp/be bracket the writable space in the current reply mbuf.
	 * NOTE(review): nfsm_clget() is handed mp1, mp2, bp and be by name
	 * and presumably advances them to a fresh mbuf when the current
	 * one is full - confirm against its definition.
	 */
3072 	mp1 = mp2 = info.mb;
3073 	bp = info.bpos;
3074 	be = bp + M_TRAILINGSPACE(mp1);
3075 
3076 	/* Loop through the records and build reply */
3077 	while (cpos < cend && ncookies > 0) {
3078 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
			/*
			 * Account for this entry (4 words plus the padded
			 * name, 2 more words for v3) and stop before the
			 * running total exceeds the requested count.  eofflag
			 * is cleared because entries remain unsent.
			 */
3079 			nlen = dp->d_namlen;
3080 			rem = nfsm_rndup(nlen) - nlen;
3081 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3082 			if (info.v3)
3083 				len += 2 * NFSX_UNSIGNED;
3084 			if (len > cnt) {
3085 				eofflag = 0;
3086 				break;
3087 			}
3088 			/*
3089 			 * Build the directory record xdr from
3090 			 * the dirent entry.
3091 			 */
3092 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3093 			*tl = nfs_true;
3094 			bp += NFSX_UNSIGNED;
			/* v3 file ids are 64 bits: emit the high word first. */
3095 			if (info.v3) {
3096 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3097 				*tl = txdr_unsigned(dp->d_ino >> 32);
3098 				bp += NFSX_UNSIGNED;
3099 			}
3100 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3101 			*tl = txdr_unsigned(dp->d_ino);
3102 			bp += NFSX_UNSIGNED;
3103 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3104 			*tl = txdr_unsigned(nlen);
3105 			bp += NFSX_UNSIGNED;
3106 
3107 			/* And loop around copying the name */
3108 			xfer = nlen;
3109 			cp = dp->d_name;
3110 			while (xfer > 0) {
3111 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
				/* Copy at most what fits before be. */
3112 				if ((bp+xfer) > be)
3113 					tsiz = be-bp;
3114 				else
3115 					tsiz = xfer;
3116 				bcopy(cp, bp, tsiz);
3117 				bp += tsiz;
3118 				xfer -= tsiz;
3119 				if (xfer > 0)
3120 					cp += tsiz;
3121 			}
3122 			/* And null pad to a int32_t boundary */
3123 			for (i = 0; i < rem; i++)
3124 				*bp++ = '\0';
3125 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3126 
3127 			/* Finish off the record */
			/* The entry's cookie: 64 bits for v3, low 32 bits for v2. */
3128 			if (info.v3) {
3129 				*tl = txdr_unsigned(*cookiep >> 32);
3130 				bp += NFSX_UNSIGNED;
3131 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3132 			}
3133 			*tl = txdr_unsigned(*cookiep);
3134 			bp += NFSX_UNSIGNED;
3135 		}
3136 		dp = _DIRENT_NEXT(dp);
3137 		cpos = (char *)dp;
3138 		cookiep++;
3139 		ncookies--;
3140 	}
3141 	vrele(vp);
3142 	vp = NULL;
	/* Terminate the entry list (nfs_false), then append the eof flag. */
3143 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3144 	*tl = nfs_false;
3145 	bp += NFSX_UNSIGNED;
3146 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3147 	if (eofflag)
3148 		*tl = nfs_true;
3149 	else
3150 		*tl = nfs_false;
3151 	bp += NFSX_UNSIGNED;
	/* Fix up the length of the last mbuf written to. */
3152 	if (mp1 != info.mb) {
3153 		if (bp < be)
3154 			mp1->m_len = bp - mtod(mp1, caddr_t);
3155 	} else
3156 		mp1->m_len += bp - info.bpos;
3157 	FREE((caddr_t)rbuf, M_TEMP);
3158 	FREE((caddr_t)cookies, M_TEMP);
3159 
3160 nfsmout:
	/* Common exit: hand back the reply and drop vp if it is still held. */
3161 	*mrq = info.mreq;
3162 	if (vp)
3163 		vrele(vp);
3164 	return(error);
3165 }
3166 
3167 int
3168 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3169 		  struct thread *td, struct mbuf **mrq)
3170 {
3171 	struct sockaddr *nam = nfsd->nd_nam;
3172 	struct ucred *cred = &nfsd->nd_cr;
3173 	char *bp, *be;
3174 	struct dirent *dp;
3175 	caddr_t cp;
3176 	u_int32_t *tl;
3177 	struct mbuf *mp1, *mp2;
3178 	char *cpos, *cend, *rbuf;
3179 	struct vnode *vp = NULL, *nvp;
3180 	struct mount *mp = NULL;
3181 	struct flrep fl;
3182 	nfsfh_t nfh;
3183 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3184 	struct uio io;
3185 	struct iovec iv;
3186 	struct vattr va, at, *vap = &va;
3187 	struct nfs_fattr *fp;
3188 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3189 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3190 	u_quad_t off, toff, verf;
3191 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3192 	struct nfsm_info info;
3193 
3194 	info.mrep = nfsd->nd_mrep;
3195 	info.mreq = NULL;
3196 	info.md = nfsd->nd_md;
3197 	info.dpos = nfsd->nd_dpos;
3198 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3199 
3200 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3201 	fhp = &nfh.fh_generic;
3202 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3203 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3204 	toff = fxdr_hyper(tl);
3205 	tl += 2;
3206 	verf = fxdr_hyper(tl);
3207 	tl += 2;
3208 	siz = fxdr_unsigned(int, *tl++);
3209 	cnt = fxdr_unsigned(int, *tl);
3210 	off = toff;
3211 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3212 	xfer = NFS_SRVMAXDATA(nfsd);
3213 	if ((unsigned)cnt > xfer)
3214 		cnt = xfer;
3215 	if ((unsigned)siz > xfer)
3216 		siz = xfer;
3217 	fullsiz = siz;
3218 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3219 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3220 	if (!error && vp->v_type != VDIR) {
3221 		error = ENOTDIR;
3222 		vput(vp);
3223 		vp = NULL;
3224 	}
3225 	if (error) {
3226 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3227 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3228 		error = 0;
3229 		goto nfsmout;
3230 	}
3231 	error = getret = VOP_GETATTR(vp, &at);
3232 #if 0
3233 	/*
3234 	 * XXX This check may be too strict for Solaris 2.5 clients.
3235 	 */
3236 	if (!error && toff && verf && verf != at.va_filerev)
3237 		error = NFSERR_BAD_COOKIE;
3238 #endif
3239 	if (!error) {
3240 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3241 	}
3242 	if (error) {
3243 		vput(vp);
3244 		vp = NULL;
3245 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3246 				      NFSX_V3POSTOPATTR, &error));
3247 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3248 		error = 0;
3249 		goto nfsmout;
3250 	}
3251 	vn_unlock(vp);
3252 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3253 again:
3254 	iv.iov_base = rbuf;
3255 	iv.iov_len = fullsiz;
3256 	io.uio_iov = &iv;
3257 	io.uio_iovcnt = 1;
3258 	io.uio_offset = (off_t)off;
3259 	io.uio_resid = fullsiz;
3260 	io.uio_segflg = UIO_SYSSPACE;
3261 	io.uio_rw = UIO_READ;
3262 	io.uio_td = NULL;
3263 	eofflag = 0;
3264 	if (cookies) {
3265 		kfree((caddr_t)cookies, M_TEMP);
3266 		cookies = NULL;
3267 	}
3268 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3269 	off = (u_quad_t)io.uio_offset;
3270 	getret = VOP_GETATTR(vp, &at);
3271 	if (!cookies && !error)
3272 		error = NFSERR_PERM;
3273 	if (!error)
3274 		error = getret;
3275 	if (error) {
3276 		vrele(vp);
3277 		vp = NULL;
3278 		if (cookies)
3279 			kfree((caddr_t)cookies, M_TEMP);
3280 		kfree((caddr_t)rbuf, M_TEMP);
3281 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3282 				      NFSX_V3POSTOPATTR, &error));
3283 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3284 		error = 0;
3285 		goto nfsmout;
3286 	}
3287 	if (io.uio_resid) {
3288 		siz -= io.uio_resid;
3289 
3290 		/*
3291 		 * If nothing read, return eof
3292 		 * rpc reply
3293 		 */
3294 		if (siz == 0) {
3295 			vrele(vp);
3296 			vp = NULL;
3297 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3298 					      NFSX_V3POSTOPATTR +
3299 					      NFSX_V3COOKIEVERF +
3300 					      2 * NFSX_UNSIGNED,
3301 					      &error));
3302 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3303 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3304 			txdr_hyper(at.va_filerev, tl);
3305 			tl += 2;
3306 			*tl++ = nfs_false;
3307 			*tl = nfs_true;
3308 			FREE((caddr_t)cookies, M_TEMP);
3309 			FREE((caddr_t)rbuf, M_TEMP);
3310 			error = 0;
3311 			goto nfsmout;
3312 		}
3313 	}
3314 
3315 	/*
3316 	 * Check for degenerate cases of nothing useful read.
3317 	 * If so go try again
3318 	 */
3319 	cpos = rbuf;
3320 	cend = rbuf + siz;
3321 	dp = (struct dirent *)cpos;
3322 	cookiep = cookies;
3323 	/*
3324 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3325 	 * directory offset up to a block boundary, so it is necessary to
3326 	 * skip over the records that preceed the requested offset. This
3327 	 * requires the assumption that file offset cookies monotonically
3328 	 * increase.
3329 	 */
3330 	while (cpos < cend && ncookies > 0 &&
3331 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3332 		 ((u_quad_t)(*cookiep)) <= toff)) {
3333 		dp = _DIRENT_NEXT(dp);
3334 		cpos = (char *)dp;
3335 		cookiep++;
3336 		ncookies--;
3337 	}
3338 	if (cpos >= cend || ncookies == 0) {
3339 		toff = off;
3340 		siz = fullsiz;
3341 		goto again;
3342 	}
3343 
3344 	/*
3345 	 * Probe one of the directory entries to see if the filesystem
3346 	 * supports VGET.
3347 	 */
3348 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3349 		error = NFSERR_NOTSUPP;
3350 		vrele(vp);
3351 		vp = NULL;
3352 		kfree((caddr_t)cookies, M_TEMP);
3353 		kfree((caddr_t)rbuf, M_TEMP);
3354 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3355 				      NFSX_V3POSTOPATTR, &error));
3356 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3357 		error = 0;
3358 		goto nfsmout;
3359 	}
3360 	if (nvp) {
3361 		vput(nvp);
3362 		nvp = NULL;
3363 	}
3364 
3365 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3366 			2 * NFSX_UNSIGNED;
3367 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3368 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3369 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3370 	txdr_hyper(at.va_filerev, tl);
3371 	mp1 = mp2 = info.mb;
3372 	bp = info.bpos;
3373 	be = bp + M_TRAILINGSPACE(mp1);
3374 
3375 	/* Loop through the records and build reply */
3376 	while (cpos < cend && ncookies > 0) {
3377 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3378 			nlen = dp->d_namlen;
3379 			rem = nfsm_rndup(nlen) - nlen;
3380 
3381 			/*
3382 			 * For readdir_and_lookup get the vnode using
3383 			 * the file number.
3384 			 */
3385 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3386 				goto invalid;
3387 			bzero((caddr_t)nfhp, NFSX_V3FH);
3388 			nfhp->fh_fsid = fhp->fh_fsid;
3389 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3390 				vput(nvp);
3391 				nvp = NULL;
3392 				goto invalid;
3393 			}
3394 			if (VOP_GETATTR(nvp, vap)) {
3395 				vput(nvp);
3396 				nvp = NULL;
3397 				goto invalid;
3398 			}
3399 			vput(nvp);
3400 			nvp = NULL;
3401 
3402 			/*
3403 			 * If either the dircount or maxcount will be
3404 			 * exceeded, get out now. Both of these lengths
3405 			 * are calculated conservatively, including all
3406 			 * XDR overheads.
3407 			 */
3408 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3409 				NFSX_V3POSTOPATTR);
3410 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3411 			if (len > cnt || dirlen > fullsiz) {
3412 				eofflag = 0;
3413 				break;
3414 			}
3415 
3416 			/*
3417 			 * Build the directory record xdr from
3418 			 * the dirent entry.
3419 			 */
3420 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3421 			nfsm_srvfattr(nfsd, vap, fp);
3422 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3423 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3424 			fl.fl_postopok = nfs_true;
3425 			fl.fl_fhok = nfs_true;
3426 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3427 
3428 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3429 			*tl = nfs_true;
3430 			bp += NFSX_UNSIGNED;
3431 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3432 			*tl = txdr_unsigned(dp->d_ino >> 32);
3433 			bp += NFSX_UNSIGNED;
3434 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3435 			*tl = txdr_unsigned(dp->d_ino);
3436 			bp += NFSX_UNSIGNED;
3437 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3438 			*tl = txdr_unsigned(nlen);
3439 			bp += NFSX_UNSIGNED;
3440 
3441 			/* And loop around copying the name */
3442 			xfer = nlen;
3443 			cp = dp->d_name;
3444 			while (xfer > 0) {
3445 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3446 				if ((bp + xfer) > be)
3447 					tsiz = be - bp;
3448 				else
3449 					tsiz = xfer;
3450 				bcopy(cp, bp, tsiz);
3451 				bp += tsiz;
3452 				xfer -= tsiz;
3453 				cp += tsiz;
3454 			}
3455 			/* And null pad to a int32_t boundary */
3456 			for (i = 0; i < rem; i++)
3457 				*bp++ = '\0';
3458 
3459 			/*
3460 			 * Now copy the flrep structure out.
3461 			 */
3462 			xfer = sizeof (struct flrep);
3463 			cp = (caddr_t)&fl;
3464 			while (xfer > 0) {
3465 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3466 				if ((bp + xfer) > be)
3467 					tsiz = be - bp;
3468 				else
3469 					tsiz = xfer;
3470 				bcopy(cp, bp, tsiz);
3471 				bp += tsiz;
3472 				xfer -= tsiz;
3473 				cp += tsiz;
3474 			}
3475 		}
3476 invalid:
3477 		dp = _DIRENT_NEXT(dp);
3478 		cpos = (char *)dp;
3479 		cookiep++;
3480 		ncookies--;
3481 	}
3482 	vrele(vp);
3483 	vp = NULL;
3484 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3485 	*tl = nfs_false;
3486 	bp += NFSX_UNSIGNED;
3487 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3488 	if (eofflag)
3489 		*tl = nfs_true;
3490 	else
3491 		*tl = nfs_false;
3492 	bp += NFSX_UNSIGNED;
3493 	if (mp1 != info.mb) {
3494 		if (bp < be)
3495 			mp1->m_len = bp - mtod(mp1, caddr_t);
3496 	} else
3497 		mp1->m_len += bp - info.bpos;
3498 	FREE((caddr_t)cookies, M_TEMP);
3499 	FREE((caddr_t)rbuf, M_TEMP);
3500 nfsmout:
3501 	*mrq = info.mreq;
3502 	if (vp)
3503 		vrele(vp);
3504 	return(error);
3505 }
3506 
3507 /*
3508  * nfs commit service
3509  */
3510 int
3511 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3512 	     struct thread *td, struct mbuf **mrq)
3513 {
3514 	struct sockaddr *nam = nfsd->nd_nam;
3515 	struct ucred *cred = &nfsd->nd_cr;
3516 	struct vattr bfor, aft;
3517 	struct vnode *vp = NULL;
3518 	struct mount *mp = NULL;
3519 	nfsfh_t nfh;
3520 	fhandle_t *fhp;
3521 	u_int32_t *tl;
3522 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3523 	u_quad_t off;
3524 	struct nfsm_info info;
3525 
3526 	info.mrep = nfsd->nd_mrep;
3527 	info.mreq = NULL;
3528 	info.md = nfsd->nd_md;
3529 	info.dpos = nfsd->nd_dpos;
3530 
3531 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3532 	fhp = &nfh.fh_generic;
3533 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3534 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3535 
3536 	/*
3537 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3538 	 * count parameters, so these arguments are useless (someday maybe).
3539 	 */
3540 	off = fxdr_hyper(tl);
3541 	tl += 2;
3542 	cnt = fxdr_unsigned(int, *tl);
3543 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3544 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3545 	if (error) {
3546 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3547 				      2 * NFSX_UNSIGNED, &error));
3548 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3549 				 aft_ret, &aft);
3550 		error = 0;
3551 		goto nfsmout;
3552 	}
3553 	for_ret = VOP_GETATTR(vp, &bfor);
3554 
3555 	if (cnt > MAX_COMMIT_COUNT) {
3556 		/*
3557 		 * Give up and do the whole thing
3558 		 */
3559 		if (vp->v_object &&
3560 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3561 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3562 		}
3563 		error = VOP_FSYNC(vp, MNT_WAIT, 0);
3564 	} else {
3565 		/*
3566 		 * Locate and synchronously write any buffers that fall
3567 		 * into the requested range.  Note:  we are assuming that
3568 		 * f_iosize is a power of 2.
3569 		 */
3570 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3571 		int iomask = iosize - 1;
3572 		off_t loffset;
3573 
3574 		/*
3575 		 * Align to iosize boundry, super-align to page boundry.
3576 		 */
3577 		if (off & iomask) {
3578 			cnt += off & iomask;
3579 			off &= ~(u_quad_t)iomask;
3580 		}
3581 		if (off & PAGE_MASK) {
3582 			cnt += off & PAGE_MASK;
3583 			off &= ~(u_quad_t)PAGE_MASK;
3584 		}
3585 		loffset = off;
3586 
3587 		if (vp->v_object &&
3588 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3589 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3590 		}
3591 
3592 		crit_enter();
3593 		while (cnt > 0) {
3594 			struct buf *bp;
3595 
3596 			/*
3597 			 * If we have a buffer and it is marked B_DELWRI we
3598 			 * have to lock and write it.  Otherwise the prior
3599 			 * write is assumed to have already been committed.
3600 			 *
3601 			 * WARNING: FINDBLK_TEST buffers represent stable
3602 			 *	    storage but not necessarily stable
3603 			 *	    content.  It is ok in this case.
3604 			 */
3605 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3606 				if (bp->b_flags & B_DELWRI)
3607 					bp = findblk(vp, loffset, 0);
3608 				else
3609 					bp = NULL;
3610 			}
3611 			if (bp) {
3612 				if (bp->b_flags & B_DELWRI) {
3613 					bremfree(bp);
3614 					bwrite(bp);
3615 					++nfs_commit_miss;
3616 				} else {
3617 					BUF_UNLOCK(bp);
3618 				}
3619 			}
3620 			++nfs_commit_blks;
3621 			if (cnt < iosize)
3622 				break;
3623 			cnt -= iosize;
3624 			loffset += iosize;
3625 		}
3626 		crit_exit();
3627 	}
3628 
3629 	aft_ret = VOP_GETATTR(vp, &aft);
3630 	vput(vp);
3631 	vp = NULL;
3632 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3633 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3634 			      &error));
3635 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3636 			 aft_ret, &aft);
3637 	if (!error) {
3638 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3639 		if (nfsver.tv_sec == 0)
3640 			nfsver = boottime;
3641 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3642 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3643 	} else {
3644 		error = 0;
3645 	}
3646 nfsmout:
3647 	*mrq = info.mreq;
3648 	if (vp)
3649 		vput(vp);
3650 	return(error);
3651 }
3652 
3653 /*
3654  * nfs statfs service
3655  */
3656 int
3657 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3658 	     struct thread *td, struct mbuf **mrq)
3659 {
3660 	struct sockaddr *nam = nfsd->nd_nam;
3661 	struct ucred *cred = &nfsd->nd_cr;
3662 	struct statfs *sf;
3663 	struct nfs_statfs *sfp;
3664 	int error = 0, rdonly, getret = 1;
3665 	struct vnode *vp = NULL;
3666 	struct mount *mp = NULL;
3667 	struct vattr at;
3668 	nfsfh_t nfh;
3669 	fhandle_t *fhp;
3670 	struct statfs statfs;
3671 	u_quad_t tval;
3672 	struct nfsm_info info;
3673 
3674 	info.mrep = nfsd->nd_mrep;
3675 	info.mreq = NULL;
3676 	info.md = nfsd->nd_md;
3677 	info.dpos = nfsd->nd_dpos;
3678 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3679 
3680 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3681 	fhp = &nfh.fh_generic;
3682 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3683 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3684 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3685 	if (error) {
3686 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3687 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3688 		error = 0;
3689 		goto nfsmout;
3690 	}
3691 	sf = &statfs;
3692 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3693 	getret = VOP_GETATTR(vp, &at);
3694 	vput(vp);
3695 	vp = NULL;
3696 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3697 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3698 			      &error));
3699 	if (info.v3)
3700 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3701 	if (error) {
3702 		error = 0;
3703 		goto nfsmout;
3704 	}
3705 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3706 	if (info.v3) {
3707 		tval = (u_quad_t)sf->f_blocks;
3708 		tval *= (u_quad_t)sf->f_bsize;
3709 		txdr_hyper(tval, &sfp->sf_tbytes);
3710 		tval = (u_quad_t)sf->f_bfree;
3711 		tval *= (u_quad_t)sf->f_bsize;
3712 		txdr_hyper(tval, &sfp->sf_fbytes);
3713 		tval = (u_quad_t)sf->f_bavail;
3714 		tval *= (u_quad_t)sf->f_bsize;
3715 		txdr_hyper(tval, &sfp->sf_abytes);
3716 		sfp->sf_tfiles.nfsuquad[0] = 0;
3717 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3718 		sfp->sf_ffiles.nfsuquad[0] = 0;
3719 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3720 		sfp->sf_afiles.nfsuquad[0] = 0;
3721 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3722 		sfp->sf_invarsec = 0;
3723 	} else {
3724 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3725 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3726 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3727 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3728 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3729 	}
3730 nfsmout:
3731 	*mrq = info.mreq;
3732 	if (vp)
3733 		vput(vp);
3734 	return(error);
3735 }
3736 
3737 /*
3738  * nfs fsinfo service
3739  */
3740 int
3741 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3742 	     struct thread *td, struct mbuf **mrq)
3743 {
3744 	struct sockaddr *nam = nfsd->nd_nam;
3745 	struct ucred *cred = &nfsd->nd_cr;
3746 	struct nfsv3_fsinfo *sip;
3747 	int error = 0, rdonly, getret = 1, pref;
3748 	struct vnode *vp = NULL;
3749 	struct mount *mp = NULL;
3750 	struct vattr at;
3751 	nfsfh_t nfh;
3752 	fhandle_t *fhp;
3753 	u_quad_t maxfsize;
3754 	struct statfs sb;
3755 	struct nfsm_info info;
3756 
3757 	info.mrep = nfsd->nd_mrep;
3758 	info.mreq = NULL;
3759 	info.md = nfsd->nd_md;
3760 	info.dpos = nfsd->nd_dpos;
3761 
3762 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3763 	fhp = &nfh.fh_generic;
3764 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3765 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3766 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3767 	if (error) {
3768 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3769 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3770 		error = 0;
3771 		goto nfsmout;
3772 	}
3773 
3774 	/* XXX Try to make a guess on the max file size. */
3775 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3776 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3777 
3778 	getret = VOP_GETATTR(vp, &at);
3779 	vput(vp);
3780 	vp = NULL;
3781 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3782 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3783 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3784 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3785 
3786 	/*
3787 	 * XXX
3788 	 * There should be file system VFS OP(s) to get this information.
3789 	 * For now, assume ufs.
3790 	 */
3791 	if (slp->ns_so->so_type == SOCK_DGRAM)
3792 		pref = NFS_MAXDGRAMDATA;
3793 	else
3794 		pref = NFS_MAXDATA;
3795 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3796 	sip->fs_rtpref = txdr_unsigned(pref);
3797 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3798 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3799 	sip->fs_wtpref = txdr_unsigned(pref);
3800 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3801 	sip->fs_dtpref = txdr_unsigned(pref);
3802 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3803 	sip->fs_timedelta.nfsv3_sec = 0;
3804 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3805 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3806 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3807 		NFSV3FSINFO_CANSETTIME);
3808 nfsmout:
3809 	*mrq = info.mreq;
3810 	if (vp)
3811 		vput(vp);
3812 	return(error);
3813 }
3814 
3815 /*
3816  * nfs pathconf service
3817  */
3818 int
3819 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3820 	       struct thread *td, struct mbuf **mrq)
3821 {
3822 	struct sockaddr *nam = nfsd->nd_nam;
3823 	struct ucred *cred = &nfsd->nd_cr;
3824 	struct nfsv3_pathconf *pc;
3825 	int error = 0, rdonly, getret = 1;
3826 	register_t linkmax, namemax, chownres, notrunc;
3827 	struct vnode *vp = NULL;
3828 	struct mount *mp = NULL;
3829 	struct vattr at;
3830 	nfsfh_t nfh;
3831 	fhandle_t *fhp;
3832 	struct nfsm_info info;
3833 
3834 	info.mrep = nfsd->nd_mrep;
3835 	info.mreq = NULL;
3836 	info.md = nfsd->nd_md;
3837 	info.dpos = nfsd->nd_dpos;
3838 
3839 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3840 	fhp = &nfh.fh_generic;
3841 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3842 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3843 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3844 	if (error) {
3845 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3846 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3847 		error = 0;
3848 		goto nfsmout;
3849 	}
3850 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3851 	if (!error)
3852 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3853 	if (!error)
3854 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3855 	if (!error)
3856 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3857 	getret = VOP_GETATTR(vp, &at);
3858 	vput(vp);
3859 	vp = NULL;
3860 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3861 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3862 			      &error));
3863 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3864 	if (error) {
3865 		error = 0;
3866 		goto nfsmout;
3867 	}
3868 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3869 
3870 	pc->pc_linkmax = txdr_unsigned(linkmax);
3871 	pc->pc_namemax = txdr_unsigned(namemax);
3872 	pc->pc_notrunc = txdr_unsigned(notrunc);
3873 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3874 
3875 	/*
3876 	 * These should probably be supported by VOP_PATHCONF(), but
3877 	 * until msdosfs is exportable (why would you want to?), the
3878 	 * Unix defaults should be ok.
3879 	 */
3880 	pc->pc_caseinsensitive = nfs_false;
3881 	pc->pc_casepreserving = nfs_true;
3882 nfsmout:
3883 	*mrq = info.mreq;
3884 	if (vp)
3885 		vput(vp);
3886 	return(error);
3887 }
3888 
3889 /*
3890  * Null operation, used by clients to ping server
3891  */
3892 /* ARGSUSED */
3893 int
3894 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3895 	   struct thread *td, struct mbuf **mrq)
3896 {
3897 	struct nfsm_info info;
3898 	int error = NFSERR_RETVOID;
3899 
3900 	info.mrep = nfsd->nd_mrep;
3901 	info.mreq = NULL;
3902 
3903 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3904 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3905 nfsmout:
3906 	*mrq = info.mreq;
3907 	return (error);
3908 }
3909 
3910 /*
3911  * No operation, used for obsolete procedures
3912  */
3913 /* ARGSUSED */
3914 int
3915 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3916 	   struct thread *td, struct mbuf **mrq)
3917 {
3918 	struct nfsm_info info;
3919 	int error;
3920 
3921 	info.mrep = nfsd->nd_mrep;
3922 	info.mreq = NULL;
3923 
3924 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3925 	if (nfsd->nd_repstat)
3926 		error = nfsd->nd_repstat;
3927 	else
3928 		error = EPROCUNAVAIL;
3929 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3930 	error = 0;
3931 nfsmout:
3932 	*mrq = info.mreq;
3933 	return (error);
3934 }
3935 
3936 /*
3937  * Perform access checking for vnodes obtained from file handles that would
3938  * refer to files already opened by a Unix client. You cannot just use
3939  * vn_writechk() and VOP_ACCESS() for two reasons.
3940  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3941  * 2 - The owner is to be given access irrespective of mode bits for some
3942  *     operations, so that processes that chmod after opening a file don't
3943  *     break. I don't like this because it opens a security hole, but since
3944  *     the nfs server opens a security hole the size of a barn door anyhow,
3945  *     what the heck.
3946  *
3947  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3948  * will return EPERM instead of EACCESS. EPERM is always an error.
3949  */
3950 static int
3951 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3952 	     int rdonly, struct thread *td, int override)
3953 {
3954 	struct vattr vattr;
3955 	int error;
3956 
3957 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3958 	if (flags & VWRITE) {
3959 		/* Just vn_writechk() changed to check rdonly */
3960 		/*
3961 		 * Disallow write attempts on read-only file systems;
3962 		 * unless the file is a socket or a block or character
3963 		 * device resident on the file system.
3964 		 */
3965 		if (rdonly ||
3966 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3967 			switch (vp->v_type) {
3968 			case VREG:
3969 			case VDIR:
3970 			case VLNK:
3971 				return (EROFS);
3972 			default:
3973 				break;
3974 			}
3975 		}
3976 		/*
3977 		 * If there's shared text associated with
3978 		 * the inode, we can't allow writing.
3979 		 */
3980 		if (vp->v_flag & VTEXT)
3981 			return (ETXTBSY);
3982 	}
3983 	error = VOP_GETATTR(vp, &vattr);
3984 	if (error)
3985 		return (error);
3986 	error = VOP_ACCESS(vp, flags, cred);	/* XXX ruid/rgid vs uid/gid */
3987 	/*
3988 	 * Allow certain operations for the owner (reads and writes
3989 	 * on files that are already open).
3990 	 */
3991 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3992 		error = 0;
3993 	return error;
3994 }
3995 #endif /* NFS_NOSERVER */
3996 
3997