xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision 73e0051e)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84 
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_zone.h>
88 #include <vm/vm_object.h>
89 
90 #include <sys/buf2.h>
91 
92 #include <sys/thread2.h>
93 
94 #include "nfsproto.h"
95 #include "rpcv2.h"
96 #include "nfs.h"
97 #include "xdr_subs.h"
98 #include "nfsm_subs.h"
99 
/*
 * Debug trace hook.  The argument is a complete, parenthesized kprintf()
 * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)), which
 * is why call sites use double parentheses.  When NFSRV_DEBUG is not
 * defined the macro expands to nothing.
 */
100 #ifdef NFSRV_DEBUG
101 #define nfsdbprintf(info)	kprintf info
102 #else
103 #define nfsdbprintf(info)
104 #endif
105 
/*
 * NOTE(review): MAX_COMMIT_COUNT is not referenced in the code visible
 * near these definitions; presumably it bounds the commit service -- confirm
 * against its user.
 */
106 #define MAX_COMMIT_COUNT	(1024 * 1024)
107 
/*
 * Tunables for the sequential-read heuristic table below:
 *   NUM_HEURISTIC - number of slots in nfsheur[]
 *   NHUSE_INIT    - nh_use value given to a slot when nfsrv_read()
 *                   (re)assigns it to a vnode
 *   NHUSE_INC     - amount added to nh_use on each read through the slot
 *   NHUSE_MAX     - ceiling applied to nh_use after the increment
 */
108 #define NUM_HEURISTIC		1017
109 #define NHUSE_INIT		64
110 #define NHUSE_INC		16
111 #define NHUSE_MAX		2048
112 
/*
 * Read heuristic table.  nfsrv_read() hashes the vnode pointer to a slot,
 * probes a few following slots for a match (decrementing nh_use of the
 * slots it passes over) and otherwise recycles the probed slot with the
 * lowest nh_use.  nh_seqcount is shifted by IO_SEQSHIFT into the ioflag
 * passed to VOP_READ().
 */
113 static struct nfsheur {
114     struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
115     off_t nh_nextr;		/* next offset for sequential detection */
116     int nh_use;			/* use count for selection */
117     int nh_seqcount;		/* heuristic */
118 } nfsheur[NUM_HEURISTIC];
119 
/*
 * File-type translation tables with 9 entries each.
 * NOTE(review): presumably indexed by the vnode type (enum vtype) to
 * obtain the on-the-wire NFS file type -- confirm against the users of
 * these tables.  The v2 table differs from the v3 one in entries 6 and 7
 * (NFNON and NFCHR instead of NFSOCK and NFFIFO).
 */
120 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
121 		      NFFIFO, NFNON };
122 #ifndef NFS_NOSERVER
123 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
124 		      NFCHR, NFNON };
125 
/*
 * NOTE(review): by their names these are write-gathering delays for v2
 * and v3 respectively; they are not used in the visible part of the
 * file -- confirm units and meaning at the point of use.
 */
126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
127 int nfsrvw_procrastinate_v3 = 0;
128 
129 static struct timespec	nfsver;
130 
131 SYSCTL_DECL(_vfs_nfs);
132 
/*
 * vfs.nfs.async: when non-zero, nfsrv_write() treats NFSv2 writes as
 * NFSV3WRITE_UNSTABLE instead of the default NFSV3WRITE_FILESYNC.
 */
133 int nfs_async;
134 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
/* Counters exported read/write as vfs.nfs.commit_blks and vfs.nfs.commit_miss. */
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
139 
/* Forward declarations for helpers defined later in this file. */
140 static int nfsrv_access (struct mount *, struct vnode *, int,
141 			struct ucred *, int, struct thread *, int);
142 static void nfsrvw_coalesce (struct nfsrv_descript *,
143 		struct nfsrv_descript *);
144 
145 /*
146  * nfs v3 access service
147  */
148 int
149 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
150 	      struct thread *td, struct mbuf **mrq)
151 {
152 	struct sockaddr *nam = nfsd->nd_nam;
153 	struct ucred *cred = &nfsd->nd_cr;
154 	struct vnode *vp = NULL;
155 	struct mount *mp = NULL;
156 	nfsfh_t nfh;
157 	fhandle_t *fhp;
158 	int error = 0, rdonly, getret;
159 	struct vattr vattr, *vap = &vattr;
160 	u_long testmode, nfsmode;
161 	struct nfsm_info info;
162 	u_int32_t *tl;
163 
164 	info.dpos = nfsd->nd_dpos;
165 	info.md = nfsd->nd_md;
166 	info.mrep = nfsd->nd_mrep;
167 	info.mreq = NULL;
168 
169 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
170 	fhp = &nfh.fh_generic;
171 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
172 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
173 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
174 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
175 	if (error) {
176 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
177 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
178 		error = 0;
179 		goto nfsmout;
180 	}
181 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
182 	if ((nfsmode & NFSV3ACCESS_READ) &&
183 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
184 		nfsmode &= ~NFSV3ACCESS_READ;
185 	if (vp->v_type == VDIR)
186 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
187 			NFSV3ACCESS_DELETE);
188 	else
189 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
190 	if ((nfsmode & testmode) &&
191 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
192 		nfsmode &= ~testmode;
193 	if (vp->v_type == VDIR)
194 		testmode = NFSV3ACCESS_LOOKUP;
195 	else
196 		testmode = NFSV3ACCESS_EXECUTE;
197 	if ((nfsmode & testmode) &&
198 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
199 		nfsmode &= ~testmode;
200 	getret = VOP_GETATTR(vp, vap);
201 	vput(vp);
202 	vp = NULL;
203 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
204 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
205 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
206 	tl = nfsm_build(&info, NFSX_UNSIGNED);
207 	*tl = txdr_unsigned(nfsmode);
208 nfsmout:
209 	*mrq = info.mreq;
210 	if (vp)
211 		vput(vp);
212 	return(error);
213 }
214 
215 /*
216  * nfs getattr service
217  */
218 int
219 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
220 	      struct thread *td, struct mbuf **mrq)
221 {
222 	struct sockaddr *nam = nfsd->nd_nam;
223 	struct ucred *cred = &nfsd->nd_cr;
224 	struct nfs_fattr *fp;
225 	struct vattr va;
226 	struct vattr *vap = &va;
227 	struct vnode *vp = NULL;
228 	struct mount *mp = NULL;
229 	nfsfh_t nfh;
230 	fhandle_t *fhp;
231 	int error = 0, rdonly;
232 	struct nfsm_info info;
233 
234 	info.mrep = nfsd->nd_mrep;
235 	info.md = nfsd->nd_md;
236 	info.dpos = nfsd->nd_dpos;
237 	info.mreq = NULL;
238 
239 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
240 	fhp = &nfh.fh_generic;
241 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
242 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
243 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
244 	if (error) {
245 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
246 		error = 0;
247 		goto nfsmout;
248 	}
249 	error = VOP_GETATTR(vp, vap);
250 	vput(vp);
251 	vp = NULL;
252 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
253 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
254 	if (error) {
255 		error = 0;
256 		goto nfsmout;
257 	}
258 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259 	nfsm_srvfattr(nfsd, vap, fp);
260 	/* fall through */
261 
262 nfsmout:
263 	*mrq = info.mreq;
264 	if (vp)
265 		vput(vp);
266 	return(error);
267 }
268 
269 /*
270  * nfs setattr service
271  */
272 int
273 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
274 	      struct thread *td, struct mbuf **mrq)
275 {
276 	struct sockaddr *nam = nfsd->nd_nam;
277 	struct ucred *cred = &nfsd->nd_cr;
278 	struct vattr va, preat;
279 	struct vattr *vap = &va;
280 	struct nfsv2_sattr *sp;
281 	struct nfs_fattr *fp;
282 	struct vnode *vp = NULL;
283 	struct mount *mp = NULL;
284 	nfsfh_t nfh;
285 	fhandle_t *fhp;
286 	u_int32_t *tl;
287 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
288 	int gcheck = 0;
289 	struct timespec guard;
290 	struct nfsm_info info;
291 
292 	info.mrep = nfsd->nd_mrep;
293 	info.mreq = NULL;
294 	info.md = nfsd->nd_md;
295 	info.dpos = nfsd->nd_dpos;
296 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
297 
298 	guard.tv_sec = 0;	/* fix compiler warning */
299 	guard.tv_nsec = 0;
300 
301 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
302 	fhp = &nfh.fh_generic;
303 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
304 	VATTR_NULL(vap);
305 	if (info.v3) {
306 		ERROROUT(nfsm_srvsattr(&info, vap));
307 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
308 		gcheck = fxdr_unsigned(int, *tl);
309 		if (gcheck) {
310 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
311 			fxdr_nfsv3time(tl, &guard);
312 		}
313 	} else {
314 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
315 		/*
316 		 * Nah nah nah nah na nah
317 		 * There is a bug in the Sun client that puts 0xffff in the mode
318 		 * field of sattr when it should put in 0xffffffff. The u_short
319 		 * doesn't sign extend.
320 		 * --> check the low order 2 bytes for 0xffff
321 		 */
322 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
323 			vap->va_mode = nfstov_mode(sp->sa_mode);
324 		if (sp->sa_uid != nfs_xdrneg1)
325 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
326 		if (sp->sa_gid != nfs_xdrneg1)
327 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
328 		if (sp->sa_size != nfs_xdrneg1)
329 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
330 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
331 #ifdef notyet
332 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
333 #else
334 			vap->va_atime.tv_sec =
335 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
336 			vap->va_atime.tv_nsec = 0;
337 #endif
338 		}
339 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
340 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
341 
342 	}
343 
344 	/*
345 	 * Now that we have all the fields, lets do it.
346 	 */
347 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
348 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
349 	if (error) {
350 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
351 				      2 * NFSX_UNSIGNED, &error));
352 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
353 				 postat_ret, vap);
354 		error = 0;
355 		goto nfsmout;
356 	}
357 
358 	/*
359 	 * vp now an active resource, pay careful attention to cleanup
360 	 */
361 
362 	if (info.v3) {
363 		error = preat_ret = VOP_GETATTR(vp, &preat);
364 		if (!error && gcheck &&
365 			(preat.va_ctime.tv_sec != guard.tv_sec ||
366 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
367 			error = NFSERR_NOT_SYNC;
368 		if (error) {
369 			vput(vp);
370 			vp = NULL;
371 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
372 					      NFSX_WCCDATA(info.v3), &error));
373 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
374 					 postat_ret, vap);
375 			error = 0;
376 			goto nfsmout;
377 		}
378 	}
379 
380 	/*
381 	 * If the size is being changed write acces is required, otherwise
382 	 * just check for a read only file system.
383 	 */
384 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
385 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
386 			error = EROFS;
387 			goto out;
388 		}
389 	} else {
390 		if (vp->v_type == VDIR) {
391 			error = EISDIR;
392 			goto out;
393 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
394 			    td, 0)) != 0){
395 			goto out;
396 		}
397 	}
398 	error = VOP_SETATTR(vp, vap, cred);
399 	postat_ret = VOP_GETATTR(vp, vap);
400 	if (!error)
401 		error = postat_ret;
402 out:
403 	vput(vp);
404 	vp = NULL;
405 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
406 		   NFSX_WCCORFATTR(info.v3), &error));
407 	if (info.v3) {
408 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
409 				 postat_ret, vap);
410 		error = 0;
411 		goto nfsmout;
412 	} else {
413 		fp = nfsm_build(&info, NFSX_V2FATTR);
414 		nfsm_srvfattr(nfsd, vap, fp);
415 	}
416 	/* fall through */
417 
418 nfsmout:
419 	*mrq = info.mreq;
420 	if (vp)
421 		vput(vp);
422 	return(error);
423 }
424 
425 /*
426  * nfs lookup rpc
427  */
428 int
429 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
430 	     struct thread *td, struct mbuf **mrq)
431 {
432 	struct sockaddr *nam = nfsd->nd_nam;
433 	struct ucred *cred = &nfsd->nd_cr;
434 	struct nfs_fattr *fp;
435 	struct nlookupdata nd;
436 	struct vnode *vp;
437 	struct vnode *dirp;
438 	struct nchandle nch;
439 	nfsfh_t nfh;
440 	fhandle_t *fhp;
441 	int error = 0, len, dirattr_ret = 1;
442 	int pubflag;
443 	struct vattr va, dirattr, *vap = &va;
444 	struct nfsm_info info;
445 
446 	info.mrep = nfsd->nd_mrep;
447 	info.mreq = NULL;
448 	info.md = nfsd->nd_md;
449 	info.dpos = nfsd->nd_dpos;
450 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
451 
452 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
453 	nlookup_zero(&nd);
454 	dirp = NULL;
455 	vp = NULL;
456 
457 	fhp = &nfh.fh_generic;
458 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
459 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
460 
461 	pubflag = nfs_ispublicfh(fhp);
462 
463 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
464 		fhp, len, slp, nam, &info.md, &info.dpos,
465 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
466 
467 	/*
468 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
469 	 * structure in case macros jump to nfsmout.
470 	 */
471 
472 	if (error) {
473 		if (dirp) {
474 			if (info.v3)
475 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
476 			vrele(dirp);
477 			dirp = NULL;
478 		}
479 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
480 				      NFSX_POSTOPATTR(info.v3), &error));
481 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
482 		error = 0;
483 		goto nfsmout;
484 	}
485 
486 	/*
487 	 * Locate index file for public filehandle
488 	 *
489 	 * error is 0 on entry and 0 on exit from this block.
490 	 */
491 
492 	if (pubflag) {
493 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
494 			/*
495 			 * Setup call to lookup() to see if we can find
496 			 * the index file. Arguably, this doesn't belong
497 			 * in a kernel.. Ugh.  If an error occurs, do not
498 			 * try to install an index file and then clear the
499 			 * error.
500 			 *
501 			 * When we replace nd with ind and redirect ndp,
502 			 * maintenance of ni_startdir and ni_vp shift to
503 			 * ind and we have to clean them up in the old nd.
504 			 * However, the cnd resource continues to be maintained
505 			 * via the original nd.  Confused?  You aren't alone!
506 			 */
507 			vn_unlock(vp);
508 			cache_copy(&nd.nl_nch, &nch);
509 			nlookup_done(&nd);
510 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
511 						UIO_SYSSPACE, 0, cred, &nch);
512 			cache_drop(&nch);
513 			if (error == 0)
514 				error = nlookup(&nd);
515 
516 			if (error == 0) {
517 				/*
518 				 * Found an index file. Get rid of
519 				 * the old references.  transfer vp and
520 				 * load up the new vp.  Fortunately we do
521 				 * not have to deal with dvp, that would be
522 				 * a huge mess.
523 				 */
524 				if (dirp)
525 					vrele(dirp);
526 				dirp = vp;
527 				vp = NULL;
528 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
529 							LK_EXCLUSIVE, &vp);
530 				KKASSERT(error == 0);
531 			}
532 			error = 0;
533 		}
534 		/*
535 		 * If the public filehandle was used, check that this lookup
536 		 * didn't result in a filehandle outside the publicly exported
537 		 * filesystem.  We clear the poor vp here to avoid lockups due
538 		 * to NFS I/O.
539 		 */
540 
541 		if (vp->v_mount != nfs_pub.np_mount) {
542 			vput(vp);
543 			vp = NULL;
544 			error = EPERM;
545 		}
546 	}
547 
548 	if (dirp) {
549 		if (info.v3)
550 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
551 		vrele(dirp);
552 		dirp = NULL;
553 	}
554 
555 	/*
556 	 * Resources at this point:
557 	 *	ndp->ni_vp	may not be NULL
558 	 *
559 	 */
560 
561 	if (error) {
562 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
563 				      NFSX_POSTOPATTR(info.v3), &error));
564 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
565 		error = 0;
566 		goto nfsmout;
567 	}
568 
569 	/*
570 	 * Clear out some resources prior to potentially blocking.  This
571 	 * is not as critical as ni_dvp resources in other routines, but
572 	 * it helps.
573 	 */
574 	nlookup_done(&nd);
575 
576 	/*
577 	 * Get underlying attribute, then release remaining resources ( for
578 	 * the same potential blocking reason ) and reply.
579 	 */
580 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
581 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
582 	if (!error)
583 		error = VOP_GETATTR(vp, vap);
584 
585 	vput(vp);
586 	vp = NULL;
587 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
588 			      NFSX_SRVFH(info.v3) +
589 			      NFSX_POSTOPORFATTR(info.v3) +
590 			      NFSX_POSTOPATTR(info.v3),
591 			      &error));
592 	if (error) {
593 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
594 		error = 0;
595 		goto nfsmout;
596 	}
597 	nfsm_srvfhtom(&info, fhp);
598 	if (info.v3) {
599 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
600 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
601 	} else {
602 		fp = nfsm_build(&info, NFSX_V2FATTR);
603 		nfsm_srvfattr(nfsd, vap, fp);
604 	}
605 
606 nfsmout:
607 	*mrq = info.mreq;
608 	if (dirp)
609 		vrele(dirp);
610 	nlookup_done(&nd);		/* may be called twice */
611 	if (vp)
612 		vput(vp);
613 	return (error);
614 }
615 
616 /*
617  * nfs readlink service
618  */
619 int
620 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
621 	       struct thread *td, struct mbuf **mrq)
622 {
623 	struct sockaddr *nam = nfsd->nd_nam;
624 	struct ucred *cred = &nfsd->nd_cr;
625 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
626 	struct iovec *ivp = iv;
627 	u_int32_t *tl;
628 	int error = 0, rdonly, i, tlen, len, getret;
629 	struct mbuf *mp1, *mp2, *mp3;
630 	struct vnode *vp = NULL;
631 	struct mount *mp = NULL;
632 	struct vattr attr;
633 	nfsfh_t nfh;
634 	fhandle_t *fhp;
635 	struct uio io, *uiop = &io;
636 	struct nfsm_info info;
637 
638 	info.mrep = nfsd->nd_mrep;
639 	info.mreq = NULL;
640 	info.md = nfsd->nd_md;
641 	info.dpos = nfsd->nd_dpos;
642 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
643 
644 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
645 #ifndef nolint
646 	mp2 = NULL;
647 #endif
648 	mp3 = NULL;
649 	fhp = &nfh.fh_generic;
650 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
651 	len = 0;
652 	i = 0;
653 	while (len < NFS_MAXPATHLEN) {
654 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
655 		mp1->m_len = MCLBYTES;
656 		if (len == 0)
657 			mp3 = mp2 = mp1;
658 		else {
659 			mp2->m_next = mp1;
660 			mp2 = mp1;
661 		}
662 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
663 			mp1->m_len = NFS_MAXPATHLEN-len;
664 			len = NFS_MAXPATHLEN;
665 		} else
666 			len += mp1->m_len;
667 		ivp->iov_base = mtod(mp1, caddr_t);
668 		ivp->iov_len = mp1->m_len;
669 		i++;
670 		ivp++;
671 	}
672 	uiop->uio_iov = iv;
673 	uiop->uio_iovcnt = i;
674 	uiop->uio_offset = 0;
675 	uiop->uio_resid = len;
676 	uiop->uio_rw = UIO_READ;
677 	uiop->uio_segflg = UIO_SYSSPACE;
678 	uiop->uio_td = NULL;
679 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
680 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
681 	if (error) {
682 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
683 				      2 * NFSX_UNSIGNED, &error));
684 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
685 		error = 0;
686 		goto nfsmout;
687 	}
688 	if (vp->v_type != VLNK) {
689 		if (info.v3)
690 			error = EINVAL;
691 		else
692 			error = ENXIO;
693 		goto out;
694 	}
695 	error = VOP_READLINK(vp, uiop, cred);
696 out:
697 	getret = VOP_GETATTR(vp, &attr);
698 	vput(vp);
699 	vp = NULL;
700 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
701 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
702 			     &error));
703 	if (info.v3) {
704 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
705 		if (error) {
706 			error = 0;
707 			goto nfsmout;
708 		}
709 	}
710 	if (uiop->uio_resid > 0) {
711 		len -= uiop->uio_resid;
712 		tlen = nfsm_rndup(len);
713 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
714 	}
715 	tl = nfsm_build(&info, NFSX_UNSIGNED);
716 	*tl = txdr_unsigned(len);
717 	info.mb->m_next = mp3;
718 	mp3 = NULL;
719 nfsmout:
720 	*mrq = info.mreq;
721 	if (mp3)
722 		m_freem(mp3);
723 	if (vp)
724 		vput(vp);
725 	return(error);
726 }
727 
728 /*
729  * nfs read service
730  */
731 int
732 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
733 	   struct thread *td, struct mbuf **mrq)
734 {
735 	struct nfsm_info info;
736 	struct sockaddr *nam = nfsd->nd_nam;
737 	struct ucred *cred = &nfsd->nd_cr;
738 	struct iovec *iv;
739 	struct iovec *iv2;
740 	struct mbuf *m;
741 	struct nfs_fattr *fp;
742 	u_int32_t *tl;
743 	int i;
744 	int reqlen;
745 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
746 	struct mbuf *m2;
747 	struct vnode *vp = NULL;
748 	struct mount *mp = NULL;
749 	nfsfh_t nfh;
750 	fhandle_t *fhp;
751 	struct uio io, *uiop = &io;
752 	struct vattr va, *vap = &va;
753 	struct nfsheur *nh;
754 	off_t off;
755 	int ioflag = 0;
756 
757 	info.mrep = nfsd->nd_mrep;
758 	info.mreq = NULL;
759 	info.md = nfsd->nd_md;
760 	info.dpos = nfsd->nd_dpos;
761 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
762 
763 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
764 	fhp = &nfh.fh_generic;
765 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
766 	if (info.v3) {
767 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
768 		off = fxdr_hyper(tl);
769 	} else {
770 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
771 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
772 	}
773 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
774 					    NFS_SRVMAXDATA(nfsd), &error));
775 
776 	/*
777 	 * Reference vp.  If an error occurs, vp will be invalid, but we
778 	 * have to NULL it just in case.  The macros might goto nfsmout
779 	 * as well.
780 	 */
781 
782 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
783 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
784 	if (error) {
785 		vp = NULL;
786 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
787 				      2 * NFSX_UNSIGNED, &error));
788 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
789 		error = 0;
790 		goto nfsmout;
791 	}
792 
793 	if (vp->v_type != VREG) {
794 		if (info.v3)
795 			error = EINVAL;
796 		else
797 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
798 	}
799 	if (!error) {
800 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
801 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
802 	}
803 	getret = VOP_GETATTR(vp, vap);
804 	if (!error)
805 		error = getret;
806 	if (error) {
807 		vput(vp);
808 		vp = NULL;
809 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
810 				      NFSX_POSTOPATTR(info.v3), &error));
811 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
812 		error = 0;
813 		goto nfsmout;
814 	}
815 
816 	/*
817 	 * Calculate byte count to read
818 	 */
819 
820 	if (off >= vap->va_size)
821 		cnt = 0;
822 	else if ((off + reqlen) > vap->va_size)
823 		cnt = vap->va_size - off;
824 	else
825 		cnt = reqlen;
826 
827 	/*
828 	 * Calculate seqcount for heuristic
829 	 */
830 
831 	{
832 		int hi;
833 		int try = 32;
834 
835 		/*
836 		 * Locate best candidate
837 		 */
838 
839 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
840 		nh = &nfsheur[hi];
841 
842 		while (try--) {
843 			if (nfsheur[hi].nh_vp == vp) {
844 				nh = &nfsheur[hi];
845 				break;
846 			}
847 			if (nfsheur[hi].nh_use > 0)
848 				--nfsheur[hi].nh_use;
849 			hi = (hi + 1) % NUM_HEURISTIC;
850 			if (nfsheur[hi].nh_use < nh->nh_use)
851 				nh = &nfsheur[hi];
852 		}
853 
854 		if (nh->nh_vp != vp) {
855 			nh->nh_vp = vp;
856 			nh->nh_nextr = off;
857 			nh->nh_use = NHUSE_INIT;
858 			if (off == 0)
859 				nh->nh_seqcount = 4;
860 			else
861 				nh->nh_seqcount = 1;
862 		}
863 
864 		/*
865 		 * Calculate heuristic
866 		 */
867 
868 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
869 			if (++nh->nh_seqcount > IO_SEQMAX)
870 				nh->nh_seqcount = IO_SEQMAX;
871 		} else if (nh->nh_seqcount > 1) {
872 			nh->nh_seqcount = 1;
873 		} else {
874 			nh->nh_seqcount = 0;
875 		}
876 		nh->nh_use += NHUSE_INC;
877 		if (nh->nh_use > NHUSE_MAX)
878 			nh->nh_use = NHUSE_MAX;
879 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
880         }
881 
882 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
883 			      NFSX_POSTOPORFATTR(info.v3) +
884 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
885 			      &error));
886 	if (info.v3) {
887 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
888 		*tl++ = nfs_true;
889 		fp = (struct nfs_fattr *)tl;
890 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
891 	} else {
892 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
893 		fp = (struct nfs_fattr *)tl;
894 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
895 	}
896 	len = left = nfsm_rndup(cnt);
897 	if (cnt > 0) {
898 		/*
899 		 * Generate the mbuf list with the uio_iov ref. to it.
900 		 */
901 		i = 0;
902 		m = m2 = info.mb;
903 		while (left > 0) {
904 			siz = min(M_TRAILINGSPACE(m), left);
905 			if (siz > 0) {
906 				left -= siz;
907 				i++;
908 			}
909 			if (left > 0) {
910 				m = m_getcl(MB_WAIT, MT_DATA, 0);
911 				m->m_len = 0;
912 				m2->m_next = m;
913 				m2 = m;
914 			}
915 		}
916 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
917 		       M_TEMP, M_WAITOK);
918 		uiop->uio_iov = iv2 = iv;
919 		m = info.mb;
920 		left = len;
921 		i = 0;
922 		while (left > 0) {
923 			if (m == NULL)
924 				panic("nfsrv_read iov");
925 			siz = min(M_TRAILINGSPACE(m), left);
926 			if (siz > 0) {
927 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
928 				iv->iov_len = siz;
929 				m->m_len += siz;
930 				left -= siz;
931 				iv++;
932 				i++;
933 			}
934 			m = m->m_next;
935 		}
936 		uiop->uio_iovcnt = i;
937 		uiop->uio_offset = off;
938 		uiop->uio_resid = len;
939 		uiop->uio_rw = UIO_READ;
940 		uiop->uio_segflg = UIO_SYSSPACE;
941 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
942 		off = uiop->uio_offset;
943 		nh->nh_nextr = off;
944 		FREE((caddr_t)iv2, M_TEMP);
945 		if (error || (getret = VOP_GETATTR(vp, vap))) {
946 			if (!error)
947 				error = getret;
948 			m_freem(info.mreq);
949 			info.mreq = NULL;
950 			vput(vp);
951 			vp = NULL;
952 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
953 					      NFSX_POSTOPATTR(info.v3),
954 					      &error));
955 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
956 			error = 0;
957 			goto nfsmout;
958 		}
959 	} else {
960 		uiop->uio_resid = 0;
961 	}
962 	vput(vp);
963 	vp = NULL;
964 	nfsm_srvfattr(nfsd, vap, fp);
965 	tlen = len - uiop->uio_resid;
966 	cnt = cnt < tlen ? cnt : tlen;
967 	tlen = nfsm_rndup(cnt);
968 	if (len != tlen || tlen != cnt)
969 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
970 	if (info.v3) {
971 		*tl++ = txdr_unsigned(cnt);
972 		if (len < reqlen)
973 			*tl++ = nfs_true;
974 		else
975 			*tl++ = nfs_false;
976 	}
977 	*tl = txdr_unsigned(cnt);
978 nfsmout:
979 	*mrq = info.mreq;
980 	if (vp)
981 		vput(vp);
982 	return(error);
983 }
984 
985 /*
986  * nfs write service
987  */
988 int
989 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
990 	    struct thread *td, struct mbuf **mrq)
991 {
992 	struct sockaddr *nam = nfsd->nd_nam;
993 	struct ucred *cred = &nfsd->nd_cr;
994 	struct iovec *ivp;
995 	int i, cnt;
996 	struct mbuf *mp1;
997 	struct nfs_fattr *fp;
998 	struct iovec *iv;
999 	struct vattr va, forat;
1000 	struct vattr *vap = &va;
1001 	u_int32_t *tl;
1002 	int error = 0, rdonly, len, forat_ret = 1;
1003 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1004 	int stable = NFSV3WRITE_FILESYNC;
1005 	struct vnode *vp = NULL;
1006 	struct mount *mp = NULL;
1007 	nfsfh_t nfh;
1008 	fhandle_t *fhp;
1009 	struct uio io, *uiop = &io;
1010 	struct nfsm_info info;
1011 	off_t off;
1012 
1013 	info.mrep = nfsd->nd_mrep;
1014 	info.mreq = NULL;
1015 	info.md = nfsd->nd_md;
1016 	info.dpos = nfsd->nd_dpos;
1017 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1018 
1019 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1020 	if (info.mrep == NULL) {
1021 		error = 0;
1022 		goto nfsmout;
1023 	}
1024 	fhp = &nfh.fh_generic;
1025 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1026 	if (info.v3) {
1027 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1028 		off = fxdr_hyper(tl);
1029 		tl += 3;
1030 		stable = fxdr_unsigned(int, *tl++);
1031 	} else {
1032 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1033 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1034 		tl += 2;
1035 		if (nfs_async)
1036 	    		stable = NFSV3WRITE_UNSTABLE;
1037 	}
1038 	retlen = len = fxdr_unsigned(int32_t, *tl);
1039 	cnt = i = 0;
1040 
1041 	/*
1042 	 * For NFS Version 2, it is not obvious what a write of zero length
1043 	 * should do, but I might as well be consistent with Version 3,
1044 	 * which is to return ok so long as there are no permission problems.
1045 	 */
1046 	if (len > 0) {
1047 	    zeroing = 1;
1048 	    mp1 = info.mrep;
1049 	    while (mp1) {
1050 		if (mp1 == info.md) {
1051 			zeroing = 0;
1052 			adjust = info.dpos - mtod(mp1, caddr_t);
1053 			mp1->m_len -= adjust;
1054 			if (mp1->m_len > 0 && adjust > 0)
1055 				mp1->m_data += adjust;
1056 		}
1057 		if (zeroing)
1058 			mp1->m_len = 0;
1059 		else if (mp1->m_len > 0) {
1060 			i += mp1->m_len;
1061 			if (i > len) {
1062 				mp1->m_len -= (i - len);
1063 				zeroing	= 1;
1064 			}
1065 			if (mp1->m_len > 0)
1066 				cnt++;
1067 		}
1068 		mp1 = mp1->m_next;
1069 	    }
1070 	}
1071 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1072 		error = EIO;
1073 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1074 				      2 * NFSX_UNSIGNED, &error));
1075 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1076 				 aftat_ret, vap);
1077 		error = 0;
1078 		goto nfsmout;
1079 	}
1080 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1081 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1082 	if (error) {
1083 		vp = NULL;
1084 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1085 				      2 * NFSX_UNSIGNED, &error));
1086 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1087 				 aftat_ret, vap);
1088 		error = 0;
1089 		goto nfsmout;
1090 	}
1091 	if (info.v3)
1092 		forat_ret = VOP_GETATTR(vp, &forat);
1093 	if (vp->v_type != VREG) {
1094 		if (info.v3)
1095 			error = EINVAL;
1096 		else
1097 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1098 	}
1099 	if (!error) {
1100 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1101 	}
1102 	if (error) {
1103 		vput(vp);
1104 		vp = NULL;
1105 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1106 				      NFSX_WCCDATA(info.v3), &error));
1107 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1108 				 aftat_ret, vap);
1109 		error = 0;
1110 		goto nfsmout;
1111 	}
1112 
1113 	if (len > 0) {
1114 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1115 		M_WAITOK);
1116 	    uiop->uio_iov = iv = ivp;
1117 	    uiop->uio_iovcnt = cnt;
1118 	    mp1 = info.mrep;
1119 	    while (mp1) {
1120 		if (mp1->m_len > 0) {
1121 			ivp->iov_base = mtod(mp1, caddr_t);
1122 			ivp->iov_len = mp1->m_len;
1123 			ivp++;
1124 		}
1125 		mp1 = mp1->m_next;
1126 	    }
1127 
1128 	    /*
1129 	     * XXX
1130 	     * The IO_METASYNC flag indicates that all metadata (and not just
1131 	     * enough to ensure data integrity) must be written to stable storage
1132 	     * synchronously.
1133 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1134 	     */
1135 	    if (stable == NFSV3WRITE_UNSTABLE)
1136 		ioflags = IO_NODELOCKED;
1137 	    else if (stable == NFSV3WRITE_DATASYNC)
1138 		ioflags = (IO_SYNC | IO_NODELOCKED);
1139 	    else
1140 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1141 	    uiop->uio_resid = len;
1142 	    uiop->uio_rw = UIO_WRITE;
1143 	    uiop->uio_segflg = UIO_SYSSPACE;
1144 	    uiop->uio_td = NULL;
1145 	    uiop->uio_offset = off;
1146 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1147 	    nfsstats.srvvop_writes++;
1148 	    FREE((caddr_t)iv, M_TEMP);
1149 	}
1150 	aftat_ret = VOP_GETATTR(vp, vap);
1151 	vput(vp);
1152 	vp = NULL;
1153 	if (!error)
1154 		error = aftat_ret;
1155 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1156 			      NFSX_PREOPATTR(info.v3) +
1157 			      NFSX_POSTOPORFATTR(info.v3) +
1158 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1159 			      &error));
1160 	if (info.v3) {
1161 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1162 				 aftat_ret, vap);
1163 		if (error) {
1164 			error = 0;
1165 			goto nfsmout;
1166 		}
1167 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1168 		*tl++ = txdr_unsigned(retlen);
1169 		/*
1170 		 * If nfs_async is set, then pretend the write was FILESYNC.
1171 		 */
1172 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1173 			*tl++ = txdr_unsigned(stable);
1174 		else
1175 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1176 		/*
1177 		 * Actually, there is no need to txdr these fields,
1178 		 * but it may make the values more human readable,
1179 		 * for debugging purposes.
1180 		 */
1181 		if (nfsver.tv_sec == 0)
1182 			nfsver = boottime;
1183 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1184 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1185 	} else {
1186 		fp = nfsm_build(&info, NFSX_V2FATTR);
1187 		nfsm_srvfattr(nfsd, vap, fp);
1188 	}
1189 nfsmout:
1190 	*mrq = info.mreq;
1191 	if (vp)
1192 		vput(vp);
1193 	return(error);
1194 }
1195 
1196 /*
1197  * NFS write service with write gathering support. Called when
1198  * nfsrvw_procrastinate > 0.
1199  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1200  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1201  * Jan. 1994.
 *
 * The work is done in three phases:
 *
 *  1. If *ndp is non-NULL the request is taken over (*ndp is cleared), its
 *     write header is parsed, the mbuf chain is trimmed down to the write
 *     data and the descriptor is queued on slp->ns_tq (ordered by nd_time)
 *     and on the delay hash chain selected by NWDELAYHASH(), where
 *     NFSW_CONTIG/NFSW_SAMECRED neighbours are merged via nfsrvw_coalesce().
 *     A request that fails validation gets an EIO reply right away and is
 *     queued with nd_time == 0.
 *  2. Every queued descriptor whose nd_time has passed and which has no
 *     reply yet is written with VOP_WRITE(); a reply is built for it and for
 *     each descriptor on its nd_coalesce list, and all of them are put back
 *     at the head of slp->ns_tq with nd_time == 0.
 *  3. The first descriptor on slp->ns_tq that owns a reply is unlinked and
 *     handed back through *ndp / *mrq.
 *
 * ndp	in: request to queue, or NULL to only flush/collect;
 *	out: descriptor whose reply is being returned, or NULL.
 * slp	socket whose ns_tq / delay hash hold the gathered writes.
 * td	thread passed to nfsrv_access().
 * mrq	out: reply mbuf chain belonging to *ndp, or NULL if none is ready.
 *
 * Always returns 0; per-request errors are encoded in the replies.
1202  */
1203 int
1204 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1205 		  struct thread *td, struct mbuf **mrq)
1206 {
1207 	struct iovec *ivp;
1208 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1209 	struct nfs_fattr *fp;
1210 	int i;
1211 	struct iovec *iov;
1212 	struct nfsrvw_delayhash *wpp;
1213 	struct ucred *cred;
1214 	struct vattr va, forat;
1215 	u_int32_t *tl;
1216 	int error = 0, rdonly, len, forat_ret = 1;
1217 	int ioflags, aftat_ret = 1, adjust, zeroing;
1218 	struct mbuf *mp1;
1219 	struct vnode *vp = NULL;
1220 	struct mount *mp = NULL;
1221 	struct uio io, *uiop = &io;
1222 	u_quad_t cur_usec;
1223 	struct nfsm_info info;
1224 
1225 	info.mreq = NULL;
1226 
1227 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1228 #ifndef nolint
1229 	i = 0;
1230 	len = 0;
1231 #endif
1232 	*mrq = NULL;
	/*
	 * Phase 1: take over the new request (if any) and queue it.
	 */
1233 	if (*ndp) {
1234 	    nfsd = *ndp;
1235 	    *ndp = NULL;
1236 	    info.mrep = nfsd->nd_mrep;
1237 	    info.mreq = NULL;
1238 	    info.md = nfsd->nd_md;
1239 	    info.dpos = nfsd->nd_dpos;
1240 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1241 	    cred = &nfsd->nd_cr;
1242 	    LIST_INIT(&nfsd->nd_coalesce);
1243 	    nfsd->nd_mreq = NULL;
1244 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1245 	    cur_usec = nfs_curusec();
1246 	    nfsd->nd_time = cur_usec +
1247 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1248 
1249 	    /*
1250 	     * Now, get the write header..
1251 	     */
1252 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1253 	    if (info.v3) {
1254 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1255 		nfsd->nd_off = fxdr_hyper(tl);
1256 		tl += 3;
1257 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1258 	    } else {
1259 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1260 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1261 		tl += 2;
1262 		if (nfs_async)
1263 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1264 	    }
1265 	    len = fxdr_unsigned(int32_t, *tl);
1266 	    nfsd->nd_len = len;
1267 	    nfsd->nd_eoff = nfsd->nd_off + len;
1268 
1269 	    /*
1270 	     * Trim the header out of the mbuf list and trim off any trailing
1271 	     * junk so that the mbuf list has only the write data.
1272 	     */
1273 	    zeroing = 1;
1274 	    i = 0;
1275 	    mp1 = info.mrep;
1276 	    while (mp1) {
1277 		if (mp1 == info.md) {
1278 		    zeroing = 0;
1279 		    adjust = info.dpos - mtod(mp1, caddr_t);
1280 		    mp1->m_len -= adjust;
1281 		    if (mp1->m_len > 0 && adjust > 0)
1282 			mp1->m_data += adjust;
1283 		}
1284 		if (zeroing)
1285 		    mp1->m_len = 0;
1286 		else {
1287 		    i += mp1->m_len;
1288 		    if (i > len) {
1289 			mp1->m_len -= (i - len);
1290 			zeroing = 1;
1291 		    }
1292 		}
1293 		mp1 = mp1->m_next;
1294 	    }
	    /*
	     * Reject bad lengths and short data with an immediate EIO reply.
	     * The nfsmout label sits inside this body; presumably the
	     * NEGREPLYOUT/NULLOUT macros above jump to it on a parse failure
	     * (their definitions are not visible here -- confirm).
	     * Setting nd_time to 0 makes the entry sort ahead of every entry
	     * with a later nd_time when it is queued below.
	     */
1295 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1296 nfsmout:
1297 		m_freem(info.mrep);
1298 		info.mrep = NULL;
1299 		error = EIO;
1300 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1301 		if (info.v3) {
1302 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1303 				     aftat_ret, &va);
1304 		}
1305 		nfsd->nd_mreq = info.mreq;
1306 		nfsd->nd_mrep = NULL;
1307 		nfsd->nd_time = 0;
1308 	    }
1309 
1310 	    /*
1311 	     * Add this entry to the hash and time queues.
1312 	     */
1313 	    crit_enter();
1314 	    owp = NULL;
1315 	    wp = slp->ns_tq.lh_first;
1316 	    while (wp && wp->nd_time < nfsd->nd_time) {
1317 		owp = wp;
1318 		wp = wp->nd_tq.le_next;
1319 	    }
1320 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1321 	    if (owp) {
1322 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1323 	    } else {
1324 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1325 	    }
1326 	    if (nfsd->nd_mrep) {
		/*
		 * Skip chain entries for other file handles, then same-handle
		 * entries that start at a lower offset; owp ends up as the
		 * entry to insert after (NULL means insert at the head).
		 */
1327 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1328 		owp = NULL;
1329 		wp = wpp->lh_first;
1330 		while (wp &&
1331 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1332 		    owp = wp;
1333 		    wp = wp->nd_hash.le_next;
1334 		}
1335 		while (wp && wp->nd_off < nfsd->nd_off &&
1336 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1337 		    owp = wp;
1338 		    wp = wp->nd_hash.le_next;
1339 		}
1340 		if (owp) {
1341 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1342 
1343 		    /*
1344 		     * Search the hash list for overlapping entries and
1345 		     * coalesce.
1346 		     */
1347 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1348 			wp = nfsd->nd_hash.le_next;
1349 			if (NFSW_SAMECRED(owp, nfsd))
1350 			    nfsrvw_coalesce(owp, nfsd);
1351 		    }
1352 		} else {
1353 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1354 		}
1355 	    }
1356 	    crit_exit();
1357 	}
1358 
1359 	/*
1360 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1361 	 * and generate the associated reply mbuf list(s).
1362 	 */
1363 loop1:
1364 	cur_usec = nfs_curusec();
1365 	crit_enter();
1366 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1367 		owp = nfsd->nd_tq.le_next;
1368 		if (nfsd->nd_time > cur_usec)
1369 		    break;
		/* Already has a reply; the final search below returns it. */
1370 		if (nfsd->nd_mreq)
1371 		    continue;
1372 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1373 		LIST_REMOVE(nfsd, nd_tq);
1374 		LIST_REMOVE(nfsd, nd_hash);
1375 		crit_exit();
1376 		info.mrep = nfsd->nd_mrep;
1377 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1378 		nfsd->nd_mrep = NULL;
1379 		cred = &nfsd->nd_cr;
1380 		forat_ret = aftat_ret = 1;
1381 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1382 		    nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1383 		if (!error) {
1384 		    if (info.v3)
1385 			forat_ret = VOP_GETATTR(vp, &forat);
1386 		    if (vp->v_type != VREG) {
1387 			if (info.v3)
1388 			    error = EINVAL;
1389 			else
1390 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1391 		    }
1392 		} else {
1393 		    vp = NULL;
1394 		}
1395 		if (!error) {
1396 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1397 		}
1398 
1399 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1400 		    ioflags = IO_NODELOCKED;
1401 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1402 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1403 		else
1404 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1405 		uiop->uio_rw = UIO_WRITE;
1406 		uiop->uio_segflg = UIO_SYSSPACE;
1407 		uiop->uio_td = NULL;
1408 		uiop->uio_offset = nfsd->nd_off;
1409 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1410 		if (uiop->uio_resid > 0) {
		    /* One iovec per non-empty mbuf: count, then fill. */
1411 		    mp1 = info.mrep;
1412 		    i = 0;
1413 		    while (mp1) {
1414 			if (mp1->m_len > 0)
1415 			    i++;
1416 			mp1 = mp1->m_next;
1417 		    }
1418 		    uiop->uio_iovcnt = i;
1419 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1420 			M_TEMP, M_WAITOK);
1421 		    uiop->uio_iov = ivp = iov;
1422 		    mp1 = info.mrep;
1423 		    while (mp1) {
1424 			if (mp1->m_len > 0) {
1425 			    ivp->iov_base = mtod(mp1, caddr_t);
1426 			    ivp->iov_len = mp1->m_len;
1427 			    ivp++;
1428 			}
1429 			mp1 = mp1->m_next;
1430 		    }
1431 		    if (!error) {
1432 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1433 			nfsstats.srvvop_writes++;
1434 		    }
1435 		    FREE((caddr_t)iov, M_TEMP);
1436 		}
1437 		m_freem(info.mrep);
1438 		info.mrep = NULL;
1439 		if (vp) {
1440 		    aftat_ret = VOP_GETATTR(vp, &va);
1441 		    vput(vp);
1442 		    vp = NULL;
1443 		}
1444 
1445 		/*
1446 		 * Loop around generating replies for all write rpcs that have
1447 		 * now been completed.
1448 		 */
1449 		swp = nfsd;
1450 		do {
1451 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1452 		    if (error) {
1453 			nfsm_writereply(&info, nfsd, slp, error,
1454 					NFSX_WCCDATA(info.v3));
1455 			if (info.v3) {
1456 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1457 					     aftat_ret, &va);
1458 			}
1459 		    } else {
1460 			nfsm_writereply(&info, nfsd, slp, error,
1461 					NFSX_PREOPATTR(info.v3) +
1462 					NFSX_POSTOPORFATTR(info.v3) +
1463 					2 * NFSX_UNSIGNED +
1464 					NFSX_WRITEVERF(info.v3));
1465 			if (info.v3) {
1466 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1467 					     aftat_ret, &va);
1468 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1469 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1470 			    *tl++ = txdr_unsigned(swp->nd_stable);
1471 			    /*
1472 			     * Actually, there is no need to txdr these fields,
1473 			     * but it may make the values more human readable,
1474 			     * for debugging purposes.
1475 			     */
1476 			    if (nfsver.tv_sec == 0)
1477 				    nfsver = boottime;
1478 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1479 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1480 			} else {
1481 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1482 			    nfsm_srvfattr(nfsd, &va, fp);
1483 			}
1484 		    }
1485 		    nfsd->nd_mreq = info.mreq;
1486 		    if (nfsd->nd_mrep)
1487 			panic("nfsrv_write: nd_mrep not free");
1488 
1489 		    /*
1490 		     * Done. Put it at the head of the timer queue so that
1491 		     * the final phase can return the reply.
1492 		     */
1493 		    crit_enter();
1494 		    if (nfsd != swp) {
1495 			nfsd->nd_time = 0;
1496 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1497 		    }
1498 		    nfsd = swp->nd_coalesce.lh_first;
1499 		    if (nfsd) {
1500 			LIST_REMOVE(nfsd, nd_tq);
1501 		    }
1502 		    crit_exit();
1503 		} while (nfsd);
1504 		crit_enter();
1505 		swp->nd_time = 0;
1506 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1507 		crit_exit();
1508 		goto loop1;
1509 	}
1510 	crit_exit();
1511 
1512 	/*
1513 	 * Search for a reply to return.
1514 	 */
1515 	crit_enter();
1516 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
1517 		if (nfsd->nd_mreq) {
1518 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1519 		    LIST_REMOVE(nfsd, nd_tq);
1520 		    *mrq = nfsd->nd_mreq;
1521 		    *ndp = nfsd;
1522 		    break;
1523 		}
1524 	crit_exit();
1525 	/*
	 * *mrq and *ndp now describe the reply found above, or are both
	 * NULL.  *mrq must not be overwritten with info.mreq here: that
	 * field is either NULL or the last reply built in the write loop,
	 * which with coalesced writes belongs to a different descriptor
	 * than the one returned in *ndp.
	 */
1526 	return (0);
1527 }
1528 
1529 /*
1530  * Coalesce the write request nfsd into owp. To do this we must:
1531  * - remove nfsd from the queues
1532  * - merge nfsd->nd_mrep into owp->nd_mrep
1533  * - update the nd_eoff and nd_stable for owp
1534  * - put nfsd on owp's nd_coalesce list
1535  * NB: Must be called at splsoftclock().
 *
 * NOTE(review): the caller visible in this file, nfsrv_writegather(),
 * invokes this between crit_enter() and crit_exit(); the splsoftclock()
 * wording above looks like a leftover from the spl-based original.
 *
 * owp	request that absorbs the data and stays on the hash/time queues.
 * nfsd	request being absorbed; its nd_mrep is NULL on return and it is
 *	linked onto owp->nd_coalesce through its nd_tq entry.
1536  */
1537 static void
1538 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1539 {
1540         int overlap;
1541         struct mbuf *mp1;
1542 	struct nfsrv_descript *p;
1543 
1544 	NFS_DPF(WG, ("C%03x-%03x",
1545 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1546         LIST_REMOVE(nfsd, nd_hash);
1547         LIST_REMOVE(nfsd, nd_tq);
	/*
	 * If nfsd extends past the end of owp, drop the bytes owp already
	 * covers from the front of nfsd's data and append the rest to owp's
	 * mbuf chain; otherwise nfsd lies entirely inside owp and its data
	 * is simply freed.
	 */
1548         if (owp->nd_eoff < nfsd->nd_eoff) {
1549             overlap = owp->nd_eoff - nfsd->nd_off;
1550             if (overlap < 0)
1551                 panic("nfsrv_coalesce: bad off");
1552             if (overlap > 0)
1553                 m_adj(nfsd->nd_mrep, overlap);
1554             mp1 = owp->nd_mrep;
1555             while (mp1->m_next)
1556                 mp1 = mp1->m_next;
1557             mp1->m_next = nfsd->nd_mrep;
1558             owp->nd_eoff = nfsd->nd_eoff;
1559         } else
1560             m_freem(nfsd->nd_mrep);
1561         nfsd->nd_mrep = NULL;
	/*
	 * The merged write takes the stricter stability level of the two:
	 * FILESYNC always wins, DATASYNC only upgrades UNSTABLE.
	 */
1562         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1563             owp->nd_stable = NFSV3WRITE_FILESYNC;
1564         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1565             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1566             owp->nd_stable = NFSV3WRITE_DATASYNC;
	/* nd_tq is reused as the linkage for the nd_coalesce list. */
1567         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1568 
1569 	/*
1570 	 * If nfsd had anything else coalesced into it, transfer them
1571 	 * to owp, otherwise their replies will never get sent.
1572 	 */
1573 	for (p = nfsd->nd_coalesce.lh_first; p;
1574 	     p = nfsd->nd_coalesce.lh_first) {
1575 	    LIST_REMOVE(p, nd_tq);
1576 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1577 	}
1578 }
1579 
1580 /*
1581  * nfs create service
1582  * now does a truncate to 0 length via. setattr if it already exists
 *
 * The name is resolved with nfs_namei(NLC_CREATE).  If the target does not
 * exist it is created with VOP_NCREATE() (VREG/VSOCK) or VOP_NMKNOD()
 * (VCHR/VBLK/VFIFO); any other type yields ENXIO.  If it already exists and
 * the request carries a size, write access is checked and the size is
 * applied with VOP_SETATTR().  For a v3 EXCLUSIVE create the verifier is
 * stored in va_atime of the new file; when the file already exists the
 * verifier is compared against its va_atime and a mismatch yields EEXIST.
 *
 * The reply mbuf chain is returned through *mrq.  The function returns the
 * value left in error; the v3 reply path and the early-error paths reset it
 * to 0 once the reply has been built.
1583  */
1584 int
1585 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1586 	     struct thread *td, struct mbuf **mrq)
1587 {
1588 	struct sockaddr *nam = nfsd->nd_nam;
1589 	struct ucred *cred = &nfsd->nd_cr;
1590 	struct nfs_fattr *fp;
1591 	struct vattr va, dirfor, diraft;
1592 	struct vattr *vap = &va;
1593 	struct nfsv2_sattr *sp;
1594 	u_int32_t *tl;
1595 	struct nlookupdata nd;
1596 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1597 	udev_t rdev = NOUDEV;
1598 	caddr_t cp;
1599 	int how, exclusive_flag = 0;
1600 	struct vnode *dirp;
1601 	struct vnode *dvp;
1602 	struct vnode *vp;
1603 	struct mount *mp;
1604 	nfsfh_t nfh;
1605 	fhandle_t *fhp;
1606 	u_quad_t tempsize;
1607 	u_char cverf[NFSX_V3CREATEVERF];
1608 	struct nfsm_info info;
1609 
1610 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1611 	nlookup_zero(&nd);
1612 	dirp = NULL;
1613 	dvp = NULL;
1614 	vp = NULL;
1615 
1616 	info.mrep = nfsd->nd_mrep;
1617 	info.mreq = NULL;
1618 	info.md = nfsd->nd_md;
1619 	info.dpos = nfsd->nd_dpos;
1620 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1621 
1622 	fhp = &nfh.fh_generic;
1623 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1624 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1625 
1626 	/*
1627 	 * Call namei and do initial cleanup to get a few things
1628 	 * out of the way.  If we get an initial error we cleanup
1629 	 * and return here to avoid special-casing the invalid nd
1630 	 * structure through the rest of the case.  dirp may be
1631 	 * set even if an error occurs, but the nd structure will not
1632 	 * be valid at all if an error occurs so we have to invalidate it
1633 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1634 	 */
1635 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1636 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1637 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1638 	mp = vfs_getvfs(&fhp->fh_fsid);
1639 
	/* dirp is only needed for the v3 wcc data; v2 drops it right away. */
1640 	if (dirp) {
1641 		if (info.v3) {
1642 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1643 		} else {
1644 			vrele(dirp);
1645 			dirp = NULL;
1646 		}
1647 	}
1648 	if (error) {
1649 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1650 				      NFSX_WCCDATA(info.v3), &error));
1651 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1652 				 diraft_ret, &diraft);
1653 		error = 0;
1654 		goto nfsmout;
1655 	}
1656 
1657 	/*
1658 	 * No error.  Continue.  State:
1659 	 *
1660 	 *	dirp 		may be valid
1661 	 *	vp		may be valid or NULL if the target does not
1662 	 *			exist.
1663 	 *	dvp		is valid
1664 	 *
1665 	 * The error state is set through the code and we may also do some
1666 	 * opportunistic releasing of vnodes to avoid holding locks through
1667 	 * NFS I/O.  The cleanup at the end is a catch-all
1668 	 */
1669 
1670 	VATTR_NULL(vap);
1671 	if (info.v3) {
1672 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1673 		how = fxdr_unsigned(int, *tl);
1674 		switch (how) {
1675 		case NFSV3CREATE_GUARDED:
1676 			if (vp) {
1677 				error = EEXIST;
1678 				break;
1679 			}
1680 			/* fall through */
1681 		case NFSV3CREATE_UNCHECKED:
1682 			ERROROUT(nfsm_srvsattr(&info, vap));
1683 			break;
1684 		case NFSV3CREATE_EXCLUSIVE:
1685 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1686 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1687 			exclusive_flag = 1;
1688 			break;
1689 		};
1690 		vap->va_type = VREG;
1691 	} else {
1692 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1693 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1694 		if (vap->va_type == VNON)
1695 			vap->va_type = VREG;
1696 		vap->va_mode = nfstov_mode(sp->sa_mode);
1697 		switch (vap->va_type) {
1698 		case VREG:
1699 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1700 			if (tsize != -1)
1701 				vap->va_size = (u_quad_t)tsize;
1702 			break;
1703 		case VCHR:
1704 		case VBLK:
1705 		case VFIFO:
			/* v2 carries the device number in the size field. */
1706 			rdev = fxdr_unsigned(long, sp->sa_size);
1707 			break;
1708 		default:
1709 			break;
1710 		};
1711 	}
1712 
1713 	/*
1714 	 * Iff doesn't exist, create it
1715 	 * otherwise just truncate to 0 length
1716 	 *   should I set the mode too ?
1717 	 *
1718 	 * The only possible error we can have at this point is EEXIST.
1719 	 * nd.ni_vp will also be non-NULL in that case.
1720 	 */
1721 	if (vp == NULL) {
1722 		if (vap->va_mode == (mode_t)VNOVAL)
1723 			vap->va_mode = 0;
1724 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1725 			vn_unlock(dvp);
1726 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1727 					    nd.nl_cred, vap);
1728 			vrele(dvp);
1729 			dvp = NULL;
1730 			if (error == 0) {
				/*
				 * Freshly created by us: record the verifier
				 * and clear the flag so that the comparison
				 * further down is skipped.
				 */
1731 				if (exclusive_flag) {
1732 					exclusive_flag = 0;
1733 					VATTR_NULL(vap);
1734 					bcopy(cverf, (caddr_t)&vap->va_atime,
1735 						NFSX_V3CREATEVERF);
1736 					error = VOP_SETATTR(vp, vap, cred);
1737 				}
1738 			}
1739 		} else if (
1740 			vap->va_type == VCHR ||
1741 			vap->va_type == VBLK ||
1742 			vap->va_type == VFIFO
1743 		) {
1744 			/*
1745 			 * Handle SysV FIFO node special cases.  All other
1746 			 * devices require super user to access.
1747 			 */
1748 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1749 				vap->va_type = VFIFO;
1750                         if (vap->va_type != VFIFO &&
1751                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1752 				goto nfsmreply0;
1753                         }
1754 			vap->va_rmajor = umajor(rdev);
1755 			vap->va_rminor = uminor(rdev);
1756 
1757 			vn_unlock(dvp);
1758 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1759 			vrele(dvp);
1760 			dvp = NULL;
1761 			if (error)
1762 				goto nfsmreply0;
1802 		} else {
1803 			error = ENXIO;
1804 		}
1805 	} else {
		/* Target exists: only apply a size if the request set one. */
1806 		if (vap->va_size != -1) {
1807 			error = nfsrv_access(mp, vp, VWRITE, cred,
1808 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1809 			if (!error) {
1810 				tempsize = vap->va_size;
1811 				VATTR_NULL(vap);
1812 				vap->va_size = tempsize;
1813 				error = VOP_SETATTR(vp, vap, cred);
1814 			}
1815 		}
1816 	}
1817 
1818 	if (!error) {
1819 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1820 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1821 		if (!error)
1822 			error = VOP_GETATTR(vp, vap);
1823 	}
	/*
	 * exclusive_flag is still set only when the target already existed;
	 * a verifier mismatch then means somebody else created the file.
	 * dirp is checked before use because it is only "may be valid" at
	 * this point; without it diraft_ret keeps its initial value of 1.
	 */
1824 	if (info.v3 && exclusive_flag && !error &&
1825 	    bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1826 		error = EEXIST;
1827 	if (info.v3 && dirp) {
1828 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1829 		vrele(dirp);
1830 		dirp = NULL;
1831 	}
1832 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1833 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1834 			      NFSX_WCCDATA(info.v3),
1835 			      &error));
1836 	if (info.v3) {
1837 		if (!error) {
1838 			nfsm_srvpostop_fh(&info, fhp);
1839 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1840 		}
1841 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1842 				 diraft_ret, &diraft);
1843 		error = 0;
1844 	} else {
1845 		nfsm_srvfhtom(&info, fhp);
1846 		fp = nfsm_build(&info, NFSX_V2FATTR);
1847 		nfsm_srvfattr(nfsd, vap, fp);
1848 	}
1849 	goto nfsmout;
1850 
1851 nfsmreply0:
1852 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1853 	error = 0;
1854 	/* fall through */
1855 
	/* Catch-all cleanup: release whatever is still held. */
1856 nfsmout:
1857 	*mrq = info.mreq;
1858 	if (dirp)
1859 		vrele(dirp);
1860 	nlookup_done(&nd);
1861 	if (dvp) {
1862 		if (dvp == vp)
1863 			vrele(dvp);
1864 		else
1865 			vput(dvp);
1866 	}
1867 	if (vp)
1868 		vput(vp);
1869 	return (error);
1870 }
1871 
1872 /*
1873  * nfs v3 mknod service
 *
 * Creates the VCHR, VBLK, VSOCK or VFIFO node named by the request; any
 * other type is rejected with NFSERR_BADTYPE and an existing target with
 * EEXIST.  VSOCK goes through VOP_NCREATE(), the other types through
 * VOP_NMKNOD(); VCHR and VBLK additionally require
 * priv_check_cred(PRIV_ROOT).
 *
 * The reply (file handle and attributes of the new node on success, plus
 * wcc data for the directory) is returned through *mrq.  The normal path
 * returns 0; the nfsmout path returns the value left in error.
1874  */
1875 int
1876 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1877 	    struct thread *td, struct mbuf **mrq)
1878 {
1879 	struct sockaddr *nam = nfsd->nd_nam;
1880 	struct ucred *cred = &nfsd->nd_cr;
1881 	struct vattr va, dirfor, diraft;
1882 	struct vattr *vap = &va;
1883 	u_int32_t *tl;
1884 	struct nlookupdata nd;
1885 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1886 	enum vtype vtyp;
1887 	struct vnode *dirp;
1888 	struct vnode *dvp;
1889 	struct vnode *vp;
1890 	nfsfh_t nfh;
1891 	fhandle_t *fhp;
1892 	struct nfsm_info info;
1893 
1894 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1895 	nlookup_zero(&nd);
1896 	dirp = NULL;
1897 	dvp = NULL;
1898 	vp = NULL;
1899 
1900 	info.mrep = nfsd->nd_mrep;
1901 	info.mreq = NULL;
1902 	info.md = nfsd->nd_md;
1903 	info.dpos = nfsd->nd_dpos;
	/*
	 * Initialize v3 like every other handler does before &info is
	 * handed to the nfsm_*() helpers; it used to be left as stack
	 * garbage here.
	 */
1904 	info.v3 = (nfsd->nd_flag & ND_NFSV3);

1905 	fhp = &nfh.fh_generic;
1906 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1907 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1908 
1909 	/*
1910 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1911 	 * is not valid.  However, nfsm_*() routines may still jump to
1912 	 * nfsmout.
1913 	 */
1914 
1915 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1916 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1917 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1918 	if (dirp)
1919 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1920 	if (error) {
1921 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1922 			   NFSX_WCCDATA(1), &error));
1923 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1924 				 diraft_ret, &diraft);
1925 		error = 0;
1926 		goto nfsmout;
1927 	}
1928 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1929 	vtyp = nfsv3tov_type(*tl);
1930 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1931 		error = NFSERR_BADTYPE;
1932 		goto out;
1933 	}
1934 	VATTR_NULL(vap);
1935 	ERROROUT(nfsm_srvsattr(&info, vap));
	/* Only device nodes carry a major/minor pair in the request. */
1936 	if (vtyp == VCHR || vtyp == VBLK) {
1937 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1938 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1939 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1940 	}
1941 
1942 	/*
1943 	 * Iff doesn't exist, create it.
1944 	 */
1945 	if (vp) {
1946 		error = EEXIST;
1947 		goto out;
1948 	}
1949 	vap->va_type = vtyp;
1950 	if (vap->va_mode == (mode_t)VNOVAL)
1951 		vap->va_mode = 0;
1952 	if (vtyp == VSOCK) {
1953 		vn_unlock(dvp);
1954 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1955 		vrele(dvp);
1956 		dvp = NULL;
1957 	} else {
1958 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1959 			goto out;
1960 
1961 		vn_unlock(dvp);
1962 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1963 		vrele(dvp);
1964 		dvp = NULL;
1965 		if (error)
1966 			goto out;
1967 	}
1968 
1969 	/*
1970 	 * send response, cleanup, return.
1971 	 */
1972 out:
1973 	nlookup_done(&nd);
1974 	if (dvp) {
1975 		if (dvp == vp)
1976 			vrele(dvp);
1977 		else
1978 			vput(dvp);
1979 		dvp = NULL;
1980 	}
1981 	if (!error) {
1982 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1983 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1984 		if (!error)
1985 			error = VOP_GETATTR(vp, vap);
1986 	}
1987 	if (vp) {
1988 		vput(vp);
1989 		vp = NULL;
1990 	}
	/*
	 * Fetch the post-op directory attributes only if the directory
	 * vnode is actually held; otherwise diraft_ret keeps its initial
	 * value of 1.
	 */
1991 	if (dirp) {
1992 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1993 		vrele(dirp);
1994 		dirp = NULL;
1995 	}
1996 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1997 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1998 			      NFSX_WCCDATA(1), &error));
1999 	if (!error) {
2000 		nfsm_srvpostop_fh(&info, fhp);
2001 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2002 	}
2003 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2004 			 diraft_ret, &diraft);
2005 	*mrq = info.mreq;
2006 	return (0);
	/* Catch-all cleanup for jumps out of the nfsm_*() macros. */
2007 nfsmout:
2008 	*mrq = info.mreq;
2009 	if (dirp)
2010 		vrele(dirp);
2011 	nlookup_done(&nd);
2012 	if (dvp) {
2013 		if (dvp == vp)
2014 			vrele(dvp);
2015 		else
2016 			vput(dvp);
2017 	}
2018 	if (vp)
2019 		vput(vp);
2020 	return (error);
2021 }
2022 
2023 /*
2024  * nfs remove service
 *
 * Resolves the name with nfs_namei(NLC_DELETE) and, unless the target is a
 * directory (EPERM) or has VROOT set (EBUSY), removes it with
 * VOP_NREMOVE().  For v3 the directory attributes from before and after
 * the operation are added to the reply as wcc data.  The reply mbuf chain
 * is returned through *mrq.
2025  */
2026 int
2027 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2028 	     struct thread *td, struct mbuf **mrq)
2029 {
2030 	struct sockaddr *nam = nfsd->nd_nam;
2031 	struct ucred *cred = &nfsd->nd_cr;
2032 	struct nlookupdata nd;
2033 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2034 	struct vnode *dirp;
2035 	struct vnode *dvp;
2036 	struct vnode *vp;
2037 	struct vattr dirfor, diraft;
2038 	nfsfh_t nfh;
2039 	fhandle_t *fhp;
2040 	struct nfsm_info info;
2041 
2042 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything the nfsmout cleanup looks at before any macro can jump there. */
2043 	nlookup_zero(&nd);
2044 	dirp = NULL;
2045 	dvp = NULL;
2046 	vp = NULL;
2047 
2048 	info.mrep = nfsd->nd_mrep;
2049 	info.mreq = NULL;
2050 	info.md = nfsd->nd_md;
2051 	info.dpos = nfsd->nd_dpos;
2052 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2053 
2054 	fhp = &nfh.fh_generic;
2055 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2056 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2057 
2058 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2059 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2060 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2061 	if (dirp) {
2062 		if (info.v3)
2063 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2064 	}
2065 	if (error == 0) {
2066 		if (vp->v_type == VDIR) {
2067 			error = EPERM;		/* POSIX */
2068 			goto out;
2069 		}
2070 		/*
2071 		 * The root of a mounted filesystem cannot be deleted.
2072 		 */
2073 		if (vp->v_flag & VROOT) {
2074 			error = EBUSY;
2075 			goto out;
2076 		}
		/*
		 * Both gotos above arrive here with error set, so the
		 * removal below is skipped for them and dvp/vp are left
		 * for the cleanup at nfsmout.
		 */
2077 out:
2078 		if (!error) {
			/*
			 * Unlock dvp and drop vp before VOP_NREMOVE(); only
			 * the reference on dvp is released afterwards.
			 */
2079 			if (dvp != vp)
2080 				vn_unlock(dvp);
2081 			if (vp) {
2082 				vput(vp);
2083 				vp = NULL;
2084 			}
2085 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2086 			vrele(dvp);
2087 			dvp = NULL;
2088 		}
2089 	}
2090 	if (dirp && info.v3)
2091 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2092 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2093 	if (info.v3) {
2094 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2095 				 diraft_ret, &diraft);
2096 		error = 0;
2097 	}
	/* Catch-all cleanup: release whatever is still held. */
2098 nfsmout:
2099 	*mrq = info.mreq;
2100 	nlookup_done(&nd);
2101 	if (dirp)
2102 		vrele(dirp);
2103 	if (dvp) {
2104 		if (dvp == vp)
2105 			vrele(dvp);
2106 		else
2107 			vput(dvp);
2108 	}
2109 	if (vp)
2110 		vput(vp);
2111 	return(error);
2112 }
2113 
2114 /*
2115  * nfs rename service
2116  */
2117 int
2118 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2119 	     struct thread *td, struct mbuf **mrq)
2120 {
2121 	struct sockaddr *nam = nfsd->nd_nam;
2122 	struct ucred *cred = &nfsd->nd_cr;
2123 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2124 	int tdirfor_ret = 1, tdiraft_ret = 1;
2125 	struct nlookupdata fromnd, tond;
2126 	struct vnode *fvp, *fdirp, *fdvp;
2127 	struct vnode *tvp, *tdirp, *tdvp;
2128 	struct namecache *ncp;
2129 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2130 	nfsfh_t fnfh, tnfh;
2131 	fhandle_t *ffhp, *tfhp;
2132 	uid_t saved_uid;
2133 	struct nfsm_info info;
2134 
2135 	info.mrep = nfsd->nd_mrep;
2136 	info.mreq = NULL;
2137 	info.md = nfsd->nd_md;
2138 	info.dpos = nfsd->nd_dpos;
2139 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2140 
2141 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2142 #ifndef nolint
2143 	fvp = NULL;
2144 #endif
2145 	ffhp = &fnfh.fh_generic;
2146 	tfhp = &tnfh.fh_generic;
2147 
2148 	/*
2149 	 * Clear fields incase goto nfsmout occurs from macro.
2150 	 */
2151 
2152 	nlookup_zero(&fromnd);
2153 	nlookup_zero(&tond);
2154 	fdirp = NULL;
2155 	tdirp = NULL;
2156 
2157 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2158 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2159 
2160 	/*
2161 	 * Remember our original uid so that we can reset cr_uid before
2162 	 * the second nfs_namei() call, in case it is remapped.
2163 	 */
2164 	saved_uid = cred->cr_uid;
2165 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2166 			  NULL, NULL,
2167 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2168 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2169 	if (fdirp) {
2170 		if (info.v3)
2171 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2172 	}
2173 	if (error) {
2174 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2175 				      2 * NFSX_WCCDATA(info.v3), &error));
2176 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2177 				 fdiraft_ret, &fdiraft);
2178 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2179 				 tdiraft_ret, &tdiraft);
2180 		error = 0;
2181 		goto nfsmout;
2182 	}
2183 
2184 	/*
2185 	 * We have to unlock the from ncp before we can safely lookup
2186 	 * the target ncp.
2187 	 */
2188 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2189 	cache_unlock(&fromnd.nl_nch);
2190 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2191 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2192 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2193 	cred->cr_uid = saved_uid;
2194 
2195 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2196 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2197 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2198 	if (tdirp) {
2199 		if (info.v3)
2200 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2201 	}
2202 	if (error)
2203 		goto out1;
2204 
2205 	/*
2206 	 * relock the source
2207 	 */
2208 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2209 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2211 		cache_lock(&fromnd.nl_nch);
2212 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2213 	} else {
2214 		cache_unlock(&tond.nl_nch);
2215 		cache_lock(&fromnd.nl_nch);
2216 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2217 		cache_lock(&tond.nl_nch);
2218 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2219 	}
2220 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2221 
2222 	fvp = fromnd.nl_nch.ncp->nc_vp;
2223 	tvp = tond.nl_nch.ncp->nc_vp;
2224 
2225 	/*
2226 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2227 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2228 	 * point).  If we get through the checks these will be guarenteed
2229 	 * to be non-NULL.
2230 	 *
2231 	 * Holding the children ncp's should be sufficient to prevent
2232 	 * fdvp and tdvp ripouts.
2233 	 */
2234 	if (fromnd.nl_nch.ncp->nc_parent)
2235 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2236 	else
2237 		fdvp = NULL;
2238 	if (tond.nl_nch.ncp->nc_parent)
2239 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2240 	else
2241 		tdvp = NULL;
2242 
2243 	if (tvp != NULL) {
2244 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2245 			if (info.v3)
2246 				error = EEXIST;
2247 			else
2248 				error = EISDIR;
2249 			goto out;
2250 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2251 			if (info.v3)
2252 				error = EEXIST;
2253 			else
2254 				error = ENOTDIR;
2255 			goto out;
2256 		}
2257 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2258 			if (info.v3)
2259 				error = EXDEV;
2260 			else
2261 				error = ENOTEMPTY;
2262 			goto out;
2263 		}
2264 	}
2265 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2266 		if (info.v3)
2267 			error = EXDEV;
2268 		else
2269 			error = ENOTEMPTY;
2270 		goto out;
2271 	}
2272 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2273 		if (info.v3)
2274 			error = EXDEV;
2275 		else
2276 			error = ENOTEMPTY;
2277 		goto out;
2278 	}
2279 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2280 		if (info.v3)
2281 			error = EINVAL;
2282 		else
2283 			error = ENOTEMPTY;
2284 	}
2285 
2286 	/*
2287 	 * You cannot rename a source into itself or a subdirectory of itself.
2288 	 * We check this by travsering the target directory upwards looking
2289 	 * for a match against the source.
2290 	 */
2291 	if (error == 0) {
2292 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2293 			if (fromnd.nl_nch.ncp == ncp) {
2294 				error = EINVAL;
2295 				break;
2296 			}
2297 		}
2298 	}
2299 
2300 	/*
2301 	 * If source is the same as the destination (that is the
2302 	 * same vnode with the same name in the same directory),
2303 	 * then there is nothing to do.
2304 	 */
2305 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2306 		error = -1;
2307 out:
2308 	if (!error) {
2309 		/*
2310 		 * The VOP_NRENAME function releases all vnode references &
2311 		 * locks prior to returning so we need to clear the pointers
2312 		 * to bypass cleanup code later on.
2313 		 */
2314 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2315 				    fdvp, tdvp, tond.nl_cred);
2316 	} else {
2317 		if (error == -1)
2318 			error = 0;
2319 	}
2320 	/* fall through */
2321 
2322 out1:
2323 	if (fdirp)
2324 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2325 	if (tdirp)
2326 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2327 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2328 			      2 * NFSX_WCCDATA(info.v3), &error));
2329 	if (info.v3) {
2330 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2331 				 fdiraft_ret, &fdiraft);
2332 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2333 				 tdiraft_ret, &tdiraft);
2334 	}
2335 	error = 0;
2336 	/* fall through */
2337 
2338 nfsmout:
2339 	*mrq = info.mreq;
2340 	if (tdirp)
2341 		vrele(tdirp);
2342 	nlookup_done(&tond);
2343 	if (fdirp)
2344 		vrele(fdirp);
2345 	nlookup_done(&fromnd);
2346 	return (error);
2347 }
2348 
2349 /*
2350  * nfs link service
2351  */
2352 int
2353 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2354 	   struct thread *td, struct mbuf **mrq)
2355 {
2356 	struct sockaddr *nam = nfsd->nd_nam;
2357 	struct ucred *cred = &nfsd->nd_cr;
2358 	struct nlookupdata nd;
2359 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2360 	int getret = 1;
2361 	struct vnode *dirp;
2362 	struct vnode *dvp;
2363 	struct vnode *vp;
2364 	struct vnode *xp;
2365 	struct mount *mp;
2366 	struct mount *xmp;
2367 	struct vattr dirfor, diraft, at;
2368 	nfsfh_t nfh, dnfh;
2369 	fhandle_t *fhp, *dfhp;
2370 	struct nfsm_info info;
2371 
2372 	info.mrep = nfsd->nd_mrep;
2373 	info.mreq = NULL;
2374 	info.md = nfsd->nd_md;
2375 	info.dpos = nfsd->nd_dpos;
2376 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2377 
2378 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2379 	nlookup_zero(&nd);
2380 	dirp = dvp = vp = xp = NULL;
2381 	mp = xmp = NULL;
2382 
2383 	fhp = &nfh.fh_generic;
2384 	dfhp = &dnfh.fh_generic;
2385 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2386 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2387 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2388 
2389 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2390 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2391 	if (error) {
2392 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2393 				      NFSX_POSTOPATTR(info.v3) +
2394 				      NFSX_WCCDATA(info.v3),
2395 				      &error));
2396 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2397 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2398 				 diraft_ret, &diraft);
2399 		xp = NULL;
2400 		error = 0;
2401 		goto nfsmout;
2402 	}
2403 	if (xp->v_type == VDIR) {
2404 		error = EPERM;		/* POSIX */
2405 		goto out1;
2406 	}
2407 
2408 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2409 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2410 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2411 	if (dirp) {
2412 		if (info.v3)
2413 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2414 	}
2415 	if (error)
2416 		goto out1;
2417 
2418 	if (vp != NULL) {
2419 		error = EEXIST;
2420 		goto out;
2421 	}
2422 	if (xp->v_mount != dvp->v_mount)
2423 		error = EXDEV;
2424 out:
2425 	if (!error) {
2426 		vn_unlock(dvp);
2427 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2428 		vrele(dvp);
2429 		dvp = NULL;
2430 	}
2431 	/* fall through */
2432 
2433 out1:
2434 	if (info.v3)
2435 		getret = VOP_GETATTR(xp, &at);
2436 	if (dirp)
2437 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2438 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2439 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2440 			      &error));
2441 	if (info.v3) {
2442 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2443 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2444 				 diraft_ret, &diraft);
2445 		error = 0;
2446 	}
2447 	/* fall through */
2448 
2449 nfsmout:
2450 	*mrq = info.mreq;
2451 	nlookup_done(&nd);
2452 	if (dirp)
2453 		vrele(dirp);
2454 	if (xp)
2455 		vrele(xp);
2456 	if (dvp) {
2457 		if (dvp == vp)
2458 			vrele(dvp);
2459 		else
2460 			vput(dvp);
2461 	}
2462 	if (vp)
2463 		vput(vp);
2464 	return(error);
2465 }
2466 
2467 /*
2468  * nfs symbolic link service
2469  */
2470 int
2471 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2472 	      struct thread *td, struct mbuf **mrq)
2473 {
2474 	struct sockaddr *nam = nfsd->nd_nam;
2475 	struct ucred *cred = &nfsd->nd_cr;
2476 	struct vattr va, dirfor, diraft;
2477 	struct nlookupdata nd;
2478 	struct vattr *vap = &va;
2479 	struct nfsv2_sattr *sp;
2480 	char *pathcp = NULL;
2481 	struct uio io;
2482 	struct iovec iv;
2483 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2484 	struct vnode *dirp;
2485 	struct vnode *vp;
2486 	struct vnode *dvp;
2487 	nfsfh_t nfh;
2488 	fhandle_t *fhp;
2489 	struct nfsm_info info;
2490 
2491 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2492 	nlookup_zero(&nd);
2493 	dirp = NULL;
2494 	dvp = NULL;
2495 	vp = NULL;
2496 
2497 	info.mrep = nfsd->nd_mrep;
2498 	info.mreq =  NULL;
2499 	info.md = nfsd->nd_md;
2500 	info.dpos = nfsd->nd_dpos;
2501 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2502 
2503 	fhp = &nfh.fh_generic;
2504 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2505 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2506 
2507 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2508 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2509 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2510 	if (dirp) {
2511 		if (info.v3)
2512 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2513 	}
2514 	if (error)
2515 		goto out;
2516 
2517 	VATTR_NULL(vap);
2518 	if (info.v3) {
2519 		ERROROUT(nfsm_srvsattr(&info, vap));
2520 	}
2521 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2522 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2523 	iv.iov_base = pathcp;
2524 	iv.iov_len = len2;
2525 	io.uio_resid = len2;
2526 	io.uio_offset = 0;
2527 	io.uio_iov = &iv;
2528 	io.uio_iovcnt = 1;
2529 	io.uio_segflg = UIO_SYSSPACE;
2530 	io.uio_rw = UIO_READ;
2531 	io.uio_td = NULL;
2532 	ERROROUT(nfsm_mtouio(&info, &io, len2));
2533 	if (info.v3 == 0) {
2534 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2535 		vap->va_mode = nfstov_mode(sp->sa_mode);
2536 	}
2537 	*(pathcp + len2) = '\0';
2538 	if (vp) {
2539 		error = EEXIST;
2540 		goto out;
2541 	}
2542 
2543 	if (vap->va_mode == (mode_t)VNOVAL)
2544 		vap->va_mode = 0;
2545 	if (dvp != vp)
2546 		vn_unlock(dvp);
2547 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2548 	vrele(dvp);
2549 	dvp = NULL;
2550 	if (error == 0) {
2551 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2552 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2553 		if (!error)
2554 			error = VOP_GETATTR(vp, vap);
2555 	}
2556 
2557 out:
2558 	if (dvp) {
2559 		if (dvp == vp)
2560 			vrele(dvp);
2561 		else
2562 			vput(dvp);
2563 	}
2564 	if (vp) {
2565 		vput(vp);
2566 		vp = NULL;
2567 	}
2568 	if (pathcp) {
2569 		FREE(pathcp, M_TEMP);
2570 		pathcp = NULL;
2571 	}
2572 	if (dirp) {
2573 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2574 		vrele(dirp);
2575 		dirp = NULL;
2576 	}
2577 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2578 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2579 			      NFSX_WCCDATA(info.v3),
2580 			      &error));
2581 	if (info.v3) {
2582 		if (!error) {
2583 			nfsm_srvpostop_fh(&info, fhp);
2584 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2585 		}
2586 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2587 				 diraft_ret, &diraft);
2588 	}
2589 	error = 0;
2590 	/* fall through */
2591 
2592 nfsmout:
2593 	*mrq = info.mreq;
2594 	nlookup_done(&nd);
2595 	if (vp)
2596 		vput(vp);
2597 	if (dirp)
2598 		vrele(dirp);
2599 	if (pathcp)
2600 		FREE(pathcp, M_TEMP);
2601 	return (error);
2602 }
2603 
2604 /*
2605  * nfs mkdir service
2606  */
2607 int
2608 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2609 	    struct thread *td, struct mbuf **mrq)
2610 {
2611 	struct sockaddr *nam = nfsd->nd_nam;
2612 	struct ucred *cred = &nfsd->nd_cr;
2613 	struct vattr va, dirfor, diraft;
2614 	struct vattr *vap = &va;
2615 	struct nfs_fattr *fp;
2616 	struct nlookupdata nd;
2617 	u_int32_t *tl;
2618 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2619 	struct vnode *dirp;
2620 	struct vnode *dvp;
2621 	struct vnode *vp;
2622 	nfsfh_t nfh;
2623 	fhandle_t *fhp;
2624 	struct nfsm_info info;
2625 
2626 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2627 	nlookup_zero(&nd);
2628 	dirp = NULL;
2629 	dvp = NULL;
2630 	vp = NULL;
2631 
2632 	info.dpos = nfsd->nd_dpos;
2633 	info.mrep = nfsd->nd_mrep;
2634 	info.mreq =  NULL;
2635 	info.md = nfsd->nd_md;
2636 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2637 
2638 	fhp = &nfh.fh_generic;
2639 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2640 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2641 
2642 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2643 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2644 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2645 	if (dirp) {
2646 		if (info.v3)
2647 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2648 	}
2649 	if (error) {
2650 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2651 				      NFSX_WCCDATA(info.v3), &error));
2652 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2653 				 diraft_ret, &diraft);
2654 		error = 0;
2655 		goto nfsmout;
2656 	}
2657 	VATTR_NULL(vap);
2658 	if (info.v3) {
2659 		ERROROUT(nfsm_srvsattr(&info, vap));
2660 	} else {
2661 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2662 		vap->va_mode = nfstov_mode(*tl++);
2663 	}
2664 
2665 	/*
2666 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2667 	 * nd.ni_vp, if it exists, is referenced but not locked.
2668 	 */
2669 
2670 	vap->va_type = VDIR;
2671 	if (vp != NULL) {
2672 		error = EEXIST;
2673 		goto out;
2674 	}
2675 
2676 	/*
2677 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2678 	 * component is freed by the VOP call.  This will fill-in
2679 	 * nd.ni_vp, reference, and exclusively lock it.
2680 	 */
2681 	if (vap->va_mode == (mode_t)VNOVAL)
2682 		vap->va_mode = 0;
2683 	vn_unlock(dvp);
2684 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2685 	vrele(dvp);
2686 	dvp = NULL;
2687 
2688 	if (error == 0) {
2689 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2690 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2691 		if (error == 0)
2692 			error = VOP_GETATTR(vp, vap);
2693 	}
2694 out:
2695 	if (dirp)
2696 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2697 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2698 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2699 			      NFSX_WCCDATA(info.v3),
2700 			      &error));
2701 	if (info.v3) {
2702 		if (!error) {
2703 			nfsm_srvpostop_fh(&info, fhp);
2704 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2705 		}
2706 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2707 				 diraft_ret, &diraft);
2708 	} else {
2709 		nfsm_srvfhtom(&info, fhp);
2710 		fp = nfsm_build(&info, NFSX_V2FATTR);
2711 		nfsm_srvfattr(nfsd, vap, fp);
2712 	}
2713 	error = 0;
2714 	/* fall through */
2715 
2716 nfsmout:
2717 	*mrq = info.mreq;
2718 	nlookup_done(&nd);
2719 	if (dirp)
2720 		vrele(dirp);
2721 	if (dvp) {
2722 		if (dvp == vp)
2723 			vrele(dvp);
2724 		else
2725 			vput(dvp);
2726 	}
2727 	if (vp)
2728 		vput(vp);
2729 	return (error);
2730 }
2731 
2732 /*
2733  * nfs rmdir service
2734  */
2735 int
2736 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2737 	    struct thread *td, struct mbuf **mrq)
2738 {
2739 	struct sockaddr *nam = nfsd->nd_nam;
2740 	struct ucred *cred = &nfsd->nd_cr;
2741 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2742 	struct vnode *dirp;
2743 	struct vnode *dvp;
2744 	struct vnode *vp;
2745 	struct vattr dirfor, diraft;
2746 	nfsfh_t nfh;
2747 	fhandle_t *fhp;
2748 	struct nlookupdata nd;
2749 	struct nfsm_info info;
2750 
2751 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2752 	nlookup_zero(&nd);
2753 	dirp = NULL;
2754 	dvp = NULL;
2755 	vp = NULL;
2756 
2757 	info.mrep = nfsd->nd_mrep;
2758 	info.mreq = NULL;
2759 	info.md = nfsd->nd_md;
2760 	info.dpos = nfsd->nd_dpos;
2761 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2762 
2763 	fhp = &nfh.fh_generic;
2764 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2765 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2766 
2767 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2768 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2769 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2770 	if (dirp) {
2771 		if (info.v3)
2772 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2773 	}
2774 	if (error) {
2775 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2776 				      NFSX_WCCDATA(info.v3), &error));
2777 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2778 				 diraft_ret, &diraft);
2779 		error = 0;
2780 		goto nfsmout;
2781 	}
2782 	if (vp->v_type != VDIR) {
2783 		error = ENOTDIR;
2784 		goto out;
2785 	}
2786 
2787 	/*
2788 	 * The root of a mounted filesystem cannot be deleted.
2789 	 */
2790 	if (vp->v_flag & VROOT)
2791 		error = EBUSY;
2792 out:
2793 	/*
2794 	 * Issue or abort op.  Since SAVESTART is not set, path name
2795 	 * component is freed by the VOP after either.
2796 	 */
2797 	if (!error) {
2798 		if (dvp != vp)
2799 			vn_unlock(dvp);
2800 		vput(vp);
2801 		vp = NULL;
2802 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2803 		vrele(dvp);
2804 		dvp = NULL;
2805 	}
2806 	nlookup_done(&nd);
2807 
2808 	if (dirp)
2809 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2810 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2811 	if (info.v3) {
2812 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2813 				 diraft_ret, &diraft);
2814 		error = 0;
2815 	}
2816 	/* fall through */
2817 
2818 nfsmout:
2819 	*mrq = info.mreq;
2820 	if (dvp) {
2821 		if (dvp == vp)
2822 			vrele(dvp);
2823 		else
2824 			vput(dvp);
2825 	}
2826 	nlookup_done(&nd);
2827 	if (dirp)
2828 		vrele(dirp);
2829 	if (vp)
2830 		vput(vp);
2831 	return(error);
2832 }
2833 
2834 /*
2835  * nfs readdir service
2836  * - mallocs what it thinks is enough to read
2837  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2838  * - calls VOP_READDIR()
2839  * - loops around building the reply
2840  *	if the output generated exceeds count break out of loop
2841  *	The nfsm_clget macro is used here so that the reply will be packed
2842  *	tightly in mbuf clusters.
2843  * - it only knows that it has encountered eof when the VOP_READDIR()
2844  *	reads nothing
2845  * - as such one readdir rpc will return eof false although you are there
2846  *	and then the next will return eof
2847  * - it trims out records with d_fileno == 0
2848  *	this doesn't matter for Unix clients, but they might confuse clients
2849  *	for other os'.
2850  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2851  *	than requested, but this may not apply to all filesystems. For
2852  *	example, client NFS does not { although it is never remote mounted
2853  *	anyhow }
2854  *     The alternate call nfsrv_readdirplus() does lookups as well.
2855  * PS: The NFS protocol spec. does not clarify what the "count" byte
2856  *	argument is a count of.. just name strings and file id's or the
2857  *	entire reply rpc or ...
2858  *	I tried just file name and id sizes and it confused the Sun client,
2859  *	so I am using the full rpc size now. The "paranoia.." comment refers
2860  *	to including the status longwords that are not a part of the dir.
2861  *	"entry" structures, but are in the rpc.
2862  */
2863 struct flrep {
2864 	nfsuint64	fl_off;
2865 	u_int32_t	fl_postopok;
2866 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2867 	u_int32_t	fl_fhok;
2868 	u_int32_t	fl_fhsize;
2869 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2870 };
2871 
2872 int
2873 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2874 	      struct thread *td, struct mbuf **mrq)
2875 {
2876 	struct sockaddr *nam = nfsd->nd_nam;
2877 	struct ucred *cred = &nfsd->nd_cr;
2878 	char *bp, *be;
2879 	struct dirent *dp;
2880 	caddr_t cp;
2881 	u_int32_t *tl;
2882 	struct mbuf *mp1, *mp2;
2883 	char *cpos, *cend, *rbuf;
2884 	struct vnode *vp = NULL;
2885 	struct mount *mp = NULL;
2886 	struct vattr at;
2887 	nfsfh_t nfh;
2888 	fhandle_t *fhp;
2889 	struct uio io;
2890 	struct iovec iv;
2891 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2892 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2893 	u_quad_t off, toff, verf;
2894 	off_t *cookies = NULL, *cookiep;
2895 	struct nfsm_info info;
2896 
2897 	info.mrep = nfsd->nd_mrep;
2898 	info.mreq = NULL;
2899 	info.md = nfsd->nd_md;
2900 	info.dpos = nfsd->nd_dpos;
2901 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2902 
2903 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2904 	fhp = &nfh.fh_generic;
2905 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2906 	if (info.v3) {
2907 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2908 		toff = fxdr_hyper(tl);
2909 		tl += 2;
2910 		verf = fxdr_hyper(tl);
2911 		tl += 2;
2912 	} else {
2913 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2914 		toff = fxdr_unsigned(u_quad_t, *tl++);
2915 		verf = 0;	/* shut up gcc */
2916 	}
2917 	off = toff;
2918 	cnt = fxdr_unsigned(int, *tl);
2919 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2920 	xfer = NFS_SRVMAXDATA(nfsd);
2921 	if ((unsigned)cnt > xfer)
2922 		cnt = xfer;
2923 	if ((unsigned)siz > xfer)
2924 		siz = xfer;
2925 	fullsiz = siz;
2926 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2927 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2928 	if (!error && vp->v_type != VDIR) {
2929 		error = ENOTDIR;
2930 		vput(vp);
2931 		vp = NULL;
2932 	}
2933 	if (error) {
2934 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2935 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2936 		error = 0;
2937 		goto nfsmout;
2938 	}
2939 
2940 	/*
2941 	 * Obtain lock on vnode for this section of the code
2942 	 */
2943 
2944 	if (info.v3) {
2945 		error = getret = VOP_GETATTR(vp, &at);
2946 #if 0
2947 		/*
2948 		 * XXX This check may be too strict for Solaris 2.5 clients.
2949 		 */
2950 		if (!error && toff && verf && verf != at.va_filerev)
2951 			error = NFSERR_BAD_COOKIE;
2952 #endif
2953 	}
2954 	if (!error)
2955 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2956 	if (error) {
2957 		vput(vp);
2958 		vp = NULL;
2959 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2960 				      NFSX_POSTOPATTR(info.v3), &error));
2961 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2962 		error = 0;
2963 		goto nfsmout;
2964 	}
2965 	vn_unlock(vp);
2966 
2967 	/*
2968 	 * end section.  Allocate rbuf and continue
2969 	 */
2970 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2971 again:
2972 	iv.iov_base = rbuf;
2973 	iv.iov_len = fullsiz;
2974 	io.uio_iov = &iv;
2975 	io.uio_iovcnt = 1;
2976 	io.uio_offset = (off_t)off;
2977 	io.uio_resid = fullsiz;
2978 	io.uio_segflg = UIO_SYSSPACE;
2979 	io.uio_rw = UIO_READ;
2980 	io.uio_td = NULL;
2981 	eofflag = 0;
2982 	if (cookies) {
2983 		kfree((caddr_t)cookies, M_TEMP);
2984 		cookies = NULL;
2985 	}
2986 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2987 	off = (off_t)io.uio_offset;
2988 	if (!cookies && !error)
2989 		error = NFSERR_PERM;
2990 	if (info.v3) {
2991 		getret = VOP_GETATTR(vp, &at);
2992 		if (!error)
2993 			error = getret;
2994 	}
2995 	if (error) {
2996 		vrele(vp);
2997 		vp = NULL;
2998 		kfree((caddr_t)rbuf, M_TEMP);
2999 		if (cookies)
3000 			kfree((caddr_t)cookies, M_TEMP);
3001 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3002 				      NFSX_POSTOPATTR(info.v3), &error));
3003 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3004 		error = 0;
3005 		goto nfsmout;
3006 	}
3007 	if (io.uio_resid) {
3008 		siz -= io.uio_resid;
3009 
3010 		/*
3011 		 * If nothing read, return eof
3012 		 * rpc reply
3013 		 */
3014 		if (siz == 0) {
3015 			vrele(vp);
3016 			vp = NULL;
3017 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3018 					      NFSX_POSTOPATTR(info.v3) +
3019 					      NFSX_COOKIEVERF(info.v3) +
3020 					      2 * NFSX_UNSIGNED,
3021 					      &error));
3022 			if (info.v3) {
3023 				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3024 				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3025 				txdr_hyper(at.va_filerev, tl);
3026 				tl += 2;
3027 			} else
3028 				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3029 			*tl++ = nfs_false;
3030 			*tl = nfs_true;
3031 			FREE((caddr_t)rbuf, M_TEMP);
3032 			FREE((caddr_t)cookies, M_TEMP);
3033 			error = 0;
3034 			goto nfsmout;
3035 		}
3036 	}
3037 
3038 	/*
3039 	 * Check for degenerate cases of nothing useful read.
3040 	 * If so go try again
3041 	 */
3042 	cpos = rbuf;
3043 	cend = rbuf + siz;
3044 	dp = (struct dirent *)cpos;
3045 	cookiep = cookies;
3046 	/*
3047 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3048 	 * directory offset up to a block boundary, so it is necessary to
3049 	 * skip over the records that preceed the requested offset. This
3050 	 * requires the assumption that file offset cookies monotonically
3051 	 * increase.
3052 	 */
3053 	while (cpos < cend && ncookies > 0 &&
3054 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3055 		 ((u_quad_t)(*cookiep)) <= toff)) {
3056 		dp = _DIRENT_NEXT(dp);
3057 		cpos = (char *)dp;
3058 		cookiep++;
3059 		ncookies--;
3060 	}
3061 	if (cpos >= cend || ncookies == 0) {
3062 		toff = off;
3063 		siz = fullsiz;
3064 		goto again;
3065 	}
3066 
3067 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3068 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3069 			      NFSX_POSTOPATTR(info.v3) +
3070 			      NFSX_COOKIEVERF(info.v3) + siz,
3071 			      &error));
3072 	if (info.v3) {
3073 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3074 		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3075 		txdr_hyper(at.va_filerev, tl);
3076 	}
3077 	mp1 = mp2 = info.mb;
3078 	bp = info.bpos;
3079 	be = bp + M_TRAILINGSPACE(mp1);
3080 
3081 	/* Loop through the records and build reply */
3082 	while (cpos < cend && ncookies > 0) {
3083 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3084 			nlen = dp->d_namlen;
3085 			rem = nfsm_rndup(nlen) - nlen;
3086 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3087 			if (info.v3)
3088 				len += 2 * NFSX_UNSIGNED;
3089 			if (len > cnt) {
3090 				eofflag = 0;
3091 				break;
3092 			}
3093 			/*
3094 			 * Build the directory record xdr from
3095 			 * the dirent entry.
3096 			 */
3097 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3098 			*tl = nfs_true;
3099 			bp += NFSX_UNSIGNED;
3100 			if (info.v3) {
3101 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3102 				*tl = txdr_unsigned(dp->d_ino >> 32);
3103 				bp += NFSX_UNSIGNED;
3104 			}
3105 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3106 			*tl = txdr_unsigned(dp->d_ino);
3107 			bp += NFSX_UNSIGNED;
3108 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3109 			*tl = txdr_unsigned(nlen);
3110 			bp += NFSX_UNSIGNED;
3111 
3112 			/* And loop around copying the name */
3113 			xfer = nlen;
3114 			cp = dp->d_name;
3115 			while (xfer > 0) {
3116 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3117 				if ((bp+xfer) > be)
3118 					tsiz = be-bp;
3119 				else
3120 					tsiz = xfer;
3121 				bcopy(cp, bp, tsiz);
3122 				bp += tsiz;
3123 				xfer -= tsiz;
3124 				if (xfer > 0)
3125 					cp += tsiz;
3126 			}
3127 			/* And null pad to a int32_t boundary */
3128 			for (i = 0; i < rem; i++)
3129 				*bp++ = '\0';
3130 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3131 
3132 			/* Finish off the record */
3133 			if (info.v3) {
3134 				*tl = txdr_unsigned(*cookiep >> 32);
3135 				bp += NFSX_UNSIGNED;
3136 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3137 			}
3138 			*tl = txdr_unsigned(*cookiep);
3139 			bp += NFSX_UNSIGNED;
3140 		}
3141 		dp = _DIRENT_NEXT(dp);
3142 		cpos = (char *)dp;
3143 		cookiep++;
3144 		ncookies--;
3145 	}
3146 	vrele(vp);
3147 	vp = NULL;
3148 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3149 	*tl = nfs_false;
3150 	bp += NFSX_UNSIGNED;
3151 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3152 	if (eofflag)
3153 		*tl = nfs_true;
3154 	else
3155 		*tl = nfs_false;
3156 	bp += NFSX_UNSIGNED;
3157 	if (mp1 != info.mb) {
3158 		if (bp < be)
3159 			mp1->m_len = bp - mtod(mp1, caddr_t);
3160 	} else
3161 		mp1->m_len += bp - info.bpos;
3162 	FREE((caddr_t)rbuf, M_TEMP);
3163 	FREE((caddr_t)cookies, M_TEMP);
3164 
3165 nfsmout:
3166 	*mrq = info.mreq;
3167 	if (vp)
3168 		vrele(vp);
3169 	return(error);
3170 }
3171 
3172 int
3173 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3174 		  struct thread *td, struct mbuf **mrq)
3175 {
3176 	struct sockaddr *nam = nfsd->nd_nam;
3177 	struct ucred *cred = &nfsd->nd_cr;
3178 	char *bp, *be;
3179 	struct dirent *dp;
3180 	caddr_t cp;
3181 	u_int32_t *tl;
3182 	struct mbuf *mp1, *mp2;
3183 	char *cpos, *cend, *rbuf;
3184 	struct vnode *vp = NULL, *nvp;
3185 	struct mount *mp = NULL;
3186 	struct flrep fl;
3187 	nfsfh_t nfh;
3188 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3189 	struct uio io;
3190 	struct iovec iv;
3191 	struct vattr va, at, *vap = &va;
3192 	struct nfs_fattr *fp;
3193 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3194 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3195 	u_quad_t off, toff, verf;
3196 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3197 	struct nfsm_info info;
3198 
3199 	info.mrep = nfsd->nd_mrep;
3200 	info.mreq = NULL;
3201 	info.md = nfsd->nd_md;
3202 	info.dpos = nfsd->nd_dpos;
3203 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3204 
3205 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3206 	fhp = &nfh.fh_generic;
3207 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3208 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3209 	toff = fxdr_hyper(tl);
3210 	tl += 2;
3211 	verf = fxdr_hyper(tl);
3212 	tl += 2;
3213 	siz = fxdr_unsigned(int, *tl++);
3214 	cnt = fxdr_unsigned(int, *tl);
3215 	off = toff;
3216 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3217 	xfer = NFS_SRVMAXDATA(nfsd);
3218 	if ((unsigned)cnt > xfer)
3219 		cnt = xfer;
3220 	if ((unsigned)siz > xfer)
3221 		siz = xfer;
3222 	fullsiz = siz;
3223 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3224 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3225 	if (!error && vp->v_type != VDIR) {
3226 		error = ENOTDIR;
3227 		vput(vp);
3228 		vp = NULL;
3229 	}
3230 	if (error) {
3231 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3232 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3233 		error = 0;
3234 		goto nfsmout;
3235 	}
3236 	error = getret = VOP_GETATTR(vp, &at);
3237 #if 0
3238 	/*
3239 	 * XXX This check may be too strict for Solaris 2.5 clients.
3240 	 */
3241 	if (!error && toff && verf && verf != at.va_filerev)
3242 		error = NFSERR_BAD_COOKIE;
3243 #endif
3244 	if (!error) {
3245 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3246 	}
3247 	if (error) {
3248 		vput(vp);
3249 		vp = NULL;
3250 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3251 				      NFSX_V3POSTOPATTR, &error));
3252 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3253 		error = 0;
3254 		goto nfsmout;
3255 	}
3256 	vn_unlock(vp);
3257 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3258 again:
3259 	iv.iov_base = rbuf;
3260 	iv.iov_len = fullsiz;
3261 	io.uio_iov = &iv;
3262 	io.uio_iovcnt = 1;
3263 	io.uio_offset = (off_t)off;
3264 	io.uio_resid = fullsiz;
3265 	io.uio_segflg = UIO_SYSSPACE;
3266 	io.uio_rw = UIO_READ;
3267 	io.uio_td = NULL;
3268 	eofflag = 0;
3269 	if (cookies) {
3270 		kfree((caddr_t)cookies, M_TEMP);
3271 		cookies = NULL;
3272 	}
3273 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3274 	off = (u_quad_t)io.uio_offset;
3275 	getret = VOP_GETATTR(vp, &at);
3276 	if (!cookies && !error)
3277 		error = NFSERR_PERM;
3278 	if (!error)
3279 		error = getret;
3280 	if (error) {
3281 		vrele(vp);
3282 		vp = NULL;
3283 		if (cookies)
3284 			kfree((caddr_t)cookies, M_TEMP);
3285 		kfree((caddr_t)rbuf, M_TEMP);
3286 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3287 				      NFSX_V3POSTOPATTR, &error));
3288 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3289 		error = 0;
3290 		goto nfsmout;
3291 	}
3292 	if (io.uio_resid) {
3293 		siz -= io.uio_resid;
3294 
3295 		/*
3296 		 * If nothing read, return eof
3297 		 * rpc reply
3298 		 */
3299 		if (siz == 0) {
3300 			vrele(vp);
3301 			vp = NULL;
3302 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3303 					      NFSX_V3POSTOPATTR +
3304 					      NFSX_V3COOKIEVERF +
3305 					      2 * NFSX_UNSIGNED,
3306 					      &error));
3307 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3308 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3309 			txdr_hyper(at.va_filerev, tl);
3310 			tl += 2;
3311 			*tl++ = nfs_false;
3312 			*tl = nfs_true;
3313 			FREE((caddr_t)cookies, M_TEMP);
3314 			FREE((caddr_t)rbuf, M_TEMP);
3315 			error = 0;
3316 			goto nfsmout;
3317 		}
3318 	}
3319 
3320 	/*
3321 	 * Check for degenerate cases of nothing useful read.
3322 	 * If so go try again
3323 	 */
3324 	cpos = rbuf;
3325 	cend = rbuf + siz;
3326 	dp = (struct dirent *)cpos;
3327 	cookiep = cookies;
3328 	/*
3329 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3330 	 * directory offset up to a block boundary, so it is necessary to
3331 	 * skip over the records that preceed the requested offset. This
3332 	 * requires the assumption that file offset cookies monotonically
3333 	 * increase.
3334 	 */
3335 	while (cpos < cend && ncookies > 0 &&
3336 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3337 		 ((u_quad_t)(*cookiep)) <= toff)) {
3338 		dp = _DIRENT_NEXT(dp);
3339 		cpos = (char *)dp;
3340 		cookiep++;
3341 		ncookies--;
3342 	}
3343 	if (cpos >= cend || ncookies == 0) {
3344 		toff = off;
3345 		siz = fullsiz;
3346 		goto again;
3347 	}
3348 
3349 	/*
3350 	 * Probe one of the directory entries to see if the filesystem
3351 	 * supports VGET.
3352 	 */
3353 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3354 		error = NFSERR_NOTSUPP;
3355 		vrele(vp);
3356 		vp = NULL;
3357 		kfree((caddr_t)cookies, M_TEMP);
3358 		kfree((caddr_t)rbuf, M_TEMP);
3359 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3360 				      NFSX_V3POSTOPATTR, &error));
3361 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3362 		error = 0;
3363 		goto nfsmout;
3364 	}
3365 	if (nvp) {
3366 		vput(nvp);
3367 		nvp = NULL;
3368 	}
3369 
3370 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3371 			2 * NFSX_UNSIGNED;
3372 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3373 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3374 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3375 	txdr_hyper(at.va_filerev, tl);
3376 	mp1 = mp2 = info.mb;
3377 	bp = info.bpos;
3378 	be = bp + M_TRAILINGSPACE(mp1);
3379 
3380 	/* Loop through the records and build reply */
3381 	while (cpos < cend && ncookies > 0) {
3382 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3383 			nlen = dp->d_namlen;
3384 			rem = nfsm_rndup(nlen) - nlen;
3385 
3386 			/*
3387 			 * For readdir_and_lookup get the vnode using
3388 			 * the file number.
3389 			 */
3390 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3391 				goto invalid;
3392 			bzero((caddr_t)nfhp, NFSX_V3FH);
3393 			nfhp->fh_fsid = fhp->fh_fsid;
3394 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3395 				vput(nvp);
3396 				nvp = NULL;
3397 				goto invalid;
3398 			}
3399 			if (VOP_GETATTR(nvp, vap)) {
3400 				vput(nvp);
3401 				nvp = NULL;
3402 				goto invalid;
3403 			}
3404 			vput(nvp);
3405 			nvp = NULL;
3406 
3407 			/*
3408 			 * If either the dircount or maxcount will be
3409 			 * exceeded, get out now. Both of these lengths
3410 			 * are calculated conservatively, including all
3411 			 * XDR overheads.
3412 			 */
3413 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3414 				NFSX_V3POSTOPATTR);
3415 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3416 			if (len > cnt || dirlen > fullsiz) {
3417 				eofflag = 0;
3418 				break;
3419 			}
3420 
3421 			/*
3422 			 * Build the directory record xdr from
3423 			 * the dirent entry.
3424 			 */
3425 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3426 			nfsm_srvfattr(nfsd, vap, fp);
3427 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3428 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3429 			fl.fl_postopok = nfs_true;
3430 			fl.fl_fhok = nfs_true;
3431 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3432 
3433 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3434 			*tl = nfs_true;
3435 			bp += NFSX_UNSIGNED;
3436 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3437 			*tl = txdr_unsigned(dp->d_ino >> 32);
3438 			bp += NFSX_UNSIGNED;
3439 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3440 			*tl = txdr_unsigned(dp->d_ino);
3441 			bp += NFSX_UNSIGNED;
3442 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3443 			*tl = txdr_unsigned(nlen);
3444 			bp += NFSX_UNSIGNED;
3445 
3446 			/* And loop around copying the name */
3447 			xfer = nlen;
3448 			cp = dp->d_name;
3449 			while (xfer > 0) {
3450 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3451 				if ((bp + xfer) > be)
3452 					tsiz = be - bp;
3453 				else
3454 					tsiz = xfer;
3455 				bcopy(cp, bp, tsiz);
3456 				bp += tsiz;
3457 				xfer -= tsiz;
3458 				cp += tsiz;
3459 			}
3460 			/* And null pad to a int32_t boundary */
3461 			for (i = 0; i < rem; i++)
3462 				*bp++ = '\0';
3463 
3464 			/*
3465 			 * Now copy the flrep structure out.
3466 			 */
3467 			xfer = sizeof (struct flrep);
3468 			cp = (caddr_t)&fl;
3469 			while (xfer > 0) {
3470 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3471 				if ((bp + xfer) > be)
3472 					tsiz = be - bp;
3473 				else
3474 					tsiz = xfer;
3475 				bcopy(cp, bp, tsiz);
3476 				bp += tsiz;
3477 				xfer -= tsiz;
3478 				cp += tsiz;
3479 			}
3480 		}
3481 invalid:
3482 		dp = _DIRENT_NEXT(dp);
3483 		cpos = (char *)dp;
3484 		cookiep++;
3485 		ncookies--;
3486 	}
3487 	vrele(vp);
3488 	vp = NULL;
3489 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3490 	*tl = nfs_false;
3491 	bp += NFSX_UNSIGNED;
3492 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3493 	if (eofflag)
3494 		*tl = nfs_true;
3495 	else
3496 		*tl = nfs_false;
3497 	bp += NFSX_UNSIGNED;
3498 	if (mp1 != info.mb) {
3499 		if (bp < be)
3500 			mp1->m_len = bp - mtod(mp1, caddr_t);
3501 	} else
3502 		mp1->m_len += bp - info.bpos;
3503 	FREE((caddr_t)cookies, M_TEMP);
3504 	FREE((caddr_t)rbuf, M_TEMP);
3505 nfsmout:
3506 	*mrq = info.mreq;
3507 	if (vp)
3508 		vrele(vp);
3509 	return(error);
3510 }
3511 
3512 /*
3513  * nfs commit service
3514  */
3515 int
3516 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3517 	     struct thread *td, struct mbuf **mrq)
3518 {
3519 	struct sockaddr *nam = nfsd->nd_nam;
3520 	struct ucred *cred = &nfsd->nd_cr;
3521 	struct vattr bfor, aft;
3522 	struct vnode *vp = NULL;
3523 	struct mount *mp = NULL;
3524 	nfsfh_t nfh;
3525 	fhandle_t *fhp;
3526 	u_int32_t *tl;
3527 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3528 	u_quad_t off;
3529 	struct nfsm_info info;
3530 
3531 	info.mrep = nfsd->nd_mrep;
3532 	info.mreq = NULL;
3533 	info.md = nfsd->nd_md;
3534 	info.dpos = nfsd->nd_dpos;
3535 
3536 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3537 	fhp = &nfh.fh_generic;
3538 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3539 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3540 
3541 	/*
3542 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3543 	 * count parameters, so these arguments are useless (someday maybe).
3544 	 */
3545 	off = fxdr_hyper(tl);
3546 	tl += 2;
3547 	cnt = fxdr_unsigned(int, *tl);
3548 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3549 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3550 	if (error) {
3551 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3552 				      2 * NFSX_UNSIGNED, &error));
3553 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3554 				 aft_ret, &aft);
3555 		error = 0;
3556 		goto nfsmout;
3557 	}
3558 	for_ret = VOP_GETATTR(vp, &bfor);
3559 
3560 	if (cnt > MAX_COMMIT_COUNT) {
3561 		/*
3562 		 * Give up and do the whole thing
3563 		 */
3564 		if (vp->v_object &&
3565 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3566 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3567 		}
3568 		error = VOP_FSYNC(vp, MNT_WAIT, 0);
3569 	} else {
3570 		/*
3571 		 * Locate and synchronously write any buffers that fall
3572 		 * into the requested range.  Note:  we are assuming that
3573 		 * f_iosize is a power of 2.
3574 		 */
3575 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3576 		int iomask = iosize - 1;
3577 		off_t loffset;
3578 
3579 		/*
3580 		 * Align to iosize boundry, super-align to page boundry.
3581 		 */
3582 		if (off & iomask) {
3583 			cnt += off & iomask;
3584 			off &= ~(u_quad_t)iomask;
3585 		}
3586 		if (off & PAGE_MASK) {
3587 			cnt += off & PAGE_MASK;
3588 			off &= ~(u_quad_t)PAGE_MASK;
3589 		}
3590 		loffset = off;
3591 
3592 		if (vp->v_object &&
3593 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3594 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3595 		}
3596 
3597 		crit_enter();
3598 		while (cnt > 0) {
3599 			struct buf *bp;
3600 
3601 			/*
3602 			 * If we have a buffer and it is marked B_DELWRI we
3603 			 * have to lock and write it.  Otherwise the prior
3604 			 * write is assumed to have already been committed.
3605 			 */
3606 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3607 				if (bp->b_flags & B_DELWRI)
3608 					bp = findblk(vp, loffset, 0);
3609 				else
3610 					bp = NULL;
3611 			}
3612 			if (bp) {
3613 				if (bp->b_flags & B_DELWRI) {
3614 					bremfree(bp);
3615 					bwrite(bp);
3616 					++nfs_commit_miss;
3617 				} else {
3618 					BUF_UNLOCK(bp);
3619 				}
3620 			}
3621 			++nfs_commit_blks;
3622 			if (cnt < iosize)
3623 				break;
3624 			cnt -= iosize;
3625 			loffset += iosize;
3626 		}
3627 		crit_exit();
3628 	}
3629 
3630 	aft_ret = VOP_GETATTR(vp, &aft);
3631 	vput(vp);
3632 	vp = NULL;
3633 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3634 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3635 			      &error));
3636 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3637 			 aft_ret, &aft);
3638 	if (!error) {
3639 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3640 		if (nfsver.tv_sec == 0)
3641 			nfsver = boottime;
3642 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3643 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3644 	} else {
3645 		error = 0;
3646 	}
3647 nfsmout:
3648 	*mrq = info.mreq;
3649 	if (vp)
3650 		vput(vp);
3651 	return(error);
3652 }
3653 
3654 /*
3655  * nfs statfs service
3656  */
3657 int
3658 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3659 	     struct thread *td, struct mbuf **mrq)
3660 {
3661 	struct sockaddr *nam = nfsd->nd_nam;
3662 	struct ucred *cred = &nfsd->nd_cr;
3663 	struct statfs *sf;
3664 	struct nfs_statfs *sfp;
3665 	int error = 0, rdonly, getret = 1;
3666 	struct vnode *vp = NULL;
3667 	struct mount *mp = NULL;
3668 	struct vattr at;
3669 	nfsfh_t nfh;
3670 	fhandle_t *fhp;
3671 	struct statfs statfs;
3672 	u_quad_t tval;
3673 	struct nfsm_info info;
3674 
3675 	info.mrep = nfsd->nd_mrep;
3676 	info.mreq = NULL;
3677 	info.md = nfsd->nd_md;
3678 	info.dpos = nfsd->nd_dpos;
3679 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3680 
3681 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3682 	fhp = &nfh.fh_generic;
3683 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3684 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3685 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3686 	if (error) {
3687 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3688 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3689 		error = 0;
3690 		goto nfsmout;
3691 	}
3692 	sf = &statfs;
3693 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3694 	getret = VOP_GETATTR(vp, &at);
3695 	vput(vp);
3696 	vp = NULL;
3697 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3698 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3699 			      &error));
3700 	if (info.v3)
3701 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3702 	if (error) {
3703 		error = 0;
3704 		goto nfsmout;
3705 	}
3706 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3707 	if (info.v3) {
3708 		tval = (u_quad_t)sf->f_blocks;
3709 		tval *= (u_quad_t)sf->f_bsize;
3710 		txdr_hyper(tval, &sfp->sf_tbytes);
3711 		tval = (u_quad_t)sf->f_bfree;
3712 		tval *= (u_quad_t)sf->f_bsize;
3713 		txdr_hyper(tval, &sfp->sf_fbytes);
3714 		tval = (u_quad_t)sf->f_bavail;
3715 		tval *= (u_quad_t)sf->f_bsize;
3716 		txdr_hyper(tval, &sfp->sf_abytes);
3717 		sfp->sf_tfiles.nfsuquad[0] = 0;
3718 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3719 		sfp->sf_ffiles.nfsuquad[0] = 0;
3720 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3721 		sfp->sf_afiles.nfsuquad[0] = 0;
3722 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3723 		sfp->sf_invarsec = 0;
3724 	} else {
3725 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3726 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3727 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3728 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3729 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3730 	}
3731 nfsmout:
3732 	*mrq = info.mreq;
3733 	if (vp)
3734 		vput(vp);
3735 	return(error);
3736 }
3737 
3738 /*
3739  * nfs fsinfo service
3740  */
3741 int
3742 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3743 	     struct thread *td, struct mbuf **mrq)
3744 {
3745 	struct sockaddr *nam = nfsd->nd_nam;
3746 	struct ucred *cred = &nfsd->nd_cr;
3747 	struct nfsv3_fsinfo *sip;
3748 	int error = 0, rdonly, getret = 1, pref;
3749 	struct vnode *vp = NULL;
3750 	struct mount *mp = NULL;
3751 	struct vattr at;
3752 	nfsfh_t nfh;
3753 	fhandle_t *fhp;
3754 	u_quad_t maxfsize;
3755 	struct statfs sb;
3756 	struct nfsm_info info;
3757 
3758 	info.mrep = nfsd->nd_mrep;
3759 	info.mreq = NULL;
3760 	info.md = nfsd->nd_md;
3761 	info.dpos = nfsd->nd_dpos;
3762 
3763 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3764 	fhp = &nfh.fh_generic;
3765 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3766 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3767 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3768 	if (error) {
3769 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3770 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3771 		error = 0;
3772 		goto nfsmout;
3773 	}
3774 
3775 	/* XXX Try to make a guess on the max file size. */
3776 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3777 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3778 
3779 	getret = VOP_GETATTR(vp, &at);
3780 	vput(vp);
3781 	vp = NULL;
3782 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3783 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3784 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3785 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3786 
3787 	/*
3788 	 * XXX
3789 	 * There should be file system VFS OP(s) to get this information.
3790 	 * For now, assume ufs.
3791 	 */
3792 	if (slp->ns_so->so_type == SOCK_DGRAM)
3793 		pref = NFS_MAXDGRAMDATA;
3794 	else
3795 		pref = NFS_MAXDATA;
3796 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3797 	sip->fs_rtpref = txdr_unsigned(pref);
3798 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3799 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3800 	sip->fs_wtpref = txdr_unsigned(pref);
3801 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3802 	sip->fs_dtpref = txdr_unsigned(pref);
3803 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3804 	sip->fs_timedelta.nfsv3_sec = 0;
3805 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3806 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3807 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3808 		NFSV3FSINFO_CANSETTIME);
3809 nfsmout:
3810 	*mrq = info.mreq;
3811 	if (vp)
3812 		vput(vp);
3813 	return(error);
3814 }
3815 
3816 /*
3817  * nfs pathconf service
3818  */
3819 int
3820 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3821 	       struct thread *td, struct mbuf **mrq)
3822 {
3823 	struct sockaddr *nam = nfsd->nd_nam;
3824 	struct ucred *cred = &nfsd->nd_cr;
3825 	struct nfsv3_pathconf *pc;
3826 	int error = 0, rdonly, getret = 1;
3827 	register_t linkmax, namemax, chownres, notrunc;
3828 	struct vnode *vp = NULL;
3829 	struct mount *mp = NULL;
3830 	struct vattr at;
3831 	nfsfh_t nfh;
3832 	fhandle_t *fhp;
3833 	struct nfsm_info info;
3834 
3835 	info.mrep = nfsd->nd_mrep;
3836 	info.mreq = NULL;
3837 	info.md = nfsd->nd_md;
3838 	info.dpos = nfsd->nd_dpos;
3839 
3840 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3841 	fhp = &nfh.fh_generic;
3842 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3843 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3844 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3845 	if (error) {
3846 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3847 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3848 		error = 0;
3849 		goto nfsmout;
3850 	}
3851 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3852 	if (!error)
3853 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3854 	if (!error)
3855 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3856 	if (!error)
3857 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3858 	getret = VOP_GETATTR(vp, &at);
3859 	vput(vp);
3860 	vp = NULL;
3861 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3862 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3863 			      &error));
3864 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3865 	if (error) {
3866 		error = 0;
3867 		goto nfsmout;
3868 	}
3869 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3870 
3871 	pc->pc_linkmax = txdr_unsigned(linkmax);
3872 	pc->pc_namemax = txdr_unsigned(namemax);
3873 	pc->pc_notrunc = txdr_unsigned(notrunc);
3874 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3875 
3876 	/*
3877 	 * These should probably be supported by VOP_PATHCONF(), but
3878 	 * until msdosfs is exportable (why would you want to?), the
3879 	 * Unix defaults should be ok.
3880 	 */
3881 	pc->pc_caseinsensitive = nfs_false;
3882 	pc->pc_casepreserving = nfs_true;
3883 nfsmout:
3884 	*mrq = info.mreq;
3885 	if (vp)
3886 		vput(vp);
3887 	return(error);
3888 }
3889 
3890 /*
3891  * Null operation, used by clients to ping server
3892  */
3893 /* ARGSUSED */
3894 int
3895 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3896 	   struct thread *td, struct mbuf **mrq)
3897 {
3898 	struct nfsm_info info;
3899 	int error = NFSERR_RETVOID;
3900 
3901 	info.mrep = nfsd->nd_mrep;
3902 	info.mreq = NULL;
3903 
3904 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3905 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3906 nfsmout:
3907 	*mrq = info.mreq;
3908 	return (error);
3909 }
3910 
3911 /*
3912  * No operation, used for obsolete procedures
3913  */
3914 /* ARGSUSED */
3915 int
3916 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3917 	   struct thread *td, struct mbuf **mrq)
3918 {
3919 	struct nfsm_info info;
3920 	int error;
3921 
3922 	info.mrep = nfsd->nd_mrep;
3923 	info.mreq = NULL;
3924 
3925 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3926 	if (nfsd->nd_repstat)
3927 		error = nfsd->nd_repstat;
3928 	else
3929 		error = EPROCUNAVAIL;
3930 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3931 	error = 0;
3932 nfsmout:
3933 	*mrq = info.mreq;
3934 	return (error);
3935 }
3936 
3937 /*
3938  * Perform access checking for vnodes obtained from file handles that would
3939  * refer to files already opened by a Unix client. You cannot just use
3940  * vn_writechk() and VOP_ACCESS() for two reasons.
3941  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3942  * 2 - The owner is to be given access irrespective of mode bits for some
3943  *     operations, so that processes that chmod after opening a file don't
3944  *     break. I don't like this because it opens a security hole, but since
3945  *     the nfs server opens a security hole the size of a barn door anyhow,
3946  *     what the heck.
3947  *
3948  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3949  * will return EPERM instead of EACCESS. EPERM is always an error.
3950  */
3951 static int
3952 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3953 	     int rdonly, struct thread *td, int override)
3954 {
3955 	struct vattr vattr;
3956 	int error;
3957 
3958 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3959 	if (flags & VWRITE) {
3960 		/* Just vn_writechk() changed to check rdonly */
3961 		/*
3962 		 * Disallow write attempts on read-only file systems;
3963 		 * unless the file is a socket or a block or character
3964 		 * device resident on the file system.
3965 		 */
3966 		if (rdonly ||
3967 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3968 			switch (vp->v_type) {
3969 			case VREG:
3970 			case VDIR:
3971 			case VLNK:
3972 				return (EROFS);
3973 			default:
3974 				break;
3975 			}
3976 		}
3977 		/*
3978 		 * If there's shared text associated with
3979 		 * the inode, we can't allow writing.
3980 		 */
3981 		if (vp->v_flag & VTEXT)
3982 			return (ETXTBSY);
3983 	}
3984 	error = VOP_GETATTR(vp, &vattr);
3985 	if (error)
3986 		return (error);
3987 	error = VOP_ACCESS(vp, flags, cred);	/* XXX ruid/rgid vs uid/gid */
3988 	/*
3989 	 * Allow certain operations for the owner (reads and writes
3990 	 * on files that are already open).
3991 	 */
3992 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3993 		error = 0;
3994 	return error;
3995 }
3996 #endif /* NFS_NOSERVER */
3997 
3998