xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision ad9f8794)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84 
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_object.h>
88 
89 #include <sys/buf2.h>
90 
91 #include <sys/thread2.h>
92 
93 #include "nfsproto.h"
94 #include "rpcv2.h"
95 #include "nfs.h"
96 #include "xdr_subs.h"
97 #include "nfsm_subs.h"
98 
/*
 * nfsdbprintf() - debug trace hook used at the top of each server call.
 *
 * The single macro argument is a complete, parenthesized kprintf()
 * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)).
 * Unless NFSRV_DEBUG is defined the macro expands to nothing, so the
 * arguments are not evaluated.
 */
#if defined(NFSRV_DEBUG)
#define nfsdbprintf(info)	kprintf info
#else
#define nfsdbprintf(info)
#endif
104 
/*
 * 1MB.  NOTE(review): not referenced in this part of the file; presumably
 * bounds a commit range further down -- confirm against its user.
 */
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Parameters of the sequential-read heuristic table consulted by
 * nfsrv_read().
 */
#define NUM_HEURISTIC		1017	/* slots in nfsheur[] */
#define NHUSE_INIT		64	/* nh_use given to a freshly claimed slot */
#define NHUSE_INC		16	/* nh_use credit added per read */
#define NHUSE_MAX		2048	/* ceiling applied to nh_use */

/*
 * One slot of the heuristic table.  nfsrv_read() hashes the vnode address
 * into the table, claims or reuses a slot, and tracks whether successive
 * reads are sequential.
 */
struct nfsheur {
	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
	off_t nh_nextr;		/* next offset for sequential detection */
	int nh_use;		/* use count for selection */
	int nh_seqcount;	/* heuristic */
};

static struct nfsheur nfsheur[NUM_HEURISTIC];
118 
119 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
120 		      NFFIFO, NFNON };
121 #ifndef NFS_NOSERVER
122 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
123 		      NFCHR, NFNON };
124 
125 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
126 int nfsrvw_procrastinate_v3 = 0;
127 
128 static struct timespec	nfsver;
129 
130 SYSCTL_DECL(_vfs_nfs);
131 
132 int nfs_async;
133 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
134     "Enable unstable and fast writes");
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
138     "Number of committed blocks");
139 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
140     "Number of nfs blocks committed from dirty buffers");
141 
142 static int nfsrv_access (struct mount *, struct vnode *, int,
143 			struct ucred *, int, struct thread *, int);
144 static void nfsrvw_coalesce (struct nfsrv_descript *,
145 		struct nfsrv_descript *);
146 
147 /*
148  * nfs v3 access service
149  */
150 int
151 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
152 	      struct thread *td, struct mbuf **mrq)
153 {
154 	struct sockaddr *nam = nfsd->nd_nam;
155 	struct ucred *cred = &nfsd->nd_cr;
156 	struct vnode *vp = NULL;
157 	struct mount *mp = NULL;
158 	nfsfh_t nfh;
159 	fhandle_t *fhp;
160 	int error = 0, rdonly, getret;
161 	struct vattr vattr, *vap = &vattr;
162 	u_long testmode, nfsmode;
163 	struct nfsm_info info;
164 	u_int32_t *tl;
165 
166 	info.dpos = nfsd->nd_dpos;
167 	info.md = nfsd->nd_md;
168 	info.mrep = nfsd->nd_mrep;
169 	info.mreq = NULL;
170 
171 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
172 	fhp = &nfh.fh_generic;
173 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
174 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
175 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
176 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
177 	if (error) {
178 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
179 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
180 		error = 0;
181 		goto nfsmout;
182 	}
183 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
184 	if ((nfsmode & NFSV3ACCESS_READ) &&
185 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
186 		nfsmode &= ~NFSV3ACCESS_READ;
187 	if (vp->v_type == VDIR)
188 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
189 			NFSV3ACCESS_DELETE);
190 	else
191 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
192 	if ((nfsmode & testmode) &&
193 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
194 		nfsmode &= ~testmode;
195 	if (vp->v_type == VDIR)
196 		testmode = NFSV3ACCESS_LOOKUP;
197 	else
198 		testmode = NFSV3ACCESS_EXECUTE;
199 	if ((nfsmode & testmode) &&
200 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
201 		nfsmode &= ~testmode;
202 	getret = VOP_GETATTR(vp, vap);
203 	vput(vp);
204 	vp = NULL;
205 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
206 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
207 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
208 	tl = nfsm_build(&info, NFSX_UNSIGNED);
209 	*tl = txdr_unsigned(nfsmode);
210 nfsmout:
211 	*mrq = info.mreq;
212 	if (vp)
213 		vput(vp);
214 	return(error);
215 }
216 
217 /*
218  * nfs getattr service
219  */
220 int
221 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
222 	      struct thread *td, struct mbuf **mrq)
223 {
224 	struct sockaddr *nam = nfsd->nd_nam;
225 	struct ucred *cred = &nfsd->nd_cr;
226 	struct nfs_fattr *fp;
227 	struct vattr va;
228 	struct vattr *vap = &va;
229 	struct vnode *vp = NULL;
230 	struct mount *mp = NULL;
231 	nfsfh_t nfh;
232 	fhandle_t *fhp;
233 	int error = 0, rdonly;
234 	struct nfsm_info info;
235 
236 	info.mrep = nfsd->nd_mrep;
237 	info.md = nfsd->nd_md;
238 	info.dpos = nfsd->nd_dpos;
239 	info.mreq = NULL;
240 
241 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
242 	fhp = &nfh.fh_generic;
243 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
244 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
245 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
246 	if (error) {
247 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
248 		error = 0;
249 		goto nfsmout;
250 	}
251 	error = VOP_GETATTR(vp, vap);
252 	vput(vp);
253 	vp = NULL;
254 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
255 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
256 	if (error) {
257 		error = 0;
258 		goto nfsmout;
259 	}
260 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
261 	nfsm_srvfattr(nfsd, vap, fp);
262 	/* fall through */
263 
264 nfsmout:
265 	*mrq = info.mreq;
266 	if (vp)
267 		vput(vp);
268 	return(error);
269 }
270 
271 /*
272  * nfs setattr service
273  */
274 int
275 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
276 	      struct thread *td, struct mbuf **mrq)
277 {
278 	struct sockaddr *nam = nfsd->nd_nam;
279 	struct ucred *cred = &nfsd->nd_cr;
280 	struct vattr va, preat;
281 	struct vattr *vap = &va;
282 	struct nfsv2_sattr *sp;
283 	struct nfs_fattr *fp;
284 	struct vnode *vp = NULL;
285 	struct mount *mp = NULL;
286 	nfsfh_t nfh;
287 	fhandle_t *fhp;
288 	u_int32_t *tl;
289 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
290 	int gcheck = 0;
291 	struct timespec guard;
292 	struct nfsm_info info;
293 
294 	info.mrep = nfsd->nd_mrep;
295 	info.mreq = NULL;
296 	info.md = nfsd->nd_md;
297 	info.dpos = nfsd->nd_dpos;
298 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
299 
300 	guard.tv_sec = 0;	/* fix compiler warning */
301 	guard.tv_nsec = 0;
302 
303 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
304 	fhp = &nfh.fh_generic;
305 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
306 	VATTR_NULL(vap);
307 	if (info.v3) {
308 		ERROROUT(nfsm_srvsattr(&info, vap));
309 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
310 		gcheck = fxdr_unsigned(int, *tl);
311 		if (gcheck) {
312 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
313 			fxdr_nfsv3time(tl, &guard);
314 		}
315 	} else {
316 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
317 		/*
318 		 * Nah nah nah nah na nah
319 		 * There is a bug in the Sun client that puts 0xffff in the mode
320 		 * field of sattr when it should put in 0xffffffff. The u_short
321 		 * doesn't sign extend.
322 		 * --> check the low order 2 bytes for 0xffff
323 		 */
324 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
325 			vap->va_mode = nfstov_mode(sp->sa_mode);
326 		if (sp->sa_uid != nfs_xdrneg1)
327 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
328 		if (sp->sa_gid != nfs_xdrneg1)
329 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
330 		if (sp->sa_size != nfs_xdrneg1)
331 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
332 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
333 #ifdef notyet
334 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
335 #else
336 			vap->va_atime.tv_sec =
337 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
338 			vap->va_atime.tv_nsec = 0;
339 #endif
340 		}
341 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
342 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
343 
344 	}
345 
346 	/*
347 	 * Now that we have all the fields, lets do it.
348 	 */
349 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
350 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
351 	if (error) {
352 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
353 				      2 * NFSX_UNSIGNED, &error));
354 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
355 				 postat_ret, vap);
356 		error = 0;
357 		goto nfsmout;
358 	}
359 
360 	/*
361 	 * vp now an active resource, pay careful attention to cleanup
362 	 */
363 
364 	if (info.v3) {
365 		error = preat_ret = VOP_GETATTR(vp, &preat);
366 		if (!error && gcheck &&
367 			(preat.va_ctime.tv_sec != guard.tv_sec ||
368 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
369 			error = NFSERR_NOT_SYNC;
370 		if (error) {
371 			vput(vp);
372 			vp = NULL;
373 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
374 					      NFSX_WCCDATA(info.v3), &error));
375 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
376 					 postat_ret, vap);
377 			error = 0;
378 			goto nfsmout;
379 		}
380 	}
381 
382 	/*
383 	 * If the size is being changed write acces is required, otherwise
384 	 * just check for a read only file system.
385 	 */
386 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
387 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
388 			error = EROFS;
389 			goto out;
390 		}
391 	} else {
392 		if (vp->v_type == VDIR) {
393 			error = EISDIR;
394 			goto out;
395 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
396 			    td, 0)) != 0){
397 			goto out;
398 		}
399 	}
400 	error = VOP_SETATTR(vp, vap, cred);
401 	postat_ret = VOP_GETATTR(vp, vap);
402 	if (!error)
403 		error = postat_ret;
404 out:
405 	vput(vp);
406 	vp = NULL;
407 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
408 		   NFSX_WCCORFATTR(info.v3), &error));
409 	if (info.v3) {
410 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
411 				 postat_ret, vap);
412 		error = 0;
413 		goto nfsmout;
414 	} else {
415 		fp = nfsm_build(&info, NFSX_V2FATTR);
416 		nfsm_srvfattr(nfsd, vap, fp);
417 	}
418 	/* fall through */
419 
420 nfsmout:
421 	*mrq = info.mreq;
422 	if (vp)
423 		vput(vp);
424 	return(error);
425 }
426 
427 /*
428  * nfs lookup rpc
429  */
430 int
431 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
432 	     struct thread *td, struct mbuf **mrq)
433 {
434 	struct sockaddr *nam = nfsd->nd_nam;
435 	struct ucred *cred = &nfsd->nd_cr;
436 	struct nfs_fattr *fp;
437 	struct nlookupdata nd;
438 	struct vnode *vp;
439 	struct vnode *dirp;
440 	struct nchandle nch;
441 	nfsfh_t nfh;
442 	fhandle_t *fhp;
443 	int error = 0, len, dirattr_ret = 1;
444 	int pubflag;
445 	struct vattr va, dirattr, *vap = &va;
446 	struct nfsm_info info;
447 
448 	info.mrep = nfsd->nd_mrep;
449 	info.mreq = NULL;
450 	info.md = nfsd->nd_md;
451 	info.dpos = nfsd->nd_dpos;
452 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
453 
454 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
455 	nlookup_zero(&nd);
456 	dirp = NULL;
457 	vp = NULL;
458 
459 	fhp = &nfh.fh_generic;
460 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
461 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
462 
463 	pubflag = nfs_ispublicfh(fhp);
464 
465 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
466 		fhp, len, slp, nam, &info.md, &info.dpos,
467 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
468 
469 	/*
470 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
471 	 * structure in case macros jump to nfsmout.
472 	 */
473 
474 	if (error) {
475 		if (dirp) {
476 			if (info.v3)
477 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
478 			vrele(dirp);
479 			dirp = NULL;
480 		}
481 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
482 				      NFSX_POSTOPATTR(info.v3), &error));
483 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
484 		error = 0;
485 		goto nfsmout;
486 	}
487 
488 	/*
489 	 * Locate index file for public filehandle
490 	 *
491 	 * error is 0 on entry and 0 on exit from this block.
492 	 */
493 
494 	if (pubflag) {
495 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
496 			/*
497 			 * Setup call to lookup() to see if we can find
498 			 * the index file. Arguably, this doesn't belong
499 			 * in a kernel.. Ugh.  If an error occurs, do not
500 			 * try to install an index file and then clear the
501 			 * error.
502 			 *
503 			 * When we replace nd with ind and redirect ndp,
504 			 * maintenance of ni_startdir and ni_vp shift to
505 			 * ind and we have to clean them up in the old nd.
506 			 * However, the cnd resource continues to be maintained
507 			 * via the original nd.  Confused?  You aren't alone!
508 			 */
509 			vn_unlock(vp);
510 			cache_copy(&nd.nl_nch, &nch);
511 			nlookup_done(&nd);
512 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
513 						UIO_SYSSPACE, 0, cred, &nch);
514 			cache_drop(&nch);
515 			if (error == 0)
516 				error = nlookup(&nd);
517 
518 			if (error == 0) {
519 				/*
520 				 * Found an index file. Get rid of
521 				 * the old references.  transfer vp and
522 				 * load up the new vp.  Fortunately we do
523 				 * not have to deal with dvp, that would be
524 				 * a huge mess.
525 				 */
526 				if (dirp)
527 					vrele(dirp);
528 				dirp = vp;
529 				vp = NULL;
530 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
531 							LK_EXCLUSIVE, &vp);
532 				KKASSERT(error == 0);
533 			}
534 			error = 0;
535 		}
536 		/*
537 		 * If the public filehandle was used, check that this lookup
538 		 * didn't result in a filehandle outside the publicly exported
539 		 * filesystem.  We clear the poor vp here to avoid lockups due
540 		 * to NFS I/O.
541 		 */
542 
543 		if (vp->v_mount != nfs_pub.np_mount) {
544 			vput(vp);
545 			vp = NULL;
546 			error = EPERM;
547 		}
548 	}
549 
550 	if (dirp) {
551 		if (info.v3)
552 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
553 		vrele(dirp);
554 		dirp = NULL;
555 	}
556 
557 	/*
558 	 * Resources at this point:
559 	 *	ndp->ni_vp	may not be NULL
560 	 *
561 	 */
562 
563 	if (error) {
564 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
565 				      NFSX_POSTOPATTR(info.v3), &error));
566 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
567 		error = 0;
568 		goto nfsmout;
569 	}
570 
571 	/*
572 	 * Clear out some resources prior to potentially blocking.  This
573 	 * is not as critical as ni_dvp resources in other routines, but
574 	 * it helps.
575 	 */
576 	nlookup_done(&nd);
577 
578 	/*
579 	 * Get underlying attribute, then release remaining resources ( for
580 	 * the same potential blocking reason ) and reply.
581 	 */
582 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
583 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
584 	if (!error)
585 		error = VOP_GETATTR(vp, vap);
586 
587 	vput(vp);
588 	vp = NULL;
589 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
590 			      NFSX_SRVFH(info.v3) +
591 			      NFSX_POSTOPORFATTR(info.v3) +
592 			      NFSX_POSTOPATTR(info.v3),
593 			      &error));
594 	if (error) {
595 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
596 		error = 0;
597 		goto nfsmout;
598 	}
599 	nfsm_srvfhtom(&info, fhp);
600 	if (info.v3) {
601 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
602 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
603 	} else {
604 		fp = nfsm_build(&info, NFSX_V2FATTR);
605 		nfsm_srvfattr(nfsd, vap, fp);
606 	}
607 
608 nfsmout:
609 	*mrq = info.mreq;
610 	if (dirp)
611 		vrele(dirp);
612 	nlookup_done(&nd);		/* may be called twice */
613 	if (vp)
614 		vput(vp);
615 	return (error);
616 }
617 
618 /*
619  * nfs readlink service
620  */
621 int
622 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
623 	       struct thread *td, struct mbuf **mrq)
624 {
625 	struct sockaddr *nam = nfsd->nd_nam;
626 	struct ucred *cred = &nfsd->nd_cr;
627 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
628 	struct iovec *ivp = iv;
629 	u_int32_t *tl;
630 	int error = 0, rdonly, i, tlen, len, getret;
631 	struct mbuf *mp1, *mp2, *mp3;
632 	struct vnode *vp = NULL;
633 	struct mount *mp = NULL;
634 	struct vattr attr;
635 	nfsfh_t nfh;
636 	fhandle_t *fhp;
637 	struct uio io, *uiop = &io;
638 	struct nfsm_info info;
639 
640 	info.mrep = nfsd->nd_mrep;
641 	info.mreq = NULL;
642 	info.md = nfsd->nd_md;
643 	info.dpos = nfsd->nd_dpos;
644 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
645 
646 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
647 #ifndef nolint
648 	mp2 = NULL;
649 #endif
650 	mp3 = NULL;
651 	fhp = &nfh.fh_generic;
652 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
653 	len = 0;
654 	i = 0;
655 	while (len < NFS_MAXPATHLEN) {
656 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
657 		mp1->m_len = MCLBYTES;
658 		if (len == 0)
659 			mp3 = mp2 = mp1;
660 		else {
661 			mp2->m_next = mp1;
662 			mp2 = mp1;
663 		}
664 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
665 			mp1->m_len = NFS_MAXPATHLEN-len;
666 			len = NFS_MAXPATHLEN;
667 		} else
668 			len += mp1->m_len;
669 		ivp->iov_base = mtod(mp1, caddr_t);
670 		ivp->iov_len = mp1->m_len;
671 		i++;
672 		ivp++;
673 	}
674 	uiop->uio_iov = iv;
675 	uiop->uio_iovcnt = i;
676 	uiop->uio_offset = 0;
677 	uiop->uio_resid = len;
678 	uiop->uio_rw = UIO_READ;
679 	uiop->uio_segflg = UIO_SYSSPACE;
680 	uiop->uio_td = NULL;
681 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
682 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
683 	if (error) {
684 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
685 				      2 * NFSX_UNSIGNED, &error));
686 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
687 		error = 0;
688 		goto nfsmout;
689 	}
690 	if (vp->v_type != VLNK) {
691 		if (info.v3)
692 			error = EINVAL;
693 		else
694 			error = ENXIO;
695 		goto out;
696 	}
697 	error = VOP_READLINK(vp, uiop, cred);
698 out:
699 	getret = VOP_GETATTR(vp, &attr);
700 	vput(vp);
701 	vp = NULL;
702 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
703 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
704 			     &error));
705 	if (info.v3) {
706 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
707 		if (error) {
708 			error = 0;
709 			goto nfsmout;
710 		}
711 	}
712 	if (uiop->uio_resid > 0) {
713 		len -= uiop->uio_resid;
714 		tlen = nfsm_rndup(len);
715 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
716 	}
717 	tl = nfsm_build(&info, NFSX_UNSIGNED);
718 	*tl = txdr_unsigned(len);
719 	info.mb->m_next = mp3;
720 	mp3 = NULL;
721 nfsmout:
722 	*mrq = info.mreq;
723 	if (mp3)
724 		m_freem(mp3);
725 	if (vp)
726 		vput(vp);
727 	return(error);
728 }
729 
730 /*
731  * nfs read service
732  */
733 int
734 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
735 	   struct thread *td, struct mbuf **mrq)
736 {
737 	struct nfsm_info info;
738 	struct sockaddr *nam = nfsd->nd_nam;
739 	struct ucred *cred = &nfsd->nd_cr;
740 	struct iovec *iv;
741 	struct iovec *iv2;
742 	struct mbuf *m;
743 	struct nfs_fattr *fp;
744 	u_int32_t *tl;
745 	int i;
746 	int reqlen;
747 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
748 	struct mbuf *m2;
749 	struct vnode *vp = NULL;
750 	struct mount *mp = NULL;
751 	nfsfh_t nfh;
752 	fhandle_t *fhp;
753 	struct uio io, *uiop = &io;
754 	struct vattr va, *vap = &va;
755 	struct nfsheur *nh;
756 	off_t off;
757 	int ioflag = 0;
758 
759 	info.mrep = nfsd->nd_mrep;
760 	info.mreq = NULL;
761 	info.md = nfsd->nd_md;
762 	info.dpos = nfsd->nd_dpos;
763 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
764 
765 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
766 	fhp = &nfh.fh_generic;
767 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
768 	if (info.v3) {
769 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
770 		off = fxdr_hyper(tl);
771 	} else {
772 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
773 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
774 	}
775 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
776 					    NFS_SRVMAXDATA(nfsd), &error));
777 
778 	/*
779 	 * Reference vp.  If an error occurs, vp will be invalid, but we
780 	 * have to NULL it just in case.  The macros might goto nfsmout
781 	 * as well.
782 	 */
783 
784 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
785 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
786 	if (error) {
787 		vp = NULL;
788 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
789 				      2 * NFSX_UNSIGNED, &error));
790 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
791 		error = 0;
792 		goto nfsmout;
793 	}
794 
795 	if (vp->v_type != VREG) {
796 		if (info.v3)
797 			error = EINVAL;
798 		else
799 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
800 	}
801 	if (!error) {
802 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
803 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
804 	}
805 	getret = VOP_GETATTR(vp, vap);
806 	if (!error)
807 		error = getret;
808 	if (error) {
809 		vput(vp);
810 		vp = NULL;
811 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
812 				      NFSX_POSTOPATTR(info.v3), &error));
813 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
814 		error = 0;
815 		goto nfsmout;
816 	}
817 
818 	/*
819 	 * Calculate byte count to read
820 	 */
821 
822 	if (off >= vap->va_size)
823 		cnt = 0;
824 	else if ((off + reqlen) > vap->va_size)
825 		cnt = vap->va_size - off;
826 	else
827 		cnt = reqlen;
828 
829 	/*
830 	 * Calculate seqcount for heuristic
831 	 */
832 
833 	{
834 		int hi;
835 		int try = 32;
836 
837 		/*
838 		 * Locate best candidate
839 		 */
840 
841 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
842 		nh = &nfsheur[hi];
843 
844 		while (try--) {
845 			if (nfsheur[hi].nh_vp == vp) {
846 				nh = &nfsheur[hi];
847 				break;
848 			}
849 			if (nfsheur[hi].nh_use > 0)
850 				--nfsheur[hi].nh_use;
851 			hi = (hi + 1) % NUM_HEURISTIC;
852 			if (nfsheur[hi].nh_use < nh->nh_use)
853 				nh = &nfsheur[hi];
854 		}
855 
856 		if (nh->nh_vp != vp) {
857 			nh->nh_vp = vp;
858 			nh->nh_nextr = off;
859 			nh->nh_use = NHUSE_INIT;
860 			if (off == 0)
861 				nh->nh_seqcount = 4;
862 			else
863 				nh->nh_seqcount = 1;
864 		}
865 
866 		/*
867 		 * Calculate heuristic
868 		 */
869 
870 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
871 			if (++nh->nh_seqcount > IO_SEQMAX)
872 				nh->nh_seqcount = IO_SEQMAX;
873 		} else if (nh->nh_seqcount > 1) {
874 			nh->nh_seqcount = 1;
875 		} else {
876 			nh->nh_seqcount = 0;
877 		}
878 		nh->nh_use += NHUSE_INC;
879 		if (nh->nh_use > NHUSE_MAX)
880 			nh->nh_use = NHUSE_MAX;
881 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
882         }
883 
884 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
885 			      NFSX_POSTOPORFATTR(info.v3) +
886 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
887 			      &error));
888 	if (info.v3) {
889 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
890 		*tl++ = nfs_true;
891 		fp = (struct nfs_fattr *)tl;
892 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
893 	} else {
894 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
895 		fp = (struct nfs_fattr *)tl;
896 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
897 	}
898 	len = left = nfsm_rndup(cnt);
899 	if (cnt > 0) {
900 		/*
901 		 * Generate the mbuf list with the uio_iov ref. to it.
902 		 */
903 		i = 0;
904 		m = m2 = info.mb;
905 		while (left > 0) {
906 			siz = min(M_TRAILINGSPACE(m), left);
907 			if (siz > 0) {
908 				left -= siz;
909 				i++;
910 			}
911 			if (left > 0) {
912 				m = m_getcl(MB_WAIT, MT_DATA, 0);
913 				m->m_len = 0;
914 				m2->m_next = m;
915 				m2 = m;
916 			}
917 		}
918 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
919 		       M_TEMP, M_WAITOK);
920 		uiop->uio_iov = iv2 = iv;
921 		m = info.mb;
922 		left = len;
923 		i = 0;
924 		while (left > 0) {
925 			if (m == NULL)
926 				panic("nfsrv_read iov");
927 			siz = min(M_TRAILINGSPACE(m), left);
928 			if (siz > 0) {
929 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
930 				iv->iov_len = siz;
931 				m->m_len += siz;
932 				left -= siz;
933 				iv++;
934 				i++;
935 			}
936 			m = m->m_next;
937 		}
938 		uiop->uio_iovcnt = i;
939 		uiop->uio_offset = off;
940 		uiop->uio_resid = len;
941 		uiop->uio_rw = UIO_READ;
942 		uiop->uio_segflg = UIO_SYSSPACE;
943 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
944 		off = uiop->uio_offset;
945 		nh->nh_nextr = off;
946 		FREE((caddr_t)iv2, M_TEMP);
947 		if (error || (getret = VOP_GETATTR(vp, vap))) {
948 			if (!error)
949 				error = getret;
950 			m_freem(info.mreq);
951 			info.mreq = NULL;
952 			vput(vp);
953 			vp = NULL;
954 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
955 					      NFSX_POSTOPATTR(info.v3),
956 					      &error));
957 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
958 			error = 0;
959 			goto nfsmout;
960 		}
961 	} else {
962 		uiop->uio_resid = 0;
963 	}
964 	vput(vp);
965 	vp = NULL;
966 	nfsm_srvfattr(nfsd, vap, fp);
967 	tlen = len - uiop->uio_resid;
968 	cnt = cnt < tlen ? cnt : tlen;
969 	tlen = nfsm_rndup(cnt);
970 	if (len != tlen || tlen != cnt)
971 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
972 	if (info.v3) {
973 		*tl++ = txdr_unsigned(cnt);
974 		if (len < reqlen)
975 			*tl++ = nfs_true;
976 		else
977 			*tl++ = nfs_false;
978 	}
979 	*tl = txdr_unsigned(cnt);
980 nfsmout:
981 	*mrq = info.mreq;
982 	if (vp)
983 		vput(vp);
984 	return(error);
985 }
986 
987 /*
988  * nfs write service
989  */
990 int
991 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
992 	    struct thread *td, struct mbuf **mrq)
993 {
994 	struct sockaddr *nam = nfsd->nd_nam;
995 	struct ucred *cred = &nfsd->nd_cr;
996 	struct iovec *ivp;
997 	int i, cnt;
998 	struct mbuf *mp1;
999 	struct nfs_fattr *fp;
1000 	struct iovec *iv;
1001 	struct vattr va, forat;
1002 	struct vattr *vap = &va;
1003 	u_int32_t *tl;
1004 	int error = 0, rdonly, len, forat_ret = 1;
1005 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1006 	int stable = NFSV3WRITE_FILESYNC;
1007 	struct vnode *vp = NULL;
1008 	struct mount *mp = NULL;
1009 	nfsfh_t nfh;
1010 	fhandle_t *fhp;
1011 	struct uio io, *uiop = &io;
1012 	struct nfsm_info info;
1013 	off_t off;
1014 
1015 	info.mrep = nfsd->nd_mrep;
1016 	info.mreq = NULL;
1017 	info.md = nfsd->nd_md;
1018 	info.dpos = nfsd->nd_dpos;
1019 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1020 
1021 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1022 	if (info.mrep == NULL) {
1023 		error = 0;
1024 		goto nfsmout;
1025 	}
1026 	fhp = &nfh.fh_generic;
1027 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1028 	if (info.v3) {
1029 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1030 		off = fxdr_hyper(tl);
1031 		tl += 3;
1032 		stable = fxdr_unsigned(int, *tl++);
1033 	} else {
1034 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1035 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1036 		tl += 2;
1037 		if (nfs_async)
1038 	    		stable = NFSV3WRITE_UNSTABLE;
1039 	}
1040 	retlen = len = fxdr_unsigned(int32_t, *tl);
1041 	cnt = i = 0;
1042 
1043 	/*
1044 	 * For NFS Version 2, it is not obvious what a write of zero length
1045 	 * should do, but I might as well be consistent with Version 3,
1046 	 * which is to return ok so long as there are no permission problems.
1047 	 */
1048 	if (len > 0) {
1049 	    zeroing = 1;
1050 	    mp1 = info.mrep;
1051 	    while (mp1) {
1052 		if (mp1 == info.md) {
1053 			zeroing = 0;
1054 			adjust = info.dpos - mtod(mp1, caddr_t);
1055 			mp1->m_len -= adjust;
1056 			if (mp1->m_len > 0 && adjust > 0)
1057 				mp1->m_data += adjust;
1058 		}
1059 		if (zeroing)
1060 			mp1->m_len = 0;
1061 		else if (mp1->m_len > 0) {
1062 			i += mp1->m_len;
1063 			if (i > len) {
1064 				mp1->m_len -= (i - len);
1065 				zeroing	= 1;
1066 			}
1067 			if (mp1->m_len > 0)
1068 				cnt++;
1069 		}
1070 		mp1 = mp1->m_next;
1071 	    }
1072 	}
1073 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1074 		error = EIO;
1075 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1076 				      2 * NFSX_UNSIGNED, &error));
1077 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1078 				 aftat_ret, vap);
1079 		error = 0;
1080 		goto nfsmout;
1081 	}
1082 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1083 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1084 	if (error) {
1085 		vp = NULL;
1086 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1087 				      2 * NFSX_UNSIGNED, &error));
1088 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1089 				 aftat_ret, vap);
1090 		error = 0;
1091 		goto nfsmout;
1092 	}
1093 	if (info.v3)
1094 		forat_ret = VOP_GETATTR(vp, &forat);
1095 	if (vp->v_type != VREG) {
1096 		if (info.v3)
1097 			error = EINVAL;
1098 		else
1099 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1100 	}
1101 	if (!error) {
1102 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1103 	}
1104 	if (error) {
1105 		vput(vp);
1106 		vp = NULL;
1107 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1108 				      NFSX_WCCDATA(info.v3), &error));
1109 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1110 				 aftat_ret, vap);
1111 		error = 0;
1112 		goto nfsmout;
1113 	}
1114 
1115 	if (len > 0) {
1116 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1117 		M_WAITOK);
1118 	    uiop->uio_iov = iv = ivp;
1119 	    uiop->uio_iovcnt = cnt;
1120 	    mp1 = info.mrep;
1121 	    while (mp1) {
1122 		if (mp1->m_len > 0) {
1123 			ivp->iov_base = mtod(mp1, caddr_t);
1124 			ivp->iov_len = mp1->m_len;
1125 			ivp++;
1126 		}
1127 		mp1 = mp1->m_next;
1128 	    }
1129 
1130 	    /*
1131 	     * XXX
1132 	     * The IO_METASYNC flag indicates that all metadata (and not just
1133 	     * enough to ensure data integrity) must be written to stable storage
1134 	     * synchronously.
1135 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1136 	     */
1137 	    if (stable == NFSV3WRITE_UNSTABLE)
1138 		ioflags = IO_NODELOCKED;
1139 	    else if (stable == NFSV3WRITE_DATASYNC)
1140 		ioflags = (IO_SYNC | IO_NODELOCKED);
1141 	    else
1142 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1143 	    uiop->uio_resid = len;
1144 	    uiop->uio_rw = UIO_WRITE;
1145 	    uiop->uio_segflg = UIO_SYSSPACE;
1146 	    uiop->uio_td = NULL;
1147 	    uiop->uio_offset = off;
1148 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1149 	    nfsstats.srvvop_writes++;
1150 	    FREE((caddr_t)iv, M_TEMP);
1151 	}
1152 	aftat_ret = VOP_GETATTR(vp, vap);
1153 	vput(vp);
1154 	vp = NULL;
1155 	if (!error)
1156 		error = aftat_ret;
1157 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1158 			      NFSX_PREOPATTR(info.v3) +
1159 			      NFSX_POSTOPORFATTR(info.v3) +
1160 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1161 			      &error));
1162 	if (info.v3) {
1163 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1164 				 aftat_ret, vap);
1165 		if (error) {
1166 			error = 0;
1167 			goto nfsmout;
1168 		}
1169 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1170 		*tl++ = txdr_unsigned(retlen);
1171 		/*
1172 		 * If nfs_async is set, then pretend the write was FILESYNC.
1173 		 */
1174 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1175 			*tl++ = txdr_unsigned(stable);
1176 		else
1177 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1178 		/*
1179 		 * Actually, there is no need to txdr these fields,
1180 		 * but it may make the values more human readable,
1181 		 * for debugging purposes.
1182 		 */
1183 		if (nfsver.tv_sec == 0)
1184 			nfsver = boottime;
1185 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1186 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1187 	} else {
1188 		fp = nfsm_build(&info, NFSX_V2FATTR);
1189 		nfsm_srvfattr(nfsd, vap, fp);
1190 	}
1191 nfsmout:
1192 	*mrq = info.mreq;
1193 	if (vp)
1194 		vput(vp);
1195 	return(error);
1196 }
1197 
1198 /*
1199  * NFS write service with write gathering support. Called when
1200  * nfsrvw_procrastinate > 0.
1201  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1202  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1203  * Jan. 1994.
 *
 * The function works in three phases:
 *
 *  1. If *ndp is non-NULL the request is taken over (*ndp is cleared), its
 *     write header is parsed, its mbuf chain is trimmed down to the write
 *     data, and it is inserted into slp->ns_tq (ordered by nd_time) and,
 *     if it still carries request data, into the delay hash chain selected
 *     by its file handle, where it may be coalesced with other requests.
 *  2. Every request on slp->ns_tq whose nd_time has passed and which has no
 *     reply yet is written with VOP_WRITE(); a reply is then built for it
 *     and for every request on its nd_coalesce list.
 *  3. The first request on slp->ns_tq that has a reply (nd_mreq) is removed
 *     from the queue and handed back to the caller.
 *
 * ndp - in: request to queue (may point to NULL); out: request whose reply
 *       is ready, or NULL when none is
 * slp - server socket state holding the time queue and the delay hash
 * td  - thread, passed through to nfsrv_access()
 * mrq - out: nd_mreq of the request returned in *ndp, or NULL
 *
 * Always returns 0; a per-request error is handed to nfsm_writereply() when
 * that request's reply is built.
1204  */
1205 int
1206 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1207 		  struct thread *td, struct mbuf **mrq)
1208 {
1209 	struct iovec *ivp;
1210 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1211 	struct nfs_fattr *fp;
1212 	int i;
1213 	struct iovec *iov;
1214 	struct nfsrvw_delayhash *wpp;
1215 	struct ucred *cred;
1216 	struct vattr va, forat;
1217 	u_int32_t *tl;
1218 	int error = 0, rdonly, len, forat_ret = 1;
1219 	int ioflags, aftat_ret = 1, adjust, zeroing;
1220 	struct mbuf *mp1;
1221 	struct vnode *vp = NULL;
1222 	struct mount *mp = NULL;
1223 	struct uio io, *uiop = &io;
1224 	u_quad_t cur_usec;
1225 	struct nfsm_info info;
1226 
1227 	info.mreq = NULL;
1228 
1229 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Presumably only here to silence "may be used uninitialized" warnings. */
1230 #ifndef nolint
1231 	i = 0;
1232 	len = 0;
1233 #endif
	/*
	 * Phase 1: take over the new request, if one was passed in.
	 */
1234 	if (*ndp) {
1235 	    nfsd = *ndp;
1236 	    *ndp = NULL;
1237 	    info.mrep = nfsd->nd_mrep;
1238 	    info.mreq = NULL;
1239 	    info.md = nfsd->nd_md;
1240 	    info.dpos = nfsd->nd_dpos;
1241 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1242 	    cred = &nfsd->nd_cr;
1243 	    LIST_INIT(&nfsd->nd_coalesce);
1244 	    nfsd->nd_mreq = NULL;
1245 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
	    /*
	     * nd_time is the time after which phase 2 will perform this
	     * write: now plus the procrastination delay for the protocol
	     * version in use.
	     */
1246 	    cur_usec = nfs_curusec();
1247 	    nfsd->nd_time = cur_usec +
1248 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1249 
1250 	    /*
1251 	     * Now, get the write header..
	     *
	     * v3: five words are dissected; the first two form the 64-bit
	     * offset, the fourth is the stability level and the fifth the
	     * length.  v2: four words are dissected; the second is the
	     * offset and the fourth the length.
	     *
	     * NOTE(review): NEGREPLYOUT/NULLOUT presumably jump to the
	     * nfsmout label below on failure -- confirm against the macro
	     * definitions.
1252 	     */
1253 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1254 	    if (info.v3) {
1255 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1256 		nfsd->nd_off = fxdr_hyper(tl);
1257 		tl += 3;
1258 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1259 	    } else {
1260 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1261 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1262 		tl += 2;
1263 		if (nfs_async)
1264 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1265 	    }
1266 	    len = fxdr_unsigned(int32_t, *tl);
1267 	    nfsd->nd_len = len;
1268 	    nfsd->nd_eoff = nfsd->nd_off + len;
1269 
1270 	    /*
1271 	     * Trim the header out of the mbuf list and trim off any trailing
1272 	     * junk so that the mbuf list has only the write data.
	     *
	     * Mbufs ahead of info.md get length 0.  info.md itself is
	     * shortened by the bytes already consumed (adjust).  From there
	     * on 'i' accumulates the data bytes seen; once it exceeds len
	     * the excess is cut from the current mbuf and every following
	     * mbuf gets length 0.
1273 	     */
1274 	    zeroing = 1;
1275 	    i = 0;
1276 	    mp1 = info.mrep;
1277 	    while (mp1) {
1278 		if (mp1 == info.md) {
1279 		    zeroing = 0;
1280 		    adjust = info.dpos - mtod(mp1, caddr_t);
1281 		    mp1->m_len -= adjust;
1282 		    if (mp1->m_len > 0 && adjust > 0)
1283 			mp1->m_data += adjust;
1284 		}
1285 		if (zeroing)
1286 		    mp1->m_len = 0;
1287 		else {
1288 		    i += mp1->m_len;
1289 		    if (i > len) {
1290 			mp1->m_len -= (i - len);
1291 			zeroing = 1;
1292 		    }
1293 		}
1294 		mp1 = mp1->m_next;
1295 	    }
	    /*
	     * Reject an oversized or negative length, or a request that
	     * carries fewer data bytes than it claims.  The request data is
	     * freed and an EIO reply is built right away.  Setting nd_time
	     * to 0 makes the insertion below place the request at the head
	     * of the time queue, and clearing nd_mrep keeps it out of the
	     * delay hash.
	     */
1296 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1297 nfsmout:
1298 		m_freem(info.mrep);
1299 		info.mrep = NULL;
1300 		error = EIO;
1301 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1302 		if (info.v3) {
1303 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1304 				     aftat_ret, &va);
1305 		}
1306 		nfsd->nd_mreq = info.mreq;
1307 		nfsd->nd_mrep = NULL;
1308 		nfsd->nd_time = 0;
1309 	    }
1310 
1311 	    /*
1312 	     * Add this entry to the hash and time queues.
	     *
	     * The time queue is kept sorted by ascending nd_time: the new
	     * entry goes after the last entry with a smaller nd_time.
1313 	     */
1314 	    owp = NULL;
1315 	    wp = slp->ns_tq.lh_first;
1316 	    while (wp && wp->nd_time < nfsd->nd_time) {
1317 		owp = wp;
1318 		wp = wp->nd_tq.le_next;
1319 	    }
1320 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1321 	    if (owp) {
1322 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1323 	    } else {
1324 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1325 	    }
	    /*
	     * Only requests that still hold write data go into the delay
	     * hash.  The first loop skips entries with a different file
	     * handle; the second skips entries for the same file handle
	     * that start at a lower offset.
	     */
1326 	    if (nfsd->nd_mrep) {
1327 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1328 		owp = NULL;
1329 		wp = wpp->lh_first;
1330 		while (wp &&
1331 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1332 		    owp = wp;
1333 		    wp = wp->nd_hash.le_next;
1334 		}
1335 		while (wp && wp->nd_off < nfsd->nd_off &&
1336 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1337 		    owp = wp;
1338 		    wp = wp->nd_hash.le_next;
1339 		}
1340 		if (owp) {
1341 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1342 
1343 		    /*
1344 		     * Search the hash list for overlapping entries and
1345 		     * coalesce.
		     *
		     * Starting with the new entry, walk forward while
		     * NFSW_CONTIG(owp, entry) holds; entries for which
		     * NFSW_SAMECRED() also holds are merged into owp.  The
		     * successor is fetched before the merge because
		     * nfsrvw_coalesce() unlinks the entry from the chain.
1346 		     */
1347 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1348 			wp = nfsd->nd_hash.le_next;
1349 			if (NFSW_SAMECRED(owp, nfsd))
1350 			    nfsrvw_coalesce(owp, nfsd);
1351 		    }
1352 		} else {
1353 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1354 		}
1355 	    }
1356 	}
1357 
1358 	/*
1359 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1360 	 * and generate the associated reply mbuf list(s).
	 *
	 * (Phase 2.)  The scan stops at the first entry whose nd_time is
	 * still in the future and skips entries that already have a reply.
1361 	 */
1362 loop1:
1363 	cur_usec = nfs_curusec();
1364 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1365 		owp = nfsd->nd_tq.le_next;
1366 		if (nfsd->nd_time > cur_usec)
1367 		    break;
1368 		if (nfsd->nd_mreq)
1369 		    continue;
1370 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1371 		LIST_REMOVE(nfsd, nd_tq);
1372 		LIST_REMOVE(nfsd, nd_hash);
1373 		info.mrep = nfsd->nd_mrep;
1374 		info.mreq = NULL;
1375 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1376 		nfsd->nd_mrep = NULL;
1377 		cred = &nfsd->nd_cr;
1378 		forat_ret = aftat_ret = 1;
		/*
		 * Translate the file handle to a vnode, fetch the pre-op
		 * attributes (v3 only), and refuse anything that is not a
		 * regular file before checking write access.
		 */
1379 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1380 				     nfsd->nd_nam, &rdonly,
1381 				     (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1382 		if (!error) {
1383 		    if (info.v3)
1384 			forat_ret = VOP_GETATTR(vp, &forat);
1385 		    if (vp->v_type != VREG) {
1386 			if (info.v3)
1387 			    error = EINVAL;
1388 			else
1389 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1390 		    }
1391 		} else {
1392 		    vp = NULL;
1393 		}
1394 		if (!error) {
1395 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1396 		}
1397 
		/* Map the requested stability level to VOP_WRITE() I/O flags. */
1398 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1399 		    ioflags = IO_NODELOCKED;
1400 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1401 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1402 		else
1403 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1404 		uiop->uio_rw = UIO_WRITE;
1405 		uiop->uio_segflg = UIO_SYSSPACE;
1406 		uiop->uio_td = NULL;
1407 		uiop->uio_offset = nfsd->nd_off;
		/*
		 * The byte count runs from nd_off to nd_eoff; nd_eoff may
		 * have been advanced by nfsrvw_coalesce().
		 */
1408 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1409 		if (uiop->uio_resid > 0) {
		    /*
		     * First pass counts the non-empty mbufs, second pass
		     * fills one iovec per non-empty mbuf.
		     */
1410 		    mp1 = info.mrep;
1411 		    i = 0;
1412 		    while (mp1) {
1413 			if (mp1->m_len > 0)
1414 			    i++;
1415 			mp1 = mp1->m_next;
1416 		    }
1417 		    uiop->uio_iovcnt = i;
1418 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1419 			M_TEMP, M_WAITOK);
1420 		    uiop->uio_iov = ivp = iov;
1421 		    mp1 = info.mrep;
1422 		    while (mp1) {
1423 			if (mp1->m_len > 0) {
1424 			    ivp->iov_base = mtod(mp1, caddr_t);
1425 			    ivp->iov_len = mp1->m_len;
1426 			    ivp++;
1427 			}
1428 			mp1 = mp1->m_next;
1429 		    }
		    /* The write is skipped if any earlier step failed. */
1430 		    if (!error) {
1431 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1432 			nfsstats.srvvop_writes++;
1433 		    }
1434 		    FREE((caddr_t)iov, M_TEMP);
1435 		}
1436 		m_freem(info.mrep);
1437 		info.mrep = NULL;
1438 		if (vp) {
1439 		    aftat_ret = VOP_GETATTR(vp, &va);
1440 		    vput(vp);
1441 		    vp = NULL;
1442 		}
1443 
1444 		/*
1445 		 * Loop around generating replies for all write rpcs that have
1446 		 * now been completed.
		 *
		 * swp stays on the request that was just written; nfsd walks
		 * swp first and then each entry taken off swp's nd_coalesce
		 * list.  Every reply uses the same error and attributes, its
		 * own nd_len, and swp's stability level.
1447 		 */
1448 		swp = nfsd;
1449 		do {
1450 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1451 		    if (error) {
1452 			nfsm_writereply(&info, nfsd, slp, error,
1453 					NFSX_WCCDATA(info.v3));
1454 			if (info.v3) {
1455 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1456 					     aftat_ret, &va);
1457 			}
1458 		    } else {
1459 			nfsm_writereply(&info, nfsd, slp, error,
1460 					NFSX_PREOPATTR(info.v3) +
1461 					NFSX_POSTOPORFATTR(info.v3) +
1462 					2 * NFSX_UNSIGNED +
1463 					NFSX_WRITEVERF(info.v3));
1464 			if (info.v3) {
1465 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1466 					     aftat_ret, &va);
1467 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1468 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1469 			    *tl++ = txdr_unsigned(swp->nd_stable);
1470 			    /*
1471 			     * Actually, there is no need to txdr these fields,
1472 			     * but it may make the values more human readable,
1473 			     * for debugging purposes.
1474 			     */
1475 			    if (nfsver.tv_sec == 0)
1476 				    nfsver = boottime;
1477 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1478 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1479 			} else {
1480 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1481 			    nfsm_srvfattr(nfsd, &va, fp);
1482 			}
1483 		    }
1484 		    nfsd->nd_mreq = info.mreq;
1485 		    if (nfsd->nd_mrep)
1486 			panic("nfsrv_write: nd_mrep not free");
1487 
1488 		    /*
1489 		     * Done. Put it at the head of the timer queue so that
1490 		     * the final phase can return the reply.
		     *
		     * swp itself is re-queued after the loop; the next
		     * coalesced entry is unlinked from swp's nd_coalesce
		     * list (which is threaded through nd_tq) here.
1491 		     */
1492 		    if (nfsd != swp) {
1493 			nfsd->nd_time = 0;
1494 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1495 		    }
1496 		    nfsd = swp->nd_coalesce.lh_first;
1497 		    if (nfsd) {
1498 			LIST_REMOVE(nfsd, nd_tq);
1499 		    }
1500 		} while (nfsd);
1501 		swp->nd_time = 0;
1502 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
		/* Restart the scan from the queue head with a fresh timestamp. */
1503 		goto loop1;
1504 	}
1505 
1506 	/*
1507 	 * Search for a reply to return.
	 *
	 * (Phase 3.)  The first queued request that has a reply is removed
	 * from the time queue and returned; nfsd is NULL after the loop if
	 * there is none.
1508 	 */
1509 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1510 		if (nfsd->nd_mreq) {
1511 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1512 		    LIST_REMOVE(nfsd, nd_tq);
1513 		    break;
1514 		}
1515 	}
1516 	if (nfsd) {
1517 		*ndp = nfsd;
1518 		*mrq = nfsd->nd_mreq;
1519 	} else {
1520 		*ndp = NULL;
1521 		*mrq = NULL;
1522 	}
1523 	return (0);
1524 }
1525 
1526 /*
1527  * Coalesce the write request nfsd into owp. To do this we must:
1528  * - remove nfsd from the queues
1529  * - merge nfsd->nd_mrep into owp->nd_mrep
1530  * - update the nd_eoff and nd_stable for owp
1531  * - put nfsd on owp's nd_coalesce list
1532  * NB: Must be called at splsoftclock().
1533  */
1534 static void
1535 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1536 {
1537         int overlap;
1538         struct mbuf *mp1;
1539 	struct nfsrv_descript *p;
1540 
1541 	NFS_DPF(WG, ("C%03x-%03x",
1542 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1543         LIST_REMOVE(nfsd, nd_hash);
1544         LIST_REMOVE(nfsd, nd_tq);
1545         if (owp->nd_eoff < nfsd->nd_eoff) {
1546             overlap = owp->nd_eoff - nfsd->nd_off;
1547             if (overlap < 0)
1548                 panic("nfsrv_coalesce: bad off");
1549             if (overlap > 0)
1550                 m_adj(nfsd->nd_mrep, overlap);
1551             mp1 = owp->nd_mrep;
1552             while (mp1->m_next)
1553                 mp1 = mp1->m_next;
1554             mp1->m_next = nfsd->nd_mrep;
1555             owp->nd_eoff = nfsd->nd_eoff;
1556         } else
1557             m_freem(nfsd->nd_mrep);
1558         nfsd->nd_mrep = NULL;
1559         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1560             owp->nd_stable = NFSV3WRITE_FILESYNC;
1561         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1562             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1563             owp->nd_stable = NFSV3WRITE_DATASYNC;
1564         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1565 
1566 	/*
1567 	 * If nfsd had anything else coalesced into it, transfer them
1568 	 * to owp, otherwise their replies will never get sent.
1569 	 */
1570 	for (p = nfsd->nd_coalesce.lh_first; p;
1571 	     p = nfsd->nd_coalesce.lh_first) {
1572 	    LIST_REMOVE(p, nd_tq);
1573 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1574 	}
1575 }
1576 
1577 /*
1578  * nfs create service
1579  * now does a truncate to 0 length via. setattr if it already exists
1580  */
1581 int
1582 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1583 	     struct thread *td, struct mbuf **mrq)
1584 {
1585 	struct sockaddr *nam = nfsd->nd_nam;
1586 	struct ucred *cred = &nfsd->nd_cr;
1587 	struct nfs_fattr *fp;
1588 	struct vattr va, dirfor, diraft;
1589 	struct vattr *vap = &va;
1590 	struct nfsv2_sattr *sp;
1591 	u_int32_t *tl;
1592 	struct nlookupdata nd;
1593 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1594 	udev_t rdev = NOUDEV;
1595 	caddr_t cp;
1596 	int how, exclusive_flag = 0;
1597 	struct vnode *dirp;
1598 	struct vnode *dvp;
1599 	struct vnode *vp;
1600 	struct mount *mp;
1601 	nfsfh_t nfh;
1602 	fhandle_t *fhp;
1603 	u_quad_t tempsize;
1604 	u_char cverf[NFSX_V3CREATEVERF];
1605 	struct nfsm_info info;
1606 
1607 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1608 	nlookup_zero(&nd);
1609 	dirp = NULL;
1610 	dvp = NULL;
1611 	vp = NULL;
1612 
1613 	info.mrep = nfsd->nd_mrep;
1614 	info.mreq = NULL;
1615 	info.md = nfsd->nd_md;
1616 	info.dpos = nfsd->nd_dpos;
1617 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1618 
1619 	fhp = &nfh.fh_generic;
1620 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1621 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1622 
1623 	/*
1624 	 * Call namei and do initial cleanup to get a few things
1625 	 * out of the way.  If we get an initial error we cleanup
1626 	 * and return here to avoid special-casing the invalid nd
1627 	 * structure through the rest of the case.  dirp may be
1628 	 * set even if an error occurs, but the nd structure will not
1629 	 * be valid at all if an error occurs so we have to invalidate it
1630 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1631 	 */
1632 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1633 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1634 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1635 	mp = vfs_getvfs(&fhp->fh_fsid);
1636 
1637 	if (dirp) {
1638 		if (info.v3) {
1639 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1640 		} else {
1641 			vrele(dirp);
1642 			dirp = NULL;
1643 		}
1644 	}
1645 	if (error) {
1646 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1647 				      NFSX_WCCDATA(info.v3), &error));
1648 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1649 				 diraft_ret, &diraft);
1650 		error = 0;
1651 		goto nfsmout;
1652 	}
1653 
1654 	/*
1655 	 * No error.  Continue.  State:
1656 	 *
1657 	 *	dirp 		may be valid
1658 	 *	vp		may be valid or NULL if the target does not
1659 	 *			exist.
1660 	 *	dvp		is valid
1661 	 *
1662 	 * The error state is set through the code and we may also do some
1663 	 * opportunistic releasing of vnodes to avoid holding locks through
1664 	 * NFS I/O.  The cleanup at the end is a catch-all
1665 	 */
1666 
1667 	VATTR_NULL(vap);
1668 	if (info.v3) {
1669 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1670 		how = fxdr_unsigned(int, *tl);
1671 		switch (how) {
1672 		case NFSV3CREATE_GUARDED:
1673 			if (vp) {
1674 				error = EEXIST;
1675 				break;
1676 			}
1677 			/* fall through */
1678 		case NFSV3CREATE_UNCHECKED:
1679 			ERROROUT(nfsm_srvsattr(&info, vap));
1680 			break;
1681 		case NFSV3CREATE_EXCLUSIVE:
1682 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1683 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1684 			exclusive_flag = 1;
1685 			break;
1686 		};
1687 		vap->va_type = VREG;
1688 	} else {
1689 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1690 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1691 		if (vap->va_type == VNON)
1692 			vap->va_type = VREG;
1693 		vap->va_mode = nfstov_mode(sp->sa_mode);
1694 		switch (vap->va_type) {
1695 		case VREG:
1696 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1697 			if (tsize != -1)
1698 				vap->va_size = (u_quad_t)tsize;
1699 			break;
1700 		case VCHR:
1701 		case VBLK:
1702 		case VFIFO:
1703 			rdev = fxdr_unsigned(long, sp->sa_size);
1704 			break;
1705 		default:
1706 			break;
1707 		};
1708 	}
1709 
1710 	/*
1711 	 * Iff doesn't exist, create it
1712 	 * otherwise just truncate to 0 length
1713 	 *   should I set the mode too ?
1714 	 *
1715 	 * The only possible error we can have at this point is EEXIST.
1716 	 * nd.ni_vp will also be non-NULL in that case.
1717 	 */
1718 	if (vp == NULL) {
1719 		if (vap->va_mode == (mode_t)VNOVAL)
1720 			vap->va_mode = 0;
1721 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1722 			vn_unlock(dvp);
1723 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1724 					    nd.nl_cred, vap);
1725 			vrele(dvp);
1726 			dvp = NULL;
1727 			if (error == 0) {
1728 				if (exclusive_flag) {
1729 					exclusive_flag = 0;
1730 					VATTR_NULL(vap);
1731 					bcopy(cverf, (caddr_t)&vap->va_atime,
1732 						NFSX_V3CREATEVERF);
1733 					error = VOP_SETATTR(vp, vap, cred);
1734 				}
1735 			}
1736 		} else if (
1737 			vap->va_type == VCHR ||
1738 			vap->va_type == VBLK ||
1739 			vap->va_type == VFIFO
1740 		) {
1741 			/*
1742 			 * Handle SysV FIFO node special cases.  All other
1743 			 * devices require super user to access.
1744 			 */
1745 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1746 				vap->va_type = VFIFO;
1747                         if (vap->va_type != VFIFO &&
1748                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1749 				goto nfsmreply0;
1750                         }
1751 			vap->va_rmajor = umajor(rdev);
1752 			vap->va_rminor = uminor(rdev);
1753 
1754 			vn_unlock(dvp);
1755 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1756 			vrele(dvp);
1757 			dvp = NULL;
1758 			if (error)
1759 				goto nfsmreply0;
1760 #if 0
1761 			/*
1762 			 * XXX what is this junk supposed to do ?
1763 			 */
1764 
1765 			vput(vp);
1766 			vp = NULL;
1767 
1768 			/*
1769 			 * release dvp prior to lookup
1770 			 */
1771 			vput(dvp);
1772 			dvp = NULL;
1773 
1774 			/*
1775 			 * Setup for lookup.
1776 			 *
1777 			 * Even though LOCKPARENT was cleared, ni_dvp may
1778 			 * be garbage.
1779 			 */
1780 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1781 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1782 			nd.ni_cnd.cn_td = td;
1783 			nd.ni_cnd.cn_cred = cred;
1784 
1785 			error = lookup(&nd);
1786 			nd.ni_dvp = NULL;
1787 
1788 			if (error != 0) {
1789 				NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1790 						      0, &error));
1791 				/* fall through on certain errors */
1792 			}
1793 			nfsrv_object_create(nd.ni_vp);
1794 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1795 				error = EINVAL;
1796 				goto nfsmreply0;
1797 			}
1798 #endif
1799 		} else {
1800 			error = ENXIO;
1801 		}
1802 	} else {
1803 		if (vap->va_size != -1) {
1804 			error = nfsrv_access(mp, vp, VWRITE, cred,
1805 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1806 			if (!error) {
1807 				tempsize = vap->va_size;
1808 				VATTR_NULL(vap);
1809 				vap->va_size = tempsize;
1810 				error = VOP_SETATTR(vp, vap, cred);
1811 			}
1812 		}
1813 	}
1814 
1815 	if (!error) {
1816 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1817 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1818 		if (!error)
1819 			error = VOP_GETATTR(vp, vap);
1820 	}
1821 	if (info.v3) {
1822 		if (exclusive_flag && !error &&
1823 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1824 			error = EEXIST;
1825 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1826 		vrele(dirp);
1827 		dirp = NULL;
1828 	}
1829 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1830 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1831 			      NFSX_WCCDATA(info.v3),
1832 			      &error));
1833 	if (info.v3) {
1834 		if (!error) {
1835 			nfsm_srvpostop_fh(&info, fhp);
1836 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1837 		}
1838 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1839 				 diraft_ret, &diraft);
1840 		error = 0;
1841 	} else {
1842 		nfsm_srvfhtom(&info, fhp);
1843 		fp = nfsm_build(&info, NFSX_V2FATTR);
1844 		nfsm_srvfattr(nfsd, vap, fp);
1845 	}
1846 	goto nfsmout;
1847 
1848 nfsmreply0:
1849 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1850 	error = 0;
1851 	/* fall through */
1852 
1853 nfsmout:
1854 	*mrq = info.mreq;
1855 	if (dirp)
1856 		vrele(dirp);
1857 	nlookup_done(&nd);
1858 	if (dvp) {
1859 		if (dvp == vp)
1860 			vrele(dvp);
1861 		else
1862 			vput(dvp);
1863 	}
1864 	if (vp)
1865 		vput(vp);
1866 	return (error);
1867 }
1868 
1869 /*
1870  * nfs v3 mknod service
1871  */
1872 int
1873 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1874 	    struct thread *td, struct mbuf **mrq)
1875 {
1876 	struct sockaddr *nam = nfsd->nd_nam;
1877 	struct ucred *cred = &nfsd->nd_cr;
1878 	struct vattr va, dirfor, diraft;
1879 	struct vattr *vap = &va;
1880 	u_int32_t *tl;
1881 	struct nlookupdata nd;
1882 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1883 	enum vtype vtyp;
1884 	struct vnode *dirp;
1885 	struct vnode *dvp;
1886 	struct vnode *vp;
1887 	nfsfh_t nfh;
1888 	fhandle_t *fhp;
1889 	struct nfsm_info info;
1890 
1891 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1892 	nlookup_zero(&nd);
1893 	dirp = NULL;
1894 	dvp = NULL;
1895 	vp = NULL;
1896 
1897 	info.mrep = nfsd->nd_mrep;
1898 	info.mreq = NULL;
1899 	info.md = nfsd->nd_md;
1900 	info.dpos = nfsd->nd_dpos;
1901 
1902 	fhp = &nfh.fh_generic;
1903 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1904 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1905 
1906 	/*
1907 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1908 	 * is not valid.  However, nfsm_*() routines may still jump to
1909 	 * nfsmout.
1910 	 */
1911 
1912 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1913 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1914 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1915 	if (dirp)
1916 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1917 	if (error) {
1918 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1919 			   NFSX_WCCDATA(1), &error));
1920 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1921 				 diraft_ret, &diraft);
1922 		error = 0;
1923 		goto nfsmout;
1924 	}
1925 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1926 	vtyp = nfsv3tov_type(*tl);
1927 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1928 		error = NFSERR_BADTYPE;
1929 		goto out;
1930 	}
1931 	VATTR_NULL(vap);
1932 	ERROROUT(nfsm_srvsattr(&info, vap));
1933 	if (vtyp == VCHR || vtyp == VBLK) {
1934 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1935 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1936 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1937 	}
1938 
1939 	/*
1940 	 * Iff doesn't exist, create it.
1941 	 */
1942 	if (vp) {
1943 		error = EEXIST;
1944 		goto out;
1945 	}
1946 	vap->va_type = vtyp;
1947 	if (vap->va_mode == (mode_t)VNOVAL)
1948 		vap->va_mode = 0;
1949 	if (vtyp == VSOCK) {
1950 		vn_unlock(dvp);
1951 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1952 		vrele(dvp);
1953 		dvp = NULL;
1954 	} else {
1955 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1956 			goto out;
1957 
1958 		vn_unlock(dvp);
1959 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1960 		vrele(dvp);
1961 		dvp = NULL;
1962 		if (error)
1963 			goto out;
1964 	}
1965 
1966 	/*
1967 	 * send response, cleanup, return.
1968 	 */
1969 out:
1970 	nlookup_done(&nd);
1971 	if (dvp) {
1972 		if (dvp == vp)
1973 			vrele(dvp);
1974 		else
1975 			vput(dvp);
1976 		dvp = NULL;
1977 	}
1978 	if (!error) {
1979 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1980 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1981 		if (!error)
1982 			error = VOP_GETATTR(vp, vap);
1983 	}
1984 	if (vp) {
1985 		vput(vp);
1986 		vp = NULL;
1987 	}
1988 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1989 	if (dirp) {
1990 		vrele(dirp);
1991 		dirp = NULL;
1992 	}
1993 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1994 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1995 			      NFSX_WCCDATA(1), &error));
1996 	if (!error) {
1997 		nfsm_srvpostop_fh(&info, fhp);
1998 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1999 	}
2000 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2001 			 diraft_ret, &diraft);
2002 	*mrq = info.mreq;
2003 	return (0);
2004 nfsmout:
2005 	*mrq = info.mreq;
2006 	if (dirp)
2007 		vrele(dirp);
2008 	nlookup_done(&nd);
2009 	if (dvp) {
2010 		if (dvp == vp)
2011 			vrele(dvp);
2012 		else
2013 			vput(dvp);
2014 	}
2015 	if (vp)
2016 		vput(vp);
2017 	return (error);
2018 }
2019 
2020 /*
2021  * nfs remove service
2022  */
2023 int
2024 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2025 	     struct thread *td, struct mbuf **mrq)
2026 {
2027 	struct sockaddr *nam = nfsd->nd_nam;
2028 	struct ucred *cred = &nfsd->nd_cr;
2029 	struct nlookupdata nd;
2030 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2031 	struct vnode *dirp;
2032 	struct vnode *dvp;
2033 	struct vnode *vp;
2034 	struct vattr dirfor, diraft;
2035 	nfsfh_t nfh;
2036 	fhandle_t *fhp;
2037 	struct nfsm_info info;
2038 
2039 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2040 	nlookup_zero(&nd);
2041 	dirp = NULL;
2042 	dvp = NULL;
2043 	vp = NULL;
2044 
2045 	info.mrep = nfsd->nd_mrep;
2046 	info.mreq = NULL;
2047 	info.md = nfsd->nd_md;
2048 	info.dpos = nfsd->nd_dpos;
2049 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2050 
2051 	fhp = &nfh.fh_generic;
2052 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2053 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2054 
2055 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2056 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2057 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2058 	if (dirp) {
2059 		if (info.v3)
2060 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2061 	}
2062 	if (error == 0) {
2063 		if (vp->v_type == VDIR) {
2064 			error = EPERM;		/* POSIX */
2065 			goto out;
2066 		}
2067 		/*
2068 		 * The root of a mounted filesystem cannot be deleted.
2069 		 */
2070 		if (vp->v_flag & VROOT) {
2071 			error = EBUSY;
2072 			goto out;
2073 		}
2074 out:
2075 		if (!error) {
2076 			if (dvp != vp)
2077 				vn_unlock(dvp);
2078 			if (vp) {
2079 				vput(vp);
2080 				vp = NULL;
2081 			}
2082 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2083 			vrele(dvp);
2084 			dvp = NULL;
2085 		}
2086 	}
2087 	if (dirp && info.v3)
2088 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2089 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2090 	if (info.v3) {
2091 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2092 				 diraft_ret, &diraft);
2093 		error = 0;
2094 	}
2095 nfsmout:
2096 	*mrq = info.mreq;
2097 	nlookup_done(&nd);
2098 	if (dirp)
2099 		vrele(dirp);
2100 	if (dvp) {
2101 		if (dvp == vp)
2102 			vrele(dvp);
2103 		else
2104 			vput(dvp);
2105 	}
2106 	if (vp)
2107 		vput(vp);
2108 	return(error);
2109 }
2110 
2111 /*
2112  * nfs rename service
2113  */
2114 int
2115 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2116 	     struct thread *td, struct mbuf **mrq)
2117 {
2118 	struct sockaddr *nam = nfsd->nd_nam;
2119 	struct ucred *cred = &nfsd->nd_cr;
2120 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2121 	int tdirfor_ret = 1, tdiraft_ret = 1;
2122 	struct nlookupdata fromnd, tond;
2123 	struct vnode *fvp, *fdirp, *fdvp;
2124 	struct vnode *tvp, *tdirp, *tdvp;
2125 	struct namecache *ncp;
2126 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2127 	nfsfh_t fnfh, tnfh;
2128 	fhandle_t *ffhp, *tfhp;
2129 	uid_t saved_uid;
2130 	struct nfsm_info info;
2131 
2132 	info.mrep = nfsd->nd_mrep;
2133 	info.mreq = NULL;
2134 	info.md = nfsd->nd_md;
2135 	info.dpos = nfsd->nd_dpos;
2136 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2137 
2138 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2139 #ifndef nolint
2140 	fvp = NULL;
2141 #endif
2142 	ffhp = &fnfh.fh_generic;
2143 	tfhp = &tnfh.fh_generic;
2144 
2145 	/*
2146 	 * Clear fields in case a goto nfsmout occurs from a macro.
2147 	 */
2148 
2149 	nlookup_zero(&fromnd);
2150 	nlookup_zero(&tond);
2151 	fdirp = NULL;
2152 	tdirp = NULL;
2153 
2154 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2155 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2156 
2157 	/*
2158 	 * Remember our original uid so that we can reset cr_uid before
2159 	 * the second nfs_namei() call, in case it is remapped.
2160 	 */
2161 	saved_uid = cred->cr_uid;
2162 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2163 			  NULL, NULL,
2164 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2165 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2166 	if (fdirp) {
2167 		if (info.v3)
2168 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2169 	}
2170 	if (error) {
2171 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2172 				      2 * NFSX_WCCDATA(info.v3), &error));
2173 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2174 				 fdiraft_ret, &fdiraft);
2175 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2176 				 tdiraft_ret, &tdiraft);
2177 		error = 0;
2178 		goto nfsmout;
2179 	}
2180 
2181 	/*
2182 	 * We have to unlock the from ncp before we can safely lookup
2183 	 * the target ncp.
2184 	 */
2185 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2186 	cache_unlock(&fromnd.nl_nch);
2187 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2188 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2189 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2190 	cred->cr_uid = saved_uid;
2191 
2192 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2193 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2194 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2195 	if (tdirp) {
2196 		if (info.v3)
2197 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2198 	}
2199 	if (error)
2200 		goto out1;
2201 
2202 	/*
2203 	 * relock the source
2204 	 */
2205 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2206 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2207 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2208 		cache_lock(&fromnd.nl_nch);
2209 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210 	} else {
2211 		cache_unlock(&tond.nl_nch);
2212 		cache_lock(&fromnd.nl_nch);
2213 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2214 		cache_lock(&tond.nl_nch);
2215 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2216 	}
2217 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2218 
2219 	fvp = fromnd.nl_nch.ncp->nc_vp;
2220 	tvp = tond.nl_nch.ncp->nc_vp;
2221 
2222 	/*
2223 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2224 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2225 	 * point).  If we get through the checks these will be guarenteed
2226 	 * to be non-NULL.
2227 	 *
2228 	 * Holding the children ncp's should be sufficient to prevent
2229 	 * fdvp and tdvp ripouts.
2230 	 */
2231 	if (fromnd.nl_nch.ncp->nc_parent)
2232 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2233 	else
2234 		fdvp = NULL;
2235 	if (tond.nl_nch.ncp->nc_parent)
2236 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2237 	else
2238 		tdvp = NULL;
2239 
2240 	if (tvp != NULL) {
2241 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2242 			if (info.v3)
2243 				error = EEXIST;
2244 			else
2245 				error = EISDIR;
2246 			goto out;
2247 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2248 			if (info.v3)
2249 				error = EEXIST;
2250 			else
2251 				error = ENOTDIR;
2252 			goto out;
2253 		}
2254 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2255 			if (info.v3)
2256 				error = EXDEV;
2257 			else
2258 				error = ENOTEMPTY;
2259 			goto out;
2260 		}
2261 	}
2262 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2263 		if (info.v3)
2264 			error = EXDEV;
2265 		else
2266 			error = ENOTEMPTY;
2267 		goto out;
2268 	}
2269 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2270 		if (info.v3)
2271 			error = EXDEV;
2272 		else
2273 			error = ENOTEMPTY;
2274 		goto out;
2275 	}
2276 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2277 		if (info.v3)
2278 			error = EINVAL;
2279 		else
2280 			error = ENOTEMPTY;
2281 	}
2282 
2283 	/*
2284 	 * You cannot rename a source into itself or a subdirectory of itself.
2285 	 * We check this by travsering the target directory upwards looking
2286 	 * for a match against the source.
2287 	 */
2288 	if (error == 0) {
2289 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2290 			if (fromnd.nl_nch.ncp == ncp) {
2291 				error = EINVAL;
2292 				break;
2293 			}
2294 		}
2295 	}
2296 
2297 	/*
2298 	 * If source is the same as the destination (that is the
2299 	 * same vnode with the same name in the same directory),
2300 	 * then there is nothing to do.
2301 	 */
2302 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2303 		error = -1;
2304 out:
2305 	if (!error) {
2306 		/*
2307 		 * The VOP_NRENAME function releases all vnode references &
2308 		 * locks prior to returning so we need to clear the pointers
2309 		 * to bypass cleanup code later on.
2310 		 */
2311 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2312 				    fdvp, tdvp, tond.nl_cred);
2313 	} else {
2314 		if (error == -1)
2315 			error = 0;
2316 	}
2317 	/* fall through */
2318 
2319 out1:
2320 	if (fdirp)
2321 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2322 	if (tdirp)
2323 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2324 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2325 			      2 * NFSX_WCCDATA(info.v3), &error));
2326 	if (info.v3) {
2327 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2328 				 fdiraft_ret, &fdiraft);
2329 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2330 				 tdiraft_ret, &tdiraft);
2331 	}
2332 	error = 0;
2333 	/* fall through */
2334 
2335 nfsmout:
2336 	*mrq = info.mreq;
2337 	if (tdirp)
2338 		vrele(tdirp);
2339 	nlookup_done(&tond);
2340 	if (fdirp)
2341 		vrele(fdirp);
2342 	nlookup_done(&fromnd);
2343 	return (error);
2344 }
2345 
2346 /*
2347  * nfs link service
2348  */
2349 int
2350 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2351 	   struct thread *td, struct mbuf **mrq)
2352 {
2353 	struct sockaddr *nam = nfsd->nd_nam;
2354 	struct ucred *cred = &nfsd->nd_cr;
2355 	struct nlookupdata nd;
2356 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2357 	int getret = 1;
2358 	struct vnode *dirp;
2359 	struct vnode *dvp;
2360 	struct vnode *vp;
2361 	struct vnode *xp;
2362 	struct mount *mp;
2363 	struct mount *xmp;
2364 	struct vattr dirfor, diraft, at;
2365 	nfsfh_t nfh, dnfh;
2366 	fhandle_t *fhp, *dfhp;
2367 	struct nfsm_info info;
2368 
2369 	info.mrep = nfsd->nd_mrep;
2370 	info.mreq = NULL;
2371 	info.md = nfsd->nd_md;
2372 	info.dpos = nfsd->nd_dpos;
2373 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2374 
2375 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2376 	nlookup_zero(&nd);
2377 	dirp = dvp = vp = xp = NULL;
2378 	mp = xmp = NULL;
2379 
2380 	fhp = &nfh.fh_generic;
2381 	dfhp = &dnfh.fh_generic;
2382 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2383 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2384 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2385 
2386 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2387 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2388 	if (error) {
2389 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2390 				      NFSX_POSTOPATTR(info.v3) +
2391 				      NFSX_WCCDATA(info.v3),
2392 				      &error));
2393 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2394 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2395 				 diraft_ret, &diraft);
2396 		xp = NULL;
2397 		error = 0;
2398 		goto nfsmout;
2399 	}
2400 	if (xp->v_type == VDIR) {
2401 		error = EPERM;		/* POSIX */
2402 		goto out1;
2403 	}
2404 
2405 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2406 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2407 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2408 	if (dirp) {
2409 		if (info.v3)
2410 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2411 	}
2412 	if (error)
2413 		goto out1;
2414 
2415 	if (vp != NULL) {
2416 		error = EEXIST;
2417 		goto out;
2418 	}
2419 	if (xp->v_mount != dvp->v_mount)
2420 		error = EXDEV;
2421 out:
2422 	if (!error) {
2423 		vn_unlock(dvp);
2424 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2425 		vrele(dvp);
2426 		dvp = NULL;
2427 	}
2428 	/* fall through */
2429 
2430 out1:
2431 	if (info.v3)
2432 		getret = VOP_GETATTR(xp, &at);
2433 	if (dirp)
2434 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2435 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2436 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2437 			      &error));
2438 	if (info.v3) {
2439 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2440 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2441 				 diraft_ret, &diraft);
2442 		error = 0;
2443 	}
2444 	/* fall through */
2445 
2446 nfsmout:
2447 	*mrq = info.mreq;
2448 	nlookup_done(&nd);
2449 	if (dirp)
2450 		vrele(dirp);
2451 	if (xp)
2452 		vrele(xp);
2453 	if (dvp) {
2454 		if (dvp == vp)
2455 			vrele(dvp);
2456 		else
2457 			vput(dvp);
2458 	}
2459 	if (vp)
2460 		vput(vp);
2461 	return(error);
2462 }
2463 
2464 /*
2465  * nfs symbolic link service
2466  */
2467 int
2468 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2469 	      struct thread *td, struct mbuf **mrq)
2470 {
2471 	struct sockaddr *nam = nfsd->nd_nam;
2472 	struct ucred *cred = &nfsd->nd_cr;
2473 	struct vattr va, dirfor, diraft;
2474 	struct nlookupdata nd;
2475 	struct vattr *vap = &va;
2476 	struct nfsv2_sattr *sp;
2477 	char *pathcp = NULL;
2478 	struct uio io;
2479 	struct iovec iv;
2480 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2481 	struct vnode *dirp;
2482 	struct vnode *vp;
2483 	struct vnode *dvp;
2484 	nfsfh_t nfh;
2485 	fhandle_t *fhp;
2486 	struct nfsm_info info;
2487 
2488 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2489 	nlookup_zero(&nd);
2490 	dirp = NULL;
2491 	dvp = NULL;
2492 	vp = NULL;
2493 
2494 	info.mrep = nfsd->nd_mrep;
2495 	info.mreq =  NULL;
2496 	info.md = nfsd->nd_md;
2497 	info.dpos = nfsd->nd_dpos;
2498 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2499 
2500 	fhp = &nfh.fh_generic;
2501 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2502 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2503 
2504 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2505 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2506 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2507 	if (dirp) {
2508 		if (info.v3)
2509 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2510 	}
2511 	if (error)
2512 		goto out;
2513 
2514 	VATTR_NULL(vap);
2515 	if (info.v3) {
2516 		ERROROUT(nfsm_srvsattr(&info, vap));
2517 	}
2518 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2519 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2520 	iv.iov_base = pathcp;
2521 	iv.iov_len = len2;
2522 	io.uio_resid = len2;
2523 	io.uio_offset = 0;
2524 	io.uio_iov = &iv;
2525 	io.uio_iovcnt = 1;
2526 	io.uio_segflg = UIO_SYSSPACE;
2527 	io.uio_rw = UIO_READ;
2528 	io.uio_td = NULL;
2529 	ERROROUT(nfsm_mtouio(&info, &io, len2));
2530 	if (info.v3 == 0) {
2531 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2532 		vap->va_mode = nfstov_mode(sp->sa_mode);
2533 	}
2534 	*(pathcp + len2) = '\0';
2535 	if (vp) {
2536 		error = EEXIST;
2537 		goto out;
2538 	}
2539 
2540 	if (vap->va_mode == (mode_t)VNOVAL)
2541 		vap->va_mode = 0;
2542 	if (dvp != vp)
2543 		vn_unlock(dvp);
2544 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2545 	vrele(dvp);
2546 	dvp = NULL;
2547 	if (error == 0) {
2548 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2549 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2550 		if (!error)
2551 			error = VOP_GETATTR(vp, vap);
2552 	}
2553 
2554 out:
2555 	if (dvp) {
2556 		if (dvp == vp)
2557 			vrele(dvp);
2558 		else
2559 			vput(dvp);
2560 	}
2561 	if (vp) {
2562 		vput(vp);
2563 		vp = NULL;
2564 	}
2565 	if (pathcp) {
2566 		FREE(pathcp, M_TEMP);
2567 		pathcp = NULL;
2568 	}
2569 	if (dirp) {
2570 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2571 		vrele(dirp);
2572 		dirp = NULL;
2573 	}
2574 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2575 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2576 			      NFSX_WCCDATA(info.v3),
2577 			      &error));
2578 	if (info.v3) {
2579 		if (!error) {
2580 			nfsm_srvpostop_fh(&info, fhp);
2581 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2582 		}
2583 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2584 				 diraft_ret, &diraft);
2585 	}
2586 	error = 0;
2587 	/* fall through */
2588 
2589 nfsmout:
2590 	*mrq = info.mreq;
2591 	nlookup_done(&nd);
2592 	if (vp)
2593 		vput(vp);
2594 	if (dirp)
2595 		vrele(dirp);
2596 	if (pathcp)
2597 		FREE(pathcp, M_TEMP);
2598 	return (error);
2599 }
2600 
2601 /*
2602  * nfs mkdir service
2603  */
2604 int
2605 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2606 	    struct thread *td, struct mbuf **mrq)
2607 {
2608 	struct sockaddr *nam = nfsd->nd_nam;
2609 	struct ucred *cred = &nfsd->nd_cr;
2610 	struct vattr va, dirfor, diraft;
2611 	struct vattr *vap = &va;
2612 	struct nfs_fattr *fp;
2613 	struct nlookupdata nd;
2614 	u_int32_t *tl;
2615 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2616 	struct vnode *dirp;
2617 	struct vnode *dvp;
2618 	struct vnode *vp;
2619 	nfsfh_t nfh;
2620 	fhandle_t *fhp;
2621 	struct nfsm_info info;
2622 
2623 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2624 	nlookup_zero(&nd);
2625 	dirp = NULL;
2626 	dvp = NULL;
2627 	vp = NULL;
2628 
2629 	info.dpos = nfsd->nd_dpos;
2630 	info.mrep = nfsd->nd_mrep;
2631 	info.mreq =  NULL;
2632 	info.md = nfsd->nd_md;
2633 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2634 
2635 	fhp = &nfh.fh_generic;
2636 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2637 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2638 
2639 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2640 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2641 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2642 	if (dirp) {
2643 		if (info.v3)
2644 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2645 	}
2646 	if (error) {
2647 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2648 				      NFSX_WCCDATA(info.v3), &error));
2649 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2650 				 diraft_ret, &diraft);
2651 		error = 0;
2652 		goto nfsmout;
2653 	}
2654 	VATTR_NULL(vap);
2655 	if (info.v3) {
2656 		ERROROUT(nfsm_srvsattr(&info, vap));
2657 	} else {
2658 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2659 		vap->va_mode = nfstov_mode(*tl++);
2660 	}
2661 
2662 	/*
2663 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2664 	 * nd.ni_vp, if it exists, is referenced but not locked.
2665 	 */
2666 
2667 	vap->va_type = VDIR;
2668 	if (vp != NULL) {
2669 		error = EEXIST;
2670 		goto out;
2671 	}
2672 
2673 	/*
2674 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2675 	 * component is freed by the VOP call.  This will fill-in
2676 	 * nd.ni_vp, reference, and exclusively lock it.
2677 	 */
2678 	if (vap->va_mode == (mode_t)VNOVAL)
2679 		vap->va_mode = 0;
2680 	vn_unlock(dvp);
2681 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2682 	vrele(dvp);
2683 	dvp = NULL;
2684 
2685 	if (error == 0) {
2686 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2687 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2688 		if (error == 0)
2689 			error = VOP_GETATTR(vp, vap);
2690 	}
2691 out:
2692 	if (dirp)
2693 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2694 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2695 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2696 			      NFSX_WCCDATA(info.v3),
2697 			      &error));
2698 	if (info.v3) {
2699 		if (!error) {
2700 			nfsm_srvpostop_fh(&info, fhp);
2701 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2702 		}
2703 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2704 				 diraft_ret, &diraft);
2705 	} else {
2706 		nfsm_srvfhtom(&info, fhp);
2707 		fp = nfsm_build(&info, NFSX_V2FATTR);
2708 		nfsm_srvfattr(nfsd, vap, fp);
2709 	}
2710 	error = 0;
2711 	/* fall through */
2712 
2713 nfsmout:
2714 	*mrq = info.mreq;
2715 	nlookup_done(&nd);
2716 	if (dirp)
2717 		vrele(dirp);
2718 	if (dvp) {
2719 		if (dvp == vp)
2720 			vrele(dvp);
2721 		else
2722 			vput(dvp);
2723 	}
2724 	if (vp)
2725 		vput(vp);
2726 	return (error);
2727 }
2728 
2729 /*
2730  * nfs rmdir service
2731  */
2732 int
2733 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2734 	    struct thread *td, struct mbuf **mrq)
2735 {
2736 	struct sockaddr *nam = nfsd->nd_nam;
2737 	struct ucred *cred = &nfsd->nd_cr;
2738 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2739 	struct vnode *dirp;
2740 	struct vnode *dvp;
2741 	struct vnode *vp;
2742 	struct vattr dirfor, diraft;
2743 	nfsfh_t nfh;
2744 	fhandle_t *fhp;
2745 	struct nlookupdata nd;
2746 	struct nfsm_info info;
2747 
2748 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2749 	nlookup_zero(&nd);
2750 	dirp = NULL;
2751 	dvp = NULL;
2752 	vp = NULL;
2753 
2754 	info.mrep = nfsd->nd_mrep;
2755 	info.mreq = NULL;
2756 	info.md = nfsd->nd_md;
2757 	info.dpos = nfsd->nd_dpos;
2758 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2759 
2760 	fhp = &nfh.fh_generic;
2761 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2762 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2763 
2764 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2765 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2766 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2767 	if (dirp) {
2768 		if (info.v3)
2769 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2770 	}
2771 	if (error) {
2772 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2773 				      NFSX_WCCDATA(info.v3), &error));
2774 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2775 				 diraft_ret, &diraft);
2776 		error = 0;
2777 		goto nfsmout;
2778 	}
2779 	if (vp->v_type != VDIR) {
2780 		error = ENOTDIR;
2781 		goto out;
2782 	}
2783 
2784 	/*
2785 	 * The root of a mounted filesystem cannot be deleted.
2786 	 */
2787 	if (vp->v_flag & VROOT)
2788 		error = EBUSY;
2789 out:
2790 	/*
2791 	 * Issue or abort op.  Since SAVESTART is not set, path name
2792 	 * component is freed by the VOP after either.
2793 	 */
2794 	if (!error) {
2795 		if (dvp != vp)
2796 			vn_unlock(dvp);
2797 		vput(vp);
2798 		vp = NULL;
2799 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2800 		vrele(dvp);
2801 		dvp = NULL;
2802 	}
2803 	nlookup_done(&nd);
2804 
2805 	if (dirp)
2806 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2807 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2808 	if (info.v3) {
2809 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2810 				 diraft_ret, &diraft);
2811 		error = 0;
2812 	}
2813 	/* fall through */
2814 
2815 nfsmout:
2816 	*mrq = info.mreq;
2817 	if (dvp) {
2818 		if (dvp == vp)
2819 			vrele(dvp);
2820 		else
2821 			vput(dvp);
2822 	}
2823 	nlookup_done(&nd);
2824 	if (dirp)
2825 		vrele(dirp);
2826 	if (vp)
2827 		vput(vp);
2828 	return(error);
2829 }
2830 
2831 /*
2832  * nfs readdir service
2833  * - mallocs what it thinks is enough to read
2834  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2835  * - calls VOP_READDIR()
2836  * - loops around building the reply
2837  *	if the output generated exceeds count break out of loop
2838  *	The nfsm_clget macro is used here so that the reply will be packed
2839  *	tightly in mbuf clusters.
2840  * - it only knows that it has encountered eof when the VOP_READDIR()
2841  *	reads nothing
2842  * - as such one readdir rpc will return eof false although you are there
2843  *	and then the next will return eof
2844  * - it trims out records with d_fileno == 0
2845  *	this doesn't matter for Unix clients, but such records might confuse
2846  *	clients for other operating systems.
2847  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2848  *	than requested, but this may not apply to all filesystems. For
2849  *	example, client NFS does not { although it is never remote mounted
2850  *	anyhow }
2851  *     The alternate call nfsrv_readdirplus() does lookups as well.
2852  * PS: The NFS protocol spec. does not clarify what the "count" byte
2853  *	argument is a count of.. just name strings and file id's or the
2854  *	entire reply rpc or ...
2855  *	I tried just file name and id sizes and it confused the Sun client,
2856  *	so I am using the full rpc size now. The "paranoia.." comment refers
2857  *	to including the status longwords that are not a part of the dir.
2858  *	"entry" structures, but are in the rpc.
2859  */
/*
 * Fixed-size scratch record made up of one 64-bit value followed by
 * 32-bit words.  nfsrv_readdirplus() declares one as `fl` and uses
 * fl_nfh as storage for a file handle.
 * NOTE(review): the field names suggest the per-entry trailer of a v3
 * READDIRPLUS reply (cookie, post-op attributes, file handle) -- confirm
 * against nfsrv_readdirplus().  Field order and sizes must not change.
 */
2860 struct flrep {
2861 	nfsuint64	fl_off;
2862 	u_int32_t	fl_postopok;
2863 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];	/* NFSX_V3FATTR bytes */
2864 	u_int32_t	fl_fhok;
2865 	u_int32_t	fl_fhsize;
2866 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];	/* NFSX_V3FH bytes */
2867 };
2868 
/*
 * Service an NFS READDIR request.
 *
 * Decodes the directory file handle, the starting cookie, (v3 only) the
 * cookie verifier, and the byte count; reads directory entries with
 * VOP_READDIR() into a temporary buffer; and packs the usable entries
 * into the reply until the running reply size would exceed the
 * requested count.
 *
 * nfsd - request descriptor (request mbufs, credentials, peer, flags).
 * slp  - handed unchanged to nfsrv_fhtovp() and nfsm_reply().
 * td   - handed unchanged to nfsrv_access().
 * mrq  - receives info.mreq on every exit path.
 *
 * The local error is reset to 0 on each error path once its reply has
 * been built.
 * NOTE(review): the NEGREPLYOUT/NEGKEEPOUT/NULLOUT macros are defined
 * elsewhere; they appear to jump to nfsmout on failure -- confirm.
 */
2869 int
2870 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2871 	      struct thread *td, struct mbuf **mrq)
2872 {
2873 	struct sockaddr *nam = nfsd->nd_nam;
2874 	struct ucred *cred = &nfsd->nd_cr;
2875 	char *bp, *be;
2876 	struct dirent *dp;
2877 	caddr_t cp;
2878 	u_int32_t *tl;
2879 	struct mbuf *mp1, *mp2;
2880 	char *cpos, *cend, *rbuf;
2881 	struct vnode *vp = NULL;
2882 	struct mount *mp = NULL;
2883 	struct vattr at;
2884 	nfsfh_t nfh;
2885 	fhandle_t *fhp;
2886 	struct uio io;
2887 	struct iovec iv;
2888 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2889 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2890 	u_quad_t off, toff, verf;
2891 	off_t *cookies = NULL, *cookiep;
2892 	struct nfsm_info info;
2893 
2894 	info.mrep = nfsd->nd_mrep;
2895 	info.mreq = NULL;
2896 	info.md = nfsd->nd_md;
2897 	info.dpos = nfsd->nd_dpos;
2898 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2899 
2900 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2901 	fhp = &nfh.fh_generic;
2902 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/*
	 * v3 sends a 64-bit cookie, a 64-bit verifier and the count (five
	 * words); v2 sends a 32-bit cookie and the count (two words).  In
	 * both cases tl is left pointing at the count word.
	 */
2903 	if (info.v3) {
2904 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2905 		toff = fxdr_hyper(tl);
2906 		tl += 2;
2907 		verf = fxdr_hyper(tl);
2908 		tl += 2;
2909 	} else {
2910 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2911 		toff = fxdr_unsigned(u_quad_t, *tl++);
2912 		verf = 0;	/* shut up gcc */
2913 	}
2914 	off = toff;
2915 	cnt = fxdr_unsigned(int, *tl);
	/*
	 * The read size is the count rounded up to a multiple of DIRBLKSIZ;
	 * both the count and the read size are capped at
	 * NFS_SRVMAXDATA(nfsd).  fullsiz remembers the read size because siz
	 * is reduced below to the number of bytes actually read.
	 */
2916 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2917 	xfer = NFS_SRVMAXDATA(nfsd);
2918 	if ((unsigned)cnt > xfer)
2919 		cnt = xfer;
2920 	if ((unsigned)siz > xfer)
2921 		siz = xfer;
2922 	fullsiz = siz;
2923 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2924 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
	/*
	 * vp is released with vput() up to the vn_unlock() below, i.e. it is
	 * treated as locked on return from nfsrv_fhtovp().
	 */
2925 	if (!error && vp->v_type != VDIR) {
2926 		error = ENOTDIR;
2927 		vput(vp);
2928 		vp = NULL;
2929 	}
2930 	if (error) {
2931 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2932 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2933 		error = 0;
2934 		goto nfsmout;
2935 	}
2936 
2937 	/*
2938 	 * Obtain lock on vnode for this section of the code
2939 	 */
2940 
2941 	if (info.v3) {
2942 		error = getret = VOP_GETATTR(vp, &at);
2943 #if 0
2944 		/*
2945 		 * XXX This check may be too strict for Solaris 2.5 clients.
2946 		 */
2947 		if (!error && toff && verf && verf != at.va_filerev)
2948 			error = NFSERR_BAD_COOKIE;
2949 #endif
2950 	}
2951 	if (!error)
2952 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2953 	if (error) {
2954 		vput(vp);
2955 		vp = NULL;
2956 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2957 				      NFSX_POSTOPATTR(info.v3), &error));
2958 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2959 		error = 0;
2960 		goto nfsmout;
2961 	}
	/* From here on vp is unlocked, so it is released with vrele(). */
2962 	vn_unlock(vp);
2963 
2964 	/*
2965 	 * end section.  Allocate rbuf and continue
2966 	 */
2967 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
	/*
	 * Each pass reads up to fullsiz bytes starting at off.  A cookie
	 * array left over from a previous pass is freed first.
	 */
2968 again:
2969 	iv.iov_base = rbuf;
2970 	iv.iov_len = fullsiz;
2971 	io.uio_iov = &iv;
2972 	io.uio_iovcnt = 1;
2973 	io.uio_offset = (off_t)off;
2974 	io.uio_resid = fullsiz;
2975 	io.uio_segflg = UIO_SYSSPACE;
2976 	io.uio_rw = UIO_READ;
2977 	io.uio_td = NULL;
2978 	eofflag = 0;
2979 	if (cookies) {
2980 		kfree((caddr_t)cookies, M_TEMP);
2981 		cookies = NULL;
2982 	}
2983 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2984 	off = (off_t)io.uio_offset;
	/* A successful read that returned no cookie array is an error. */
2985 	if (!cookies && !error)
2986 		error = NFSERR_PERM;
2987 	if (info.v3) {
2988 		getret = VOP_GETATTR(vp, &at);
2989 		if (!error)
2990 			error = getret;
2991 	}
2992 	if (error) {
2993 		vrele(vp);
2994 		vp = NULL;
2995 		kfree((caddr_t)rbuf, M_TEMP);
2996 		if (cookies)
2997 			kfree((caddr_t)cookies, M_TEMP);
2998 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2999 				      NFSX_POSTOPATTR(info.v3), &error));
3000 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3001 		error = 0;
3002 		goto nfsmout;
3003 	}
	/* Reduce siz by the unread residual: the byte count now in rbuf. */
3004 	if (io.uio_resid) {
3005 		siz -= io.uio_resid;
3006 
3007 		/*
3008 		 * If nothing read, return eof
3009 		 * rpc reply
3010 		 */
3011 		if (siz == 0) {
3012 			vrele(vp);
3013 			vp = NULL;
3014 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3015 					      NFSX_POSTOPATTR(info.v3) +
3016 					      NFSX_COOKIEVERF(info.v3) +
3017 					      2 * NFSX_UNSIGNED,
3018 					      &error));
3019 			if (info.v3) {
3020 				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3021 				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3022 				txdr_hyper(at.va_filerev, tl);
3023 				tl += 2;
3024 			} else
3025 				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
			/* Two trailing words: no entry follows, then eof. */
3026 			*tl++ = nfs_false;
3027 			*tl = nfs_true;
3028 			FREE((caddr_t)rbuf, M_TEMP);
3029 			FREE((caddr_t)cookies, M_TEMP);
3030 			error = 0;
3031 			goto nfsmout;
3032 		}
3033 	}
3034 
3035 	/*
3036 	 * Check for degenerate cases of nothing useful read.
3037 	 * If so go try again
3038 	 */
3039 	cpos = rbuf;
3040 	cend = rbuf + siz;
3041 	dp = (struct dirent *)cpos;
3042 	cookiep = cookies;
3043 	/*
3044 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3045 	 * directory offset up to a block boundary, so it is necessary to
3046 	 * skip over the records that precede the requested offset. This
3047 	 * requires the assumption that file offset cookies monotonically
3048 	 * increase.
	 *
	 * Records with d_ino == 0 and whiteout records are skipped here
	 * as well.
3049 	 */
3050 	while (cpos < cend && ncookies > 0 &&
3051 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3052 		 ((u_quad_t)(*cookiep)) <= toff)) {
3053 		dp = _DIRENT_NEXT(dp);
3054 		cpos = (char *)dp;
3055 		cookiep++;
3056 		ncookies--;
3057 	}
	/*
	 * Everything in this buffer was skipped: read the next chunk,
	 * starting where VOP_READDIR() left off.
	 */
3058 	if (cpos >= cend || ncookies == 0) {
3059 		toff = off;
3060 		siz = fullsiz;
3061 		goto again;
3062 	}
3063 
	/* len is the running reply size that is compared against cnt. */
3064 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3065 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3066 			      NFSX_POSTOPATTR(info.v3) +
3067 			      NFSX_COOKIEVERF(info.v3) + siz,
3068 			      &error));
3069 	if (info.v3) {
3070 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3071 		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3072 		txdr_hyper(at.va_filerev, tl);
3073 	}
	/*
	 * bp/be are the current write position and the end of the usable
	 * space in mbuf mp1; nfsm_clget() is called before each word is
	 * stored.
	 * NOTE(review): nfsm_clget() is defined elsewhere; it presumably
	 * moves mp1/mp2/bp/be on to a fresh mbuf when bp has reached be --
	 * confirm.
	 */
3074 	mp1 = mp2 = info.mb;
3075 	bp = info.bpos;
3076 	be = bp + M_TRAILINGSPACE(mp1);
3077 
3078 	/* Loop through the records and build reply */
3079 	while (cpos < cend && ncookies > 0) {
3080 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3081 			nlen = dp->d_namlen;
3082 			rem = nfsm_rndup(nlen) - nlen;
3083 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3084 			if (info.v3)
3085 				len += 2 * NFSX_UNSIGNED;
			/*
			 * This entry would push the reply past the requested
			 * count: stop, and make sure eof is not reported.
			 */
3086 			if (len > cnt) {
3087 				eofflag = 0;
3088 				break;
3089 			}
3090 			/*
3091 			 * Build the directory record xdr from
3092 			 * the dirent entry.
			 *
			 * Words written per record: nfs_true, (v3 only) the
			 * upper 32 bits of d_ino, d_ino, the name length,
			 * the name bytes plus zero padding, (v3 only) the
			 * upper 32 bits of the cookie, the cookie.
3093 			 */
3094 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3095 			*tl = nfs_true;
3096 			bp += NFSX_UNSIGNED;
3097 			if (info.v3) {
3098 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3099 				*tl = txdr_unsigned(dp->d_ino >> 32);
3100 				bp += NFSX_UNSIGNED;
3101 			}
3102 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3103 			*tl = txdr_unsigned(dp->d_ino);
3104 			bp += NFSX_UNSIGNED;
3105 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3106 			*tl = txdr_unsigned(nlen);
3107 			bp += NFSX_UNSIGNED;
3108 
3109 			/* And loop around copying the name */
3110 			xfer = nlen;
3111 			cp = dp->d_name;
3112 			while (xfer > 0) {
3113 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
				/* Copy no more than fits before be. */
3114 				if ((bp+xfer) > be)
3115 					tsiz = be-bp;
3116 				else
3117 					tsiz = xfer;
3118 				bcopy(cp, bp, tsiz);
3119 				bp += tsiz;
3120 				xfer -= tsiz;
3121 				if (xfer > 0)
3122 					cp += tsiz;
3123 			}
3124 			/* And null pad to a int32_t boundary */
3125 			for (i = 0; i < rem; i++)
3126 				*bp++ = '\0';
3127 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3128 
3129 			/* Finish off the record */
3130 			if (info.v3) {
3131 				*tl = txdr_unsigned(*cookiep >> 32);
3132 				bp += NFSX_UNSIGNED;
3133 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3134 			}
3135 			*tl = txdr_unsigned(*cookiep);
3136 			bp += NFSX_UNSIGNED;
3137 		}
3138 		dp = _DIRENT_NEXT(dp);
3139 		cpos = (char *)dp;
3140 		cookiep++;
3141 		ncookies--;
3142 	}
3143 	vrele(vp);
3144 	vp = NULL;
	/* Terminate the list: no entry follows, then the eof flag. */
3145 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3146 	*tl = nfs_false;
3147 	bp += NFSX_UNSIGNED;
3148 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3149 	if (eofflag)
3150 		*tl = nfs_true;
3151 	else
3152 		*tl = nfs_false;
3153 	bp += NFSX_UNSIGNED;
	/*
	 * Fix up the length of the last mbuf written.  If it is still the
	 * mbuf the reply started in, its length grows by the bytes added
	 * since info.bpos; otherwise its length is set from the write
	 * position when that lies before be.
	 */
3154 	if (mp1 != info.mb) {
3155 		if (bp < be)
3156 			mp1->m_len = bp - mtod(mp1, caddr_t);
3157 	} else
3158 		mp1->m_len += bp - info.bpos;
3159 	FREE((caddr_t)rbuf, M_TEMP);
3160 	FREE((caddr_t)cookies, M_TEMP);
3161 
3162 nfsmout:
3163 	*mrq = info.mreq;
3164 	if (vp)
3165 		vrele(vp);
3166 	return(error);
3167 }
3168 
3169 int
3170 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3171 		  struct thread *td, struct mbuf **mrq)
3172 {
3173 	struct sockaddr *nam = nfsd->nd_nam;
3174 	struct ucred *cred = &nfsd->nd_cr;
3175 	char *bp, *be;
3176 	struct dirent *dp;
3177 	caddr_t cp;
3178 	u_int32_t *tl;
3179 	struct mbuf *mp1, *mp2;
3180 	char *cpos, *cend, *rbuf;
3181 	struct vnode *vp = NULL, *nvp;
3182 	struct mount *mp = NULL;
3183 	struct flrep fl;
3184 	nfsfh_t nfh;
3185 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3186 	struct uio io;
3187 	struct iovec iv;
3188 	struct vattr va, at, *vap = &va;
3189 	struct nfs_fattr *fp;
3190 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3191 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3192 	u_quad_t off, toff, verf;
3193 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3194 	struct nfsm_info info;
3195 
3196 	info.mrep = nfsd->nd_mrep;
3197 	info.mreq = NULL;
3198 	info.md = nfsd->nd_md;
3199 	info.dpos = nfsd->nd_dpos;
3200 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3201 
3202 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3203 	fhp = &nfh.fh_generic;
3204 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3205 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3206 	toff = fxdr_hyper(tl);
3207 	tl += 2;
3208 	verf = fxdr_hyper(tl);
3209 	tl += 2;
3210 	siz = fxdr_unsigned(int, *tl++);
3211 	cnt = fxdr_unsigned(int, *tl);
3212 	off = toff;
3213 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3214 	xfer = NFS_SRVMAXDATA(nfsd);
3215 	if ((unsigned)cnt > xfer)
3216 		cnt = xfer;
3217 	if ((unsigned)siz > xfer)
3218 		siz = xfer;
3219 	fullsiz = siz;
3220 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3221 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3222 	if (!error && vp->v_type != VDIR) {
3223 		error = ENOTDIR;
3224 		vput(vp);
3225 		vp = NULL;
3226 	}
3227 	if (error) {
3228 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3229 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3230 		error = 0;
3231 		goto nfsmout;
3232 	}
3233 	error = getret = VOP_GETATTR(vp, &at);
3234 #if 0
3235 	/*
3236 	 * XXX This check may be too strict for Solaris 2.5 clients.
3237 	 */
3238 	if (!error && toff && verf && verf != at.va_filerev)
3239 		error = NFSERR_BAD_COOKIE;
3240 #endif
3241 	if (!error) {
3242 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3243 	}
3244 	if (error) {
3245 		vput(vp);
3246 		vp = NULL;
3247 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3248 				      NFSX_V3POSTOPATTR, &error));
3249 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3250 		error = 0;
3251 		goto nfsmout;
3252 	}
3253 	vn_unlock(vp);
3254 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3255 again:
3256 	iv.iov_base = rbuf;
3257 	iv.iov_len = fullsiz;
3258 	io.uio_iov = &iv;
3259 	io.uio_iovcnt = 1;
3260 	io.uio_offset = (off_t)off;
3261 	io.uio_resid = fullsiz;
3262 	io.uio_segflg = UIO_SYSSPACE;
3263 	io.uio_rw = UIO_READ;
3264 	io.uio_td = NULL;
3265 	eofflag = 0;
3266 	if (cookies) {
3267 		kfree((caddr_t)cookies, M_TEMP);
3268 		cookies = NULL;
3269 	}
3270 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3271 	off = (u_quad_t)io.uio_offset;
3272 	getret = VOP_GETATTR(vp, &at);
3273 	if (!cookies && !error)
3274 		error = NFSERR_PERM;
3275 	if (!error)
3276 		error = getret;
3277 	if (error) {
3278 		vrele(vp);
3279 		vp = NULL;
3280 		if (cookies)
3281 			kfree((caddr_t)cookies, M_TEMP);
3282 		kfree((caddr_t)rbuf, M_TEMP);
3283 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3284 				      NFSX_V3POSTOPATTR, &error));
3285 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3286 		error = 0;
3287 		goto nfsmout;
3288 	}
3289 	if (io.uio_resid) {
3290 		siz -= io.uio_resid;
3291 
3292 		/*
3293 		 * If nothing read, return eof
3294 		 * rpc reply
3295 		 */
3296 		if (siz == 0) {
3297 			vrele(vp);
3298 			vp = NULL;
3299 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3300 					      NFSX_V3POSTOPATTR +
3301 					      NFSX_V3COOKIEVERF +
3302 					      2 * NFSX_UNSIGNED,
3303 					      &error));
3304 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3305 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3306 			txdr_hyper(at.va_filerev, tl);
3307 			tl += 2;
3308 			*tl++ = nfs_false;
3309 			*tl = nfs_true;
3310 			FREE((caddr_t)cookies, M_TEMP);
3311 			FREE((caddr_t)rbuf, M_TEMP);
3312 			error = 0;
3313 			goto nfsmout;
3314 		}
3315 	}
3316 
3317 	/*
3318 	 * Check for degenerate cases of nothing useful read.
3319 	 * If so go try again
3320 	 */
3321 	cpos = rbuf;
3322 	cend = rbuf + siz;
3323 	dp = (struct dirent *)cpos;
3324 	cookiep = cookies;
3325 	/*
3326 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3327 	 * directory offset up to a block boundary, so it is necessary to
3328 	 * skip over the records that preceed the requested offset. This
3329 	 * requires the assumption that file offset cookies monotonically
3330 	 * increase.
3331 	 */
3332 	while (cpos < cend && ncookies > 0 &&
3333 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3334 		 ((u_quad_t)(*cookiep)) <= toff)) {
3335 		dp = _DIRENT_NEXT(dp);
3336 		cpos = (char *)dp;
3337 		cookiep++;
3338 		ncookies--;
3339 	}
3340 	if (cpos >= cend || ncookies == 0) {
3341 		toff = off;
3342 		siz = fullsiz;
3343 		goto again;
3344 	}
3345 
3346 	/*
3347 	 * Probe one of the directory entries to see if the filesystem
3348 	 * supports VGET.
3349 	 */
3350 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3351 		error = NFSERR_NOTSUPP;
3352 		vrele(vp);
3353 		vp = NULL;
3354 		kfree((caddr_t)cookies, M_TEMP);
3355 		kfree((caddr_t)rbuf, M_TEMP);
3356 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3357 				      NFSX_V3POSTOPATTR, &error));
3358 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3359 		error = 0;
3360 		goto nfsmout;
3361 	}
3362 	if (nvp) {
3363 		vput(nvp);
3364 		nvp = NULL;
3365 	}
3366 
3367 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3368 			2 * NFSX_UNSIGNED;
3369 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3370 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3371 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3372 	txdr_hyper(at.va_filerev, tl);
3373 	mp1 = mp2 = info.mb;
3374 	bp = info.bpos;
3375 	be = bp + M_TRAILINGSPACE(mp1);
3376 
3377 	/* Loop through the records and build reply */
3378 	while (cpos < cend && ncookies > 0) {
3379 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3380 			nlen = dp->d_namlen;
3381 			rem = nfsm_rndup(nlen) - nlen;
3382 
3383 			/*
3384 			 * For readdir_and_lookup get the vnode using
3385 			 * the file number.
3386 			 */
3387 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3388 				goto invalid;
3389 			bzero((caddr_t)nfhp, NFSX_V3FH);
3390 			nfhp->fh_fsid = fhp->fh_fsid;
3391 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3392 				vput(nvp);
3393 				nvp = NULL;
3394 				goto invalid;
3395 			}
3396 			if (VOP_GETATTR(nvp, vap)) {
3397 				vput(nvp);
3398 				nvp = NULL;
3399 				goto invalid;
3400 			}
3401 			vput(nvp);
3402 			nvp = NULL;
3403 
3404 			/*
3405 			 * If either the dircount or maxcount will be
3406 			 * exceeded, get out now. Both of these lengths
3407 			 * are calculated conservatively, including all
3408 			 * XDR overheads.
3409 			 */
3410 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3411 				NFSX_V3POSTOPATTR);
3412 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3413 			if (len > cnt || dirlen > fullsiz) {
3414 				eofflag = 0;
3415 				break;
3416 			}
3417 
3418 			/*
3419 			 * Build the directory record xdr from
3420 			 * the dirent entry.
3421 			 */
3422 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3423 			nfsm_srvfattr(nfsd, vap, fp);
3424 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3425 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3426 			fl.fl_postopok = nfs_true;
3427 			fl.fl_fhok = nfs_true;
3428 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3429 
3430 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3431 			*tl = nfs_true;
3432 			bp += NFSX_UNSIGNED;
3433 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3434 			*tl = txdr_unsigned(dp->d_ino >> 32);
3435 			bp += NFSX_UNSIGNED;
3436 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3437 			*tl = txdr_unsigned(dp->d_ino);
3438 			bp += NFSX_UNSIGNED;
3439 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3440 			*tl = txdr_unsigned(nlen);
3441 			bp += NFSX_UNSIGNED;
3442 
3443 			/* And loop around copying the name */
3444 			xfer = nlen;
3445 			cp = dp->d_name;
3446 			while (xfer > 0) {
3447 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3448 				if ((bp + xfer) > be)
3449 					tsiz = be - bp;
3450 				else
3451 					tsiz = xfer;
3452 				bcopy(cp, bp, tsiz);
3453 				bp += tsiz;
3454 				xfer -= tsiz;
3455 				cp += tsiz;
3456 			}
3457 			/* And null pad to a int32_t boundary */
3458 			for (i = 0; i < rem; i++)
3459 				*bp++ = '\0';
3460 
3461 			/*
3462 			 * Now copy the flrep structure out.
3463 			 */
3464 			xfer = sizeof (struct flrep);
3465 			cp = (caddr_t)&fl;
3466 			while (xfer > 0) {
3467 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3468 				if ((bp + xfer) > be)
3469 					tsiz = be - bp;
3470 				else
3471 					tsiz = xfer;
3472 				bcopy(cp, bp, tsiz);
3473 				bp += tsiz;
3474 				xfer -= tsiz;
3475 				cp += tsiz;
3476 			}
3477 		}
3478 invalid:
3479 		dp = _DIRENT_NEXT(dp);
3480 		cpos = (char *)dp;
3481 		cookiep++;
3482 		ncookies--;
3483 	}
3484 	vrele(vp);
3485 	vp = NULL;
3486 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3487 	*tl = nfs_false;
3488 	bp += NFSX_UNSIGNED;
3489 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3490 	if (eofflag)
3491 		*tl = nfs_true;
3492 	else
3493 		*tl = nfs_false;
3494 	bp += NFSX_UNSIGNED;
3495 	if (mp1 != info.mb) {
3496 		if (bp < be)
3497 			mp1->m_len = bp - mtod(mp1, caddr_t);
3498 	} else
3499 		mp1->m_len += bp - info.bpos;
3500 	FREE((caddr_t)cookies, M_TEMP);
3501 	FREE((caddr_t)rbuf, M_TEMP);
3502 nfsmout:
3503 	*mrq = info.mreq;
3504 	if (vp)
3505 		vrele(vp);
3506 	return(error);
3507 }
3508 
3509 /*
3510  * nfs commit service
3511  */
3512 int
3513 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3514 	     struct thread *td, struct mbuf **mrq)
3515 {
3516 	struct sockaddr *nam = nfsd->nd_nam;
3517 	struct ucred *cred = &nfsd->nd_cr;
3518 	struct vattr bfor, aft;
3519 	struct vnode *vp = NULL;
3520 	struct mount *mp = NULL;
3521 	nfsfh_t nfh;
3522 	fhandle_t *fhp;
3523 	u_int32_t *tl;
3524 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3525 	u_quad_t off;
3526 	struct nfsm_info info;
3527 
3528 	info.mrep = nfsd->nd_mrep;
3529 	info.mreq = NULL;
3530 	info.md = nfsd->nd_md;
3531 	info.dpos = nfsd->nd_dpos;
3532 
3533 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3534 	fhp = &nfh.fh_generic;
3535 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3536 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3537 
3538 	/*
3539 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3540 	 * count parameters, so these arguments are useless (someday maybe).
3541 	 */
3542 	off = fxdr_hyper(tl);
3543 	tl += 2;
3544 	cnt = fxdr_unsigned(int, *tl);
3545 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3546 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3547 	if (error) {
3548 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3549 				      2 * NFSX_UNSIGNED, &error));
3550 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3551 				 aft_ret, &aft);
3552 		error = 0;
3553 		goto nfsmout;
3554 	}
3555 	for_ret = VOP_GETATTR(vp, &bfor);
3556 
3557 	if (cnt > MAX_COMMIT_COUNT) {
3558 		/*
3559 		 * Give up and do the whole thing
3560 		 */
3561 		if (vp->v_object &&
3562 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3563 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3564 		}
3565 		error = VOP_FSYNC(vp, MNT_WAIT, 0);
3566 	} else {
3567 		/*
3568 		 * Locate and synchronously write any buffers that fall
3569 		 * into the requested range.  Note:  we are assuming that
3570 		 * f_iosize is a power of 2.
3571 		 */
3572 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3573 		int iomask = iosize - 1;
3574 		off_t loffset;
3575 
3576 		/*
3577 		 * Align to iosize boundry, super-align to page boundry.
3578 		 */
3579 		if (off & iomask) {
3580 			cnt += off & iomask;
3581 			off &= ~(u_quad_t)iomask;
3582 		}
3583 		if (off & PAGE_MASK) {
3584 			cnt += off & PAGE_MASK;
3585 			off &= ~(u_quad_t)PAGE_MASK;
3586 		}
3587 		loffset = off;
3588 
3589 		if (vp->v_object &&
3590 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3591 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3592 			    (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3593 		}
3594 
3595 		crit_enter();
3596 		while (cnt > 0) {
3597 			struct buf *bp;
3598 
3599 			/*
3600 			 * If we have a buffer and it is marked B_DELWRI we
3601 			 * have to lock and write it.  Otherwise the prior
3602 			 * write is assumed to have already been committed.
3603 			 *
3604 			 * WARNING: FINDBLK_TEST buffers represent stable
3605 			 *	    storage but not necessarily stable
3606 			 *	    content.  It is ok in this case.
3607 			 */
3608 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3609 				if (bp->b_flags & B_DELWRI)
3610 					bp = findblk(vp, loffset, 0);
3611 				else
3612 					bp = NULL;
3613 			}
3614 			if (bp) {
3615 				if (bp->b_flags & B_DELWRI) {
3616 					bremfree(bp);
3617 					bwrite(bp);
3618 					++nfs_commit_miss;
3619 				} else {
3620 					BUF_UNLOCK(bp);
3621 				}
3622 			}
3623 			++nfs_commit_blks;
3624 			if (cnt < iosize)
3625 				break;
3626 			cnt -= iosize;
3627 			loffset += iosize;
3628 		}
3629 		crit_exit();
3630 	}
3631 
3632 	aft_ret = VOP_GETATTR(vp, &aft);
3633 	vput(vp);
3634 	vp = NULL;
3635 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3636 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3637 			      &error));
3638 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3639 			 aft_ret, &aft);
3640 	if (!error) {
3641 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3642 		if (nfsver.tv_sec == 0)
3643 			nfsver = boottime;
3644 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3645 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3646 	} else {
3647 		error = 0;
3648 	}
3649 nfsmout:
3650 	*mrq = info.mreq;
3651 	if (vp)
3652 		vput(vp);
3653 	return(error);
3654 }
3655 
3656 /*
3657  * nfs statfs service
3658  */
3659 int
3660 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3661 	     struct thread *td, struct mbuf **mrq)
3662 {
3663 	struct sockaddr *nam = nfsd->nd_nam;
3664 	struct ucred *cred = &nfsd->nd_cr;
3665 	struct statfs *sf;
3666 	struct nfs_statfs *sfp;
3667 	int error = 0, rdonly, getret = 1;
3668 	struct vnode *vp = NULL;
3669 	struct mount *mp = NULL;
3670 	struct vattr at;
3671 	nfsfh_t nfh;
3672 	fhandle_t *fhp;
3673 	struct statfs statfs;
3674 	u_quad_t tval;
3675 	struct nfsm_info info;
3676 
3677 	info.mrep = nfsd->nd_mrep;
3678 	info.mreq = NULL;
3679 	info.md = nfsd->nd_md;
3680 	info.dpos = nfsd->nd_dpos;
3681 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3682 
3683 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3684 	fhp = &nfh.fh_generic;
3685 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3686 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3687 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3688 	if (error) {
3689 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3690 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3691 		error = 0;
3692 		goto nfsmout;
3693 	}
3694 	sf = &statfs;
3695 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3696 	getret = VOP_GETATTR(vp, &at);
3697 	vput(vp);
3698 	vp = NULL;
3699 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3700 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3701 			      &error));
3702 	if (info.v3)
3703 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3704 	if (error) {
3705 		error = 0;
3706 		goto nfsmout;
3707 	}
3708 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3709 	if (info.v3) {
3710 		tval = (u_quad_t)sf->f_blocks;
3711 		tval *= (u_quad_t)sf->f_bsize;
3712 		txdr_hyper(tval, &sfp->sf_tbytes);
3713 		tval = (u_quad_t)sf->f_bfree;
3714 		tval *= (u_quad_t)sf->f_bsize;
3715 		txdr_hyper(tval, &sfp->sf_fbytes);
3716 		tval = (u_quad_t)sf->f_bavail;
3717 		tval *= (u_quad_t)sf->f_bsize;
3718 		txdr_hyper(tval, &sfp->sf_abytes);
3719 		sfp->sf_tfiles.nfsuquad[0] = 0;
3720 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3721 		sfp->sf_ffiles.nfsuquad[0] = 0;
3722 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3723 		sfp->sf_afiles.nfsuquad[0] = 0;
3724 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3725 		sfp->sf_invarsec = 0;
3726 	} else {
3727 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3728 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3729 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3730 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3731 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3732 	}
3733 nfsmout:
3734 	*mrq = info.mreq;
3735 	if (vp)
3736 		vput(vp);
3737 	return(error);
3738 }
3739 
3740 /*
3741  * nfs fsinfo service
3742  */
3743 int
3744 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3745 	     struct thread *td, struct mbuf **mrq)
3746 {
3747 	struct sockaddr *nam = nfsd->nd_nam;
3748 	struct ucred *cred = &nfsd->nd_cr;
3749 	struct nfsv3_fsinfo *sip;
3750 	int error = 0, rdonly, getret = 1, pref;
3751 	struct vnode *vp = NULL;
3752 	struct mount *mp = NULL;
3753 	struct vattr at;
3754 	nfsfh_t nfh;
3755 	fhandle_t *fhp;
3756 	u_quad_t maxfsize;
3757 	struct statfs sb;
3758 	struct nfsm_info info;
3759 
3760 	info.mrep = nfsd->nd_mrep;
3761 	info.mreq = NULL;
3762 	info.md = nfsd->nd_md;
3763 	info.dpos = nfsd->nd_dpos;
3764 
3765 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3766 	fhp = &nfh.fh_generic;
3767 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3768 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3769 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3770 	if (error) {
3771 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3772 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3773 		error = 0;
3774 		goto nfsmout;
3775 	}
3776 
3777 	/* XXX Try to make a guess on the max file size. */
3778 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3779 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3780 
3781 	getret = VOP_GETATTR(vp, &at);
3782 	vput(vp);
3783 	vp = NULL;
3784 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3785 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3786 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3787 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3788 
3789 	/*
3790 	 * XXX
3791 	 * There should be file system VFS OP(s) to get this information.
3792 	 * For now, assume ufs.
3793 	 */
3794 	if (slp->ns_so->so_type == SOCK_DGRAM)
3795 		pref = NFS_MAXDGRAMDATA;
3796 	else
3797 		pref = NFS_MAXDATA;
3798 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3799 	sip->fs_rtpref = txdr_unsigned(pref);
3800 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3801 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3802 	sip->fs_wtpref = txdr_unsigned(pref);
3803 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3804 	sip->fs_dtpref = txdr_unsigned(pref);
3805 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3806 	sip->fs_timedelta.nfsv3_sec = 0;
3807 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3808 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3809 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3810 		NFSV3FSINFO_CANSETTIME);
3811 nfsmout:
3812 	*mrq = info.mreq;
3813 	if (vp)
3814 		vput(vp);
3815 	return(error);
3816 }
3817 
3818 /*
3819  * nfs pathconf service
3820  */
3821 int
3822 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3823 	       struct thread *td, struct mbuf **mrq)
3824 {
3825 	struct sockaddr *nam = nfsd->nd_nam;
3826 	struct ucred *cred = &nfsd->nd_cr;
3827 	struct nfsv3_pathconf *pc;
3828 	int error = 0, rdonly, getret = 1;
3829 	register_t linkmax, namemax, chownres, notrunc;
3830 	struct vnode *vp = NULL;
3831 	struct mount *mp = NULL;
3832 	struct vattr at;
3833 	nfsfh_t nfh;
3834 	fhandle_t *fhp;
3835 	struct nfsm_info info;
3836 
3837 	info.mrep = nfsd->nd_mrep;
3838 	info.mreq = NULL;
3839 	info.md = nfsd->nd_md;
3840 	info.dpos = nfsd->nd_dpos;
3841 
3842 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3843 	fhp = &nfh.fh_generic;
3844 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3845 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3846 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3847 	if (error) {
3848 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3849 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3850 		error = 0;
3851 		goto nfsmout;
3852 	}
3853 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3854 	if (!error)
3855 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3856 	if (!error)
3857 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3858 	if (!error)
3859 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3860 	getret = VOP_GETATTR(vp, &at);
3861 	vput(vp);
3862 	vp = NULL;
3863 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3864 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3865 			      &error));
3866 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3867 	if (error) {
3868 		error = 0;
3869 		goto nfsmout;
3870 	}
3871 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3872 
3873 	pc->pc_linkmax = txdr_unsigned(linkmax);
3874 	pc->pc_namemax = txdr_unsigned(namemax);
3875 	pc->pc_notrunc = txdr_unsigned(notrunc);
3876 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3877 
3878 	/*
3879 	 * These should probably be supported by VOP_PATHCONF(), but
3880 	 * until msdosfs is exportable (why would you want to?), the
3881 	 * Unix defaults should be ok.
3882 	 */
3883 	pc->pc_caseinsensitive = nfs_false;
3884 	pc->pc_casepreserving = nfs_true;
3885 nfsmout:
3886 	*mrq = info.mreq;
3887 	if (vp)
3888 		vput(vp);
3889 	return(error);
3890 }
3891 
3892 /*
3893  * Null operation, used by clients to ping server
3894  */
3895 /* ARGSUSED */
3896 int
3897 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3898 	   struct thread *td, struct mbuf **mrq)
3899 {
3900 	struct nfsm_info info;
3901 	int error = NFSERR_RETVOID;
3902 
3903 	info.mrep = nfsd->nd_mrep;
3904 	info.mreq = NULL;
3905 
3906 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3907 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3908 nfsmout:
3909 	*mrq = info.mreq;
3910 	return (error);
3911 }
3912 
3913 /*
3914  * No operation, used for obsolete procedures
3915  */
3916 /* ARGSUSED */
3917 int
3918 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3919 	   struct thread *td, struct mbuf **mrq)
3920 {
3921 	struct nfsm_info info;
3922 	int error;
3923 
3924 	info.mrep = nfsd->nd_mrep;
3925 	info.mreq = NULL;
3926 
3927 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3928 	if (nfsd->nd_repstat)
3929 		error = nfsd->nd_repstat;
3930 	else
3931 		error = EPROCUNAVAIL;
3932 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3933 	error = 0;
3934 nfsmout:
3935 	*mrq = info.mreq;
3936 	return (error);
3937 }
3938 
3939 /*
3940  * Perform access checking for vnodes obtained from file handles that would
3941  * refer to files already opened by a Unix client. You cannot just use
3942  * vn_writechk() and VOP_ACCESS() for two reasons.
3943  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3944  * 2 - The owner is to be given access irrespective of mode bits for some
3945  *     operations, so that processes that chmod after opening a file don't
3946  *     break. I don't like this because it opens a security hole, but since
3947  *     the nfs server opens a security hole the size of a barn door anyhow,
3948  *     what the heck.
3949  *
3950  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3951  * will return EPERM instead of EACCESS. EPERM is always an error.
3952  */
3953 static int
3954 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3955 	     int rdonly, struct thread *td, int override)
3956 {
3957 	struct vattr vattr;
3958 	int error;
3959 
3960 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3961 	if (flags & VWRITE) {
3962 		/* Just vn_writechk() changed to check rdonly */
3963 		/*
3964 		 * Disallow write attempts on read-only file systems;
3965 		 * unless the file is a socket or a block or character
3966 		 * device resident on the file system.
3967 		 */
3968 		if (rdonly ||
3969 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3970 			switch (vp->v_type) {
3971 			case VREG:
3972 			case VDIR:
3973 			case VLNK:
3974 				return (EROFS);
3975 			default:
3976 				break;
3977 			}
3978 		}
3979 		/*
3980 		 * If there's shared text associated with
3981 		 * the inode, we can't allow writing.
3982 		 */
3983 		if (vp->v_flag & VTEXT)
3984 			return (ETXTBSY);
3985 	}
3986 	error = VOP_GETATTR(vp, &vattr);
3987 	if (error)
3988 		return (error);
3989 	error = VOP_ACCESS(vp, flags, cred);	/* XXX ruid/rgid vs uid/gid */
3990 	/*
3991 	 * Allow certain operations for the owner (reads and writes
3992 	 * on files that are already open).
3993 	 */
3994 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3995 		error = 0;
3996 	return error;
3997 }
3998 #endif /* NFS_NOSERVER */
3999 
4000