xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision 9c600e7d)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.6 2003/06/26 05:55:18 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  *
65  *	lookup() and namei()
66  *	may return garbage in various structural fields/return elements
67  *	if an error is returned, and may garbage up nd.ni_dvp even if no
68  *	error is returned and you did not request LOCKPARENT or WANTPARENT.
69  *
70  *	We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
71  *	buffer has been freed or not.
72  */
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/proc.h>
77 #include <sys/namei.h>
78 #include <sys/unistd.h>
79 #include <sys/vnode.h>
80 #include <sys/mount.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/malloc.h>
84 #include <sys/mbuf.h>
85 #include <sys/dirent.h>
86 #include <sys/stat.h>
87 #include <sys/kernel.h>
88 #include <sys/sysctl.h>
89 #include <sys/buf.h>
90 
91 #include <vm/vm.h>
92 #include <vm/vm_extern.h>
93 #include <vm/vm_zone.h>
94 #include <vm/vm_object.h>
95 
96 #include <sys/buf2.h>
97 
98 #include <nfs/nfsproto.h>
99 #include <nfs/rpcv2.h>
100 #include <nfs/nfs.h>
101 #include <nfs/xdr_subs.h>
102 #include <nfs/nfsm_subs.h>
103 #include <nfs/nqnfs.h>
104 
/*
 * Debug tracing.  nfsdbprintf() takes one fully parenthesized printf()
 * argument list, e.g. nfsdbprintf(("%d\n", x)), and expands to nothing
 * unless NFSRV_DEBUG is defined.
 */
#ifndef NFSRV_DEBUG
#define nfsdbprintf(info)
#else
#define nfsdbprintf(info)	printf info
#endif

#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Tuning for the nfsheur[] read-heuristic table.  NUM_HEURISTIC is also
 * used as an index mask (NUM_HEURISTIC - 1), so it must stay a power of 2.
 */
#define NUM_HEURISTIC		64	/* number of slots in nfsheur[] */
#define NHUSE_INIT		64	/* nh_use given to a newly claimed slot */
#define NHUSE_INC		16	/* nh_use credit added on each read */
#define NHUSE_MAX		2048	/* ceiling for nh_use */
117 
/*
 * Sequential-read heuristic table consulted by nfsrv_read().  A slot is
 * matched by comparing nh_vp against the vnode being read.
 */
static struct nfsheur {
	struct vnode	*nh_vp;		/* vnode to match; the pointer is not referenced */
	off_t		nh_nextr;	/* offset expected for the next sequential read */
	int		nh_use;		/* use count driving slot selection */
	int		nh_seqcount;	/* sequential-access heuristic value */
} nfsheur[NUM_HEURISTIC];
124 
125 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
126 		      NFFIFO, NFNON };
127 #ifndef NFS_NOSERVER
128 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
129 		      NFCHR, NFNON };
130 /* Global vars */
131 extern u_int32_t nfs_xdrneg1;
132 extern u_int32_t nfs_false, nfs_true;
133 extern enum vtype nv3tov_type[8];
134 extern struct nfsstats nfsstats;
135 
136 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
137 int nfsrvw_procrastinate_v3 = 0;
138 
139 static struct timeval	nfsver = { 0 };
140 
141 SYSCTL_DECL(_vfs_nfs);
142 
143 static int nfs_async;
144 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
145 static int nfs_commit_blks;
146 static int nfs_commit_miss;
147 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
148 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
149 
150 static int nfsrv_access __P((struct vnode *,int,struct ucred *,int,
151 		struct thread *, int));
152 static void nfsrvw_coalesce __P((struct nfsrv_descript *,
153 		struct nfsrv_descript *));
154 
155 /*
156  * Clear nameidata fields that are tested in nsfmout cleanup code prior
157  * to using first nfsm macro (that might jump to the cleanup code).
158  */
159 
160 static __inline
161 void
162 ndclear(struct nameidata *nd)
163 {
164 	nd->ni_cnd.cn_flags = 0;
165 	nd->ni_vp = NULL;
166 	nd->ni_dvp = NULL;
167 	nd->ni_startdir = NULL;
168 }
169 
170 /*
171  * nfs v3 access service
172  */
173 int
174 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
175 	struct thread *td, struct mbuf **mrq)
176 {
177 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
178 	struct sockaddr *nam = nfsd->nd_nam;
179 	caddr_t dpos = nfsd->nd_dpos;
180 	struct ucred *cred = &nfsd->nd_cr;
181 	struct vnode *vp = NULL;
182 	nfsfh_t nfh;
183 	fhandle_t *fhp;
184 	u_int32_t *tl;
185 	int32_t t1;
186 	caddr_t bpos;
187 	int error = 0, rdonly, cache, getret;
188 	char *cp2;
189 	struct mbuf *mb, *mreq, *mb2;
190 	struct vattr vattr, *vap = &vattr;
191 	u_long testmode, nfsmode;
192 	u_quad_t frev;
193 
194 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
195 #ifndef nolint
196 	cache = 0;
197 #endif
198 	fhp = &nfh.fh_generic;
199 	nfsm_srvmtofh(fhp);
200 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
201 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
202 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
203 	if (error) {
204 		nfsm_reply(NFSX_UNSIGNED);
205 		nfsm_srvpostop_attr(1, (struct vattr *)0);
206 		error = 0;
207 		goto nfsmout;
208 	}
209 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
210 	if ((nfsmode & NFSV3ACCESS_READ) &&
211 		nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
212 		nfsmode &= ~NFSV3ACCESS_READ;
213 	if (vp->v_type == VDIR)
214 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
215 			NFSV3ACCESS_DELETE);
216 	else
217 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
218 	if ((nfsmode & testmode) &&
219 		nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
220 		nfsmode &= ~testmode;
221 	if (vp->v_type == VDIR)
222 		testmode = NFSV3ACCESS_LOOKUP;
223 	else
224 		testmode = NFSV3ACCESS_EXECUTE;
225 	if ((nfsmode & testmode) &&
226 		nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
227 		nfsmode &= ~testmode;
228 	getret = VOP_GETATTR(vp, vap, td);
229 	vput(vp);
230 	vp = NULL;
231 	nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
232 	nfsm_srvpostop_attr(getret, vap);
233 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
234 	*tl = txdr_unsigned(nfsmode);
235 nfsmout:
236 	if (vp)
237 		vput(vp);
238 	return(error);
239 }
240 
241 /*
242  * nfs getattr service
243  */
244 int
245 nfsrv_getattr(nfsd, slp, td, mrq)
246 	struct nfsrv_descript *nfsd;
247 	struct nfssvc_sock *slp;
248 	struct thread *td;
249 	struct mbuf **mrq;
250 {
251 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
252 	struct sockaddr *nam = nfsd->nd_nam;
253 	caddr_t dpos = nfsd->nd_dpos;
254 	struct ucred *cred = &nfsd->nd_cr;
255 	struct nfs_fattr *fp;
256 	struct vattr va;
257 	struct vattr *vap = &va;
258 	struct vnode *vp = NULL;
259 	nfsfh_t nfh;
260 	fhandle_t *fhp;
261 	u_int32_t *tl;
262 	int32_t t1;
263 	caddr_t bpos;
264 	int error = 0, rdonly, cache;
265 	char *cp2;
266 	struct mbuf *mb, *mb2, *mreq;
267 	u_quad_t frev;
268 
269 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
270 	fhp = &nfh.fh_generic;
271 	nfsm_srvmtofh(fhp);
272 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
273 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
274 	if (error) {
275 		nfsm_reply(0);
276 		error = 0;
277 		goto nfsmout;
278 	}
279 	nqsrv_getl(vp, ND_READ);
280 	error = VOP_GETATTR(vp, vap, td);
281 	vput(vp);
282 	vp = NULL;
283 	nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
284 	if (error) {
285 		error = 0;
286 		goto nfsmout;
287 	}
288 	nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
289 	nfsm_srvfillattr(vap, fp);
290 	/* fall through */
291 
292 nfsmout:
293 	if (vp)
294 		vput(vp);
295 	return(error);
296 }
297 
298 /*
299  * nfs setattr service
300  */
301 int
302 nfsrv_setattr(nfsd, slp, td, mrq)
303 	struct nfsrv_descript *nfsd;
304 	struct nfssvc_sock *slp;
305 	struct thread *td;
306 	struct mbuf **mrq;
307 {
308 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
309 	struct sockaddr *nam = nfsd->nd_nam;
310 	caddr_t dpos = nfsd->nd_dpos;
311 	struct ucred *cred = &nfsd->nd_cr;
312 	struct vattr va, preat;
313 	struct vattr *vap = &va;
314 	struct nfsv2_sattr *sp;
315 	struct nfs_fattr *fp;
316 	struct vnode *vp = NULL;
317 	nfsfh_t nfh;
318 	fhandle_t *fhp;
319 	u_int32_t *tl;
320 	int32_t t1;
321 	caddr_t bpos;
322 	int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1;
323 	int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
324 	char *cp2;
325 	struct mbuf *mb, *mb2, *mreq;
326 	u_quad_t frev;
327 	struct timespec guard;
328 
329 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
330 	fhp = &nfh.fh_generic;
331 	nfsm_srvmtofh(fhp);
332 	VATTR_NULL(vap);
333 	if (v3) {
334 		nfsm_srvsattr(vap);
335 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
336 		gcheck = fxdr_unsigned(int, *tl);
337 		if (gcheck) {
338 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
339 			fxdr_nfsv3time(tl, &guard);
340 		}
341 	} else {
342 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
343 		/*
344 		 * Nah nah nah nah na nah
345 		 * There is a bug in the Sun client that puts 0xffff in the mode
346 		 * field of sattr when it should put in 0xffffffff. The u_short
347 		 * doesn't sign extend.
348 		 * --> check the low order 2 bytes for 0xffff
349 		 */
350 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
351 			vap->va_mode = nfstov_mode(sp->sa_mode);
352 		if (sp->sa_uid != nfs_xdrneg1)
353 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
354 		if (sp->sa_gid != nfs_xdrneg1)
355 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
356 		if (sp->sa_size != nfs_xdrneg1)
357 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
358 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
359 #ifdef notyet
360 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
361 #else
362 			vap->va_atime.tv_sec =
363 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
364 			vap->va_atime.tv_nsec = 0;
365 #endif
366 		}
367 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
368 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
369 
370 	}
371 
372 	/*
373 	 * Now that we have all the fields, lets do it.
374 	 */
375 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
376 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
377 	if (error) {
378 		nfsm_reply(2 * NFSX_UNSIGNED);
379 		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
380 		error = 0;
381 		goto nfsmout;
382 	}
383 
384 	/*
385 	 * vp now an active resource, pay careful attention to cleanup
386 	 */
387 
388 	nqsrv_getl(vp, ND_WRITE);
389 	if (v3) {
390 		error = preat_ret = VOP_GETATTR(vp, &preat, td);
391 		if (!error && gcheck &&
392 			(preat.va_ctime.tv_sec != guard.tv_sec ||
393 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
394 			error = NFSERR_NOT_SYNC;
395 		if (error) {
396 			vput(vp);
397 			vp = NULL;
398 			nfsm_reply(NFSX_WCCDATA(v3));
399 			nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
400 			error = 0;
401 			goto nfsmout;
402 		}
403 	}
404 
405 	/*
406 	 * If the size is being changed write acces is required, otherwise
407 	 * just check for a read only file system.
408 	 */
409 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
410 		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
411 			error = EROFS;
412 			goto out;
413 		}
414 	} else {
415 		if (vp->v_type == VDIR) {
416 			error = EISDIR;
417 			goto out;
418 		} else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
419 			    td, 0)) != 0){
420 			goto out;
421 		}
422 	}
423 	error = VOP_SETATTR(vp, vap, cred, td);
424 	postat_ret = VOP_GETATTR(vp, vap, td);
425 	if (!error)
426 		error = postat_ret;
427 out:
428 	vput(vp);
429 	vp = NULL;
430 	nfsm_reply(NFSX_WCCORFATTR(v3));
431 	if (v3) {
432 		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
433 		error = 0;
434 		goto nfsmout;
435 	} else {
436 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
437 		nfsm_srvfillattr(vap, fp);
438 	}
439 	/* fall through */
440 
441 nfsmout:
442 	if (vp)
443 		vput(vp);
444 	return(error);
445 }
446 
447 /*
448  * nfs lookup rpc
449  */
450 int
451 nfsrv_lookup(nfsd, slp, td, mrq)
452 	struct nfsrv_descript *nfsd;
453 	struct nfssvc_sock *slp;
454 	struct thread *td;
455 	struct mbuf **mrq;
456 {
457 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
458 	struct sockaddr *nam = nfsd->nd_nam;
459 	caddr_t dpos = nfsd->nd_dpos;
460 	struct ucred *cred = &nfsd->nd_cr;
461 	struct nfs_fattr *fp;
462 	struct nameidata nd, ind, *ndp = &nd;
463 	struct vnode *vp, *dirp = NULL;
464 	nfsfh_t nfh;
465 	fhandle_t *fhp;
466 	caddr_t cp;
467 	u_int32_t *tl;
468 	int32_t t1;
469 	caddr_t bpos;
470 	int error = 0, cache, len, dirattr_ret = 1;
471 	int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
472 	char *cp2;
473 	struct mbuf *mb, *mb2, *mreq;
474 	struct vattr va, dirattr, *vap = &va;
475 	u_quad_t frev;
476 
477 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
478 	ndclear(&nd);
479 
480 	fhp = &nfh.fh_generic;
481 	nfsm_srvmtofh(fhp);
482 	nfsm_srvnamesiz(len);
483 
484 	pubflag = nfs_ispublicfh(fhp);
485 
486 	nd.ni_cnd.cn_cred = cred;
487 	nd.ni_cnd.cn_nameiop = LOOKUP;
488 	nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
489 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
490 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
491 
492 	/*
493 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
494 	 * structure in case macros jump to nfsmout.
495 	 */
496 
497 	if (error) {
498 		if (dirp) {
499 			if (v3)
500 				dirattr_ret = VOP_GETATTR(dirp, &dirattr, td);
501 			vrele(dirp);
502 			dirp = NULL;
503 		}
504 		nfsm_reply(NFSX_POSTOPATTR(v3));
505 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
506 		error = 0;
507 		goto nfsmout;
508 	}
509 
510 	/*
511 	 * Locate index file for public filehandle
512 	 *
513 	 * error is 0 on entry and 0 on exit from this block.
514 	 */
515 
516 	if (pubflag) {
517 		if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
518 			/*
519 			 * Setup call to lookup() to see if we can find
520 			 * the index file. Arguably, this doesn't belong
521 			 * in a kernel.. Ugh.  If an error occurs, do not
522 			 * try to install an index file and then clear the
523 			 * error.
524 			 *
525 			 * When we replace nd with ind and redirect ndp,
526 			 * maintenance of ni_startdir and ni_vp shift to
527 			 * ind and we have to clean them up in the old nd.
528 			 * However, the cnd resource continues to be maintained
529 			 * via the original nd.  Confused?  You aren't alone!
530 			 */
531 			ind = nd;
532 			VOP_UNLOCK(nd.ni_vp, 0, td);
533 			ind.ni_pathlen = strlen(nfs_pub.np_index);
534 			ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
535 			    nfs_pub.np_index;
536 			ind.ni_startdir = nd.ni_vp;
537 			VREF(ind.ni_startdir);
538 
539 			error = lookup(&ind);
540 			ind.ni_dvp = NULL;
541 
542 			if (error == 0) {
543 				/*
544 				 * Found an index file. Get rid of
545 				 * the old references.  transfer nd.ni_vp'
546 				 */
547 				if (dirp)
548 					vrele(dirp);
549 				dirp = nd.ni_vp;
550 				nd.ni_vp = NULL;
551 				vrele(nd.ni_startdir);
552 				nd.ni_startdir = NULL;
553 				ndp = &ind;
554 			}
555 			error = 0;
556 		}
557 		/*
558 		 * If the public filehandle was used, check that this lookup
559 		 * didn't result in a filehandle outside the publicly exported
560 		 * filesystem.  We clear the poor vp here to avoid lockups due
561 		 * to NFS I/O.
562 		 */
563 
564 		if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
565 			vput(nd.ni_vp);
566 			nd.ni_vp = NULL;
567 			error = EPERM;
568 		}
569 	}
570 
571 	if (dirp) {
572 		if (v3)
573 			dirattr_ret = VOP_GETATTR(dirp, &dirattr, td);
574 		vrele(dirp);
575 		dirp = NULL;
576 	}
577 
578 	/*
579 	 * Resources at this point:
580 	 *	ndp->ni_vp	may not be NULL
581 	 *
582 	 */
583 
584 	if (error) {
585 		nfsm_reply(NFSX_POSTOPATTR(v3));
586 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
587 		error = 0;
588 		goto nfsmout;
589 	}
590 
591 	nqsrv_getl(ndp->ni_startdir, ND_READ);
592 
593 	/*
594 	 * Clear out some resources prior to potentially blocking.  This
595 	 * is not as critical as ni_dvp resources in other routines, but
596 	 * it helps.
597 	 */
598 	vrele(ndp->ni_startdir);
599 	ndp->ni_startdir = NULL;
600 	NDFREE(&nd, NDF_ONLY_PNBUF);
601 
602 	/*
603 	 * Get underlying attribute, then release remaining resources ( for
604 	 * the same potential blocking reason ) and reply.
605 	 */
606 	vp = ndp->ni_vp;
607 	bzero((caddr_t)fhp, sizeof(nfh));
608 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
609 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
610 	if (!error)
611 		error = VOP_GETATTR(vp, vap, td);
612 
613 	vput(vp);
614 	ndp->ni_vp = NULL;
615 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
616 	if (error) {
617 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
618 		error = 0;
619 		goto nfsmout;
620 	}
621 	nfsm_srvfhtom(fhp, v3);
622 	if (v3) {
623 		nfsm_srvpostop_attr(0, vap);
624 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
625 	} else {
626 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
627 		nfsm_srvfillattr(vap, fp);
628 	}
629 
630 nfsmout:
631 	if (dirp)
632 		vrele(dirp);
633 	NDFREE(&nd, NDF_ONLY_PNBUF);
634 	if (ndp->ni_startdir)
635 		vrele(ndp->ni_startdir);
636 	if (ndp->ni_vp)
637 		vput(ndp->ni_vp);
638 	return (error);
639 }
640 
641 /*
642  * nfs readlink service
643  */
644 int
645 nfsrv_readlink(nfsd, slp, td, mrq)
646 	struct nfsrv_descript *nfsd;
647 	struct nfssvc_sock *slp;
648 	struct thread *td;
649 	struct mbuf **mrq;
650 {
651 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
652 	struct sockaddr *nam = nfsd->nd_nam;
653 	caddr_t dpos = nfsd->nd_dpos;
654 	struct ucred *cred = &nfsd->nd_cr;
655 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
656 	struct iovec *ivp = iv;
657 	struct mbuf *mp;
658 	u_int32_t *tl;
659 	int32_t t1;
660 	caddr_t bpos;
661 	int error = 0, rdonly, cache, i, tlen, len, getret;
662 	int v3 = (nfsd->nd_flag & ND_NFSV3);
663 	char *cp2;
664 	struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
665 	struct vnode *vp = NULL;
666 	struct vattr attr;
667 	nfsfh_t nfh;
668 	fhandle_t *fhp;
669 	struct uio io, *uiop = &io;
670 	u_quad_t frev;
671 
672 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
673 #ifndef nolint
674 	mp2 = (struct mbuf *)0;
675 #endif
676 	mp3 = NULL;
677 	fhp = &nfh.fh_generic;
678 	nfsm_srvmtofh(fhp);
679 	len = 0;
680 	i = 0;
681 	while (len < NFS_MAXPATHLEN) {
682 		MGET(mp, M_WAIT, MT_DATA);
683 		MCLGET(mp, M_WAIT);
684 		mp->m_len = NFSMSIZ(mp);
685 		if (len == 0)
686 			mp3 = mp2 = mp;
687 		else {
688 			mp2->m_next = mp;
689 			mp2 = mp;
690 		}
691 		if ((len+mp->m_len) > NFS_MAXPATHLEN) {
692 			mp->m_len = NFS_MAXPATHLEN-len;
693 			len = NFS_MAXPATHLEN;
694 		} else
695 			len += mp->m_len;
696 		ivp->iov_base = mtod(mp, caddr_t);
697 		ivp->iov_len = mp->m_len;
698 		i++;
699 		ivp++;
700 	}
701 	uiop->uio_iov = iv;
702 	uiop->uio_iovcnt = i;
703 	uiop->uio_offset = 0;
704 	uiop->uio_resid = len;
705 	uiop->uio_rw = UIO_READ;
706 	uiop->uio_segflg = UIO_SYSSPACE;
707 	uiop->uio_td = NULL;
708 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
709 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
710 	if (error) {
711 		nfsm_reply(2 * NFSX_UNSIGNED);
712 		nfsm_srvpostop_attr(1, (struct vattr *)0);
713 		error = 0;
714 		goto nfsmout;
715 	}
716 	if (vp->v_type != VLNK) {
717 		if (v3)
718 			error = EINVAL;
719 		else
720 			error = ENXIO;
721 		goto out;
722 	}
723 	nqsrv_getl(vp, ND_READ);
724 	error = VOP_READLINK(vp, uiop, cred);
725 out:
726 	getret = VOP_GETATTR(vp, &attr, td);
727 	vput(vp);
728 	vp = NULL;
729 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
730 	if (v3) {
731 		nfsm_srvpostop_attr(getret, &attr);
732 		if (error) {
733 			error = 0;
734 			goto nfsmout;
735 		}
736 	}
737 	if (uiop->uio_resid > 0) {
738 		len -= uiop->uio_resid;
739 		tlen = nfsm_rndup(len);
740 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
741 	}
742 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
743 	*tl = txdr_unsigned(len);
744 	mb->m_next = mp3;
745 	mp3 = NULL;
746 nfsmout:
747 	if (mp3)
748 		m_freem(mp3);
749 	if (vp)
750 		vput(vp);
751 	return(error);
752 }
753 
754 /*
755  * nfs read service
756  */
757 int
758 nfsrv_read(nfsd, slp, td, mrq)
759 	struct nfsrv_descript *nfsd;
760 	struct nfssvc_sock *slp;
761 	struct thread *td;
762 	struct mbuf **mrq;
763 {
764 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
765 	struct sockaddr *nam = nfsd->nd_nam;
766 	caddr_t dpos = nfsd->nd_dpos;
767 	struct ucred *cred = &nfsd->nd_cr;
768 	struct iovec *iv;
769 	struct iovec *iv2;
770 	struct mbuf *m;
771 	struct nfs_fattr *fp;
772 	u_int32_t *tl;
773 	int32_t t1;
774 	int i;
775 	caddr_t bpos;
776 	int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret;
777 	int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
778 	char *cp2;
779 	struct mbuf *mb, *mb2, *mreq;
780 	struct mbuf *m2;
781 	struct vnode *vp = NULL;
782 	nfsfh_t nfh;
783 	fhandle_t *fhp;
784 	struct uio io, *uiop = &io;
785 	struct vattr va, *vap = &va;
786 	struct nfsheur *nh;
787 	off_t off;
788 	int ioflag = 0;
789 	u_quad_t frev;
790 
791 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
792 	fhp = &nfh.fh_generic;
793 	nfsm_srvmtofh(fhp);
794 	if (v3) {
795 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
796 		off = fxdr_hyper(tl);
797 	} else {
798 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
799 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
800 	}
801 	nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
802 
803 	/*
804 	 * Reference vp.  If an error occurs, vp will be invalid, but we
805 	 * have to NULL it just in case.  The macros might goto nfsmout
806 	 * as well.
807 	 */
808 
809 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
810 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
811 	if (error) {
812 		vp = NULL;
813 		nfsm_reply(2 * NFSX_UNSIGNED);
814 		nfsm_srvpostop_attr(1, (struct vattr *)0);
815 		error = 0;
816 		goto nfsmout;
817 	}
818 
819 	if (vp->v_type != VREG) {
820 		if (v3)
821 			error = EINVAL;
822 		else
823 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
824 	}
825 	if (!error) {
826 	    nqsrv_getl(vp, ND_READ);
827 	    if ((error = nfsrv_access(vp, VREAD, cred, rdonly, td, 1)) != 0)
828 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 1);
829 	}
830 	getret = VOP_GETATTR(vp, vap, td);
831 	if (!error)
832 		error = getret;
833 	if (error) {
834 		vput(vp);
835 		vp = NULL;
836 		nfsm_reply(NFSX_POSTOPATTR(v3));
837 		nfsm_srvpostop_attr(getret, vap);
838 		error = 0;
839 		goto nfsmout;
840 	}
841 
842 	/*
843 	 * Calculate byte count to read
844 	 */
845 
846 	if (off >= vap->va_size)
847 		cnt = 0;
848 	else if ((off + reqlen) > vap->va_size)
849 		cnt = vap->va_size - off;
850 	else
851 		cnt = reqlen;
852 
853 	/*
854 	 * Calculate seqcount for heuristic
855 	 */
856 
857 	{
858 		int hi;
859 		int try = 4;
860 
861 		/*
862 		 * Locate best candidate
863 		 */
864 
865 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) & (NUM_HEURISTIC - 1);
866 		nh = &nfsheur[hi];
867 
868 		while (try--) {
869 			if (nfsheur[hi].nh_vp == vp) {
870 				nh = &nfsheur[hi];
871 				break;
872 			}
873 			if (nfsheur[hi].nh_use > 0)
874 				--nfsheur[hi].nh_use;
875 			hi = (hi + 1) & (NUM_HEURISTIC - 1);
876 			if (nfsheur[hi].nh_use < nh->nh_use)
877 				nh = &nfsheur[hi];
878 		}
879 
880 		if (nh->nh_vp != vp) {
881 			nh->nh_vp = vp;
882 			nh->nh_nextr = off;
883 			nh->nh_use = NHUSE_INIT;
884 			if (off == 0)
885 				nh->nh_seqcount = 4;
886 			else
887 				nh->nh_seqcount = 1;
888 		}
889 
890 		/*
891 		 * Calculate heuristic
892 		 */
893 
894 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
895 			if (++nh->nh_seqcount > IO_SEQMAX)
896 				nh->nh_seqcount = IO_SEQMAX;
897 		} else if (nh->nh_seqcount > 1) {
898 			nh->nh_seqcount = 1;
899 		} else {
900 			nh->nh_seqcount = 0;
901 		}
902 		nh->nh_use += NHUSE_INC;
903 		if (nh->nh_use > NHUSE_MAX)
904 			nh->nh_use = NHUSE_MAX;
905 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
906         }
907 
908 	nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
909 	if (v3) {
910 		nfsm_build(tl, u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
911 		*tl++ = nfs_true;
912 		fp = (struct nfs_fattr *)tl;
913 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
914 	} else {
915 		nfsm_build(tl, u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
916 		fp = (struct nfs_fattr *)tl;
917 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
918 	}
919 	len = left = nfsm_rndup(cnt);
920 	if (cnt > 0) {
921 		/*
922 		 * Generate the mbuf list with the uio_iov ref. to it.
923 		 */
924 		i = 0;
925 		m = m2 = mb;
926 		while (left > 0) {
927 			siz = min(M_TRAILINGSPACE(m), left);
928 			if (siz > 0) {
929 				left -= siz;
930 				i++;
931 			}
932 			if (left > 0) {
933 				MGET(m, M_WAIT, MT_DATA);
934 				MCLGET(m, M_WAIT);
935 				m->m_len = 0;
936 				m2->m_next = m;
937 				m2 = m;
938 			}
939 		}
940 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
941 		       M_TEMP, M_WAITOK);
942 		uiop->uio_iov = iv2 = iv;
943 		m = mb;
944 		left = len;
945 		i = 0;
946 		while (left > 0) {
947 			if (m == NULL)
948 				panic("nfsrv_read iov");
949 			siz = min(M_TRAILINGSPACE(m), left);
950 			if (siz > 0) {
951 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
952 				iv->iov_len = siz;
953 				m->m_len += siz;
954 				left -= siz;
955 				iv++;
956 				i++;
957 			}
958 			m = m->m_next;
959 		}
960 		uiop->uio_iovcnt = i;
961 		uiop->uio_offset = off;
962 		uiop->uio_resid = len;
963 		uiop->uio_rw = UIO_READ;
964 		uiop->uio_segflg = UIO_SYSSPACE;
965 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
966 		off = uiop->uio_offset;
967 		nh->nh_nextr = off;
968 		FREE((caddr_t)iv2, M_TEMP);
969 		if (error || (getret = VOP_GETATTR(vp, vap, td))) {
970 			if (!error)
971 				error = getret;
972 			m_freem(mreq);
973 			vput(vp);
974 			vp = NULL;
975 			nfsm_reply(NFSX_POSTOPATTR(v3));
976 			nfsm_srvpostop_attr(getret, vap);
977 			error = 0;
978 			goto nfsmout;
979 		}
980 	} else {
981 		uiop->uio_resid = 0;
982 	}
983 	vput(vp);
984 	vp = NULL;
985 	nfsm_srvfillattr(vap, fp);
986 	tlen = len - uiop->uio_resid;
987 	cnt = cnt < tlen ? cnt : tlen;
988 	tlen = nfsm_rndup(cnt);
989 	if (len != tlen || tlen != cnt)
990 		nfsm_adj(mb, len - tlen, tlen - cnt);
991 	if (v3) {
992 		*tl++ = txdr_unsigned(cnt);
993 		if (len < reqlen)
994 			*tl++ = nfs_true;
995 		else
996 			*tl++ = nfs_false;
997 	}
998 	*tl = txdr_unsigned(cnt);
999 nfsmout:
1000 	if (vp)
1001 		vput(vp);
1002 	return(error);
1003 }
1004 
1005 /*
1006  * nfs write service
1007  */
1008 int
1009 nfsrv_write(nfsd, slp, td, mrq)
1010 	struct nfsrv_descript *nfsd;
1011 	struct nfssvc_sock *slp;
1012 	struct thread *td;
1013 	struct mbuf **mrq;
1014 {
1015 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1016 	struct sockaddr *nam = nfsd->nd_nam;
1017 	caddr_t dpos = nfsd->nd_dpos;
1018 	struct ucred *cred = &nfsd->nd_cr;
1019 	struct iovec *ivp;
1020 	int i, cnt;
1021 	struct mbuf *mp;
1022 	struct nfs_fattr *fp;
1023 	struct iovec *iv;
1024 	struct vattr va, forat;
1025 	struct vattr *vap = &va;
1026 	u_int32_t *tl;
1027 	int32_t t1;
1028 	caddr_t bpos;
1029 	int error = 0, rdonly, cache, len, forat_ret = 1;
1030 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1031 	int stable = NFSV3WRITE_FILESYNC;
1032 	int v3 = (nfsd->nd_flag & ND_NFSV3);
1033 	char *cp2;
1034 	struct mbuf *mb, *mb2, *mreq;
1035 	struct vnode *vp = NULL;
1036 	nfsfh_t nfh;
1037 	fhandle_t *fhp;
1038 	struct uio io, *uiop = &io;
1039 	off_t off;
1040 	u_quad_t frev;
1041 
1042 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1043 	if (mrep == NULL) {
1044 		*mrq = NULL;
1045 		error = 0;
1046 		goto nfsmout;
1047 	}
1048 	fhp = &nfh.fh_generic;
1049 	nfsm_srvmtofh(fhp);
1050 	if (v3) {
1051 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1052 		off = fxdr_hyper(tl);
1053 		tl += 3;
1054 		stable = fxdr_unsigned(int, *tl++);
1055 	} else {
1056 		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1057 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1058 		tl += 2;
1059 		if (nfs_async)
1060 	    		stable = NFSV3WRITE_UNSTABLE;
1061 	}
1062 	retlen = len = fxdr_unsigned(int32_t, *tl);
1063 	cnt = i = 0;
1064 
1065 	/*
1066 	 * For NFS Version 2, it is not obvious what a write of zero length
1067 	 * should do, but I might as well be consistent with Version 3,
1068 	 * which is to return ok so long as there are no permission problems.
1069 	 */
1070 	if (len > 0) {
1071 	    zeroing = 1;
1072 	    mp = mrep;
1073 	    while (mp) {
1074 		if (mp == md) {
1075 			zeroing = 0;
1076 			adjust = dpos - mtod(mp, caddr_t);
1077 			mp->m_len -= adjust;
1078 			if (mp->m_len > 0 && adjust > 0)
1079 				NFSMADV(mp, adjust);
1080 		}
1081 		if (zeroing)
1082 			mp->m_len = 0;
1083 		else if (mp->m_len > 0) {
1084 			i += mp->m_len;
1085 			if (i > len) {
1086 				mp->m_len -= (i - len);
1087 				zeroing	= 1;
1088 			}
1089 			if (mp->m_len > 0)
1090 				cnt++;
1091 		}
1092 		mp = mp->m_next;
1093 	    }
1094 	}
1095 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1096 		error = EIO;
1097 		nfsm_reply(2 * NFSX_UNSIGNED);
1098 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1099 		error = 0;
1100 		goto nfsmout;
1101 	}
1102 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
1103 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1104 	if (error) {
1105 		vp = NULL;
1106 		nfsm_reply(2 * NFSX_UNSIGNED);
1107 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1108 		error = 0;
1109 		goto nfsmout;
1110 	}
1111 	if (v3)
1112 		forat_ret = VOP_GETATTR(vp, &forat, td);
1113 	if (vp->v_type != VREG) {
1114 		if (v3)
1115 			error = EINVAL;
1116 		else
1117 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1118 	}
1119 	if (!error) {
1120 		nqsrv_getl(vp, ND_WRITE);
1121 		error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1122 	}
1123 	if (error) {
1124 		vput(vp);
1125 		vp = NULL;
1126 		nfsm_reply(NFSX_WCCDATA(v3));
1127 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1128 		error = 0;
1129 		goto nfsmout;
1130 	}
1131 
1132 	if (len > 0) {
1133 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1134 		M_WAITOK);
1135 	    uiop->uio_iov = iv = ivp;
1136 	    uiop->uio_iovcnt = cnt;
1137 	    mp = mrep;
1138 	    while (mp) {
1139 		if (mp->m_len > 0) {
1140 			ivp->iov_base = mtod(mp, caddr_t);
1141 			ivp->iov_len = mp->m_len;
1142 			ivp++;
1143 		}
1144 		mp = mp->m_next;
1145 	    }
1146 
1147 	    /*
1148 	     * XXX
1149 	     * The IO_METASYNC flag indicates that all metadata (and not just
1150 	     * enough to ensure data integrity) must be written to stable storage
1151 	     * synchronously.
1152 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1153 	     */
1154 	    if (stable == NFSV3WRITE_UNSTABLE)
1155 		ioflags = IO_NODELOCKED;
1156 	    else if (stable == NFSV3WRITE_DATASYNC)
1157 		ioflags = (IO_SYNC | IO_NODELOCKED);
1158 	    else
1159 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1160 	    uiop->uio_resid = len;
1161 	    uiop->uio_rw = UIO_WRITE;
1162 	    uiop->uio_segflg = UIO_SYSSPACE;
1163 	    uiop->uio_td = NULL;
1164 	    uiop->uio_offset = off;
1165 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1166 	    nfsstats.srvvop_writes++;
1167 	    FREE((caddr_t)iv, M_TEMP);
1168 	}
1169 	aftat_ret = VOP_GETATTR(vp, vap, td);
1170 	vput(vp);
1171 	vp = NULL;
1172 	if (!error)
1173 		error = aftat_ret;
1174 	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
1175 		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
1176 	if (v3) {
1177 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1178 		if (error) {
1179 			error = 0;
1180 			goto nfsmout;
1181 		}
1182 		nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1183 		*tl++ = txdr_unsigned(retlen);
1184 		/*
1185 		 * If nfs_async is set, then pretend the write was FILESYNC.
1186 		 */
1187 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1188 			*tl++ = txdr_unsigned(stable);
1189 		else
1190 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1191 		/*
1192 		 * Actually, there is no need to txdr these fields,
1193 		 * but it may make the values more human readable,
1194 		 * for debugging purposes.
1195 		 */
1196 		if (nfsver.tv_sec == 0)
1197 			nfsver = boottime;
1198 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1199 		*tl = txdr_unsigned(nfsver.tv_usec);
1200 	} else {
1201 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1202 		nfsm_srvfillattr(vap, fp);
1203 	}
1204 nfsmout:
1205 	if (vp)
1206 		vput(vp);
1207 	return(error);
1208 }
1209 
1210 /*
1211  * NFS write service with write gathering support. Called when
1212  * nfsrvw_procrastinate > 0.
1213  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1214  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1215  * Jan. 1994.
 *
 * The work is done in three phases:
 *  1. If *ndp is non-NULL the request is taken from the caller (*ndp is
 *     set to NULL), its write header is parsed, its mbuf chain is trimmed
 *     down to the write data, and it is queued on slp->ns_tq (ordered by
 *     nd_time) and on the delay hash chain for its file handle, where
 *     entries accepted by NFSW_CONTIG()/NFSW_SAMECRED() are coalesced.
 *  2. Every queued request whose nd_time has passed and that has no reply
 *     yet is written with VOP_WRITE(); replies are then built for it and
 *     for every request on its nd_coalesce list.
 *  3. The first queued request that has a reply (nd_mreq) is removed from
 *     the queue and handed back through *ndp and *mrq.
 *
 * ndp	in: request to queue (may point at NULL); out: request whose reply
 *	is being returned, if any.
 * slp	socket structure holding the time queue and the delay hash.
 * td	thread passed to VOP_GETATTR() and nfsrv_access().
 * mrq	out: reply mbuf list.  Set to NULL on entry and only assigned
 *	again in phase 3 by the code visible here.
 *
 * The visible return statement always returns 0.
1216  */
1217 int
1218 nfsrv_writegather(ndp, slp, td, mrq)
1219 	struct nfsrv_descript **ndp;
1220 	struct nfssvc_sock *slp;
1221 	struct thread *td;
1222 	struct mbuf **mrq;
1223 {
1224 	struct iovec *ivp;
1225 	struct mbuf *mp;
1226 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1227 	struct nfs_fattr *fp;
1228 	int i;
1229 	struct iovec *iov;
1230 	struct nfsrvw_delayhash *wpp;
1231 	struct ucred *cred;
1232 	struct vattr va, forat;
1233 	u_int32_t *tl;
1234 	int32_t t1;
1235 	caddr_t bpos, dpos;
1236 	int error = 0, rdonly, cache, len, forat_ret = 1;
1237 	int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
1238 	char *cp2;
1239 	struct mbuf *mb, *mb2, *mreq, *mrep, *md;
1240 	struct vnode *vp = NULL;
1241 	struct uio io, *uiop = &io;
1242 	u_quad_t frev, cur_usec;
1243 
1244 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1245 #ifndef nolint
1246 	i = 0;
1247 	len = 0;
1248 #endif
1249 	*mrq = NULL;
	/*
	 * Phase 1: a new request was passed in.  Take it from the caller,
	 * default it to FILESYNC and give it a deadline of "now" plus the
	 * procrastination interval for its protocol version.
	 */
1250 	if (*ndp) {
1251 	    nfsd = *ndp;
1252 	    *ndp = NULL;
1253 	    mrep = nfsd->nd_mrep;
1254 	    md = nfsd->nd_md;
1255 	    dpos = nfsd->nd_dpos;
1256 	    cred = &nfsd->nd_cr;
1257 	    v3 = (nfsd->nd_flag & ND_NFSV3);
1258 	    LIST_INIT(&nfsd->nd_coalesce);
1259 	    nfsd->nd_mreq = NULL;
1260 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1261 	    cur_usec = nfs_curusec();
1262 	    nfsd->nd_time = cur_usec +
1263 		(v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1264 
1265 	    /*
1266 	     * Now, get the write header..
	     * v3 carries a 64-bit offset and a stable_how word; for v2 only
	     * the second of the four words is used as the offset, and
	     * nfs_async downgrades the request to UNSTABLE.
1267 	     */
1268 	    nfsm_srvmtofh(&nfsd->nd_fh);
1269 	    if (v3) {
1270 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1271 		nfsd->nd_off = fxdr_hyper(tl);
1272 		tl += 3;
1273 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1274 	    } else {
1275 		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1276 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1277 		tl += 2;
1278 		if (nfs_async)
1279 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1280 	    }
1281 	    len = fxdr_unsigned(int32_t, *tl);
1282 	    nfsd->nd_len = len;
1283 	    nfsd->nd_eoff = nfsd->nd_off + len;
1284 
1285 	    /*
1286 	     * Trim the header out of the mbuf list and trim off any trailing
1287 	     * junk so that the mbuf list has only the write data.
	     * Mbufs before md are emptied, md is advanced past dpos, and
	     * once i (bytes kept so far) exceeds len the excess is cut and
	     * every later mbuf is emptied as well.
1288 	     */
1289 	    zeroing = 1;
1290 	    i = 0;
1291 	    mp = mrep;
1292 	    while (mp) {
1293 		if (mp == md) {
1294 		    zeroing = 0;
1295 		    adjust = dpos - mtod(mp, caddr_t);
1296 		    mp->m_len -= adjust;
1297 		    if (mp->m_len > 0 && adjust > 0)
1298 			NFSMADV(mp, adjust);
1299 		}
1300 		if (zeroing)
1301 		    mp->m_len = 0;
1302 		else {
1303 		    i += mp->m_len;
1304 		    if (i > len) {
1305 			mp->m_len -= (i - len);
1306 			zeroing = 1;
1307 		    }
1308 		}
1309 		mp = mp->m_next;
1310 	    }
	    /*
	     * Reject oversized, negative or short requests with an EIO reply.
	     * NOTE(review): the nfsmout label sits inside this if-body; the
	     * nfsm_* macros used above presumably jump here on a parse
	     * failure (their definitions are not in view) - confirm.
	     * The reply is stored in nd_mreq, nd_mrep is cleared so the
	     * request is kept off the hash chain below, and nd_time is zeroed.
	     */
1311 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1312 nfsmout:
1313 		m_freem(mrep);
1314 		error = EIO;
1315 		nfsm_writereply(2 * NFSX_UNSIGNED, v3);
1316 		if (v3)
1317 		    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1318 		nfsd->nd_mreq = mreq;
1319 		nfsd->nd_mrep = NULL;
1320 		nfsd->nd_time = 0;
1321 	    }
1322 
1323 	    /*
1324 	     * Add this entry to the hash and time queues.
	     * The time queue is kept in ascending nd_time order: owp ends up
	     * as the last entry with a smaller nd_time, or NULL.
1325 	     */
1326 	    s = splsoftclock();
1327 	    owp = NULL;
1328 	    wp = slp->ns_tq.lh_first;
1329 	    while (wp && wp->nd_time < nfsd->nd_time) {
1330 		owp = wp;
1331 		wp = wp->nd_tq.le_next;
1332 	    }
1333 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1334 	    if (owp) {
1335 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1336 	    } else {
1337 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1338 	    }
	    /*
	     * Only requests that still hold data go on the hash chain.
	     * First skip entries for other file handles, then skip entries
	     * for the same handle that start at a lower offset.
	     */
1339 	    if (nfsd->nd_mrep) {
1340 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1341 		owp = NULL;
1342 		wp = wpp->lh_first;
1343 		while (wp &&
1344 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1345 		    owp = wp;
1346 		    wp = wp->nd_hash.le_next;
1347 		}
1348 		while (wp && wp->nd_off < nfsd->nd_off &&
1349 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1350 		    owp = wp;
1351 		    wp = wp->nd_hash.le_next;
1352 		}
1353 		if (owp) {
1354 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1355 
1356 		    /*
1357 		     * Search the hash list for overlapping entries and
1358 		     * coalesce.
		     * The successor is fetched into wp before the call
		     * because nfsrvw_coalesce() unlinks nfsd from the chain.
1359 		     */
1360 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1361 			wp = nfsd->nd_hash.le_next;
1362 			if (NFSW_SAMECRED(owp, nfsd))
1363 			    nfsrvw_coalesce(owp, nfsd);
1364 		    }
1365 		} else {
1366 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1367 		}
1368 	    }
1369 	    splx(s);
1370 	}
1371 
1372 	/*
1373 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1374 	 * and generate the associated reply mbuf list(s).
	 * (Phase 2.)  The scan restarts from loop1 after each request is
	 * processed, since the queue is modified with the spl dropped.
1375 	 */
1376 loop1:
1377 	cur_usec = nfs_curusec();
1378 	s = splsoftclock();
1379 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1380 		owp = nfsd->nd_tq.le_next;
		/* First entry that is not yet due ends the scan. */
1381 		if (nfsd->nd_time > cur_usec)
1382 		    break;
		/* Already has a reply; phase 3 will return it. */
1383 		if (nfsd->nd_mreq)
1384 		    continue;
1385 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1386 		LIST_REMOVE(nfsd, nd_tq);
1387 		LIST_REMOVE(nfsd, nd_hash);
1388 		splx(s);
1389 		mrep = nfsd->nd_mrep;
1390 		nfsd->nd_mrep = NULL;
1391 		cred = &nfsd->nd_cr;
1392 		v3 = (nfsd->nd_flag & ND_NFSV3);
1393 		forat_ret = aftat_ret = 1;
1394 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
1395 		    nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1396 		if (!error) {
1397 		    if (v3)
1398 			forat_ret = VOP_GETATTR(vp, &forat, td);
1399 		    if (vp->v_type != VREG) {
1400 			if (v3)
1401 			    error = EINVAL;
1402 			else
1403 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1404 		    }
1405 		} else {
1406 		    vp = NULL;
1407 		}
1408 		if (!error) {
1409 		    nqsrv_getl(vp, ND_WRITE);
1410 		    error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1411 		}
1412 
		/* Map the requested stability level onto VOP_WRITE flags. */
1413 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1414 		    ioflags = IO_NODELOCKED;
1415 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1416 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1417 		else
1418 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1419 		uiop->uio_rw = UIO_WRITE;
1420 		uiop->uio_segflg = UIO_SYSSPACE;
1421 		uiop->uio_td = NULL;
1422 		uiop->uio_offset = nfsd->nd_off;
		/* nd_eoff may have been extended by nfsrvw_coalesce(). */
1423 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1424 		if (uiop->uio_resid > 0) {
		    /* One iovec per non-empty mbuf: count, then fill. */
1425 		    mp = mrep;
1426 		    i = 0;
1427 		    while (mp) {
1428 			if (mp->m_len > 0)
1429 			    i++;
1430 			mp = mp->m_next;
1431 		    }
1432 		    uiop->uio_iovcnt = i;
1433 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1434 			M_TEMP, M_WAITOK);
1435 		    uiop->uio_iov = ivp = iov;
1436 		    mp = mrep;
1437 		    while (mp) {
1438 			if (mp->m_len > 0) {
1439 			    ivp->iov_base = mtod(mp, caddr_t);
1440 			    ivp->iov_len = mp->m_len;
1441 			    ivp++;
1442 			}
1443 			mp = mp->m_next;
1444 		    }
		    /* The write is skipped if an earlier step failed. */
1445 		    if (!error) {
1446 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1447 			nfsstats.srvvop_writes++;
1448 		    }
1449 		    FREE((caddr_t)iov, M_TEMP);
1450 		}
1451 		m_freem(mrep);
1452 		if (vp) {
1453 		    aftat_ret = VOP_GETATTR(vp, &va, td);
1454 		    vput(vp);
1455 		    vp = NULL;
1456 		}
1457 
1458 		/*
1459 		 * Loop around generating replies for all write rpcs that have
1460 		 * now been completed.
		 * swp is the request that was written; nfsd starts at swp and
		 * then takes each entry from swp->nd_coalesce in turn.  The
		 * same error, v3 flag and attributes are used for every reply.
1461 		 */
1462 		swp = nfsd;
1463 		do {
1464 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1465 		    if (error) {
1466 			nfsm_writereply(NFSX_WCCDATA(v3), v3);
1467 			if (v3) {
1468 			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1469 			}
1470 		    } else {
1471 			nfsm_writereply(NFSX_PREOPATTR(v3) +
1472 			    NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
1473 			    NFSX_WRITEVERF(v3), v3);
1474 			if (v3) {
1475 			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1476 			    nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			    /* Own length, but the stability of the write. */
1477 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1478 			    *tl++ = txdr_unsigned(swp->nd_stable);
1479 			    /*
1480 			     * Actually, there is no need to txdr these fields,
1481 			     * but it may make the values more human readable,
1482 			     * for debugging purposes.
1483 			     */
1484 			    if (nfsver.tv_sec == 0)
1485 				    nfsver = boottime;
1486 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1487 			    *tl = txdr_unsigned(nfsver.tv_usec);
1488 			} else {
1489 			    nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1490 			    nfsm_srvfillattr(&va, fp);
1491 			}
1492 		    }
1493 		    nfsd->nd_mreq = mreq;
1494 		    if (nfsd->nd_mrep)
1495 			panic("nfsrv_write: nd_mrep not free");
1496 
1497 		    /*
1498 		     * Done. Put it at the head of the timer queue so that
1499 		     * the final phase can return the reply.
		     * Coalesced entries are queued here; swp itself is
		     * queued after the loop.  The next entry is unlinked
		     * from swp->nd_coalesce, which uses the nd_tq linkage.
1500 		     */
1501 		    s = splsoftclock();
1502 		    if (nfsd != swp) {
1503 			nfsd->nd_time = 0;
1504 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1505 		    }
1506 		    nfsd = swp->nd_coalesce.lh_first;
1507 		    if (nfsd) {
1508 			LIST_REMOVE(nfsd, nd_tq);
1509 		    }
1510 		    splx(s);
1511 		} while (nfsd);
1512 		s = splsoftclock();
1513 		swp->nd_time = 0;
1514 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1515 		splx(s);
1516 		goto loop1;
1517 	}
1518 	splx(s);
1519 
1520 	/*
1521 	 * Search for a reply to return.
	 * (Phase 3.)  At most one request is handed back per call.
1522 	 */
1523 	s = splsoftclock();
1524 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
1525 		if (nfsd->nd_mreq) {
1526 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1527 		    LIST_REMOVE(nfsd, nd_tq);
1528 		    *mrq = nfsd->nd_mreq;
1529 		    *ndp = nfsd;
1530 		    break;
1531 		}
1532 	splx(s);
1533 	return (0);
1534 }
1535 
1536 /*
1537  * Coalesce the write request nfsd into owp. To do this we must:
1538  * - remove nfsd from the queues
1539  * - merge nfsd->nd_mrep into owp->nd_mrep
1540  * - update the nd_eoff and nd_stable for owp
1541  * - put nfsd on owp's nd_coalesce list
1542  * NB: Must be called at splsoftclock().
 *
 * owp	request that absorbs the data and will be written.
 * nfsd	request being absorbed; on return its nd_mrep is NULL and it is
 *	linked on owp->nd_coalesce through its nd_tq entry.
 *
 * Panics if nfsd extends past owp's end but starts after it
 * (owp->nd_eoff - nfsd->nd_off < 0).
1543  */
1544 static void
1545 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1546 {
1547         int overlap;
1548         struct mbuf *mp;
1549 	struct nfsrv_descript *p;
1550 
1551 	NFS_DPF(WG, ("C%03x-%03x",
1552 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1553         LIST_REMOVE(nfsd, nd_hash);
1554         LIST_REMOVE(nfsd, nd_tq);
	/*
	 * If nfsd reaches past owp's current end, drop the leading bytes
	 * that owp already covers and append the rest to the tail of owp's
	 * mbuf chain.  Otherwise nfsd's data is simply freed.
	 */
1555         if (owp->nd_eoff < nfsd->nd_eoff) {
1556             overlap = owp->nd_eoff - nfsd->nd_off;
1557             if (overlap < 0)
1558                 panic("nfsrv_coalesce: bad off");
1559             if (overlap > 0)
1560                 m_adj(nfsd->nd_mrep, overlap);
1561             mp = owp->nd_mrep;
1562             while (mp->m_next)
1563                 mp = mp->m_next;
1564             mp->m_next = nfsd->nd_mrep;
1565             owp->nd_eoff = nfsd->nd_eoff;
1566         } else
1567             m_freem(nfsd->nd_mrep);
1568         nfsd->nd_mrep = NULL;
	/*
	 * Raise owp's stability level if nfsd asked for more: FILESYNC
	 * always wins, DATASYNC replaces UNSTABLE.
	 */
1569         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1570             owp->nd_stable = NFSV3WRITE_FILESYNC;
1571         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1572             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1573             owp->nd_stable = NFSV3WRITE_DATASYNC;
	/* nd_tq is reused as the linkage for the nd_coalesce list. */
1574         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1575 
1576 	/*
1577 	 * If nfsd had anything else coalesced into it, transfer them
1578 	 * to owp, otherwise their replies will never get sent.
1579 	 */
1580 	for (p = nfsd->nd_coalesce.lh_first; p;
1581 	     p = nfsd->nd_coalesce.lh_first) {
1582 	    LIST_REMOVE(p, nd_tq);
1583 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1584 	}
1585 }
1586 
1587 /*
1588  * nfs create service
1589  * now does a truncate to 0 length via. setattr if it already exists
 *
 * Looks the name up with nfs_namei() (CREATE, LOCKPARENT | LOCKLEAF |
 * SAVESTART), decodes the requested attributes, and then either
 *  - creates the object: VOP_CREATE() for VREG/VSOCK, or VOP_MKNOD()
 *    followed by a fresh lookup() for VCHR/VBLK/VFIFO; or
 *  - when the object already exists and a size was supplied, sets that
 *    size with VOP_SETATTR() after a VWRITE access check.
 * For a v3 exclusive create the client verifier is stored in va_atime
 * right after creation and compared against va_atime when the object
 * already existed.
 *
 * nfsd	request descriptor (mbufs, credentials, flags).
 * slp	socket structure passed on to nfs_namei().
 * td	thread used for the vnode operations.
 * mrq	reply mbuf list.  NOTE(review): filled in by the nfsm_reply()
 *	macro, whose definition is not in view - confirm.
 *
 * Returns the value left in error at nfsmout; the v3 reply paths visible
 * here zero it before getting there.
1590  */
1591 int
1592 nfsrv_create(nfsd, slp, td, mrq)
1593 	struct nfsrv_descript *nfsd;
1594 	struct nfssvc_sock *slp;
1595 	struct thread *td;
1596 	struct mbuf **mrq;
1597 {
1598 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1599 	struct sockaddr *nam = nfsd->nd_nam;
1600 	caddr_t dpos = nfsd->nd_dpos;
1601 	struct ucred *cred = &nfsd->nd_cr;
1602 	struct nfs_fattr *fp;
1603 	struct vattr va, dirfor, diraft;
1604 	struct vattr *vap = &va;
1605 	struct nfsv2_sattr *sp;
1606 	u_int32_t *tl;
1607 	struct nameidata nd;
1608 	int32_t t1;
1609 	caddr_t bpos;
1610 	int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1611 	int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1612 	caddr_t cp;
1613 	char *cp2;
1614 	struct mbuf *mb, *mb2, *mreq;
1615 	struct vnode *dirp = (struct vnode *)0;
1616 	nfsfh_t nfh;
1617 	fhandle_t *fhp;
1618 	u_quad_t frev, tempsize;
1619 	u_char cverf[NFSX_V3CREATEVERF];
1620 
1621 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1622 #ifndef nolint
1623 	rdev = 0;
1624 #endif
	/* Clear nd first so the cleanup at nfsmout is safe on early exits. */
1625 	ndclear(&nd);
1626 
1627 	fhp = &nfh.fh_generic;
1628 	nfsm_srvmtofh(fhp);
1629 	nfsm_srvnamesiz(len);
1630 
1631 	nd.ni_cnd.cn_cred = cred;
1632 	nd.ni_cnd.cn_nameiop = CREATE;
1633 	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
1634 
1635 	/*
1636 	 * Call namei and do initial cleanup to get a few things
1637 	 * out of the way.  If we get an initial error we cleanup
1638 	 * and return here to avoid special-casing the invalid nd
1639 	 * structure through the rest of the case.  dirp may be
1640 	 * set even if an error occurs, but the nd structure will not
1641 	 * be valid at all if an error occurs so we have to invalidate it
1642 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1643 	 */
1644 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
1645 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/*
	 * dirp is only needed for the v3 wcc data; for v2 it is released
	 * right away.
	 */
1646 	if (dirp) {
1647 		if (v3) {
1648 			dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
1649 		} else {
1650 			vrele(dirp);
1651 			dirp = NULL;
1652 		}
1653 	}
1654 	if (error) {
1655 		nfsm_reply(NFSX_WCCDATA(v3));
1656 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1657 		error = 0;
1658 		goto nfsmout;
1659 	}
1660 
1661 	/*
1662 	 * No error.  Continue.  State:
1663 	 *
1664 	 *	startdir	is valid ( we release this immediately )
1665 	 *	dirp 		may be valid
1666 	 *	nd.ni_vp	may be valid
1667 	 *	nd.ni_dvp	is valid
1668 	 *
1669 	 * The error state is set through the code and we may also do some
1670 	 * opportunistic releasing of vnodes to avoid holding locks through
1671 	 * NFS I/O.  The cleanup at the end is a catch-all
1672 	 */
1673 
1674 	VATTR_NULL(vap);
1675 	if (v3) {
		/*
		 * v3: decode the create mode.  GUARDED fails with EEXIST if
		 * the name exists, otherwise it takes attributes like
		 * UNCHECKED; EXCLUSIVE carries a verifier instead of
		 * attributes.  The type is always VREG.
		 */
1676 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1677 		how = fxdr_unsigned(int, *tl);
1678 		switch (how) {
1679 		case NFSV3CREATE_GUARDED:
1680 			if (nd.ni_vp) {
1681 				error = EEXIST;
1682 				break;
1683 			}
1684 			/* fall through */
1685 		case NFSV3CREATE_UNCHECKED:
1686 			nfsm_srvsattr(vap);
1687 			break;
1688 		case NFSV3CREATE_EXCLUSIVE:
1689 			nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF);
1690 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1691 			exclusive_flag = 1;
1692 			break;
1693 		};
1694 		vap->va_type = VREG;
1695 	} else {
		/*
		 * v2: the type comes from the mode bits (VNON is treated as
		 * VREG).  sa_size is the file size for VREG, where -1 means
		 * "not set", and the device number for VCHR/VBLK/VFIFO.
		 */
1696 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1697 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1698 		if (vap->va_type == VNON)
1699 			vap->va_type = VREG;
1700 		vap->va_mode = nfstov_mode(sp->sa_mode);
1701 		switch (vap->va_type) {
1702 		case VREG:
1703 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1704 			if (tsize != -1)
1705 				vap->va_size = (u_quad_t)tsize;
1706 			break;
1707 		case VCHR:
1708 		case VBLK:
1709 		case VFIFO:
1710 			rdev = fxdr_unsigned(long, sp->sa_size);
1711 			break;
1712 		default:
1713 			break;
1714 		};
1715 	}
1716 
1717 	/*
1718 	 * Iff doesn't exist, create it
1719 	 * otherwise just truncate to 0 length
1720 	 *   should I set the mode too ?
1721 	 *
1722 	 * The only possible error we can have at this point is EEXIST.
1723 	 * nd.ni_vp will also be non-NULL in that case.
1724 	 */
1725 	if (nd.ni_vp == NULL) {
1726 		if (vap->va_mode == (mode_t)VNOVAL)
1727 			vap->va_mode = 0;
1728 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1729 			nqsrv_getl(nd.ni_dvp, ND_WRITE);
1730 			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1731 			if (error)
1732 				NDFREE(&nd, NDF_ONLY_PNBUF);
1733 			else {
1734 			    	nfsrv_object_create(nd.ni_vp);
				/*
				 * Exclusive create: store the verifier in
				 * va_atime.  Clearing exclusive_flag skips
				 * the verifier comparison further down for
				 * an object that was just created.
				 */
1735 				if (exclusive_flag) {
1736 					exclusive_flag = 0;
1737 					VATTR_NULL(vap);
1738 					bcopy(cverf, (caddr_t)&vap->va_atime,
1739 						NFSX_V3CREATEVERF);
1740 					error = VOP_SETATTR(nd.ni_vp, vap, cred,
1741 						td);
1742 				}
1743 			}
1744 		} else if (
1745 			vap->va_type == VCHR ||
1746 			vap->va_type == VBLK ||
1747 			vap->va_type == VFIFO
1748 		) {
1749 			/*
1750 			 * Handle SysV FIFO node special cases.  All other
1751 			 * devices require super user to access.
			 * A VCHR request with rdev 0xffffffff is treated as
			 * a FIFO.
1752 			 */
1753 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1754 				vap->va_type = VFIFO;
1755                         if (vap->va_type != VFIFO &&
1756                             (error = suser_cred(cred, 0))) {
1757 				goto nfsmreply0;
1758                         }
1759 			vap->va_rdev = rdev;
1760 			nqsrv_getl(nd.ni_dvp, ND_WRITE);
1761 
1762 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1763 			if (error) {
1764 				NDFREE(&nd, NDF_ONLY_PNBUF);
1765 				goto nfsmreply0;
1766 			}
			/*
			 * The vnode from VOP_MKNOD is dropped and the name
			 * is looked up again below.
			 */
1767 			vput(nd.ni_vp);
1768 			nd.ni_vp = NULL;
1769 
1770 			/*
1771 			 * release dvp prior to lookup
1772 			 */
1773 			vput(nd.ni_dvp);
1774 			nd.ni_dvp = NULL;
1775 
1776 			/*
1777 			 * Setup for lookup.
1778 			 *
1779 			 * Even though LOCKPARENT was cleared, ni_dvp may
1780 			 * be garbage.
1781 			 */
1782 			nd.ni_cnd.cn_nameiop = LOOKUP;
1783 			nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1784 			nd.ni_cnd.cn_td = td;
1785 			nd.ni_cnd.cn_cred = cred;
1786 
1787 			error = lookup(&nd);
1788 			nd.ni_dvp = NULL;
1789 
			/*
			 * NOTE(review): if nfsm_reply() does not jump to
			 * nfsmout for this error, execution continues with
			 * error still set and nd.ni_vp possibly NULL - confirm
			 * against the macro definition, which is not in view.
			 */
1790 			if (error != 0) {
1791 				nfsm_reply(0);
1792 				/* fall through on certain errors */
1793 			}
1794 			nfsrv_object_create(nd.ni_vp);
1795 			if (nd.ni_cnd.cn_flags & ISSYMLINK) {
1796 				error = EINVAL;
1797 				goto nfsmreply0;
1798 			}
1799 		} else {
1800 			error = ENXIO;
1801 		}
1802 	} else {
		/*
		 * The object already exists: if a size was supplied, set it
		 * (and nothing else) after checking write access.
		 */
1803 		if (vap->va_size != -1) {
1804 			error = nfsrv_access(nd.ni_vp, VWRITE, cred,
1805 			    (nd.ni_cnd.cn_flags & RDONLY), td, 0);
1806 			if (!error) {
1807 				nqsrv_getl(nd.ni_vp, ND_WRITE);
1808 				tempsize = vap->va_size;
1809 				VATTR_NULL(vap);
1810 				vap->va_size = tempsize;
1811 				error = VOP_SETATTR(nd.ni_vp, vap, cred, td);
1812 			}
1813 		}
1814 	}
1815 
	/* Build the file handle and fetch the attributes for the reply. */
1816 	if (!error) {
1817 		bzero((caddr_t)fhp, sizeof(nfh));
1818 		fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
1819 		error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
1820 		if (!error)
1821 			error = VOP_GETATTR(nd.ni_vp, vap, td);
1822 	}
1823 	if (v3) {
		/*
		 * exclusive_flag is still set only when the object already
		 * existed: a verifier mismatch then means a different
		 * create, reported as EEXIST.
		 * NOTE(review): dirp is used without a NULL check here,
		 * unlike in nfsrv_remove(); this relies on nfs_namei()
		 * having set it for v3 - confirm.
		 */
1824 		if (exclusive_flag && !error &&
1825 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1826 			error = EEXIST;
1827 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
1828 		vrele(dirp);
1829 		dirp = NULL;
1830 	}
1831 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1832 	if (v3) {
1833 		if (!error) {
1834 			nfsm_srvpostop_fh(fhp);
1835 			nfsm_srvpostop_attr(0, vap);
1836 		}
1837 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1838 		error = 0;
1839 	} else {
1840 		nfsm_srvfhtom(fhp, v3);
1841 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1842 		nfsm_srvfillattr(vap, fp);
1843 	}
1844 	goto nfsmout;
1845 
	/* Error exit that sends a reply with no body beyond the status. */
1846 nfsmreply0:
1847 	nfsm_reply(0);
1848 	error = 0;
1849 	/* fall through */
1850 
	/* Catch-all cleanup: release whatever nd and dirp still hold. */
1851 nfsmout:
1852 	if (nd.ni_startdir) {
1853 		vrele(nd.ni_startdir);
1854 		nd.ni_startdir = NULL;
1855 	}
1856 	if (dirp)
1857 		vrele(dirp);
1858 	NDFREE(&nd, NDF_ONLY_PNBUF);
1859 	if (nd.ni_dvp) {
1860 		if (nd.ni_dvp == nd.ni_vp)
1861 			vrele(nd.ni_dvp);
1862 		else
1863 			vput(nd.ni_dvp);
1864 	}
1865 	if (nd.ni_vp)
1866 		vput(nd.ni_vp);
1867 	return (error);
1868 }
1869 
1870 /*
1871  * nfs v3 mknod service
 *
 * Looks the name up with nfs_namei() (CREATE, LOCKPARENT | LOCKLEAF |
 * SAVESTART) and creates a VCHR, VBLK, VSOCK or VFIFO node; any other
 * type is rejected with NFSERR_BADTYPE and an existing name with EEXIST.
 * Sockets are created with VOP_CREATE(); the other types use VOP_MKNOD()
 * followed by a fresh lookup() of the new name.  Device nodes (VCHR/VBLK)
 * additionally require suser_cred() to succeed.
 *
 * nfsd	request descriptor (mbufs, credentials, flags).
 * slp	socket structure passed on to nfs_namei().
 * td	thread used for the vnode operations; must have a process
 *	attached on the VOP_MKNOD path (KKASSERT).
 * mrq	reply mbuf list.  NOTE(review): filled in by the nfsm_reply()
 *	macro, whose definition is not in view - confirm.
 *
 * Returns 0 once the reply has been built at "out"; the nfsmout path
 * returns whatever is left in error.
1872  */
1873 int
1874 nfsrv_mknod(nfsd, slp, td, mrq)
1875 	struct nfsrv_descript *nfsd;
1876 	struct nfssvc_sock *slp;
1877 	struct thread *td;
1878 	struct mbuf **mrq;
1879 {
1880 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1881 	struct sockaddr *nam = nfsd->nd_nam;
1882 	caddr_t dpos = nfsd->nd_dpos;
1883 	struct ucred *cred = &nfsd->nd_cr;
1884 	struct vattr va, dirfor, diraft;
1885 	struct vattr *vap = &va;
1886 	u_int32_t *tl;
1887 	struct nameidata nd;
1888 	int32_t t1;
1889 	caddr_t bpos;
1890 	int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
1891 	u_int32_t major, minor;
1892 	enum vtype vtyp;
1893 	char *cp2;
1894 	struct mbuf *mb, *mb2, *mreq;
1895 	struct vnode *vp, *dirp = (struct vnode *)0;
1896 	nfsfh_t nfh;
1897 	fhandle_t *fhp;
1898 	u_quad_t frev;
1899 
1900 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear nd first so the cleanup paths are safe on early exits. */
1901 	ndclear(&nd);
1902 
1903 	fhp = &nfh.fh_generic;
1904 	nfsm_srvmtofh(fhp);
1905 	nfsm_srvnamesiz(len);
1906 
1907 	nd.ni_cnd.cn_cred = cred;
1908 	nd.ni_cnd.cn_nameiop = CREATE;
1909 	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
1910 
1911 	/*
1912 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1913 	 * is not valid.  However, nfsm_*() routines may still jump to
1914 	 * nfsmout.
1915 	 */
1916 
1917 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
1918 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/* Pre-operation directory attributes for the wcc data. */
1919 	if (dirp)
1920 		dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
1921 	if (error) {
1922 		nfsm_reply(NFSX_WCCDATA(1));
1923 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1924 		error = 0;
1925 		goto nfsmout;
1926 	}
1927 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1928 	vtyp = nfsv3tov_type(*tl);
1929 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1930 		error = NFSERR_BADTYPE;
1931 		goto out;
1932 	}
1933 	VATTR_NULL(vap);
1934 	nfsm_srvsattr(vap);
	/* Device nodes carry a major/minor pair after the attributes. */
1935 	if (vtyp == VCHR || vtyp == VBLK) {
1936 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1937 		major = fxdr_unsigned(u_int32_t, *tl++);
1938 		minor = fxdr_unsigned(u_int32_t, *tl);
1939 		vap->va_rdev = makeudev(major, minor);
1940 	}
1941 
1942 	/*
1943 	 * Iff doesn't exist, create it.
1944 	 */
1945 	if (nd.ni_vp) {
1946 		error = EEXIST;
1947 		goto out;
1948 	}
1949 	vap->va_type = vtyp;
1950 	if (vap->va_mode == (mode_t)VNOVAL)
1951 		vap->va_mode = 0;
1952 	if (vtyp == VSOCK) {
1953 		vrele(nd.ni_startdir);
1954 		nd.ni_startdir = NULL;
1955 		nqsrv_getl(nd.ni_dvp, ND_WRITE);
1956 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1957 		if (error)
1958 			NDFREE(&nd, NDF_ONLY_PNBUF);
1959 	} else {
		/* Only FIFOs may be created without superuser credentials. */
1960 		if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
1961 			goto out;
1962 		nqsrv_getl(nd.ni_dvp, ND_WRITE);
1963 
1964 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1965 		if (error) {
1966 			NDFREE(&nd, NDF_ONLY_PNBUF);
1967 			goto out;
1968 		}
		/*
		 * The vnode from VOP_MKNOD is dropped and the name is
		 * looked up again below.
		 */
1969 		vput(nd.ni_vp);
1970 		nd.ni_vp = NULL;
1971 
1972 		/*
1973 		 * Release dvp prior to lookup
1974 		 */
1975 		vput(nd.ni_dvp);
1976 		nd.ni_dvp = NULL;
1977 
1978 		KKASSERT(td->td_proc);
1979 		nd.ni_cnd.cn_nameiop = LOOKUP;
1980 		nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1981 		nd.ni_cnd.cn_td = td;
1982 		nd.ni_cnd.cn_cred = td->td_proc->p_ucred;
1983 
1984 		error = lookup(&nd);
1985 		nd.ni_dvp = NULL;
1986 
1987 		if (error)
1988 			goto out;
1989 		if (nd.ni_cnd.cn_flags & ISSYMLINK)
1990 			error = EINVAL;
1991 	}
1992 
1993 	/*
1994 	 * send response, cleanup, return.
1995 	 */
1996 out:
1997 	if (nd.ni_startdir) {
1998 		vrele(nd.ni_startdir);
1999 		nd.ni_startdir = NULL;
2000 	}
2001 	NDFREE(&nd, NDF_ONLY_PNBUF);
2002 	if (nd.ni_dvp) {
2003 		if (nd.ni_dvp == nd.ni_vp)
2004 			vrele(nd.ni_dvp);
2005 		else
2006 			vput(nd.ni_dvp);
2007 		nd.ni_dvp = NULL;
2008 	}
2009 	vp = nd.ni_vp;
	/* Build the file handle and fetch the new node's attributes. */
2010 	if (!error) {
2011 		bzero((caddr_t)fhp, sizeof(nfh));
2012 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2013 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2014 		if (!error)
2015 			error = VOP_GETATTR(vp, vap, td);
2016 	}
2017 	if (vp) {
2018 		vput(vp);
2019 		vp = NULL;
2020 		nd.ni_vp = NULL;
2021 	}
	/*
	 * Sample the post-operation directory attributes only when dirp is
	 * valid; otherwise diraft_ret keeps its initial value of 1 and is
	 * passed unchanged to nfsm_srvwcc_data().
	 */
2022 	if (dirp) {
2023 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
2024 		vrele(dirp);
2025 		dirp = NULL;
2026 	}
2027 	nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
2028 	if (!error) {
2029 		nfsm_srvpostop_fh(fhp);
2030 		nfsm_srvpostop_attr(0, vap);
2031 	}
2032 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2033 	return (0);
	/*
	 * Cleanup for exits taken before the reply at "out" is complete:
	 * release whatever nd and dirp still hold.
	 */
2034 nfsmout:
2035 	if (dirp)
2036 		vrele(dirp);
2037 	if (nd.ni_startdir)
2038 		vrele(nd.ni_startdir);
2039 	NDFREE(&nd, NDF_ONLY_PNBUF);
2040 	if (nd.ni_dvp) {
2041 		if (nd.ni_dvp == nd.ni_vp)
2042 			vrele(nd.ni_dvp);
2043 		else
2044 			vput(nd.ni_dvp);
2045 	}
2046 	if (nd.ni_vp)
2047 		vput(nd.ni_vp);
2048 	return (error);
2049 }
2050 
2051 /*
2052  * nfs remove service
 *
 * Looks the name up with nfs_namei() (DELETE, LOCKPARENT | LOCKLEAF) and
 * removes it with VOP_REMOVE().  Directories are refused with EPERM and
 * the root of a mounted filesystem with EBUSY.  For v3 the directory
 * attributes from before and after the operation are returned as wcc
 * data.
 *
 * nfsd	request descriptor (mbufs, credentials, flags).
 * slp	socket structure passed on to nfs_namei().
 * td	thread used for the vnode operations.
 * mrq	reply mbuf list.  NOTE(review): filled in by the nfsm_reply()
 *	macro, whose definition is not in view - confirm.
 *
 * Returns the value left in error at nfsmout; the v3 path zeroes it after
 * building the reply.
2053  */
2054 int
2055 nfsrv_remove(nfsd, slp, td, mrq)
2056 	struct nfsrv_descript *nfsd;
2057 	struct nfssvc_sock *slp;
2058 	struct thread *td;
2059 	struct mbuf **mrq;
2060 {
2061 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2062 	struct sockaddr *nam = nfsd->nd_nam;
2063 	caddr_t dpos = nfsd->nd_dpos;
2064 	struct ucred *cred = &nfsd->nd_cr;
2065 	struct nameidata nd;
2066 	u_int32_t *tl;
2067 	int32_t t1;
2068 	caddr_t bpos;
2069 	int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
2070 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2071 	char *cp2;
2072 	struct mbuf *mb, *mreq;
	/*
	 * NOTE(review): dirp has no initializer here; the code below relies
	 * on nfs_namei() storing to it before it is tested - confirm.
	 */
2073 	struct vnode *dirp;
2074 	struct vattr dirfor, diraft;
2075 	nfsfh_t nfh;
2076 	fhandle_t *fhp;
2077 	u_quad_t frev;
2078 
2079 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear nd first so the cleanup at nfsmout is safe on early exits. */
2080 	ndclear(&nd);
2081 
2082 	fhp = &nfh.fh_generic;
2083 	nfsm_srvmtofh(fhp);
2084 	nfsm_srvnamesiz(len);
2085 
2086 	nd.ni_cnd.cn_cred = cred;
2087 	nd.ni_cnd.cn_nameiop = DELETE;
2088 	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2089 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2090 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/*
	 * dirp is only needed for the v3 wcc data; for v2 it is released
	 * right away.
	 */
2091 	if (dirp) {
2092 		if (v3) {
2093 			dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
2094 		} else {
2095 			vrele(dirp);
2096 			dirp = NULL;
2097 		}
2098 	}
2099 	if (error == 0) {
2100 		if (nd.ni_vp->v_type == VDIR) {
2101 			error = EPERM;		/* POSIX */
2102 			goto out;
2103 		}
2104 		/*
2105 		 * The root of a mounted filesystem cannot be deleted.
2106 		 */
2107 		if (nd.ni_vp->v_flag & VROOT) {
2108 			error = EBUSY;
2109 			goto out;
2110 		}
		/*
		 * The "out" label lives inside this if-body; both checks
		 * above jump here with error set, which skips the removal.
		 */
2111 out:
2112 		if (!error) {
2113 			nqsrv_getl(nd.ni_dvp, ND_WRITE);
2114 			nqsrv_getl(nd.ni_vp, ND_WRITE);
2115 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2116 			NDFREE(&nd, NDF_ONLY_PNBUF);
2117 		}
2118 	}
	/* Post-operation directory attributes for the v3 wcc data. */
2119 	if (dirp && v3) {
2120 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
2121 		vrele(dirp);
2122 		dirp = NULL;
2123 	}
2124 	nfsm_reply(NFSX_WCCDATA(v3));
2125 	if (v3) {
2126 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2127 		error = 0;
2128 	}
	/* Catch-all cleanup: release whatever nd still holds. */
2129 nfsmout:
2130 	NDFREE(&nd, NDF_ONLY_PNBUF);
2131 	if (nd.ni_dvp) {
2132 		if (nd.ni_dvp == nd.ni_vp)
2133 			vrele(nd.ni_dvp);
2134 		else
2135 			vput(nd.ni_dvp);
2136 	}
2137 	if (nd.ni_vp)
2138 		vput(nd.ni_vp);
2139 	return(error);
2140 }
2141 
2142 /*
2143  * nfs rename service
2144  */
2145 int
2146 nfsrv_rename(nfsd, slp, td, mrq)
2147 	struct nfsrv_descript *nfsd;
2148 	struct nfssvc_sock *slp;
2149 	struct thread *td;
2150 	struct mbuf **mrq;
2151 {
2152 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2153 	struct sockaddr *nam = nfsd->nd_nam;
2154 	caddr_t dpos = nfsd->nd_dpos;
2155 	struct ucred *cred = &nfsd->nd_cr;
2156 	u_int32_t *tl;
2157 	int32_t t1;
2158 	caddr_t bpos;
2159 	int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2160 	int tdirfor_ret = 1, tdiraft_ret = 1;
2161 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2162 	char *cp2;
2163 	struct mbuf *mb, *mreq;
2164 	struct nameidata fromnd, tond;
2165 	struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0;
2166 	struct vnode *tdirp = (struct vnode *)0;
2167 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2168 	nfsfh_t fnfh, tnfh;
2169 	fhandle_t *ffhp, *tfhp;
2170 	u_quad_t frev;
2171 	uid_t saved_uid;
2172 
2173 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2174 #ifndef nolint
2175 	fvp = (struct vnode *)0;
2176 #endif
2177 	ffhp = &fnfh.fh_generic;
2178 	tfhp = &tnfh.fh_generic;
2179 
2180 	/*
2181 	 * Clear fields in case goto nfsmout occurs from macro.
2182 	 */
2183 
2184 	ndclear(&fromnd);
2185 	ndclear(&tond);
2186 
2187 	nfsm_srvmtofh(ffhp);
2188 	nfsm_srvnamesiz(len);
2189 	/*
2190 	 * Remember our original uid so that we can reset cr_uid before
2191 	 * the second nfs_namei() call, in case it is remapped.
2192 	 */
2193 	saved_uid = cred->cr_uid;
2194 	fromnd.ni_cnd.cn_cred = cred;
2195 	fromnd.ni_cnd.cn_nameiop = DELETE;
2196 	fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
2197 	error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
2198 		&dpos, &fdirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2199 	if (fdirp) {
2200 		if (v3) {
2201 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, td);
2202 		} else {
2203 			vrele(fdirp);
2204 			fdirp = NULL;
2205 		}
2206 	}
2207 	if (error) {
2208 		nfsm_reply(2 * NFSX_WCCDATA(v3));
2209 		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2210 		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2211 		error = 0;
2212 		goto nfsmout;
2213 	}
2214 	fvp = fromnd.ni_vp;
2215 	nfsm_srvmtofh(tfhp);
2216 	nfsm_strsiz(len2, NFS_MAXNAMLEN);
2217 	cred->cr_uid = saved_uid;
2218 	tond.ni_cnd.cn_cred = cred;
2219 	tond.ni_cnd.cn_nameiop = RENAME;
2220 	tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2221 	error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
2222 		&dpos, &tdirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2223 	if (tdirp) {
2224 		if (v3) {
2225 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, td);
2226 		} else {
2227 			vrele(tdirp);
2228 			tdirp = NULL;
2229 		}
2230 	}
2231 	if (error)
2232 		goto out1;
2233 
2234 	tdvp = tond.ni_dvp;
2235 	tvp = tond.ni_vp;
2236 	if (tvp != NULL) {
2237 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2238 			if (v3)
2239 				error = EEXIST;
2240 			else
2241 				error = EISDIR;
2242 			goto out;
2243 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2244 			if (v3)
2245 				error = EEXIST;
2246 			else
2247 				error = ENOTDIR;
2248 			goto out;
2249 		}
2250 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
2251 			if (v3)
2252 				error = EXDEV;
2253 			else
2254 				error = ENOTEMPTY;
2255 			goto out;
2256 		}
2257 	}
2258 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
2259 		if (v3)
2260 			error = EXDEV;
2261 		else
2262 			error = ENOTEMPTY;
2263 		goto out;
2264 	}
2265 	if (fvp->v_mount != tdvp->v_mount) {
2266 		if (v3)
2267 			error = EXDEV;
2268 		else
2269 			error = ENOTEMPTY;
2270 		goto out;
2271 	}
2272 	if (fvp == tdvp) {
2273 		if (v3)
2274 			error = EINVAL;
2275 		else
2276 			error = ENOTEMPTY;
2277 	}
2278 	/*
2279 	 * If source is the same as the destination (that is the
2280 	 * same vnode with the same name in the same directory),
2281 	 * then there is nothing to do.
2282 	 */
2283 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2284 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2285 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2286 	      fromnd.ni_cnd.cn_namelen))
2287 		error = -1;
2288 out:
2289 	if (!error) {
2290 		/*
2291 		 * The VOP_RENAME function releases all vnode references &
2292 		 * locks prior to returning so we need to clear the pointers
2293 		 * to bypass cleanup code later on.
2294 		 */
2295 		nqsrv_getl(fromnd.ni_dvp, ND_WRITE);
2296 		nqsrv_getl(tdvp, ND_WRITE);
2297 		if (tvp) {
2298 			nqsrv_getl(tvp, ND_WRITE);
2299 		}
2300 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2301 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2302 		fromnd.ni_dvp = NULL;
2303 		fromnd.ni_vp = NULL;
2304 		tond.ni_dvp = NULL;
2305 		tond.ni_vp = NULL;
2306 		if (error) {
2307 			fromnd.ni_cnd.cn_flags &= ~HASBUF;
2308 			tond.ni_cnd.cn_flags &= ~HASBUF;
2309 		}
2310 	} else {
2311 		if (error == -1)
2312 			error = 0;
2313 	}
2314 	/* fall through */
2315 
2316 out1:
2317 	if (fdirp)
2318 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, td);
2319 	if (tdirp)
2320 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, td);
2321 	nfsm_reply(2 * NFSX_WCCDATA(v3));
2322 	if (v3) {
2323 		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2324 		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2325 	}
2326 	error = 0;
2327 	/* fall through */
2328 
2329 nfsmout:
2330 	/*
2331 	 * Clear out tond related fields
2332 	 */
2333 	if (tdirp)
2334 		vrele(tdirp);
2335 	if (tond.ni_startdir)
2336 		vrele(tond.ni_startdir);
2337 	NDFREE(&tond, NDF_ONLY_PNBUF);
2338 	if (tond.ni_dvp) {
2339 		if (tond.ni_dvp == tond.ni_vp)
2340 			vrele(tond.ni_dvp);
2341 		else
2342 			vput(tond.ni_dvp);
2343 	}
2344 	if (tond.ni_vp)
2345 		vput(tond.ni_vp);
2346 
2347 	/*
2348 	 * Clear out fromnd related fields
2349 	 */
2350 	if (fdirp)
2351 		vrele(fdirp);
2352 	if (fromnd.ni_startdir)
2353 		vrele(fromnd.ni_startdir);
2354 	NDFREE(&fromnd, NDF_ONLY_PNBUF);
2355 	if (fromnd.ni_dvp)
2356 		vrele(fromnd.ni_dvp);
2357 	if (fromnd.ni_vp)
2358 		vrele(fromnd.ni_vp);
2359 
2360 	return (error);
2361 }
2362 
2363 /*
2364  * nfs link service
2365  */
2366 int
2367 nfsrv_link(nfsd, slp, td, mrq)
2368 	struct nfsrv_descript *nfsd;
2369 	struct nfssvc_sock *slp;
2370 	struct thread *td;
2371 	struct mbuf **mrq;
2372 {
2373 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2374 	struct sockaddr *nam = nfsd->nd_nam;
2375 	caddr_t dpos = nfsd->nd_dpos;
2376 	struct ucred *cred = &nfsd->nd_cr;
2377 	struct nameidata nd;
2378 	u_int32_t *tl;
2379 	int32_t t1;
2380 	caddr_t bpos;
2381 	int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1;
2382 	int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
2383 	char *cp2;
2384 	struct mbuf *mb, *mreq;
2385 	struct vnode *vp = NULL, *xp, *dirp = (struct vnode *)0;
2386 	struct vattr dirfor, diraft, at;
2387 	nfsfh_t nfh, dnfh;
2388 	fhandle_t *fhp, *dfhp;
2389 	u_quad_t frev;
2390 
2391 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2392 	ndclear(&nd);
2393 
2394 	fhp = &nfh.fh_generic;
2395 	dfhp = &dnfh.fh_generic;
2396 	nfsm_srvmtofh(fhp);
2397 	nfsm_srvmtofh(dfhp);
2398 	nfsm_srvnamesiz(len);
2399 
2400 	error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam,
2401 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2402 	if (error) {
2403 		nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2404 		nfsm_srvpostop_attr(getret, &at);
2405 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2406 		vp = NULL;
2407 		error = 0;
2408 		goto nfsmout;
2409 	}
2410 	if (vp->v_type == VDIR) {
2411 		error = EPERM;		/* POSIX */
2412 		goto out1;
2413 	}
2414 	nd.ni_cnd.cn_cred = cred;
2415 	nd.ni_cnd.cn_nameiop = CREATE;
2416 	nd.ni_cnd.cn_flags = LOCKPARENT;
2417 	error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
2418 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2419 	if (dirp) {
2420 		if (v3) {
2421 			dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
2422 		} else {
2423 			vrele(dirp);
2424 			dirp = NULL;
2425 		}
2426 	}
2427 	if (error)
2428 		goto out1;
2429 
2430 	xp = nd.ni_vp;
2431 	if (xp != NULL) {
2432 		error = EEXIST;
2433 		goto out;
2434 	}
2435 	xp = nd.ni_dvp;
2436 	if (vp->v_mount != xp->v_mount)
2437 		error = EXDEV;
2438 out:
2439 	if (!error) {
2440 		nqsrv_getl(vp, ND_WRITE);
2441 		nqsrv_getl(xp, ND_WRITE);
2442 		error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2443 		NDFREE(&nd, NDF_ONLY_PNBUF);
2444 	}
2445 	/* fall through */
2446 
2447 out1:
2448 	if (v3)
2449 		getret = VOP_GETATTR(vp, &at, td);
2450 	if (dirp)
2451 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
2452 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2453 	if (v3) {
2454 		nfsm_srvpostop_attr(getret, &at);
2455 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2456 		error = 0;
2457 	}
2458 	/* fall through */
2459 
2460 nfsmout:
2461 	NDFREE(&nd, NDF_ONLY_PNBUF);
2462 	if (dirp)
2463 		vrele(dirp);
2464 	if (vp)
2465 		vrele(vp);
2466 	if (nd.ni_dvp) {
2467 		if (nd.ni_dvp == nd.ni_vp)
2468 			vrele(nd.ni_dvp);
2469 		else
2470 			vput(nd.ni_dvp);
2471 	}
2472 	if (nd.ni_vp)
2473 		vrele(nd.ni_vp);
2474 	return(error);
2475 }
2476 
2477 /*
2478  * nfs symbolic link service
2479  */
2480 int
2481 nfsrv_symlink(nfsd, slp, td, mrq)
2482 	struct nfsrv_descript *nfsd;
2483 	struct nfssvc_sock *slp;
2484 	struct thread *td;
2485 	struct mbuf **mrq;
2486 {
2487 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2488 	struct sockaddr *nam = nfsd->nd_nam;
2489 	caddr_t dpos = nfsd->nd_dpos;
2490 	struct ucred *cred = &nfsd->nd_cr;
2491 	struct vattr va, dirfor, diraft;
2492 	struct nameidata nd;
2493 	struct vattr *vap = &va;
2494 	u_int32_t *tl;
2495 	int32_t t1;
2496 	struct nfsv2_sattr *sp;
2497 	char *bpos, *pathcp = (char *)0, *cp2;
2498 	struct uio io;
2499 	struct iovec iv;
2500 	int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1;
2501 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2502 	struct mbuf *mb, *mreq, *mb2;
2503 	struct vnode *dirp = (struct vnode *)0;
2504 	nfsfh_t nfh;
2505 	fhandle_t *fhp;
2506 	u_quad_t frev;
2507 
2508 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2509 	ndclear(&nd);
2510 
2511 	fhp = &nfh.fh_generic;
2512 	nfsm_srvmtofh(fhp);
2513 	nfsm_srvnamesiz(len);
2514 	nd.ni_cnd.cn_cred = cred;
2515 	nd.ni_cnd.cn_nameiop = CREATE;
2516 	nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART;
2517 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2518 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2519 	if (dirp) {
2520 		if (v3) {
2521 			dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
2522 		} else {
2523 			vrele(dirp);
2524 			dirp = NULL;
2525 		}
2526 	}
2527 	if (error)
2528 		goto out;
2529 
2530 	VATTR_NULL(vap);
2531 	if (v3)
2532 		nfsm_srvsattr(vap);
2533 	nfsm_strsiz(len2, NFS_MAXPATHLEN);
2534 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2535 	iv.iov_base = pathcp;
2536 	iv.iov_len = len2;
2537 	io.uio_resid = len2;
2538 	io.uio_offset = 0;
2539 	io.uio_iov = &iv;
2540 	io.uio_iovcnt = 1;
2541 	io.uio_segflg = UIO_SYSSPACE;
2542 	io.uio_rw = UIO_READ;
2543 	io.uio_td = NULL;
2544 	nfsm_mtouio(&io, len2);
2545 	if (!v3) {
2546 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2547 		vap->va_mode = nfstov_mode(sp->sa_mode);
2548 	}
2549 	*(pathcp + len2) = '\0';
2550 	if (nd.ni_vp) {
2551 		error = EEXIST;
2552 		goto out;
2553 	}
2554 
2555 	/*
2556 	 * issue symlink op.  SAVESTART is set so the underlying path component
2557 	 * is only freed by the VOP if an error occurs.
2558 	 */
2559 	if (vap->va_mode == (mode_t)VNOVAL)
2560 		vap->va_mode = 0;
2561 	nqsrv_getl(nd.ni_dvp, ND_WRITE);
2562 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
2563 	if (error)
2564 		NDFREE(&nd, NDF_ONLY_PNBUF);
2565 	else
2566 		vput(nd.ni_vp);
2567 	nd.ni_vp = NULL;
2568 	/*
2569 	 * releases directory prior to potential lookup op.
2570 	 */
2571 	vput(nd.ni_dvp);
2572 	nd.ni_dvp = NULL;
2573 
2574 	if (error == 0) {
2575 	    if (v3) {
2576 		/*
2577 		 * Issue lookup.  Leave SAVESTART set so we can easily free
2578 		 * the name buffer later on.
2579 		 *
2580 		 * since LOCKPARENT is not set, ni_dvp will be garbage on
2581 		 * return whether an error occurs or not.
2582 		 */
2583 		nd.ni_cnd.cn_nameiop = LOOKUP;
2584 		nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
2585 		nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
2586 		nd.ni_cnd.cn_td = td;
2587 		nd.ni_cnd.cn_cred = cred;
2588 
2589 		error = lookup(&nd);
2590 		nd.ni_dvp = NULL;
2591 
2592 		if (error == 0) {
2593 			bzero((caddr_t)fhp, sizeof(nfh));
2594 			fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2595 			error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2596 			if (!error)
2597 				error = VOP_GETATTR(nd.ni_vp, vap, td);
2598 			vput(nd.ni_vp);
2599 			nd.ni_vp = NULL;
2600 		}
2601 	    }
2602 	}
2603 out:
2604 	/*
2605 	 * These releases aren't strictly required, does even doing them
2606 	 * make any sense? XXX can nfsm_reply() block?
2607 	 */
2608 	if (pathcp) {
2609 		FREE(pathcp, M_TEMP);
2610 		pathcp = NULL;
2611 	}
2612 	if (dirp) {
2613 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
2614 		vrele(dirp);
2615 		dirp = NULL;
2616 	}
2617 	if (nd.ni_startdir) {
2618 		vrele(nd.ni_startdir);
2619 		nd.ni_startdir = NULL;
2620 	}
2621 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2622 	if (v3) {
2623 		if (!error) {
2624 			nfsm_srvpostop_fh(fhp);
2625 			nfsm_srvpostop_attr(0, vap);
2626 		}
2627 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2628 	}
2629 	error = 0;
2630 	/* fall through */
2631 
2632 nfsmout:
2633 	NDFREE(&nd, NDF_ONLY_PNBUF);
2634 	if (nd.ni_dvp) {
2635 		if (nd.ni_dvp == nd.ni_vp)
2636 			vrele(nd.ni_dvp);
2637 		else
2638 			vput(nd.ni_dvp);
2639 	}
2640 	if (nd.ni_vp)
2641 		vrele(nd.ni_vp);
2642 	if (nd.ni_startdir)
2643 		vrele(nd.ni_startdir);
2644 	if (dirp)
2645 		vrele(dirp);
2646 	if (pathcp)
2647 		FREE(pathcp, M_TEMP);
2648 
2649 	return (error);
2650 }
2651 
2652 /*
2653  * nfs mkdir service
2654  */
2655 int
2656 nfsrv_mkdir(nfsd, slp, td, mrq)
2657 	struct nfsrv_descript *nfsd;
2658 	struct nfssvc_sock *slp;
2659 	struct thread *td;
2660 	struct mbuf **mrq;
2661 {
2662 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2663 	struct sockaddr *nam = nfsd->nd_nam;
2664 	caddr_t dpos = nfsd->nd_dpos;
2665 	struct ucred *cred = &nfsd->nd_cr;
2666 	struct vattr va, dirfor, diraft;
2667 	struct vattr *vap = &va;
2668 	struct nfs_fattr *fp;
2669 	struct nameidata nd;
2670 	caddr_t cp;
2671 	u_int32_t *tl;
2672 	int32_t t1;
2673 	caddr_t bpos;
2674 	int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
2675 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2676 	char *cp2;
2677 	struct mbuf *mb, *mb2, *mreq;
2678 	struct vnode *dirp = NULL;
2679 	int vpexcl = 0;
2680 	nfsfh_t nfh;
2681 	fhandle_t *fhp;
2682 	u_quad_t frev;
2683 
2684 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2685 	ndclear(&nd);
2686 
2687 	fhp = &nfh.fh_generic;
2688 	nfsm_srvmtofh(fhp);
2689 	nfsm_srvnamesiz(len);
2690 	nd.ni_cnd.cn_cred = cred;
2691 	nd.ni_cnd.cn_nameiop = CREATE;
2692 	nd.ni_cnd.cn_flags = LOCKPARENT;
2693 
2694 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2695 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2696 	if (dirp) {
2697 		if (v3) {
2698 			dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
2699 		} else {
2700 			vrele(dirp);
2701 			dirp = NULL;
2702 		}
2703 	}
2704 	if (error) {
2705 		nfsm_reply(NFSX_WCCDATA(v3));
2706 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2707 		error = 0;
2708 		goto nfsmout;
2709 	}
2710 	VATTR_NULL(vap);
2711 	if (v3) {
2712 		nfsm_srvsattr(vap);
2713 	} else {
2714 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2715 		vap->va_mode = nfstov_mode(*tl++);
2716 	}
2717 
2718 	/*
2719 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2720 	 * nd.ni_vp, if it exists, is referenced but not locked.
2721 	 */
2722 
2723 	vap->va_type = VDIR;
2724 	if (nd.ni_vp != NULL) {
2725 		NDFREE(&nd, NDF_ONLY_PNBUF);
2726 		error = EEXIST;
2727 		goto out;
2728 	}
2729 
2730 	/*
2731 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2732 	 * component is freed by the VOP call.  This will fill-in
2733 	 * nd.ni_vp, reference, and exclusively lock it.
2734 	 */
2735 	if (vap->va_mode == (mode_t)VNOVAL)
2736 		vap->va_mode = 0;
2737 	nqsrv_getl(nd.ni_dvp, ND_WRITE);
2738 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
2739 	NDFREE(&nd, NDF_ONLY_PNBUF);
2740 	vpexcl = 1;
2741 
2742 	vput(nd.ni_dvp);
2743 	nd.ni_dvp = NULL;
2744 
2745 	if (!error) {
2746 		bzero((caddr_t)fhp, sizeof(nfh));
2747 		fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2748 		error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2749 		if (!error)
2750 			error = VOP_GETATTR(nd.ni_vp, vap, td);
2751 	}
2752 out:
2753 	if (dirp)
2754 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
2755 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2756 	if (v3) {
2757 		if (!error) {
2758 			nfsm_srvpostop_fh(fhp);
2759 			nfsm_srvpostop_attr(0, vap);
2760 		}
2761 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2762 	} else {
2763 		nfsm_srvfhtom(fhp, v3);
2764 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
2765 		nfsm_srvfillattr(vap, fp);
2766 	}
2767 	error = 0;
2768 	/* fall through */
2769 
2770 nfsmout:
2771 	if (dirp)
2772 		vrele(dirp);
2773 	if (nd.ni_dvp) {
2774 		NDFREE(&nd, NDF_ONLY_PNBUF);
2775 		if (nd.ni_dvp == nd.ni_vp && vpexcl)
2776 			vrele(nd.ni_dvp);
2777 		else
2778 			vput(nd.ni_dvp);
2779 	}
2780 	if (nd.ni_vp) {
2781 		if (vpexcl)
2782 			vput(nd.ni_vp);
2783 		else
2784 			vrele(nd.ni_vp);
2785 	}
2786 	return (error);
2787 }
2788 
2789 /*
2790  * nfs rmdir service
2791  */
2792 int
2793 nfsrv_rmdir(nfsd, slp, td, mrq)
2794 	struct nfsrv_descript *nfsd;
2795 	struct nfssvc_sock *slp;
2796 	struct thread *td;
2797 	struct mbuf **mrq;
2798 {
2799 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2800 	struct sockaddr *nam = nfsd->nd_nam;
2801 	caddr_t dpos = nfsd->nd_dpos;
2802 	struct ucred *cred = &nfsd->nd_cr;
2803 	u_int32_t *tl;
2804 	int32_t t1;
2805 	caddr_t bpos;
2806 	int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1;
2807 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2808 	char *cp2;
2809 	struct mbuf *mb, *mreq;
2810 	struct vnode *vp, *dirp = (struct vnode *)0;
2811 	struct vattr dirfor, diraft;
2812 	nfsfh_t nfh;
2813 	fhandle_t *fhp;
2814 	struct nameidata nd;
2815 	u_quad_t frev;
2816 
2817 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2818 	ndclear(&nd);
2819 
2820 	fhp = &nfh.fh_generic;
2821 	nfsm_srvmtofh(fhp);
2822 	nfsm_srvnamesiz(len);
2823 	nd.ni_cnd.cn_cred = cred;
2824 	nd.ni_cnd.cn_nameiop = DELETE;
2825 	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2826 	error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2827 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2828 	if (dirp) {
2829 		if (v3) {
2830 			dirfor_ret = VOP_GETATTR(dirp, &dirfor, td);
2831 		} else {
2832 			vrele(dirp);
2833 			dirp = NULL;
2834 		}
2835 	}
2836 	if (error) {
2837 		nfsm_reply(NFSX_WCCDATA(v3));
2838 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2839 		error = 0;
2840 		goto nfsmout;
2841 	}
2842 	vp = nd.ni_vp;
2843 	if (vp->v_type != VDIR) {
2844 		error = ENOTDIR;
2845 		goto out;
2846 	}
2847 	/*
2848 	 * No rmdir "." please.
2849 	 */
2850 	if (nd.ni_dvp == vp) {
2851 		error = EINVAL;
2852 		goto out;
2853 	}
2854 	/*
2855 	 * The root of a mounted filesystem cannot be deleted.
2856 	 */
2857 	if (vp->v_flag & VROOT)
2858 		error = EBUSY;
2859 out:
2860 	/*
2861 	 * Issue or abort op.  Since SAVESTART is not set, path name
2862 	 * component is freed by the VOP after either.
2863 	 */
2864 	if (!error) {
2865 		nqsrv_getl(nd.ni_dvp, ND_WRITE);
2866 		nqsrv_getl(vp, ND_WRITE);
2867 		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2868 	}
2869 	NDFREE(&nd, NDF_ONLY_PNBUF);
2870 
2871 	if (dirp)
2872 		diraft_ret = VOP_GETATTR(dirp, &diraft, td);
2873 	nfsm_reply(NFSX_WCCDATA(v3));
2874 	if (v3) {
2875 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2876 		error = 0;
2877 	}
2878 	/* fall through */
2879 
2880 nfsmout:
2881 	NDFREE(&nd, NDF_ONLY_PNBUF);
2882 	if (dirp)
2883 		vrele(dirp);
2884 	if (nd.ni_dvp) {
2885 		if (nd.ni_dvp == nd.ni_vp)
2886 			vrele(nd.ni_dvp);
2887 		else
2888 			vput(nd.ni_dvp);
2889 	}
2890 	if (nd.ni_vp)
2891 		vput(nd.ni_vp);
2892 
2893 	return(error);
2894 }
2895 
2896 /*
2897  * nfs readdir service
2898  * - mallocs what it thinks is enough to read
2899  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2900  * - calls VOP_READDIR()
2901  * - loops around building the reply
2902  *	if the output generated exceeds count break out of loop
2903  *	The nfsm_clget macro is used here so that the reply will be packed
2904  *	tightly in mbuf clusters.
2905  * - it only knows that it has encountered eof when the VOP_READDIR()
2906  *	reads nothing
2907  * - as such one readdir rpc will return eof false although you are there
2908  *	and then the next will return eof
2909  * - it trims out records with d_fileno == 0
2910  *	this doesn't matter for Unix clients, but they might confuse clients
2911  *	for other os'.
2912  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2913  *	than requested, but this may not apply to all filesystems. For
2914  *	example, client NFS does not { although it is never remote mounted
2915  *	anyhow }
2916  *     The alternate call nfsrv_readdirplus() does lookups as well.
2917  * PS: The NFS protocol spec. does not clarify what the "count" byte
2918  *	argument is a count of.. just name strings and file id's or the
2919  *	entire reply rpc or ...
2920  *	I tried just file name and id sizes and it confused the Sun client,
2921  *	so I am using the full rpc size now. The "paranoia.." comment refers
2922  *	to including the status longwords that are not a part of the dir.
2923  *	"entry" structures, but are in the rpc.
2924  */
/*
 * Scratch record laid out as a sequence of 32-bit words.
 * nfsrv_readdirplus() declares one of these and uses fl_nfh as storage
 * for a file handle (it is cast to fhandle_t * there).
 *
 * NOTE(review): judging by the names, the other members look like the
 * per-entry cookie, the "attributes follow" flag plus attributes, and the
 * "handle follows" flag plus handle size of a READDIRPLUS reply entry --
 * confirm against the body of nfsrv_readdirplus().
 */
struct flrep {
	nfsuint64	fl_off;
	u_int32_t	fl_postopok;
	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];	/* NFSX_V3FATTR bytes */
	u_int32_t	fl_fhok;
	u_int32_t	fl_fhsize;
	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];	/* NFSX_V3FH bytes */
};
2933 
2934 int
2935 nfsrv_readdir(nfsd, slp, td, mrq)
2936 	struct nfsrv_descript *nfsd;
2937 	struct nfssvc_sock *slp;
2938 	struct thread *td;
2939 	struct mbuf **mrq;
2940 {
2941 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2942 	struct sockaddr *nam = nfsd->nd_nam;
2943 	caddr_t dpos = nfsd->nd_dpos;
2944 	struct ucred *cred = &nfsd->nd_cr;
2945 	char *bp, *be;
2946 	struct mbuf *mp;
2947 	struct dirent *dp;
2948 	caddr_t cp;
2949 	u_int32_t *tl;
2950 	int32_t t1;
2951 	caddr_t bpos;
2952 	struct mbuf *mb, *mb2, *mreq, *mp2;
2953 	char *cpos, *cend, *cp2, *rbuf;
2954 	struct vnode *vp = NULL;
2955 	struct vattr at;
2956 	nfsfh_t nfh;
2957 	fhandle_t *fhp;
2958 	struct uio io;
2959 	struct iovec iv;
2960 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2961 	int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies;
2962 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2963 	u_quad_t frev, off, toff, verf;
2964 	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
2965 
2966 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2967 	fhp = &nfh.fh_generic;
2968 	nfsm_srvmtofh(fhp);
2969 	if (v3) {
2970 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2971 		toff = fxdr_hyper(tl);
2972 		tl += 2;
2973 		verf = fxdr_hyper(tl);
2974 		tl += 2;
2975 	} else {
2976 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2977 		toff = fxdr_unsigned(u_quad_t, *tl++);
2978 		verf = 0;	/* shut up gcc */
2979 	}
2980 	off = toff;
2981 	cnt = fxdr_unsigned(int, *tl);
2982 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2983 	xfer = NFS_SRVMAXDATA(nfsd);
2984 	if (cnt > xfer)
2985 		cnt = xfer;
2986 	if (siz > xfer)
2987 		siz = xfer;
2988 	fullsiz = siz;
2989 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
2990 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2991 	if (!error && vp->v_type != VDIR) {
2992 		error = ENOTDIR;
2993 		vput(vp);
2994 		vp = NULL;
2995 	}
2996 	if (error) {
2997 		nfsm_reply(NFSX_UNSIGNED);
2998 		nfsm_srvpostop_attr(getret, &at);
2999 		error = 0;
3000 		goto nfsmout;
3001 	}
3002 
3003 	/*
3004 	 * Obtain lock on vnode for this section of the code
3005 	 */
3006 
3007 	nqsrv_getl(vp, ND_READ);
3008 	if (v3) {
3009 		error = getret = VOP_GETATTR(vp, &at, td);
3010 #if 0
3011 		/*
3012 		 * XXX This check may be too strict for Solaris 2.5 clients.
3013 		 */
3014 		if (!error && toff && verf && verf != at.va_filerev)
3015 			error = NFSERR_BAD_COOKIE;
3016 #endif
3017 	}
3018 	if (!error)
3019 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3020 	if (error) {
3021 		vput(vp);
3022 		vp = NULL;
3023 		nfsm_reply(NFSX_POSTOPATTR(v3));
3024 		nfsm_srvpostop_attr(getret, &at);
3025 		error = 0;
3026 		goto nfsmout;
3027 	}
3028 	VOP_UNLOCK(vp, 0, td);
3029 
3030 	/*
3031 	 * end section.  Allocate rbuf and continue
3032 	 */
3033 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3034 again:
3035 	iv.iov_base = rbuf;
3036 	iv.iov_len = fullsiz;
3037 	io.uio_iov = &iv;
3038 	io.uio_iovcnt = 1;
3039 	io.uio_offset = (off_t)off;
3040 	io.uio_resid = fullsiz;
3041 	io.uio_segflg = UIO_SYSSPACE;
3042 	io.uio_rw = UIO_READ;
3043 	io.uio_td = NULL;
3044 	eofflag = 0;
3045 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3046 	if (cookies) {
3047 		free((caddr_t)cookies, M_TEMP);
3048 		cookies = NULL;
3049 	}
3050 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3051 	off = (off_t)io.uio_offset;
3052 	if (!cookies && !error)
3053 		error = NFSERR_PERM;
3054 	if (v3) {
3055 		getret = VOP_GETATTR(vp, &at, td);
3056 		if (!error)
3057 			error = getret;
3058 	}
3059 	VOP_UNLOCK(vp, 0, td);
3060 	if (error) {
3061 		vrele(vp);
3062 		vp = NULL;
3063 		free((caddr_t)rbuf, M_TEMP);
3064 		if (cookies)
3065 			free((caddr_t)cookies, M_TEMP);
3066 		nfsm_reply(NFSX_POSTOPATTR(v3));
3067 		nfsm_srvpostop_attr(getret, &at);
3068 		error = 0;
3069 		goto nfsmout;
3070 	}
3071 	if (io.uio_resid) {
3072 		siz -= io.uio_resid;
3073 
3074 		/*
3075 		 * If nothing read, return eof
3076 		 * rpc reply
3077 		 */
3078 		if (siz == 0) {
3079 			vrele(vp);
3080 			vp = NULL;
3081 			nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
3082 				2 * NFSX_UNSIGNED);
3083 			if (v3) {
3084 				nfsm_srvpostop_attr(getret, &at);
3085 				nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3086 				txdr_hyper(at.va_filerev, tl);
3087 				tl += 2;
3088 			} else
3089 				nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3090 			*tl++ = nfs_false;
3091 			*tl = nfs_true;
3092 			FREE((caddr_t)rbuf, M_TEMP);
3093 			FREE((caddr_t)cookies, M_TEMP);
3094 			error = 0;
3095 			goto nfsmout;
3096 		}
3097 	}
3098 
3099 	/*
3100 	 * Check for degenerate cases of nothing useful read.
3101 	 * If so go try again
3102 	 */
3103 	cpos = rbuf;
3104 	cend = rbuf + siz;
3105 	dp = (struct dirent *)cpos;
3106 	cookiep = cookies;
3107 	/*
3108 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3109 	 * directory offset up to a block boundary, so it is necessary to
3110 	 * skip over the records that preceed the requested offset. This
3111 	 * requires the assumption that file offset cookies monotonically
3112 	 * increase.
3113 	 */
3114 	while (cpos < cend && ncookies > 0 &&
3115 		(dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3116 		 ((u_quad_t)(*cookiep)) <= toff)) {
3117 		cpos += dp->d_reclen;
3118 		dp = (struct dirent *)cpos;
3119 		cookiep++;
3120 		ncookies--;
3121 	}
3122 	if (cpos >= cend || ncookies == 0) {
3123 		toff = off;
3124 		siz = fullsiz;
3125 		goto again;
3126 	}
3127 
3128 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3129 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
3130 	if (v3) {
3131 		nfsm_srvpostop_attr(getret, &at);
3132 		nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3133 		txdr_hyper(at.va_filerev, tl);
3134 	}
3135 	mp = mp2 = mb;
3136 	bp = bpos;
3137 	be = bp + M_TRAILINGSPACE(mp);
3138 
3139 	/* Loop through the records and build reply */
3140 	while (cpos < cend && ncookies > 0) {
3141 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3142 			nlen = dp->d_namlen;
3143 			rem = nfsm_rndup(nlen) - nlen;
3144 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3145 			if (v3)
3146 				len += 2 * NFSX_UNSIGNED;
3147 			if (len > cnt) {
3148 				eofflag = 0;
3149 				break;
3150 			}
3151 			/*
3152 			 * Build the directory record xdr from
3153 			 * the dirent entry.
3154 			 */
3155 			nfsm_clget;
3156 			*tl = nfs_true;
3157 			bp += NFSX_UNSIGNED;
3158 			if (v3) {
3159 				nfsm_clget;
3160 				*tl = 0;
3161 				bp += NFSX_UNSIGNED;
3162 			}
3163 			nfsm_clget;
3164 			*tl = txdr_unsigned(dp->d_fileno);
3165 			bp += NFSX_UNSIGNED;
3166 			nfsm_clget;
3167 			*tl = txdr_unsigned(nlen);
3168 			bp += NFSX_UNSIGNED;
3169 
3170 			/* And loop around copying the name */
3171 			xfer = nlen;
3172 			cp = dp->d_name;
3173 			while (xfer > 0) {
3174 				nfsm_clget;
3175 				if ((bp+xfer) > be)
3176 					tsiz = be-bp;
3177 				else
3178 					tsiz = xfer;
3179 				bcopy(cp, bp, tsiz);
3180 				bp += tsiz;
3181 				xfer -= tsiz;
3182 				if (xfer > 0)
3183 					cp += tsiz;
3184 			}
3185 			/* And null pad to a int32_t boundary */
3186 			for (i = 0; i < rem; i++)
3187 				*bp++ = '\0';
3188 			nfsm_clget;
3189 
3190 			/* Finish off the record */
3191 			if (v3) {
3192 				*tl = 0;
3193 				bp += NFSX_UNSIGNED;
3194 				nfsm_clget;
3195 			}
3196 			*tl = txdr_unsigned(*cookiep);
3197 			bp += NFSX_UNSIGNED;
3198 		}
3199 		cpos += dp->d_reclen;
3200 		dp = (struct dirent *)cpos;
3201 		cookiep++;
3202 		ncookies--;
3203 	}
3204 	vrele(vp);
3205 	vp = NULL;
3206 	nfsm_clget;
3207 	*tl = nfs_false;
3208 	bp += NFSX_UNSIGNED;
3209 	nfsm_clget;
3210 	if (eofflag)
3211 		*tl = nfs_true;
3212 	else
3213 		*tl = nfs_false;
3214 	bp += NFSX_UNSIGNED;
3215 	if (mp != mb) {
3216 		if (bp < be)
3217 			mp->m_len = bp - mtod(mp, caddr_t);
3218 	} else
3219 		mp->m_len += bp - bpos;
3220 	FREE((caddr_t)rbuf, M_TEMP);
3221 	FREE((caddr_t)cookies, M_TEMP);
3222 
3223 nfsmout:
3224 	if (vp)
3225 		vrele(vp);
3226 	return(error);
3227 }
3228 
3229 int
3230 nfsrv_readdirplus(nfsd, slp, td, mrq)
3231 	struct nfsrv_descript *nfsd;
3232 	struct nfssvc_sock *slp;
3233 	struct thread *td;
3234 	struct mbuf **mrq;
3235 {
3236 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3237 	struct sockaddr *nam = nfsd->nd_nam;
3238 	caddr_t dpos = nfsd->nd_dpos;
3239 	struct ucred *cred = &nfsd->nd_cr;
3240 	char *bp, *be;
3241 	struct mbuf *mp;
3242 	struct dirent *dp;
3243 	caddr_t cp;
3244 	u_int32_t *tl;
3245 	int32_t t1;
3246 	caddr_t bpos;
3247 	struct mbuf *mb, *mb2, *mreq, *mp2;
3248 	char *cpos, *cend, *cp2, *rbuf;
3249 	struct vnode *vp = NULL, *nvp;
3250 	struct flrep fl;
3251 	nfsfh_t nfh;
3252 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3253 	struct uio io;
3254 	struct iovec iv;
3255 	struct vattr va, at, *vap = &va;
3256 	struct nfs_fattr *fp;
3257 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3258 	int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies;
3259 	u_quad_t frev, off, toff, verf;
3260 	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3261 
3262 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3263 	fhp = &nfh.fh_generic;
3264 	nfsm_srvmtofh(fhp);
3265 	nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3266 	toff = fxdr_hyper(tl);
3267 	tl += 2;
3268 	verf = fxdr_hyper(tl);
3269 	tl += 2;
3270 	siz = fxdr_unsigned(int, *tl++);
3271 	cnt = fxdr_unsigned(int, *tl);
3272 	off = toff;
3273 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3274 	xfer = NFS_SRVMAXDATA(nfsd);
3275 	if (cnt > xfer)
3276 		cnt = xfer;
3277 	if (siz > xfer)
3278 		siz = xfer;
3279 	fullsiz = siz;
3280 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3281 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3282 	if (!error && vp->v_type != VDIR) {
3283 		error = ENOTDIR;
3284 		vput(vp);
3285 		vp = NULL;
3286 	}
3287 	if (error) {
3288 		nfsm_reply(NFSX_UNSIGNED);
3289 		nfsm_srvpostop_attr(getret, &at);
3290 		error = 0;
3291 		goto nfsmout;
3292 	}
3293 	error = getret = VOP_GETATTR(vp, &at, td);
3294 #if 0
3295 	/*
3296 	 * XXX This check may be too strict for Solaris 2.5 clients.
3297 	 */
3298 	if (!error && toff && verf && verf != at.va_filerev)
3299 		error = NFSERR_BAD_COOKIE;
3300 #endif
3301 	if (!error) {
3302 		nqsrv_getl(vp, ND_READ);
3303 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3304 	}
3305 	if (error) {
3306 		vput(vp);
3307 		vp = NULL;
3308 		nfsm_reply(NFSX_V3POSTOPATTR);
3309 		nfsm_srvpostop_attr(getret, &at);
3310 		error = 0;
3311 		goto nfsmout;
3312 	}
3313 	VOP_UNLOCK(vp, 0, td);
3314 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3315 again:
3316 	iv.iov_base = rbuf;
3317 	iv.iov_len = fullsiz;
3318 	io.uio_iov = &iv;
3319 	io.uio_iovcnt = 1;
3320 	io.uio_offset = (off_t)off;
3321 	io.uio_resid = fullsiz;
3322 	io.uio_segflg = UIO_SYSSPACE;
3323 	io.uio_rw = UIO_READ;
3324 	io.uio_td = NULL;
3325 	eofflag = 0;
3326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3327 	if (cookies) {
3328 		free((caddr_t)cookies, M_TEMP);
3329 		cookies = NULL;
3330 	}
3331 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3332 	off = (u_quad_t)io.uio_offset;
3333 	getret = VOP_GETATTR(vp, &at, td);
3334 	VOP_UNLOCK(vp, 0, td);
3335 	if (!cookies && !error)
3336 		error = NFSERR_PERM;
3337 	if (!error)
3338 		error = getret;
3339 	if (error) {
3340 		vrele(vp);
3341 		vp = NULL;
3342 		if (cookies)
3343 			free((caddr_t)cookies, M_TEMP);
3344 		free((caddr_t)rbuf, M_TEMP);
3345 		nfsm_reply(NFSX_V3POSTOPATTR);
3346 		nfsm_srvpostop_attr(getret, &at);
3347 		error = 0;
3348 		goto nfsmout;
3349 	}
3350 	if (io.uio_resid) {
3351 		siz -= io.uio_resid;
3352 
3353 		/*
3354 		 * If nothing read, return eof
3355 		 * rpc reply
3356 		 */
3357 		if (siz == 0) {
3358 			vrele(vp);
3359 			vp = NULL;
3360 			nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3361 				2 * NFSX_UNSIGNED);
3362 			nfsm_srvpostop_attr(getret, &at);
3363 			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3364 			txdr_hyper(at.va_filerev, tl);
3365 			tl += 2;
3366 			*tl++ = nfs_false;
3367 			*tl = nfs_true;
3368 			FREE((caddr_t)cookies, M_TEMP);
3369 			FREE((caddr_t)rbuf, M_TEMP);
3370 			error = 0;
3371 			goto nfsmout;
3372 		}
3373 	}
3374 
3375 	/*
3376 	 * Check for degenerate cases of nothing useful read.
3377 	 * If so go try again
3378 	 */
3379 	cpos = rbuf;
3380 	cend = rbuf + siz;
3381 	dp = (struct dirent *)cpos;
3382 	cookiep = cookies;
3383 	/*
3384 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3385 	 * directory offset up to a block boundary, so it is necessary to
3386 	 * skip over the records that precede the requested offset. This
3387 	 * requires the assumption that file offset cookies monotonically
3388 	 * increase.
3389 	 */
3390 	while (cpos < cend && ncookies > 0 &&
3391 		(dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3392 		 ((u_quad_t)(*cookiep)) <= toff)) {
3393 		cpos += dp->d_reclen;
3394 		dp = (struct dirent *)cpos;
3395 		cookiep++;
3396 		ncookies--;
3397 	}
3398 	if (cpos >= cend || ncookies == 0) {
3399 		toff = off;
3400 		siz = fullsiz;
3401 		goto again;
3402 	}
3403 
3404 	/*
3405 	 * Probe one of the directory entries to see if the filesystem
3406 	 * supports VGET.
3407 	 */
3408 	if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) {
3409 		error = NFSERR_NOTSUPP;
3410 		vrele(vp);
3411 		vp = NULL;
3412 		free((caddr_t)cookies, M_TEMP);
3413 		free((caddr_t)rbuf, M_TEMP);
3414 		nfsm_reply(NFSX_V3POSTOPATTR);
3415 		nfsm_srvpostop_attr(getret, &at);
3416 		error = 0;
3417 		goto nfsmout;
3418 	}
3419 	vput(nvp);
3420 	nvp = NULL;
3421 
3422 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
3423 	nfsm_reply(cnt);
3424 	nfsm_srvpostop_attr(getret, &at);
3425 	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3426 	txdr_hyper(at.va_filerev, tl);
3427 	mp = mp2 = mb;
3428 	bp = bpos;
3429 	be = bp + M_TRAILINGSPACE(mp);
3430 
3431 	/* Loop through the records and build reply */
3432 	while (cpos < cend && ncookies > 0) {
3433 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3434 			nlen = dp->d_namlen;
3435 			rem = nfsm_rndup(nlen)-nlen;
3436 
3437 			/*
3438 			 * For readdir_and_lookup get the vnode using
3439 			 * the file number.
3440 			 */
3441 			if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
3442 				goto invalid;
3443 			bzero((caddr_t)nfhp, NFSX_V3FH);
3444 			nfhp->fh_fsid =
3445 				nvp->v_mount->mnt_stat.f_fsid;
3446 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3447 				vput(nvp);
3448 				nvp = NULL;
3449 				goto invalid;
3450 			}
3451 			if (VOP_GETATTR(nvp, vap, td)) {
3452 				vput(nvp);
3453 				nvp = NULL;
3454 				goto invalid;
3455 			}
3456 			vput(nvp);
3457 			nvp = NULL;
3458 
3459 			/*
3460 			 * If either the dircount or maxcount will be
3461 			 * exceeded, get out now. Both of these lengths
3462 			 * are calculated conservatively, including all
3463 			 * XDR overheads.
3464 			 */
3465 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3466 				NFSX_V3POSTOPATTR);
3467 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3468 			if (len > cnt || dirlen > fullsiz) {
3469 				eofflag = 0;
3470 				break;
3471 			}
3472 
3473 			/*
3474 			 * Build the directory record xdr from
3475 			 * the dirent entry.
3476 			 */
3477 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3478 			nfsm_srvfillattr(vap, fp);
3479 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3480 			fl.fl_fhok = nfs_true;
3481 			fl.fl_postopok = nfs_true;
3482 			fl.fl_off.nfsuquad[0] = 0;
3483 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3484 
3485 			nfsm_clget;
3486 			*tl = nfs_true;
3487 			bp += NFSX_UNSIGNED;
3488 			nfsm_clget;
3489 			*tl = 0;
3490 			bp += NFSX_UNSIGNED;
3491 			nfsm_clget;
3492 			*tl = txdr_unsigned(dp->d_fileno);
3493 			bp += NFSX_UNSIGNED;
3494 			nfsm_clget;
3495 			*tl = txdr_unsigned(nlen);
3496 			bp += NFSX_UNSIGNED;
3497 
3498 			/* And loop around copying the name */
3499 			xfer = nlen;
3500 			cp = dp->d_name;
3501 			while (xfer > 0) {
3502 				nfsm_clget;
3503 				if ((bp + xfer) > be)
3504 					tsiz = be - bp;
3505 				else
3506 					tsiz = xfer;
3507 				bcopy(cp, bp, tsiz);
3508 				bp += tsiz;
3509 				xfer -= tsiz;
3510 				if (xfer > 0)
3511 					cp += tsiz;
3512 			}
3513 			/* And null pad to a int32_t boundary */
3514 			for (i = 0; i < rem; i++)
3515 				*bp++ = '\0';
3516 
3517 			/*
3518 			 * Now copy the flrep structure out.
3519 			 */
3520 			xfer = sizeof (struct flrep);
3521 			cp = (caddr_t)&fl;
3522 			while (xfer > 0) {
3523 				nfsm_clget;
3524 				if ((bp + xfer) > be)
3525 					tsiz = be - bp;
3526 				else
3527 					tsiz = xfer;
3528 				bcopy(cp, bp, tsiz);
3529 				bp += tsiz;
3530 				xfer -= tsiz;
3531 				if (xfer > 0)
3532 					cp += tsiz;
3533 			}
3534 		}
3535 invalid:
3536 		cpos += dp->d_reclen;
3537 		dp = (struct dirent *)cpos;
3538 		cookiep++;
3539 		ncookies--;
3540 	}
3541 	vrele(vp);
3542 	vp = NULL;
3543 	nfsm_clget;
3544 	*tl = nfs_false;
3545 	bp += NFSX_UNSIGNED;
3546 	nfsm_clget;
3547 	if (eofflag)
3548 		*tl = nfs_true;
3549 	else
3550 		*tl = nfs_false;
3551 	bp += NFSX_UNSIGNED;
3552 	if (mp != mb) {
3553 		if (bp < be)
3554 			mp->m_len = bp - mtod(mp, caddr_t);
3555 	} else
3556 		mp->m_len += bp - bpos;
3557 	FREE((caddr_t)cookies, M_TEMP);
3558 	FREE((caddr_t)rbuf, M_TEMP);
3559 nfsmout:
3560 	if (vp)
3561 		vrele(vp);
3562 	return(error);
3563 }
3564 
3565 /*
3566  * nfs commit service
3567  */
3568 int
3569 nfsrv_commit(nfsd, slp, td, mrq)
3570 	struct nfsrv_descript *nfsd;
3571 	struct nfssvc_sock *slp;
3572 	struct thread *td;
3573 	struct mbuf **mrq;
3574 {
3575 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3576 	struct sockaddr *nam = nfsd->nd_nam;
3577 	caddr_t dpos = nfsd->nd_dpos;
3578 	struct ucred *cred = &nfsd->nd_cr;
3579 	struct vattr bfor, aft;
3580 	struct vnode *vp = NULL;
3581 	nfsfh_t nfh;
3582 	fhandle_t *fhp;
3583 	u_int32_t *tl;
3584 	int32_t t1;
3585 	caddr_t bpos;
3586 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache;
3587 	char *cp2;
3588 	struct mbuf *mb, *mb2, *mreq;
3589 	u_quad_t frev, off;
3590 
3591 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3592 #ifndef nolint
3593 	cache = 0;
3594 #endif
3595 	fhp = &nfh.fh_generic;
3596 	nfsm_srvmtofh(fhp);
3597 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3598 
3599 	/*
3600 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3601 	 * count parameters, so these arguments are useless (someday maybe).
3602 	 */
3603 	off = fxdr_hyper(tl);
3604 	tl += 2;
3605 	cnt = fxdr_unsigned(int, *tl);
3606 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3607 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3608 	if (error) {
3609 		nfsm_reply(2 * NFSX_UNSIGNED);
3610 		nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3611 		error = 0;
3612 		goto nfsmout;
3613 	}
3614 	for_ret = VOP_GETATTR(vp, &bfor, td);
3615 
3616 	if (cnt > MAX_COMMIT_COUNT) {
3617 		/*
3618 		 * Give up and do the whole thing
3619 		 */
3620 		if (vp->v_object &&
3621 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3622 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3623 		}
3624 		error = VOP_FSYNC(vp, MNT_WAIT, td);
3625 	} else {
3626 		/*
3627 		 * Locate and synchronously write any buffers that fall
3628 		 * into the requested range.  Note:  we are assuming that
3629 		 * f_iosize is a power of 2.
3630 		 */
3631 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3632 		int iomask = iosize - 1;
3633 		int s;
3634 		daddr_t lblkno;
3635 
3636 		/*
3637 		 * Align to iosize boundry, super-align to page boundry.
3638 		 */
3639 		if (off & iomask) {
3640 			cnt += off & iomask;
3641 			off &= ~(u_quad_t)iomask;
3642 		}
3643 		if (off & PAGE_MASK) {
3644 			cnt += off & PAGE_MASK;
3645 			off &= ~(u_quad_t)PAGE_MASK;
3646 		}
3647 		lblkno = off / iosize;
3648 
3649 		if (vp->v_object &&
3650 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3651 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3652 		}
3653 
3654 		s = splbio();
3655 		while (cnt > 0) {
3656 			struct buf *bp;
3657 
3658 			/*
3659 			 * If we have a buffer and it is marked B_DELWRI we
3660 			 * have to lock and write it.  Otherwise the prior
3661 			 * write is assumed to have already been committed.
3662 			 */
3663 			if ((bp = gbincore(vp, lblkno)) != NULL && (bp->b_flags & B_DELWRI)) {
3664 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
3665 					BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL);
3666 					continue; /* retry */
3667 				}
3668 				bremfree(bp);
3669 				bp->b_flags &= ~B_ASYNC;
3670 				VOP_BWRITE(bp->b_vp, bp);
3671 				++nfs_commit_miss;
3672 			}
3673 			++nfs_commit_blks;
3674 			if (cnt < iosize)
3675 				break;
3676 			cnt -= iosize;
3677 			++lblkno;
3678 		}
3679 		splx(s);
3680 	}
3681 
3682 	aft_ret = VOP_GETATTR(vp, &aft, td);
3683 	vput(vp);
3684 	vp = NULL;
3685 	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3686 	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3687 	if (!error) {
3688 		nfsm_build(tl, u_int32_t *, NFSX_V3WRITEVERF);
3689 		if (nfsver.tv_sec == 0)
3690 			nfsver = boottime;
3691 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3692 		*tl = txdr_unsigned(nfsver.tv_usec);
3693 	} else {
3694 		error = 0;
3695 	}
3696 nfsmout:
3697 	if (vp)
3698 		vput(vp);
3699 	return(error);
3700 }
3701 
3702 /*
3703  * nfs statfs service
 *
 * Decodes a file handle from the request, maps it to a vnode with
 * nfsrv_fhtovp(), queries the owning mount with VFS_STATFS() and builds
 * the reply.  Both protocol versions are handled; version 3 is selected
 * by ND_NFSV3 in nfsd->nd_flag and additionally gets post-op attributes.
 *
 * NOTE(review): nfsm_srvmtofh, nfsm_reply, nfsm_srvpostop_attr and
 * nfsm_build are macros defined elsewhere.  They appear to use the locals
 * below by name (mrep, md, dpos, bpos, mb, mb2, mreq, tl, t1, cp2, cache,
 * frev, error, ...) and may jump to nfsmout -- confirm before renaming or
 * removing any of them.
3704  */
3705 int
3706 nfsrv_statfs(nfsd, slp, td, mrq)
3707 	struct nfsrv_descript *nfsd;
3708 	struct nfssvc_sock *slp;
3709 	struct thread *td;
3710 	struct mbuf **mrq;
3711 {
3712 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3713 	struct sockaddr *nam = nfsd->nd_nam;
3714 	caddr_t dpos = nfsd->nd_dpos;
3715 	struct ucred *cred = &nfsd->nd_cr;
3716 	struct statfs *sf;
3717 	struct nfs_statfs *sfp;
3718 	u_int32_t *tl;
3719 	int32_t t1;
3720 	caddr_t bpos;
3721 	int error = 0, rdonly, cache, getret = 1;
3722 	int v3 = (nfsd->nd_flag & ND_NFSV3);
3723 	char *cp2;
3724 	struct mbuf *mb, *mb2, *mreq;
3725 	struct vnode *vp = NULL;
3726 	struct vattr at;
3727 	nfsfh_t nfh;
3728 	fhandle_t *fhp;
3729 	struct statfs statfs;
3730 	u_quad_t frev, tval;
3731 
3732 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3733 #ifndef nolint
3734 	cache = 0;
3735 #endif
3736 	fhp = &nfh.fh_generic;
3737 	nfsm_srvmtofh(fhp);
3738 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3739 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3740 	if (error) {
		/* No vnode: getret is still 1, so no attributes are available. */
3741 		nfsm_reply(NFSX_UNSIGNED);
3742 		nfsm_srvpostop_attr(getret, &at);
3743 		error = 0;
3744 		goto nfsmout;
3745 	}
3746 	sf = &statfs;
3747 	error = VFS_STATFS(vp->v_mount, sf, td);
3748 	getret = VOP_GETATTR(vp, &at, td);
	/* The vnode is released before the reply is built. */
3749 	vput(vp);
3750 	vp = NULL;
3751 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
3752 	if (v3)
3753 		nfsm_srvpostop_attr(getret, &at);
3754 	if (error) {
3755 		error = 0;
3756 		goto nfsmout;
3757 	}
3758 	nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
3759 	if (v3) {
		/*
		 * Version 3: sizes are 64-bit byte counts (blocks * f_bsize).
		 * File counts are placed in the low word with the high word
		 * zeroed; the "available files" field reuses f_ffree.
		 */
3760 		tval = (u_quad_t)sf->f_blocks;
3761 		tval *= (u_quad_t)sf->f_bsize;
3762 		txdr_hyper(tval, &sfp->sf_tbytes);
3763 		tval = (u_quad_t)sf->f_bfree;
3764 		tval *= (u_quad_t)sf->f_bsize;
3765 		txdr_hyper(tval, &sfp->sf_fbytes);
3766 		tval = (u_quad_t)sf->f_bavail;
3767 		tval *= (u_quad_t)sf->f_bsize;
3768 		txdr_hyper(tval, &sfp->sf_abytes);
3769 		sfp->sf_tfiles.nfsuquad[0] = 0;
3770 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3771 		sfp->sf_ffiles.nfsuquad[0] = 0;
3772 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3773 		sfp->sf_afiles.nfsuquad[0] = 0;
3774 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3775 		sfp->sf_invarsec = 0;
3776 	} else {
		/*
		 * Otherwise: 32-bit fields expressed in units of f_bsize,
		 * with the transfer size reported as NFS_MAXDGRAMDATA.
		 */
3777 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3778 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3779 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3780 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3781 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3782 	}
3783 nfsmout:
3784 	if (vp)
3785 		vput(vp);
3786 	return(error);
3787 }
3788 
3789 /*
3790  * nfs fsinfo service
 *
 * Decodes a file handle, maps it to a vnode with nfsrv_fhtovp() and
 * replies with post-op attributes plus a struct nfsv3_fsinfo filled with
 * fixed transfer-size limits, a guessed maximum file size, a time
 * granularity of one nanosecond and a fixed set of property flags.
 *
 * NOTE(review): nfsm_srvmtofh, nfsm_reply, nfsm_srvpostop_attr and
 * nfsm_build are macros defined elsewhere.  They appear to use the locals
 * below by name (mrep, md, dpos, bpos, mb, mb2, mreq, tl, t1, cp2, cache,
 * frev, error, ...) and may jump to nfsmout -- confirm before renaming or
 * removing any of them.
3791  */
3792 int
3793 nfsrv_fsinfo(nfsd, slp, td, mrq)
3794 	struct nfsrv_descript *nfsd;
3795 	struct nfssvc_sock *slp;
3796 	struct thread *td;
3797 	struct mbuf **mrq;
3798 {
3799 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3800 	struct sockaddr *nam = nfsd->nd_nam;
3801 	caddr_t dpos = nfsd->nd_dpos;
3802 	struct ucred *cred = &nfsd->nd_cr;
3803 	u_int32_t *tl;
3804 	struct nfsv3_fsinfo *sip;
3805 	int32_t t1;
3806 	caddr_t bpos;
3807 	int error = 0, rdonly, cache, getret = 1, pref;
3808 	char *cp2;
3809 	struct mbuf *mb, *mb2, *mreq;
3810 	struct vnode *vp = NULL;
3811 	struct vattr at;
3812 	nfsfh_t nfh;
3813 	fhandle_t *fhp;
3814 	u_quad_t frev, maxfsize;
3815 	struct statfs sb;
3816 
3817 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3818 #ifndef nolint
3819 	cache = 0;
3820 #endif
3821 	fhp = &nfh.fh_generic;
3822 	nfsm_srvmtofh(fhp);
3823 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3824 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3825 	if (error) {
		/* No vnode: getret is still 1, so no attributes are available. */
3826 		nfsm_reply(NFSX_UNSIGNED);
3827 		nfsm_srvpostop_attr(getret, &at);
3828 		error = 0;
3829 		goto nfsmout;
3830 	}
3831 
3832 	/* XXX Try to make a guess on the max file size. */
	/*
	 * The guess is 2^31 blocks of f_bsize bytes, minus one byte.
	 * NOTE(review): the return value of VFS_STATFS() is ignored here;
	 * if it can fail without filling in sb, sb.f_bsize is read
	 * uninitialised -- confirm.
	 */
3833 	VFS_STATFS(vp->v_mount, &sb, td);
3834 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3835 
3836 	getret = VOP_GETATTR(vp, &at, td);
	/* The vnode is released before the reply is built. */
3837 	vput(vp);
3838 	vp = NULL;
3839 	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
3840 	nfsm_srvpostop_attr(getret, &at);
3841 	nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
3842 
3843 	/*
3844 	 * XXX
3845 	 * There should be file system VFS OP(s) to get this information.
3846 	 * For now, assume ufs.
3847 	 */
	/*
	 * Preferred sizes depend on the socket type: NFS_MAXDGRAMDATA for
	 * SOCK_DGRAM sockets, NFS_MAXDATA otherwise.  The maxima are always
	 * NFS_MAXDATA.
	 */
3848 	if (slp->ns_so->so_type == SOCK_DGRAM)
3849 		pref = NFS_MAXDGRAMDATA;
3850 	else
3851 		pref = NFS_MAXDATA;
3852 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3853 	sip->fs_rtpref = txdr_unsigned(pref);
3854 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3855 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3856 	sip->fs_wtpref = txdr_unsigned(pref);
3857 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3858 	sip->fs_dtpref = txdr_unsigned(pref);
3859 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
	/* Time granularity: 0 seconds, 1 nanosecond. */
3860 	sip->fs_timedelta.nfsv3_sec = 0;
3861 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3862 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3863 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3864 		NFSV3FSINFO_CANSETTIME);
3865 nfsmout:
3866 	if (vp)
3867 		vput(vp);
3868 	return(error);
3869 }
3870 
3871 /*
3872  * nfs pathconf service
 *
 * Decodes a file handle, maps it to a vnode with nfsrv_fhtovp() and asks
 * the file system, via VOP_PATHCONF(), for the link maximum, the name
 * maximum, the chown-restricted flag and the no-truncation flag.  The
 * reply carries post-op attributes followed by a struct nfsv3_pathconf;
 * case-insensitivity and case-preservation are fixed values.
 *
 * NOTE(review): nfsm_srvmtofh, nfsm_reply, nfsm_srvpostop_attr and
 * nfsm_build are macros defined elsewhere.  They appear to use the locals
 * below by name (mrep, md, dpos, bpos, mb, mb2, mreq, tl, t1, cp2, cache,
 * frev, error, ...) and may jump to nfsmout -- confirm before renaming or
 * removing any of them.
3873  */
3874 int
3875 nfsrv_pathconf(nfsd, slp, td, mrq)
3876 	struct nfsrv_descript *nfsd;
3877 	struct nfssvc_sock *slp;
3878 	struct thread *td;
3879 	struct mbuf **mrq;
3880 {
3881 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3882 	struct sockaddr *nam = nfsd->nd_nam;
3883 	caddr_t dpos = nfsd->nd_dpos;
3884 	struct ucred *cred = &nfsd->nd_cr;
3885 	u_int32_t *tl;
3886 	struct nfsv3_pathconf *pc;
3887 	int32_t t1;
3888 	caddr_t bpos;
3889 	int error = 0, rdonly, cache, getret = 1;
3890 	register_t linkmax, namemax, chownres, notrunc;
3891 	char *cp2;
3892 	struct mbuf *mb, *mb2, *mreq;
3893 	struct vnode *vp = NULL;
3894 	struct vattr at;
3895 	nfsfh_t nfh;
3896 	fhandle_t *fhp;
3897 	u_quad_t frev;
3898 
3899 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3900 #ifndef nolint
3901 	cache = 0;
3902 #endif
3903 	fhp = &nfh.fh_generic;
3904 	nfsm_srvmtofh(fhp);
3905 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3906 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3907 	if (error) {
		/* No vnode: getret is still 1, so no attributes are available. */
3908 		nfsm_reply(NFSX_UNSIGNED);
3909 		nfsm_srvpostop_attr(getret, &at);
3910 		error = 0;
3911 		goto nfsmout;
3912 	}
	/* Query the four values in turn; stop at the first failure. */
3913 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3914 	if (!error)
3915 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3916 	if (!error)
3917 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3918 	if (!error)
3919 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3920 	getret = VOP_GETATTR(vp, &at, td);
	/* The vnode is released before the reply is built. */
3921 	vput(vp);
3922 	vp = NULL;
3923 	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
3924 	nfsm_srvpostop_attr(getret, &at);
3925 	if (error) {
		/* A pathconf failure ends the reply after the attributes. */
3926 		error = 0;
3927 		goto nfsmout;
3928 	}
3929 	nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
3930 
3931 	pc->pc_linkmax = txdr_unsigned(linkmax);
3932 	pc->pc_namemax = txdr_unsigned(namemax);
3933 	pc->pc_notrunc = txdr_unsigned(notrunc);
3934 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3935 
3936 	/*
3937 	 * These should probably be supported by VOP_PATHCONF(), but
3938 	 * until msdosfs is exportable (why would you want to?), the
3939 	 * Unix defaults should be ok.
3940 	 */
3941 	pc->pc_caseinsensitive = nfs_false;
3942 	pc->pc_casepreserving = nfs_true;
3943 nfsmout:
3944 	if (vp)
3945 		vput(vp);
3946 	return(error);
3947 }
3948 
3949 /*
3950  * Null operation, used by clients to ping server
 *
 * Builds a reply with no procedure-specific payload.  'error' starts out
 * as NFSERR_RETVOID before nfsm_reply(0) is invoked.
 *
 * NOTE(review): nfsm_reply and nfsm_srvdone are macros defined elsewhere;
 * they appear to use the locals below by name (mrep, bpos, mb, mreq,
 * cache, frev, error) and nfsm_srvdone presumably supplies the function's
 * return statement -- confirm against the macro definitions.
3951  */
3952 /* ARGSUSED */
3953 int
3954 nfsrv_null(nfsd, slp, td, mrq)
3955 	struct nfsrv_descript *nfsd;
3956 	struct nfssvc_sock *slp;
3957 	struct thread *td;
3958 	struct mbuf **mrq;
3959 {
3960 	struct mbuf *mrep = nfsd->nd_mrep;
3961 	caddr_t bpos;
3962 	int error = NFSERR_RETVOID, cache;
3963 	struct mbuf *mb, *mreq;
3964 	u_quad_t frev;
3965 
3966 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3967 #ifndef nolint
3968 	cache = 0;
3969 #endif
3970 	nfsm_reply(0);
3971 	nfsm_srvdone;
3972 }
3973 
3974 /*
3975  * No operation, used for obsolete procedures
 *
 * Replies with an error status and no payload.  The status is
 * nfsd->nd_repstat when that is non-zero, otherwise EPROCUNAVAIL.
 * 'error' is cleared again once the reply has been built.
 *
 * NOTE(review): nfsm_reply and nfsm_srvdone are macros defined elsewhere;
 * they appear to use the locals below by name (mrep, bpos, mb, mreq,
 * cache, frev, error) and nfsm_srvdone presumably supplies the function's
 * return statement -- confirm against the macro definitions.
3976  */
3977 /* ARGSUSED */
3978 int
3979 nfsrv_noop(nfsd, slp, td, mrq)
3980 	struct nfsrv_descript *nfsd;
3981 	struct nfssvc_sock *slp;
3982 	struct thread *td;
3983 	struct mbuf **mrq;
3984 {
3985 	struct mbuf *mrep = nfsd->nd_mrep;
3986 	caddr_t bpos;
3987 	int error, cache;
3988 	struct mbuf *mb, *mreq;
3989 	u_quad_t frev;
3990 
3991 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3992 #ifndef nolint
3993 	cache = 0;
3994 #endif
	/* Prefer a status already recorded on the request descriptor. */
3995 	if (nfsd->nd_repstat)
3996 		error = nfsd->nd_repstat;
3997 	else
3998 		error = EPROCUNAVAIL;
3999 	nfsm_reply(0);
4000 	error = 0;
4001 	nfsm_srvdone;
4002 }
4003 
4004 /*
4005  * Perform access checking for vnodes obtained from file handles that would
4006  * refer to files already opened by a Unix client. You cannot just use
4007  * vn_writechk() and VOP_ACCESS() for two reasons.
4008  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
4009  * 2 - The owner is to be given access irrespective of mode bits for some
4010  *     operations, so that processes that chmod after opening a file don't
4011  *     break. I don't like this because it opens a security hole, but since
4012  *     the nfs server opens a security hole the size of a barn door anyhow,
4013  *     what the heck.
4014  *
4015  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
4016  * will return EPERM instead of EACCESS. EPERM is always an error.
4017  */
4018 static int
4019 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred,
4020 	int rdonly, struct thread *td, int override)
4021 {
4022 	struct vattr vattr;
4023 	int error;
4024 
4025 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4026 	if (flags & VWRITE) {
4027 		/* Just vn_writechk() changed to check rdonly */
4028 		/*
4029 		 * Disallow write attempts on read-only file systems;
4030 		 * unless the file is a socket or a block or character
4031 		 * device resident on the file system.
4032 		 */
4033 		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
4034 			switch (vp->v_type) {
4035 			case VREG:
4036 			case VDIR:
4037 			case VLNK:
4038 				return (EROFS);
4039 			default:
4040 				break;
4041 			}
4042 		}
4043 		/*
4044 		 * If there's shared text associated with
4045 		 * the inode, we can't allow writing.
4046 		 */
4047 		if (vp->v_flag & VTEXT)
4048 			return (ETXTBSY);
4049 	}
4050 	error = VOP_GETATTR(vp, &vattr, td);
4051 	if (error)
4052 		return (error);
4053 	error = VOP_ACCESS(vp, flags, cred, td);
4054 	/*
4055 	 * Allow certain operations for the owner (reads and writes
4056 	 * on files that are already open).
4057 	 */
4058 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4059 		error = 0;
4060 	return error;
4061 }
4062 #endif /* NFS_NOSERVER */
4063 
4064