xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision fcce2b94)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.36 2006/05/06 18:48:53 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_zone.h>
87 #include <vm/vm_object.h>
88 
89 #include <sys/buf2.h>
90 
91 #include <sys/thread2.h>
92 
93 #include "nfsproto.h"
94 #include "rpcv2.h"
95 #include "nfs.h"
96 #include "xdr_subs.h"
97 #include "nfsm_subs.h"
98 
/*
 * Debug trace hook.  With NFSRV_DEBUG defined, the (already parenthesized)
 * argument list is pasted after "printf"; otherwise the call expands to
 * nothing.  Callers therefore write nfsdbprintf(("fmt", args)).
 */
#ifdef NFSRV_DEBUG
#define nfsdbprintf(info)	printf info
#else
#define nfsdbprintf(info)
#endif

/* NOTE(review): not referenced in the visible part of this file. */
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Tuning constants for the nfsheur[] sequential-read heuristic table
 * maintained by nfsrv_read().
 */
#define NUM_HEURISTIC		1017	/* number of slots in nfsheur[] */
#define NHUSE_INIT		64	/* nh_use given to a newly claimed slot */
#define NHUSE_INC		16	/* added to nh_use on every read */
#define NHUSE_MAX		2048	/* upper clamp for nh_use */
111 
/*
 * Per-vnode sequential access heuristic, consulted by nfsrv_read().
 *
 * The table is indexed by a hash of the vnode pointer and probed linearly
 * for a limited number of slots; the least-used slot seen is recycled when
 * the vnode is not found.  nh_vp is only ever compared against, never
 * dereferenced, by the code in nfsrv_read().
 */
static struct nfsheur {
    struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
    off_t nh_nextr;		/* next offset for sequential detection */
    int nh_use;			/* use count for selection */
    int nh_seqcount;		/* heuristic */
} nfsheur[NUM_HEURISTIC];
118 
/*
 * NFSv3 file type table.
 * NOTE(review): presumably indexed by enum vtype to obtain the on-the-wire
 * type -- confirm against the users of this array.
 */
nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
		      NFFIFO, NFNON };
#ifndef NFS_NOSERVER
/*
 * NFSv2 counterpart of nfsv3_type[]; the slots that hold NFSOCK and NFFIFO
 * in the v3 table hold NFNON and NFCHR here.
 */
nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
		      NFCHR, NFNON };
/* Global vars */
extern u_int32_t nfs_xdrneg1;
extern u_int32_t nfs_false, nfs_true;
extern enum vtype nv3tov_type[8];
extern struct nfsstats nfsstats;

/*
 * Write gathering delays.  nfsrv_writegather() is documented below as being
 * used when nfsrvw_procrastinate > 0.
 */
int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
int nfsrvw_procrastinate_v3 = 0;

/*
 * Write verifier returned in NFSv3 WRITE replies; lazily initialized from
 * boottime the first time nfsrv_write() needs it.
 */
static struct timespec	nfsver;

SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.async: when non-zero, nfsrv_write() performs v2 writes as
 * UNSTABLE and reports v3 writes as FILESYNC regardless of how they were
 * actually issued.
 */
static int nfs_async;
SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
/* NOTE(review): commit counters; not updated in the visible part of the file. */
static int nfs_commit_blks;
static int nfs_commit_miss;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");

/* Forward declarations of file-local helpers defined later in this file. */
static int nfsrv_access (struct vnode *,int,struct ucred *,int,
		struct thread *, int);
static void nfsrvw_coalesce (struct nfsrv_descript *,
		struct nfsrv_descript *);
148 
149 /*
150  * nfs v3 access service
151  */
152 int
153 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
154 	      struct thread *td, struct mbuf **mrq)
155 {
156 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
157 	struct sockaddr *nam = nfsd->nd_nam;
158 	caddr_t dpos = nfsd->nd_dpos;
159 	struct ucred *cred = &nfsd->nd_cr;
160 	struct vnode *vp = NULL;
161 	nfsfh_t nfh;
162 	fhandle_t *fhp;
163 	u_int32_t *tl;
164 	int32_t t1;
165 	caddr_t bpos;
166 	int error = 0, rdonly, getret;
167 	char *cp2;
168 	struct mbuf *mb, *mreq, *mb2;
169 	struct vattr vattr, *vap = &vattr;
170 	u_long testmode, nfsmode;
171 
172 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
173 	fhp = &nfh.fh_generic;
174 	nfsm_srvmtofh(fhp);
175 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
176 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
177 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
178 	if (error) {
179 		nfsm_reply(NFSX_UNSIGNED);
180 		nfsm_srvpostop_attr(1, (struct vattr *)0);
181 		error = 0;
182 		goto nfsmout;
183 	}
184 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
185 	if ((nfsmode & NFSV3ACCESS_READ) &&
186 		nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
187 		nfsmode &= ~NFSV3ACCESS_READ;
188 	if (vp->v_type == VDIR)
189 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
190 			NFSV3ACCESS_DELETE);
191 	else
192 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
193 	if ((nfsmode & testmode) &&
194 		nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
195 		nfsmode &= ~testmode;
196 	if (vp->v_type == VDIR)
197 		testmode = NFSV3ACCESS_LOOKUP;
198 	else
199 		testmode = NFSV3ACCESS_EXECUTE;
200 	if ((nfsmode & testmode) &&
201 		nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
202 		nfsmode &= ~testmode;
203 	getret = VOP_GETATTR(vp, vap);
204 	vput(vp);
205 	vp = NULL;
206 	nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
207 	nfsm_srvpostop_attr(getret, vap);
208 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
209 	*tl = txdr_unsigned(nfsmode);
210 nfsmout:
211 	if (vp)
212 		vput(vp);
213 	return(error);
214 }
215 
216 /*
217  * nfs getattr service
218  */
219 int
220 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
221 	      struct thread *td, struct mbuf **mrq)
222 {
223 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
224 	struct sockaddr *nam = nfsd->nd_nam;
225 	caddr_t dpos = nfsd->nd_dpos;
226 	struct ucred *cred = &nfsd->nd_cr;
227 	struct nfs_fattr *fp;
228 	struct vattr va;
229 	struct vattr *vap = &va;
230 	struct vnode *vp = NULL;
231 	nfsfh_t nfh;
232 	fhandle_t *fhp;
233 	u_int32_t *tl;
234 	int32_t t1;
235 	caddr_t bpos;
236 	int error = 0, rdonly;
237 	char *cp2;
238 	struct mbuf *mb, *mb2, *mreq;
239 
240 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
241 	fhp = &nfh.fh_generic;
242 	nfsm_srvmtofh(fhp);
243 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
244 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
245 	if (error) {
246 		nfsm_reply(0);
247 		error = 0;
248 		goto nfsmout;
249 	}
250 	error = VOP_GETATTR(vp, vap);
251 	vput(vp);
252 	vp = NULL;
253 	nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
254 	if (error) {
255 		error = 0;
256 		goto nfsmout;
257 	}
258 	nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259 	nfsm_srvfillattr(vap, fp);
260 	/* fall through */
261 
262 nfsmout:
263 	if (vp)
264 		vput(vp);
265 	return(error);
266 }
267 
/*
 * nfs setattr service
 *
 * Phases:
 *   1 - decode the file handle and the new attributes.  For v3 this is a
 *       sattr3 (via nfsm_srvsattr) plus an optional guard ctime; for v2 it
 *       is an nfsv2_sattr in which a field equal to nfs_xdrneg1 means
 *       "leave unchanged".
 *   2 - map the handle to a vnode, for v3 fetch the pre-op attributes and
 *       enforce the guard, check permissions, then VOP_SETATTR followed by
 *       VOP_GETATTR.
 *   3 - reply with wcc_data (v3) or an fattr (v2).
 *
 * Every exit visible in this function returns 0 once a reply has been
 * built; the nfsm_*() macros may jump to nfsmout on their own.
 */
int
nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	      struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	struct vattr va, preat;
	struct vattr *vap = &va;
	struct nfsv2_sattr *sp;
	struct nfs_fattr *fp;
	struct vnode *vp = NULL;
	nfsfh_t nfh;
	fhandle_t *fhp;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	/* preat_ret/postat_ret start non-zero, i.e. "attributes not valid" */
	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
	int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
	char *cp2;
	struct mbuf *mb, *mb2, *mreq;
	struct timespec guard;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	VATTR_NULL(vap);
	if (v3) {
		nfsm_srvsattr(vap);
		/* guard flag, followed by the guard ctime when it is set */
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		gcheck = fxdr_unsigned(int, *tl);
		if (gcheck) {
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &guard);
		}
	} else {
		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
		/*
		 * Nah nah nah nah na nah
		 * There is a bug in the Sun client that puts 0xffff in the mode
		 * field of sattr when it should put in 0xffffffff. The u_short
		 * doesn't sign extend.
		 * --> check the low order 2 bytes for 0xffff
		 */
		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
			vap->va_mode = nfstov_mode(sp->sa_mode);
		if (sp->sa_uid != nfs_xdrneg1)
			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
		if (sp->sa_gid != nfs_xdrneg1)
			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
		if (sp->sa_size != nfs_xdrneg1)
			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
#ifdef notyet
			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
#else
			/* only the seconds of the access time are honoured */
			vap->va_atime.tv_sec =
				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
			vap->va_atime.tv_nsec = 0;
#endif
		}
		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);

	}

	/*
	 * Now that we have all the fields, lets do it.
	 */
	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		/* both *_ret values are still 1 here, so no attributes are sent */
		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
		error = 0;
		goto nfsmout;
	}

	/*
	 * vp now an active resource, pay careful attention to cleanup
	 */

	if (v3) {
		/*
		 * Fetch pre-op attributes.  If the client supplied a guard,
		 * the object's current ctime must match it exactly.
		 */
		error = preat_ret = VOP_GETATTR(vp, &preat);
		if (!error && gcheck &&
			(preat.va_ctime.tv_sec != guard.tv_sec ||
			 preat.va_ctime.tv_nsec != guard.tv_nsec))
			error = NFSERR_NOT_SYNC;
		if (error) {
			vput(vp);
			vp = NULL;
			nfsm_reply(NFSX_WCCDATA(v3));
			nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
			error = 0;
			goto nfsmout;
		}
	}

	/*
	 * If the size is being changed write access is required, otherwise
	 * just check for a read only file system.
	 *
	 * va_size still holding the all-ones value means the request did
	 * not supply a size.
	 */
	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	} else {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		} else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
			    td, 0)) != 0){
			goto out;
		}
	}
	error = VOP_SETATTR(vp, vap, cred);
	/* vap is reused to hold the post-op attributes */
	postat_ret = VOP_GETATTR(vp, vap);
	if (!error)
		error = postat_ret;
out:
	/* release the vnode before any macro gets a chance to bail out */
	vput(vp);
	vp = NULL;
	nfsm_reply(NFSX_WCCORFATTR(v3));
	if (v3) {
		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
		error = 0;
		goto nfsmout;
	} else {
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(vap, fp);
	}
	/* fall through */

nfsmout:
	if (vp)
		vput(vp);
	return(error);
}
411 
/*
 * nfs lookup rpc
 *
 * Decodes a directory file handle plus a component name, resolves the name
 * with nfs_namei() and replies with the file handle and attributes of the
 * object found.  For v3 the post-op attributes of the directory (dirp) are
 * included in both success and failure replies.
 *
 * When the public file handle is used and an index file name is configured
 * (nfs_pub.np_index), a lookup that lands on a directory is redirected to
 * that index file if it exists, and the result must stay inside the public
 * mount or EPERM is returned.
 *
 * Resources tracked for cleanup at nfsmout: nd (nlookup state), dirp
 * (released with vrele) and vp (released with vput).
 */
int
nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	     struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	struct nfs_fattr *fp;
	struct nlookupdata nd;
	struct vnode *vp;
	struct vnode *dirp;
	struct namecache *ncp;
	nfsfh_t nfh;
	fhandle_t *fhp;
	caddr_t cp;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	/* dirattr_ret stays 1 ("no attributes") unless VOP_GETATTR succeeds */
	int error = 0, len, dirattr_ret = 1;
	int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
	char *cp2;
	struct mbuf *mb, *mb2, *mreq;
	struct vattr va, dirattr, *vap = &va;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* put nd/dirp/vp into a known state so nfsmout is always safe */
	nlookup_zero(&nd);
	dirp = NULL;
	vp = NULL;

	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	nfsm_srvnamesiz(len);

	pubflag = nfs_ispublicfh(fhp);

	error = nfs_namei(&nd, cred, NAMEI_LOOKUP, NULL, &vp,
		fhp, len, slp, nam, &md, &dpos,
		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);

	/*
	 * namei failure, only dirp to cleanup.  Clear out garbage from
	 * structure in case macros jump to nfsmout.
	 */

	if (error) {
		if (dirp) {
			if (v3)
				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
			vrele(dirp);
			dirp = NULL;
		}
		nfsm_reply(NFSX_POSTOPATTR(v3));
		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
		error = 0;
		goto nfsmout;
	}

	/*
	 * Locate index file for public filehandle
	 *
	 * error is 0 on entry and 0 on exit from this block.
	 */

	if (pubflag) {
		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
			/*
			 * Set up a second lookup, relative to the directory
			 * just found, to see if we can find the index file.
			 * Arguably, this doesn't belong in a kernel.. Ugh.
			 * If an error occurs, do not try to install an index
			 * file and then clear the error.
			 *
			 * nd is torn down and re-initialized for the second
			 * lookup; a held reference on the old namecache entry
			 * (ncp) bridges the two and is dropped as soon as the
			 * new lookup has been initialized.
			 *
			 * NOTE(review): vp is unlocked here.  If the index
			 * lookup fails, vp appears to remain unlocked while
			 * the code further down still releases it with
			 * vput() -- confirm whether a relock is needed on
			 * that path.
			 */
			VOP_UNLOCK(vp, 0);
			ncp = cache_hold(nd.nl_ncp);
			nlookup_done(&nd);
			error = nlookup_init_raw(&nd, nfs_pub.np_index,
						UIO_SYSSPACE, 0, cred, ncp);
			cache_drop(ncp);
			if (error == 0)
				error = nlookup(&nd);

			if (error == 0) {
				/*
				 * Found an index file. Get rid of
				 * the old references.  transfer vp and
				 * load up the new vp.  Fortunately we do
				 * not have to deal with dvp, that would be
				 * a huge mess.
				 */
				if (dirp)
					vrele(dirp);
				/* the directory we found becomes the parent */
				dirp = vp;
				vp = NULL;
				error = cache_vget(nd.nl_ncp, nd.nl_cred,
							LK_EXCLUSIVE, &vp);
				KKASSERT(error == 0);
			}
			error = 0;
		}
		/*
		 * If the public filehandle was used, check that this lookup
		 * didn't result in a filehandle outside the publicly exported
		 * filesystem.  We clear the poor vp here to avoid lockups due
		 * to NFS I/O.
		 */

		if (vp->v_mount != nfs_pub.np_mount) {
			vput(vp);
			vp = NULL;
			error = EPERM;
		}
	}

	if (dirp) {
		if (v3)
			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
		vrele(dirp);
		dirp = NULL;
	}

	/*
	 * Resources at this point:
	 *	vp	NULL if the public-mount check above failed (error
	 *		is set in that case), otherwise the lookup result
	 *	dirp	already released
	 */

	if (error) {
		nfsm_reply(NFSX_POSTOPATTR(v3));
		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
		error = 0;
		goto nfsmout;
	}

	/*
	 * Clear out some resources prior to potentially blocking.  This
	 * is not as critical as the directory resources in other routines,
	 * but it helps.
	 */
	nlookup_done(&nd);

	/*
	 * Get underlying attribute, then release remaining resources ( for
	 * the same potential blocking reason ) and reply.
	 *
	 * The request's file handle buffer is reused to build the handle of
	 * the object that was found.
	 */
	bzero((caddr_t)fhp, sizeof(nfh));
	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
	error = VFS_VPTOFH(vp, &fhp->fh_fid);
	if (!error)
		error = VOP_GETATTR(vp, vap);

	vput(vp);
	vp = NULL;
	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
	if (error) {
		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
		error = 0;
		goto nfsmout;
	}
	nfsm_srvfhtom(fhp, v3);
	if (v3) {
		/* object attributes first, then the directory's */
		nfsm_srvpostop_attr(0, vap);
		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
	} else {
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(vap, fp);
	}

nfsmout:
	if (dirp)
		vrele(dirp);
	nlookup_done(&nd);		/* may be called twice */
	if (vp)
		vput(vp);
	return (error);
}
597 
/*
 * nfs readlink service
 *
 * A chain of cluster mbufs totalling NFS_MAXPATHLEN bytes is allocated up
 * front (mp3 is its head) together with an iovec array describing it, so
 * that VOP_READLINK() can deposit the link text directly into what later
 * becomes the tail of the reply.  After the read the chain is trimmed to
 * the XDR-rounded length actually used and appended to the reply.
 *
 * mp3 is set to NULL once the chain has been handed to the reply; until
 * then nfsmout frees it.
 */
int
nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	       struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
	struct iovec *ivp = iv;
	struct mbuf *mp;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	int error = 0, rdonly, i, tlen, len, getret;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
	struct vnode *vp = NULL;
	struct vattr attr;
	nfsfh_t nfh;
	fhandle_t *fhp;
	struct uio io, *uiop = &io;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
	mp2 = (struct mbuf *)0;
#endif
	mp3 = NULL;
	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	/*
	 * Build the buffer chain: mp3 is the head, mp2 the current tail.
	 * The last mbuf is shortened so the total is exactly
	 * NFS_MAXPATHLEN; one iovec is recorded per mbuf.
	 */
	len = 0;
	i = 0;
	while (len < NFS_MAXPATHLEN) {
		mp = m_getcl(MB_WAIT, MT_DATA, 0);
		mp->m_len = MCLBYTES;
		if (len == 0)
			mp3 = mp2 = mp;
		else {
			mp2->m_next = mp;
			mp2 = mp;
		}
		if ((len+mp->m_len) > NFS_MAXPATHLEN) {
			mp->m_len = NFS_MAXPATHLEN-len;
			len = NFS_MAXPATHLEN;
		} else
			len += mp->m_len;
		ivp->iov_base = mtod(mp, caddr_t);
		ivp->iov_len = mp->m_len;
		i++;
		ivp++;
	}
	uiop->uio_iov = iv;
	uiop->uio_iovcnt = i;
	uiop->uio_offset = 0;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = NULL;
	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvpostop_attr(1, (struct vattr *)0);
		error = 0;
		goto nfsmout;
	}
	if (vp->v_type != VLNK) {
		/* the error code for "not a symlink" differs by version */
		if (v3)
			error = EINVAL;
		else
			error = ENXIO;
		goto out;
	}
	error = VOP_READLINK(vp, uiop, cred);
out:
	getret = VOP_GETATTR(vp, &attr);
	vput(vp);
	vp = NULL;
	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
	if (v3) {
		nfsm_srvpostop_attr(getret, &attr);
		if (error) {
			error = 0;
			goto nfsmout;
		}
	}
	if (uiop->uio_resid > 0) {
		/*
		 * The link was shorter than the buffer: len becomes the
		 * number of bytes actually read and the chain is trimmed
		 * to that length rounded up for XDR.
		 */
		len -= uiop->uio_resid;
		tlen = nfsm_rndup(len);
		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
	}
	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(len);
	/* hand the data chain over to the reply; it is no longer ours */
	mb->m_next = mp3;
	mp3 = NULL;
nfsmout:
	if (mp3)
		m_freem(mp3);
	if (vp)
		vput(vp);
	return(error);
}
704 
705 /*
706  * nfs read service
707  */
708 int
709 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
710 	   struct thread *td, struct mbuf **mrq)
711 {
712 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
713 	struct sockaddr *nam = nfsd->nd_nam;
714 	caddr_t dpos = nfsd->nd_dpos;
715 	struct ucred *cred = &nfsd->nd_cr;
716 	struct iovec *iv;
717 	struct iovec *iv2;
718 	struct mbuf *m;
719 	struct nfs_fattr *fp;
720 	u_int32_t *tl;
721 	int32_t t1;
722 	int i;
723 	caddr_t bpos;
724 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
725 	int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
726 	char *cp2;
727 	struct mbuf *mb, *mb2, *mreq;
728 	struct mbuf *m2;
729 	struct vnode *vp = NULL;
730 	nfsfh_t nfh;
731 	fhandle_t *fhp;
732 	struct uio io, *uiop = &io;
733 	struct vattr va, *vap = &va;
734 	struct nfsheur *nh;
735 	off_t off;
736 	int ioflag = 0;
737 
738 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
739 	fhp = &nfh.fh_generic;
740 	nfsm_srvmtofh(fhp);
741 	if (v3) {
742 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
743 		off = fxdr_hyper(tl);
744 	} else {
745 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
746 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
747 	}
748 	nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
749 
750 	/*
751 	 * Reference vp.  If an error occurs, vp will be invalid, but we
752 	 * have to NULL it just in case.  The macros might goto nfsmout
753 	 * as well.
754 	 */
755 
756 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
757 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
758 	if (error) {
759 		vp = NULL;
760 		nfsm_reply(2 * NFSX_UNSIGNED);
761 		nfsm_srvpostop_attr(1, (struct vattr *)0);
762 		error = 0;
763 		goto nfsmout;
764 	}
765 
766 	if (vp->v_type != VREG) {
767 		if (v3)
768 			error = EINVAL;
769 		else
770 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
771 	}
772 	if (!error) {
773 	    if ((error = nfsrv_access(vp, VREAD, cred, rdonly, td, 1)) != 0)
774 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 1);
775 	}
776 	getret = VOP_GETATTR(vp, vap);
777 	if (!error)
778 		error = getret;
779 	if (error) {
780 		vput(vp);
781 		vp = NULL;
782 		nfsm_reply(NFSX_POSTOPATTR(v3));
783 		nfsm_srvpostop_attr(getret, vap);
784 		error = 0;
785 		goto nfsmout;
786 	}
787 
788 	/*
789 	 * Calculate byte count to read
790 	 */
791 
792 	if (off >= vap->va_size)
793 		cnt = 0;
794 	else if ((off + reqlen) > vap->va_size)
795 		cnt = vap->va_size - off;
796 	else
797 		cnt = reqlen;
798 
799 	/*
800 	 * Calculate seqcount for heuristic
801 	 */
802 
803 	{
804 		int hi;
805 		int try = 32;
806 
807 		/*
808 		 * Locate best candidate
809 		 */
810 
811 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
812 		nh = &nfsheur[hi];
813 
814 		while (try--) {
815 			if (nfsheur[hi].nh_vp == vp) {
816 				nh = &nfsheur[hi];
817 				break;
818 			}
819 			if (nfsheur[hi].nh_use > 0)
820 				--nfsheur[hi].nh_use;
821 			hi = (hi + 1) % NUM_HEURISTIC;
822 			if (nfsheur[hi].nh_use < nh->nh_use)
823 				nh = &nfsheur[hi];
824 		}
825 
826 		if (nh->nh_vp != vp) {
827 			nh->nh_vp = vp;
828 			nh->nh_nextr = off;
829 			nh->nh_use = NHUSE_INIT;
830 			if (off == 0)
831 				nh->nh_seqcount = 4;
832 			else
833 				nh->nh_seqcount = 1;
834 		}
835 
836 		/*
837 		 * Calculate heuristic
838 		 */
839 
840 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
841 			if (++nh->nh_seqcount > IO_SEQMAX)
842 				nh->nh_seqcount = IO_SEQMAX;
843 		} else if (nh->nh_seqcount > 1) {
844 			nh->nh_seqcount = 1;
845 		} else {
846 			nh->nh_seqcount = 0;
847 		}
848 		nh->nh_use += NHUSE_INC;
849 		if (nh->nh_use > NHUSE_MAX)
850 			nh->nh_use = NHUSE_MAX;
851 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
852         }
853 
854 	nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
855 	if (v3) {
856 		nfsm_build(tl, u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
857 		*tl++ = nfs_true;
858 		fp = (struct nfs_fattr *)tl;
859 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
860 	} else {
861 		nfsm_build(tl, u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
862 		fp = (struct nfs_fattr *)tl;
863 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
864 	}
865 	len = left = nfsm_rndup(cnt);
866 	if (cnt > 0) {
867 		/*
868 		 * Generate the mbuf list with the uio_iov ref. to it.
869 		 */
870 		i = 0;
871 		m = m2 = mb;
872 		while (left > 0) {
873 			siz = min(M_TRAILINGSPACE(m), left);
874 			if (siz > 0) {
875 				left -= siz;
876 				i++;
877 			}
878 			if (left > 0) {
879 				m = m_getcl(MB_WAIT, MT_DATA, 0);
880 				m->m_len = 0;
881 				m2->m_next = m;
882 				m2 = m;
883 			}
884 		}
885 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
886 		       M_TEMP, M_WAITOK);
887 		uiop->uio_iov = iv2 = iv;
888 		m = mb;
889 		left = len;
890 		i = 0;
891 		while (left > 0) {
892 			if (m == NULL)
893 				panic("nfsrv_read iov");
894 			siz = min(M_TRAILINGSPACE(m), left);
895 			if (siz > 0) {
896 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
897 				iv->iov_len = siz;
898 				m->m_len += siz;
899 				left -= siz;
900 				iv++;
901 				i++;
902 			}
903 			m = m->m_next;
904 		}
905 		uiop->uio_iovcnt = i;
906 		uiop->uio_offset = off;
907 		uiop->uio_resid = len;
908 		uiop->uio_rw = UIO_READ;
909 		uiop->uio_segflg = UIO_SYSSPACE;
910 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
911 		off = uiop->uio_offset;
912 		nh->nh_nextr = off;
913 		FREE((caddr_t)iv2, M_TEMP);
914 		if (error || (getret = VOP_GETATTR(vp, vap))) {
915 			if (!error)
916 				error = getret;
917 			m_freem(mreq);
918 			vput(vp);
919 			vp = NULL;
920 			nfsm_reply(NFSX_POSTOPATTR(v3));
921 			nfsm_srvpostop_attr(getret, vap);
922 			error = 0;
923 			goto nfsmout;
924 		}
925 	} else {
926 		uiop->uio_resid = 0;
927 	}
928 	vput(vp);
929 	vp = NULL;
930 	nfsm_srvfillattr(vap, fp);
931 	tlen = len - uiop->uio_resid;
932 	cnt = cnt < tlen ? cnt : tlen;
933 	tlen = nfsm_rndup(cnt);
934 	if (len != tlen || tlen != cnt)
935 		nfsm_adj(mb, len - tlen, tlen - cnt);
936 	if (v3) {
937 		*tl++ = txdr_unsigned(cnt);
938 		if (len < reqlen)
939 			*tl++ = nfs_true;
940 		else
941 			*tl++ = nfs_false;
942 	}
943 	*tl = txdr_unsigned(cnt);
944 nfsmout:
945 	if (vp)
946 		vput(vp);
947 	return(error);
948 }
949 
/*
 * nfs write service
 *
 * Phases:
 *   1 - decode the file handle, offset, stability level (v3 only) and
 *       data length, then trim the request mbuf chain in place so that
 *       only the len bytes of write data remain visible in it.
 *   2 - map the handle to a vnode, check type and write permission, and
 *       hand the data to VOP_WRITE() through an iovec array that points
 *       straight into the request mbufs.
 *   3 - reply with wcc_data, count, commit level and write verifier (v3)
 *       or with an fattr (v2).
 *
 * A request with mrep == NULL produces no reply (*mrq is set to NULL).
 * Every exit visible in this function returns 0; the nfsm_*() macros may
 * jump to nfsmout on their own.
 */
int
nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	    struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	struct iovec *ivp;
	int i, cnt;
	struct mbuf *mp;
	struct nfs_fattr *fp;
	struct iovec *iv;
	struct vattr va, forat;
	struct vattr *vap = &va;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	/* forat_ret/aftat_ret start non-zero, i.e. "attributes not valid" */
	int error = 0, rdonly, len, forat_ret = 1;
	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
	int stable = NFSV3WRITE_FILESYNC;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mb2, *mreq;
	struct vnode *vp = NULL;
	nfsfh_t nfh;
	fhandle_t *fhp;
	struct uio io, *uiop = &io;
	off_t off;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	if (mrep == NULL) {
		*mrq = NULL;
		error = 0;
		goto nfsmout;
	}
	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	if (v3) {
		/* offset (2 words), count, stable, data length */
		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 3;
		stable = fxdr_unsigned(int, *tl++);
	} else {
		/* the offset is the second of the four words; tl ends on the last */
		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
		tl += 2;
		if (nfs_async)
	    		stable = NFSV3WRITE_UNSTABLE;
	}
	retlen = len = fxdr_unsigned(int32_t, *tl);
	cnt = i = 0;

	/*
	 * For NFS Version 2, it is not obvious what a write of zero length
	 * should do, but I might as well be consistent with Version 3,
	 * which is to return ok so long as there are no permission problems.
	 */
	if (len > 0) {
	    /*
	     * Walk the request chain: mbufs before the current dissect
	     * position (md/dpos) are zeroed out, the mbuf containing it is
	     * advanced past the header, and everything beyond len bytes of
	     * data is cut off.  On exit i is the number of data bytes found
	     * and cnt the number of mbufs that still carry data.
	     */
	    zeroing = 1;
	    mp = mrep;
	    while (mp) {
		if (mp == md) {
			zeroing = 0;
			adjust = dpos - mtod(mp, caddr_t);
			mp->m_len -= adjust;
			if (mp->m_len > 0 && adjust > 0)
				NFSMADV(mp, adjust);
		}
		if (zeroing)
			mp->m_len = 0;
		else if (mp->m_len > 0) {
			i += mp->m_len;
			if (i > len) {
				mp->m_len -= (i - len);
				zeroing	= 1;
			}
			if (mp->m_len > 0)
				cnt++;
		}
		mp = mp->m_next;
	    }
	}
	/* reject oversized, negative or truncated (i < len) requests */
	if (len > NFS_MAXDATA || len < 0 || i < len) {
		error = EIO;
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
		error = 0;
		goto nfsmout;
	}
	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
	if (error) {
		vp = NULL;
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
		error = 0;
		goto nfsmout;
	}
	if (v3)
		forat_ret = VOP_GETATTR(vp, &forat);
	if (vp->v_type != VREG) {
		if (v3)
			error = EINVAL;
		else
			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
	}
	if (!error) {
		error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
	}
	if (error) {
		vput(vp);
		vp = NULL;
		nfsm_reply(NFSX_WCCDATA(v3));
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
		error = 0;
		goto nfsmout;
	}

	if (len > 0) {
	    /* one iovec per request mbuf that still holds data */
	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
		M_WAITOK);
	    uiop->uio_iov = iv = ivp;
	    uiop->uio_iovcnt = cnt;
	    mp = mrep;
	    while (mp) {
		if (mp->m_len > 0) {
			ivp->iov_base = mtod(mp, caddr_t);
			ivp->iov_len = mp->m_len;
			ivp++;
		}
		mp = mp->m_next;
	    }

	    /*
	     * XXX
	     * The IO_METASYNC flag indicates that all metadata (and not just
	     * enough to ensure data integrity) must be written to stable
	     * storage synchronously.
	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
	     */
	    if (stable == NFSV3WRITE_UNSTABLE)
		ioflags = IO_NODELOCKED;
	    else if (stable == NFSV3WRITE_DATASYNC)
		ioflags = (IO_SYNC | IO_NODELOCKED);
	    else
		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
	    uiop->uio_resid = len;
	    uiop->uio_rw = UIO_WRITE;
	    uiop->uio_segflg = UIO_SYSSPACE;
	    uiop->uio_td = NULL;
	    uiop->uio_offset = off;
	    error = VOP_WRITE(vp, uiop, ioflags, cred);
	    nfsstats.srvvop_writes++;
	    FREE((caddr_t)iv, M_TEMP);
	}
	aftat_ret = VOP_GETATTR(vp, vap);
	vput(vp);
	vp = NULL;
	if (!error)
		error = aftat_ret;
	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
	if (v3) {
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
		if (error) {
			error = 0;
			goto nfsmout;
		}
		/* count, committed level, 2-word write verifier */
		nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(retlen);
		/*
		 * If nfs_async is set, then pretend the write was FILESYNC.
		 */
		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
			*tl++ = txdr_unsigned(stable);
		else
			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
		/*
		 * Actually, there is no need to txdr these fields,
		 * but it may make the values more human readable,
		 * for debugging purposes.
		 *
		 * The verifier is taken from boottime the first time it is
		 * needed.
		 */
		if (nfsver.tv_sec == 0)
			nfsver = boottime;
		*tl++ = txdr_unsigned(nfsver.tv_sec);
		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
	} else {
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(vap, fp);
	}
nfsmout:
	if (vp)
		vput(vp);
	return(error);
}
1149 
1150 /*
1151  * NFS write service with write gathering support. Called when
1152  * nfsrvw_procrastinate > 0.
1153  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1154  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1155  * Jan. 1994.
 *
 * The routine works in three phases:
 *
 * 1. If *ndp is non-NULL it is a new write request.  Its header is parsed,
 *    the request mbuf chain is trimmed down to just the write data, and the
 *    descriptor is inserted into the per-socket timer queue (slp->ns_tq,
 *    ordered by nd_time) and, if it still carries data, into the delay hash
 *    list, where it may be coalesced with a contiguous earlier request.
 * 2. Every queued request whose nd_time has passed and which has no reply
 *    yet is written with VOP_WRITE(); a reply is built for it and for each
 *    request coalesced into it, and each is put back at the head of the
 *    timer queue with nd_time == 0.
 * 3. The first queued descriptor that has a reply (nd_mreq != NULL) is
 *    removed from the timer queue and handed back through *ndp / *mrq.
 *
 * On return *mrq is NULL and *ndp is NULL when no reply is ready.  The
 * visible code always returns 0; per-request errors are encoded into the
 * reply mbufs instead.
1156  */
1157 int
1158 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1159 		  struct thread *td, struct mbuf **mrq)
1160 {
1161 	struct iovec *ivp;
1162 	struct mbuf *mp;
1163 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1164 	struct nfs_fattr *fp;
1165 	int i;
1166 	struct iovec *iov;
1167 	struct nfsrvw_delayhash *wpp;
1168 	struct ucred *cred;
1169 	struct vattr va, forat;
1170 	u_int32_t *tl;
1171 	int32_t t1;
1172 	caddr_t bpos, dpos;
1173 	int error = 0, rdonly, len, forat_ret = 1;
1174 	int ioflags, aftat_ret = 1, adjust, v3, zeroing;
1175 	char *cp2;
1176 	struct mbuf *mb, *mb2, *mreq, *mrep, *md;
1177 	struct vnode *vp = NULL;
1178 	struct uio io, *uiop = &io;
1179 	u_quad_t cur_usec;
1180 
1181 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1182 #ifndef nolint
1183 	i = 0;
1184 	len = 0;
1185 #endif
1186 	*mrq = NULL;
	/*
	 * Phase 1: a new request was handed in.  Take it over (clearing
	 * *ndp) and queue it.
	 */
1187 	if (*ndp) {
1188 	    nfsd = *ndp;
1189 	    *ndp = NULL;
1190 	    mrep = nfsd->nd_mrep;
1191 	    md = nfsd->nd_md;
1192 	    dpos = nfsd->nd_dpos;
1193 	    cred = &nfsd->nd_cr;
1194 	    v3 = (nfsd->nd_flag & ND_NFSV3);
1195 	    LIST_INIT(&nfsd->nd_coalesce);
1196 	    nfsd->nd_mreq = NULL;
1197 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
	    /*
	     * nd_time is the time at which this request becomes due: now
	     * plus the version-specific procrastination interval.
	     */
1198 	    cur_usec = nfs_curusec();
1199 	    nfsd->nd_time = cur_usec +
1200 		(v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1201 
1202 	    /*
1203 	     * Now, get the write header..
1204 	     */
1205 	    nfsm_srvmtofh(&nfsd->nd_fh);
1206 	    if (v3) {
1207 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1208 		nfsd->nd_off = fxdr_hyper(tl);
1209 		tl += 3;
1210 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1211 	    } else {
1212 		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1213 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1214 		tl += 2;
1215 		if (nfs_async)
1216 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1217 	    }
1218 	    len = fxdr_unsigned(int32_t, *tl);
1219 	    nfsd->nd_len = len;
1220 	    nfsd->nd_eoff = nfsd->nd_off + len;
1221 
1222 	    /*
1223 	     * Trim the header out of the mbuf list and trim off any trailing
1224 	     * junk so that the mbuf list has only the write data.
	     *
	     * 'zeroing' is set while walking mbufs that precede md (the mbuf
	     * holding the current dissect position) and again once 'len'
	     * data bytes have been seen; such mbufs get m_len = 0.  'i'
	     * accumulates the number of data bytes found.
1225 	     */
1226 	    zeroing = 1;
1227 	    i = 0;
1228 	    mp = mrep;
1229 	    while (mp) {
1230 		if (mp == md) {
1231 		    zeroing = 0;
1232 		    adjust = dpos - mtod(mp, caddr_t);
1233 		    mp->m_len -= adjust;
1234 		    if (mp->m_len > 0 && adjust > 0)
1235 			NFSMADV(mp, adjust);
1236 		}
1237 		if (zeroing)
1238 		    mp->m_len = 0;
1239 		else {
1240 		    i += mp->m_len;
1241 		    if (i > len) {
1242 			mp->m_len -= (i - len);
1243 			zeroing = 1;
1244 		    }
1245 		}
1246 		mp = mp->m_next;
1247 	    }
	    /*
	     * Reject a length that is out of range or larger than the data
	     * actually present.  The nfsmout label sits inside this body, so
	     * a jump to it (the nfsm_* macros may do that) also lands on this
	     * error path: the request data is freed, an EIO reply is built,
	     * and nd_time is zeroed so the entry sorts to the front of the
	     * timer queue.  nd_mrep is cleared, which keeps the entry off the
	     * hash list below.
	     * NOTE(review): mreq is presumably set up by nfsm_writereply(),
	     * whose definition is not visible here -- confirm.
	     */
1248 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1249 nfsmout:
1250 		m_freem(mrep);
1251 		error = EIO;
1252 		nfsm_writereply(2 * NFSX_UNSIGNED, v3);
1253 		if (v3)
1254 		    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1255 		nfsd->nd_mreq = mreq;
1256 		nfsd->nd_mrep = NULL;
1257 		nfsd->nd_time = 0;
1258 	    }
1259 
1260 	    /*
1261 	     * Add this entry to the hash and time queues.
	     *
	     * The timer queue is kept in ascending nd_time order: walk past
	     * every entry with an earlier time and insert after the last one.
1262 	     */
1263 	    crit_enter();
1264 	    owp = NULL;
1265 	    wp = slp->ns_tq.lh_first;
1266 	    while (wp && wp->nd_time < nfsd->nd_time) {
1267 		owp = wp;
1268 		wp = wp->nd_tq.le_next;
1269 	    }
1270 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1271 	    if (owp) {
1272 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1273 	    } else {
1274 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1275 	    }
	    /*
	     * Only a request that still owns its data goes on the hash list.
	     * First skip entries for other file handles, then skip entries
	     * for the same handle that start at a lower offset.
	     */
1276 	    if (nfsd->nd_mrep) {
1277 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1278 		owp = NULL;
1279 		wp = wpp->lh_first;
1280 		while (wp &&
1281 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1282 		    owp = wp;
1283 		    wp = wp->nd_hash.le_next;
1284 		}
1285 		while (wp && wp->nd_off < nfsd->nd_off &&
1286 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1287 		    owp = wp;
1288 		    wp = wp->nd_hash.le_next;
1289 		}
1290 		if (owp) {
1291 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1292 
1293 		    /*
1294 		     * Search the hash list for overlapping entries and
1295 		     * coalesce.
		     *
		     * The successor is fetched into wp before the call
		     * because nfsrvw_coalesce() unlinks nfsd from the hash
		     * list.
1296 		     */
1297 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1298 			wp = nfsd->nd_hash.le_next;
1299 			if (NFSW_SAMECRED(owp, nfsd))
1300 			    nfsrvw_coalesce(owp, nfsd);
1301 		    }
1302 		} else {
1303 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1304 		}
1305 	    }
1306 	    crit_exit();
1307 	}
1308 
1309 	/*
1310 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1311 	 * and generate the associated reply mbuf list(s).
	 *
	 * Phase 2.  The scan restarts from loop1 after each request is
	 * processed because the queue is modified with the critical section
	 * dropped.
1312 	 */
1313 loop1:
1314 	cur_usec = nfs_curusec();
1315 	crit_enter();
1316 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1317 		owp = nfsd->nd_tq.le_next;
		/* Queue is time ordered: nothing further is due yet. */
1318 		if (nfsd->nd_time > cur_usec)
1319 		    break;
		/* Already has a reply; phase 3 will return it. */
1320 		if (nfsd->nd_mreq)
1321 		    continue;
1322 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1323 		LIST_REMOVE(nfsd, nd_tq);
1324 		LIST_REMOVE(nfsd, nd_hash);
1325 		crit_exit();
1326 		mrep = nfsd->nd_mrep;
1327 		nfsd->nd_mrep = NULL;
1328 		cred = &nfsd->nd_cr;
1329 		v3 = (nfsd->nd_flag & ND_NFSV3);
1330 		forat_ret = aftat_ret = 1;
1331 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
1332 		    nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1333 		if (!error) {
1334 		    if (v3)
1335 			forat_ret = VOP_GETATTR(vp, &forat);
		    /* Only regular files may be written. */
1336 		    if (vp->v_type != VREG) {
1337 			if (v3)
1338 			    error = EINVAL;
1339 			else
1340 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1341 		    }
1342 		} else {
1343 		    vp = NULL;
1344 		}
1345 		if (!error) {
1346 		    error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1347 		}
1348 
		/* Map the requested stability level onto VOP_WRITE() flags. */
1349 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1350 		    ioflags = IO_NODELOCKED;
1351 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1352 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1353 		else
1354 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1355 		uiop->uio_rw = UIO_WRITE;
1356 		uiop->uio_segflg = UIO_SYSSPACE;
1357 		uiop->uio_td = NULL;
1358 		uiop->uio_offset = nfsd->nd_off;
		/* nd_eoff may have been extended by coalescing. */
1359 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1360 		if (uiop->uio_resid > 0) {
		    /*
		     * Count the non-empty mbufs to size the iovec array,
		     * then point one iovec at each of them.
		     */
1361 		    mp = mrep;
1362 		    i = 0;
1363 		    while (mp) {
1364 			if (mp->m_len > 0)
1365 			    i++;
1366 			mp = mp->m_next;
1367 		    }
1368 		    uiop->uio_iovcnt = i;
1369 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1370 			M_TEMP, M_WAITOK);
1371 		    uiop->uio_iov = ivp = iov;
1372 		    mp = mrep;
1373 		    while (mp) {
1374 			if (mp->m_len > 0) {
1375 			    ivp->iov_base = mtod(mp, caddr_t);
1376 			    ivp->iov_len = mp->m_len;
1377 			    ivp++;
1378 			}
1379 			mp = mp->m_next;
1380 		    }
		    /* The write itself is skipped if an error is pending. */
1381 		    if (!error) {
1382 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1383 			nfsstats.srvvop_writes++;
1384 		    }
1385 		    FREE((caddr_t)iov, M_TEMP);
1386 		}
1387 		m_freem(mrep);
1388 		if (vp) {
1389 		    aftat_ret = VOP_GETATTR(vp, &va);
1390 		    vput(vp);
1391 		    vp = NULL;
1392 		}
1393 
1394 		/*
1395 		 * Loop around generating replies for all write rpcs that have
1396 		 * now been completed.
		 *
		 * swp stays on the request that was actually written; nfsd
		 * walks swp itself and then each entry taken off
		 * swp->nd_coalesce.  All of them share the same 'error' and
		 * attribute results.
1397 		 */
1398 		swp = nfsd;
1399 		do {
1400 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1401 		    if (error) {
1402 			nfsm_writereply(NFSX_WCCDATA(v3), v3);
1403 			if (v3) {
1404 			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1405 			}
1406 		    } else {
1407 			nfsm_writereply(NFSX_PREOPATTR(v3) +
1408 			    NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
1409 			    NFSX_WRITEVERF(v3), v3);
1410 			if (v3) {
1411 			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1412 			    nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			    /*
			     * Each reply reports its own request length but
			     * the stability level of the gathered write.
			     */
1413 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1414 			    *tl++ = txdr_unsigned(swp->nd_stable);
1415 			    /*
1416 			     * Actually, there is no need to txdr these fields,
1417 			     * but it may make the values more human readable,
1418 			     * for debugging purposes.
1419 			     */
1420 			    if (nfsver.tv_sec == 0)
1421 				    nfsver = boottime;
1422 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1423 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1424 			} else {
1425 			    nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1426 			    nfsm_srvfillattr(&va, fp);
1427 			}
1428 		    }
1429 		    nfsd->nd_mreq = mreq;
1430 		    if (nfsd->nd_mrep)
1431 			panic("nfsrv_write: nd_mrep not free");
1432 
1433 		    /*
1434 		     * Done. Put it at the head of the timer queue so that
1435 		     * the final phase can return the reply.
		     *
		     * swp itself is re-queued after the loop.  The next
		     * entry is taken off swp->nd_coalesce, which links its
		     * members through their nd_tq field.
1436 		     */
1437 		    crit_enter();
1438 		    if (nfsd != swp) {
1439 			nfsd->nd_time = 0;
1440 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1441 		    }
1442 		    nfsd = swp->nd_coalesce.lh_first;
1443 		    if (nfsd) {
1444 			LIST_REMOVE(nfsd, nd_tq);
1445 		    }
1446 		    crit_exit();
1447 		} while (nfsd);
1448 		crit_enter();
1449 		swp->nd_time = 0;
1450 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1451 		crit_exit();
1452 		goto loop1;
1453 	}
1454 	crit_exit();
1455 
1456 	/*
1457 	 * Search for a reply to return.
	 *
	 * Phase 3: hand back the first queued descriptor that has a reply.
1458 	 */
1459 	crit_enter();
1460 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
1461 		if (nfsd->nd_mreq) {
1462 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1463 		    LIST_REMOVE(nfsd, nd_tq);
1464 		    *mrq = nfsd->nd_mreq;
1465 		    *ndp = nfsd;
1466 		    break;
1467 		}
1468 	crit_exit();
1469 	return (0);
1470 }
1471 
1472 /*
1473  * Coalesce the write request nfsd into owp. To do this we must:
1474  * - remove nfsd from the queues
1475  * - merge nfsd->nd_mrep into owp->nd_mrep
1476  * - update the nd_eoff and nd_stable for owp
1477  * - put nfsd on owp's nd_coalesce list
1478  * NB: Must be called at splsoftclock().
1479  */
1480 static void
1481 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1482 {
1483         int overlap;
1484         struct mbuf *mp;
1485 	struct nfsrv_descript *p;
1486 
1487 	NFS_DPF(WG, ("C%03x-%03x",
1488 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1489         LIST_REMOVE(nfsd, nd_hash);
1490         LIST_REMOVE(nfsd, nd_tq);
1491         if (owp->nd_eoff < nfsd->nd_eoff) {
1492             overlap = owp->nd_eoff - nfsd->nd_off;
1493             if (overlap < 0)
1494                 panic("nfsrv_coalesce: bad off");
1495             if (overlap > 0)
1496                 m_adj(nfsd->nd_mrep, overlap);
1497             mp = owp->nd_mrep;
1498             while (mp->m_next)
1499                 mp = mp->m_next;
1500             mp->m_next = nfsd->nd_mrep;
1501             owp->nd_eoff = nfsd->nd_eoff;
1502         } else
1503             m_freem(nfsd->nd_mrep);
1504         nfsd->nd_mrep = NULL;
1505         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1506             owp->nd_stable = NFSV3WRITE_FILESYNC;
1507         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1508             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1509             owp->nd_stable = NFSV3WRITE_DATASYNC;
1510         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1511 
1512 	/*
1513 	 * If nfsd had anything else coalesced into it, transfer them
1514 	 * to owp, otherwise their replies will never get sent.
1515 	 */
1516 	for (p = nfsd->nd_coalesce.lh_first; p;
1517 	     p = nfsd->nd_coalesce.lh_first) {
1518 	    LIST_REMOVE(p, nd_tq);
1519 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1520 	}
1521 }
1522 
1523 /*
1524  * nfs create service
1525  * now does a truncate to 0 length via. setattr if it already exists
1526  */
1527 int
1528 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1529 	     struct thread *td, struct mbuf **mrq)
1530 {
1531 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1532 	struct sockaddr *nam = nfsd->nd_nam;
1533 	caddr_t dpos = nfsd->nd_dpos;
1534 	struct ucred *cred = &nfsd->nd_cr;
1535 	struct nfs_fattr *fp;
1536 	struct vattr va, dirfor, diraft;
1537 	struct vattr *vap = &va;
1538 	struct nfsv2_sattr *sp;
1539 	u_int32_t *tl;
1540 	struct nlookupdata nd;
1541 	int32_t t1;
1542 	caddr_t bpos;
1543 	int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1544 	int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1545 	caddr_t cp;
1546 	char *cp2;
1547 	struct mbuf *mb, *mb2, *mreq;
1548 	struct vnode *dirp;
1549 	struct vnode *dvp;
1550 	struct vnode *vp;
1551 	nfsfh_t nfh;
1552 	fhandle_t *fhp;
1553 	u_quad_t tempsize;
1554 	u_char cverf[NFSX_V3CREATEVERF];
1555 
1556 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1557 #ifndef nolint
1558 	rdev = 0;
1559 #endif
1560 	nlookup_zero(&nd);
1561 	dirp = NULL;
1562 	dvp = NULL;
1563 	vp = NULL;
1564 
1565 	fhp = &nfh.fh_generic;
1566 	nfsm_srvmtofh(fhp);
1567 	nfsm_srvnamesiz(len);
1568 
1569 	/*
1570 	 * Call namei and do initial cleanup to get a few things
1571 	 * out of the way.  If we get an initial error we cleanup
1572 	 * and return here to avoid special-casing the invalid nd
1573 	 * structure through the rest of the case.  dirp may be
1574 	 * set even if an error occurs, but the nd structure will not
1575 	 * be valid at all if an error occurs so we have to invalidate it
1576 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1577 	 */
1578 	error = nfs_namei(&nd, cred, NAMEI_CREATE, &dvp, &vp,
1579 			  fhp, len, slp, nam, &md, &dpos, &dirp,
1580 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1581 	if (dirp) {
1582 		if (v3) {
1583 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1584 		} else {
1585 			vrele(dirp);
1586 			dirp = NULL;
1587 		}
1588 	}
1589 	if (error) {
1590 		nfsm_reply(NFSX_WCCDATA(v3));
1591 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1592 		error = 0;
1593 		goto nfsmout;
1594 	}
1595 
1596 	/*
1597 	 * No error.  Continue.  State:
1598 	 *
1599 	 *	dirp 		may be valid
1600 	 *	vp		may be valid or NULL if the target does not
1601 	 *			exist.
1602 	 *	dvp		is valid
1603 	 *
1604 	 * The error state is set through the code and we may also do some
1605 	 * opportunistic releasing of vnodes to avoid holding locks through
1606 	 * NFS I/O.  The cleanup at the end is a catch-all
1607 	 */
1608 
1609 	VATTR_NULL(vap);
1610 	if (v3) {
1611 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1612 		how = fxdr_unsigned(int, *tl);
1613 		switch (how) {
1614 		case NFSV3CREATE_GUARDED:
1615 			if (vp) {
1616 				error = EEXIST;
1617 				break;
1618 			}
1619 			/* fall through */
1620 		case NFSV3CREATE_UNCHECKED:
1621 			nfsm_srvsattr(vap);
1622 			break;
1623 		case NFSV3CREATE_EXCLUSIVE:
1624 			nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF);
1625 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1626 			exclusive_flag = 1;
1627 			break;
1628 		};
1629 		vap->va_type = VREG;
1630 	} else {
1631 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1632 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1633 		if (vap->va_type == VNON)
1634 			vap->va_type = VREG;
1635 		vap->va_mode = nfstov_mode(sp->sa_mode);
1636 		switch (vap->va_type) {
1637 		case VREG:
1638 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1639 			if (tsize != -1)
1640 				vap->va_size = (u_quad_t)tsize;
1641 			break;
1642 		case VCHR:
1643 		case VBLK:
1644 		case VFIFO:
1645 			rdev = fxdr_unsigned(long, sp->sa_size);
1646 			break;
1647 		default:
1648 			break;
1649 		};
1650 	}
1651 
1652 	/*
1653 	 * Iff doesn't exist, create it
1654 	 * otherwise just truncate to 0 length
1655 	 *   should I set the mode too ?
1656 	 *
1657 	 * The only possible error we can have at this point is EEXIST.
1658 	 * nd.ni_vp will also be non-NULL in that case.
1659 	 */
1660 	if (vp == NULL) {
1661 		if (vap->va_mode == (mode_t)VNOVAL)
1662 			vap->va_mode = 0;
1663 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1664 			vput(dvp);
1665 			dvp = NULL;
1666 			error = VOP_NCREATE(nd.nl_ncp, &vp, nd.nl_cred, vap);
1667 			if (error == 0) {
1668 				if (exclusive_flag) {
1669 					exclusive_flag = 0;
1670 					VATTR_NULL(vap);
1671 					bcopy(cverf, (caddr_t)&vap->va_atime,
1672 						NFSX_V3CREATEVERF);
1673 					error = VOP_SETATTR(vp, vap, cred);
1674 				}
1675 			}
1676 		} else if (
1677 			vap->va_type == VCHR ||
1678 			vap->va_type == VBLK ||
1679 			vap->va_type == VFIFO
1680 		) {
1681 			/*
1682 			 * Handle SysV FIFO node special cases.  All other
1683 			 * devices require super user to access.
1684 			 */
1685 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1686 				vap->va_type = VFIFO;
1687                         if (vap->va_type != VFIFO &&
1688                             (error = suser_cred(cred, 0))) {
1689 				goto nfsmreply0;
1690                         }
1691 			vap->va_rdev = rdev;
1692 
1693 			vput(dvp);
1694 			dvp = NULL;
1695 			error = VOP_NMKNOD(nd.nl_ncp, &vp, nd.nl_cred, vap);
1696 			if (error)
1697 				goto nfsmreply0;
1698 #if 0
1699 			/*
1700 			 * XXX what is this junk supposed to do ?
1701 			 */
1702 
1703 			vput(vp);
1704 			vp = NULL;
1705 
1706 			/*
1707 			 * release dvp prior to lookup
1708 			 */
1709 			vput(dvp);
1710 			dvp = NULL;
1711 
1712 			/*
1713 			 * Setup for lookup.
1714 			 *
1715 			 * Even though LOCKPARENT was cleared, ni_dvp may
1716 			 * be garbage.
1717 			 */
1718 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1719 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1720 			nd.ni_cnd.cn_td = td;
1721 			nd.ni_cnd.cn_cred = cred;
1722 
1723 			error = lookup(&nd);
1724 			nd.ni_dvp = NULL;
1725 
1726 			if (error != 0) {
1727 				nfsm_reply(0);
1728 				/* fall through on certain errors */
1729 			}
1730 			nfsrv_object_create(nd.ni_vp);
1731 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1732 				error = EINVAL;
1733 				goto nfsmreply0;
1734 			}
1735 #endif
1736 		} else {
1737 			error = ENXIO;
1738 		}
1739 	} else {
1740 		if (vap->va_size != -1) {
1741 			error = nfsrv_access(vp, VWRITE, cred,
1742 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1743 			if (!error) {
1744 				tempsize = vap->va_size;
1745 				VATTR_NULL(vap);
1746 				vap->va_size = tempsize;
1747 				error = VOP_SETATTR(vp, vap, cred);
1748 			}
1749 		}
1750 	}
1751 
1752 	if (!error) {
1753 		bzero((caddr_t)fhp, sizeof(nfh));
1754 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1755 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1756 		if (!error)
1757 			error = VOP_GETATTR(vp, vap);
1758 	}
1759 	if (v3) {
1760 		if (exclusive_flag && !error &&
1761 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1762 			error = EEXIST;
1763 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1764 		vrele(dirp);
1765 		dirp = NULL;
1766 	}
1767 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1768 	if (v3) {
1769 		if (!error) {
1770 			nfsm_srvpostop_fh(fhp);
1771 			nfsm_srvpostop_attr(0, vap);
1772 		}
1773 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1774 		error = 0;
1775 	} else {
1776 		nfsm_srvfhtom(fhp, v3);
1777 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1778 		nfsm_srvfillattr(vap, fp);
1779 	}
1780 	goto nfsmout;
1781 
1782 nfsmreply0:
1783 	nfsm_reply(0);
1784 	error = 0;
1785 	/* fall through */
1786 
1787 nfsmout:
1788 	if (dirp)
1789 		vrele(dirp);
1790 	nlookup_done(&nd);
1791 	if (dvp) {
1792 		if (dvp == vp)
1793 			vrele(dvp);
1794 		else
1795 			vput(dvp);
1796 	}
1797 	if (vp)
1798 		vput(vp);
1799 	return (error);
1800 }
1801 
1802 /*
1803  * nfs v3 mknod service
1804  */
1805 int
1806 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1807 	    struct thread *td, struct mbuf **mrq)
1808 {
1809 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1810 	struct sockaddr *nam = nfsd->nd_nam;
1811 	caddr_t dpos = nfsd->nd_dpos;
1812 	struct ucred *cred = &nfsd->nd_cr;
1813 	struct vattr va, dirfor, diraft;
1814 	struct vattr *vap = &va;
1815 	u_int32_t *tl;
1816 	struct nlookupdata nd;
1817 	int32_t t1;
1818 	caddr_t bpos;
1819 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1820 	u_int32_t major, minor;
1821 	enum vtype vtyp;
1822 	char *cp2;
1823 	struct mbuf *mb, *mb2, *mreq;
1824 	struct vnode *dirp;
1825 	struct vnode *dvp;
1826 	struct vnode *vp;
1827 	nfsfh_t nfh;
1828 	fhandle_t *fhp;
1829 
1830 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1831 	nlookup_zero(&nd);
1832 	dirp = NULL;
1833 	dvp = NULL;
1834 	vp = NULL;
1835 
1836 	fhp = &nfh.fh_generic;
1837 	nfsm_srvmtofh(fhp);
1838 	nfsm_srvnamesiz(len);
1839 
1840 	/*
1841 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1842 	 * is not valid.  However, nfsm_*() routines may still jump to
1843 	 * nfsmout.
1844 	 */
1845 
1846 	error = nfs_namei(&nd, cred, NAMEI_CREATE, &dvp, &vp,
1847 			  fhp, len, slp, nam, &md, &dpos, &dirp,
1848 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1849 	if (dirp)
1850 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1851 	if (error) {
1852 		nfsm_reply(NFSX_WCCDATA(1));
1853 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1854 		error = 0;
1855 		goto nfsmout;
1856 	}
1857 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1858 	vtyp = nfsv3tov_type(*tl);
1859 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1860 		error = NFSERR_BADTYPE;
1861 		goto out;
1862 	}
1863 	VATTR_NULL(vap);
1864 	nfsm_srvsattr(vap);
1865 	if (vtyp == VCHR || vtyp == VBLK) {
1866 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1867 		major = fxdr_unsigned(u_int32_t, *tl++);
1868 		minor = fxdr_unsigned(u_int32_t, *tl);
1869 		vap->va_rdev = makeudev(major, minor);
1870 	}
1871 
1872 	/*
1873 	 * Iff doesn't exist, create it.
1874 	 */
1875 	if (vp) {
1876 		error = EEXIST;
1877 		goto out;
1878 	}
1879 	vap->va_type = vtyp;
1880 	if (vap->va_mode == (mode_t)VNOVAL)
1881 		vap->va_mode = 0;
1882 	if (vtyp == VSOCK) {
1883 		error = VOP_NCREATE(nd.nl_ncp, &vp, nd.nl_cred, vap);
1884 	} else {
1885 		if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
1886 			goto out;
1887 
1888 		error = VOP_NMKNOD(nd.nl_ncp, &vp, nd.nl_cred, vap);
1889 		if (error)
1890 			goto out;
1891 
1892 #if 0
1893 		vput(vp);
1894 		vp = NULL;
1895 
1896 		/*
1897 		 * Release dvp prior to lookup
1898 		 */
1899 		vput(dvp);
1900 		dvp = NULL;
1901 
1902 		/*
1903 		 * XXX what is this stuff for?
1904 		 */
1905 		KKASSERT(td->td_proc);
1906 		nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1907 		nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1908 		nd.ni_cnd.cn_td = td;
1909 		nd.ni_cnd.cn_cred = td->td_proc->p_ucred;
1910 
1911 		error = lookup(&nd);
1912 		nd.ni_dvp = NULL;
1913 
1914 		if (error)
1915 			goto out;
1916 		if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK)
1917 			error = EINVAL;
1918 #endif
1919 	}
1920 
1921 	/*
1922 	 * send response, cleanup, return.
1923 	 */
1924 out:
1925 	nlookup_done(&nd);
1926 	if (dvp) {
1927 		if (dvp == vp)
1928 			vrele(dvp);
1929 		else
1930 			vput(dvp);
1931 		dvp = NULL;
1932 	}
1933 	if (!error) {
1934 		bzero((caddr_t)fhp, sizeof(nfh));
1935 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1936 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1937 		if (!error)
1938 			error = VOP_GETATTR(vp, vap);
1939 	}
1940 	if (vp) {
1941 		vput(vp);
1942 		vp = NULL;
1943 	}
1944 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1945 	if (dirp) {
1946 		vrele(dirp);
1947 		dirp = NULL;
1948 	}
1949 	nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
1950 	if (!error) {
1951 		nfsm_srvpostop_fh(fhp);
1952 		nfsm_srvpostop_attr(0, vap);
1953 	}
1954 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1955 	return (0);
1956 nfsmout:
1957 	if (dirp)
1958 		vrele(dirp);
1959 	nlookup_done(&nd);
1960 	if (dvp) {
1961 		if (dvp == vp)
1962 			vrele(dvp);
1963 		else
1964 			vput(dvp);
1965 	}
1966 	if (vp)
1967 		vput(vp);
1968 	return (error);
1969 }
1970 
1971 /*
1972  * nfs remove service
1973  */
1974 int
1975 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1976 	     struct thread *td, struct mbuf **mrq)
1977 {
1978 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1979 	struct sockaddr *nam = nfsd->nd_nam;
1980 	caddr_t dpos = nfsd->nd_dpos;
1981 	struct ucred *cred = &nfsd->nd_cr;
1982 	struct nlookupdata nd;
1983 	u_int32_t *tl;
1984 	int32_t t1;
1985 	caddr_t bpos;
1986 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1987 	int v3 = (nfsd->nd_flag & ND_NFSV3);
1988 	char *cp2;
1989 	struct mbuf *mb, *mreq;
1990 	struct vnode *dirp;
1991 	struct vnode *dvp;
1992 	struct vnode *vp;
1993 	struct vattr dirfor, diraft;
1994 	nfsfh_t nfh;
1995 	fhandle_t *fhp;
1996 
1997 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1998 	nlookup_zero(&nd);
1999 	dirp = NULL;
2000 	dvp = NULL;
2001 	vp = NULL;
2002 
2003 	fhp = &nfh.fh_generic;
2004 	nfsm_srvmtofh(fhp);
2005 	nfsm_srvnamesiz(len);
2006 
2007 	error = nfs_namei(&nd, cred, NAMEI_DELETE, &dvp, &vp,
2008 			  fhp, len, slp, nam, &md, &dpos, &dirp,
2009 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2010 	if (dirp) {
2011 		if (v3)
2012 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2013 	}
2014 	if (error == 0) {
2015 		if (vp->v_type == VDIR) {
2016 			error = EPERM;		/* POSIX */
2017 			goto out;
2018 		}
2019 		/*
2020 		 * The root of a mounted filesystem cannot be deleted.
2021 		 */
2022 		if (vp->v_flag & VROOT) {
2023 			error = EBUSY;
2024 			goto out;
2025 		}
2026 out:
2027 		if (!error) {
2028 			if (dvp) {
2029 				if (dvp == vp)
2030 					vrele(dvp);
2031 				else
2032 					vput(dvp);
2033 				dvp = NULL;
2034 			}
2035 			if (vp) {
2036 				vput(vp);
2037 				vp = NULL;
2038 			}
2039 			error = VOP_NREMOVE(nd.nl_ncp, nd.nl_cred);
2040 		}
2041 	}
2042 	if (dirp && v3)
2043 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2044 	nfsm_reply(NFSX_WCCDATA(v3));
2045 	if (v3) {
2046 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2047 		error = 0;
2048 	}
2049 nfsmout:
2050 	nlookup_done(&nd);
2051 	if (dirp)
2052 		vrele(dirp);
2053 	if (dvp) {
2054 		if (dvp == vp)
2055 			vrele(dvp);
2056 		else
2057 			vput(dvp);
2058 	}
2059 	if (vp)
2060 		vput(vp);
2061 	return(error);
2062 }
2063 
2064 /*
2065  * nfs rename service
 *
 * Looks up the source name (NAMEI_DELETE) and the target name
 * (NAMEI_RENAME), validates the pair, and calls VOP_NRENAME() on the two
 * namecache entries.  The status is encoded into the reply; the path
 * through out1 always returns 0.  The nfsm_*() macros may jump directly
 * to nfsmout, in which case 'error' is returned as they left it.
 *
 * mrq is not referenced directly here; it is presumably consumed by the
 * nfsm_reply() macro -- TODO confirm against the macro definition.
2066  */
2067 int
2068 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2069 	     struct thread *td, struct mbuf **mrq)
2070 {
2071 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2072 	struct sockaddr *nam = nfsd->nd_nam;
2073 	caddr_t dpos = nfsd->nd_dpos;
2074 	struct ucred *cred = &nfsd->nd_cr;
2075 	u_int32_t *tl;
2076 	int32_t t1;
2077 	caddr_t bpos;
2078 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2079 	int tdirfor_ret = 1, tdiraft_ret = 1;
2080 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2081 	char *cp2;
2082 	struct mbuf *mb, *mreq;
2083 	struct nlookupdata fromnd, tond;
2084 	struct vnode *fvp, *fdirp;
2085 	struct vnode *tvp, *tdirp;
2086 	struct namecache *ncp;
2087 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2088 	nfsfh_t fnfh, tnfh;
2089 	fhandle_t *ffhp, *tfhp;
2090 	uid_t saved_uid;
2091 
2092 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2093 #ifndef nolint
2094 	fvp = (struct vnode *)0;
2095 #endif
2096 	ffhp = &fnfh.fh_generic;
2097 	tfhp = &tnfh.fh_generic;
2098 
2099 	/*
2100 	 * Clear fields in case goto nfsmout occurs from macro.
2101 	 */
2102 
2103 	nlookup_zero(&fromnd);
2104 	nlookup_zero(&tond);
2105 	fdirp = NULL;
2106 	tdirp = NULL;
2107 
2108 	nfsm_srvmtofh(ffhp);
2109 	nfsm_srvnamesiz(len);
2110 	/*
2111 	 * Remember our original uid so that we can reset cr_uid before
2112 	 * the second nfs_namei() call, in case it is remapped.
2113 	 */
2114 	saved_uid = cred->cr_uid;
2115 	error = nfs_namei(&fromnd, cred, NAMEI_DELETE, NULL, NULL,
2116 			  ffhp, len, slp, nam, &md, &dpos, &fdirp,
2117 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2118 	if (fdirp) {
2119 		if (v3)
2120 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2121 	}
	/*
	 * Source lookup failed: reply immediately.  Neither wcc block has
	 * post-op attributes at this point, and the target one has no
	 * pre-op attributes either (the *_ret values are still 1).
	 */
2122 	if (error) {
2123 		nfsm_reply(2 * NFSX_WCCDATA(v3));
2124 		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2125 		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2126 		error = 0;
2127 		goto nfsmout;
2128 	}
2129 
2130 	/*
2131 	 * We have to unlock the from ncp before we can safely lookup
2132 	 * the target ncp.
2133 	 */
2134 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2135 	cache_unlock(fromnd.nl_ncp);
2136 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2137 	nfsm_srvmtofh(tfhp);
2138 	nfsm_strsiz(len2, NFS_MAXNAMLEN);
2139 	cred->cr_uid = saved_uid;
2140 
2141 	error = nfs_namei(&tond, cred, NAMEI_RENAME, NULL, NULL,
2142 			  tfhp, len2, slp, nam, &md, &dpos, &tdirp,
2143 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2144 	if (tdirp) {
2145 		if (v3)
2146 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2147 	}
2148 	if (error)
2149 		goto out1;
2150 
2151 	/*
2152 	 * relock the source
	 *
	 * Try a non-blocking lock first.  If that fails, either block on
	 * the source directly (when its address is higher than the
	 * target's) or drop the target lock, lock the source, and then
	 * re-lock and re-resolve the target.
	 * NOTE(review): this looks like address-ordered locking to avoid
	 * deadlock -- confirm against the namecache locking rules.
2153 	 */
2154 	if (cache_lock_nonblock(fromnd.nl_ncp) == 0) {
2155 		cache_resolve(fromnd.nl_ncp, fromnd.nl_cred);
2156 	} else if (fromnd.nl_ncp > tond.nl_ncp) {
2157 		cache_lock(fromnd.nl_ncp);
2158 		cache_resolve(fromnd.nl_ncp, fromnd.nl_cred);
2159 	} else {
2160 		cache_unlock(tond.nl_ncp);
2161 		cache_lock(fromnd.nl_ncp);
2162 		cache_resolve(fromnd.nl_ncp, fromnd.nl_cred);
2163 		cache_lock(tond.nl_ncp);
2164 		cache_resolve(tond.nl_ncp, tond.nl_cred);
2165 	}
2166 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2167 
2168 	tvp = tond.nl_ncp->nc_vp;
2169 	fvp = fromnd.nl_ncp->nc_vp;
2170 
	/*
	 * If the target exists, source and target must both be directories
	 * or both be non-directories, and the target must not be a mount
	 * point.
	 */
2171 	if (tvp != NULL) {
2172 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2173 			if (v3)
2174 				error = EEXIST;
2175 			else
2176 				error = EISDIR;
2177 			goto out;
2178 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2179 			if (v3)
2180 				error = EEXIST;
2181 			else
2182 				error = ENOTDIR;
2183 			goto out;
2184 		}
2185 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
2186 			if (v3)
2187 				error = EXDEV;
2188 			else
2189 				error = ENOTEMPTY;
2190 			goto out;
2191 		}
2192 	}
	/* The source must not be a mount point either. */
2193 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
2194 		if (v3)
2195 			error = EXDEV;
2196 		else
2197 			error = ENOTEMPTY;
2198 		goto out;
2199 	}
	/* Source and target must live on the same mount. */
2200 	if (fromnd.nl_ncp->nc_mount != tond.nl_ncp->nc_mount) {
2201 		if (v3)
2202 			error = EXDEV;
2203 		else
2204 			error = ENOTEMPTY;
2205 		goto out;
2206 	}
	/*
	 * The source is the target's parent.  Note there is no goto here:
	 * the error is carried into the checks below.
	 */
2207 	if (fromnd.nl_ncp == tond.nl_ncp->nc_parent) {
2208 		if (v3)
2209 			error = EINVAL;
2210 		else
2211 			error = ENOTEMPTY;
2212 	}
2213 
2214 	/*
2215 	 * You cannot rename a source into itself or a subdirectory of itself.
2216 	 * We check this by traversing the target directory upwards looking
2217 	 * for a match against the source.
2218 	 */
2219 	if (error == 0) {
2220 		for (ncp = tond.nl_ncp; ncp; ncp = ncp->nc_parent) {
2221 			if (fromnd.nl_ncp == ncp) {
2222 				error = EINVAL;
2223 				break;
2224 			}
2225 		}
2226 	}
2227 
2228 	/*
2229 	 * If source is the same as the destination (that is the
2230 	 * same vnode with the same name in the same directory),
2231 	 * then there is nothing to do.
	 *
	 * The -1 sentinel deliberately overrides any error set above (the
	 * ancestor walk starts at tond.nl_ncp itself, so it has already
	 * flagged EINVAL in this case); it is turned into success at 'out'.
2232 	 */
2233 	if (fromnd.nl_ncp == tond.nl_ncp)
2234 		error = -1;
2235 out:
2236 	if (!error) {
2237 		/*
2238 		 * The VOP_NRENAME function releases all vnode references &
2239 		 * locks prior to returning so we need to clear the pointers
2240 		 * to bypass cleanup code later on.
		 *
		 * NOTE(review): no pointers are cleared in the visible
		 * code; this comment may be stale.
2241 		 */
2242 		error = VOP_NRENAME(fromnd.nl_ncp, tond.nl_ncp, tond.nl_cred);
2243 	} else {
2244 		if (error == -1)
2245 			error = 0;
2246 	}
2247 	/* fall through */
2248 
2249 out1:
2250 	if (fdirp)
2251 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2252 	if (tdirp)
2253 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2254 	nfsm_reply(2 * NFSX_WCCDATA(v3));
2255 	if (v3) {
2256 		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2257 		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2258 	}
2259 	error = 0;
2260 	/* fall through */
2261 
2262 nfsmout:
	/* Common exit: drop directory references and both lookup states. */
2263 	if (tdirp)
2264 		vrele(tdirp);
2265 	nlookup_done(&tond);
2266 	if (fdirp)
2267 		vrele(fdirp);
2268 	nlookup_done(&fromnd);
2269 	return (error);
2270 }
2271 
/*
 * nfs link service
 *
 * Pulls two file handles and a name out of the request: fhp identifies
 * the existing file, dfhp the directory in which the new name is to be
 * created.  When every check passes the link is made with VOP_NLINK().
 *
 * Local status is kept in 'error'; it is handed to nfsm_reply() and then
 * cleared on the paths shown below.
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere and appear to be
 * able to jump to nfsmout on their own (e.g. on a short request), which
 * is why nfsmout releases everything that may be held -- confirm against
 * nfsm_subs.h.
 */
int
nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	   struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	struct nlookupdata nd;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	/* the *_ret values start at 1 and are only overwritten by VOP_GETATTR() */
	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
	int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mreq;
	struct vnode *dirp;	/* directory vnode returned by nfs_namei() */
	struct vnode *dvp;	/* parent of the new name */
	struct vnode *vp;	/* existing target of the new name, if any */
	struct vnode *xp;	/* the file being linked to */
	struct vattr dirfor, diraft, at;
	nfsfh_t nfh, dnfh;
	fhandle_t *fhp, *dfhp;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	nlookup_zero(&nd);
	dirp = dvp = vp = xp = NULL;

	fhp = &nfh.fh_generic;
	dfhp = &dnfh.fh_generic;
	nfsm_srvmtofh(fhp);
	nfsm_srvmtofh(dfhp);
	nfsm_srvnamesiz(len);

	/*
	 * Translate the source handle into a vnode.  On failure reply
	 * right away; getret, dirfor_ret and diraft_ret are all still 1
	 * here, so no attribute data has been collected for the reply.
	 */
	error = nfsrv_fhtovp(fhp, FALSE, &xp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
	if (error) {
		nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
		nfsm_srvpostop_attr(getret, &at);
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
		xp = NULL;
		error = 0;
		goto nfsmout;
	}
	/* hard links to directories are refused */
	if (xp->v_type == VDIR) {
		error = EPERM;		/* POSIX */
		goto out1;
	}

	/*
	 * Look up the new name for creation.  dirp, when returned, is
	 * used only to fetch the directory attributes before and after
	 * the operation.
	 */
	error = nfs_namei(&nd, cred, NAMEI_CREATE, &dvp, &vp,
			  dfhp, len, slp, nam, &md, &dpos, &dirp,
			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	if (dirp) {
		if (v3)
			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
	}
	if (error)
		goto out1;

	/* the name must not already exist */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	/* source and destination must live on the same mount */
	if (xp->v_mount != dvp->v_mount)
		error = EXDEV;
out:
	if (!error) {
		error = VOP_NLINK(nd.nl_ncp, xp, nd.nl_cred);
	}
	/* fall through */

out1:
	/* gather the post-operation attributes and build the reply */
	if (v3)
		getret = VOP_GETATTR(xp, &at);
	if (dirp)
		diraft_ret = VOP_GETATTR(dirp, &diraft);
	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
	if (v3) {
		nfsm_srvpostop_attr(getret, &at);
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
		error = 0;
	}
	/* fall through */

nfsmout:
	/*
	 * Common cleanup.  dirp and xp are dropped with vrele(); dvp and
	 * vp are dropped with vput(), except that dvp is only vrele()'d
	 * when it is the same vnode as vp.
	 */
	nlookup_done(&nd);
	if (dirp)
		vrele(dirp);
	if (xp)
		vrele(xp);
	if (dvp) {
		if (dvp == vp)
			vrele(dvp);
		else
			vput(dvp);
	}
	if (vp)
		vput(vp);
	return(error);
}
2375 
2376 /*
2377  * nfs symbolic link service
2378  */
2379 int
2380 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2381 	      struct thread *td, struct mbuf **mrq)
2382 {
2383 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2384 	struct sockaddr *nam = nfsd->nd_nam;
2385 	caddr_t dpos = nfsd->nd_dpos;
2386 	struct ucred *cred = &nfsd->nd_cr;
2387 	struct vattr va, dirfor, diraft;
2388 	struct nlookupdata nd;
2389 	struct vattr *vap = &va;
2390 	u_int32_t *tl;
2391 	int32_t t1;
2392 	struct nfsv2_sattr *sp;
2393 	char *bpos, *pathcp = (char *)0, *cp2;
2394 	struct uio io;
2395 	struct iovec iv;
2396 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2397 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2398 	struct mbuf *mb, *mreq, *mb2;
2399 	struct vnode *dirp;
2400 	struct vnode *vp;
2401 	nfsfh_t nfh;
2402 	fhandle_t *fhp;
2403 
2404 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2405 	nlookup_zero(&nd);
2406 	dirp = vp = NULL;
2407 
2408 	fhp = &nfh.fh_generic;
2409 	nfsm_srvmtofh(fhp);
2410 	nfsm_srvnamesiz(len);
2411 
2412 	error = nfs_namei(&nd, cred, NAMEI_CREATE, NULL, &vp,
2413 			fhp, len, slp, nam, &md, &dpos, &dirp,
2414 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2415 	if (dirp) {
2416 		if (v3)
2417 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2418 	}
2419 	if (error)
2420 		goto out;
2421 
2422 	VATTR_NULL(vap);
2423 	if (v3)
2424 		nfsm_srvsattr(vap);
2425 	nfsm_strsiz(len2, NFS_MAXPATHLEN);
2426 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2427 	iv.iov_base = pathcp;
2428 	iv.iov_len = len2;
2429 	io.uio_resid = len2;
2430 	io.uio_offset = 0;
2431 	io.uio_iov = &iv;
2432 	io.uio_iovcnt = 1;
2433 	io.uio_segflg = UIO_SYSSPACE;
2434 	io.uio_rw = UIO_READ;
2435 	io.uio_td = NULL;
2436 	nfsm_mtouio(&io, len2);
2437 	if (!v3) {
2438 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2439 		vap->va_mode = nfstov_mode(sp->sa_mode);
2440 	}
2441 	*(pathcp + len2) = '\0';
2442 	if (vp) {
2443 		error = EEXIST;
2444 		goto out;
2445 	}
2446 
2447 	if (vap->va_mode == (mode_t)VNOVAL)
2448 		vap->va_mode = 0;
2449 	error = VOP_NSYMLINK(nd.nl_ncp, &vp, nd.nl_cred, vap, pathcp);
2450 	if (error == 0) {
2451 		bzero((caddr_t)fhp, sizeof(nfh));
2452 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2453 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2454 		if (!error)
2455 			error = VOP_GETATTR(vp, vap);
2456 	}
2457 
2458 #if 0
2459 	/*
2460 	 * We have a vp in hand from the new API call, we do not have to
2461 	 * look it up again.
2462 	 */
2463 	if (error == 0) {
2464 	    if (v3) {
2465 		/*
2466 		 * Issue lookup.  Leave SAVESTART set so we can easily free
2467 		 * the name buffer later on.
2468 		 *
2469 		 * since LOCKPARENT is not set, ni_dvp will be garbage on
2470 		 * return whether an error occurs or not.
2471 		 */
2472 		nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
2473 		nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT | CNP_FOLLOW);
2474 		nd.ni_cnd.cn_td = td;
2475 		nd.ni_cnd.cn_cred = cred;
2476 
2477 		error = lookup(&nd);
2478 		nd.ni_dvp = NULL;
2479 
2480 		if (error == 0) {
2481 			bzero((caddr_t)fhp, sizeof(nfh));
2482 			fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2483 			error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2484 			if (!error)
2485 				error = VOP_GETATTR(nd.ni_vp, vap);
2486 			vput(nd.ni_vp);
2487 			nd.ni_vp = NULL;
2488 		}
2489 	    }
2490 	}
2491 #endif
2492 out:
2493 	if (vp) {
2494 		vput(vp);
2495 		vp = NULL;
2496 	}
2497 	if (pathcp) {
2498 		FREE(pathcp, M_TEMP);
2499 		pathcp = NULL;
2500 	}
2501 	if (dirp) {
2502 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2503 		vrele(dirp);
2504 		dirp = NULL;
2505 	}
2506 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2507 	if (v3) {
2508 		if (!error) {
2509 			nfsm_srvpostop_fh(fhp);
2510 			nfsm_srvpostop_attr(0, vap);
2511 		}
2512 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2513 	}
2514 	error = 0;
2515 	/* fall through */
2516 
2517 nfsmout:
2518 	nlookup_done(&nd);
2519 	if (vp)
2520 		vput(vp);
2521 	if (dirp)
2522 		vrele(dirp);
2523 	if (pathcp)
2524 		FREE(pathcp, M_TEMP);
2525 	return (error);
2526 }
2527 
/*
 * nfs mkdir service
 *
 * Looks up the new name for creation, reads the requested attributes
 * (a full sattr for v3, only the mode for v2) and creates the directory
 * with VOP_NMKDIR().  On success the new directory's file handle and
 * attributes are fetched for the reply.
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere and appear to be
 * able to jump to nfsmout on their own -- confirm against nfsm_subs.h.
 */
int
nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	    struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	struct vattr va, dirfor, diraft;
	struct vattr *vap = &va;
	struct nfs_fattr *fp;
	struct nlookupdata nd;
	caddr_t cp;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mb2, *mreq;
	struct vnode *dirp;	/* parent directory, for before/after attrs */
	struct vnode *vp;	/* existing entry, later the new directory */
	nfsfh_t nfh;
	fhandle_t *fhp;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	nlookup_zero(&nd);
	dirp = NULL;
	vp = NULL;

	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	nfsm_srvnamesiz(len);

	error = nfs_namei(&nd, cred, NAMEI_CREATE, NULL, &vp,
			  fhp, len, slp, nam, &md, &dpos, &dirp,
			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	if (dirp) {
		if (v3)
			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
	}
	if (error) {
		/* lookup failed: reply with whatever wcc data was gathered */
		nfsm_reply(NFSX_WCCDATA(v3));
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
		error = 0;
		goto nfsmout;
	}
	VATTR_NULL(vap);
	if (v3) {
		nfsm_srvsattr(vap);
	} else {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		vap->va_mode = nfstov_mode(*tl++);
	}

	/*
	 * At this point vp is non-NULL only if the name already exists.
	 *
	 * NOTE(review): the comment that used to be here described the
	 * old namei fields (nd.ni_dvp / nd.ni_vp) and said the vnode was
	 * "referenced but not locked"; this code releases vp with vput()
	 * at nfsmout, so it is presumably returned locked -- confirm
	 * against nfs_namei().
	 */

	vap->va_type = VDIR;
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/*
	 * Issue the mkdir op.  On success vp refers to the new directory
	 * and is released with vput() at nfsmout.  A mode the client left
	 * unset becomes 0.
	 */
	if (vap->va_mode == (mode_t)VNOVAL)
		vap->va_mode = 0;
	error = VOP_NMKDIR(nd.nl_ncp, &vp, nd.nl_cred, vap);

	if (error == 0) {
		/* build the new directory's file handle and fetch its attrs */
		bzero((caddr_t)fhp, sizeof(nfh));
		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &fhp->fh_fid);
		if (error == 0)
			error = VOP_GETATTR(vp, vap);
	}
out:
	if (dirp)
		diraft_ret = VOP_GETATTR(dirp, &diraft);
	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
	if (v3) {
		/* handle and attributes are only sent when the op succeeded */
		if (!error) {
			nfsm_srvpostop_fh(fhp);
			nfsm_srvpostop_attr(0, vap);
		}
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
	} else {
		nfsm_srvfhtom(fhp, v3);
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(vap, fp);
	}
	error = 0;
	/* fall through */

nfsmout:
	nlookup_done(&nd);
	if (dirp)
		vrele(dirp);
	if (vp)
		vput(vp);
	return (error);
}
2639 
/*
 * nfs rmdir service
 *
 * Looks up the name for deletion, refuses anything that is not a
 * directory (ENOTDIR) or that is the root of a mounted filesystem
 * (EBUSY), and otherwise removes it with VOP_NRMDIR().
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere and appear to be
 * able to jump to nfsmout on their own -- confirm against nfsm_subs.h.
 */
int
nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	    struct thread *td, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = &nfsd->nd_cr;
	u_int32_t *tl;
	int32_t t1;
	caddr_t bpos;
	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mreq;
	struct vnode *dirp;	/* parent directory, for before/after attrs */
	struct vnode *vp;	/* the directory being removed */
	struct vattr dirfor, diraft;
	nfsfh_t nfh;
	fhandle_t *fhp;
	struct nlookupdata nd;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	nlookup_zero(&nd);
	dirp = NULL;
	vp = NULL;

	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	nfsm_srvnamesiz(len);

	error = nfs_namei(&nd, cred, NAMEI_DELETE, NULL, &vp,
			  fhp, len, slp, nam, &md, &dpos, &dirp,
			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	if (dirp) {
		if (v3)
			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
	}
	if (error) {
		/* lookup failed: reply with whatever wcc data was gathered */
		nfsm_reply(NFSX_WCCDATA(v3));
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
		error = 0;
		goto nfsmout;
	}
	/*
	 * NOTE(review): vp is dereferenced without a NULL check, so this
	 * relies on nfs_namei() returning a vnode whenever a NAMEI_DELETE
	 * lookup succeeds -- confirm.
	 */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_flag & VROOT)
		error = EBUSY;
out:
	/*
	 * Issue the op unless a check above failed.  The vnode is
	 * released first; the removal itself operates on the namecache
	 * entry held in nd.
	 */
	if (!error) {
		vput(vp);
		vp = NULL;
		error = VOP_NRMDIR(nd.nl_ncp, nd.nl_cred);
	}
	/*
	 * nd is torn down here, before the post-op attributes are
	 * fetched.  nlookup_done() is called again at nfsmout for the
	 * paths that jump there directly.
	 * NOTE(review): relies on nlookup_done() being safe to call twice
	 * on the same structure -- confirm.
	 */
	nlookup_done(&nd);

	if (dirp)
		diraft_ret = VOP_GETATTR(dirp, &diraft);
	nfsm_reply(NFSX_WCCDATA(v3));
	if (v3) {
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
		error = 0;
	}
	/* fall through */

nfsmout:
	nlookup_done(&nd);
	if (dirp)
		vrele(dirp);
	if (vp)
		vput(vp);
	return(error);
}
2726 
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read:
 *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
 *	(the code below rounds to DIRBLKSIZ and clamps to NFS_SRVMAXDATA())
 * - calls VOP_READDIR()
 * - loops around building the reply;
 *	if the output generated exceeds count, it breaks out of the loop.
 *	The nfsm_clget macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it only knows that it has encountered eof when the VOP_READDIR()
 *	reads nothing
 * - as such, one readdir rpc will return eof false even though you are
 *	already at the end, and then the next one will return eof
 * - it trims out records with d_fileno == 0 (d_ino in the code below);
 *	this doesn't matter for Unix clients, but such records might confuse
 *	clients for other operating systems.
 * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
 *	than requested, but this may not apply to all filesystems. For
 *	example, client NFS does not { although it is never remote mounted
 *	anyhow }
 *     The alternate call nfsrv_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of: just name strings and file ids, or the
 *	entire reply rpc, or something else.
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
 */
/*
 * Per-entry trailer used by nfsrv_readdirplus().  For each directory
 * entry the structure is filled in and then copied into the reply as one
 * contiguous run of sizeof(struct flrep) bytes right after the padded
 * name, so the field order here is the order the fields appear in the
 * reply.
 */
struct flrep {
	nfsuint64	fl_off;		/* entry cookie (high word is set to 0) */
	u_int32_t	fl_postopok;	/* "attributes follow" flag */
	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];	/* attributes */
	u_int32_t	fl_fhok;	/* "file handle follows" flag */
	u_int32_t	fl_fhsize;	/* file handle size, NFSX_V3FH */
	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];	/* file handle */
};
2764 
2765 int
2766 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2767 	      struct thread *td, struct mbuf **mrq)
2768 {
2769 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2770 	struct sockaddr *nam = nfsd->nd_nam;
2771 	caddr_t dpos = nfsd->nd_dpos;
2772 	struct ucred *cred = &nfsd->nd_cr;
2773 	char *bp, *be;
2774 	struct mbuf *mp;
2775 	struct dirent *dp;
2776 	caddr_t cp;
2777 	u_int32_t *tl;
2778 	int32_t t1;
2779 	caddr_t bpos;
2780 	struct mbuf *mb, *mb2, *mreq, *mp2;
2781 	char *cpos, *cend, *cp2, *rbuf;
2782 	struct vnode *vp = NULL;
2783 	struct vattr at;
2784 	nfsfh_t nfh;
2785 	fhandle_t *fhp;
2786 	struct uio io;
2787 	struct iovec iv;
2788 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2789 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2790 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2791 	u_quad_t off, toff, verf;
2792 	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
2793 
2794 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2795 	fhp = &nfh.fh_generic;
2796 	nfsm_srvmtofh(fhp);
2797 	if (v3) {
2798 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2799 		toff = fxdr_hyper(tl);
2800 		tl += 2;
2801 		verf = fxdr_hyper(tl);
2802 		tl += 2;
2803 	} else {
2804 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2805 		toff = fxdr_unsigned(u_quad_t, *tl++);
2806 		verf = 0;	/* shut up gcc */
2807 	}
2808 	off = toff;
2809 	cnt = fxdr_unsigned(int, *tl);
2810 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2811 	xfer = NFS_SRVMAXDATA(nfsd);
2812 	if (cnt > xfer)
2813 		cnt = xfer;
2814 	if (siz > xfer)
2815 		siz = xfer;
2816 	fullsiz = siz;
2817 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
2818 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2819 	if (!error && vp->v_type != VDIR) {
2820 		error = ENOTDIR;
2821 		vput(vp);
2822 		vp = NULL;
2823 	}
2824 	if (error) {
2825 		nfsm_reply(NFSX_UNSIGNED);
2826 		nfsm_srvpostop_attr(getret, &at);
2827 		error = 0;
2828 		goto nfsmout;
2829 	}
2830 
2831 	/*
2832 	 * Obtain lock on vnode for this section of the code
2833 	 */
2834 
2835 	if (v3) {
2836 		error = getret = VOP_GETATTR(vp, &at);
2837 #if 0
2838 		/*
2839 		 * XXX This check may be too strict for Solaris 2.5 clients.
2840 		 */
2841 		if (!error && toff && verf && verf != at.va_filerev)
2842 			error = NFSERR_BAD_COOKIE;
2843 #endif
2844 	}
2845 	if (!error)
2846 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
2847 	if (error) {
2848 		vput(vp);
2849 		vp = NULL;
2850 		nfsm_reply(NFSX_POSTOPATTR(v3));
2851 		nfsm_srvpostop_attr(getret, &at);
2852 		error = 0;
2853 		goto nfsmout;
2854 	}
2855 	VOP_UNLOCK(vp, 0);
2856 
2857 	/*
2858 	 * end section.  Allocate rbuf and continue
2859 	 */
2860 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2861 again:
2862 	iv.iov_base = rbuf;
2863 	iv.iov_len = fullsiz;
2864 	io.uio_iov = &iv;
2865 	io.uio_iovcnt = 1;
2866 	io.uio_offset = (off_t)off;
2867 	io.uio_resid = fullsiz;
2868 	io.uio_segflg = UIO_SYSSPACE;
2869 	io.uio_rw = UIO_READ;
2870 	io.uio_td = NULL;
2871 	eofflag = 0;
2872 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2873 	if (cookies) {
2874 		free((caddr_t)cookies, M_TEMP);
2875 		cookies = NULL;
2876 	}
2877 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2878 	off = (off_t)io.uio_offset;
2879 	if (!cookies && !error)
2880 		error = NFSERR_PERM;
2881 	if (v3) {
2882 		getret = VOP_GETATTR(vp, &at);
2883 		if (!error)
2884 			error = getret;
2885 	}
2886 	VOP_UNLOCK(vp, 0);
2887 	if (error) {
2888 		vrele(vp);
2889 		vp = NULL;
2890 		free((caddr_t)rbuf, M_TEMP);
2891 		if (cookies)
2892 			free((caddr_t)cookies, M_TEMP);
2893 		nfsm_reply(NFSX_POSTOPATTR(v3));
2894 		nfsm_srvpostop_attr(getret, &at);
2895 		error = 0;
2896 		goto nfsmout;
2897 	}
2898 	if (io.uio_resid) {
2899 		siz -= io.uio_resid;
2900 
2901 		/*
2902 		 * If nothing read, return eof
2903 		 * rpc reply
2904 		 */
2905 		if (siz == 0) {
2906 			vrele(vp);
2907 			vp = NULL;
2908 			nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
2909 				2 * NFSX_UNSIGNED);
2910 			if (v3) {
2911 				nfsm_srvpostop_attr(getret, &at);
2912 				nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2913 				txdr_hyper(at.va_filerev, tl);
2914 				tl += 2;
2915 			} else
2916 				nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2917 			*tl++ = nfs_false;
2918 			*tl = nfs_true;
2919 			FREE((caddr_t)rbuf, M_TEMP);
2920 			FREE((caddr_t)cookies, M_TEMP);
2921 			error = 0;
2922 			goto nfsmout;
2923 		}
2924 	}
2925 
2926 	/*
2927 	 * Check for degenerate cases of nothing useful read.
2928 	 * If so go try again
2929 	 */
2930 	cpos = rbuf;
2931 	cend = rbuf + siz;
2932 	dp = (struct dirent *)cpos;
2933 	cookiep = cookies;
2934 	/*
2935 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
2936 	 * directory offset up to a block boundary, so it is necessary to
2937 	 * skip over the records that preceed the requested offset. This
2938 	 * requires the assumption that file offset cookies monotonically
2939 	 * increase.
2940 	 */
2941 	while (cpos < cend && ncookies > 0 &&
2942 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
2943 		 ((u_quad_t)(*cookiep)) <= toff)) {
2944 		dp = _DIRENT_NEXT(dp);
2945 		cpos = (char *)dp;
2946 		cookiep++;
2947 		ncookies--;
2948 	}
2949 	if (cpos >= cend || ncookies == 0) {
2950 		toff = off;
2951 		siz = fullsiz;
2952 		goto again;
2953 	}
2954 
2955 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
2956 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
2957 	if (v3) {
2958 		nfsm_srvpostop_attr(getret, &at);
2959 		nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2960 		txdr_hyper(at.va_filerev, tl);
2961 	}
2962 	mp = mp2 = mb;
2963 	bp = bpos;
2964 	be = bp + M_TRAILINGSPACE(mp);
2965 
2966 	/* Loop through the records and build reply */
2967 	while (cpos < cend && ncookies > 0) {
2968 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
2969 			nlen = dp->d_namlen;
2970 			rem = nfsm_rndup(nlen) - nlen;
2971 			len += (4 * NFSX_UNSIGNED + nlen + rem);
2972 			if (v3)
2973 				len += 2 * NFSX_UNSIGNED;
2974 			if (len > cnt) {
2975 				eofflag = 0;
2976 				break;
2977 			}
2978 			/*
2979 			 * Build the directory record xdr from
2980 			 * the dirent entry.
2981 			 */
2982 			nfsm_clget;
2983 			*tl = nfs_true;
2984 			bp += NFSX_UNSIGNED;
2985 			if (v3) {
2986 				nfsm_clget;
2987 				*tl = 0;
2988 				bp += NFSX_UNSIGNED;
2989 			}
2990 			nfsm_clget;
2991 			*tl = txdr_unsigned(dp->d_ino);
2992 			bp += NFSX_UNSIGNED;
2993 			nfsm_clget;
2994 			*tl = txdr_unsigned(nlen);
2995 			bp += NFSX_UNSIGNED;
2996 
2997 			/* And loop around copying the name */
2998 			xfer = nlen;
2999 			cp = dp->d_name;
3000 			while (xfer > 0) {
3001 				nfsm_clget;
3002 				if ((bp+xfer) > be)
3003 					tsiz = be-bp;
3004 				else
3005 					tsiz = xfer;
3006 				bcopy(cp, bp, tsiz);
3007 				bp += tsiz;
3008 				xfer -= tsiz;
3009 				if (xfer > 0)
3010 					cp += tsiz;
3011 			}
3012 			/* And null pad to a int32_t boundary */
3013 			for (i = 0; i < rem; i++)
3014 				*bp++ = '\0';
3015 			nfsm_clget;
3016 
3017 			/* Finish off the record */
3018 			if (v3) {
3019 				*tl = 0;
3020 				bp += NFSX_UNSIGNED;
3021 				nfsm_clget;
3022 			}
3023 			*tl = txdr_unsigned(*cookiep);
3024 			bp += NFSX_UNSIGNED;
3025 		}
3026 		dp = _DIRENT_NEXT(dp);
3027 		cpos = (char *)dp;
3028 		cookiep++;
3029 		ncookies--;
3030 	}
3031 	vrele(vp);
3032 	vp = NULL;
3033 	nfsm_clget;
3034 	*tl = nfs_false;
3035 	bp += NFSX_UNSIGNED;
3036 	nfsm_clget;
3037 	if (eofflag)
3038 		*tl = nfs_true;
3039 	else
3040 		*tl = nfs_false;
3041 	bp += NFSX_UNSIGNED;
3042 	if (mp != mb) {
3043 		if (bp < be)
3044 			mp->m_len = bp - mtod(mp, caddr_t);
3045 	} else
3046 		mp->m_len += bp - bpos;
3047 	FREE((caddr_t)rbuf, M_TEMP);
3048 	FREE((caddr_t)cookies, M_TEMP);
3049 
3050 nfsmout:
3051 	if (vp)
3052 		vrele(vp);
3053 	return(error);
3054 }
3055 
3056 int
3057 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3058 		  struct thread *td, struct mbuf **mrq)
3059 {
3060 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3061 	struct sockaddr *nam = nfsd->nd_nam;
3062 	caddr_t dpos = nfsd->nd_dpos;
3063 	struct ucred *cred = &nfsd->nd_cr;
3064 	char *bp, *be;
3065 	struct mbuf *mp;
3066 	struct dirent *dp;
3067 	caddr_t cp;
3068 	u_int32_t *tl;
3069 	int32_t t1;
3070 	caddr_t bpos;
3071 	struct mbuf *mb, *mb2, *mreq, *mp2;
3072 	char *cpos, *cend, *cp2, *rbuf;
3073 	struct vnode *vp = NULL, *nvp;
3074 	struct flrep fl;
3075 	nfsfh_t nfh;
3076 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3077 	struct uio io;
3078 	struct iovec iv;
3079 	struct vattr va, at, *vap = &va;
3080 	struct nfs_fattr *fp;
3081 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3082 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3083 	u_quad_t off, toff, verf;
3084 	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3085 
3086 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3087 	fhp = &nfh.fh_generic;
3088 	nfsm_srvmtofh(fhp);
3089 	nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3090 	toff = fxdr_hyper(tl);
3091 	tl += 2;
3092 	verf = fxdr_hyper(tl);
3093 	tl += 2;
3094 	siz = fxdr_unsigned(int, *tl++);
3095 	cnt = fxdr_unsigned(int, *tl);
3096 	off = toff;
3097 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3098 	xfer = NFS_SRVMAXDATA(nfsd);
3099 	if (cnt > xfer)
3100 		cnt = xfer;
3101 	if (siz > xfer)
3102 		siz = xfer;
3103 	fullsiz = siz;
3104 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3105 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3106 	if (!error && vp->v_type != VDIR) {
3107 		error = ENOTDIR;
3108 		vput(vp);
3109 		vp = NULL;
3110 	}
3111 	if (error) {
3112 		nfsm_reply(NFSX_UNSIGNED);
3113 		nfsm_srvpostop_attr(getret, &at);
3114 		error = 0;
3115 		goto nfsmout;
3116 	}
3117 	error = getret = VOP_GETATTR(vp, &at);
3118 #if 0
3119 	/*
3120 	 * XXX This check may be too strict for Solaris 2.5 clients.
3121 	 */
3122 	if (!error && toff && verf && verf != at.va_filerev)
3123 		error = NFSERR_BAD_COOKIE;
3124 #endif
3125 	if (!error) {
3126 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3127 	}
3128 	if (error) {
3129 		vput(vp);
3130 		vp = NULL;
3131 		nfsm_reply(NFSX_V3POSTOPATTR);
3132 		nfsm_srvpostop_attr(getret, &at);
3133 		error = 0;
3134 		goto nfsmout;
3135 	}
3136 	VOP_UNLOCK(vp, 0);
3137 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3138 again:
3139 	iv.iov_base = rbuf;
3140 	iv.iov_len = fullsiz;
3141 	io.uio_iov = &iv;
3142 	io.uio_iovcnt = 1;
3143 	io.uio_offset = (off_t)off;
3144 	io.uio_resid = fullsiz;
3145 	io.uio_segflg = UIO_SYSSPACE;
3146 	io.uio_rw = UIO_READ;
3147 	io.uio_td = NULL;
3148 	eofflag = 0;
3149 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3150 	if (cookies) {
3151 		free((caddr_t)cookies, M_TEMP);
3152 		cookies = NULL;
3153 	}
3154 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3155 	off = (u_quad_t)io.uio_offset;
3156 	getret = VOP_GETATTR(vp, &at);
3157 	VOP_UNLOCK(vp, 0);
3158 	if (!cookies && !error)
3159 		error = NFSERR_PERM;
3160 	if (!error)
3161 		error = getret;
3162 	if (error) {
3163 		vrele(vp);
3164 		vp = NULL;
3165 		if (cookies)
3166 			free((caddr_t)cookies, M_TEMP);
3167 		free((caddr_t)rbuf, M_TEMP);
3168 		nfsm_reply(NFSX_V3POSTOPATTR);
3169 		nfsm_srvpostop_attr(getret, &at);
3170 		error = 0;
3171 		goto nfsmout;
3172 	}
3173 	if (io.uio_resid) {
3174 		siz -= io.uio_resid;
3175 
3176 		/*
3177 		 * If nothing read, return eof
3178 		 * rpc reply
3179 		 */
3180 		if (siz == 0) {
3181 			vrele(vp);
3182 			vp = NULL;
3183 			nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3184 				2 * NFSX_UNSIGNED);
3185 			nfsm_srvpostop_attr(getret, &at);
3186 			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3187 			txdr_hyper(at.va_filerev, tl);
3188 			tl += 2;
3189 			*tl++ = nfs_false;
3190 			*tl = nfs_true;
3191 			FREE((caddr_t)cookies, M_TEMP);
3192 			FREE((caddr_t)rbuf, M_TEMP);
3193 			error = 0;
3194 			goto nfsmout;
3195 		}
3196 	}
3197 
3198 	/*
3199 	 * Check for degenerate cases of nothing useful read.
3200 	 * If so go try again
3201 	 */
3202 	cpos = rbuf;
3203 	cend = rbuf + siz;
3204 	dp = (struct dirent *)cpos;
3205 	cookiep = cookies;
3206 	/*
3207 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3208 	 * directory offset up to a block boundary, so it is necessary to
3209 	 * skip over the records that preceed the requested offset. This
3210 	 * requires the assumption that file offset cookies monotonically
3211 	 * increase.
3212 	 */
3213 	while (cpos < cend && ncookies > 0 &&
3214 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3215 		 ((u_quad_t)(*cookiep)) <= toff)) {
3216 		dp = _DIRENT_NEXT(dp);
3217 		cpos = (char *)dp;
3218 		cookiep++;
3219 		ncookies--;
3220 	}
3221 	if (cpos >= cend || ncookies == 0) {
3222 		toff = off;
3223 		siz = fullsiz;
3224 		goto again;
3225 	}
3226 
3227 	/*
3228 	 * Probe one of the directory entries to see if the filesystem
3229 	 * supports VGET.
3230 	 */
3231 	if (VFS_VGET(vp->v_mount, dp->d_ino, &nvp) == EOPNOTSUPP) {
3232 		error = NFSERR_NOTSUPP;
3233 		vrele(vp);
3234 		vp = NULL;
3235 		free((caddr_t)cookies, M_TEMP);
3236 		free((caddr_t)rbuf, M_TEMP);
3237 		nfsm_reply(NFSX_V3POSTOPATTR);
3238 		nfsm_srvpostop_attr(getret, &at);
3239 		error = 0;
3240 		goto nfsmout;
3241 	}
3242 	if (nvp) {
3243 		vput(nvp);
3244 		nvp = NULL;
3245 	}
3246 
3247 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
3248 	nfsm_reply(cnt);
3249 	nfsm_srvpostop_attr(getret, &at);
3250 	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3251 	txdr_hyper(at.va_filerev, tl);
3252 	mp = mp2 = mb;
3253 	bp = bpos;
3254 	be = bp + M_TRAILINGSPACE(mp);
3255 
3256 	/* Loop through the records and build reply */
3257 	while (cpos < cend && ncookies > 0) {
3258 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3259 			nlen = dp->d_namlen;
3260 			rem = nfsm_rndup(nlen)-nlen;
3261 
3262 			/*
3263 			 * For readdir_and_lookup get the vnode using
3264 			 * the file number.
3265 			 */
3266 			if (VFS_VGET(vp->v_mount, dp->d_ino, &nvp))
3267 				goto invalid;
3268 			bzero((caddr_t)nfhp, NFSX_V3FH);
3269 			nfhp->fh_fsid =
3270 				nvp->v_mount->mnt_stat.f_fsid;
3271 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3272 				vput(nvp);
3273 				nvp = NULL;
3274 				goto invalid;
3275 			}
3276 			if (VOP_GETATTR(nvp, vap)) {
3277 				vput(nvp);
3278 				nvp = NULL;
3279 				goto invalid;
3280 			}
3281 			vput(nvp);
3282 			nvp = NULL;
3283 
3284 			/*
3285 			 * If either the dircount or maxcount will be
3286 			 * exceeded, get out now. Both of these lengths
3287 			 * are calculated conservatively, including all
3288 			 * XDR overheads.
3289 			 */
3290 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3291 				NFSX_V3POSTOPATTR);
3292 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3293 			if (len > cnt || dirlen > fullsiz) {
3294 				eofflag = 0;
3295 				break;
3296 			}
3297 
3298 			/*
3299 			 * Build the directory record xdr from
3300 			 * the dirent entry.
3301 			 */
3302 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3303 			nfsm_srvfillattr(vap, fp);
3304 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3305 			fl.fl_fhok = nfs_true;
3306 			fl.fl_postopok = nfs_true;
3307 			fl.fl_off.nfsuquad[0] = 0;
3308 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3309 
3310 			nfsm_clget;
3311 			*tl = nfs_true;
3312 			bp += NFSX_UNSIGNED;
3313 			nfsm_clget;
3314 			*tl = 0;
3315 			bp += NFSX_UNSIGNED;
3316 			nfsm_clget;
3317 			*tl = txdr_unsigned(dp->d_ino);
3318 			bp += NFSX_UNSIGNED;
3319 			nfsm_clget;
3320 			*tl = txdr_unsigned(nlen);
3321 			bp += NFSX_UNSIGNED;
3322 
3323 			/* And loop around copying the name */
3324 			xfer = nlen;
3325 			cp = dp->d_name;
3326 			while (xfer > 0) {
3327 				nfsm_clget;
3328 				if ((bp + xfer) > be)
3329 					tsiz = be - bp;
3330 				else
3331 					tsiz = xfer;
3332 				bcopy(cp, bp, tsiz);
3333 				bp += tsiz;
3334 				xfer -= tsiz;
3335 				if (xfer > 0)
3336 					cp += tsiz;
3337 			}
3338 			/* And null pad to a int32_t boundary */
3339 			for (i = 0; i < rem; i++)
3340 				*bp++ = '\0';
3341 
3342 			/*
3343 			 * Now copy the flrep structure out.
3344 			 */
3345 			xfer = sizeof (struct flrep);
3346 			cp = (caddr_t)&fl;
3347 			while (xfer > 0) {
3348 				nfsm_clget;
3349 				if ((bp + xfer) > be)
3350 					tsiz = be - bp;
3351 				else
3352 					tsiz = xfer;
3353 				bcopy(cp, bp, tsiz);
3354 				bp += tsiz;
3355 				xfer -= tsiz;
3356 				if (xfer > 0)
3357 					cp += tsiz;
3358 			}
3359 		}
3360 invalid:
3361 		dp = _DIRENT_NEXT(dp);
3362 		cpos = (char *)dp;
3363 		cookiep++;
3364 		ncookies--;
3365 	}
3366 	vrele(vp);
3367 	vp = NULL;
3368 	nfsm_clget;
3369 	*tl = nfs_false;
3370 	bp += NFSX_UNSIGNED;
3371 	nfsm_clget;
3372 	if (eofflag)
3373 		*tl = nfs_true;
3374 	else
3375 		*tl = nfs_false;
3376 	bp += NFSX_UNSIGNED;
3377 	if (mp != mb) {
3378 		if (bp < be)
3379 			mp->m_len = bp - mtod(mp, caddr_t);
3380 	} else
3381 		mp->m_len += bp - bpos;
3382 	FREE((caddr_t)cookies, M_TEMP);
3383 	FREE((caddr_t)rbuf, M_TEMP);
3384 nfsmout:
3385 	if (vp)
3386 		vrele(vp);
3387 	return(error);
3388 }
3389 
3390 /*
3391  * nfs commit service
      *
      * Handles a COMMIT request.  The request carries a file handle, a 64-bit
      * offset and a 32-bit byte count.  The reply carries wcc_data (pre- and
      * post-operation attributes) and, when the flush succeeded, the write
      * verifier taken from nfsver (seeded from boottime on first use).
      *
      * A count of 0 means "flush from offset to the end of the file"
      * (RFC 1813, section 3.3.21).  The ranged loop below cannot express
      * that, and cnt is a signed int, so a count with its top bit set would
      * arrive here as a negative number and the loop would write nothing
      * while the reply still claimed success.  Any count <= 0 is therefore
      * handled by syncing the whole file, just like a count larger than
      * MAX_COMMIT_COUNT.
      *
      * Every path visible in this function leaves error at 0 before it
      * returns.  NOTE(review): the nfsm_* macros are defined elsewhere and
      * presumably may also leave through nfsmout -- confirm there.
3392  */
3393 int
3394 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3395 	     struct thread *td, struct mbuf **mrq)
3396 {
3397 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3398 	struct sockaddr *nam = nfsd->nd_nam;
3399 	caddr_t dpos = nfsd->nd_dpos;
3400 	struct ucred *cred = &nfsd->nd_cr;
3401 	struct vattr bfor, aft;
3402 	struct vnode *vp = NULL;
3403 	nfsfh_t nfh;
3404 	fhandle_t *fhp;
3405 	u_int32_t *tl;
3406 	int32_t t1;
3407 	caddr_t bpos;
3408 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3409 	char *cp2;
3410 	struct mbuf *mb, *mb2, *mreq;
3411 	u_quad_t off;
3412 
3413 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3414 	fhp = &nfh.fh_generic;
3415 	nfsm_srvmtofh(fhp);
3416 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3417 
3418 	/*
3419 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3420 	 * count parameters, so these arguments are useless (someday maybe).
3421 	 */
3422 	off = fxdr_hyper(tl);
3423 	tl += 2;
3424 	cnt = fxdr_unsigned(int, *tl);
3425 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3426 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3427 	if (error) {
     		/* No vnode: reply with the failure status and empty wcc_data. */
3428 		nfsm_reply(2 * NFSX_UNSIGNED);
3429 		nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3430 		error = 0;
3431 		goto nfsmout;
3432 	}
3433 	for_ret = VOP_GETATTR(vp, &bfor);
3434 
3435 	if (cnt <= 0 || cnt > MAX_COMMIT_COUNT) {
3436 		/*
3437 		 * Give up and do the whole thing
     		 *
     		 * This covers a request that is too large to walk buffer
     		 * by buffer, a count of 0 (flush to end of file) and a
     		 * count that does not fit in a positive int.
3438 		 */
3439 		if (vp->v_object &&
3440 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3441 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3442 		}
3443 		error = VOP_FSYNC(vp, MNT_WAIT);
3444 	} else {
3445 		/*
3446 		 * Locate and synchronously write any buffers that fall
3447 		 * into the requested range.  Note:  we are assuming that
3448 		 * f_iosize is a power of 2.
3449 		 */
3450 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3451 		int iomask = iosize - 1;
3452 		off_t loffset;
3453 
3454 		/*
3455 		 * Align to iosize boundary, super-align to page boundary.
     		 * The bytes skipped backwards are added to cnt so the end
     		 * of the range does not move.
3456 		 */
3457 		if (off & iomask) {
3458 			cnt += off & iomask;
3459 			off &= ~(u_quad_t)iomask;
3460 		}
3461 		if (off & PAGE_MASK) {
3462 			cnt += off & PAGE_MASK;
3463 			off &= ~(u_quad_t)PAGE_MASK;
3464 		}
3465 		loffset = off;
3466 
3467 		if (vp->v_object &&
3468 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3469 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3470 		}
3471 
3472 		crit_enter();
3473 		while (cnt > 0) {
3474 			struct buf *bp;
3475 
3476 			/*
3477 			 * If we have a buffer and it is marked B_DELWRI we
3478 			 * have to lock and write it.  Otherwise the prior
3479 			 * write is assumed to have already been committed.
3480 			 */
3481 			if ((bp = findblk(vp, loffset)) != NULL && (bp->b_flags & B_DELWRI)) {
3482 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
     					/*
     					 * Lock is contended: wait for it, drop it
     					 * again if obtained, and look the block up
     					 * afresh without advancing loffset.
     					 */
3483 					if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) == 0)
3484 						BUF_UNLOCK(bp);
3485 					continue; /* retry */
3486 				}
3487 				bremfree(bp);
3488 				bp->b_flags &= ~B_ASYNC;
3489 				bwrite(bp);
3490 				++nfs_commit_miss;
3491 			}
3492 			++nfs_commit_blks;
3493 			if (cnt < iosize)
3494 				break;
3495 			cnt -= iosize;
3496 			loffset += iosize;
3497 		}
3498 		crit_exit();
3499 	}
3500 
3501 	aft_ret = VOP_GETATTR(vp, &aft);
     	/* Release the vnode and clear vp so nfsmout does not vput it again. */
3502 	vput(vp);
3503 	vp = NULL;
3504 	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3505 	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3506 	if (!error) {
     		/* The verifier is appended only when the flush succeeded. */
3507 		nfsm_build(tl, u_int32_t *, NFSX_V3WRITEVERF);
3508 		if (nfsver.tv_sec == 0)
3509 			nfsver = boottime;
3510 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3511 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3512 	} else {
3513 		error = 0;
3514 	}
3515 nfsmout:
3516 	if (vp)
3517 		vput(vp);
3518 	return(error);
3519 }
3520 
3521 /*
3522  * nfs statfs service
      *
      * Answers a file-system statistics request for the file system that
      * holds the vnode named by the request's file handle.  The reply
      * layout depends on whether the request is flagged ND_NFSV3:
      *   - v3: post-op attributes, then byte totals (block counts scaled by
      *     f_bsize as 64-bit values) and file counts (only the low 32-bit
      *     word is filled, the high word is written as 0).
      *   - otherwise: transfer size NFS_MAXDGRAMDATA, block size and the
      *     block counts, each as a 32-bit value.
      *
      * Every path visible in this function leaves error at 0 before it
      * returns.  NOTE(review): the nfsm_* macros are defined elsewhere and
      * presumably may also leave through nfsmout -- confirm there.
3523  */
3524 int
3525 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3526 	     struct thread *td, struct mbuf **mrq)
3527 {
3528 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3529 	struct sockaddr *nam = nfsd->nd_nam;
3530 	caddr_t dpos = nfsd->nd_dpos;
3531 	struct ucred *cred = &nfsd->nd_cr;
3532 	struct statfs *sf;
3533 	struct nfs_statfs *sfp;
3534 	u_int32_t *tl;
3535 	int32_t t1;
3536 	caddr_t bpos;
3537 	int error = 0, rdonly, getret = 1;
3538 	int v3 = (nfsd->nd_flag & ND_NFSV3);
3539 	char *cp2;
3540 	struct mbuf *mb, *mb2, *mreq;
3541 	struct vnode *vp = NULL;
3542 	struct vattr at;
3543 	nfsfh_t nfh;
3544 	fhandle_t *fhp;
3545 	struct statfs statfs;
3546 	u_quad_t tval;
3547 
3548 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3549 	fhp = &nfh.fh_generic;
3550 	nfsm_srvmtofh(fhp);
3551 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3552 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3553 	if (error) {
     		/* No vnode: reply with the failure status only. */
3554 		nfsm_reply(NFSX_UNSIGNED);
3555 		nfsm_srvpostop_attr(getret, &at);
3556 		error = 0;
3557 		goto nfsmout;
3558 	}
3559 	sf = &statfs;
     	/* The statistics are fetched with proc0's credentials, not cred. */
3560 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3561 	getret = VOP_GETATTR(vp, &at);
     	/* Release the vnode and clear vp so nfsmout does not vput it again. */
3562 	vput(vp);
3563 	vp = NULL;
3564 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
     	/* Post-op attributes are part of the v3 reply only. */
3565 	if (v3)
3566 		nfsm_srvpostop_attr(getret, &at);
3567 	if (error) {
3568 		error = 0;
3569 		goto nfsmout;
3570 	}
3571 	nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
3572 	if (v3) {
     		/* Byte totals: block count times block size, done in 64 bits. */
3573 		tval = (u_quad_t)sf->f_blocks;
3574 		tval *= (u_quad_t)sf->f_bsize;
3575 		txdr_hyper(tval, &sfp->sf_tbytes);
3576 		tval = (u_quad_t)sf->f_bfree;
3577 		tval *= (u_quad_t)sf->f_bsize;
3578 		txdr_hyper(tval, &sfp->sf_fbytes);
3579 		tval = (u_quad_t)sf->f_bavail;
3580 		tval *= (u_quad_t)sf->f_bsize;
3581 		txdr_hyper(tval, &sfp->sf_abytes);
     		/* File counts: high word 0, low word from the statfs field. */
3582 		sfp->sf_tfiles.nfsuquad[0] = 0;
3583 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3584 		sfp->sf_ffiles.nfsuquad[0] = 0;
3585 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
     		/* sf_afiles is filled from f_ffree as well, same as sf_ffiles. */
3586 		sfp->sf_afiles.nfsuquad[0] = 0;
3587 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3588 		sfp->sf_invarsec = 0;
3589 	} else {
3590 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3591 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3592 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3593 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3594 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3595 	}
3596 nfsmout:
3597 	if (vp)
3598 		vput(vp);
3599 	return(error);
3600 }
3601 
3602 /*
3603  * nfs fsinfo service
      *
      * Builds the static file-system information reply for the file system
      * that holds the vnode named by the request's file handle: post-op
      * attributes followed by a struct nfsv3_fsinfo.  Apart from the
      * maximum file size (a guess derived from f_bsize) and the preferred
      * transfer size (chosen by socket type), all values are constants.
      *
      * Every path visible in this function leaves error at 0 before it
      * returns.  NOTE(review): the nfsm_* macros are defined elsewhere and
      * presumably may also leave through nfsmout -- confirm there.
3604  */
3605 int
3606 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3607 	     struct thread *td, struct mbuf **mrq)
3608 {
3609 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3610 	struct sockaddr *nam = nfsd->nd_nam;
3611 	caddr_t dpos = nfsd->nd_dpos;
3612 	struct ucred *cred = &nfsd->nd_cr;
3613 	u_int32_t *tl;
3614 	struct nfsv3_fsinfo *sip;
3615 	int32_t t1;
3616 	caddr_t bpos;
3617 	int error = 0, rdonly, getret = 1, pref;
3618 	char *cp2;
3619 	struct mbuf *mb, *mb2, *mreq;
3620 	struct vnode *vp = NULL;
3621 	struct vattr at;
3622 	nfsfh_t nfh;
3623 	fhandle_t *fhp;
3624 	u_quad_t maxfsize;
3625 	struct statfs sb;
3626 
3627 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3628 	fhp = &nfh.fh_generic;
3629 	nfsm_srvmtofh(fhp);
3630 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3631 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3632 	if (error) {
     		/* No vnode: reply with the failure status only. */
3633 		nfsm_reply(NFSX_UNSIGNED);
3634 		nfsm_srvpostop_attr(getret, &at);
3635 		error = 0;
3636 		goto nfsmout;
3637 	}
3638 
3639 	/* XXX Try to make a guess on the max file size. */
     	/*
     	 * The guess is 0x80000000 blocks of f_bsize bytes, minus one byte.
     	 * NOTE(review): the VFS_STATFS() return value is not checked, so
     	 * sb.f_bsize is read even if the call failed -- confirm whether
     	 * that can leave sb unset.
     	 */
3640 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3641 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3642 
3643 	getret = VOP_GETATTR(vp, &at);
     	/* Release the vnode and clear vp so nfsmout does not vput it again. */
3644 	vput(vp);
3645 	vp = NULL;
3646 	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
3647 	nfsm_srvpostop_attr(getret, &at);
3648 	nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
3649 
3650 	/*
3651 	 * XXX
3652 	 * There should be file system VFS OP(s) to get this information.
3653 	 * For now, assume ufs.
     	 *
     	 * The preferred read, write and readdir sizes follow the socket
     	 * type: NFS_MAXDGRAMDATA on a datagram socket, NFS_MAXDATA
     	 * otherwise.  The maximum sizes are always NFS_MAXDATA.
3654 	 */
3655 	if (slp->ns_so->so_type == SOCK_DGRAM)
3656 		pref = NFS_MAXDGRAMDATA;
3657 	else
3658 		pref = NFS_MAXDATA;
3659 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3660 	sip->fs_rtpref = txdr_unsigned(pref);
3661 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3662 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3663 	sip->fs_wtpref = txdr_unsigned(pref);
3664 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3665 	sip->fs_dtpref = txdr_unsigned(pref);
3666 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
     	/* Time delta is reported as 0 seconds and 1 nanosecond. */
3667 	sip->fs_timedelta.nfsv3_sec = 0;
3668 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3669 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3670 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3671 		NFSV3FSINFO_CANSETTIME);
3672 nfsmout:
3673 	if (vp)
3674 		vput(vp);
3675 	return(error);
3676 }
3677 
3678 /*
3679  * nfs pathconf service
      *
      * Reports pathconf values for the vnode named by the request's file
      * handle.  Link max, name max, chown-restricted and no-trunc are asked
      * of the file system through VOP_PATHCONF(); case-insensitive and
      * case-preserving are hard-coded.  The reply is post-op attributes
      * followed by a struct nfsv3_pathconf.
      *
      * Every path visible in this function leaves error at 0 before it
      * returns.  NOTE(review): the nfsm_* macros are defined elsewhere and
      * presumably may also leave through nfsmout -- confirm there.
3680  */
3681 int
3682 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3683 	       struct thread *td, struct mbuf **mrq)
3684 {
3685 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3686 	struct sockaddr *nam = nfsd->nd_nam;
3687 	caddr_t dpos = nfsd->nd_dpos;
3688 	struct ucred *cred = &nfsd->nd_cr;
3689 	u_int32_t *tl;
3690 	struct nfsv3_pathconf *pc;
3691 	int32_t t1;
3692 	caddr_t bpos;
3693 	int error = 0, rdonly, getret = 1;
3694 	register_t linkmax, namemax, chownres, notrunc;
3695 	char *cp2;
3696 	struct mbuf *mb, *mb2, *mreq;
3697 	struct vnode *vp = NULL;
3698 	struct vattr at;
3699 	nfsfh_t nfh;
3700 	fhandle_t *fhp;
3701 
3702 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3703 	fhp = &nfh.fh_generic;
3704 	nfsm_srvmtofh(fhp);
3705 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3706 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3707 	if (error) {
     		/* No vnode: reply with the failure status only. */
3708 		nfsm_reply(NFSX_UNSIGNED);
3709 		nfsm_srvpostop_attr(getret, &at);
3710 		error = 0;
3711 		goto nfsmout;
3712 	}
     	/* The four queries run in order and stop at the first failure. */
3713 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3714 	if (!error)
3715 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3716 	if (!error)
3717 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3718 	if (!error)
3719 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
     	/* Attributes are fetched whether or not the queries succeeded. */
3720 	getret = VOP_GETATTR(vp, &at);
     	/* Release the vnode and clear vp so nfsmout does not vput it again. */
3721 	vput(vp);
3722 	vp = NULL;
3723 	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
3724 	nfsm_srvpostop_attr(getret, &at);
3725 	if (error) {
     		/* Failure: no pathconf body is appended to the reply. */
3726 		error = 0;
3727 		goto nfsmout;
3728 	}
3729 	nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
3730 
3731 	pc->pc_linkmax = txdr_unsigned(linkmax);
3732 	pc->pc_namemax = txdr_unsigned(namemax);
3733 	pc->pc_notrunc = txdr_unsigned(notrunc);
3734 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3735 
3736 	/*
3737 	 * These should probably be supported by VOP_PATHCONF(), but
3738 	 * until msdosfs is exportable (why would you want to?), the
3739 	 * Unix defaults should be ok.
3740 	 */
3741 	pc->pc_caseinsensitive = nfs_false;
3742 	pc->pc_casepreserving = nfs_true;
3743 nfsmout:
3744 	if (vp)
3745 		vput(vp);
3746 	return(error);
3747 }
3748 
3749 /*
3750  * Null operation, used by clients to ping server
      *
      * Builds a reply with a zero-length body.  error starts out as
      * NFSERR_RETVOID and is handed to nfsm_reply() in that state.
      *
      * NOTE(review): there is no explicit return statement or nfsmout
      * label here; nfsm_srvdone presumably supplies both, and the locals
      * that are declared but never named below (bpos, mb, mreq) are
      * presumably consumed by the nfsm_* macros -- confirm against their
      * definitions.
3751  */
3752 /* ARGSUSED */
3753 int
3754 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3755 	   struct thread *td, struct mbuf **mrq)
3756 {
3757 	struct mbuf *mrep = nfsd->nd_mrep;
3758 	caddr_t bpos;
3759 	int error = NFSERR_RETVOID;
3760 	struct mbuf *mb, *mreq;
3761 
3762 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3763 	nfsm_reply(0);
3764 	nfsm_srvdone;
3765 }
3766 
3767 /*
3768  * No operation, used for obsolete procedures
      *
      * Replies with a status and a zero-length body.  The status is the
      * request's nd_repstat when that is non-zero, EPROCUNAVAIL otherwise.
      * error is reset to 0 once the reply has been built.
      *
      * NOTE(review): there is no explicit return statement or nfsmout
      * label here; nfsm_srvdone presumably supplies both, and the locals
      * that are declared but never named below (bpos, mb, mreq) are
      * presumably consumed by the nfsm_* macros -- confirm against their
      * definitions.
3769  */
3770 /* ARGSUSED */
3771 int
3772 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3773 	   struct thread *td, struct mbuf **mrq)
3774 {
3775 	struct mbuf *mrep = nfsd->nd_mrep;
3776 	caddr_t bpos;
3777 	int error;
3778 	struct mbuf *mb, *mreq;
3779 
3780 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
     	/* Prefer a status already recorded on the request. */
3781 	if (nfsd->nd_repstat)
3782 		error = nfsd->nd_repstat;
3783 	else
3784 		error = EPROCUNAVAIL;
3785 	nfsm_reply(0);
     	/* The status went out in the reply; the function result is 0. */
3786 	error = 0;
3787 	nfsm_srvdone;
3788 }
3789 
3790 /*
3791  * Access check for a vnode that was looked up from an NFS file handle,
3792  * i.e. one that a Unix client may already hold open.  vn_writechk() plus
3793  * VOP_ACCESS() alone is not enough here:
3794  * 1 - Writes must be refused for a read-only export (rdonly) as well as
3795  *     for a MNT_RDONLY mount.
3796  * 2 - With override set, the owner is let through even when the mode bits
3797  *     say EACCES, so that a chmod after open does not break the client.
3798  *     This is a known security hole, accepted as such by the original
3799  *     authors.
3800  *
3801  * Only EACCES is overridden.  VOP_ACCESS() reports EPERM for an IMMUTABLE
3802  * file, and EPERM is always passed back as an error.  td is unused.
3803  */
3804 static int
3805 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred,
3806 	     int rdonly, struct thread *td, int override)
3807 {
3808 	struct vattr va;
3809 	int rv;
3810 
3811 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3812 
3813 	if ((flags & VWRITE) != 0) {
3814 		int ro_fs;
3815 
3816 		/*
3817 		 * vn_writechk() logic, extended with the export's rdonly
3818 		 * flag: regular files, directories and symlinks cannot be
3819 		 * written on a read-only file system.  Other vnode types
3820 		 * (sockets, block and character devices per the original
3821 		 * note) are not refused here.
3822 		 */
3823 		ro_fs = rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY) != 0;
3824 		if (ro_fs && (vp->v_type == VREG || vp->v_type == VDIR ||
3825 		    vp->v_type == VLNK))
3826 			return (EROFS);
3827 
3828 		/* A vnode flagged VTEXT (shared text) is not writable. */
3829 		if ((vp->v_flag & VTEXT) != 0)
3830 			return (ETXTBSY);
3831 	}
3832 
3833 	/* The attributes supply the owner uid for the override test. */
3834 	rv = VOP_GETATTR(vp, &va);
3835 	if (rv != 0)
3836 		return (rv);
3837 
3838 	rv = VOP_ACCESS(vp, flags, cred);
3839 	if (rv != EACCES)
3840 		return (rv);
3841 
3842 	/*
3843 	 * EACCES only: forgive it for the file's owner when the caller
3844 	 * asked for the override (reads/writes on already-open files).
3845 	 */
3846 	return ((override && cred->cr_uid == va.va_uid) ? 0 : rv);
3847 }
3848 #endif /* NFS_NOSERVER */
3849 
3850