xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision 3f625015)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.44 2007/05/13 01:32:28 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_zone.h>
87 #include <vm/vm_object.h>
88 
89 #include <sys/buf2.h>
90 
91 #include <sys/thread2.h>
92 
93 #include "nfsproto.h"
94 #include "rpcv2.h"
95 #include "nfs.h"
96 #include "xdr_subs.h"
97 #include "nfsm_subs.h"
98 
/*
 * Debug trace hook.  With NFSRV_DEBUG defined, nfsdbprintf((fmt, ...))
 * expands to a kprintf call; the caller supplies the inner parentheses
 * around the argument list.  Without NFSRV_DEBUG it expands to nothing.
 */
#if defined(NFSRV_DEBUG)
#define nfsdbprintf(info)	kprintf info
#else
#define nfsdbprintf(info)
#endif
104 
/*
 * NOTE(review): MAX_COMMIT_COUNT is not referenced in this part of the
 * file; presumably it bounds the size of a commit request - confirm.
 */
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Tuning knobs for the sequential-read heuristic table used by
 * nfsrv_read().
 */
#define NUM_HEURISTIC		1017	/* number of slots in nfsheur[] */
#define NHUSE_INIT		64	/* nh_use of a freshly claimed slot */
#define NHUSE_INC		16	/* nh_use credit added per read */
#define NHUSE_MAX		2048	/* ceiling applied to nh_use */

/*
 * One slot of the read heuristic: remembers, per vnode, where the next
 * sequential read is expected to start and how sequential recent reads
 * have been.
 */
struct nfsheur {
	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
	off_t nh_nextr;		/* next offset for sequential detection */
	int nh_use;		/* use count for selection */
	int nh_seqcount;	/* heuristic */
};

static struct nfsheur nfsheur[NUM_HEURISTIC];
118 
/*
 * NFSv3 wire file types, nine slots.  NOTE(review): presumably indexed by
 * enum vtype - confirm against the users of this table.
 */
nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
		      NFFIFO, NFNON };
#ifndef NFS_NOSERVER	/* the matching #endif lies outside this section */
/*
 * NFSv2 version of the table above.  It differs only in slots 6 and 7,
 * which hold NFNON and NFCHR where the v3 table has NFSOCK and NFFIFO.
 */
nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
		      NFCHR, NFNON };
/* Global vars */
extern u_int32_t nfs_xdrneg1;		/* compared against "unset" v2 sattr fields */
extern u_int32_t nfs_false, nfs_true;	/* XDR booleans stored directly in replies */
extern enum vtype nv3tov_type[8];
extern struct nfsstats nfsstats;	/* server statistics counters */

/*
 * Write-gathering delays.  The write-gathering service is documented as
 * being used when nfsrvw_procrastinate > 0.  NOTE(review): the unit of
 * NFS_GATHERDELAY * 1000 is not visible in this section.
 */
int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
int nfsrvw_procrastinate_v3 = 0;

/*
 * Write verifier returned in NFSv3 WRITE replies; nfsrv_write() seeds it
 * from boottime the first time it finds tv_sec == 0.
 */
static struct timespec	nfsver;

SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.async: when non-zero nfsrv_write() performs NFSv2 writes as
 * unstable writes and reports NFSv3 unstable writes as FILESYNC.
 */
static int nfs_async;
SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
/*
 * vfs.nfs.commit_blks / vfs.nfs.commit_miss.  NOTE(review): neither
 * counter is updated in this section; presumably commit statistics.
 */
static int nfs_commit_blks;
static int nfs_commit_miss;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
143 
/*
 * Forward declarations of helpers whose definitions are not in this
 * section.
 *
 * nfsrv_access() is invoked below as
 *	nfsrv_access(vp, VREAD | VWRITE | VEXEC, cred, rdonly, td, 0 or 1)
 * and a non-zero return is treated as "access denied" (it is used as the
 * error code for the reply).  NOTE(review): the meaning of the final int
 * argument is not visible here - confirm at the definition.
 */
static int nfsrv_access (struct vnode *,int,struct ucred *,int,
		struct thread *, int);
static void nfsrvw_coalesce (struct nfsrv_descript *,
		struct nfsrv_descript *);
148 
149 /*
150  * nfs v3 access service
151  */
152 int
153 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
154 	      struct thread *td, struct mbuf **mrq)
155 {
156 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
157 	struct sockaddr *nam = nfsd->nd_nam;
158 	caddr_t dpos = nfsd->nd_dpos;
159 	struct ucred *cred = &nfsd->nd_cr;
160 	struct vnode *vp = NULL;
161 	nfsfh_t nfh;
162 	fhandle_t *fhp;
163 	u_int32_t *tl;
164 	int32_t t1;
165 	caddr_t bpos;
166 	int error = 0, rdonly, getret;
167 	char *cp2;
168 	struct mbuf *mb, *mreq, *mb2;
169 	struct vattr vattr, *vap = &vattr;
170 	u_long testmode, nfsmode;
171 
172 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
173 	fhp = &nfh.fh_generic;
174 	nfsm_srvmtofh(fhp);
175 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
176 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
177 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
178 	if (error) {
179 		nfsm_reply(NFSX_UNSIGNED);
180 		nfsm_srvpostop_attr(1, (struct vattr *)0);
181 		error = 0;
182 		goto nfsmout;
183 	}
184 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
185 	if ((nfsmode & NFSV3ACCESS_READ) &&
186 		nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
187 		nfsmode &= ~NFSV3ACCESS_READ;
188 	if (vp->v_type == VDIR)
189 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
190 			NFSV3ACCESS_DELETE);
191 	else
192 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
193 	if ((nfsmode & testmode) &&
194 		nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
195 		nfsmode &= ~testmode;
196 	if (vp->v_type == VDIR)
197 		testmode = NFSV3ACCESS_LOOKUP;
198 	else
199 		testmode = NFSV3ACCESS_EXECUTE;
200 	if ((nfsmode & testmode) &&
201 		nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
202 		nfsmode &= ~testmode;
203 	getret = VOP_GETATTR(vp, vap);
204 	vput(vp);
205 	vp = NULL;
206 	nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
207 	nfsm_srvpostop_attr(getret, vap);
208 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
209 	*tl = txdr_unsigned(nfsmode);
210 nfsmout:
211 	if (vp)
212 		vput(vp);
213 	return(error);
214 }
215 
216 /*
217  * nfs getattr service
218  */
219 int
220 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
221 	      struct thread *td, struct mbuf **mrq)
222 {
223 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
224 	struct sockaddr *nam = nfsd->nd_nam;
225 	caddr_t dpos = nfsd->nd_dpos;
226 	struct ucred *cred = &nfsd->nd_cr;
227 	struct nfs_fattr *fp;
228 	struct vattr va;
229 	struct vattr *vap = &va;
230 	struct vnode *vp = NULL;
231 	nfsfh_t nfh;
232 	fhandle_t *fhp;
233 	u_int32_t *tl;
234 	int32_t t1;
235 	caddr_t bpos;
236 	int error = 0, rdonly;
237 	char *cp2;
238 	struct mbuf *mb, *mb2, *mreq;
239 
240 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
241 	fhp = &nfh.fh_generic;
242 	nfsm_srvmtofh(fhp);
243 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
244 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
245 	if (error) {
246 		nfsm_reply(0);
247 		error = 0;
248 		goto nfsmout;
249 	}
250 	error = VOP_GETATTR(vp, vap);
251 	vput(vp);
252 	vp = NULL;
253 	nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
254 	if (error) {
255 		error = 0;
256 		goto nfsmout;
257 	}
258 	nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259 	nfsm_srvfillattr(vap, fp);
260 	/* fall through */
261 
262 nfsmout:
263 	if (vp)
264 		vput(vp);
265 	return(error);
266 }
267 
268 /*
269  * nfs setattr service
270  */
271 int
272 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
273 	      struct thread *td, struct mbuf **mrq)
274 {
275 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
276 	struct sockaddr *nam = nfsd->nd_nam;
277 	caddr_t dpos = nfsd->nd_dpos;
278 	struct ucred *cred = &nfsd->nd_cr;
279 	struct vattr va, preat;
280 	struct vattr *vap = &va;
281 	struct nfsv2_sattr *sp;
282 	struct nfs_fattr *fp;
283 	struct vnode *vp = NULL;
284 	nfsfh_t nfh;
285 	fhandle_t *fhp;
286 	u_int32_t *tl;
287 	int32_t t1;
288 	caddr_t bpos;
289 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
290 	int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
291 	char *cp2;
292 	struct mbuf *mb, *mb2, *mreq;
293 	struct timespec guard;
294 
295 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
296 	fhp = &nfh.fh_generic;
297 	nfsm_srvmtofh(fhp);
298 	VATTR_NULL(vap);
299 	if (v3) {
300 		nfsm_srvsattr(vap);
301 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
302 		gcheck = fxdr_unsigned(int, *tl);
303 		if (gcheck) {
304 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
305 			fxdr_nfsv3time(tl, &guard);
306 		}
307 	} else {
308 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
309 		/*
310 		 * Nah nah nah nah na nah
311 		 * There is a bug in the Sun client that puts 0xffff in the mode
312 		 * field of sattr when it should put in 0xffffffff. The u_short
313 		 * doesn't sign extend.
314 		 * --> check the low order 2 bytes for 0xffff
315 		 */
316 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
317 			vap->va_mode = nfstov_mode(sp->sa_mode);
318 		if (sp->sa_uid != nfs_xdrneg1)
319 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
320 		if (sp->sa_gid != nfs_xdrneg1)
321 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
322 		if (sp->sa_size != nfs_xdrneg1)
323 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
324 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
325 #ifdef notyet
326 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
327 #else
328 			vap->va_atime.tv_sec =
329 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
330 			vap->va_atime.tv_nsec = 0;
331 #endif
332 		}
333 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
334 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
335 
336 	}
337 
338 	/*
339 	 * Now that we have all the fields, lets do it.
340 	 */
341 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly,
342 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
343 	if (error) {
344 		nfsm_reply(2 * NFSX_UNSIGNED);
345 		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
346 		error = 0;
347 		goto nfsmout;
348 	}
349 
350 	/*
351 	 * vp now an active resource, pay careful attention to cleanup
352 	 */
353 
354 	if (v3) {
355 		error = preat_ret = VOP_GETATTR(vp, &preat);
356 		if (!error && gcheck &&
357 			(preat.va_ctime.tv_sec != guard.tv_sec ||
358 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
359 			error = NFSERR_NOT_SYNC;
360 		if (error) {
361 			vput(vp);
362 			vp = NULL;
363 			nfsm_reply(NFSX_WCCDATA(v3));
364 			nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
365 			error = 0;
366 			goto nfsmout;
367 		}
368 	}
369 
370 	/*
371 	 * If the size is being changed write acces is required, otherwise
372 	 * just check for a read only file system.
373 	 */
374 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
375 		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
376 			error = EROFS;
377 			goto out;
378 		}
379 	} else {
380 		if (vp->v_type == VDIR) {
381 			error = EISDIR;
382 			goto out;
383 		} else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
384 			    td, 0)) != 0){
385 			goto out;
386 		}
387 	}
388 	error = VOP_SETATTR(vp, vap, cred);
389 	postat_ret = VOP_GETATTR(vp, vap);
390 	if (!error)
391 		error = postat_ret;
392 out:
393 	vput(vp);
394 	vp = NULL;
395 	nfsm_reply(NFSX_WCCORFATTR(v3));
396 	if (v3) {
397 		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
398 		error = 0;
399 		goto nfsmout;
400 	} else {
401 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
402 		nfsm_srvfillattr(vap, fp);
403 	}
404 	/* fall through */
405 
406 nfsmout:
407 	if (vp)
408 		vput(vp);
409 	return(error);
410 }
411 
412 /*
413  * nfs lookup rpc
414  */
415 int
416 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
417 	     struct thread *td, struct mbuf **mrq)
418 {
419 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
420 	struct sockaddr *nam = nfsd->nd_nam;
421 	caddr_t dpos = nfsd->nd_dpos;
422 	struct ucred *cred = &nfsd->nd_cr;
423 	struct nfs_fattr *fp;
424 	struct nlookupdata nd;
425 	struct vnode *vp;
426 	struct vnode *dirp;
427 	struct nchandle nch;
428 	nfsfh_t nfh;
429 	fhandle_t *fhp;
430 	caddr_t cp;
431 	u_int32_t *tl;
432 	int32_t t1;
433 	caddr_t bpos;
434 	int error = 0, len, dirattr_ret = 1;
435 	int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
436 	char *cp2;
437 	struct mbuf *mb, *mb2, *mreq;
438 	struct vattr va, dirattr, *vap = &va;
439 
440 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
441 	nlookup_zero(&nd);
442 	dirp = NULL;
443 	vp = NULL;
444 
445 	fhp = &nfh.fh_generic;
446 	nfsm_srvmtofh(fhp);
447 	nfsm_srvnamesiz(len);
448 
449 	pubflag = nfs_ispublicfh(fhp);
450 
451 	error = nfs_namei(&nd, cred, NAMEI_LOOKUP, NULL, &vp,
452 		fhp, len, slp, nam, &md, &dpos,
453 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
454 
455 	/*
456 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
457 	 * structure in case macros jump to nfsmout.
458 	 */
459 
460 	if (error) {
461 		if (dirp) {
462 			if (v3)
463 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
464 			vrele(dirp);
465 			dirp = NULL;
466 		}
467 		nfsm_reply(NFSX_POSTOPATTR(v3));
468 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
469 		error = 0;
470 		goto nfsmout;
471 	}
472 
473 	/*
474 	 * Locate index file for public filehandle
475 	 *
476 	 * error is 0 on entry and 0 on exit from this block.
477 	 */
478 
479 	if (pubflag) {
480 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
481 			/*
482 			 * Setup call to lookup() to see if we can find
483 			 * the index file. Arguably, this doesn't belong
484 			 * in a kernel.. Ugh.  If an error occurs, do not
485 			 * try to install an index file and then clear the
486 			 * error.
487 			 *
488 			 * When we replace nd with ind and redirect ndp,
489 			 * maintenance of ni_startdir and ni_vp shift to
490 			 * ind and we have to clean them up in the old nd.
491 			 * However, the cnd resource continues to be maintained
492 			 * via the original nd.  Confused?  You aren't alone!
493 			 */
494 			vn_unlock(vp);
495 			cache_copy(&nd.nl_nch, &nch);
496 			nlookup_done(&nd);
497 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
498 						UIO_SYSSPACE, 0, cred, &nch);
499 			cache_drop(&nch);
500 			if (error == 0)
501 				error = nlookup(&nd);
502 
503 			if (error == 0) {
504 				/*
505 				 * Found an index file. Get rid of
506 				 * the old references.  transfer vp and
507 				 * load up the new vp.  Fortunately we do
508 				 * not have to deal with dvp, that would be
509 				 * a huge mess.
510 				 */
511 				if (dirp)
512 					vrele(dirp);
513 				dirp = vp;
514 				vp = NULL;
515 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
516 							LK_EXCLUSIVE, &vp);
517 				KKASSERT(error == 0);
518 			}
519 			error = 0;
520 		}
521 		/*
522 		 * If the public filehandle was used, check that this lookup
523 		 * didn't result in a filehandle outside the publicly exported
524 		 * filesystem.  We clear the poor vp here to avoid lockups due
525 		 * to NFS I/O.
526 		 */
527 
528 		if (vp->v_mount != nfs_pub.np_mount) {
529 			vput(vp);
530 			vp = NULL;
531 			error = EPERM;
532 		}
533 	}
534 
535 	if (dirp) {
536 		if (v3)
537 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
538 		vrele(dirp);
539 		dirp = NULL;
540 	}
541 
542 	/*
543 	 * Resources at this point:
544 	 *	ndp->ni_vp	may not be NULL
545 	 *
546 	 */
547 
548 	if (error) {
549 		nfsm_reply(NFSX_POSTOPATTR(v3));
550 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
551 		error = 0;
552 		goto nfsmout;
553 	}
554 
555 	/*
556 	 * Clear out some resources prior to potentially blocking.  This
557 	 * is not as critical as ni_dvp resources in other routines, but
558 	 * it helps.
559 	 */
560 	nlookup_done(&nd);
561 
562 	/*
563 	 * Get underlying attribute, then release remaining resources ( for
564 	 * the same potential blocking reason ) and reply.
565 	 */
566 	bzero((caddr_t)fhp, sizeof(nfh));
567 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
568 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
569 	if (!error)
570 		error = VOP_GETATTR(vp, vap);
571 
572 	vput(vp);
573 	vp = NULL;
574 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
575 	if (error) {
576 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
577 		error = 0;
578 		goto nfsmout;
579 	}
580 	nfsm_srvfhtom(fhp, v3);
581 	if (v3) {
582 		nfsm_srvpostop_attr(0, vap);
583 		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
584 	} else {
585 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
586 		nfsm_srvfillattr(vap, fp);
587 	}
588 
589 nfsmout:
590 	if (dirp)
591 		vrele(dirp);
592 	nlookup_done(&nd);		/* may be called twice */
593 	if (vp)
594 		vput(vp);
595 	return (error);
596 }
597 
598 /*
599  * nfs readlink service
600  */
601 int
602 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
603 	       struct thread *td, struct mbuf **mrq)
604 {
605 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
606 	struct sockaddr *nam = nfsd->nd_nam;
607 	caddr_t dpos = nfsd->nd_dpos;
608 	struct ucred *cred = &nfsd->nd_cr;
609 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
610 	struct iovec *ivp = iv;
611 	struct mbuf *mp;
612 	u_int32_t *tl;
613 	int32_t t1;
614 	caddr_t bpos;
615 	int error = 0, rdonly, i, tlen, len, getret;
616 	int v3 = (nfsd->nd_flag & ND_NFSV3);
617 	char *cp2;
618 	struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
619 	struct vnode *vp = NULL;
620 	struct vattr attr;
621 	nfsfh_t nfh;
622 	fhandle_t *fhp;
623 	struct uio io, *uiop = &io;
624 
625 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
626 #ifndef nolint
627 	mp2 = (struct mbuf *)0;
628 #endif
629 	mp3 = NULL;
630 	fhp = &nfh.fh_generic;
631 	nfsm_srvmtofh(fhp);
632 	len = 0;
633 	i = 0;
634 	while (len < NFS_MAXPATHLEN) {
635 		mp = m_getcl(MB_WAIT, MT_DATA, 0);
636 		mp->m_len = MCLBYTES;
637 		if (len == 0)
638 			mp3 = mp2 = mp;
639 		else {
640 			mp2->m_next = mp;
641 			mp2 = mp;
642 		}
643 		if ((len+mp->m_len) > NFS_MAXPATHLEN) {
644 			mp->m_len = NFS_MAXPATHLEN-len;
645 			len = NFS_MAXPATHLEN;
646 		} else
647 			len += mp->m_len;
648 		ivp->iov_base = mtod(mp, caddr_t);
649 		ivp->iov_len = mp->m_len;
650 		i++;
651 		ivp++;
652 	}
653 	uiop->uio_iov = iv;
654 	uiop->uio_iovcnt = i;
655 	uiop->uio_offset = 0;
656 	uiop->uio_resid = len;
657 	uiop->uio_rw = UIO_READ;
658 	uiop->uio_segflg = UIO_SYSSPACE;
659 	uiop->uio_td = NULL;
660 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
661 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
662 	if (error) {
663 		nfsm_reply(2 * NFSX_UNSIGNED);
664 		nfsm_srvpostop_attr(1, (struct vattr *)0);
665 		error = 0;
666 		goto nfsmout;
667 	}
668 	if (vp->v_type != VLNK) {
669 		if (v3)
670 			error = EINVAL;
671 		else
672 			error = ENXIO;
673 		goto out;
674 	}
675 	error = VOP_READLINK(vp, uiop, cred);
676 out:
677 	getret = VOP_GETATTR(vp, &attr);
678 	vput(vp);
679 	vp = NULL;
680 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
681 	if (v3) {
682 		nfsm_srvpostop_attr(getret, &attr);
683 		if (error) {
684 			error = 0;
685 			goto nfsmout;
686 		}
687 	}
688 	if (uiop->uio_resid > 0) {
689 		len -= uiop->uio_resid;
690 		tlen = nfsm_rndup(len);
691 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
692 	}
693 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
694 	*tl = txdr_unsigned(len);
695 	mb->m_next = mp3;
696 	mp3 = NULL;
697 nfsmout:
698 	if (mp3)
699 		m_freem(mp3);
700 	if (vp)
701 		vput(vp);
702 	return(error);
703 }
704 
705 /*
706  * nfs read service
707  */
708 int
709 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
710 	   struct thread *td, struct mbuf **mrq)
711 {
712 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
713 	struct sockaddr *nam = nfsd->nd_nam;
714 	caddr_t dpos = nfsd->nd_dpos;
715 	struct ucred *cred = &nfsd->nd_cr;
716 	struct iovec *iv;
717 	struct iovec *iv2;
718 	struct mbuf *m;
719 	struct nfs_fattr *fp;
720 	u_int32_t *tl;
721 	int32_t t1;
722 	int i;
723 	caddr_t bpos;
724 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
725 	int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
726 	char *cp2;
727 	struct mbuf *mb, *mb2, *mreq;
728 	struct mbuf *m2;
729 	struct vnode *vp = NULL;
730 	nfsfh_t nfh;
731 	fhandle_t *fhp;
732 	struct uio io, *uiop = &io;
733 	struct vattr va, *vap = &va;
734 	struct nfsheur *nh;
735 	off_t off;
736 	int ioflag = 0;
737 
738 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
739 	fhp = &nfh.fh_generic;
740 	nfsm_srvmtofh(fhp);
741 	if (v3) {
742 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
743 		off = fxdr_hyper(tl);
744 	} else {
745 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
746 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
747 	}
748 	nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
749 
750 	/*
751 	 * Reference vp.  If an error occurs, vp will be invalid, but we
752 	 * have to NULL it just in case.  The macros might goto nfsmout
753 	 * as well.
754 	 */
755 
756 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
757 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
758 	if (error) {
759 		vp = NULL;
760 		nfsm_reply(2 * NFSX_UNSIGNED);
761 		nfsm_srvpostop_attr(1, (struct vattr *)0);
762 		error = 0;
763 		goto nfsmout;
764 	}
765 
766 	if (vp->v_type != VREG) {
767 		if (v3)
768 			error = EINVAL;
769 		else
770 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
771 	}
772 	if (!error) {
773 	    if ((error = nfsrv_access(vp, VREAD, cred, rdonly, td, 1)) != 0)
774 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 1);
775 	}
776 	getret = VOP_GETATTR(vp, vap);
777 	if (!error)
778 		error = getret;
779 	if (error) {
780 		vput(vp);
781 		vp = NULL;
782 		nfsm_reply(NFSX_POSTOPATTR(v3));
783 		nfsm_srvpostop_attr(getret, vap);
784 		error = 0;
785 		goto nfsmout;
786 	}
787 
788 	/*
789 	 * Calculate byte count to read
790 	 */
791 
792 	if (off >= vap->va_size)
793 		cnt = 0;
794 	else if ((off + reqlen) > vap->va_size)
795 		cnt = vap->va_size - off;
796 	else
797 		cnt = reqlen;
798 
799 	/*
800 	 * Calculate seqcount for heuristic
801 	 */
802 
803 	{
804 		int hi;
805 		int try = 32;
806 
807 		/*
808 		 * Locate best candidate
809 		 */
810 
811 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
812 		nh = &nfsheur[hi];
813 
814 		while (try--) {
815 			if (nfsheur[hi].nh_vp == vp) {
816 				nh = &nfsheur[hi];
817 				break;
818 			}
819 			if (nfsheur[hi].nh_use > 0)
820 				--nfsheur[hi].nh_use;
821 			hi = (hi + 1) % NUM_HEURISTIC;
822 			if (nfsheur[hi].nh_use < nh->nh_use)
823 				nh = &nfsheur[hi];
824 		}
825 
826 		if (nh->nh_vp != vp) {
827 			nh->nh_vp = vp;
828 			nh->nh_nextr = off;
829 			nh->nh_use = NHUSE_INIT;
830 			if (off == 0)
831 				nh->nh_seqcount = 4;
832 			else
833 				nh->nh_seqcount = 1;
834 		}
835 
836 		/*
837 		 * Calculate heuristic
838 		 */
839 
840 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
841 			if (++nh->nh_seqcount > IO_SEQMAX)
842 				nh->nh_seqcount = IO_SEQMAX;
843 		} else if (nh->nh_seqcount > 1) {
844 			nh->nh_seqcount = 1;
845 		} else {
846 			nh->nh_seqcount = 0;
847 		}
848 		nh->nh_use += NHUSE_INC;
849 		if (nh->nh_use > NHUSE_MAX)
850 			nh->nh_use = NHUSE_MAX;
851 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
852         }
853 
854 	nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
855 	if (v3) {
856 		nfsm_build(tl, u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
857 		*tl++ = nfs_true;
858 		fp = (struct nfs_fattr *)tl;
859 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
860 	} else {
861 		nfsm_build(tl, u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
862 		fp = (struct nfs_fattr *)tl;
863 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
864 	}
865 	len = left = nfsm_rndup(cnt);
866 	if (cnt > 0) {
867 		/*
868 		 * Generate the mbuf list with the uio_iov ref. to it.
869 		 */
870 		i = 0;
871 		m = m2 = mb;
872 		while (left > 0) {
873 			siz = min(M_TRAILINGSPACE(m), left);
874 			if (siz > 0) {
875 				left -= siz;
876 				i++;
877 			}
878 			if (left > 0) {
879 				m = m_getcl(MB_WAIT, MT_DATA, 0);
880 				m->m_len = 0;
881 				m2->m_next = m;
882 				m2 = m;
883 			}
884 		}
885 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
886 		       M_TEMP, M_WAITOK);
887 		uiop->uio_iov = iv2 = iv;
888 		m = mb;
889 		left = len;
890 		i = 0;
891 		while (left > 0) {
892 			if (m == NULL)
893 				panic("nfsrv_read iov");
894 			siz = min(M_TRAILINGSPACE(m), left);
895 			if (siz > 0) {
896 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
897 				iv->iov_len = siz;
898 				m->m_len += siz;
899 				left -= siz;
900 				iv++;
901 				i++;
902 			}
903 			m = m->m_next;
904 		}
905 		uiop->uio_iovcnt = i;
906 		uiop->uio_offset = off;
907 		uiop->uio_resid = len;
908 		uiop->uio_rw = UIO_READ;
909 		uiop->uio_segflg = UIO_SYSSPACE;
910 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
911 		off = uiop->uio_offset;
912 		nh->nh_nextr = off;
913 		FREE((caddr_t)iv2, M_TEMP);
914 		if (error || (getret = VOP_GETATTR(vp, vap))) {
915 			if (!error)
916 				error = getret;
917 			m_freem(mreq);
918 			vput(vp);
919 			vp = NULL;
920 			nfsm_reply(NFSX_POSTOPATTR(v3));
921 			nfsm_srvpostop_attr(getret, vap);
922 			error = 0;
923 			goto nfsmout;
924 		}
925 	} else {
926 		uiop->uio_resid = 0;
927 	}
928 	vput(vp);
929 	vp = NULL;
930 	nfsm_srvfillattr(vap, fp);
931 	tlen = len - uiop->uio_resid;
932 	cnt = cnt < tlen ? cnt : tlen;
933 	tlen = nfsm_rndup(cnt);
934 	if (len != tlen || tlen != cnt)
935 		nfsm_adj(mb, len - tlen, tlen - cnt);
936 	if (v3) {
937 		*tl++ = txdr_unsigned(cnt);
938 		if (len < reqlen)
939 			*tl++ = nfs_true;
940 		else
941 			*tl++ = nfs_false;
942 	}
943 	*tl = txdr_unsigned(cnt);
944 nfsmout:
945 	if (vp)
946 		vput(vp);
947 	return(error);
948 }
949 
950 /*
951  * nfs write service
952  */
953 int
954 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
955 	    struct thread *td, struct mbuf **mrq)
956 {
957 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
958 	struct sockaddr *nam = nfsd->nd_nam;
959 	caddr_t dpos = nfsd->nd_dpos;
960 	struct ucred *cred = &nfsd->nd_cr;
961 	struct iovec *ivp;
962 	int i, cnt;
963 	struct mbuf *mp;
964 	struct nfs_fattr *fp;
965 	struct iovec *iv;
966 	struct vattr va, forat;
967 	struct vattr *vap = &va;
968 	u_int32_t *tl;
969 	int32_t t1;
970 	caddr_t bpos;
971 	int error = 0, rdonly, len, forat_ret = 1;
972 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
973 	int stable = NFSV3WRITE_FILESYNC;
974 	int v3 = (nfsd->nd_flag & ND_NFSV3);
975 	char *cp2;
976 	struct mbuf *mb, *mb2, *mreq;
977 	struct vnode *vp = NULL;
978 	nfsfh_t nfh;
979 	fhandle_t *fhp;
980 	struct uio io, *uiop = &io;
981 	off_t off;
982 
983 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
984 	if (mrep == NULL) {
985 		*mrq = NULL;
986 		error = 0;
987 		goto nfsmout;
988 	}
989 	fhp = &nfh.fh_generic;
990 	nfsm_srvmtofh(fhp);
991 	if (v3) {
992 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
993 		off = fxdr_hyper(tl);
994 		tl += 3;
995 		stable = fxdr_unsigned(int, *tl++);
996 	} else {
997 		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
998 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
999 		tl += 2;
1000 		if (nfs_async)
1001 	    		stable = NFSV3WRITE_UNSTABLE;
1002 	}
1003 	retlen = len = fxdr_unsigned(int32_t, *tl);
1004 	cnt = i = 0;
1005 
1006 	/*
1007 	 * For NFS Version 2, it is not obvious what a write of zero length
1008 	 * should do, but I might as well be consistent with Version 3,
1009 	 * which is to return ok so long as there are no permission problems.
1010 	 */
1011 	if (len > 0) {
1012 	    zeroing = 1;
1013 	    mp = mrep;
1014 	    while (mp) {
1015 		if (mp == md) {
1016 			zeroing = 0;
1017 			adjust = dpos - mtod(mp, caddr_t);
1018 			mp->m_len -= adjust;
1019 			if (mp->m_len > 0 && adjust > 0)
1020 				NFSMADV(mp, adjust);
1021 		}
1022 		if (zeroing)
1023 			mp->m_len = 0;
1024 		else if (mp->m_len > 0) {
1025 			i += mp->m_len;
1026 			if (i > len) {
1027 				mp->m_len -= (i - len);
1028 				zeroing	= 1;
1029 			}
1030 			if (mp->m_len > 0)
1031 				cnt++;
1032 		}
1033 		mp = mp->m_next;
1034 	    }
1035 	}
1036 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1037 		error = EIO;
1038 		nfsm_reply(2 * NFSX_UNSIGNED);
1039 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1040 		error = 0;
1041 		goto nfsmout;
1042 	}
1043 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
1044 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1045 	if (error) {
1046 		vp = NULL;
1047 		nfsm_reply(2 * NFSX_UNSIGNED);
1048 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1049 		error = 0;
1050 		goto nfsmout;
1051 	}
1052 	if (v3)
1053 		forat_ret = VOP_GETATTR(vp, &forat);
1054 	if (vp->v_type != VREG) {
1055 		if (v3)
1056 			error = EINVAL;
1057 		else
1058 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1059 	}
1060 	if (!error) {
1061 		error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1062 	}
1063 	if (error) {
1064 		vput(vp);
1065 		vp = NULL;
1066 		nfsm_reply(NFSX_WCCDATA(v3));
1067 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1068 		error = 0;
1069 		goto nfsmout;
1070 	}
1071 
1072 	if (len > 0) {
1073 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1074 		M_WAITOK);
1075 	    uiop->uio_iov = iv = ivp;
1076 	    uiop->uio_iovcnt = cnt;
1077 	    mp = mrep;
1078 	    while (mp) {
1079 		if (mp->m_len > 0) {
1080 			ivp->iov_base = mtod(mp, caddr_t);
1081 			ivp->iov_len = mp->m_len;
1082 			ivp++;
1083 		}
1084 		mp = mp->m_next;
1085 	    }
1086 
1087 	    /*
1088 	     * XXX
1089 	     * The IO_METASYNC flag indicates that all metadata (and not just
1090 	     * enough to ensure data integrity) mus be written to stable storage
1091 	     * synchronously.
1092 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1093 	     */
1094 	    if (stable == NFSV3WRITE_UNSTABLE)
1095 		ioflags = IO_NODELOCKED;
1096 	    else if (stable == NFSV3WRITE_DATASYNC)
1097 		ioflags = (IO_SYNC | IO_NODELOCKED);
1098 	    else
1099 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1100 	    uiop->uio_resid = len;
1101 	    uiop->uio_rw = UIO_WRITE;
1102 	    uiop->uio_segflg = UIO_SYSSPACE;
1103 	    uiop->uio_td = NULL;
1104 	    uiop->uio_offset = off;
1105 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1106 	    nfsstats.srvvop_writes++;
1107 	    FREE((caddr_t)iv, M_TEMP);
1108 	}
1109 	aftat_ret = VOP_GETATTR(vp, vap);
1110 	vput(vp);
1111 	vp = NULL;
1112 	if (!error)
1113 		error = aftat_ret;
1114 	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
1115 		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
1116 	if (v3) {
1117 		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1118 		if (error) {
1119 			error = 0;
1120 			goto nfsmout;
1121 		}
1122 		nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1123 		*tl++ = txdr_unsigned(retlen);
1124 		/*
1125 		 * If nfs_async is set, then pretend the write was FILESYNC.
1126 		 */
1127 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1128 			*tl++ = txdr_unsigned(stable);
1129 		else
1130 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1131 		/*
1132 		 * Actually, there is no need to txdr these fields,
1133 		 * but it may make the values more human readable,
1134 		 * for debugging purposes.
1135 		 */
1136 		if (nfsver.tv_sec == 0)
1137 			nfsver = boottime;
1138 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1139 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1140 	} else {
1141 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1142 		nfsm_srvfillattr(vap, fp);
1143 	}
1144 nfsmout:
1145 	if (vp)
1146 		vput(vp);
1147 	return(error);
1148 }
1149 
1150 /*
1151  * NFS write service with write gathering support. Called when
1152  * nfsrvw_procrastinate > 0.
1153  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1154  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1155  * Jan. 1994.
 *
 * The function runs in three phases:
 *  1. If *ndp is non-NULL, that write request is parsed, its mbuf chain is
 *     trimmed down to the write data, and the descriptor is queued on
 *     slp->ns_tq (sorted by nd_time).  If it parsed cleanly it is also put
 *     on the per-file-handle delay hash, where contiguous requests are
 *     coalesced.  *ndp is set to NULL.
 *  2. For every queued descriptor whose nd_time has passed and which has no
 *     reply yet, the write is issued with VOP_WRITE() and reply mbufs are
 *     built for it and for every descriptor coalesced into it.
 *  3. At most one descriptor that has a reply is removed from the time
 *     queue and handed back through *ndp, with its reply in *mrq.  *mrq
 *     is left NULL when no reply is ready.
 *
 * The function always returns 0.
1156  */
1157 int
1158 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1159 		  struct thread *td, struct mbuf **mrq)
1160 {
1161 	struct iovec *ivp;
1162 	struct mbuf *mp;
1163 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1164 	struct nfs_fattr *fp;
1165 	int i;
1166 	struct iovec *iov;
1167 	struct nfsrvw_delayhash *wpp;
1168 	struct ucred *cred;
1169 	struct vattr va, forat;
1170 	u_int32_t *tl;
1171 	int32_t t1;
1172 	caddr_t bpos, dpos;
1173 	int error = 0, rdonly, len, forat_ret = 1;
1174 	int ioflags, aftat_ret = 1, adjust, v3, zeroing;
1175 	char *cp2;
1176 	struct mbuf *mb, *mb2, *mreq, *mrep, *md;
1177 	struct vnode *vp = NULL;
1178 	struct uio io, *uiop = &io;
1179 	u_quad_t cur_usec;
1180 
1181 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1182 #ifndef nolint
1183 	i = 0;
1184 	len = 0;
1185 #endif
1186 	*mrq = NULL;
	/*
	 * Phase 1: take over the new request, if the caller supplied one.
	 */
1187 	if (*ndp) {
1188 	    nfsd = *ndp;
1189 	    *ndp = NULL;
1190 	    mrep = nfsd->nd_mrep;
1191 	    md = nfsd->nd_md;
1192 	    dpos = nfsd->nd_dpos;
1193 	    cred = &nfsd->nd_cr;
1194 	    v3 = (nfsd->nd_flag & ND_NFSV3);
1195 	    LIST_INIT(&nfsd->nd_coalesce);
1196 	    nfsd->nd_mreq = NULL;
1197 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1198 	    cur_usec = nfs_curusec();
	    /* Time at which this write becomes due in phase 2. */
1199 	    nfsd->nd_time = cur_usec +
1200 		(v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1201 
1202 	    /*
1203 	     * Now, get the write header..
1204 	     */
1205 	    nfsm_srvmtofh(&nfsd->nd_fh);
1206 	    if (v3) {
1207 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1208 		nfsd->nd_off = fxdr_hyper(tl);
1209 		tl += 3;
1210 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1211 	    } else {
1212 		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1213 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1214 		tl += 2;
1215 		if (nfs_async)
1216 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1217 	    }
1218 	    len = fxdr_unsigned(int32_t, *tl);
1219 	    nfsd->nd_len = len;
1220 	    nfsd->nd_eoff = nfsd->nd_off + len;
1221 
1222 	    /*
1223 	     * Trim the header out of the mbuf list and trim off any trailing
1224 	     * junk so that the mbuf list has only the write data.
	     *
	     * 'zeroing' is set while walking mbufs that hold no write data
	     * (before md, or after len bytes have been seen); their m_len is
	     * forced to 0.  'i' accumulates the data bytes found.
1225 	     */
1226 	    zeroing = 1;
1227 	    i = 0;
1228 	    mp = mrep;
1229 	    while (mp) {
1230 		if (mp == md) {
1231 		    zeroing = 0;
1232 		    adjust = dpos - mtod(mp, caddr_t);
1233 		    mp->m_len -= adjust;
1234 		    if (mp->m_len > 0 && adjust > 0)
1235 			NFSMADV(mp, adjust);
1236 		}
1237 		if (zeroing)
1238 		    mp->m_len = 0;
1239 		else {
1240 		    i += mp->m_len;
1241 		    if (i > len) {
1242 			mp->m_len -= (i - len);
1243 			zeroing = 1;
1244 		    }
1245 		}
1246 		mp = mp->m_next;
1247 	    }
	    /*
	     * Reject an out-of-range length or a request carrying fewer data
	     * bytes than it claims.  The nfsmout label sits inside this block
	     * (assumption: the nfsm_* parsing macros above jump to it on a
	     * malformed request; their bodies are not visible here).  The
	     * request gets an immediate EIO reply.  nd_time = 0 makes it
	     * sort to the head of the time queue below, and nd_mrep = NULL
	     * keeps it off the delay hash.
	     */
1248 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1249 nfsmout:
1250 		m_freem(mrep);
1251 		error = EIO;
1252 		nfsm_writereply(2 * NFSX_UNSIGNED, v3);
1253 		if (v3)
1254 		    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1255 		nfsd->nd_mreq = mreq;
1256 		nfsd->nd_mrep = NULL;
1257 		nfsd->nd_time = 0;
1258 	    }
1259 
1260 	    /*
1261 	     * Add this entry to the hash and time queues.
1262 	     */
1263 	    crit_enter();
1264 	    owp = NULL;
1265 	    wp = slp->ns_tq.lh_first;
	    /* Find the insertion point that keeps ns_tq ordered by nd_time. */
1266 	    while (wp && wp->nd_time < nfsd->nd_time) {
1267 		owp = wp;
1268 		wp = wp->nd_tq.le_next;
1269 	    }
1270 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1271 	    if (owp) {
1272 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1273 	    } else {
1274 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1275 	    }
1276 	    if (nfsd->nd_mrep) {
1277 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1278 		owp = NULL;
1279 		wp = wpp->lh_first;
		/*
		 * Skip entries for other file handles, then entries for the
		 * same file handle that start at a lower offset.
		 */
1280 		while (wp &&
1281 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1282 		    owp = wp;
1283 		    wp = wp->nd_hash.le_next;
1284 		}
1285 		while (wp && wp->nd_off < nfsd->nd_off &&
1286 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1287 		    owp = wp;
1288 		    wp = wp->nd_hash.le_next;
1289 		}
1290 		if (owp) {
1291 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1292 
1293 		    /*
1294 		     * Search the hash list for overlapping entries and
1295 		     * coalesce.
1296 		     */
1297 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1298 			wp = nfsd->nd_hash.le_next;
1299 			if (NFSW_SAMECRED(owp, nfsd))
1300 			    nfsrvw_coalesce(owp, nfsd);
1301 		    }
1302 		} else {
1303 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1304 		}
1305 	    }
1306 	    crit_exit();
1307 	}
1308 
1309 	/*
1310 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1311 	 * and generate the associated reply mbuf list(s).
	 *
	 * Phase 2.  The scan restarts from the head of the queue (goto loop1)
	 * after each write because the critical section is dropped while the
	 * write is in progress.
1312 	 */
1313 loop1:
1314 	cur_usec = nfs_curusec();
1315 	crit_enter();
1316 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1317 		owp = nfsd->nd_tq.le_next;
		/* The queue is time ordered: nothing further is due yet. */
1318 		if (nfsd->nd_time > cur_usec)
1319 		    break;
		/* Already has a reply; it is picked up by the final search. */
1320 		if (nfsd->nd_mreq)
1321 		    continue;
1322 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1323 		LIST_REMOVE(nfsd, nd_tq);
1324 		LIST_REMOVE(nfsd, nd_hash);
1325 		crit_exit();
1326 		mrep = nfsd->nd_mrep;
1327 		nfsd->nd_mrep = NULL;
1328 		cred = &nfsd->nd_cr;
1329 		v3 = (nfsd->nd_flag & ND_NFSV3);
1330 		forat_ret = aftat_ret = 1;
1331 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
1332 		    nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1333 		if (!error) {
1334 		    if (v3)
1335 			forat_ret = VOP_GETATTR(vp, &forat);
1336 		    if (vp->v_type != VREG) {
1337 			if (v3)
1338 			    error = EINVAL;
1339 			else
1340 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1341 		    }
1342 		} else {
1343 		    vp = NULL;
1344 		}
1345 		if (!error) {
1346 		    error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1347 		}
1348 
		/* Map the requested stability level onto VOP_WRITE() flags. */
1349 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1350 		    ioflags = IO_NODELOCKED;
1351 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1352 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1353 		else
1354 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1355 		uiop->uio_rw = UIO_WRITE;
1356 		uiop->uio_segflg = UIO_SYSSPACE;
1357 		uiop->uio_td = NULL;
1358 		uiop->uio_offset = nfsd->nd_off;
1359 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1360 		if (uiop->uio_resid > 0) {
		    /*
		     * Build one iovec per non-empty mbuf: first count them,
		     * then fill the array.  The array is built and freed even
		     * when an earlier error means the write is skipped.
		     */
1361 		    mp = mrep;
1362 		    i = 0;
1363 		    while (mp) {
1364 			if (mp->m_len > 0)
1365 			    i++;
1366 			mp = mp->m_next;
1367 		    }
1368 		    uiop->uio_iovcnt = i;
1369 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1370 			M_TEMP, M_WAITOK);
1371 		    uiop->uio_iov = ivp = iov;
1372 		    mp = mrep;
1373 		    while (mp) {
1374 			if (mp->m_len > 0) {
1375 			    ivp->iov_base = mtod(mp, caddr_t);
1376 			    ivp->iov_len = mp->m_len;
1377 			    ivp++;
1378 			}
1379 			mp = mp->m_next;
1380 		    }
1381 		    if (!error) {
1382 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1383 			nfsstats.srvvop_writes++;
1384 		    }
1385 		    FREE((caddr_t)iov, M_TEMP);
1386 		}
1387 		m_freem(mrep);
1388 		if (vp) {
1389 		    aftat_ret = VOP_GETATTR(vp, &va);
1390 		    vput(vp);
1391 		    vp = NULL;
1392 		}
1393 
1394 		/*
1395 		 * Loop around generating replies for all write rpcs that have
1396 		 * now been completed.
		 *
		 * swp is the descriptor the write was issued for.  nfsd starts
		 * as swp and then takes each descriptor found on
		 * swp->nd_coalesce in turn.  The same 'error' value is used
		 * for every reply built here.
1397 		 */
1398 		swp = nfsd;
1399 		do {
1400 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1401 		    if (error) {
1402 			nfsm_writereply(NFSX_WCCDATA(v3), v3);
1403 			if (v3) {
1404 			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1405 			}
1406 		    } else {
1407 			nfsm_writereply(NFSX_PREOPATTR(v3) +
1408 			    NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
1409 			    NFSX_WRITEVERF(v3), v3);
1410 			if (v3) {
1411 			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1412 			    nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			    /*
			     * The count is this request's own length; the
			     * stability level reported is that of swp.
			     */
1413 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1414 			    *tl++ = txdr_unsigned(swp->nd_stable);
1415 			    /*
1416 			     * Actually, there is no need to txdr these fields,
1417 			     * but it may make the values more human readable,
1418 			     * for debugging purposes.
1419 			     */
1420 			    if (nfsver.tv_sec == 0)
1421 				    nfsver = boottime;
1422 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1423 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1424 			} else {
1425 			    nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1426 			    nfsm_srvfillattr(&va, fp);
1427 			}
1428 		    }
1429 		    nfsd->nd_mreq = mreq;
1430 		    if (nfsd->nd_mrep)
1431 			panic("nfsrv_write: nd_mrep not free");
1432 
1433 		    /*
1434 		     * Done. Put it at the head of the timer queue so that
1435 		     * the final phase can return the reply.
		     *
		     * swp itself is re-queued after the loop.  Coalesced
		     * descriptors are linked on nd_coalesce through their
		     * nd_tq field, so the next one is unlinked from there
		     * with LIST_REMOVE(nfsd, nd_tq).
1436 		     */
1437 		    crit_enter();
1438 		    if (nfsd != swp) {
1439 			nfsd->nd_time = 0;
1440 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1441 		    }
1442 		    nfsd = swp->nd_coalesce.lh_first;
1443 		    if (nfsd) {
1444 			LIST_REMOVE(nfsd, nd_tq);
1445 		    }
1446 		    crit_exit();
1447 		} while (nfsd);
1448 		crit_enter();
1449 		swp->nd_time = 0;
1450 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1451 		crit_exit();
1452 		goto loop1;
1453 	}
1454 	crit_exit();
1455 
1456 	/*
1457 	 * Search for a reply to return.
	 *
	 * Phase 3.  The first queued descriptor that has a reply is removed
	 * from the time queue and returned through *ndp and *mrq.
1458 	 */
1459 	crit_enter();
1460 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
1461 		if (nfsd->nd_mreq) {
1462 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1463 		    LIST_REMOVE(nfsd, nd_tq);
1464 		    *mrq = nfsd->nd_mreq;
1465 		    *ndp = nfsd;
1466 		    break;
1467 		}
1468 	crit_exit();
1469 	return (0);
1470 }
1471 
1472 /*
1473  * Coalesce the write request nfsd into owp. To do this we must:
1474  * - remove nfsd from the queues
1475  * - merge nfsd->nd_mrep into owp->nd_mrep
1476  * - update the nd_eoff and nd_stable for owp
1477  * - put nfsd on owp's nd_coalesce list
1478  * NB: Must be called at splsoftclock().
1479  */
1480 static void
1481 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1482 {
1483         int overlap;
1484         struct mbuf *mp;
1485 	struct nfsrv_descript *p;
1486 
1487 	NFS_DPF(WG, ("C%03x-%03x",
1488 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1489         LIST_REMOVE(nfsd, nd_hash);
1490         LIST_REMOVE(nfsd, nd_tq);
1491         if (owp->nd_eoff < nfsd->nd_eoff) {
1492             overlap = owp->nd_eoff - nfsd->nd_off;
1493             if (overlap < 0)
1494                 panic("nfsrv_coalesce: bad off");
1495             if (overlap > 0)
1496                 m_adj(nfsd->nd_mrep, overlap);
1497             mp = owp->nd_mrep;
1498             while (mp->m_next)
1499                 mp = mp->m_next;
1500             mp->m_next = nfsd->nd_mrep;
1501             owp->nd_eoff = nfsd->nd_eoff;
1502         } else
1503             m_freem(nfsd->nd_mrep);
1504         nfsd->nd_mrep = NULL;
1505         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1506             owp->nd_stable = NFSV3WRITE_FILESYNC;
1507         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1508             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1509             owp->nd_stable = NFSV3WRITE_DATASYNC;
1510         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1511 
1512 	/*
1513 	 * If nfsd had anything else coalesced into it, transfer them
1514 	 * to owp, otherwise their replies will never get sent.
1515 	 */
1516 	for (p = nfsd->nd_coalesce.lh_first; p;
1517 	     p = nfsd->nd_coalesce.lh_first) {
1518 	    LIST_REMOVE(p, nd_tq);
1519 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1520 	}
1521 }
1522 
1523 /*
1524  * nfs create service
1525  * now does a truncate to 0 length via. setattr if it already exists
 *
 * Outline:
 * - decode the directory file handle and the name, then look the name up
 *   with nfs_namei(NAMEI_CREATE)
 * - decode the creation arguments (v3: create mode plus sattr or create
 *   verifier; v2: nfsv2_sattr)
 * - if the target does not exist, create it with VOP_NCREATE() (regular
 *   file or socket) or VOP_NMKNOD() (char/block device or FIFO); any other
 *   type yields ENXIO
 * - if the target exists and a size was supplied, check write access and
 *   apply the size with VOP_SETATTR()
 * - build the reply: file handle and attributes, plus directory wcc data
 *   for v3
 *
 * All vnodes still held are released at nfsmout.
1526  */
1527 int
1528 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1529 	     struct thread *td, struct mbuf **mrq)
1530 {
1531 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1532 	struct sockaddr *nam = nfsd->nd_nam;
1533 	caddr_t dpos = nfsd->nd_dpos;
1534 	struct ucred *cred = &nfsd->nd_cr;
1535 	struct nfs_fattr *fp;
1536 	struct vattr va, dirfor, diraft;
1537 	struct vattr *vap = &va;
1538 	struct nfsv2_sattr *sp;
1539 	u_int32_t *tl;
1540 	struct nlookupdata nd;
1541 	int32_t t1;
1542 	caddr_t bpos;
1543 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1544 	udev_t rdev = NOUDEV;
1545 	int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1546 	caddr_t cp;
1547 	char *cp2;
1548 	struct mbuf *mb, *mb2, *mreq;
1549 	struct vnode *dirp;
1550 	struct vnode *dvp;
1551 	struct vnode *vp;
1552 	nfsfh_t nfh;
1553 	fhandle_t *fhp;
1554 	u_quad_t tempsize;
1555 	u_char cverf[NFSX_V3CREATEVERF];
1556 
1557 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/*
	 * Put nd and the vnode pointers into a known state before any nfsm_*
	 * macro runs, since those may jump to the cleanup code at nfsmout.
	 */
1558 	nlookup_zero(&nd);
1559 	dirp = NULL;
1560 	dvp = NULL;
1561 	vp = NULL;
1562 
1563 	fhp = &nfh.fh_generic;
1564 	nfsm_srvmtofh(fhp);
1565 	nfsm_srvnamesiz(len);
1566 
1567 	/*
1568 	 * Call namei and do initial cleanup to get a few things
1569 	 * out of the way.  If we get an initial error we cleanup
1570 	 * and return here to avoid special-casing the invalid nd
1571 	 * structure through the rest of the case.  dirp may be
1572 	 * set even if an error occurs, but the nd structure will not
1573 	 * be valid at all if an error occurs so we have to invalidate it
1574 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1575 	 */
1576 	error = nfs_namei(&nd, cred, NAMEI_CREATE, &dvp, &vp,
1577 			  fhp, len, slp, nam, &md, &dpos, &dirp,
1578 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/*
	 * dirp is only needed for the v3 wcc data; for v2 it is dropped
	 * right away.
	 */
1579 	if (dirp) {
1580 		if (v3) {
1581 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1582 		} else {
1583 			vrele(dirp);
1584 			dirp = NULL;
1585 		}
1586 	}
1587 	if (error) {
1588 		nfsm_reply(NFSX_WCCDATA(v3));
1589 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1590 		error = 0;
1591 		goto nfsmout;
1592 	}
1593 
1594 	/*
1595 	 * No error.  Continue.  State:
1596 	 *
1597 	 *	dirp 		may be valid
1598 	 *	vp		may be valid or NULL if the target does not
1599 	 *			exist.
1600 	 *	dvp		is valid
1601 	 *
1602 	 * The error state is set through the code and we may also do some
1603 	 * opportunistic releasing of vnodes to avoid holding locks through
1604 	 * NFS I/O.  The cleanup at the end is a catch-all
1605 	 */
1606 
1607 	VATTR_NULL(vap);
1608 	if (v3) {
1609 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1610 		how = fxdr_unsigned(int, *tl);
1611 		switch (how) {
1612 		case NFSV3CREATE_GUARDED:
1613 			if (vp) {
1614 				error = EEXIST;
1615 				break;
1616 			}
1617 			/* fall through */
1618 		case NFSV3CREATE_UNCHECKED:
1619 			nfsm_srvsattr(vap);
1620 			break;
1621 		case NFSV3CREATE_EXCLUSIVE:
			/* Save the client's create verifier for later use. */
1622 			nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF);
1623 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1624 			exclusive_flag = 1;
1625 			break;
1626 		};
1627 		vap->va_type = VREG;
1628 	} else {
1629 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1630 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1631 		if (vap->va_type == VNON)
1632 			vap->va_type = VREG;
1633 		vap->va_mode = nfstov_mode(sp->sa_mode);
		/*
		 * For v2, sa_size carries the file size for regular files
		 * and the device number for device/FIFO nodes.
		 */
1634 		switch (vap->va_type) {
1635 		case VREG:
1636 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1637 			if (tsize != -1)
1638 				vap->va_size = (u_quad_t)tsize;
1639 			break;
1640 		case VCHR:
1641 		case VBLK:
1642 		case VFIFO:
1643 			rdev = fxdr_unsigned(long, sp->sa_size);
1644 			break;
1645 		default:
1646 			break;
1647 		};
1648 	}
1649 
1650 	/*
1651 	 * Iff doesn't exist, create it
1652 	 * otherwise just truncate to 0 length
1653 	 *   should I set the mode too ?
1654 	 *
1655 	 * The only possible error we can have at this point is EEXIST.
1656 	 * vp will also be non-NULL in that case.
1657 	 */
1658 	if (vp == NULL) {
1659 		if (vap->va_mode == (mode_t)VNOVAL)
1660 			vap->va_mode = 0;
1661 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
			/* Release dvp before calling into VOP_NCREATE(). */
1662 			vput(dvp);
1663 			dvp = NULL;
1664 			error = VOP_NCREATE(&nd.nl_nch, &vp, nd.nl_cred, vap);
1665 			if (error == 0) {
				/*
				 * Exclusive create of a new file: store the
				 * verifier in the atime field.  The flag is
				 * cleared, so the verifier comparison further
				 * down only runs when the file already existed.
				 */
1666 				if (exclusive_flag) {
1667 					exclusive_flag = 0;
1668 					VATTR_NULL(vap);
1669 					bcopy(cverf, (caddr_t)&vap->va_atime,
1670 						NFSX_V3CREATEVERF);
1671 					error = VOP_SETATTR(vp, vap, cred);
1672 				}
1673 			}
1674 		} else if (
1675 			vap->va_type == VCHR ||
1676 			vap->va_type == VBLK ||
1677 			vap->va_type == VFIFO
1678 		) {
1679 			/*
1680 			 * Handle SysV FIFO node special cases.  All other
1681 			 * devices require super user to access.
1682 			 */
1683 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1684 				vap->va_type = VFIFO;
1685                         if (vap->va_type != VFIFO &&
1686                             (error = suser_cred(cred, 0))) {
1687 				goto nfsmreply0;
1688                         }
1689 			vap->va_rmajor = umajor(rdev);
1690 			vap->va_rminor = uminor(rdev);
1691 
1692 			vput(dvp);
1693 			dvp = NULL;
1694 			error = VOP_NMKNOD(&nd.nl_nch, &vp, nd.nl_cred, vap);
1695 			if (error)
1696 				goto nfsmreply0;
1697 #if 0
1698 			/*
1699 			 * XXX what is this junk supposed to do ?
1700 			 */
1701 
1702 			vput(vp);
1703 			vp = NULL;
1704 
1705 			/*
1706 			 * release dvp prior to lookup
1707 			 */
1708 			vput(dvp);
1709 			dvp = NULL;
1710 
1711 			/*
1712 			 * Setup for lookup.
1713 			 *
1714 			 * Even though LOCKPARENT was cleared, ni_dvp may
1715 			 * be garbage.
1716 			 */
1717 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1718 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1719 			nd.ni_cnd.cn_td = td;
1720 			nd.ni_cnd.cn_cred = cred;
1721 
1722 			error = lookup(&nd);
1723 			nd.ni_dvp = NULL;
1724 
1725 			if (error != 0) {
1726 				nfsm_reply(0);
1727 				/* fall through on certain errors */
1728 			}
1729 			nfsrv_object_create(nd.ni_vp);
1730 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1731 				error = EINVAL;
1732 				goto nfsmreply0;
1733 			}
1734 #endif
1735 		} else {
1736 			error = ENXIO;
1737 		}
1738 	} else {
		/*
		 * Target already exists.  A size other than -1 means the
		 * request supplied one (presumably -1 is the VNOVAL value
		 * left by VATTR_NULL() - confirm); apply only the size.
		 */
1739 		if (vap->va_size != -1) {
1740 			error = nfsrv_access(vp, VWRITE, cred,
1741 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1742 			if (!error) {
1743 				tempsize = vap->va_size;
1744 				VATTR_NULL(vap);
1745 				vap->va_size = tempsize;
1746 				error = VOP_SETATTR(vp, vap, cred);
1747 			}
1748 		}
1749 	}
1750 
	/* On success, build the file handle and fetch attributes for the reply. */
1751 	if (!error) {
1752 		bzero((caddr_t)fhp, sizeof(nfh));
1753 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1754 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1755 		if (!error)
1756 			error = VOP_GETATTR(vp, vap);
1757 	}
1758 	if (v3) {
		/*
		 * Exclusive create on a file that already existed: it only
		 * succeeds if the stored verifier matches the client's.
		 */
1759 		if (exclusive_flag && !error &&
1760 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1761 			error = EEXIST;
		/*
		 * NOTE(review): dirp is used here without a NULL check even
		 * though the state comment above says it only "may be valid";
		 * confirm that nfs_namei() always sets it on success.
		 */
1762 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1763 		vrele(dirp);
1764 		dirp = NULL;
1765 	}
1766 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1767 	if (v3) {
1768 		if (!error) {
1769 			nfsm_srvpostop_fh(fhp);
1770 			nfsm_srvpostop_attr(0, vap);
1771 		}
1772 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1773 		error = 0;
1774 	} else {
1775 		nfsm_srvfhtom(fhp, v3);
1776 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1777 		nfsm_srvfillattr(vap, fp);
1778 	}
1779 	goto nfsmout;
1780 
	/* Error exit used by the device-node path: reply with no body. */
1781 nfsmreply0:
1782 	nfsm_reply(0);
1783 	error = 0;
1784 	/* fall through */
1785 
	/* Catch-all cleanup: release whatever is still held. */
1786 nfsmout:
1787 	if (dirp)
1788 		vrele(dirp);
1789 	nlookup_done(&nd);
1790 	if (dvp) {
1791 		if (dvp == vp)
1792 			vrele(dvp);
1793 		else
1794 			vput(dvp);
1795 	}
1796 	if (vp)
1797 		vput(vp);
1798 	return (error);
1799 }
1800 
1801 /*
1802  * nfs v3 mknod service
1803  */
1804 int
1805 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1806 	    struct thread *td, struct mbuf **mrq)
1807 {
1808 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1809 	struct sockaddr *nam = nfsd->nd_nam;
1810 	caddr_t dpos = nfsd->nd_dpos;
1811 	struct ucred *cred = &nfsd->nd_cr;
1812 	struct vattr va, dirfor, diraft;
1813 	struct vattr *vap = &va;
1814 	u_int32_t *tl;
1815 	struct nlookupdata nd;
1816 	int32_t t1;
1817 	caddr_t bpos;
1818 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1819 	enum vtype vtyp;
1820 	char *cp2;
1821 	struct mbuf *mb, *mb2, *mreq;
1822 	struct vnode *dirp;
1823 	struct vnode *dvp;
1824 	struct vnode *vp;
1825 	nfsfh_t nfh;
1826 	fhandle_t *fhp;
1827 
1828 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1829 	nlookup_zero(&nd);
1830 	dirp = NULL;
1831 	dvp = NULL;
1832 	vp = NULL;
1833 
1834 	fhp = &nfh.fh_generic;
1835 	nfsm_srvmtofh(fhp);
1836 	nfsm_srvnamesiz(len);
1837 
1838 	/*
1839 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1840 	 * is not valid.  However, nfsm_*() routines may still jump to
1841 	 * nfsmout.
1842 	 */
1843 
1844 	error = nfs_namei(&nd, cred, NAMEI_CREATE, &dvp, &vp,
1845 			  fhp, len, slp, nam, &md, &dpos, &dirp,
1846 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1847 	if (dirp)
1848 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1849 	if (error) {
1850 		nfsm_reply(NFSX_WCCDATA(1));
1851 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1852 		error = 0;
1853 		goto nfsmout;
1854 	}
1855 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1856 	vtyp = nfsv3tov_type(*tl);
1857 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1858 		error = NFSERR_BADTYPE;
1859 		goto out;
1860 	}
1861 	VATTR_NULL(vap);
1862 	nfsm_srvsattr(vap);
1863 	if (vtyp == VCHR || vtyp == VBLK) {
1864 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1865 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1866 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1867 	}
1868 
1869 	/*
1870 	 * Iff doesn't exist, create it.
1871 	 */
1872 	if (vp) {
1873 		error = EEXIST;
1874 		goto out;
1875 	}
1876 	vap->va_type = vtyp;
1877 	if (vap->va_mode == (mode_t)VNOVAL)
1878 		vap->va_mode = 0;
1879 	if (vtyp == VSOCK) {
1880 		error = VOP_NCREATE(&nd.nl_nch, &vp, nd.nl_cred, vap);
1881 	} else {
1882 		if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
1883 			goto out;
1884 
1885 		error = VOP_NMKNOD(&nd.nl_nch, &vp, nd.nl_cred, vap);
1886 		if (error)
1887 			goto out;
1888 
1889 #if 0
1890 		vput(vp);
1891 		vp = NULL;
1892 
1893 		/*
1894 		 * Release dvp prior to lookup
1895 		 */
1896 		vput(dvp);
1897 		dvp = NULL;
1898 
1899 		/*
1900 		 * XXX what is this stuff for?
1901 		 */
1902 		KKASSERT(td->td_proc);
1903 		nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1904 		nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1905 		nd.ni_cnd.cn_td = td;
1906 		nd.ni_cnd.cn_cred = td->td_proc->p_ucred;
1907 
1908 		error = lookup(&nd);
1909 		nd.ni_dvp = NULL;
1910 
1911 		if (error)
1912 			goto out;
1913 		if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK)
1914 			error = EINVAL;
1915 #endif
1916 	}
1917 
1918 	/*
1919 	 * send response, cleanup, return.
1920 	 */
1921 out:
1922 	nlookup_done(&nd);
1923 	if (dvp) {
1924 		if (dvp == vp)
1925 			vrele(dvp);
1926 		else
1927 			vput(dvp);
1928 		dvp = NULL;
1929 	}
1930 	if (!error) {
1931 		bzero((caddr_t)fhp, sizeof(nfh));
1932 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1933 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1934 		if (!error)
1935 			error = VOP_GETATTR(vp, vap);
1936 	}
1937 	if (vp) {
1938 		vput(vp);
1939 		vp = NULL;
1940 	}
1941 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1942 	if (dirp) {
1943 		vrele(dirp);
1944 		dirp = NULL;
1945 	}
1946 	nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
1947 	if (!error) {
1948 		nfsm_srvpostop_fh(fhp);
1949 		nfsm_srvpostop_attr(0, vap);
1950 	}
1951 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1952 	return (0);
1953 nfsmout:
1954 	if (dirp)
1955 		vrele(dirp);
1956 	nlookup_done(&nd);
1957 	if (dvp) {
1958 		if (dvp == vp)
1959 			vrele(dvp);
1960 		else
1961 			vput(dvp);
1962 	}
1963 	if (vp)
1964 		vput(vp);
1965 	return (error);
1966 }
1967 
1968 /*
1969  * nfs remove service
1970  */
1971 int
1972 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1973 	     struct thread *td, struct mbuf **mrq)
1974 {
1975 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1976 	struct sockaddr *nam = nfsd->nd_nam;
1977 	caddr_t dpos = nfsd->nd_dpos;
1978 	struct ucred *cred = &nfsd->nd_cr;
1979 	struct nlookupdata nd;
1980 	u_int32_t *tl;
1981 	int32_t t1;
1982 	caddr_t bpos;
1983 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1984 	int v3 = (nfsd->nd_flag & ND_NFSV3);
1985 	char *cp2;
1986 	struct mbuf *mb, *mreq;
1987 	struct vnode *dirp;
1988 	struct vnode *dvp;
1989 	struct vnode *vp;
1990 	struct vattr dirfor, diraft;
1991 	nfsfh_t nfh;
1992 	fhandle_t *fhp;
1993 
1994 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1995 	nlookup_zero(&nd);
1996 	dirp = NULL;
1997 	dvp = NULL;
1998 	vp = NULL;
1999 
2000 	fhp = &nfh.fh_generic;
2001 	nfsm_srvmtofh(fhp);
2002 	nfsm_srvnamesiz(len);
2003 
2004 	error = nfs_namei(&nd, cred, NAMEI_DELETE, &dvp, &vp,
2005 			  fhp, len, slp, nam, &md, &dpos, &dirp,
2006 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2007 	if (dirp) {
2008 		if (v3)
2009 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2010 	}
2011 	if (error == 0) {
2012 		if (vp->v_type == VDIR) {
2013 			error = EPERM;		/* POSIX */
2014 			goto out;
2015 		}
2016 		/*
2017 		 * The root of a mounted filesystem cannot be deleted.
2018 		 */
2019 		if (vp->v_flag & VROOT) {
2020 			error = EBUSY;
2021 			goto out;
2022 		}
2023 out:
2024 		if (!error) {
2025 			if (dvp) {
2026 				if (dvp == vp)
2027 					vrele(dvp);
2028 				else
2029 					vput(dvp);
2030 				dvp = NULL;
2031 			}
2032 			if (vp) {
2033 				vput(vp);
2034 				vp = NULL;
2035 			}
2036 			error = VOP_NREMOVE(&nd.nl_nch, nd.nl_cred);
2037 		}
2038 	}
2039 	if (dirp && v3)
2040 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2041 	nfsm_reply(NFSX_WCCDATA(v3));
2042 	if (v3) {
2043 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2044 		error = 0;
2045 	}
2046 nfsmout:
2047 	nlookup_done(&nd);
2048 	if (dirp)
2049 		vrele(dirp);
2050 	if (dvp) {
2051 		if (dvp == vp)
2052 			vrele(dvp);
2053 		else
2054 			vput(dvp);
2055 	}
2056 	if (vp)
2057 		vput(vp);
2058 	return(error);
2059 }
2060 
2061 /*
2062  * nfs rename service
2063  */
2064 int
2065 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2066 	     struct thread *td, struct mbuf **mrq)
2067 {
2068 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2069 	struct sockaddr *nam = nfsd->nd_nam;
2070 	caddr_t dpos = nfsd->nd_dpos;
2071 	struct ucred *cred = &nfsd->nd_cr;
2072 	u_int32_t *tl;
2073 	int32_t t1;
2074 	caddr_t bpos;
2075 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2076 	int tdirfor_ret = 1, tdiraft_ret = 1;
2077 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2078 	char *cp2;
2079 	struct mbuf *mb, *mreq;
2080 	struct nlookupdata fromnd, tond;
2081 	struct vnode *fvp, *fdirp;
2082 	struct vnode *tvp, *tdirp;
2083 	struct namecache *ncp;
2084 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2085 	nfsfh_t fnfh, tnfh;
2086 	fhandle_t *ffhp, *tfhp;
2087 	uid_t saved_uid;
2088 
2089 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2090 #ifndef nolint
2091 	fvp = (struct vnode *)0;
2092 #endif
2093 	ffhp = &fnfh.fh_generic;
2094 	tfhp = &tnfh.fh_generic;
2095 
2096 	/*
2097 	 * Clear fields incase goto nfsmout occurs from macro.
2098 	 */
2099 
2100 	nlookup_zero(&fromnd);
2101 	nlookup_zero(&tond);
2102 	fdirp = NULL;
2103 	tdirp = NULL;
2104 
2105 	nfsm_srvmtofh(ffhp);
2106 	nfsm_srvnamesiz(len);
2107 	/*
2108 	 * Remember our original uid so that we can reset cr_uid before
2109 	 * the second nfs_namei() call, in case it is remapped.
2110 	 */
2111 	saved_uid = cred->cr_uid;
2112 	error = nfs_namei(&fromnd, cred, NAMEI_DELETE, NULL, NULL,
2113 			  ffhp, len, slp, nam, &md, &dpos, &fdirp,
2114 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2115 	if (fdirp) {
2116 		if (v3)
2117 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2118 	}
2119 	if (error) {
2120 		nfsm_reply(2 * NFSX_WCCDATA(v3));
2121 		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2122 		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2123 		error = 0;
2124 		goto nfsmout;
2125 	}
2126 
2127 	/*
2128 	 * We have to unlock the from ncp before we can safely lookup
2129 	 * the target ncp.
2130 	 */
2131 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2132 	cache_unlock(&fromnd.nl_nch);
2133 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2134 	nfsm_srvmtofh(tfhp);
2135 	nfsm_strsiz(len2, NFS_MAXNAMLEN);
2136 	cred->cr_uid = saved_uid;
2137 
2138 	error = nfs_namei(&tond, cred, NAMEI_RENAME, NULL, NULL,
2139 			  tfhp, len2, slp, nam, &md, &dpos, &tdirp,
2140 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2141 	if (tdirp) {
2142 		if (v3)
2143 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2144 	}
2145 	if (error)
2146 		goto out1;
2147 
2148 	/*
2149 	 * relock the source
2150 	 */
2151 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2152 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2153 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2154 		cache_lock(&fromnd.nl_nch);
2155 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2156 	} else {
2157 		cache_unlock(&tond.nl_nch);
2158 		cache_lock(&fromnd.nl_nch);
2159 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2160 		cache_lock(&tond.nl_nch);
2161 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2162 	}
2163 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2164 
2165 	tvp = tond.nl_nch.ncp->nc_vp;
2166 	fvp = fromnd.nl_nch.ncp->nc_vp;
2167 
2168 	if (tvp != NULL) {
2169 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2170 			if (v3)
2171 				error = EEXIST;
2172 			else
2173 				error = EISDIR;
2174 			goto out;
2175 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2176 			if (v3)
2177 				error = EEXIST;
2178 			else
2179 				error = ENOTDIR;
2180 			goto out;
2181 		}
2182 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2183 			if (v3)
2184 				error = EXDEV;
2185 			else
2186 				error = ENOTEMPTY;
2187 			goto out;
2188 		}
2189 	}
2190 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2191 		if (v3)
2192 			error = EXDEV;
2193 		else
2194 			error = ENOTEMPTY;
2195 		goto out;
2196 	}
2197 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2198 		if (v3)
2199 			error = EXDEV;
2200 		else
2201 			error = ENOTEMPTY;
2202 		goto out;
2203 	}
2204 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2205 		if (v3)
2206 			error = EINVAL;
2207 		else
2208 			error = ENOTEMPTY;
2209 	}
2210 
2211 	/*
2212 	 * You cannot rename a source into itself or a subdirectory of itself.
2213 	 * We check this by traversing the target directory upwards looking
2214 	 * for a match against the source.
2215 	 */
2216 	if (error == 0) {
2217 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2218 			if (fromnd.nl_nch.ncp == ncp) {
2219 				error = EINVAL;
2220 				break;
2221 			}
2222 		}
2223 	}
2224 
2225 	/*
2226 	 * If source is the same as the destination (that is the
2227 	 * same vnode with the same name in the same directory),
2228 	 * then there is nothing to do.
2229 	 */
2230 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2231 		error = -1;
2232 out:
2233 	if (!error) {
2234 		/*
2235 		 * The VOP_NRENAME function releases all vnode references &
2236 		 * locks prior to returning so we need to clear the pointers
2237 		 * to bypass cleanup code later on.
2238 		 */
2239 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch, tond.nl_cred);
2240 	} else {
2241 		if (error == -1)
2242 			error = 0;
2243 	}
2244 	/* fall through */
2245 
2246 out1:
2247 	if (fdirp)
2248 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2249 	if (tdirp)
2250 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2251 	nfsm_reply(2 * NFSX_WCCDATA(v3));
2252 	if (v3) {
2253 		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2254 		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2255 	}
2256 	error = 0;
2257 	/* fall through */
2258 
2259 nfsmout:
2260 	if (tdirp)
2261 		vrele(tdirp);
2262 	nlookup_done(&tond);
2263 	if (fdirp)
2264 		vrele(fdirp);
2265 	nlookup_done(&fromnd);
2266 	return (error);
2267 }
2268 
2269 /*
2270  * nfs link service
      *
      * Decodes two file handles and a name from the request: fhp identifies
      * the existing file and dfhp the directory in which the new name is to
      * be created.  The existing file is resolved with nfsrv_fhtovp(), the
      * new name with nfs_namei(NAMEI_CREATE), and the link itself is made
      * with VOP_NLINK().
      *
      * Errors generated here: EPERM when the existing file is a directory,
      * EEXIST when the new name already resolves to a vnode, and EXDEV when
      * the file and the target directory live on different mounts.
      *
      * For NFSv3 the reply carries post-op attributes for the file plus wcc
      * data for the directory, and error is reset to 0 once the reply has
      * been built.  For NFSv2 error is not reset in this function after
      * nfsm_reply().
      *
      * NOTE(review): the nfsm_* macros are defined elsewhere; they reference
      * the locals declared below by name and appear to jump to nfsmout on
      * decode/reply failure -- confirm against the macro definitions.
2271  */
2272 int
2273 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2274 	   struct thread *td, struct mbuf **mrq)
2275 {
2276 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2277 	struct sockaddr *nam = nfsd->nd_nam;
2278 	caddr_t dpos = nfsd->nd_dpos;
2279 	struct ucred *cred = &nfsd->nd_cr;
2280 	struct nlookupdata nd;
2281 	u_int32_t *tl;
2282 	int32_t t1;
2283 	caddr_t bpos;
2284 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2285 	int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
2286 	char *cp2;
2287 	struct mbuf *mb, *mreq;
2288 	struct vnode *dirp;
2289 	struct vnode *dvp;
2290 	struct vnode *vp;
2291 	struct vnode *xp;
2292 	struct vattr dirfor, diraft, at;
2293 	nfsfh_t nfh, dnfh;
2294 	fhandle_t *fhp, *dfhp;
2295 
2296 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/*
	 * Put nd and all vnode pointers into a known state up front so the
	 * common cleanup at nfsmout can run no matter where we bail out.
	 */
2297 	nlookup_zero(&nd);
2298 	dirp = dvp = vp = xp = NULL;
2299 
	/* Request order: file handle, directory handle, name length. */
2300 	fhp = &nfh.fh_generic;
2301 	dfhp = &dnfh.fh_generic;
2302 	nfsm_srvmtofh(fhp);
2303 	nfsm_srvmtofh(dfhp);
2304 	nfsm_srvnamesiz(len);
2305 
	/* Resolve the existing file (the link source) into xp. */
2306 	error = nfsrv_fhtovp(fhp, FALSE, &xp, cred, slp, nam,
2307 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2308 	if (error) {
		/*
		 * No usable source vnode.  getret, dirfor_ret and diraft_ret
		 * still hold their initial value of 1 here.  xp is cleared
		 * so nfsmout does not vrele() it.
		 */
2309 		nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2310 		nfsm_srvpostop_attr(getret, &at);
2311 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2312 		xp = NULL;
2313 		error = 0;
2314 		goto nfsmout;
2315 	}
2316 	if (xp->v_type == VDIR) {
2317 		error = EPERM;		/* POSIX */
2318 		goto out1;
2319 	}
2320 
	/*
	 * Look up the new name.  dvp/vp receive the parent directory and the
	 * (possibly NULL) existing target; dirp is used below only for the
	 * before/after directory attributes.
	 */
2321 	error = nfs_namei(&nd, cred, NAMEI_CREATE, &dvp, &vp,
2322 			  dfhp, len, slp, nam, &md, &dpos, &dirp,
2323 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2324 	if (dirp) {
2325 		if (v3)
2326 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2327 	}
2328 	if (error)
2329 		goto out1;
2330 
	/* The new name must not exist yet. */
2331 	if (vp != NULL) {
2332 		error = EEXIST;
2333 		goto out;
2334 	}
	/* Hard links cannot cross mounts. */
2335 	if (xp->v_mount != dvp->v_mount)
2336 		error = EXDEV;
2337 out:
2338 	if (!error) {
2339 		error = VOP_NLINK(&nd.nl_nch, xp, nd.nl_cred);
2340 	}
2341 	/* fall through */
2342 
2343 out1:
	/* Collect the post-operation attributes and build the reply. */
2344 	if (v3)
2345 		getret = VOP_GETATTR(xp, &at);
2346 	if (dirp)
2347 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2348 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2349 	if (v3) {
2350 		nfsm_srvpostop_attr(getret, &at);
2351 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2352 		error = 0;
2353 	}
2354 	/* fall through */
2355 
2356 nfsmout:
	/*
	 * Common cleanup.  dirp and xp are dropped with vrele(); dvp is
	 * dropped with vrele() when it is the same vnode as vp and with
	 * vput() otherwise; vp is dropped with vput().
	 */
2357 	nlookup_done(&nd);
2358 	if (dirp)
2359 		vrele(dirp);
2360 	if (xp)
2361 		vrele(xp);
2362 	if (dvp) {
2363 		if (dvp == vp)
2364 			vrele(dvp);
2365 		else
2366 			vput(dvp);
2367 	}
2368 	if (vp)
2369 		vput(vp);
2370 	return(error);
2371 }
2372 
2373 /*
2374  * nfs symbolic link service
      *
      * Decodes the directory file handle and the new name, looks the name up
      * with nfs_namei(NAMEI_CREATE), then decodes the link target into a
      * temporary buffer (pathcp, len2 + 1 bytes, NUL terminated).  The
      * target length is bounded by NFS_MAXPATHLEN via nfsm_strsiz().
      *
      * Attribute decoding differs by protocol version: for NFSv3 the sattr
      * is decoded with nfsm_srvsattr() before the target path, for NFSv2 an
      * nfsv2_sattr follows the path and only its mode is used.
      *
      * If the name already resolves to a vnode the request fails with
      * EEXIST; otherwise VOP_NSYMLINK() creates the link and, on success,
      * the new file handle and attributes are gathered for the reply.
      *
      * The function returns 0 once a reply has been built; the operation
      * status is passed to nfsm_reply() through the local `error`.
      *
      * NOTE(review): the nfsm_* macros are defined elsewhere; they reference
      * the locals declared below by name and appear to jump to nfsmout on
      * decode/reply failure -- confirm against the macro definitions.
2375  */
2376 int
2377 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2378 	      struct thread *td, struct mbuf **mrq)
2379 {
2380 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2381 	struct sockaddr *nam = nfsd->nd_nam;
2382 	caddr_t dpos = nfsd->nd_dpos;
2383 	struct ucred *cred = &nfsd->nd_cr;
2384 	struct vattr va, dirfor, diraft;
2385 	struct nlookupdata nd;
2386 	struct vattr *vap = &va;
2387 	u_int32_t *tl;
2388 	int32_t t1;
2389 	struct nfsv2_sattr *sp;
2390 	char *bpos, *pathcp = (char *)0, *cp2;
2391 	struct uio io;
2392 	struct iovec iv;
2393 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2394 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2395 	struct mbuf *mb, *mreq, *mb2;
2396 	struct vnode *dirp;
2397 	struct vnode *vp;
2398 	nfsfh_t nfh;
2399 	fhandle_t *fhp;
2400 
2401 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Known state for the cleanup code at out: and nfsmout:. */
2402 	nlookup_zero(&nd);
2403 	dirp = vp = NULL;
2404 
2405 	fhp = &nfh.fh_generic;
2406 	nfsm_srvmtofh(fhp);
2407 	nfsm_srvnamesiz(len);
2408 
	/*
	 * Look up the new name; vp is non-NULL afterwards only if the name
	 * already exists.  dirp is used for the directory wcc attributes.
	 */
2409 	error = nfs_namei(&nd, cred, NAMEI_CREATE, NULL, &vp,
2410 			fhp, len, slp, nam, &md, &dpos, &dirp,
2411 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2412 	if (dirp) {
2413 		if (v3)
2414 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2415 	}
2416 	if (error)
2417 		goto out;
2418 
2419 	VATTR_NULL(vap);
2420 	if (v3)
2421 		nfsm_srvsattr(vap);
	/*
	 * Decode the link target: len2 is its length, pathcp receives
	 * len2 bytes plus one byte for the terminating NUL written below.
	 * The uio/iovec pair describes pathcp as a single kernel-space
	 * segment for nfsm_mtouio().
	 */
2422 	nfsm_strsiz(len2, NFS_MAXPATHLEN);
2423 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2424 	iv.iov_base = pathcp;
2425 	iv.iov_len = len2;
2426 	io.uio_resid = len2;
2427 	io.uio_offset = 0;
2428 	io.uio_iov = &iv;
2429 	io.uio_iovcnt = 1;
2430 	io.uio_segflg = UIO_SYSSPACE;
2431 	io.uio_rw = UIO_READ;
2432 	io.uio_td = NULL;
2433 	nfsm_mtouio(&io, len2);
2434 	if (!v3) {
		/* NFSv2: the sattr follows the path; only the mode is used. */
2435 		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2436 		vap->va_mode = nfstov_mode(sp->sa_mode);
2437 	}
2438 	*(pathcp + len2) = '\0';
	/* The whole request is decoded before an existing name is rejected. */
2439 	if (vp) {
2440 		error = EEXIST;
2441 		goto out;
2442 	}
2443 
	/* A mode the client left unset becomes 0 rather than VNOVAL. */
2444 	if (vap->va_mode == (mode_t)VNOVAL)
2445 		vap->va_mode = 0;
2446 	error = VOP_NSYMLINK(&nd.nl_nch, &vp, nd.nl_cred, vap, pathcp);
2447 	if (error == 0) {
		/* Build the new file handle and fetch attributes for the reply. */
2448 		bzero((caddr_t)fhp, sizeof(nfh));
2449 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2450 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2451 		if (!error)
2452 			error = VOP_GETATTR(vp, vap);
2453 	}
2454 
	/* Disabled: old namei-style re-lookup, kept for reference only. */
2455 #if 0
2456 	/*
2457 	 * We have a vp in hand from the new API call, we do not have to
2458 	 * look it up again.
2459 	 */
2460 	if (error == 0) {
2461 	    if (v3) {
2462 		/*
2463 		 * Issue lookup.  Leave SAVESTART set so we can easily free
2464 		 * the name buffer later on.
2465 		 *
2466 		 * since LOCKPARENT is not set, ni_dvp will be garbage on
2467 		 * return whether an error occurs or not.
2468 		 */
2469 		nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
2470 		nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT | CNP_FOLLOW);
2471 		nd.ni_cnd.cn_td = td;
2472 		nd.ni_cnd.cn_cred = cred;
2473 
2474 		error = lookup(&nd);
2475 		nd.ni_dvp = NULL;
2476 
2477 		if (error == 0) {
2478 			bzero((caddr_t)fhp, sizeof(nfh));
2479 			fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2480 			error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2481 			if (!error)
2482 				error = VOP_GETATTR(nd.ni_vp, vap);
2483 			vput(nd.ni_vp);
2484 			nd.ni_vp = NULL;
2485 		}
2486 	    }
2487 	}
2488 #endif
2489 out:
	/*
	 * Release everything before building the reply.  Each pointer is
	 * set to NULL after release so the nfsmout cleanup below does not
	 * release it a second time.
	 */
2490 	if (vp) {
2491 		vput(vp);
2492 		vp = NULL;
2493 	}
2494 	if (pathcp) {
2495 		FREE(pathcp, M_TEMP);
2496 		pathcp = NULL;
2497 	}
2498 	if (dirp) {
2499 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2500 		vrele(dirp);
2501 		dirp = NULL;
2502 	}
2503 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2504 	if (v3) {
		/* Handle and attributes only on success; wcc data always. */
2505 		if (!error) {
2506 			nfsm_srvpostop_fh(fhp);
2507 			nfsm_srvpostop_attr(0, vap);
2508 		}
2509 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2510 	}
2511 	error = 0;
2512 	/* fall through */
2513 
2514 nfsmout:
	/* Cleanup for paths that reach nfsmout without passing through out:. */
2515 	nlookup_done(&nd);
2516 	if (vp)
2517 		vput(vp);
2518 	if (dirp)
2519 		vrele(dirp);
2520 	if (pathcp)
2521 		FREE(pathcp, M_TEMP);
2522 	return (error);
2523 }
2524 
2525 /*
2526  * nfs mkdir service
      *
      * Decodes the parent directory file handle and the new name, looks the
      * name up with nfs_namei(NAMEI_CREATE), decodes the requested
      * attributes (a full sattr for NFSv3, only the mode word for NFSv2)
      * and creates the directory with VOP_NMKDIR().  An already existing
      * name fails with EEXIST.
      *
      * On success the new directory's file handle and attributes are
      * returned; NFSv3 replies additionally carry wcc data for the parent.
      * The function returns 0 once a reply has been built; the operation
      * status is passed to nfsm_reply() through the local `error`.
      *
      * NOTE(review): the nfsm_* macros are defined elsewhere; they reference
      * the locals declared below by name and appear to jump to nfsmout on
      * decode/reply failure -- confirm against the macro definitions.
2527  */
2528 int
2529 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2530 	    struct thread *td, struct mbuf **mrq)
2531 {
2532 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2533 	struct sockaddr *nam = nfsd->nd_nam;
2534 	caddr_t dpos = nfsd->nd_dpos;
2535 	struct ucred *cred = &nfsd->nd_cr;
2536 	struct vattr va, dirfor, diraft;
2537 	struct vattr *vap = &va;
2538 	struct nfs_fattr *fp;
2539 	struct nlookupdata nd;
2540 	caddr_t cp;
2541 	u_int32_t *tl;
2542 	int32_t t1;
2543 	caddr_t bpos;
2544 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2545 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2546 	char *cp2;
2547 	struct mbuf *mb, *mb2, *mreq;
2548 	struct vnode *dirp;
2549 	struct vnode *vp;
2550 	nfsfh_t nfh;
2551 	fhandle_t *fhp;
2552 
2553 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Known state for the cleanup code at nfsmout. */
2554 	nlookup_zero(&nd);
2555 	dirp = NULL;
2556 	vp = NULL;
2557 
2558 	fhp = &nfh.fh_generic;
2559 	nfsm_srvmtofh(fhp);
2560 	nfsm_srvnamesiz(len);
2561 
	/*
	 * Look up the new name; vp is non-NULL afterwards only if the name
	 * already exists.  dirp is used for the parent's wcc attributes.
	 */
2562 	error = nfs_namei(&nd, cred, NAMEI_CREATE, NULL, &vp,
2563 			  fhp, len, slp, nam, &md, &dpos, &dirp,
2564 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2565 	if (dirp) {
2566 		if (v3)
2567 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2568 	}
2569 	if (error) {
		/* Lookup failed; diraft_ret still holds its initial value 1. */
2570 		nfsm_reply(NFSX_WCCDATA(v3));
2571 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2572 		error = 0;
2573 		goto nfsmout;
2574 	}
2575 	VATTR_NULL(vap);
2576 	if (v3) {
2577 		nfsm_srvsattr(vap);
2578 	} else {
		/* NFSv2: only the first word (the mode) is taken. */
2579 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2580 		vap->va_mode = nfstov_mode(*tl++);
2581 	}
2582 
2583 	/*
2584 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2585 	 * nd.ni_vp, if it exists, is referenced but not locked.
	 *
	 * NOTE(review): the sentence above names fields of the old namei
	 * API.  The code below works on vp and nd.nl_nch instead, so the
	 * locking statement should be re-verified against nfs_namei().
2586 	 */
2587 
2588 	vap->va_type = VDIR;
2589 	if (vp != NULL) {
2590 		error = EEXIST;
2591 		goto out;
2592 	}
2593 
2594 	/*
2595 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2596 	 * component is freed by the VOP call.  This will fill-in
2597 	 * nd.ni_vp, reference, and exclusively lock it.
	 *
	 * NOTE(review): as written here the new vnode is returned through
	 * the local vp (second argument of VOP_NMKDIR), not nd.ni_vp.
	 * A mode the client left unset becomes 0 rather than VNOVAL.
2598 	 */
2599 	if (vap->va_mode == (mode_t)VNOVAL)
2600 		vap->va_mode = 0;
2601 	error = VOP_NMKDIR(&nd.nl_nch, &vp, nd.nl_cred, vap);
2602 
2603 	if (error == 0) {
		/* Build the new file handle and fetch attributes for the reply. */
2604 		bzero((caddr_t)fhp, sizeof(nfh));
2605 		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2606 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2607 		if (error == 0)
2608 			error = VOP_GETATTR(vp, vap);
2609 	}
2610 out:
2611 	if (dirp)
2612 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2613 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2614 	if (v3) {
		/* Handle and attributes only on success; wcc data always. */
2615 		if (!error) {
2616 			nfsm_srvpostop_fh(fhp);
2617 			nfsm_srvpostop_attr(0, vap);
2618 		}
2619 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2620 	} else {
		/*
		 * NFSv2 reply: file handle followed by a v2 fattr.
		 * NOTE(review): presumably only reached with error == 0
		 * because nfsm_reply() leaves early on v2 errors -- confirm.
		 */
2621 		nfsm_srvfhtom(fhp, v3);
2622 		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
2623 		nfsm_srvfillattr(vap, fp);
2624 	}
2625 	error = 0;
2626 	/* fall through */
2627 
2628 nfsmout:
	/* Common cleanup: dirp via vrele(), vp via vput(). */
2629 	nlookup_done(&nd);
2630 	if (dirp)
2631 		vrele(dirp);
2632 	if (vp)
2633 		vput(vp);
2634 	return (error);
2635 }
2636 
2637 /*
2638  * nfs rmdir service
      *
      * Decodes the parent directory file handle and the name, looks the name
      * up with nfs_namei(NAMEI_DELETE) and removes the directory with
      * VOP_NRMDIR().
      *
      * Errors generated here: ENOTDIR when the target is not a directory
      * and EBUSY when the target vnode has VROOT set.
      *
      * For NFSv3 the reply carries wcc data for the parent and error is
      * reset to 0 once the reply has been built.  For NFSv2 error is not
      * reset in this function after nfsm_reply().
      *
      * NOTE(review): the nfsm_* macros are defined elsewhere; they reference
      * the locals declared below by name and appear to jump to nfsmout on
      * decode/reply failure -- confirm against the macro definitions.
2639  */
2640 int
2641 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2642 	    struct thread *td, struct mbuf **mrq)
2643 {
2644 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2645 	struct sockaddr *nam = nfsd->nd_nam;
2646 	caddr_t dpos = nfsd->nd_dpos;
2647 	struct ucred *cred = &nfsd->nd_cr;
2648 	u_int32_t *tl;
2649 	int32_t t1;
2650 	caddr_t bpos;
2651 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2652 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2653 	char *cp2;
2654 	struct mbuf *mb, *mreq;
2655 	struct vnode *dirp;
2656 	struct vnode *vp;
2657 	struct vattr dirfor, diraft;
2658 	nfsfh_t nfh;
2659 	fhandle_t *fhp;
2660 	struct nlookupdata nd;
2661 
2662 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Known state for the cleanup code at nfsmout. */
2663 	nlookup_zero(&nd);
2664 	dirp = NULL;
2665 	vp = NULL;
2666 
2667 	fhp = &nfh.fh_generic;
2668 	nfsm_srvmtofh(fhp);
2669 	nfsm_srvnamesiz(len);
2670 
	/*
	 * Look up the directory to remove into vp; dirp is used for the
	 * parent's wcc attributes.
	 */
2671 	error = nfs_namei(&nd, cred, NAMEI_DELETE, NULL, &vp,
2672 			  fhp, len, slp, nam, &md, &dpos, &dirp,
2673 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2674 	if (dirp) {
2675 		if (v3)
2676 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2677 	}
2678 	if (error) {
		/* Lookup failed; diraft_ret still holds its initial value 1. */
2679 		nfsm_reply(NFSX_WCCDATA(v3));
2680 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2681 		error = 0;
2682 		goto nfsmout;
2683 	}
2684 	if (vp->v_type != VDIR) {
2685 		error = ENOTDIR;
2686 		goto out;
2687 	}
2688 
2689 	/*
2690 	 * The root of a mounted filesystem cannot be deleted.
2691 	 */
2692 	if (vp->v_flag & VROOT)
2693 		error = EBUSY;
2694 out:
2695 	/*
2696 	 * Issue or abort op.  Since SAVESTART is not set, path name
2697 	 * component is freed by the VOP after either.
	 *
	 * vp is released with vput() before VOP_NRMDIR() is called and
	 * cleared so that nfsmout does not release it again.  On the abort
	 * path vp stays set and is released at nfsmout.
2698 	 */
2699 	if (!error) {
2700 		vput(vp);
2701 		vp = NULL;
2702 		error = VOP_NRMDIR(&nd.nl_nch, nd.nl_cred);
2703 	}
	/*
	 * NOTE(review): nlookup_done() is called here and again at nfsmout;
	 * presumably it tolerates being called twice -- confirm.
	 */
2704 	nlookup_done(&nd);
2705 
2706 	if (dirp)
2707 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2708 	nfsm_reply(NFSX_WCCDATA(v3));
2709 	if (v3) {
2710 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2711 		error = 0;
2712 	}
2713 	/* fall through */
2714 
2715 nfsmout:
	/* Common cleanup: dirp via vrele(), vp via vput(). */
2716 	nlookup_done(&nd);
2717 	if (dirp)
2718 		vrele(dirp);
2719 	if (vp)
2720 		vput(vp);
2721 	return(error);
2722 }
2723 
2724 /*
2725  * nfs readdir service
2726  * - mallocs what it thinks is enough to read
2727  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2728  * - calls VOP_READDIR()
2729  * - loops around building the reply
2730  *	if the output generated exceeds count break out of loop
2731  *	The nfsm_clget macro is used here so that the reply will be packed
2732  *	tightly in mbuf clusters.
2733  * - it only knows that it has encountered eof when the VOP_READDIR()
2734  *	reads nothing
2735  * - as such one readdir rpc will return eof false although you are there
2736  *	and then the next will return eof
2737  * - it trims out records with d_fileno == 0
2738  *	this doesn't matter for Unix clients, but they might confuse clients
2739  *	for other os'.
2740  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2741  *	than requested, but this may not apply to all filesystems. For
2742  *	example, client NFS does not { although it is never remote mounted
2743  *	anyhow }
2744  *     The alternate call nfsrv_readdirplus() does lookups as well.
2745  * PS: The NFS protocol spec. does not clarify what the "count" byte
2746  *	argument is a count of.. just name strings and file id's or the
2747  *	entire reply rpc or ...
2748  *	I tried just file name and id sizes and it confused the Sun client,
2749  *	so I am using the full rpc size now. The "paranoia.." comment refers
2750  *	to including the status longwords that are not a part of the dir.
2751  *	"entry" structures, but are in the rpc.
2752  */
/*
 * Fixed-size tail of one readdirplus entry.  nfsrv_readdirplus() fills
 * one of these per directory entry and copies it byte-for-byte into the
 * reply right after the padded name, so field order and sizes must not
 * change.
 */
2753 struct flrep {
2754 	nfsuint64	fl_off;		/* entry cookie; high word is written as 0 */
2755 	u_int32_t	fl_postopok;	/* set to nfs_true: attributes follow */
2756 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)]; /* v3 attributes */
2757 	u_int32_t	fl_fhok;	/* set to nfs_true: file handle follows */
2758 	u_int32_t	fl_fhsize;	/* set to txdr_unsigned(NFSX_V3FH) */
2759 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)]; /* file handle bytes */
2760 };
2761 
/*
 * nfsrv_readdir: serve a READDIR request (NFSv2 and NFSv3).
 *
 * Request: directory file handle, a starting cookie (64 bit followed by
 * a 64 bit verifier for v3, 32 bit for v2) and a byte count.
 *
 * The count is rounded up to a multiple of DIRBLKSIZ to size the read
 * buffer and both values are clamped to NFS_SRVMAXDATA(nfsd).  The
 * directory is read with VOP_READDIR() into rbuf; entries whose d_ino is
 * 0, whose type is DT_WHT, or whose cookie is not beyond the requested
 * offset are skipped.  If nothing useful remains the read is repeated
 * from the new offset (label "again").
 *
 * Reply: for v3 post-op attributes and a cookie verifier taken from
 * at.va_filerev; then per entry a "value follows" word, the file id,
 * the name length, the name padded to a 4 byte boundary and the cookie
 * (file id and cookie are preceded by a zero high word for v3); finally
 * a terminating nfs_false word and the eof flag.
 *
 * Always returns 0 once a reply has been built; the operation status is
 * passed to nfsm_reply() through the local `error`.
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere; they reference
 * the locals declared below by name and appear to jump to nfsmout on
 * decode/reply failure -- confirm against the macro definitions.
 */
2762 int
2763 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2764 	      struct thread *td, struct mbuf **mrq)
2765 {
2766 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2767 	struct sockaddr *nam = nfsd->nd_nam;
2768 	caddr_t dpos = nfsd->nd_dpos;
2769 	struct ucred *cred = &nfsd->nd_cr;
2770 	char *bp, *be;
2771 	struct mbuf *mp;
2772 	struct dirent *dp;
2773 	caddr_t cp;
2774 	u_int32_t *tl;
2775 	int32_t t1;
2776 	caddr_t bpos;
2777 	struct mbuf *mb, *mb2, *mreq, *mp2;
2778 	char *cpos, *cend, *cp2, *rbuf;
2779 	struct vnode *vp = NULL;
2780 	struct vattr at;
2781 	nfsfh_t nfh;
2782 	fhandle_t *fhp;
2783 	struct uio io;
2784 	struct iovec iv;
2785 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2786 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2787 	int v3 = (nfsd->nd_flag & ND_NFSV3);
2788 	u_quad_t off, toff, verf;
2789 	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
2790 
2791 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2792 	fhp = &nfh.fh_generic;
2793 	nfsm_srvmtofh(fhp);
	/* Decode cookie (and verifier for v3); tl is left on the count word. */
2794 	if (v3) {
2795 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2796 		toff = fxdr_hyper(tl);
2797 		tl += 2;
2798 		verf = fxdr_hyper(tl);
2799 		tl += 2;
2800 	} else {
2801 		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2802 		toff = fxdr_unsigned(u_quad_t, *tl++);
2803 		verf = 0;	/* shut up gcc */
2804 	}
2805 	off = toff;
	/*
	 * cnt bounds the reply size, siz the directory read.  siz is cnt
	 * rounded up to a DIRBLKSIZ multiple; both are clamped to the
	 * server maximum.  fullsiz remembers the read size for retries.
	 */
2806 	cnt = fxdr_unsigned(int, *tl);
2807 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2808 	xfer = NFS_SRVMAXDATA(nfsd);
2809 	if (cnt > xfer)
2810 		cnt = xfer;
2811 	if (siz > xfer)
2812 		siz = xfer;
2813 	fullsiz = siz;
2814 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
2815 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2816 	if (!error && vp->v_type != VDIR) {
2817 		error = ENOTDIR;
2818 		vput(vp);
2819 		vp = NULL;
2820 	}
2821 	if (error) {
2822 		nfsm_reply(NFSX_UNSIGNED);
2823 		nfsm_srvpostop_attr(getret, &at);
2824 		error = 0;
2825 		goto nfsmout;
2826 	}
2827 
2828 	/*
2829 	 * Obtain lock on vnode for this section of the code
	 *
	 * NOTE(review): no lock call appears here; the vnode is released
	 * with vput() on the error path below and vn_unlock() afterwards,
	 * so it is presumably already locked by nfsrv_fhtovp() -- confirm.
2830 	 */
2831 
2832 	if (v3) {
2833 		error = getret = VOP_GETATTR(vp, &at);
2834 #if 0
2835 		/*
2836 		 * XXX This check may be too strict for Solaris 2.5 clients.
2837 		 */
2838 		if (!error && toff && verf && verf != at.va_filerev)
2839 			error = NFSERR_BAD_COOKIE;
2840 #endif
2841 	}
2842 	if (!error)
2843 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
2844 	if (error) {
2845 		vput(vp);
2846 		vp = NULL;
2847 		nfsm_reply(NFSX_POSTOPATTR(v3));
2848 		nfsm_srvpostop_attr(getret, &at);
2849 		error = 0;
2850 		goto nfsmout;
2851 	}
	/* From here on vp is only referenced: released with vrele(). */
2852 	vn_unlock(vp);
2853 
2854 	/*
2855 	 * end section.  Allocate rbuf and continue
2856 	 */
2857 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2858 again:
	/*
	 * (Re)read fullsiz bytes of directory data starting at off.  Any
	 * cookie array from a previous pass is freed first.
	 */
2859 	iv.iov_base = rbuf;
2860 	iv.iov_len = fullsiz;
2861 	io.uio_iov = &iv;
2862 	io.uio_iovcnt = 1;
2863 	io.uio_offset = (off_t)off;
2864 	io.uio_resid = fullsiz;
2865 	io.uio_segflg = UIO_SYSSPACE;
2866 	io.uio_rw = UIO_READ;
2867 	io.uio_td = NULL;
2868 	eofflag = 0;
2869 	if (cookies) {
2870 		kfree((caddr_t)cookies, M_TEMP);
2871 		cookies = NULL;
2872 	}
2873 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2874 	off = (off_t)io.uio_offset;
	/* A successful read that returned no cookie array is an error. */
2875 	if (!cookies && !error)
2876 		error = NFSERR_PERM;
2877 	if (v3) {
2878 		getret = VOP_GETATTR(vp, &at);
2879 		if (!error)
2880 			error = getret;
2881 	}
2882 	if (error) {
2883 		vrele(vp);
2884 		vp = NULL;
2885 		kfree((caddr_t)rbuf, M_TEMP);
2886 		if (cookies)
2887 			kfree((caddr_t)cookies, M_TEMP);
2888 		nfsm_reply(NFSX_POSTOPATTR(v3));
2889 		nfsm_srvpostop_attr(getret, &at);
2890 		error = 0;
2891 		goto nfsmout;
2892 	}
	/* Short read: shrink siz to the number of bytes actually returned. */
2893 	if (io.uio_resid) {
2894 		siz -= io.uio_resid;
2895 
2896 		/*
2897 		 * If nothing read, return eof
2898 		 * rpc reply
2899 		 */
2900 		if (siz == 0) {
2901 			vrele(vp);
2902 			vp = NULL;
2903 			nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
2904 				2 * NFSX_UNSIGNED);
2905 			if (v3) {
2906 				nfsm_srvpostop_attr(getret, &at);
2907 				nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2908 				txdr_hyper(at.va_filerev, tl);
2909 				tl += 2;
2910 			} else
2911 				nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			/* No entry follows; eof is true. */
2912 			*tl++ = nfs_false;
2913 			*tl = nfs_true;
2914 			FREE((caddr_t)rbuf, M_TEMP);
2915 			FREE((caddr_t)cookies, M_TEMP);
2916 			error = 0;
2917 			goto nfsmout;
2918 		}
2919 	}
2920 
2921 	/*
2922 	 * Check for degenerate cases of nothing useful read.
2923 	 * If so go try again
2924 	 */
2925 	cpos = rbuf;
2926 	cend = rbuf + siz;
2927 	dp = (struct dirent *)cpos;
2928 	cookiep = cookies;
2929 	/*
2930 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
2931 	 * directory offset up to a block boundary, so it is necessary to
2932 	 * skip over the records that precede the requested offset. This
2933 	 * requires the assumption that file offset cookies monotonically
2934 	 * increase.
2935 	 */
2936 	while (cpos < cend && ncookies > 0 &&
2937 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
2938 		 ((u_quad_t)(*cookiep)) <= toff)) {
2939 		dp = _DIRENT_NEXT(dp);
2940 		cpos = (char *)dp;
2941 		cookiep++;
2942 		ncookies--;
2943 	}
	/* Everything was skipped: read the next chunk from the new offset. */
2944 	if (cpos >= cend || ncookies == 0) {
2945 		toff = off;
2946 		siz = fullsiz;
2947 		goto again;
2948 	}
2949 
2950 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
2951 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
2952 	if (v3) {
2953 		nfsm_srvpostop_attr(getret, &at);
2954 		nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2955 		txdr_hyper(at.va_filerev, tl);
2956 	}
	/*
	 * bp/be are the write cursor and end of space in the current reply
	 * mbuf.  NOTE(review): nfsm_clget presumably points tl at bp and
	 * chains a new cluster when bp reaches be -- confirm in the macro.
	 */
2957 	mp = mp2 = mb;
2958 	bp = bpos;
2959 	be = bp + M_TRAILINGSPACE(mp);
2960 
2961 	/* Loop through the records and build reply */
2962 	while (cpos < cend && ncookies > 0) {
2963 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
2964 			nlen = dp->d_namlen;
2965 			rem = nfsm_rndup(nlen) - nlen;
			/*
			 * Account for this entry; stop (and clear eof) as
			 * soon as the running length would exceed cnt.
			 */
2966 			len += (4 * NFSX_UNSIGNED + nlen + rem);
2967 			if (v3)
2968 				len += 2 * NFSX_UNSIGNED;
2969 			if (len > cnt) {
2970 				eofflag = 0;
2971 				break;
2972 			}
2973 			/*
2974 			 * Build the directory record xdr from
2975 			 * the dirent entry.
2976 			 */
2977 			nfsm_clget;
2978 			*tl = nfs_true;
2979 			bp += NFSX_UNSIGNED;
2980 			if (v3) {
				/* v3: zero high word of the file id. */
2981 				nfsm_clget;
2982 				*tl = 0;
2983 				bp += NFSX_UNSIGNED;
2984 			}
2985 			nfsm_clget;
2986 			*tl = txdr_unsigned(dp->d_ino);
2987 			bp += NFSX_UNSIGNED;
2988 			nfsm_clget;
2989 			*tl = txdr_unsigned(nlen);
2990 			bp += NFSX_UNSIGNED;
2991 
2992 			/* And loop around copying the name */
2993 			xfer = nlen;
2994 			cp = dp->d_name;
2995 			while (xfer > 0) {
2996 				nfsm_clget;
2997 				if ((bp+xfer) > be)
2998 					tsiz = be-bp;
2999 				else
3000 					tsiz = xfer;
3001 				bcopy(cp, bp, tsiz);
3002 				bp += tsiz;
3003 				xfer -= tsiz;
3004 				if (xfer > 0)
3005 					cp += tsiz;
3006 			}
3007 			/* And null pad to a int32_t boundary */
3008 			for (i = 0; i < rem; i++)
3009 				*bp++ = '\0';
3010 			nfsm_clget;
3011 
3012 			/* Finish off the record */
3013 			if (v3) {
				/* v3: zero high word of the cookie. */
3014 				*tl = 0;
3015 				bp += NFSX_UNSIGNED;
3016 				nfsm_clget;
3017 			}
			/* Only 32 bits of the cookie are emitted here. */
3018 			*tl = txdr_unsigned(*cookiep);
3019 			bp += NFSX_UNSIGNED;
3020 		}
3021 		dp = _DIRENT_NEXT(dp);
3022 		cpos = (char *)dp;
3023 		cookiep++;
3024 		ncookies--;
3025 	}
3026 	vrele(vp);
3027 	vp = NULL;
	/* Terminator: "no more entries" followed by the eof flag. */
3028 	nfsm_clget;
3029 	*tl = nfs_false;
3030 	bp += NFSX_UNSIGNED;
3031 	nfsm_clget;
3032 	if (eofflag)
3033 		*tl = nfs_true;
3034 	else
3035 		*tl = nfs_false;
3036 	bp += NFSX_UNSIGNED;
	/* Fix up the length of the last mbuf written. */
3037 	if (mp != mb) {
3038 		if (bp < be)
3039 			mp->m_len = bp - mtod(mp, caddr_t);
3040 	} else
3041 		mp->m_len += bp - bpos;
3042 	FREE((caddr_t)rbuf, M_TEMP);
3043 	FREE((caddr_t)cookies, M_TEMP);
3044 
3045 nfsmout:
3046 	if (vp)
3047 		vrele(vp);
3048 	return(error);
3049 }
3050 
/*
 * nfsrv_readdirplus: serve an NFSv3 READDIRPLUS request.
 *
 * Request: directory file handle, 64 bit cookie, 64 bit verifier, a
 * directory byte count (siz) and a maximum reply byte count (cnt).
 *
 * Works like nfsrv_readdir(), but every returned entry additionally
 * carries the entry's attributes and file handle.  For each entry the
 * vnode is obtained with VFS_VGET(), its handle with VFS_VPTOFH() and
 * its attributes with VOP_GETATTR(); entries for which any of these
 * fail are silently skipped (label "invalid").  If the probing
 * VFS_VGET() returns EOPNOTSUPP the request is answered with
 * NFSERR_NOTSUPP.
 *
 * Always returns 0 once a reply has been built; the operation status is
 * passed to nfsm_reply() through the local `error`.
 *
 * NOTE(review): the nfsm_* macros are defined elsewhere; they reference
 * the locals declared below by name and appear to jump to nfsmout on
 * decode/reply failure -- confirm against the macro definitions.
 */
3051 int
3052 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3053 		  struct thread *td, struct mbuf **mrq)
3054 {
3055 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3056 	struct sockaddr *nam = nfsd->nd_nam;
3057 	caddr_t dpos = nfsd->nd_dpos;
3058 	struct ucred *cred = &nfsd->nd_cr;
3059 	char *bp, *be;
3060 	struct mbuf *mp;
3061 	struct dirent *dp;
3062 	caddr_t cp;
3063 	u_int32_t *tl;
3064 	int32_t t1;
3065 	caddr_t bpos;
3066 	struct mbuf *mb, *mb2, *mreq, *mp2;
3067 	char *cpos, *cend, *cp2, *rbuf;
3068 	struct vnode *vp = NULL, *nvp;
3069 	struct flrep fl;
3070 	nfsfh_t nfh;
	/* nfhp aliases the handle storage inside fl. */
3071 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3072 	struct uio io;
3073 	struct iovec iv;
3074 	struct vattr va, at, *vap = &va;
3075 	struct nfs_fattr *fp;
3076 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3077 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3078 	u_quad_t off, toff, verf;
3079 	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3080 
3081 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3082 	fhp = &nfh.fh_generic;
3083 	nfsm_srvmtofh(fhp);
	/* Decode cookie, verifier, directory count and maximum count. */
3084 	nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3085 	toff = fxdr_hyper(tl);
3086 	tl += 2;
3087 	verf = fxdr_hyper(tl);
3088 	tl += 2;
3089 	siz = fxdr_unsigned(int, *tl++);
3090 	cnt = fxdr_unsigned(int, *tl);
3091 	off = toff;
	/*
	 * siz is rounded up to a DIRBLKSIZ multiple; siz and cnt are both
	 * clamped to the server maximum.  fullsiz remembers the read size
	 * for retries and bounds dirlen below.
	 */
3092 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3093 	xfer = NFS_SRVMAXDATA(nfsd);
3094 	if (cnt > xfer)
3095 		cnt = xfer;
3096 	if (siz > xfer)
3097 		siz = xfer;
3098 	fullsiz = siz;
3099 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3100 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3101 	if (!error && vp->v_type != VDIR) {
3102 		error = ENOTDIR;
3103 		vput(vp);
3104 		vp = NULL;
3105 	}
3106 	if (error) {
3107 		nfsm_reply(NFSX_UNSIGNED);
3108 		nfsm_srvpostop_attr(getret, &at);
3109 		error = 0;
3110 		goto nfsmout;
3111 	}
3112 	error = getret = VOP_GETATTR(vp, &at);
3113 #if 0
3114 	/*
3115 	 * XXX This check may be too strict for Solaris 2.5 clients.
3116 	 */
3117 	if (!error && toff && verf && verf != at.va_filerev)
3118 		error = NFSERR_BAD_COOKIE;
3119 #endif
3120 	if (!error) {
3121 		error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3122 	}
3123 	if (error) {
3124 		vput(vp);
3125 		vp = NULL;
3126 		nfsm_reply(NFSX_V3POSTOPATTR);
3127 		nfsm_srvpostop_attr(getret, &at);
3128 		error = 0;
3129 		goto nfsmout;
3130 	}
	/* From here on vp is only referenced: released with vrele(). */
3131 	vn_unlock(vp);
3132 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3133 again:
	/*
	 * (Re)read fullsiz bytes of directory data starting at off.  Any
	 * cookie array from a previous pass is freed first.
	 */
3134 	iv.iov_base = rbuf;
3135 	iv.iov_len = fullsiz;
3136 	io.uio_iov = &iv;
3137 	io.uio_iovcnt = 1;
3138 	io.uio_offset = (off_t)off;
3139 	io.uio_resid = fullsiz;
3140 	io.uio_segflg = UIO_SYSSPACE;
3141 	io.uio_rw = UIO_READ;
3142 	io.uio_td = NULL;
3143 	eofflag = 0;
3144 	if (cookies) {
3145 		kfree((caddr_t)cookies, M_TEMP);
3146 		cookies = NULL;
3147 	}
3148 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3149 	off = (u_quad_t)io.uio_offset;
3150 	getret = VOP_GETATTR(vp, &at);
	/* A successful read that returned no cookie array is an error. */
3151 	if (!cookies && !error)
3152 		error = NFSERR_PERM;
3153 	if (!error)
3154 		error = getret;
3155 	if (error) {
3156 		vrele(vp);
3157 		vp = NULL;
3158 		if (cookies)
3159 			kfree((caddr_t)cookies, M_TEMP);
3160 		kfree((caddr_t)rbuf, M_TEMP);
3161 		nfsm_reply(NFSX_V3POSTOPATTR);
3162 		nfsm_srvpostop_attr(getret, &at);
3163 		error = 0;
3164 		goto nfsmout;
3165 	}
	/* Short read: shrink siz to the number of bytes actually returned. */
3166 	if (io.uio_resid) {
3167 		siz -= io.uio_resid;
3168 
3169 		/*
3170 		 * If nothing read, return eof
3171 		 * rpc reply
3172 		 */
3173 		if (siz == 0) {
3174 			vrele(vp);
3175 			vp = NULL;
3176 			nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3177 				2 * NFSX_UNSIGNED);
3178 			nfsm_srvpostop_attr(getret, &at);
3179 			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3180 			txdr_hyper(at.va_filerev, tl);
3181 			tl += 2;
			/* No entry follows; eof is true. */
3182 			*tl++ = nfs_false;
3183 			*tl = nfs_true;
3184 			FREE((caddr_t)cookies, M_TEMP);
3185 			FREE((caddr_t)rbuf, M_TEMP);
3186 			error = 0;
3187 			goto nfsmout;
3188 		}
3189 	}
3190 
3191 	/*
3192 	 * Check for degenerate cases of nothing useful read.
3193 	 * If so go try again
3194 	 */
3195 	cpos = rbuf;
3196 	cend = rbuf + siz;
3197 	dp = (struct dirent *)cpos;
3198 	cookiep = cookies;
3199 	/*
3200 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3201 	 * directory offset up to a block boundary, so it is necessary to
3202 	 * skip over the records that precede the requested offset. This
3203 	 * requires the assumption that file offset cookies monotonically
3204 	 * increase.
3205 	 */
3206 	while (cpos < cend && ncookies > 0 &&
3207 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3208 		 ((u_quad_t)(*cookiep)) <= toff)) {
3209 		dp = _DIRENT_NEXT(dp);
3210 		cpos = (char *)dp;
3211 		cookiep++;
3212 		ncookies--;
3213 	}
	/* Everything was skipped: read the next chunk from the new offset. */
3214 	if (cpos >= cend || ncookies == 0) {
3215 		toff = off;
3216 		siz = fullsiz;
3217 		goto again;
3218 	}
3219 
3220 	/*
3221 	 * Probe one of the directory entries to see if the filesystem
3222 	 * supports VGET.
	 *
	 * NOTE(review): nvp is declared without an initializer and is
	 * tested after the call; this relies on VFS_VGET() storing to nvp
	 * (NULL on failure) for errors other than EOPNOTSUPP -- confirm.
3223 	 */
3224 	if (VFS_VGET(vp->v_mount, dp->d_ino, &nvp) == EOPNOTSUPP) {
3225 		error = NFSERR_NOTSUPP;
3226 		vrele(vp);
3227 		vp = NULL;
3228 		kfree((caddr_t)cookies, M_TEMP);
3229 		kfree((caddr_t)rbuf, M_TEMP);
3230 		nfsm_reply(NFSX_V3POSTOPATTR);
3231 		nfsm_srvpostop_attr(getret, &at);
3232 		error = 0;
3233 		goto nfsmout;
3234 	}
3235 	if (nvp) {
3236 		vput(nvp);
3237 		nvp = NULL;
3238 	}
3239 
	/*
	 * len tracks the size of the whole reply (checked against cnt),
	 * dirlen the size without attributes and handles (checked against
	 * fullsiz).  Both start with the fixed reply overhead.
	 */
3240 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
3241 	nfsm_reply(cnt);
3242 	nfsm_srvpostop_attr(getret, &at);
3243 	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3244 	txdr_hyper(at.va_filerev, tl);
	/*
	 * bp/be are the write cursor and end of space in the current reply
	 * mbuf.  NOTE(review): nfsm_clget presumably points tl at bp and
	 * chains a new cluster when bp reaches be -- confirm in the macro.
	 */
3245 	mp = mp2 = mb;
3246 	bp = bpos;
3247 	be = bp + M_TRAILINGSPACE(mp);
3248 
3249 	/* Loop through the records and build reply */
3250 	while (cpos < cend && ncookies > 0) {
3251 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3252 			nlen = dp->d_namlen;
3253 			rem = nfsm_rndup(nlen)-nlen;
3254 
3255 			/*
3256 			 * For readdir_and_lookup get the vnode using
3257 			 * the file number.
			 *
			 * The handle is written into fl.fl_nfh through
			 * nfhp and the attributes into va through vap;
			 * nvp is released again before the entry is
			 * emitted.
3258 			 */
3259 			if (VFS_VGET(vp->v_mount, dp->d_ino, &nvp))
3260 				goto invalid;
3261 			bzero((caddr_t)nfhp, NFSX_V3FH);
3262 			nfhp->fh_fsid =
3263 				nvp->v_mount->mnt_stat.f_fsid;
3264 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3265 				vput(nvp);
3266 				nvp = NULL;
3267 				goto invalid;
3268 			}
3269 			if (VOP_GETATTR(nvp, vap)) {
3270 				vput(nvp);
3271 				nvp = NULL;
3272 				goto invalid;
3273 			}
3274 			vput(nvp);
3275 			nvp = NULL;
3276 
3277 			/*
3278 			 * If either the dircount or maxcount will be
3279 			 * exceeded, get out now. Both of these lengths
3280 			 * are calculated conservatively, including all
3281 			 * XDR overheads.
3282 			 */
3283 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3284 				NFSX_V3POSTOPATTR);
3285 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3286 			if (len > cnt || dirlen > fullsiz) {
3287 				eofflag = 0;
3288 				break;
3289 			}
3290 
3291 			/*
3292 			 * Build the directory record xdr from
3293 			 * the dirent entry.
			 *
			 * fl is completed first (attributes, flags and
			 * cookie; the handle is already in place) and is
			 * copied out after the name below.
3294 			 */
3295 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3296 			nfsm_srvfillattr(vap, fp);
3297 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3298 			fl.fl_fhok = nfs_true;
3299 			fl.fl_postopok = nfs_true;
3300 			fl.fl_off.nfsuquad[0] = 0;
3301 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3302 
			/* "value follows", 64 bit file id (high word 0), name length. */
3303 			nfsm_clget;
3304 			*tl = nfs_true;
3305 			bp += NFSX_UNSIGNED;
3306 			nfsm_clget;
3307 			*tl = 0;
3308 			bp += NFSX_UNSIGNED;
3309 			nfsm_clget;
3310 			*tl = txdr_unsigned(dp->d_ino);
3311 			bp += NFSX_UNSIGNED;
3312 			nfsm_clget;
3313 			*tl = txdr_unsigned(nlen);
3314 			bp += NFSX_UNSIGNED;
3315 
3316 			/* And loop around copying the name */
3317 			xfer = nlen;
3318 			cp = dp->d_name;
3319 			while (xfer > 0) {
3320 				nfsm_clget;
3321 				if ((bp + xfer) > be)
3322 					tsiz = be - bp;
3323 				else
3324 					tsiz = xfer;
3325 				bcopy(cp, bp, tsiz);
3326 				bp += tsiz;
3327 				xfer -= tsiz;
3328 				if (xfer > 0)
3329 					cp += tsiz;
3330 			}
3331 			/* And null pad to a int32_t boundary */
3332 			for (i = 0; i < rem; i++)
3333 				*bp++ = '\0';
3334 
3335 			/*
3336 			 * Now copy the flrep structure out.
3337 			 */
3338 			xfer = sizeof (struct flrep);
3339 			cp = (caddr_t)&fl;
3340 			while (xfer > 0) {
3341 				nfsm_clget;
3342 				if ((bp + xfer) > be)
3343 					tsiz = be - bp;
3344 				else
3345 					tsiz = xfer;
3346 				bcopy(cp, bp, tsiz);
3347 				bp += tsiz;
3348 				xfer -= tsiz;
3349 				if (xfer > 0)
3350 					cp += tsiz;
3351 			}
3352 		}
		/* Skipped and emitted entries both advance to the next record. */
3353 invalid:
3354 		dp = _DIRENT_NEXT(dp);
3355 		cpos = (char *)dp;
3356 		cookiep++;
3357 		ncookies--;
3358 	}
3359 	vrele(vp);
3360 	vp = NULL;
	/* Terminator: "no more entries" followed by the eof flag. */
3361 	nfsm_clget;
3362 	*tl = nfs_false;
3363 	bp += NFSX_UNSIGNED;
3364 	nfsm_clget;
3365 	if (eofflag)
3366 		*tl = nfs_true;
3367 	else
3368 		*tl = nfs_false;
3369 	bp += NFSX_UNSIGNED;
	/* Fix up the length of the last mbuf written. */
3370 	if (mp != mb) {
3371 		if (bp < be)
3372 			mp->m_len = bp - mtod(mp, caddr_t);
3373 	} else
3374 		mp->m_len += bp - bpos;
3375 	FREE((caddr_t)cookies, M_TEMP);
3376 	FREE((caddr_t)rbuf, M_TEMP);
3377 nfsmout:
3378 	if (vp)
3379 		vrele(vp);
3380 	return(error);
3381 }
3382 
3383 /*
3384  * nfs commit service
3385  */
3386 int
3387 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3388 	     struct thread *td, struct mbuf **mrq)
3389 {
3390 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3391 	struct sockaddr *nam = nfsd->nd_nam;
3392 	caddr_t dpos = nfsd->nd_dpos;
3393 	struct ucred *cred = &nfsd->nd_cr;
3394 	struct vattr bfor, aft;
3395 	struct vnode *vp = NULL;
3396 	nfsfh_t nfh;
3397 	fhandle_t *fhp;
3398 	u_int32_t *tl;
3399 	int32_t t1;
3400 	caddr_t bpos;
3401 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3402 	char *cp2;
3403 	struct mbuf *mb, *mb2, *mreq;
3404 	u_quad_t off;
3405 
3406 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3407 	fhp = &nfh.fh_generic;
3408 	nfsm_srvmtofh(fhp);
3409 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3410 
3411 	/*
3412 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3413 	 * count parameters, so these arguments are useless (someday maybe).
3414 	 */
3415 	off = fxdr_hyper(tl);
3416 	tl += 2;
3417 	cnt = fxdr_unsigned(int, *tl);
3418 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3419 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3420 	if (error) {
3421 		nfsm_reply(2 * NFSX_UNSIGNED);
3422 		nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3423 		error = 0;
3424 		goto nfsmout;
3425 	}
3426 	for_ret = VOP_GETATTR(vp, &bfor);
3427 
3428 	if (cnt > MAX_COMMIT_COUNT) {
3429 		/*
3430 		 * Give up and do the whole thing
3431 		 */
3432 		if (vp->v_object &&
3433 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3434 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3435 		}
3436 		error = VOP_FSYNC(vp, MNT_WAIT);
3437 	} else {
3438 		/*
3439 		 * Locate and synchronously write any buffers that fall
3440 		 * into the requested range.  Note:  we are assuming that
3441 		 * f_iosize is a power of 2.
3442 		 */
3443 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3444 		int iomask = iosize - 1;
3445 		off_t loffset;
3446 
3447 		/*
3448 		 * Align to iosize boundry, super-align to page boundry.
3449 		 */
3450 		if (off & iomask) {
3451 			cnt += off & iomask;
3452 			off &= ~(u_quad_t)iomask;
3453 		}
3454 		if (off & PAGE_MASK) {
3455 			cnt += off & PAGE_MASK;
3456 			off &= ~(u_quad_t)PAGE_MASK;
3457 		}
3458 		loffset = off;
3459 
3460 		if (vp->v_object &&
3461 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3462 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3463 		}
3464 
3465 		crit_enter();
3466 		while (cnt > 0) {
3467 			struct buf *bp;
3468 
3469 			/*
3470 			 * If we have a buffer and it is marked B_DELWRI we
3471 			 * have to lock and write it.  Otherwise the prior
3472 			 * write is assumed to have already been committed.
3473 			 */
3474 			if ((bp = findblk(vp, loffset)) != NULL && (bp->b_flags & B_DELWRI)) {
3475 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
3476 					if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) == 0)
3477 						BUF_UNLOCK(bp);
3478 					continue; /* retry */
3479 				}
3480 				bremfree(bp);
3481 				bp->b_flags &= ~B_ASYNC;
3482 				bwrite(bp);
3483 				++nfs_commit_miss;
3484 			}
3485 			++nfs_commit_blks;
3486 			if (cnt < iosize)
3487 				break;
3488 			cnt -= iosize;
3489 			loffset += iosize;
3490 		}
3491 		crit_exit();
3492 	}
3493 
3494 	aft_ret = VOP_GETATTR(vp, &aft);
3495 	vput(vp);
3496 	vp = NULL;
3497 	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3498 	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3499 	if (!error) {
3500 		nfsm_build(tl, u_int32_t *, NFSX_V3WRITEVERF);
3501 		if (nfsver.tv_sec == 0)
3502 			nfsver = boottime;
3503 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3504 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3505 	} else {
3506 		error = 0;
3507 	}
3508 nfsmout:
3509 	if (vp)
3510 		vput(vp);
3511 	return(error);
3512 }
3513 
3514 /*
3515  * nfs statfs service
3516  */
3517 int
3518 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3519 	     struct thread *td, struct mbuf **mrq)
3520 {
3521 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3522 	struct sockaddr *nam = nfsd->nd_nam;
3523 	caddr_t dpos = nfsd->nd_dpos;
3524 	struct ucred *cred = &nfsd->nd_cr;
3525 	struct statfs *sf;
3526 	struct nfs_statfs *sfp;
3527 	u_int32_t *tl;
3528 	int32_t t1;
3529 	caddr_t bpos;
3530 	int error = 0, rdonly, getret = 1;
3531 	int v3 = (nfsd->nd_flag & ND_NFSV3);
3532 	char *cp2;
3533 	struct mbuf *mb, *mb2, *mreq;
3534 	struct vnode *vp = NULL;
3535 	struct vattr at;
3536 	nfsfh_t nfh;
3537 	fhandle_t *fhp;
3538 	struct statfs statfs;
3539 	u_quad_t tval;
3540 
3541 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3542 	fhp = &nfh.fh_generic;
3543 	nfsm_srvmtofh(fhp);
3544 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3545 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3546 	if (error) {
3547 		nfsm_reply(NFSX_UNSIGNED);
3548 		nfsm_srvpostop_attr(getret, &at);
3549 		error = 0;
3550 		goto nfsmout;
3551 	}
3552 	sf = &statfs;
3553 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3554 	getret = VOP_GETATTR(vp, &at);
3555 	vput(vp);
3556 	vp = NULL;
3557 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
3558 	if (v3)
3559 		nfsm_srvpostop_attr(getret, &at);
3560 	if (error) {
3561 		error = 0;
3562 		goto nfsmout;
3563 	}
3564 	nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
3565 	if (v3) {
3566 		tval = (u_quad_t)sf->f_blocks;
3567 		tval *= (u_quad_t)sf->f_bsize;
3568 		txdr_hyper(tval, &sfp->sf_tbytes);
3569 		tval = (u_quad_t)sf->f_bfree;
3570 		tval *= (u_quad_t)sf->f_bsize;
3571 		txdr_hyper(tval, &sfp->sf_fbytes);
3572 		tval = (u_quad_t)sf->f_bavail;
3573 		tval *= (u_quad_t)sf->f_bsize;
3574 		txdr_hyper(tval, &sfp->sf_abytes);
3575 		sfp->sf_tfiles.nfsuquad[0] = 0;
3576 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3577 		sfp->sf_ffiles.nfsuquad[0] = 0;
3578 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3579 		sfp->sf_afiles.nfsuquad[0] = 0;
3580 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3581 		sfp->sf_invarsec = 0;
3582 	} else {
3583 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3584 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3585 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3586 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3587 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3588 	}
3589 nfsmout:
3590 	if (vp)
3591 		vput(vp);
3592 	return(error);
3593 }
3594 
3595 /*
3596  * nfs fsinfo service
3597  */
3598 int
3599 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3600 	     struct thread *td, struct mbuf **mrq)
3601 {
3602 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3603 	struct sockaddr *nam = nfsd->nd_nam;
3604 	caddr_t dpos = nfsd->nd_dpos;
3605 	struct ucred *cred = &nfsd->nd_cr;
3606 	u_int32_t *tl;
3607 	struct nfsv3_fsinfo *sip;
3608 	int32_t t1;
3609 	caddr_t bpos;
3610 	int error = 0, rdonly, getret = 1, pref;
3611 	char *cp2;
3612 	struct mbuf *mb, *mb2, *mreq;
3613 	struct vnode *vp = NULL;
3614 	struct vattr at;
3615 	nfsfh_t nfh;
3616 	fhandle_t *fhp;
3617 	u_quad_t maxfsize;
3618 	struct statfs sb;
3619 
3620 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3621 	fhp = &nfh.fh_generic;
3622 	nfsm_srvmtofh(fhp);
3623 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3624 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3625 	if (error) {
3626 		nfsm_reply(NFSX_UNSIGNED);
3627 		nfsm_srvpostop_attr(getret, &at);
3628 		error = 0;
3629 		goto nfsmout;
3630 	}
3631 
3632 	/* XXX Try to make a guess on the max file size. */
3633 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3634 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3635 
3636 	getret = VOP_GETATTR(vp, &at);
3637 	vput(vp);
3638 	vp = NULL;
3639 	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
3640 	nfsm_srvpostop_attr(getret, &at);
3641 	nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
3642 
3643 	/*
3644 	 * XXX
3645 	 * There should be file system VFS OP(s) to get this information.
3646 	 * For now, assume ufs.
3647 	 */
3648 	if (slp->ns_so->so_type == SOCK_DGRAM)
3649 		pref = NFS_MAXDGRAMDATA;
3650 	else
3651 		pref = NFS_MAXDATA;
3652 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3653 	sip->fs_rtpref = txdr_unsigned(pref);
3654 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3655 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3656 	sip->fs_wtpref = txdr_unsigned(pref);
3657 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3658 	sip->fs_dtpref = txdr_unsigned(pref);
3659 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3660 	sip->fs_timedelta.nfsv3_sec = 0;
3661 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3662 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3663 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3664 		NFSV3FSINFO_CANSETTIME);
3665 nfsmout:
3666 	if (vp)
3667 		vput(vp);
3668 	return(error);
3669 }
3670 
3671 /*
3672  * nfs pathconf service
3673  */
3674 int
3675 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3676 	       struct thread *td, struct mbuf **mrq)
3677 {
3678 	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3679 	struct sockaddr *nam = nfsd->nd_nam;
3680 	caddr_t dpos = nfsd->nd_dpos;
3681 	struct ucred *cred = &nfsd->nd_cr;
3682 	u_int32_t *tl;
3683 	struct nfsv3_pathconf *pc;
3684 	int32_t t1;
3685 	caddr_t bpos;
3686 	int error = 0, rdonly, getret = 1;
3687 	register_t linkmax, namemax, chownres, notrunc;
3688 	char *cp2;
3689 	struct mbuf *mb, *mb2, *mreq;
3690 	struct vnode *vp = NULL;
3691 	struct vattr at;
3692 	nfsfh_t nfh;
3693 	fhandle_t *fhp;
3694 
3695 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3696 	fhp = &nfh.fh_generic;
3697 	nfsm_srvmtofh(fhp);
3698 	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
3699 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3700 	if (error) {
3701 		nfsm_reply(NFSX_UNSIGNED);
3702 		nfsm_srvpostop_attr(getret, &at);
3703 		error = 0;
3704 		goto nfsmout;
3705 	}
3706 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3707 	if (!error)
3708 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3709 	if (!error)
3710 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3711 	if (!error)
3712 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3713 	getret = VOP_GETATTR(vp, &at);
3714 	vput(vp);
3715 	vp = NULL;
3716 	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
3717 	nfsm_srvpostop_attr(getret, &at);
3718 	if (error) {
3719 		error = 0;
3720 		goto nfsmout;
3721 	}
3722 	nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
3723 
3724 	pc->pc_linkmax = txdr_unsigned(linkmax);
3725 	pc->pc_namemax = txdr_unsigned(namemax);
3726 	pc->pc_notrunc = txdr_unsigned(notrunc);
3727 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3728 
3729 	/*
3730 	 * These should probably be supported by VOP_PATHCONF(), but
3731 	 * until msdosfs is exportable (why would you want to?), the
3732 	 * Unix defaults should be ok.
3733 	 */
3734 	pc->pc_caseinsensitive = nfs_false;
3735 	pc->pc_casepreserving = nfs_true;
3736 nfsmout:
3737 	if (vp)
3738 		vput(vp);
3739 	return(error);
3740 }
3741 
3742 /*
3743  * Null operation, used by clients to ping server
3744  */
3745 /* ARGSUSED */
3746 int
3747 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3748 	   struct thread *td, struct mbuf **mrq)
3749 {
3750 	struct mbuf *mrep = nfsd->nd_mrep;
3751 	caddr_t bpos;
3752 	int error = NFSERR_RETVOID;
3753 	struct mbuf *mb, *mreq;
3754 
3755 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3756 	nfsm_reply(0);
3757 	nfsm_srvdone;
3758 }
3759 
3760 /*
3761  * No operation, used for obsolete procedures
3762  */
3763 /* ARGSUSED */
3764 int
3765 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3766 	   struct thread *td, struct mbuf **mrq)
3767 {
3768 	struct mbuf *mrep = nfsd->nd_mrep;
3769 	caddr_t bpos;
3770 	int error;
3771 	struct mbuf *mb, *mreq;
3772 
3773 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3774 	if (nfsd->nd_repstat)
3775 		error = nfsd->nd_repstat;
3776 	else
3777 		error = EPROCUNAVAIL;
3778 	nfsm_reply(0);
3779 	error = 0;
3780 	nfsm_srvdone;
3781 }
3782 
3783 /*
3784  * Perform access checking for vnodes obtained from file handles that would
3785  * refer to files already opened by a Unix client. You cannot just use
3786  * vn_writechk() and VOP_ACCESS() for two reasons.
3787  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3788  * 2 - The owner is to be given access irrespective of mode bits for some
3789  *     operations, so that processes that chmod after opening a file don't
3790  *     break. I don't like this because it opens a security hole, but since
3791  *     the nfs server opens a security hole the size of a barn door anyhow,
3792  *     what the heck.
3793  *
3794  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3795  * will return EPERM instead of EACCESS. EPERM is always an error.
3796  */
3797 static int
3798 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred,
3799 	     int rdonly, struct thread *td, int override)
3800 {
3801 	struct vattr vattr;
3802 	int error;
3803 
3804 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3805 	if (flags & VWRITE) {
3806 		/* Just vn_writechk() changed to check rdonly */
3807 		/*
3808 		 * Disallow write attempts on read-only file systems;
3809 		 * unless the file is a socket or a block or character
3810 		 * device resident on the file system.
3811 		 */
3812 		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3813 			switch (vp->v_type) {
3814 			case VREG:
3815 			case VDIR:
3816 			case VLNK:
3817 				return (EROFS);
3818 			default:
3819 				break;
3820 			}
3821 		}
3822 		/*
3823 		 * If there's shared text associated with
3824 		 * the inode, we can't allow writing.
3825 		 */
3826 		if (vp->v_flag & VTEXT)
3827 			return (ETXTBSY);
3828 	}
3829 	error = VOP_GETATTR(vp, &vattr);
3830 	if (error)
3831 		return (error);
3832 	error = VOP_ACCESS(vp, flags, cred);
3833 	/*
3834 	 * Allow certain operations for the owner (reads and writes
3835 	 * on files that are already open).
3836 	 */
3837 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3838 		error = 0;
3839 	return error;
3840 }
3841 #endif /* NFS_NOSERVER */
3842 
3843