xref: /dragonfly/sys/vfs/nfs/nfs_serv.c (revision cfd1aba3)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
33  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
34  */
35 
36 /*
37  * nfs version 2 and 3 server calls to vnode ops
38  * - these routines generally have 3 phases
39  *   1 - break down and validate rpc request in mbuf list
40  *   2 - do the vnode ops for the request
41  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
42  *   3 - build the rpc reply in an mbuf list
43  *   nb:
44  *	- do not mix the phases, since the nfsm_?? macros can return failures
45  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
46  *
47  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
48  *	error number iff error != 0 whereas
49  *	returning an error from the server function implies a fatal error
50  *	such as a badly constructed rpc request that should be dropped without
51  *	a reply.
52  *	For Version 3, nfsm_reply() does not return for the error case, since
53  *	most version 3 rpcs return more than the status for error cases.
54  *
55  * Other notes:
56  *	Warning: always pay careful attention to resource cleanup on return
57  *	and note that nfsm_*() macros can terminate a procedure on certain
58  *	errors.
59  */
60 
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/proc.h>
64 #include <sys/priv.h>
65 #include <sys/nlookup.h>
66 #include <sys/namei.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/mount.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/dirent.h>
75 #include <sys/stat.h>
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <sys/buf.h>
79 
80 #include <vm/vm.h>
81 #include <vm/vm_extern.h>
82 #include <vm/vm_object.h>
83 
84 #include <sys/buf2.h>
85 
86 #include <sys/thread2.h>
87 
88 #include "nfsproto.h"
89 #include "rpcv2.h"
90 #include "nfs.h"
91 #include "xdr_subs.h"
92 #include "nfsm_subs.h"
93 
/*
 * Debug tracing hook.  The argument is a complete, parenthesized kprintf()
 * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)).  The
 * call expands to nothing unless NFSRV_DEBUG is defined.
 */
#ifndef NFSRV_DEBUG
#define nfsdbprintf(info)
#else
#define nfsdbprintf(info)	kprintf info
#endif

/*
 * MAX_REORDERED_RPC scales the offset window inside which an out-of-order
 * request is still treated as part of a sequential stream by
 * nfsrv_sequential_heuristic().
 */
#define MAX_REORDERED_RPC	(16)
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Sequential-access heuristic table parameters:
 *	NUM_HEURISTIC	number of entries in nfsheur[]
 *	NHUSE_INIT	use count given to a freshly claimed entry
 *	NHUSE_INC	use count added each time an entry is consulted
 *	NHUSE_MAX	upper bound for an entry's use count
 */
#define NUM_HEURISTIC		1031
#define NHUSE_INIT		64
#define NHUSE_INC		16
#define NHUSE_MAX		2048
107 
108 static struct nfsheur {
109     struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
110     off_t nh_nextoff;		/* next offset for sequential detection */
111     int nh_use;			/* use count for selection */
112     int nh_seqcount;		/* heuristic */
113 } nfsheur[NUM_HEURISTIC];
114 
115 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
116 		      NFFIFO, NFNON };
117 #ifndef NFS_NOSERVER
118 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
119 		      NFCHR, NFNON };
120 
121 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
122 int nfsrvw_procrastinate_v3 = 0;
123 
124 static struct timespec	nfsver;
125 
126 SYSCTL_DECL(_vfs_nfs);
127 
128 int nfs_async;
129 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
130     "Enable unstable and fast writes");
131 static int nfs_commit_blks;
132 static int nfs_commit_miss;
133 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
134     "Number of committed blocks");
135 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
136     "Number of nfs blocks committed from dirty buffers");
137 
138 static int nfsrv_access (struct mount *, struct vnode *, int,
139 			struct ucred *, int, struct thread *, int);
140 static void nfsrvw_coalesce (struct nfsrv_descript *,
141 		struct nfsrv_descript *);
142 
143 /*
144  * Heuristic to detect sequential operation.
145  */
146 static struct nfsheur *
147 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp, int writeop)
148 {
149 	struct nfsheur *nh;
150 	int hi, try;
151 
152 	/* Locate best candidate */
153 	try = 32;
154 	hi = ((int)(vm_offset_t) vp / sizeof(struct vnode)) % NUM_HEURISTIC;
155 	nh = &nfsheur[hi];
156 
157 	while (try--) {
158 		if (nfsheur[hi].nh_vp == vp) {
159 			nh = &nfsheur[hi];
160 			break;
161 		}
162 		if (nfsheur[hi].nh_use > 0)
163 			--nfsheur[hi].nh_use;
164 		hi = (hi + 1) % NUM_HEURISTIC;
165 		if (nfsheur[hi].nh_use < nh->nh_use)
166 			nh = &nfsheur[hi];
167 	}
168 
169 	/* Initialize hint if this is a new file */
170 	if (nh->nh_vp != vp) {
171 		nh->nh_vp = vp;
172 		nh->nh_nextoff = uio->uio_offset;
173 		nh->nh_use = NHUSE_INIT;
174 		if (uio->uio_offset == 0)
175 			nh->nh_seqcount = 4;
176 		else
177 			nh->nh_seqcount = 1;
178 	}
179 
180 	/*
181 	 * Calculate heuristic
182 	 *
183 	 * See vfs_vnops.c:sequential_heuristic().
184 	 */
185 	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
186 	    uio->uio_offset == nh->nh_nextoff) {
187 		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
188 		if (nh->nh_seqcount > IO_SEQMAX)
189 			nh->nh_seqcount = IO_SEQMAX;
190 	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
191 		imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
192 		    /* Probably a reordered RPC, leave seqcount alone. */
193 	} else if (nh->nh_seqcount > 1) {
194 		nh->nh_seqcount /= 2;
195 	} else {
196 		nh->nh_seqcount = 0;
197 	}
198 	nh->nh_use += NHUSE_INC;
199 	if (nh->nh_use > NHUSE_MAX)
200 		nh->nh_use = NHUSE_MAX;
201 	return (nh);
202 }
203 
204 /*
205  * nfs v3 access service
206  */
207 int
208 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
209 	      struct thread *td, struct mbuf **mrq)
210 {
211 	struct sockaddr *nam = nfsd->nd_nam;
212 	struct ucred *cred = &nfsd->nd_cr;
213 	struct vnode *vp = NULL;
214 	struct mount *mp = NULL;
215 	nfsfh_t nfh;
216 	fhandle_t *fhp;
217 	int error = 0, rdonly, getret;
218 	struct vattr vattr, *vap = &vattr;
219 	u_long testmode, nfsmode;
220 	struct nfsm_info info;
221 	u_int32_t *tl;
222 
223 	info.dpos = nfsd->nd_dpos;
224 	info.md = nfsd->nd_md;
225 	info.mrep = nfsd->nd_mrep;
226 	info.mreq = NULL;
227 
228 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
229 	fhp = &nfh.fh_generic;
230 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
231 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
232 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
233 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
234 	if (error) {
235 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
236 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
237 		error = 0;
238 		goto nfsmout;
239 	}
240 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
241 	if ((nfsmode & NFSV3ACCESS_READ) &&
242 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
243 		nfsmode &= ~NFSV3ACCESS_READ;
244 	if (vp->v_type == VDIR)
245 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
246 			NFSV3ACCESS_DELETE);
247 	else
248 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
249 	if ((nfsmode & testmode) &&
250 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
251 		nfsmode &= ~testmode;
252 	if (vp->v_type == VDIR)
253 		testmode = NFSV3ACCESS_LOOKUP;
254 	else
255 		testmode = NFSV3ACCESS_EXECUTE;
256 	if ((nfsmode & testmode) &&
257 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
258 		nfsmode &= ~testmode;
259 	getret = VOP_GETATTR(vp, vap);
260 	vput(vp);
261 	vp = NULL;
262 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
263 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
264 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
265 	tl = nfsm_build(&info, NFSX_UNSIGNED);
266 	*tl = txdr_unsigned(nfsmode);
267 nfsmout:
268 	*mrq = info.mreq;
269 	if (vp)
270 		vput(vp);
271 	return(error);
272 }
273 
274 /*
275  * nfs getattr service
276  */
277 int
278 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
279 	      struct thread *td, struct mbuf **mrq)
280 {
281 	struct sockaddr *nam = nfsd->nd_nam;
282 	struct ucred *cred = &nfsd->nd_cr;
283 	struct nfs_fattr *fp;
284 	struct vattr va;
285 	struct vattr *vap = &va;
286 	struct vnode *vp = NULL;
287 	struct mount *mp = NULL;
288 	nfsfh_t nfh;
289 	fhandle_t *fhp;
290 	int error = 0, rdonly;
291 	struct nfsm_info info;
292 
293 	info.mrep = nfsd->nd_mrep;
294 	info.md = nfsd->nd_md;
295 	info.dpos = nfsd->nd_dpos;
296 	info.mreq = NULL;
297 
298 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
299 	fhp = &nfh.fh_generic;
300 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
301 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
302 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
303 	if (error) {
304 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
305 		error = 0;
306 		goto nfsmout;
307 	}
308 	error = VOP_GETATTR(vp, vap);
309 	vput(vp);
310 	vp = NULL;
311 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
312 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
313 	if (error) {
314 		error = 0;
315 		goto nfsmout;
316 	}
317 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
318 	nfsm_srvfattr(nfsd, vap, fp);
319 	/* fall through */
320 
321 nfsmout:
322 	*mrq = info.mreq;
323 	if (vp)
324 		vput(vp);
325 	return(error);
326 }
327 
328 /*
329  * nfs setattr service
330  */
331 int
332 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
333 	      struct thread *td, struct mbuf **mrq)
334 {
335 	struct sockaddr *nam = nfsd->nd_nam;
336 	struct ucred *cred = &nfsd->nd_cr;
337 	struct vattr va, preat;
338 	struct vattr *vap = &va;
339 	struct nfsv2_sattr *sp;
340 	struct nfs_fattr *fp;
341 	struct vnode *vp = NULL;
342 	struct mount *mp = NULL;
343 	nfsfh_t nfh;
344 	fhandle_t *fhp;
345 	u_int32_t *tl;
346 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
347 	int gcheck = 0;
348 	struct timespec guard;
349 	struct nfsm_info info;
350 
351 	info.mrep = nfsd->nd_mrep;
352 	info.mreq = NULL;
353 	info.md = nfsd->nd_md;
354 	info.dpos = nfsd->nd_dpos;
355 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
356 
357 	guard.tv_sec = 0;	/* fix compiler warning */
358 	guard.tv_nsec = 0;
359 
360 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
361 	fhp = &nfh.fh_generic;
362 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
363 	VATTR_NULL(vap);
364 	if (info.v3) {
365 		ERROROUT(nfsm_srvsattr(&info, vap));
366 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
367 		gcheck = fxdr_unsigned(int, *tl);
368 		if (gcheck) {
369 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
370 			fxdr_nfsv3time(tl, &guard);
371 		}
372 	} else {
373 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
374 		/*
375 		 * Nah nah nah nah na nah
376 		 * There is a bug in the Sun client that puts 0xffff in the mode
377 		 * field of sattr when it should put in 0xffffffff. The u_short
378 		 * doesn't sign extend.
379 		 * --> check the low order 2 bytes for 0xffff
380 		 */
381 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
382 			vap->va_mode = nfstov_mode(sp->sa_mode);
383 		if (sp->sa_uid != nfs_xdrneg1)
384 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
385 		if (sp->sa_gid != nfs_xdrneg1)
386 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
387 		if (sp->sa_size != nfs_xdrneg1)
388 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
389 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
390 #ifdef notyet
391 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
392 #else
393 			vap->va_atime.tv_sec =
394 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
395 			vap->va_atime.tv_nsec = 0;
396 #endif
397 		}
398 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
399 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
400 
401 	}
402 
403 	/*
404 	 * Now that we have all the fields, lets do it.
405 	 */
406 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
407 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
408 	if (error) {
409 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
410 				      2 * NFSX_UNSIGNED, &error));
411 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
412 				 postat_ret, vap);
413 		error = 0;
414 		goto nfsmout;
415 	}
416 
417 	/*
418 	 * vp now an active resource, pay careful attention to cleanup
419 	 */
420 
421 	if (info.v3) {
422 		error = preat_ret = VOP_GETATTR(vp, &preat);
423 		if (!error && gcheck &&
424 			(preat.va_ctime.tv_sec != guard.tv_sec ||
425 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
426 			error = NFSERR_NOT_SYNC;
427 		if (error) {
428 			vput(vp);
429 			vp = NULL;
430 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
431 					      NFSX_WCCDATA(info.v3), &error));
432 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
433 					 postat_ret, vap);
434 			error = 0;
435 			goto nfsmout;
436 		}
437 	}
438 
439 	/*
440 	 * If the size is being changed write acces is required, otherwise
441 	 * just check for a read only file system.
442 	 */
443 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
444 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
445 			error = EROFS;
446 			goto out;
447 		}
448 	} else {
449 		if (vp->v_type == VDIR) {
450 			error = EISDIR;
451 			goto out;
452 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
453 			    td, 0)) != 0){
454 			goto out;
455 		}
456 	}
457 	error = VOP_SETATTR(vp, vap, cred);
458 	postat_ret = VOP_GETATTR(vp, vap);
459 	if (!error)
460 		error = postat_ret;
461 out:
462 	vput(vp);
463 	vp = NULL;
464 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
465 		   NFSX_WCCORFATTR(info.v3), &error));
466 	if (info.v3) {
467 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
468 				 postat_ret, vap);
469 		error = 0;
470 		goto nfsmout;
471 	} else {
472 		fp = nfsm_build(&info, NFSX_V2FATTR);
473 		nfsm_srvfattr(nfsd, vap, fp);
474 	}
475 	/* fall through */
476 
477 nfsmout:
478 	*mrq = info.mreq;
479 	if (vp)
480 		vput(vp);
481 	return(error);
482 }
483 
484 /*
485  * nfs lookup rpc
486  */
487 int
488 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
489 	     struct thread *td, struct mbuf **mrq)
490 {
491 	struct sockaddr *nam = nfsd->nd_nam;
492 	struct ucred *cred = &nfsd->nd_cr;
493 	struct nfs_fattr *fp;
494 	struct nlookupdata nd;
495 	struct vnode *vp;
496 	struct vnode *dirp;
497 	struct nchandle nch;
498 	nfsfh_t nfh;
499 	fhandle_t *fhp;
500 	int error = 0, len, dirattr_ret = 1;
501 	int pubflag;
502 	struct vattr va, dirattr, *vap = &va;
503 	struct nfsm_info info;
504 
505 	info.mrep = nfsd->nd_mrep;
506 	info.mreq = NULL;
507 	info.md = nfsd->nd_md;
508 	info.dpos = nfsd->nd_dpos;
509 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
510 
511 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
512 	nlookup_zero(&nd);
513 	dirp = NULL;
514 	vp = NULL;
515 
516 	fhp = &nfh.fh_generic;
517 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
518 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
519 
520 	pubflag = nfs_ispublicfh(fhp);
521 
522 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
523 		fhp, len, slp, nam, &info.md, &info.dpos,
524 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
525 
526 	/*
527 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
528 	 * structure in case macros jump to nfsmout.
529 	 */
530 
531 	if (error) {
532 		if (dirp) {
533 			if (info.v3)
534 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
535 			vrele(dirp);
536 			dirp = NULL;
537 		}
538 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
539 				      NFSX_POSTOPATTR(info.v3), &error));
540 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
541 		error = 0;
542 		goto nfsmout;
543 	}
544 
545 	/*
546 	 * Locate index file for public filehandle
547 	 *
548 	 * error is 0 on entry and 0 on exit from this block.
549 	 */
550 
551 	if (pubflag) {
552 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
553 			/*
554 			 * Setup call to lookup() to see if we can find
555 			 * the index file. Arguably, this doesn't belong
556 			 * in a kernel.. Ugh.  If an error occurs, do not
557 			 * try to install an index file and then clear the
558 			 * error.
559 			 *
560 			 * When we replace nd with ind and redirect ndp,
561 			 * maintenance of ni_startdir and ni_vp shift to
562 			 * ind and we have to clean them up in the old nd.
563 			 * However, the cnd resource continues to be maintained
564 			 * via the original nd.  Confused?  You aren't alone!
565 			 */
566 			vn_unlock(vp);
567 			cache_copy(&nd.nl_nch, &nch);
568 			nlookup_done(&nd);
569 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
570 						UIO_SYSSPACE, 0, cred, &nch);
571 			cache_drop(&nch);
572 			if (error == 0)
573 				error = nlookup(&nd);
574 
575 			if (error == 0) {
576 				/*
577 				 * Found an index file. Get rid of
578 				 * the old references.  transfer vp and
579 				 * load up the new vp.  Fortunately we do
580 				 * not have to deal with dvp, that would be
581 				 * a huge mess.
582 				 */
583 				if (dirp)
584 					vrele(dirp);
585 				dirp = vp;
586 				vp = NULL;
587 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
588 							LK_EXCLUSIVE, &vp);
589 				KKASSERT(error == 0);
590 			}
591 			error = 0;
592 		}
593 		/*
594 		 * If the public filehandle was used, check that this lookup
595 		 * didn't result in a filehandle outside the publicly exported
596 		 * filesystem.  We clear the poor vp here to avoid lockups due
597 		 * to NFS I/O.
598 		 */
599 
600 		if (vp->v_mount != nfs_pub.np_mount) {
601 			vput(vp);
602 			vp = NULL;
603 			error = EPERM;
604 		}
605 	}
606 
607 	if (dirp) {
608 		if (info.v3)
609 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
610 		vrele(dirp);
611 		dirp = NULL;
612 	}
613 
614 	/*
615 	 * Resources at this point:
616 	 *	ndp->ni_vp	may not be NULL
617 	 *
618 	 */
619 
620 	if (error) {
621 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
622 				      NFSX_POSTOPATTR(info.v3), &error));
623 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
624 		error = 0;
625 		goto nfsmout;
626 	}
627 
628 	/*
629 	 * Clear out some resources prior to potentially blocking.  This
630 	 * is not as critical as ni_dvp resources in other routines, but
631 	 * it helps.
632 	 */
633 	nlookup_done(&nd);
634 
635 	/*
636 	 * Get underlying attribute, then release remaining resources ( for
637 	 * the same potential blocking reason ) and reply.
638 	 */
639 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
640 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
641 	if (!error)
642 		error = VOP_GETATTR(vp, vap);
643 
644 	vput(vp);
645 	vp = NULL;
646 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
647 			      NFSX_SRVFH(info.v3) +
648 			      NFSX_POSTOPORFATTR(info.v3) +
649 			      NFSX_POSTOPATTR(info.v3),
650 			      &error));
651 	if (error) {
652 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
653 		error = 0;
654 		goto nfsmout;
655 	}
656 	nfsm_srvfhtom(&info, fhp);
657 	if (info.v3) {
658 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
659 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
660 	} else {
661 		fp = nfsm_build(&info, NFSX_V2FATTR);
662 		nfsm_srvfattr(nfsd, vap, fp);
663 	}
664 
665 nfsmout:
666 	*mrq = info.mreq;
667 	if (dirp)
668 		vrele(dirp);
669 	nlookup_done(&nd);		/* may be called twice */
670 	if (vp)
671 		vput(vp);
672 	return (error);
673 }
674 
675 /*
676  * nfs readlink service
677  */
678 int
679 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
680 	       struct thread *td, struct mbuf **mrq)
681 {
682 	struct sockaddr *nam = nfsd->nd_nam;
683 	struct ucred *cred = &nfsd->nd_cr;
684 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
685 	struct iovec *ivp = iv;
686 	u_int32_t *tl;
687 	int error = 0, rdonly, i, tlen, len, getret;
688 	struct mbuf *mp1, *mp2, *mp3;
689 	struct vnode *vp = NULL;
690 	struct mount *mp = NULL;
691 	struct vattr attr;
692 	nfsfh_t nfh;
693 	fhandle_t *fhp;
694 	struct uio io, *uiop = &io;
695 	struct nfsm_info info;
696 
697 	info.mrep = nfsd->nd_mrep;
698 	info.mreq = NULL;
699 	info.md = nfsd->nd_md;
700 	info.dpos = nfsd->nd_dpos;
701 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
702 
703 	bzero(&io, sizeof(struct uio));
704 
705 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
706 #ifndef nolint
707 	mp2 = NULL;
708 #endif
709 	mp3 = NULL;
710 	fhp = &nfh.fh_generic;
711 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
712 	len = 0;
713 	i = 0;
714 	while (len < NFS_MAXPATHLEN) {
715 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
716 		mp1->m_len = MCLBYTES;
717 		if (len == 0)
718 			mp3 = mp2 = mp1;
719 		else {
720 			mp2->m_next = mp1;
721 			mp2 = mp1;
722 		}
723 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
724 			mp1->m_len = NFS_MAXPATHLEN-len;
725 			len = NFS_MAXPATHLEN;
726 		} else
727 			len += mp1->m_len;
728 		ivp->iov_base = mtod(mp1, caddr_t);
729 		ivp->iov_len = mp1->m_len;
730 		i++;
731 		ivp++;
732 	}
733 	uiop->uio_iov = iv;
734 	uiop->uio_iovcnt = i;
735 	uiop->uio_offset = 0;
736 	uiop->uio_resid = len;
737 	uiop->uio_rw = UIO_READ;
738 	uiop->uio_segflg = UIO_SYSSPACE;
739 	uiop->uio_td = NULL;
740 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
741 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
742 	if (error) {
743 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
744 				      2 * NFSX_UNSIGNED, &error));
745 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
746 		error = 0;
747 		goto nfsmout;
748 	}
749 	if (vp->v_type != VLNK) {
750 		if (info.v3)
751 			error = EINVAL;
752 		else
753 			error = ENXIO;
754 		goto out;
755 	}
756 	error = VOP_READLINK(vp, uiop, cred);
757 out:
758 	getret = VOP_GETATTR(vp, &attr);
759 	vput(vp);
760 	vp = NULL;
761 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
762 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
763 			     &error));
764 	if (info.v3) {
765 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
766 		if (error) {
767 			error = 0;
768 			goto nfsmout;
769 		}
770 	}
771 	if (uiop->uio_resid > 0) {
772 		len -= uiop->uio_resid;
773 		tlen = nfsm_rndup(len);
774 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
775 	}
776 	tl = nfsm_build(&info, NFSX_UNSIGNED);
777 	*tl = txdr_unsigned(len);
778 	info.mb->m_next = mp3;
779 	mp3 = NULL;
780 nfsmout:
781 	*mrq = info.mreq;
782 	if (mp3)
783 		m_freem(mp3);
784 	if (vp)
785 		vput(vp);
786 	return(error);
787 }
788 
789 /*
790  * nfs read service
791  */
792 int
793 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
794 	   struct thread *td, struct mbuf **mrq)
795 {
796 	struct nfsm_info info;
797 	struct sockaddr *nam = nfsd->nd_nam;
798 	struct ucred *cred = &nfsd->nd_cr;
799 	struct iovec *iv;
800 	struct iovec *iv2;
801 	struct mbuf *m;
802 	struct nfs_fattr *fp;
803 	u_int32_t *tl;
804 	int i;
805 	int reqlen;
806 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
807 	struct mbuf *m2;
808 	struct vnode *vp = NULL;
809 	struct mount *mp = NULL;
810 	nfsfh_t nfh;
811 	fhandle_t *fhp;
812 	struct uio io, *uiop = &io;
813 	struct vattr va, *vap = &va;
814 	struct nfsheur *nh;
815 	off_t off;
816 	int ioflag = 0;
817 
818 	info.mrep = nfsd->nd_mrep;
819 	info.mreq = NULL;
820 	info.md = nfsd->nd_md;
821 	info.dpos = nfsd->nd_dpos;
822 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
823 
824 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
825 	fhp = &nfh.fh_generic;
826 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
827 	if (info.v3) {
828 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
829 		off = fxdr_hyper(tl);
830 	} else {
831 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
832 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
833 	}
834 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
835 					    NFS_SRVMAXDATA(nfsd), &error));
836 
837 	/*
838 	 * Reference vp.  If an error occurs, vp will be invalid, but we
839 	 * have to NULL it just in case.  The macros might goto nfsmout
840 	 * as well.
841 	 */
842 
843 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
844 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
845 	if (error) {
846 		vp = NULL;
847 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
848 				      2 * NFSX_UNSIGNED, &error));
849 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
850 		error = 0;
851 		goto nfsmout;
852 	}
853 
854 	if (vp->v_type != VREG) {
855 		if (info.v3)
856 			error = EINVAL;
857 		else
858 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
859 	}
860 	if (!error) {
861 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
862 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
863 	}
864 	getret = VOP_GETATTR(vp, vap);
865 	if (!error)
866 		error = getret;
867 	if (error) {
868 		vput(vp);
869 		vp = NULL;
870 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
871 				      NFSX_POSTOPATTR(info.v3), &error));
872 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
873 		error = 0;
874 		goto nfsmout;
875 	}
876 
877 	/*
878 	 * Calculate byte count to read
879 	 */
880 
881 	if (off >= vap->va_size)
882 		cnt = 0;
883 	else if ((off + reqlen) > vap->va_size)
884 		cnt = vap->va_size - off;
885 	else
886 		cnt = reqlen;
887 
888 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
889 			      NFSX_POSTOPORFATTR(info.v3) +
890 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
891 			      &error));
892 	if (info.v3) {
893 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
894 		*tl++ = nfs_true;
895 		fp = (struct nfs_fattr *)tl;
896 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
897 	} else {
898 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
899 		fp = (struct nfs_fattr *)tl;
900 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
901 	}
902 	len = left = nfsm_rndup(cnt);
903 	if (cnt > 0) {
904 		/*
905 		 * Generate the mbuf list with the uio_iov ref. to it.
906 		 */
907 		i = 0;
908 		m = m2 = info.mb;
909 		while (left > 0) {
910 			siz = min(M_TRAILINGSPACE(m), left);
911 			if (siz > 0) {
912 				left -= siz;
913 				i++;
914 			}
915 			if (left > 0) {
916 				m = m_getcl(MB_WAIT, MT_DATA, 0);
917 				m->m_len = 0;
918 				m2->m_next = m;
919 				m2 = m;
920 			}
921 		}
922 		iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
923 		uiop->uio_iov = iv2 = iv;
924 		m = info.mb;
925 		left = len;
926 		i = 0;
927 		while (left > 0) {
928 			if (m == NULL)
929 				panic("nfsrv_read iov");
930 			siz = min(M_TRAILINGSPACE(m), left);
931 			if (siz > 0) {
932 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
933 				iv->iov_len = siz;
934 				m->m_len += siz;
935 				left -= siz;
936 				iv++;
937 				i++;
938 			}
939 			m = m->m_next;
940 		}
941 		uiop->uio_iovcnt = i;
942 		uiop->uio_offset = off;
943 		uiop->uio_resid = len;
944 		uiop->uio_rw = UIO_READ;
945 		uiop->uio_segflg = UIO_SYSSPACE;
946 		nh = nfsrv_sequential_heuristic(uiop, vp, 0);
947 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
948 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
949 		if (error == 0) {
950 			off = uiop->uio_offset;
951 			nh->nh_nextoff = off;
952 		}
953 		kfree((caddr_t)iv2, M_TEMP);
954 		if (error || (getret = VOP_GETATTR(vp, vap))) {
955 			if (!error)
956 				error = getret;
957 			m_freem(info.mreq);
958 			info.mreq = NULL;
959 			vput(vp);
960 			vp = NULL;
961 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
962 					      NFSX_POSTOPATTR(info.v3),
963 					      &error));
964 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
965 			error = 0;
966 			goto nfsmout;
967 		}
968 	} else {
969 		uiop->uio_resid = 0;
970 	}
971 	vput(vp);
972 	vp = NULL;
973 	nfsm_srvfattr(nfsd, vap, fp);
974 	tlen = len - uiop->uio_resid;
975 	cnt = cnt < tlen ? cnt : tlen;
976 	tlen = nfsm_rndup(cnt);
977 	if (len != tlen || tlen != cnt)
978 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
979 	if (info.v3) {
980 		*tl++ = txdr_unsigned(cnt);
981 		if (cnt < reqlen)
982 			*tl++ = nfs_true;
983 		else
984 			*tl++ = nfs_false;
985 	}
986 	*tl = txdr_unsigned(cnt);
987 nfsmout:
988 	*mrq = info.mreq;
989 	if (vp)
990 		vput(vp);
991 	return(error);
992 }
993 
994 /*
995  * nfs write service
996  */
997 int
998 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
999 	    struct thread *td, struct mbuf **mrq)
1000 {
1001 	struct sockaddr *nam = nfsd->nd_nam;
1002 	struct ucred *cred = &nfsd->nd_cr;
1003 	struct iovec *ivp;
1004 	int i, cnt;
1005 	struct mbuf *mp1;
1006 	struct nfs_fattr *fp;
1007 	struct iovec *iv;
1008 	struct vattr va, forat;
1009 	struct vattr *vap = &va;
1010 	u_int32_t *tl;
1011 	int error = 0, rdonly, len, forat_ret = 1;
1012 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1013 	int stable = NFSV3WRITE_FILESYNC;
1014 	struct vnode *vp = NULL;
1015 	struct mount *mp = NULL;
1016 	struct nfsheur *nh;
1017 	nfsfh_t nfh;
1018 	fhandle_t *fhp;
1019 	struct uio io, *uiop = &io;
1020 	struct nfsm_info info;
1021 	off_t off;
1022 
1023 	info.mrep = nfsd->nd_mrep;
1024 	info.mreq = NULL;
1025 	info.md = nfsd->nd_md;
1026 	info.dpos = nfsd->nd_dpos;
1027 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1028 
1029 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1030 	if (info.mrep == NULL) {
1031 		error = 0;
1032 		goto nfsmout;
1033 	}
1034 	fhp = &nfh.fh_generic;
1035 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1036 	if (info.v3) {
1037 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1038 		off = fxdr_hyper(tl);
1039 		tl += 3;
1040 		stable = fxdr_unsigned(int, *tl++);
1041 	} else {
1042 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1043 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1044 		tl += 2;
1045 		if (nfs_async)
1046 	    		stable = NFSV3WRITE_UNSTABLE;
1047 	}
1048 	retlen = len = fxdr_unsigned(int32_t, *tl);
1049 	cnt = i = 0;
1050 
1051 	/*
1052 	 * For NFS Version 2, it is not obvious what a write of zero length
1053 	 * should do, but I might as well be consistent with Version 3,
1054 	 * which is to return ok so long as there are no permission problems.
1055 	 */
1056 	if (len > 0) {
1057 	    zeroing = 1;
1058 	    mp1 = info.mrep;
1059 	    while (mp1) {
1060 		if (mp1 == info.md) {
1061 			zeroing = 0;
1062 			adjust = info.dpos - mtod(mp1, caddr_t);
1063 			mp1->m_len -= adjust;
1064 			if (mp1->m_len > 0 && adjust > 0)
1065 				mp1->m_data += adjust;
1066 		}
1067 		if (zeroing)
1068 			mp1->m_len = 0;
1069 		else if (mp1->m_len > 0) {
1070 			i += mp1->m_len;
1071 			if (i > len) {
1072 				mp1->m_len -= (i - len);
1073 				zeroing	= 1;
1074 			}
1075 			if (mp1->m_len > 0)
1076 				cnt++;
1077 		}
1078 		mp1 = mp1->m_next;
1079 	    }
1080 	}
1081 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1082 		error = EIO;
1083 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1084 				      2 * NFSX_UNSIGNED, &error));
1085 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1086 				 aftat_ret, vap);
1087 		error = 0;
1088 		goto nfsmout;
1089 	}
1090 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1091 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1092 	if (error) {
1093 		vp = NULL;
1094 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1095 				      2 * NFSX_UNSIGNED, &error));
1096 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1097 				 aftat_ret, vap);
1098 		error = 0;
1099 		goto nfsmout;
1100 	}
1101 	if (info.v3)
1102 		forat_ret = VOP_GETATTR(vp, &forat);
1103 	if (vp->v_type != VREG) {
1104 		if (info.v3)
1105 			error = EINVAL;
1106 		else
1107 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1108 	}
1109 	if (!error) {
1110 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1111 	}
1112 	if (error) {
1113 		vput(vp);
1114 		vp = NULL;
1115 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1116 				      NFSX_WCCDATA(info.v3), &error));
1117 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1118 				 aftat_ret, vap);
1119 		error = 0;
1120 		goto nfsmout;
1121 	}
1122 
1123 	if (len > 0) {
1124 	    ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1125 	    uiop->uio_iov = iv = ivp;
1126 	    uiop->uio_iovcnt = cnt;
1127 	    mp1 = info.mrep;
1128 	    while (mp1) {
1129 		if (mp1->m_len > 0) {
1130 			ivp->iov_base = mtod(mp1, caddr_t);
1131 			ivp->iov_len = mp1->m_len;
1132 			ivp++;
1133 		}
1134 		mp1 = mp1->m_next;
1135 	    }
1136 
1137 	    /*
1138 	     * XXX
1139 	     * The IO_METASYNC flag indicates that all metadata (and not just
1140 	     * enough to ensure data integrity) must be written to stable storage
1141 	     * synchronously.
1142 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1143 	     */
1144 	    if (stable == NFSV3WRITE_UNSTABLE)
1145 		ioflags = IO_NODELOCKED;
1146 	    else if (stable == NFSV3WRITE_DATASYNC)
1147 		ioflags = (IO_SYNC | IO_NODELOCKED);
1148 	    else
1149 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1150 	    uiop->uio_resid = len;
1151 	    uiop->uio_rw = UIO_WRITE;
1152 	    uiop->uio_segflg = UIO_SYSSPACE;
1153 	    uiop->uio_td = NULL;
1154 	    uiop->uio_offset = off;
1155 	    nh = nfsrv_sequential_heuristic(uiop, vp, 1);
1156 	    ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1157 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1158 	    if (error == 0)
1159 		nh->nh_nextoff = uiop->uio_offset;
1160 	    nfsstats.srvvop_writes++;
1161 	    kfree((caddr_t)iv, M_TEMP);
1162 	}
1163 	aftat_ret = VOP_GETATTR(vp, vap);
1164 	vput(vp);
1165 	vp = NULL;
1166 	if (!error)
1167 		error = aftat_ret;
1168 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1169 			      NFSX_PREOPATTR(info.v3) +
1170 			      NFSX_POSTOPORFATTR(info.v3) +
1171 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1172 			      &error));
1173 	if (info.v3) {
1174 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1175 				 aftat_ret, vap);
1176 		if (error) {
1177 			error = 0;
1178 			goto nfsmout;
1179 		}
1180 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1181 		*tl++ = txdr_unsigned(retlen);
1182 		/*
1183 		 * If nfs_async is set, then pretend the write was FILESYNC.
1184 		 */
1185 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1186 			*tl++ = txdr_unsigned(stable);
1187 		else
1188 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1189 		/*
1190 		 * Actually, there is no need to txdr these fields,
1191 		 * but it may make the values more human readable,
1192 		 * for debugging purposes.
1193 		 */
1194 		if (nfsver.tv_sec == 0)
1195 			nfsver = boottime;
1196 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1197 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1198 	} else {
1199 		fp = nfsm_build(&info, NFSX_V2FATTR);
1200 		nfsm_srvfattr(nfsd, vap, fp);
1201 	}
1202 nfsmout:
1203 	*mrq = info.mreq;
1204 	if (vp)
1205 		vput(vp);
1206 	return(error);
1207 }
1208 
1209 /*
1210  * NFS write service with write gathering support. Called when
1211  * nfsrvw_procrastinate > 0.
1212  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1213  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1214  * Jan. 1994.
 *
 * The function works in three phases:
 *
 *  1. If *ndp is non-NULL the request is taken over (*ndp is cleared),
 *     its write header is parsed, its mbuf chain is trimmed down to the
 *     write data, and the descriptor is linked onto slp->ns_tq (ordered
 *     by nd_time) and onto the delay-hash chain chosen by NWDELAYHASH()
 *     (same-handle entries ordered by nd_off).  Entries that follow a
 *     predecessor on the hash chain and satisfy NFSW_CONTIG() and
 *     NFSW_SAMECRED() are merged into it with nfsrvw_coalesce().
 *
 *  2. Each queued descriptor whose nd_time is not in the future and
 *     that has no reply yet is written with one VOP_WRITE(); a reply is
 *     then built for it and for every descriptor on its nd_coalesce list.
 *
 *  3. The first descriptor on slp->ns_tq that carries a reply is
 *     unlinked and handed back through *ndp and *mrq.  Both are set to
 *     NULL when no reply is ready.
 *
 * The function itself always returns 0; per-request errors are passed to
 * nfsm_writereply() when the reply is built.
1215  */
1216 int
1217 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1218 		  struct thread *td, struct mbuf **mrq)
1219 {
1220 	struct iovec *ivp;
1221 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1222 	struct nfs_fattr *fp;
1223 	int i;
1224 	struct iovec *iov;
1225 	struct nfsrvw_delayhash *wpp;
1226 	struct ucred *cred;
1227 	struct vattr va, forat;
1228 	u_int32_t *tl;
1229 	int error = 0, rdonly, len, forat_ret = 1;
1230 	int ioflags, aftat_ret = 1, adjust, zeroing;
1231 	struct mbuf *mp1;
1232 	struct vnode *vp = NULL;
1233 	struct mount *mp = NULL;
1234 	struct uio io, *uiop = &io;
1235 	u_quad_t cur_usec;
1236 	struct nfsm_info info;
1237 
1238 	info.mreq = NULL;
1239 
1240 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1241 #ifndef nolint
1242 	i = 0;
1243 	len = 0;
1244 #endif
	/*
	 * Phase 1: a new request was handed in.  Take it over, parse it
	 * and queue it.
	 */
1245 	if (*ndp) {
1246 	    nfsd = *ndp;
1247 	    *ndp = NULL;
1248 	    info.mrep = nfsd->nd_mrep;
1249 	    info.mreq = NULL;
1250 	    info.md = nfsd->nd_md;
1251 	    info.dpos = nfsd->nd_dpos;
1252 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1253 	    cred = &nfsd->nd_cr;
1254 	    LIST_INIT(&nfsd->nd_coalesce);
1255 	    nfsd->nd_mreq = NULL;
1256 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1257 	    cur_usec = nfs_curusec();
	    /*
	     * nd_time is the time (same units as nfs_curusec()) before
	     * which phase 2 will not perform this write.
	     */
1258 	    nfsd->nd_time = cur_usec +
1259 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1260 
1261 	    /*
1262 	     * Now, get the write header..
1263 	     */
1264 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1265 	    if (info.v3) {
1266 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1267 		nfsd->nd_off = fxdr_hyper(tl);
1268 		tl += 3;
1269 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1270 	    } else {
1271 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1272 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1273 		tl += 2;
1274 		if (nfs_async)
1275 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1276 	    }
	    /* In both versions tl now points at the data length word. */
1277 	    len = fxdr_unsigned(int32_t, *tl);
1278 	    nfsd->nd_len = len;
1279 	    nfsd->nd_eoff = nfsd->nd_off + len;
1280 
1281 	    /*
1282 	     * Trim the header out of the mbuf list and trim off any trailing
1283 	     * junk so that the mbuf list has only the write data.
	     *
	     * While 'zeroing' is set every mbuf is emptied.  It is cleared
	     * at the mbuf holding the current parse position (info.md),
	     * whose already-parsed prefix is removed, and set again once
	     * 'len' bytes of data have been accounted for in 'i'.
1284 	     */
1285 	    zeroing = 1;
1286 	    i = 0;
1287 	    mp1 = info.mrep;
1288 	    while (mp1) {
1289 		if (mp1 == info.md) {
1290 		    zeroing = 0;
1291 		    adjust = info.dpos - mtod(mp1, caddr_t);
1292 		    mp1->m_len -= adjust;
1293 		    if (mp1->m_len > 0 && adjust > 0)
1294 			mp1->m_data += adjust;
1295 		}
1296 		if (zeroing)
1297 		    mp1->m_len = 0;
1298 		else {
1299 		    i += mp1->m_len;
1300 		    if (i > len) {
1301 			mp1->m_len -= (i - len);
1302 			zeroing = 1;
1303 		    }
1304 		}
1305 		mp1 = mp1->m_next;
1306 	    }
1307 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
		/*
		 * Bad length or short data.  This label is also the only
		 * nfsmout in the function, so the NEGREPLYOUT()/NULLOUT()
		 * macros above presumably jump here on a parse failure.
		 * The request is freed, an EIO reply is built immediately,
		 * and nd_time is zeroed so the entry sorts to the front of
		 * the time queue.  With nd_mrep cleared it is kept off the
		 * hash chain below.
		 */
1308 nfsmout:
1309 		m_freem(info.mrep);
1310 		info.mrep = NULL;
1311 		error = EIO;
1312 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1313 		if (info.v3) {
1314 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1315 				     aftat_ret, &va);
1316 		}
1317 		nfsd->nd_mreq = info.mreq;
1318 		nfsd->nd_mrep = NULL;
1319 		nfsd->nd_time = 0;
1320 	    }
1321 
1322 	    /*
1323 	     * Add this entry to the hash and time queues.
	     *
	     * The time queue is kept in ascending nd_time order: insert
	     * after the last entry whose nd_time is smaller than ours.
1324 	     */
1325 	    owp = NULL;
1326 	    wp = slp->ns_tq.lh_first;
1327 	    while (wp && wp->nd_time < nfsd->nd_time) {
1328 		owp = wp;
1329 		wp = wp->nd_tq.le_next;
1330 	    }
1331 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1332 	    if (owp) {
1333 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1334 	    } else {
1335 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1336 	    }
	    /* Only requests that still own their data go on the hash chain. */
1337 	    if (nfsd->nd_mrep) {
1338 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1339 		owp = NULL;
1340 		wp = wpp->lh_first;
		/* Skip entries for other file handles ... */
1341 		while (wp &&
1342 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1343 		    owp = wp;
1344 		    wp = wp->nd_hash.le_next;
1345 		}
		/* ... then same-handle entries that start at a lower offset. */
1346 		while (wp && wp->nd_off < nfsd->nd_off &&
1347 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1348 		    owp = wp;
1349 		    wp = wp->nd_hash.le_next;
1350 		}
1351 		if (owp) {
1352 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1353 
1354 		    /*
1355 		     * Search the hash list for overlapping entries and
1356 		     * coalesce.
		     *
		     * 'wp' is fetched before the call because
		     * nfsrvw_coalesce() unlinks nfsd from the hash chain.
1357 		     */
1358 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1359 			wp = nfsd->nd_hash.le_next;
1360 			if (NFSW_SAMECRED(owp, nfsd))
1361 			    nfsrvw_coalesce(owp, nfsd);
1362 		    }
1363 		} else {
1364 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1365 		}
1366 	    }
1367 	}
1368 
1369 	/*
1370 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1371 	 * and generate the associated reply mbuf list(s).
	 *
	 * The scan restarts from the head (goto loop1) after every write
	 * because processing an entry re-links descriptors on slp->ns_tq.
1372 	 */
1373 loop1:
1374 	cur_usec = nfs_curusec();
1375 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1376 		owp = nfsd->nd_tq.le_next;
		/* Queue is time-ordered: nothing further is due yet. */
1377 		if (nfsd->nd_time > cur_usec)
1378 		    break;
		/* Already has a reply; it is waiting for phase 3. */
1379 		if (nfsd->nd_mreq)
1380 		    continue;
1381 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1382 		LIST_REMOVE(nfsd, nd_tq);
1383 		LIST_REMOVE(nfsd, nd_hash);
1384 		info.mrep = nfsd->nd_mrep;
1385 		info.mreq = NULL;
1386 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1387 		nfsd->nd_mrep = NULL;
1388 		cred = &nfsd->nd_cr;
		/* Non-zero until the matching VOP_GETATTR() call returns 0. */
1389 		forat_ret = aftat_ret = 1;
1390 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1391 				     nfsd->nd_nam, &rdonly,
1392 				     (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1393 		if (!error) {
1394 		    if (info.v3)
1395 			forat_ret = VOP_GETATTR(vp, &forat);
1396 		    if (vp->v_type != VREG) {
1397 			if (info.v3)
1398 			    error = EINVAL;
1399 			else
1400 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1401 		    }
1402 		} else {
1403 		    vp = NULL;
1404 		}
1405 		if (!error) {
1406 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1407 		}
1408 
		/* Map the requested stability level onto VOP_WRITE() flags. */
1409 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1410 		    ioflags = IO_NODELOCKED;
1411 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1412 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1413 		else
1414 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1415 		uiop->uio_rw = UIO_WRITE;
1416 		uiop->uio_segflg = UIO_SYSSPACE;
1417 		uiop->uio_td = NULL;
1418 		uiop->uio_offset = nfsd->nd_off;
		/* nd_eoff may have been extended by coalesced requests. */
1419 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1420 		if (uiop->uio_resid > 0) {
		    /*
		     * Build one iovec per non-empty mbuf: first count
		     * them, then fill the array in.
		     */
1421 		    mp1 = info.mrep;
1422 		    i = 0;
1423 		    while (mp1) {
1424 			if (mp1->m_len > 0)
1425 			    i++;
1426 			mp1 = mp1->m_next;
1427 		    }
1428 		    uiop->uio_iovcnt = i;
1429 		    iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1430 		    uiop->uio_iov = ivp = iov;
1431 		    mp1 = info.mrep;
1432 		    while (mp1) {
1433 			if (mp1->m_len > 0) {
1434 			    ivp->iov_base = mtod(mp1, caddr_t);
1435 			    ivp->iov_len = mp1->m_len;
1436 			    ivp++;
1437 			}
1438 			mp1 = mp1->m_next;
1439 		    }
		    /* The write is skipped if lookup or access checks failed. */
1440 		    if (!error) {
1441 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1442 			nfsstats.srvvop_writes++;
1443 		    }
1444 		    kfree((caddr_t)iov, M_TEMP);
1445 		}
1446 		m_freem(info.mrep);
1447 		info.mrep = NULL;
1448 		if (vp) {
1449 		    aftat_ret = VOP_GETATTR(vp, &va);
1450 		    vput(vp);
1451 		    vp = NULL;
1452 		}
1453 
1454 		/*
1455 		 * Loop around generating replies for all write rpcs that have
1456 		 * now been completed.
		 *
		 * 'swp' stays on the descriptor that was just written;
		 * 'nfsd' walks swp and then each entry popped off
		 * swp->nd_coalesce.  All of them share 'error' and the
		 * attributes gathered above.
1457 		 */
1458 		swp = nfsd;
1459 		do {
1460 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1461 		    if (error) {
1462 			nfsm_writereply(&info, nfsd, slp, error,
1463 					NFSX_WCCDATA(info.v3));
1464 			if (info.v3) {
1465 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1466 					     aftat_ret, &va);
1467 			}
1468 		    } else {
1469 			nfsm_writereply(&info, nfsd, slp, error,
1470 					NFSX_PREOPATTR(info.v3) +
1471 					NFSX_POSTOPORFATTR(info.v3) +
1472 					2 * NFSX_UNSIGNED +
1473 					NFSX_WRITEVERF(info.v3));
1474 			if (info.v3) {
1475 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1476 					     aftat_ret, &va);
1477 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
			    /*
			     * Each reply reports its own length, but the
			     * stability level of the write actually done
			     * (swp's).
			     */
1478 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1479 			    *tl++ = txdr_unsigned(swp->nd_stable);
1480 			    /*
1481 			     * Actually, there is no need to txdr these fields,
1482 			     * but it may make the values more human readable,
1483 			     * for debugging purposes.
1484 			     */
1485 			    if (nfsver.tv_sec == 0)
1486 				    nfsver = boottime;
1487 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1488 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1489 			} else {
1490 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1491 			    nfsm_srvfattr(nfsd, &va, fp);
1492 			}
1493 		    }
1494 		    nfsd->nd_mreq = info.mreq;
1495 		    if (nfsd->nd_mrep)
1496 			panic("nfsrv_write: nd_mrep not free");
1497 
1498 		    /*
1499 		     * Done. Put it at the head of the timer queue so that
1500 		     * the final phase can return the reply.
1501 		     */
1502 		    if (nfsd != swp) {
1503 			nfsd->nd_time = 0;
1504 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1505 		    }
		    /*
		     * Pop the next coalesced descriptor.  nd_coalesce
		     * members are linked through their nd_tq field.
		     */
1506 		    nfsd = swp->nd_coalesce.lh_first;
1507 		    if (nfsd) {
1508 			LIST_REMOVE(nfsd, nd_tq);
1509 		    }
1510 		} while (nfsd);
1511 		swp->nd_time = 0;
1512 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1513 		goto loop1;
1514 	}
1515 
1516 	/*
1517 	 * Search for a reply to return.
	 *
	 * At most one descriptor is returned per call; any other finished
	 * ones stay on slp->ns_tq.
1518 	 */
1519 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1520 		if (nfsd->nd_mreq) {
1521 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1522 		    LIST_REMOVE(nfsd, nd_tq);
1523 		    break;
1524 		}
1525 	}
1526 	if (nfsd) {
1527 		*ndp = nfsd;
1528 		*mrq = nfsd->nd_mreq;
1529 	} else {
1530 		*ndp = NULL;
1531 		*mrq = NULL;
1532 	}
1533 	return (0);
1534 }
1535 
1536 /*
1537  * Coalesce the write request nfsd into owp. To do this we must:
1538  * - remove nfsd from the queues
1539  * - merge nfsd->nd_mrep into owp->nd_mrep
1540  * - update the nd_eoff and nd_stable for owp
1541  * - put nfsd on owp's nd_coalesce list
1542  * NB: Must be called at splsoftclock().
1543  */
1544 static void
1545 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1546 {
1547         int overlap;
1548         struct mbuf *mp1;
1549 	struct nfsrv_descript *p;
1550 
1551 	NFS_DPF(WG, ("C%03x-%03x",
1552 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1553         LIST_REMOVE(nfsd, nd_hash);
1554         LIST_REMOVE(nfsd, nd_tq);
1555         if (owp->nd_eoff < nfsd->nd_eoff) {
1556             overlap = owp->nd_eoff - nfsd->nd_off;
1557             if (overlap < 0)
1558                 panic("nfsrv_coalesce: bad off");
1559             if (overlap > 0)
1560                 m_adj(nfsd->nd_mrep, overlap);
1561             mp1 = owp->nd_mrep;
1562             while (mp1->m_next)
1563                 mp1 = mp1->m_next;
1564             mp1->m_next = nfsd->nd_mrep;
1565             owp->nd_eoff = nfsd->nd_eoff;
1566         } else
1567             m_freem(nfsd->nd_mrep);
1568         nfsd->nd_mrep = NULL;
1569         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1570             owp->nd_stable = NFSV3WRITE_FILESYNC;
1571         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1572             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1573             owp->nd_stable = NFSV3WRITE_DATASYNC;
1574         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1575 
1576 	/*
1577 	 * If nfsd had anything else coalesced into it, transfer them
1578 	 * to owp, otherwise their replies will never get sent.
1579 	 */
1580 	for (p = nfsd->nd_coalesce.lh_first; p;
1581 	     p = nfsd->nd_coalesce.lh_first) {
1582 	    LIST_REMOVE(p, nd_tq);
1583 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1584 	}
1585 }
1586 
1587 /*
1588  * nfs create service
1589  * now does a truncate to 0 length via. setattr if it already exists
 *
 * Serves both protocol versions (selected by info.v3).  The name is
 * looked up with nfs_namei(NLC_CREATE).  When the target does not exist
 * it is created with VOP_NCREATE() (regular file or socket) or
 * VOP_NMKNOD() (character/block device or FIFO).  When it already exists
 * and the request supplied a size, that size is applied with
 * VOP_SETATTR() after a write-access check.  For a v3 exclusive create
 * the client's verifier is stored in va_atime and compared before the
 * reply is built.
 *
 * The reply mbuf chain is returned through *mrq.  The nfsmout label is
 * the catch-all cleanup that releases dirp, the lookup state, dvp and vp.
1590  */
1591 int
1592 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1593 	     struct thread *td, struct mbuf **mrq)
1594 {
1595 	struct sockaddr *nam = nfsd->nd_nam;
1596 	struct ucred *cred = &nfsd->nd_cr;
1597 	struct nfs_fattr *fp;
1598 	struct vattr va, dirfor, diraft;
1599 	struct vattr *vap = &va;
1600 	struct nfsv2_sattr *sp;
1601 	u_int32_t *tl;
1602 	struct nlookupdata nd;
1603 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1604 	udev_t rdev = NOUDEV;
1605 	caddr_t cp;
1606 	int how, exclusive_flag = 0;
1607 	struct vnode *dirp;
1608 	struct vnode *dvp;
1609 	struct vnode *vp;
1610 	struct mount *mp;
1611 	nfsfh_t nfh;
1612 	fhandle_t *fhp;
1613 	u_quad_t tempsize;
1614 	u_char cverf[NFSX_V3CREATEVERF];
1615 	struct nfsm_info info;
1616 
1617 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/*
	 * Put everything the nfsmout cleanup touches into a known state
	 * before the first macro that can jump there.
	 */
1618 	nlookup_zero(&nd);
1619 	dirp = NULL;
1620 	dvp = NULL;
1621 	vp = NULL;
1622 
1623 	info.mrep = nfsd->nd_mrep;
1624 	info.mreq = NULL;
1625 	info.md = nfsd->nd_md;
1626 	info.dpos = nfsd->nd_dpos;
1627 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1628 
1629 	fhp = &nfh.fh_generic;
1630 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1631 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1632 
1633 	/*
1634 	 * Call namei and do initial cleanup to get a few things
1635 	 * out of the way.  If we get an initial error we cleanup
1636 	 * and return here to avoid special-casing the invalid nd
1637 	 * structure through the rest of the case.  dirp may be
1638 	 * set even if an error occurs, but the nd structure will not
1639 	 * be valid at all if an error occurs so we have to invalidate it
1640 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1641 	 */
1642 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1643 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1644 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/* mp is only consumed by the nfsrv_access() call further down. */
1645 	mp = vfs_getvfs(&fhp->fh_fsid);
1646 
	/*
	 * v3 replies carry the directory's pre-operation attributes, so
	 * dirp is kept.  v2 has no further use for it and drops the
	 * reference right away.
	 */
1647 	if (dirp) {
1648 		if (info.v3) {
1649 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1650 		} else {
1651 			vrele(dirp);
1652 			dirp = NULL;
1653 		}
1654 	}
1655 	if (error) {
1656 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1657 				      NFSX_WCCDATA(info.v3), &error));
1658 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1659 				 diraft_ret, &diraft);
1660 		error = 0;
1661 		goto nfsmout;
1662 	}
1663 
1664 	/*
1665 	 * No error.  Continue.  State:
1666 	 *
1667 	 *	dirp 		may be valid
1668 	 *	vp		may be valid or NULL if the target does not
1669 	 *			exist.
1670 	 *	dvp		is valid
1671 	 *
1672 	 * The error state is set through the code and we may also do some
1673 	 * opportunistic releasing of vnodes to avoid holding locks through
1674 	 * NFS I/O.  The cleanup at the end is a catch-all
1675 	 */
1676 
1677 	VATTR_NULL(vap);
1678 	if (info.v3) {
1679 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1680 		how = fxdr_unsigned(int, *tl);
		/*
		 * GUARDED fails with EEXIST when the target exists and
		 * otherwise parses attributes like UNCHECKED.  EXCLUSIVE
		 * carries a verifier instead of attributes.
		 */
1681 		switch (how) {
1682 		case NFSV3CREATE_GUARDED:
1683 			if (vp) {
1684 				error = EEXIST;
1685 				break;
1686 			}
1687 			/* fall through */
1688 		case NFSV3CREATE_UNCHECKED:
1689 			ERROROUT(nfsm_srvsattr(&info, vap));
1690 			break;
1691 		case NFSV3CREATE_EXCLUSIVE:
1692 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1693 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1694 			exclusive_flag = 1;
1695 			break;
1696 		}
1697 		vap->va_type = VREG;
1698 	} else {
		/*
		 * v2: the file type is derived from sa_mode, and for
		 * device/FIFO types the sa_size field is read as the
		 * device number.
		 */
1699 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1700 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1701 		if (vap->va_type == VNON)
1702 			vap->va_type = VREG;
1703 		vap->va_mode = nfstov_mode(sp->sa_mode);
1704 		switch (vap->va_type) {
1705 		case VREG:
1706 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1707 			if (tsize != -1)
1708 				vap->va_size = (u_quad_t)tsize;
1709 			break;
1710 		case VCHR:
1711 		case VBLK:
1712 		case VFIFO:
1713 			rdev = fxdr_unsigned(long, sp->sa_size);
1714 			break;
1715 		default:
1716 			break;
1717 		}
1718 	}
1719 
1720 	/*
1721 	 * Iff doesn't exist, create it
1722 	 * otherwise just truncate to 0 length
1723 	 *   should I set the mode too ?
1724 	 *
1725 	 * The only possible error we can have at this point is EEXIST.
1726 	 * vp will also be non-NULL in that case.
1727 	 */
1728 	if (vp == NULL) {
1729 		if (vap->va_mode == (mode_t)VNOVAL)
1730 			vap->va_mode = 0;
1731 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
			/*
			 * dvp is unlocked for the create call; only its
			 * reference is dropped afterwards.
			 */
1732 			vn_unlock(dvp);
1733 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1734 					    nd.nl_cred, vap);
1735 			vrele(dvp);
1736 			dvp = NULL;
1737 			if (error == 0) {
1738 				if (exclusive_flag) {
					/*
					 * Record the verifier in va_atime.
					 * The flag is cleared so the
					 * verifier compare before the reply
					 * is skipped for a file just
					 * created here.
					 */
1739 					exclusive_flag = 0;
1740 					VATTR_NULL(vap);
1741 					bcopy(cverf, (caddr_t)&vap->va_atime,
1742 						NFSX_V3CREATEVERF);
1743 					error = VOP_SETATTR(vp, vap, cred);
1744 				}
1745 			}
1746 		} else if (
1747 			vap->va_type == VCHR ||
1748 			vap->va_type == VBLK ||
1749 			vap->va_type == VFIFO
1750 		) {
1751 			/*
1752 			 * Handle SysV FIFO node special cases.  All other
1753 			 * devices require super user to access.
1754 			 */
1755 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1756 				vap->va_type = VFIFO;
1757                         if (vap->va_type != VFIFO &&
1758                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1759 				goto nfsmreply0;
1760                         }
1761 			vap->va_rmajor = umajor(rdev);
1762 			vap->va_rminor = uminor(rdev);
1763 
1764 			vn_unlock(dvp);
1765 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1766 			vrele(dvp);
1767 			dvp = NULL;
1768 			if (error)
1769 				goto nfsmreply0;
1770 #if 0
1771 			/*
1772 			 * XXX what is this junk supposed to do ?
1773 			 */
1774 
1775 			vput(vp);
1776 			vp = NULL;
1777 
1778 			/*
1779 			 * release dvp prior to lookup
1780 			 */
1781 			vput(dvp);
1782 			dvp = NULL;
1783 
1784 			/*
1785 			 * Setup for lookup.
1786 			 *
1787 			 * Even though LOCKPARENT was cleared, ni_dvp may
1788 			 * be garbage.
1789 			 */
1790 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1791 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1792 			nd.ni_cnd.cn_td = td;
1793 			nd.ni_cnd.cn_cred = cred;
1794 
1795 			error = lookup(&nd);
1796 			nd.ni_dvp = NULL;
1797 
1798 			if (error != 0) {
1799 				NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1800 						      0, &error));
1801 				/* fall through on certain errors */
1802 			}
1803 			nfsrv_object_create(nd.ni_vp);
1804 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1805 				error = EINVAL;
1806 				goto nfsmreply0;
1807 			}
1808 #endif
1809 		} else {
1810 			error = ENXIO;
1811 		}
1812 	} else {
		/*
		 * Target already exists.  If the request supplied a size,
		 * apply only that size, subject to write permission.
		 */
1813 		if (vap->va_size != -1) {
1814 			error = nfsrv_access(mp, vp, VWRITE, cred,
1815 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1816 			if (!error) {
1817 				tempsize = vap->va_size;
1818 				VATTR_NULL(vap);
1819 				vap->va_size = tempsize;
1820 				error = VOP_SETATTR(vp, vap, cred);
1821 			}
1822 		}
1823 	}
1824 
	/* Build the file handle and fetch the attributes for the reply. */
1825 	if (!error) {
1826 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1827 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1828 		if (!error)
1829 			error = VOP_GETATTR(vp, vap);
1830 	}
1831 	if (info.v3) {
		/*
		 * exclusive_flag is still set only when the target already
		 * existed: a verifier mismatch against its va_atime is
		 * reported as EEXIST.
		 */
1832 		if (exclusive_flag && !error &&
1833 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1834 			error = EEXIST;
		/* NOTE(review): dirp is dereferenced here without a NULL check. */
1835 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1836 		vrele(dirp);
1837 		dirp = NULL;
1838 	}
1839 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1840 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1841 			      NFSX_WCCDATA(info.v3),
1842 			      &error));
1843 	if (info.v3) {
1844 		if (!error) {
1845 			nfsm_srvpostop_fh(&info, fhp);
1846 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1847 		}
1848 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1849 				 diraft_ret, &diraft);
1850 		error = 0;
1851 	} else {
1852 		nfsm_srvfhtom(&info, fhp);
1853 		fp = nfsm_build(&info, NFSX_V2FATTR);
1854 		nfsm_srvfattr(nfsd, vap, fp);
1855 	}
1856 	goto nfsmout;
1857 
	/* Error exit for the device/FIFO path: header-only reply. */
1858 nfsmreply0:
1859 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1860 	error = 0;
1861 	/* fall through */
1862 
	/*
	 * Common exit: publish the reply and release whatever is still
	 * held.  dvp is only vrele()d when it is the same vnode as vp,
	 * because vp is vput() right after.
	 */
1863 nfsmout:
1864 	*mrq = info.mreq;
1865 	if (dirp)
1866 		vrele(dirp);
1867 	nlookup_done(&nd);
1868 	if (dvp) {
1869 		if (dvp == vp)
1870 			vrele(dvp);
1871 		else
1872 			vput(dvp);
1873 	}
1874 	if (vp)
1875 		vput(vp);
1876 	return (error);
1877 }
1878 
1879 /*
1880  * nfs v3 mknod service
1881  */
1882 int
1883 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1884 	    struct thread *td, struct mbuf **mrq)
1885 {
1886 	struct sockaddr *nam = nfsd->nd_nam;
1887 	struct ucred *cred = &nfsd->nd_cr;
1888 	struct vattr va, dirfor, diraft;
1889 	struct vattr *vap = &va;
1890 	u_int32_t *tl;
1891 	struct nlookupdata nd;
1892 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1893 	enum vtype vtyp;
1894 	struct vnode *dirp;
1895 	struct vnode *dvp;
1896 	struct vnode *vp;
1897 	nfsfh_t nfh;
1898 	fhandle_t *fhp;
1899 	struct nfsm_info info;
1900 
1901 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1902 	nlookup_zero(&nd);
1903 	dirp = NULL;
1904 	dvp = NULL;
1905 	vp = NULL;
1906 
1907 	info.mrep = nfsd->nd_mrep;
1908 	info.mreq = NULL;
1909 	info.md = nfsd->nd_md;
1910 	info.dpos = nfsd->nd_dpos;
1911 
1912 	fhp = &nfh.fh_generic;
1913 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1914 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1915 
1916 	/*
1917 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1918 	 * is not valid.  However, nfsm_*() routines may still jump to
1919 	 * nfsmout.
1920 	 */
1921 
1922 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1923 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1924 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1925 	if (dirp)
1926 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1927 	if (error) {
1928 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1929 			   NFSX_WCCDATA(1), &error));
1930 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1931 				 diraft_ret, &diraft);
1932 		error = 0;
1933 		goto nfsmout;
1934 	}
1935 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1936 	vtyp = nfsv3tov_type(*tl);
1937 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1938 		error = NFSERR_BADTYPE;
1939 		goto out;
1940 	}
1941 	VATTR_NULL(vap);
1942 	ERROROUT(nfsm_srvsattr(&info, vap));
1943 	if (vtyp == VCHR || vtyp == VBLK) {
1944 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1945 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1946 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1947 	}
1948 
1949 	/*
1950 	 * Iff doesn't exist, create it.
1951 	 */
1952 	if (vp) {
1953 		error = EEXIST;
1954 		goto out;
1955 	}
1956 	vap->va_type = vtyp;
1957 	if (vap->va_mode == (mode_t)VNOVAL)
1958 		vap->va_mode = 0;
1959 	if (vtyp == VSOCK) {
1960 		vn_unlock(dvp);
1961 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1962 		vrele(dvp);
1963 		dvp = NULL;
1964 	} else {
1965 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1966 			goto out;
1967 
1968 		vn_unlock(dvp);
1969 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1970 		vrele(dvp);
1971 		dvp = NULL;
1972 		if (error)
1973 			goto out;
1974 	}
1975 
1976 	/*
1977 	 * send response, cleanup, return.
1978 	 */
1979 out:
1980 	nlookup_done(&nd);
1981 	if (dvp) {
1982 		if (dvp == vp)
1983 			vrele(dvp);
1984 		else
1985 			vput(dvp);
1986 		dvp = NULL;
1987 	}
1988 	if (!error) {
1989 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1990 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1991 		if (!error)
1992 			error = VOP_GETATTR(vp, vap);
1993 	}
1994 	if (vp) {
1995 		vput(vp);
1996 		vp = NULL;
1997 	}
1998 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1999 	if (dirp) {
2000 		vrele(dirp);
2001 		dirp = NULL;
2002 	}
2003 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2004 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
2005 			      NFSX_WCCDATA(1), &error));
2006 	if (!error) {
2007 		nfsm_srvpostop_fh(&info, fhp);
2008 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2009 	}
2010 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2011 			 diraft_ret, &diraft);
2012 	*mrq = info.mreq;
2013 	return (0);
2014 nfsmout:
2015 	*mrq = info.mreq;
2016 	if (dirp)
2017 		vrele(dirp);
2018 	nlookup_done(&nd);
2019 	if (dvp) {
2020 		if (dvp == vp)
2021 			vrele(dvp);
2022 		else
2023 			vput(dvp);
2024 	}
2025 	if (vp)
2026 		vput(vp);
2027 	return (error);
2028 }
2029 
2030 /*
2031  * nfs remove service
2032  */
2033 int
2034 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2035 	     struct thread *td, struct mbuf **mrq)
2036 {
2037 	struct sockaddr *nam = nfsd->nd_nam;
2038 	struct ucred *cred = &nfsd->nd_cr;
2039 	struct nlookupdata nd;
2040 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2041 	struct vnode *dirp;
2042 	struct vnode *dvp;
2043 	struct vnode *vp;
2044 	struct vattr dirfor, diraft;
2045 	nfsfh_t nfh;
2046 	fhandle_t *fhp;
2047 	struct nfsm_info info;
2048 
2049 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2050 	nlookup_zero(&nd);
2051 	dirp = NULL;
2052 	dvp = NULL;
2053 	vp = NULL;
2054 
2055 	info.mrep = nfsd->nd_mrep;
2056 	info.mreq = NULL;
2057 	info.md = nfsd->nd_md;
2058 	info.dpos = nfsd->nd_dpos;
2059 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2060 
2061 	fhp = &nfh.fh_generic;
2062 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2063 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2064 
2065 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2066 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2067 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2068 	if (dirp) {
2069 		if (info.v3)
2070 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2071 	}
2072 	if (error == 0) {
2073 		if (vp->v_type == VDIR) {
2074 			error = EPERM;		/* POSIX */
2075 			goto out;
2076 		}
2077 		/*
2078 		 * The root of a mounted filesystem cannot be deleted.
2079 		 */
2080 		if (vp->v_flag & VROOT) {
2081 			error = EBUSY;
2082 			goto out;
2083 		}
2084 out:
2085 		if (!error) {
2086 			if (dvp != vp)
2087 				vn_unlock(dvp);
2088 			if (vp) {
2089 				vput(vp);
2090 				vp = NULL;
2091 			}
2092 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2093 			vrele(dvp);
2094 			dvp = NULL;
2095 		}
2096 	}
2097 	if (dirp && info.v3)
2098 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2099 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2100 	if (info.v3) {
2101 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2102 				 diraft_ret, &diraft);
2103 		error = 0;
2104 	}
2105 nfsmout:
2106 	*mrq = info.mreq;
2107 	nlookup_done(&nd);
2108 	if (dirp)
2109 		vrele(dirp);
2110 	if (dvp) {
2111 		if (dvp == vp)
2112 			vrele(dvp);
2113 		else
2114 			vput(dvp);
2115 	}
2116 	if (vp)
2117 		vput(vp);
2118 	return(error);
2119 }
2120 
2121 /*
2122  * nfs rename service
2123  */
2124 int
2125 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2126 	     struct thread *td, struct mbuf **mrq)
2127 {
2128 	struct sockaddr *nam = nfsd->nd_nam;
2129 	struct ucred *cred = &nfsd->nd_cr;
2130 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2131 	int tdirfor_ret = 1, tdiraft_ret = 1;
2132 	struct nlookupdata fromnd, tond;
2133 	struct vnode *fvp, *fdirp, *fdvp;
2134 	struct vnode *tvp, *tdirp, *tdvp;
2135 	struct namecache *ncp;
2136 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2137 	nfsfh_t fnfh, tnfh;
2138 	fhandle_t *ffhp, *tfhp;
2139 	uid_t saved_uid;
2140 	struct nfsm_info info;
2141 
2142 	info.mrep = nfsd->nd_mrep;
2143 	info.mreq = NULL;
2144 	info.md = nfsd->nd_md;
2145 	info.dpos = nfsd->nd_dpos;
2146 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2147 
2148 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2149 #ifndef nolint
2150 	fvp = NULL;
2151 #endif
2152 	ffhp = &fnfh.fh_generic;
2153 	tfhp = &tnfh.fh_generic;
2154 
2155 	/*
2156 	 * Clear fields incase goto nfsmout occurs from macro.
2157 	 */
2158 
2159 	nlookup_zero(&fromnd);
2160 	nlookup_zero(&tond);
2161 	fdirp = NULL;
2162 	tdirp = NULL;
2163 
2164 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2165 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2166 
2167 	/*
2168 	 * Remember our original uid so that we can reset cr_uid before
2169 	 * the second nfs_namei() call, in case it is remapped.
2170 	 */
2171 	saved_uid = cred->cr_uid;
2172 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2173 			  NULL, NULL,
2174 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2175 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2176 	if (fdirp) {
2177 		if (info.v3)
2178 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2179 	}
2180 	if (error) {
2181 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2182 				      2 * NFSX_WCCDATA(info.v3), &error));
2183 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2184 				 fdiraft_ret, &fdiraft);
2185 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2186 				 tdiraft_ret, &tdiraft);
2187 		error = 0;
2188 		goto nfsmout;
2189 	}
2190 
2191 	/*
2192 	 * We have to unlock the from ncp before we can safely lookup
2193 	 * the target ncp.
2194 	 */
2195 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2196 	cache_unlock(&fromnd.nl_nch);
2197 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2198 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2199 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2200 	cred->cr_uid = saved_uid;
2201 
2202 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2203 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2204 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2205 	if (tdirp) {
2206 		if (info.v3)
2207 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2208 	}
2209 	if (error)
2210 		goto out1;
2211 
2212 	/*
2213 	 * relock the source
2214 	 */
2215 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2216 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2217 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2218 		cache_lock(&fromnd.nl_nch);
2219 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2220 	} else {
2221 		cache_unlock(&tond.nl_nch);
2222 		cache_lock(&fromnd.nl_nch);
2223 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2224 		cache_lock(&tond.nl_nch);
2225 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2226 	}
2227 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2228 
2229 	fvp = fromnd.nl_nch.ncp->nc_vp;
2230 	tvp = tond.nl_nch.ncp->nc_vp;
2231 
2232 	/*
2233 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2234 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2235 	 * point).  If we get through the checks these will be guarenteed
2236 	 * to be non-NULL.
2237 	 *
2238 	 * Holding the children ncp's should be sufficient to prevent
2239 	 * fdvp and tdvp ripouts.
2240 	 */
2241 	if (fromnd.nl_nch.ncp->nc_parent)
2242 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2243 	else
2244 		fdvp = NULL;
2245 	if (tond.nl_nch.ncp->nc_parent)
2246 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2247 	else
2248 		tdvp = NULL;
2249 
2250 	if (tvp != NULL) {
2251 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2252 			if (info.v3)
2253 				error = EEXIST;
2254 			else
2255 				error = EISDIR;
2256 			goto out;
2257 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2258 			if (info.v3)
2259 				error = EEXIST;
2260 			else
2261 				error = ENOTDIR;
2262 			goto out;
2263 		}
2264 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2265 			if (info.v3)
2266 				error = EXDEV;
2267 			else
2268 				error = ENOTEMPTY;
2269 			goto out;
2270 		}
2271 	}
2272 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2273 		if (info.v3)
2274 			error = EXDEV;
2275 		else
2276 			error = ENOTEMPTY;
2277 		goto out;
2278 	}
2279 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2280 		if (info.v3)
2281 			error = EXDEV;
2282 		else
2283 			error = ENOTEMPTY;
2284 		goto out;
2285 	}
2286 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2287 		if (info.v3)
2288 			error = EINVAL;
2289 		else
2290 			error = ENOTEMPTY;
2291 	}
2292 
2293 	/*
2294 	 * You cannot rename a source into itself or a subdirectory of itself.
2295 	 * We check this by travsering the target directory upwards looking
2296 	 * for a match against the source.
2297 	 */
2298 	if (error == 0) {
2299 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2300 			if (fromnd.nl_nch.ncp == ncp) {
2301 				error = EINVAL;
2302 				break;
2303 			}
2304 		}
2305 	}
2306 
2307 	/*
2308 	 * If source is the same as the destination (that is the
2309 	 * same vnode with the same name in the same directory),
2310 	 * then there is nothing to do.
2311 	 */
2312 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2313 		error = -1;
2314 out:
2315 	if (!error) {
2316 		/*
2317 		 * The VOP_NRENAME function releases all vnode references &
2318 		 * locks prior to returning so we need to clear the pointers
2319 		 * to bypass cleanup code later on.
2320 		 */
2321 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2322 				    fdvp, tdvp, tond.nl_cred);
2323 	} else {
2324 		if (error == -1)
2325 			error = 0;
2326 	}
2327 	/* fall through */
2328 
2329 out1:
2330 	if (fdirp)
2331 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2332 	if (tdirp)
2333 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2334 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2335 			      2 * NFSX_WCCDATA(info.v3), &error));
2336 	if (info.v3) {
2337 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2338 				 fdiraft_ret, &fdiraft);
2339 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2340 				 tdiraft_ret, &tdiraft);
2341 	}
2342 	error = 0;
2343 	/* fall through */
2344 
2345 nfsmout:
2346 	*mrq = info.mreq;
2347 	if (tdirp)
2348 		vrele(tdirp);
2349 	nlookup_done(&tond);
2350 	if (fdirp)
2351 		vrele(fdirp);
2352 	nlookup_done(&fromnd);
2353 	return (error);
2354 }
2355 
2356 /*
2357  * nfs link service
2358  */
2359 int
2360 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2361 	   struct thread *td, struct mbuf **mrq)
2362 {
2363 	struct sockaddr *nam = nfsd->nd_nam;
2364 	struct ucred *cred = &nfsd->nd_cr;
2365 	struct nlookupdata nd;
2366 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2367 	int getret = 1;
2368 	struct vnode *dirp;
2369 	struct vnode *dvp;
2370 	struct vnode *vp;
2371 	struct vnode *xp;
2372 	struct mount *xmp;
2373 	struct vattr dirfor, diraft, at;
2374 	nfsfh_t nfh, dnfh;
2375 	fhandle_t *fhp, *dfhp;
2376 	struct nfsm_info info;
2377 
2378 	info.mrep = nfsd->nd_mrep;
2379 	info.mreq = NULL;
2380 	info.md = nfsd->nd_md;
2381 	info.dpos = nfsd->nd_dpos;
2382 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2383 
2384 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2385 	nlookup_zero(&nd);
2386 	dirp = dvp = vp = xp = NULL;
2387 	xmp = NULL;
2388 
2389 	fhp = &nfh.fh_generic;
2390 	dfhp = &dnfh.fh_generic;
2391 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2392 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2393 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2394 
2395 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2396 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2397 	if (error) {
2398 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2399 				      NFSX_POSTOPATTR(info.v3) +
2400 				      NFSX_WCCDATA(info.v3),
2401 				      &error));
2402 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2403 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2404 				 diraft_ret, &diraft);
2405 		xp = NULL;
2406 		error = 0;
2407 		goto nfsmout;
2408 	}
2409 	if (xp->v_type == VDIR) {
2410 		error = EPERM;		/* POSIX */
2411 		goto out1;
2412 	}
2413 
2414 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2415 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2416 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2417 	if (dirp) {
2418 		if (info.v3)
2419 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2420 	}
2421 	if (error)
2422 		goto out1;
2423 
2424 	if (vp != NULL) {
2425 		error = EEXIST;
2426 		goto out;
2427 	}
2428 	if (xp->v_mount != dvp->v_mount)
2429 		error = EXDEV;
2430 out:
2431 	if (!error) {
2432 		vn_unlock(dvp);
2433 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2434 		vrele(dvp);
2435 		dvp = NULL;
2436 	}
2437 	/* fall through */
2438 
2439 out1:
2440 	if (info.v3)
2441 		getret = VOP_GETATTR(xp, &at);
2442 	if (dirp)
2443 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2444 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2445 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2446 			      &error));
2447 	if (info.v3) {
2448 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2449 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2450 				 diraft_ret, &diraft);
2451 		error = 0;
2452 	}
2453 	/* fall through */
2454 
2455 nfsmout:
2456 	*mrq = info.mreq;
2457 	nlookup_done(&nd);
2458 	if (dirp)
2459 		vrele(dirp);
2460 	if (xp)
2461 		vrele(xp);
2462 	if (dvp) {
2463 		if (dvp == vp)
2464 			vrele(dvp);
2465 		else
2466 			vput(dvp);
2467 	}
2468 	if (vp)
2469 		vput(vp);
2470 	return(error);
2471 }
2472 
2473 /*
2474  * nfs symbolic link service
2475  */
2476 int
2477 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2478 	      struct thread *td, struct mbuf **mrq)
2479 {
2480 	struct sockaddr *nam = nfsd->nd_nam;
2481 	struct ucred *cred = &nfsd->nd_cr;
2482 	struct vattr va, dirfor, diraft;
2483 	struct nlookupdata nd;
2484 	struct vattr *vap = &va;
2485 	struct nfsv2_sattr *sp;
2486 	char *pathcp = NULL;
2487 	struct uio io;
2488 	struct iovec iv;
2489 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2490 	struct vnode *dirp;
2491 	struct vnode *vp;
2492 	struct vnode *dvp;
2493 	nfsfh_t nfh;
2494 	fhandle_t *fhp;
2495 	struct nfsm_info info;
2496 
2497 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2498 	nlookup_zero(&nd);
2499 	dirp = NULL;
2500 	dvp = NULL;
2501 	vp = NULL;
2502 
2503 	info.mrep = nfsd->nd_mrep;
2504 	info.mreq =  NULL;
2505 	info.md = nfsd->nd_md;
2506 	info.dpos = nfsd->nd_dpos;
2507 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2508 
2509 	fhp = &nfh.fh_generic;
2510 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2511 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2512 
2513 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2514 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2515 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2516 	if (dirp) {
2517 		if (info.v3)
2518 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2519 	}
2520 	if (error)
2521 		goto out;
2522 
2523 	VATTR_NULL(vap);
2524 	if (info.v3) {
2525 		ERROROUT(nfsm_srvsattr(&info, vap));
2526 	}
2527 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2528 	pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2529 	iv.iov_base = pathcp;
2530 	iv.iov_len = len2;
2531 	io.uio_resid = len2;
2532 	io.uio_offset = 0;
2533 	io.uio_iov = &iv;
2534 	io.uio_iovcnt = 1;
2535 	io.uio_segflg = UIO_SYSSPACE;
2536 	io.uio_rw = UIO_READ;
2537 	io.uio_td = NULL;
2538 	ERROROUT(nfsm_mtouio(&info, &io, len2));
2539 	if (info.v3 == 0) {
2540 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2541 		vap->va_mode = nfstov_mode(sp->sa_mode);
2542 	}
2543 	*(pathcp + len2) = '\0';
2544 	if (vp) {
2545 		error = EEXIST;
2546 		goto out;
2547 	}
2548 
2549 	if (vap->va_mode == (mode_t)VNOVAL)
2550 		vap->va_mode = 0;
2551 	if (dvp != vp)
2552 		vn_unlock(dvp);
2553 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2554 	vrele(dvp);
2555 	dvp = NULL;
2556 	if (error == 0) {
2557 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2558 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2559 		if (!error)
2560 			error = VOP_GETATTR(vp, vap);
2561 	}
2562 
2563 out:
2564 	if (dvp) {
2565 		if (dvp == vp)
2566 			vrele(dvp);
2567 		else
2568 			vput(dvp);
2569 	}
2570 	if (vp) {
2571 		vput(vp);
2572 		vp = NULL;
2573 	}
2574 	if (pathcp) {
2575 		kfree(pathcp, M_TEMP);
2576 		pathcp = NULL;
2577 	}
2578 	if (dirp) {
2579 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2580 		vrele(dirp);
2581 		dirp = NULL;
2582 	}
2583 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2584 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2585 			      NFSX_WCCDATA(info.v3),
2586 			      &error));
2587 	if (info.v3) {
2588 		if (!error) {
2589 			nfsm_srvpostop_fh(&info, fhp);
2590 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2591 		}
2592 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2593 				 diraft_ret, &diraft);
2594 	}
2595 	error = 0;
2596 	/* fall through */
2597 
2598 nfsmout:
2599 	*mrq = info.mreq;
2600 	nlookup_done(&nd);
2601 	if (vp)
2602 		vput(vp);
2603 	if (dirp)
2604 		vrele(dirp);
2605 	if (pathcp)
2606 		kfree(pathcp, M_TEMP);
2607 	return (error);
2608 }
2609 
2610 /*
2611  * nfs mkdir service
2612  */
2613 int
2614 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2615 	    struct thread *td, struct mbuf **mrq)
2616 {
2617 	struct sockaddr *nam = nfsd->nd_nam;
2618 	struct ucred *cred = &nfsd->nd_cr;
2619 	struct vattr va, dirfor, diraft;
2620 	struct vattr *vap = &va;
2621 	struct nfs_fattr *fp;
2622 	struct nlookupdata nd;
2623 	u_int32_t *tl;
2624 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2625 	struct vnode *dirp;
2626 	struct vnode *dvp;
2627 	struct vnode *vp;
2628 	nfsfh_t nfh;
2629 	fhandle_t *fhp;
2630 	struct nfsm_info info;
2631 
2632 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2633 	nlookup_zero(&nd);
2634 	dirp = NULL;
2635 	dvp = NULL;
2636 	vp = NULL;
2637 
2638 	info.dpos = nfsd->nd_dpos;
2639 	info.mrep = nfsd->nd_mrep;
2640 	info.mreq =  NULL;
2641 	info.md = nfsd->nd_md;
2642 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2643 
2644 	fhp = &nfh.fh_generic;
2645 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2646 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2647 
2648 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2649 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2650 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2651 	if (dirp) {
2652 		if (info.v3)
2653 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2654 	}
2655 	if (error) {
2656 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2657 				      NFSX_WCCDATA(info.v3), &error));
2658 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2659 				 diraft_ret, &diraft);
2660 		error = 0;
2661 		goto nfsmout;
2662 	}
2663 	VATTR_NULL(vap);
2664 	if (info.v3) {
2665 		ERROROUT(nfsm_srvsattr(&info, vap));
2666 	} else {
2667 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2668 		vap->va_mode = nfstov_mode(*tl++);
2669 	}
2670 
2671 	/*
2672 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2673 	 * nd.ni_vp, if it exists, is referenced but not locked.
2674 	 */
2675 
2676 	vap->va_type = VDIR;
2677 	if (vp != NULL) {
2678 		error = EEXIST;
2679 		goto out;
2680 	}
2681 
2682 	/*
2683 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2684 	 * component is freed by the VOP call.  This will fill-in
2685 	 * nd.ni_vp, reference, and exclusively lock it.
2686 	 */
2687 	if (vap->va_mode == (mode_t)VNOVAL)
2688 		vap->va_mode = 0;
2689 	vn_unlock(dvp);
2690 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2691 	vrele(dvp);
2692 	dvp = NULL;
2693 
2694 	if (error == 0) {
2695 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2696 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2697 		if (error == 0)
2698 			error = VOP_GETATTR(vp, vap);
2699 	}
2700 out:
2701 	if (dirp)
2702 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2703 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2704 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2705 			      NFSX_WCCDATA(info.v3),
2706 			      &error));
2707 	if (info.v3) {
2708 		if (!error) {
2709 			nfsm_srvpostop_fh(&info, fhp);
2710 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2711 		}
2712 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2713 				 diraft_ret, &diraft);
2714 	} else {
2715 		nfsm_srvfhtom(&info, fhp);
2716 		fp = nfsm_build(&info, NFSX_V2FATTR);
2717 		nfsm_srvfattr(nfsd, vap, fp);
2718 	}
2719 	error = 0;
2720 	/* fall through */
2721 
2722 nfsmout:
2723 	*mrq = info.mreq;
2724 	nlookup_done(&nd);
2725 	if (dirp)
2726 		vrele(dirp);
2727 	if (dvp) {
2728 		if (dvp == vp)
2729 			vrele(dvp);
2730 		else
2731 			vput(dvp);
2732 	}
2733 	if (vp)
2734 		vput(vp);
2735 	return (error);
2736 }
2737 
2738 /*
2739  * nfs rmdir service
2740  */
2741 int
2742 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2743 	    struct thread *td, struct mbuf **mrq)
2744 {
2745 	struct sockaddr *nam = nfsd->nd_nam;
2746 	struct ucred *cred = &nfsd->nd_cr;
2747 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2748 	struct vnode *dirp;
2749 	struct vnode *dvp;
2750 	struct vnode *vp;
2751 	struct vattr dirfor, diraft;
2752 	nfsfh_t nfh;
2753 	fhandle_t *fhp;
2754 	struct nlookupdata nd;
2755 	struct nfsm_info info;
2756 
2757 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2758 	nlookup_zero(&nd);
2759 	dirp = NULL;
2760 	dvp = NULL;
2761 	vp = NULL;
2762 
2763 	info.mrep = nfsd->nd_mrep;
2764 	info.mreq = NULL;
2765 	info.md = nfsd->nd_md;
2766 	info.dpos = nfsd->nd_dpos;
2767 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2768 
2769 	fhp = &nfh.fh_generic;
2770 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2771 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2772 
2773 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2774 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2775 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2776 	if (dirp) {
2777 		if (info.v3)
2778 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2779 	}
2780 	if (error) {
2781 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2782 				      NFSX_WCCDATA(info.v3), &error));
2783 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2784 				 diraft_ret, &diraft);
2785 		error = 0;
2786 		goto nfsmout;
2787 	}
2788 	if (vp->v_type != VDIR) {
2789 		error = ENOTDIR;
2790 		goto out;
2791 	}
2792 
2793 	/*
2794 	 * The root of a mounted filesystem cannot be deleted.
2795 	 */
2796 	if (vp->v_flag & VROOT)
2797 		error = EBUSY;
2798 out:
2799 	/*
2800 	 * Issue or abort op.  Since SAVESTART is not set, path name
2801 	 * component is freed by the VOP after either.
2802 	 */
2803 	if (!error) {
2804 		if (dvp != vp)
2805 			vn_unlock(dvp);
2806 		vput(vp);
2807 		vp = NULL;
2808 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2809 		vrele(dvp);
2810 		dvp = NULL;
2811 	}
2812 	nlookup_done(&nd);
2813 
2814 	if (dirp)
2815 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2816 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2817 	if (info.v3) {
2818 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2819 				 diraft_ret, &diraft);
2820 		error = 0;
2821 	}
2822 	/* fall through */
2823 
2824 nfsmout:
2825 	*mrq = info.mreq;
2826 	if (dvp) {
2827 		if (dvp == vp)
2828 			vrele(dvp);
2829 		else
2830 			vput(dvp);
2831 	}
2832 	nlookup_done(&nd);
2833 	if (dirp)
2834 		vrele(dirp);
2835 	if (vp)
2836 		vput(vp);
2837 	return(error);
2838 }
2839 
2840 /*
2841  * nfs readdir service
2842  * - mallocs what it thinks is enough to read
2843  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2844  * - calls VOP_READDIR()
2845  * - loops around building the reply
2846  *	if the output generated exceeds count break out of loop
2847  *	The nfsm_clget macro is used here so that the reply will be packed
2848  *	tightly in mbuf clusters.
2849  * - it only knows that it has encountered eof when the VOP_READDIR()
2850  *	reads nothing
2851  * - as such one readdir rpc will return eof false although you are there
2852  *	and then the next will return eof
2853  * - it trims out records with d_fileno == 0
2854  *	this doesn't matter for Unix clients, but they might confuse clients
2855  *	for other os'.
2856  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2857  *	than requested, but this may not apply to all filesystems. For
2858  *	example, client NFS does not { although it is never remote mounted
2859  *	anyhow }
2860  *     The alternate call nfsrv_readdirplus() does lookups as well.
2861  * PS: The NFS protocol spec. does not clarify what the "count" byte
2862  *	argument is a count of.. just name strings and file id's or the
2863  *	entire reply rpc or ...
2864  *	I tried just file name and id sizes and it confused the Sun client,
2865  *	so I am using the full rpc size now. The "paranoia.." comment refers
2866  *	to including the status longwords that are not a part of the dir.
2867  *	"entry" structures, but are in the rpc.
2868  */
2869 struct flrep {
2870 	nfsuint64	fl_off;
2871 	u_int32_t	fl_postopok;
2872 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2873 	u_int32_t	fl_fhok;
2874 	u_int32_t	fl_fhsize;
2875 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2876 };
2877 
2878 int
2879 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2880 	      struct thread *td, struct mbuf **mrq)
2881 {
2882 	struct sockaddr *nam = nfsd->nd_nam;
2883 	struct ucred *cred = &nfsd->nd_cr;
2884 	char *bp, *be;
2885 	struct dirent *dp;
2886 	caddr_t cp;
2887 	u_int32_t *tl;
2888 	struct mbuf *mp1, *mp2;
2889 	char *cpos, *cend, *rbuf;
2890 	struct vnode *vp = NULL;
2891 	struct mount *mp = NULL;
2892 	struct vattr at;
2893 	nfsfh_t nfh;
2894 	fhandle_t *fhp;
2895 	struct uio io;
2896 	struct iovec iv;
2897 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2898 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2899 	u_quad_t off, toff;
2900 #if 0
2901 	u_quad_t verf;
2902 #endif
2903 	off_t *cookies = NULL, *cookiep;
2904 	struct nfsm_info info;
2905 
2906 	info.mrep = nfsd->nd_mrep;
2907 	info.mreq = NULL;
2908 	info.md = nfsd->nd_md;
2909 	info.dpos = nfsd->nd_dpos;
2910 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2911 
2912 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2913 	fhp = &nfh.fh_generic;
2914 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2915 	if (info.v3) {
2916 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2917 		toff = fxdr_hyper(tl);
2918 		tl += 2;
2919 #if 0
2920 		verf = fxdr_hyper(tl);
2921 #endif
2922 		tl += 2;
2923 	} else {
2924 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2925 		toff = fxdr_unsigned(u_quad_t, *tl++);
2926 #if 0
2927 		verf = 0;	/* shut up gcc */
2928 #endif
2929 	}
2930 	off = toff;
2931 	cnt = fxdr_unsigned(int, *tl);
2932 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2933 	xfer = NFS_SRVMAXDATA(nfsd);
2934 	if ((unsigned)cnt > xfer)
2935 		cnt = xfer;
2936 	if ((unsigned)siz > xfer)
2937 		siz = xfer;
2938 	fullsiz = siz;
2939 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2940 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2941 	if (!error && vp->v_type != VDIR) {
2942 		error = ENOTDIR;
2943 		vput(vp);
2944 		vp = NULL;
2945 	}
2946 	if (error) {
2947 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2948 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2949 		error = 0;
2950 		goto nfsmout;
2951 	}
2952 
2953 	/*
2954 	 * Obtain lock on vnode for this section of the code
2955 	 */
2956 
2957 	if (info.v3) {
2958 		error = getret = VOP_GETATTR(vp, &at);
2959 #if 0
2960 		/*
2961 		 * XXX This check may be too strict for Solaris 2.5 clients.
2962 		 */
2963 		if (!error && toff && verf && verf != at.va_filerev)
2964 			error = NFSERR_BAD_COOKIE;
2965 #endif
2966 	}
2967 	if (!error)
2968 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2969 	if (error) {
2970 		vput(vp);
2971 		vp = NULL;
2972 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2973 				      NFSX_POSTOPATTR(info.v3), &error));
2974 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2975 		error = 0;
2976 		goto nfsmout;
2977 	}
2978 	vn_unlock(vp);
2979 
2980 	/*
2981 	 * end section.  Allocate rbuf and continue
2982 	 */
2983 	rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
2984 again:
2985 	iv.iov_base = rbuf;
2986 	iv.iov_len = fullsiz;
2987 	io.uio_iov = &iv;
2988 	io.uio_iovcnt = 1;
2989 	io.uio_offset = (off_t)off;
2990 	io.uio_resid = fullsiz;
2991 	io.uio_segflg = UIO_SYSSPACE;
2992 	io.uio_rw = UIO_READ;
2993 	io.uio_td = NULL;
2994 	eofflag = 0;
2995 	if (cookies) {
2996 		kfree((caddr_t)cookies, M_TEMP);
2997 		cookies = NULL;
2998 	}
2999 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3000 	off = (off_t)io.uio_offset;
3001 	if (!cookies && !error)
3002 		error = NFSERR_PERM;
3003 	if (info.v3) {
3004 		getret = VOP_GETATTR(vp, &at);
3005 		if (!error)
3006 			error = getret;
3007 	}
3008 	if (error) {
3009 		vrele(vp);
3010 		vp = NULL;
3011 		kfree((caddr_t)rbuf, M_TEMP);
3012 		if (cookies)
3013 			kfree((caddr_t)cookies, M_TEMP);
3014 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3015 				      NFSX_POSTOPATTR(info.v3), &error));
3016 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3017 		error = 0;
3018 		goto nfsmout;
3019 	}
3020 	if (io.uio_resid) {
3021 		siz -= io.uio_resid;
3022 
3023 		/*
3024 		 * If nothing read, return eof
3025 		 * rpc reply
3026 		 */
3027 		if (siz == 0) {
3028 			vrele(vp);
3029 			vp = NULL;
3030 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3031 					      NFSX_POSTOPATTR(info.v3) +
3032 					      NFSX_COOKIEVERF(info.v3) +
3033 					      2 * NFSX_UNSIGNED,
3034 					      &error));
3035 			if (info.v3) {
3036 				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3037 				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3038 				txdr_hyper(at.va_filerev, tl);
3039 				tl += 2;
3040 			} else
3041 				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3042 			*tl++ = nfs_false;
3043 			*tl = nfs_true;
3044 			kfree((caddr_t)rbuf, M_TEMP);
3045 			kfree((caddr_t)cookies, M_TEMP);
3046 			error = 0;
3047 			goto nfsmout;
3048 		}
3049 	}
3050 
3051 	/*
3052 	 * Check for degenerate cases of nothing useful read.
3053 	 * If so go try again
3054 	 */
3055 	cpos = rbuf;
3056 	cend = rbuf + siz;
3057 	dp = (struct dirent *)cpos;
3058 	cookiep = cookies;
3059 	/*
3060 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3061 	 * directory offset up to a block boundary, so it is necessary to
3062 	 * skip over the records that preceed the requested offset. This
3063 	 * requires the assumption that file offset cookies monotonically
3064 	 * increase.
3065 	 */
3066 	while (cpos < cend && ncookies > 0 &&
3067 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3068 		 ((u_quad_t)(*cookiep)) <= toff)) {
3069 		dp = _DIRENT_NEXT(dp);
3070 		cpos = (char *)dp;
3071 		cookiep++;
3072 		ncookies--;
3073 	}
3074 	if (cpos >= cend || ncookies == 0) {
3075 		toff = off;
3076 		siz = fullsiz;
3077 		goto again;
3078 	}
3079 
3080 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3081 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3082 			      NFSX_POSTOPATTR(info.v3) +
3083 			      NFSX_COOKIEVERF(info.v3) + siz,
3084 			      &error));
3085 	if (info.v3) {
3086 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3087 		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3088 		txdr_hyper(at.va_filerev, tl);
3089 	}
3090 	mp1 = mp2 = info.mb;
3091 	bp = info.bpos;
3092 	be = bp + M_TRAILINGSPACE(mp1);
3093 
3094 	/* Loop through the records and build reply */
3095 	while (cpos < cend && ncookies > 0) {
3096 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3097 			nlen = dp->d_namlen;
3098 			rem = nfsm_rndup(nlen) - nlen;
3099 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3100 			if (info.v3)
3101 				len += 2 * NFSX_UNSIGNED;
3102 			if (len > cnt) {
3103 				eofflag = 0;
3104 				break;
3105 			}
3106 			/*
3107 			 * Build the directory record xdr from
3108 			 * the dirent entry.
3109 			 */
3110 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3111 			*tl = nfs_true;
3112 			bp += NFSX_UNSIGNED;
3113 			if (info.v3) {
3114 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3115 				*tl = txdr_unsigned(dp->d_ino >> 32);
3116 				bp += NFSX_UNSIGNED;
3117 			}
3118 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3119 			*tl = txdr_unsigned(dp->d_ino);
3120 			bp += NFSX_UNSIGNED;
3121 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3122 			*tl = txdr_unsigned(nlen);
3123 			bp += NFSX_UNSIGNED;
3124 
3125 			/* And loop around copying the name */
3126 			xfer = nlen;
3127 			cp = dp->d_name;
3128 			while (xfer > 0) {
3129 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3130 				if ((bp+xfer) > be)
3131 					tsiz = be-bp;
3132 				else
3133 					tsiz = xfer;
3134 				bcopy(cp, bp, tsiz);
3135 				bp += tsiz;
3136 				xfer -= tsiz;
3137 				if (xfer > 0)
3138 					cp += tsiz;
3139 			}
3140 			/* And null pad to a int32_t boundary */
3141 			for (i = 0; i < rem; i++)
3142 				*bp++ = '\0';
3143 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3144 
3145 			/* Finish off the record */
3146 			if (info.v3) {
3147 				*tl = txdr_unsigned(*cookiep >> 32);
3148 				bp += NFSX_UNSIGNED;
3149 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3150 			}
3151 			*tl = txdr_unsigned(*cookiep);
3152 			bp += NFSX_UNSIGNED;
3153 		}
3154 		dp = _DIRENT_NEXT(dp);
3155 		cpos = (char *)dp;
3156 		cookiep++;
3157 		ncookies--;
3158 	}
3159 	vrele(vp);
3160 	vp = NULL;
3161 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3162 	*tl = nfs_false;
3163 	bp += NFSX_UNSIGNED;
3164 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3165 	if (eofflag)
3166 		*tl = nfs_true;
3167 	else
3168 		*tl = nfs_false;
3169 	bp += NFSX_UNSIGNED;
3170 	if (mp1 != info.mb) {
3171 		if (bp < be)
3172 			mp1->m_len = bp - mtod(mp1, caddr_t);
3173 	} else
3174 		mp1->m_len += bp - info.bpos;
3175 	kfree((caddr_t)rbuf, M_TEMP);
3176 	kfree((caddr_t)cookies, M_TEMP);
3177 
3178 nfsmout:
3179 	*mrq = info.mreq;
3180 	if (vp)
3181 		vrele(vp);
3182 	return(error);
3183 }
3184 
3185 int
3186 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3187 		  struct thread *td, struct mbuf **mrq)
3188 {
3189 	struct sockaddr *nam = nfsd->nd_nam;
3190 	struct ucred *cred = &nfsd->nd_cr;
3191 	char *bp, *be;
3192 	struct dirent *dp;
3193 	caddr_t cp;
3194 	u_int32_t *tl;
3195 	struct mbuf *mp1, *mp2;
3196 	char *cpos, *cend, *rbuf;
3197 	struct vnode *vp = NULL, *nvp;
3198 	struct mount *mp = NULL;
3199 	struct flrep fl;
3200 	nfsfh_t nfh;
3201 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3202 	struct uio io;
3203 	struct iovec iv;
3204 	struct vattr va, at, *vap = &va;
3205 	struct nfs_fattr *fp;
3206 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3207 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3208 	u_quad_t off, toff;
3209 #if 0
3210 	u_quad_t verf;
3211 #endif
3212 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3213 	struct nfsm_info info;
3214 
3215 	info.mrep = nfsd->nd_mrep;
3216 	info.mreq = NULL;
3217 	info.md = nfsd->nd_md;
3218 	info.dpos = nfsd->nd_dpos;
3219 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3220 
3221 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3222 	fhp = &nfh.fh_generic;
3223 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3224 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3225 	toff = fxdr_hyper(tl);
3226 	tl += 2;
3227 #if 0
3228 	verf = fxdr_hyper(tl);
3229 #endif
3230 	tl += 2;
3231 	siz = fxdr_unsigned(int, *tl++);
3232 	cnt = fxdr_unsigned(int, *tl);
3233 	off = toff;
3234 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3235 	xfer = NFS_SRVMAXDATA(nfsd);
3236 	if ((unsigned)cnt > xfer)
3237 		cnt = xfer;
3238 	if ((unsigned)siz > xfer)
3239 		siz = xfer;
3240 	fullsiz = siz;
3241 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3242 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3243 	if (!error && vp->v_type != VDIR) {
3244 		error = ENOTDIR;
3245 		vput(vp);
3246 		vp = NULL;
3247 	}
3248 	if (error) {
3249 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3250 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3251 		error = 0;
3252 		goto nfsmout;
3253 	}
3254 	error = getret = VOP_GETATTR(vp, &at);
3255 #if 0
3256 	/*
3257 	 * XXX This check may be too strict for Solaris 2.5 clients.
3258 	 */
3259 	if (!error && toff && verf && verf != at.va_filerev)
3260 		error = NFSERR_BAD_COOKIE;
3261 #endif
3262 	if (!error) {
3263 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3264 	}
3265 	if (error) {
3266 		vput(vp);
3267 		vp = NULL;
3268 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3269 				      NFSX_V3POSTOPATTR, &error));
3270 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3271 		error = 0;
3272 		goto nfsmout;
3273 	}
3274 	vn_unlock(vp);
3275 	rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3276 again:
3277 	iv.iov_base = rbuf;
3278 	iv.iov_len = fullsiz;
3279 	io.uio_iov = &iv;
3280 	io.uio_iovcnt = 1;
3281 	io.uio_offset = (off_t)off;
3282 	io.uio_resid = fullsiz;
3283 	io.uio_segflg = UIO_SYSSPACE;
3284 	io.uio_rw = UIO_READ;
3285 	io.uio_td = NULL;
3286 	eofflag = 0;
3287 	if (cookies) {
3288 		kfree((caddr_t)cookies, M_TEMP);
3289 		cookies = NULL;
3290 	}
3291 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3292 	off = (u_quad_t)io.uio_offset;
3293 	getret = VOP_GETATTR(vp, &at);
3294 	if (!cookies && !error)
3295 		error = NFSERR_PERM;
3296 	if (!error)
3297 		error = getret;
3298 	if (error) {
3299 		vrele(vp);
3300 		vp = NULL;
3301 		if (cookies)
3302 			kfree((caddr_t)cookies, M_TEMP);
3303 		kfree((caddr_t)rbuf, M_TEMP);
3304 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3305 				      NFSX_V3POSTOPATTR, &error));
3306 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3307 		error = 0;
3308 		goto nfsmout;
3309 	}
3310 	if (io.uio_resid) {
3311 		siz -= io.uio_resid;
3312 
3313 		/*
3314 		 * If nothing read, return eof
3315 		 * rpc reply
3316 		 */
3317 		if (siz == 0) {
3318 			vrele(vp);
3319 			vp = NULL;
3320 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3321 					      NFSX_V3POSTOPATTR +
3322 					      NFSX_V3COOKIEVERF +
3323 					      2 * NFSX_UNSIGNED,
3324 					      &error));
3325 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3326 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3327 			txdr_hyper(at.va_filerev, tl);
3328 			tl += 2;
3329 			*tl++ = nfs_false;
3330 			*tl = nfs_true;
3331 			kfree((caddr_t)cookies, M_TEMP);
3332 			kfree((caddr_t)rbuf, M_TEMP);
3333 			error = 0;
3334 			goto nfsmout;
3335 		}
3336 	}
3337 
3338 	/*
3339 	 * Check for degenerate cases of nothing useful read.
3340 	 * If so go try again
3341 	 */
3342 	cpos = rbuf;
3343 	cend = rbuf + siz;
3344 	dp = (struct dirent *)cpos;
3345 	cookiep = cookies;
3346 	/*
3347 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3348 	 * directory offset up to a block boundary, so it is necessary to
3349 	 * skip over the records that preceed the requested offset. This
3350 	 * requires the assumption that file offset cookies monotonically
3351 	 * increase.
3352 	 */
3353 	while (cpos < cend && ncookies > 0 &&
3354 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3355 		 ((u_quad_t)(*cookiep)) <= toff)) {
3356 		dp = _DIRENT_NEXT(dp);
3357 		cpos = (char *)dp;
3358 		cookiep++;
3359 		ncookies--;
3360 	}
3361 	if (cpos >= cend || ncookies == 0) {
3362 		toff = off;
3363 		siz = fullsiz;
3364 		goto again;
3365 	}
3366 
3367 	/*
3368 	 * Probe one of the directory entries to see if the filesystem
3369 	 * supports VGET.
3370 	 */
3371 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3372 		error = NFSERR_NOTSUPP;
3373 		vrele(vp);
3374 		vp = NULL;
3375 		kfree((caddr_t)cookies, M_TEMP);
3376 		kfree((caddr_t)rbuf, M_TEMP);
3377 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3378 				      NFSX_V3POSTOPATTR, &error));
3379 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3380 		error = 0;
3381 		goto nfsmout;
3382 	}
3383 	if (nvp) {
3384 		vput(nvp);
3385 		nvp = NULL;
3386 	}
3387 
3388 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3389 			2 * NFSX_UNSIGNED;
3390 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3391 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3392 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3393 	txdr_hyper(at.va_filerev, tl);
3394 	mp1 = mp2 = info.mb;
3395 	bp = info.bpos;
3396 	be = bp + M_TRAILINGSPACE(mp1);
3397 
3398 	/* Loop through the records and build reply */
3399 	while (cpos < cend && ncookies > 0) {
3400 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3401 			nlen = dp->d_namlen;
3402 			rem = nfsm_rndup(nlen) - nlen;
3403 
3404 			/*
3405 			 * For readdir_and_lookup get the vnode using
3406 			 * the file number.
3407 			 */
3408 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3409 				goto invalid;
3410 			bzero((caddr_t)nfhp, NFSX_V3FH);
3411 			nfhp->fh_fsid = fhp->fh_fsid;
3412 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3413 				vput(nvp);
3414 				nvp = NULL;
3415 				goto invalid;
3416 			}
3417 			if (VOP_GETATTR(nvp, vap)) {
3418 				vput(nvp);
3419 				nvp = NULL;
3420 				goto invalid;
3421 			}
3422 			vput(nvp);
3423 			nvp = NULL;
3424 
3425 			/*
3426 			 * If either the dircount or maxcount will be
3427 			 * exceeded, get out now. Both of these lengths
3428 			 * are calculated conservatively, including all
3429 			 * XDR overheads.
3430 			 */
3431 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3432 				NFSX_V3POSTOPATTR);
3433 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3434 			if (len > cnt || dirlen > fullsiz) {
3435 				eofflag = 0;
3436 				break;
3437 			}
3438 
3439 			/*
3440 			 * Build the directory record xdr from
3441 			 * the dirent entry.
3442 			 */
3443 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3444 			nfsm_srvfattr(nfsd, vap, fp);
3445 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3446 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3447 			fl.fl_postopok = nfs_true;
3448 			fl.fl_fhok = nfs_true;
3449 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3450 
3451 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3452 			*tl = nfs_true;
3453 			bp += NFSX_UNSIGNED;
3454 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3455 			*tl = txdr_unsigned(dp->d_ino >> 32);
3456 			bp += NFSX_UNSIGNED;
3457 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3458 			*tl = txdr_unsigned(dp->d_ino);
3459 			bp += NFSX_UNSIGNED;
3460 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3461 			*tl = txdr_unsigned(nlen);
3462 			bp += NFSX_UNSIGNED;
3463 
3464 			/* And loop around copying the name */
3465 			xfer = nlen;
3466 			cp = dp->d_name;
3467 			while (xfer > 0) {
3468 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3469 				if ((bp + xfer) > be)
3470 					tsiz = be - bp;
3471 				else
3472 					tsiz = xfer;
3473 				bcopy(cp, bp, tsiz);
3474 				bp += tsiz;
3475 				xfer -= tsiz;
3476 				cp += tsiz;
3477 			}
3478 			/* And null pad to a int32_t boundary */
3479 			for (i = 0; i < rem; i++)
3480 				*bp++ = '\0';
3481 
3482 			/*
3483 			 * Now copy the flrep structure out.
3484 			 */
3485 			xfer = sizeof (struct flrep);
3486 			cp = (caddr_t)&fl;
3487 			while (xfer > 0) {
3488 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3489 				if ((bp + xfer) > be)
3490 					tsiz = be - bp;
3491 				else
3492 					tsiz = xfer;
3493 				bcopy(cp, bp, tsiz);
3494 				bp += tsiz;
3495 				xfer -= tsiz;
3496 				cp += tsiz;
3497 			}
3498 		}
3499 invalid:
3500 		dp = _DIRENT_NEXT(dp);
3501 		cpos = (char *)dp;
3502 		cookiep++;
3503 		ncookies--;
3504 	}
3505 	vrele(vp);
3506 	vp = NULL;
3507 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3508 	*tl = nfs_false;
3509 	bp += NFSX_UNSIGNED;
3510 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3511 	if (eofflag)
3512 		*tl = nfs_true;
3513 	else
3514 		*tl = nfs_false;
3515 	bp += NFSX_UNSIGNED;
3516 	if (mp1 != info.mb) {
3517 		if (bp < be)
3518 			mp1->m_len = bp - mtod(mp1, caddr_t);
3519 	} else
3520 		mp1->m_len += bp - info.bpos;
3521 	kfree((caddr_t)cookies, M_TEMP);
3522 	kfree((caddr_t)rbuf, M_TEMP);
3523 nfsmout:
3524 	*mrq = info.mreq;
3525 	if (vp)
3526 		vrele(vp);
3527 	return(error);
3528 }
3529 
3530 /*
3531  * nfs commit service
3532  */
3533 int
3534 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3535 	     struct thread *td, struct mbuf **mrq)
3536 {
3537 	struct sockaddr *nam = nfsd->nd_nam;
3538 	struct ucred *cred = &nfsd->nd_cr;
3539 	struct vattr bfor, aft;
3540 	struct vnode *vp = NULL;
3541 	struct mount *mp = NULL;
3542 	nfsfh_t nfh;
3543 	fhandle_t *fhp;
3544 	u_int32_t *tl;
3545 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3546 	u_quad_t off;
3547 	struct nfsm_info info;
3548 
3549 	info.mrep = nfsd->nd_mrep;
3550 	info.mreq = NULL;
3551 	info.md = nfsd->nd_md;
3552 	info.dpos = nfsd->nd_dpos;
3553 
3554 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3555 	fhp = &nfh.fh_generic;
3556 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3557 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3558 
3559 	/*
3560 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3561 	 * count parameters, so these arguments are useless (someday maybe).
3562 	 */
3563 	off = fxdr_hyper(tl);
3564 	tl += 2;
3565 	cnt = fxdr_unsigned(int, *tl);
3566 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3567 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3568 	if (error) {
3569 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3570 				      2 * NFSX_UNSIGNED, &error));
3571 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3572 				 aft_ret, &aft);
3573 		error = 0;
3574 		goto nfsmout;
3575 	}
3576 	for_ret = VOP_GETATTR(vp, &bfor);
3577 
3578 	/*
3579 	 * RFC 1813 3.3.21: If count is 0, a flush from offset to the end of
3580 	 * file is done. At this time VOP_FSYNC does not accept offset and
3581 	 * byte count parameters, so call VOP_FSYNC the whole file for now.
3582 	 */
3583 	if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
3584 		/*
3585 		 * Give up and do the whole thing
3586 		 */
3587 		if (vp->v_object &&
3588 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3589 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3590 		}
3591 		error = VOP_FSYNC(vp, MNT_WAIT, 0);
3592 	} else {
3593 		/*
3594 		 * Locate and synchronously write any buffers that fall
3595 		 * into the requested range.  Note:  we are assuming that
3596 		 * f_iosize is a power of 2.
3597 		 */
3598 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3599 		int iomask = iosize - 1;
3600 		off_t loffset;
3601 
3602 		/*
3603 		 * Align to iosize boundry, super-align to page boundry.
3604 		 */
3605 		if (off & iomask) {
3606 			cnt += off & iomask;
3607 			off &= ~(u_quad_t)iomask;
3608 		}
3609 		if (off & PAGE_MASK) {
3610 			cnt += off & PAGE_MASK;
3611 			off &= ~(u_quad_t)PAGE_MASK;
3612 		}
3613 		loffset = off;
3614 
3615 		if (vp->v_object &&
3616 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3617 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3618 			    (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3619 		}
3620 
3621 		crit_enter();
3622 		while (error == 0 || cnt > 0) {
3623 			struct buf *bp;
3624 
3625 			/*
3626 			 * If we have a buffer and it is marked B_DELWRI we
3627 			 * have to lock and write it.  Otherwise the prior
3628 			 * write is assumed to have already been committed.
3629 			 *
3630 			 * WARNING: FINDBLK_TEST buffers represent stable
3631 			 *	    storage but not necessarily stable
3632 			 *	    content.  It is ok in this case.
3633 			 */
3634 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3635 				if (bp->b_flags & B_DELWRI)
3636 					bp = findblk(vp, loffset, 0);
3637 				else
3638 					bp = NULL;
3639 			}
3640 			if (bp) {
3641 				if (bp->b_flags & B_DELWRI) {
3642 					bremfree(bp);
3643 					error = bwrite(bp);
3644 					++nfs_commit_miss;
3645 				} else {
3646 					BUF_UNLOCK(bp);
3647 				}
3648 			}
3649 			++nfs_commit_blks;
3650 			if (cnt < iosize)
3651 				break;
3652 			cnt -= iosize;
3653 			loffset += iosize;
3654 		}
3655 		crit_exit();
3656 	}
3657 
3658 	aft_ret = VOP_GETATTR(vp, &aft);
3659 	vput(vp);
3660 	vp = NULL;
3661 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3662 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3663 			      &error));
3664 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3665 			 aft_ret, &aft);
3666 	if (!error) {
3667 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3668 		if (nfsver.tv_sec == 0)
3669 			nfsver = boottime;
3670 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3671 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3672 	} else {
3673 		error = 0;
3674 	}
3675 nfsmout:
3676 	*mrq = info.mreq;
3677 	if (vp)
3678 		vput(vp);
3679 	return(error);
3680 }
3681 
3682 /*
3683  * nfs statfs service
3684  */
3685 int
3686 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3687 	     struct thread *td, struct mbuf **mrq)
3688 {
3689 	struct sockaddr *nam = nfsd->nd_nam;
3690 	struct ucred *cred = &nfsd->nd_cr;
3691 	struct statfs *sf;
3692 	struct nfs_statfs *sfp;
3693 	int error = 0, rdonly, getret = 1;
3694 	struct vnode *vp = NULL;
3695 	struct mount *mp = NULL;
3696 	struct vattr at;
3697 	nfsfh_t nfh;
3698 	fhandle_t *fhp;
3699 	struct statfs statfs;
3700 	u_quad_t tval;
3701 	struct nfsm_info info;
3702 
3703 	info.mrep = nfsd->nd_mrep;
3704 	info.mreq = NULL;
3705 	info.md = nfsd->nd_md;
3706 	info.dpos = nfsd->nd_dpos;
3707 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3708 
3709 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3710 	fhp = &nfh.fh_generic;
3711 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3712 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3713 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3714 	if (error) {
3715 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3716 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3717 		error = 0;
3718 		goto nfsmout;
3719 	}
3720 	sf = &statfs;
3721 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3722 	getret = VOP_GETATTR(vp, &at);
3723 	vput(vp);
3724 	vp = NULL;
3725 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3726 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3727 			      &error));
3728 	if (info.v3)
3729 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3730 	if (error) {
3731 		error = 0;
3732 		goto nfsmout;
3733 	}
3734 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3735 	if (info.v3) {
3736 		tval = (u_quad_t)sf->f_blocks;
3737 		tval *= (u_quad_t)sf->f_bsize;
3738 		txdr_hyper(tval, &sfp->sf_tbytes);
3739 		tval = (u_quad_t)sf->f_bfree;
3740 		tval *= (u_quad_t)sf->f_bsize;
3741 		txdr_hyper(tval, &sfp->sf_fbytes);
3742 		tval = (u_quad_t)sf->f_bavail;
3743 		tval *= (u_quad_t)sf->f_bsize;
3744 		txdr_hyper(tval, &sfp->sf_abytes);
3745 		sfp->sf_tfiles.nfsuquad[0] = 0;
3746 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3747 		sfp->sf_ffiles.nfsuquad[0] = 0;
3748 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3749 		sfp->sf_afiles.nfsuquad[0] = 0;
3750 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3751 		sfp->sf_invarsec = 0;
3752 	} else {
3753 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3754 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3755 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3756 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3757 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3758 	}
3759 nfsmout:
3760 	*mrq = info.mreq;
3761 	if (vp)
3762 		vput(vp);
3763 	return(error);
3764 }
3765 
3766 /*
3767  * nfs fsinfo service
3768  */
3769 int
3770 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3771 	     struct thread *td, struct mbuf **mrq)
3772 {
3773 	struct sockaddr *nam = nfsd->nd_nam;
3774 	struct ucred *cred = &nfsd->nd_cr;
3775 	struct nfsv3_fsinfo *sip;
3776 	int error = 0, rdonly, getret = 1, pref;
3777 	struct vnode *vp = NULL;
3778 	struct mount *mp = NULL;
3779 	struct vattr at;
3780 	nfsfh_t nfh;
3781 	fhandle_t *fhp;
3782 	u_quad_t maxfsize;
3783 	struct statfs sb;
3784 	struct nfsm_info info;
3785 
3786 	info.mrep = nfsd->nd_mrep;
3787 	info.mreq = NULL;
3788 	info.md = nfsd->nd_md;
3789 	info.dpos = nfsd->nd_dpos;
3790 
3791 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3792 	fhp = &nfh.fh_generic;
3793 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3794 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3795 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3796 	if (error) {
3797 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3798 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3799 		error = 0;
3800 		goto nfsmout;
3801 	}
3802 
3803 	/* XXX Try to make a guess on the max file size. */
3804 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3805 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3806 
3807 	getret = VOP_GETATTR(vp, &at);
3808 	vput(vp);
3809 	vp = NULL;
3810 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3811 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3812 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3813 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3814 
3815 	/*
3816 	 * XXX
3817 	 * There should be file system VFS OP(s) to get this information.
3818 	 * For now, assume ufs.
3819 	 */
3820 	if (slp->ns_so->so_type == SOCK_DGRAM)
3821 		pref = NFS_MAXDGRAMDATA;
3822 	else
3823 		pref = NFS_MAXDATA;
3824 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3825 	sip->fs_rtpref = txdr_unsigned(pref);
3826 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3827 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3828 	sip->fs_wtpref = txdr_unsigned(pref);
3829 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3830 	sip->fs_dtpref = txdr_unsigned(pref);
3831 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3832 	sip->fs_timedelta.nfsv3_sec = 0;
3833 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3834 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3835 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3836 		NFSV3FSINFO_CANSETTIME);
3837 nfsmout:
3838 	*mrq = info.mreq;
3839 	if (vp)
3840 		vput(vp);
3841 	return(error);
3842 }
3843 
3844 /*
3845  * nfs pathconf service
3846  */
3847 int
3848 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3849 	       struct thread *td, struct mbuf **mrq)
3850 {
3851 	struct sockaddr *nam = nfsd->nd_nam;
3852 	struct ucred *cred = &nfsd->nd_cr;
3853 	struct nfsv3_pathconf *pc;
3854 	int error = 0, rdonly, getret = 1;
3855 	register_t linkmax, namemax, chownres, notrunc;
3856 	struct vnode *vp = NULL;
3857 	struct mount *mp = NULL;
3858 	struct vattr at;
3859 	nfsfh_t nfh;
3860 	fhandle_t *fhp;
3861 	struct nfsm_info info;
3862 
3863 	info.mrep = nfsd->nd_mrep;
3864 	info.mreq = NULL;
3865 	info.md = nfsd->nd_md;
3866 	info.dpos = nfsd->nd_dpos;
3867 
3868 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3869 	fhp = &nfh.fh_generic;
3870 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3871 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3872 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3873 	if (error) {
3874 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3875 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3876 		error = 0;
3877 		goto nfsmout;
3878 	}
3879 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3880 	if (!error)
3881 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3882 	if (!error)
3883 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3884 	if (!error)
3885 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3886 	getret = VOP_GETATTR(vp, &at);
3887 	vput(vp);
3888 	vp = NULL;
3889 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3890 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3891 			      &error));
3892 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3893 	if (error) {
3894 		error = 0;
3895 		goto nfsmout;
3896 	}
3897 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3898 
3899 	pc->pc_linkmax = txdr_unsigned(linkmax);
3900 	pc->pc_namemax = txdr_unsigned(namemax);
3901 	pc->pc_notrunc = txdr_unsigned(notrunc);
3902 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3903 
3904 	/*
3905 	 * These should probably be supported by VOP_PATHCONF(), but
3906 	 * until msdosfs is exportable (why would you want to?), the
3907 	 * Unix defaults should be ok.
3908 	 */
3909 	pc->pc_caseinsensitive = nfs_false;
3910 	pc->pc_casepreserving = nfs_true;
3911 nfsmout:
3912 	*mrq = info.mreq;
3913 	if (vp)
3914 		vput(vp);
3915 	return(error);
3916 }
3917 
3918 /*
3919  * Null operation, used by clients to ping server
3920  */
3921 /* ARGSUSED */
3922 int
3923 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3924 	   struct thread *td, struct mbuf **mrq)
3925 {
3926 	struct nfsm_info info;
3927 	int error = NFSERR_RETVOID;
3928 
3929 	info.mrep = nfsd->nd_mrep;
3930 	info.mreq = NULL;
3931 
3932 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3933 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3934 nfsmout:
3935 	*mrq = info.mreq;
3936 	return (error);
3937 }
3938 
3939 /*
3940  * No operation, used for obsolete procedures
3941  */
3942 /* ARGSUSED */
3943 int
3944 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3945 	   struct thread *td, struct mbuf **mrq)
3946 {
3947 	struct nfsm_info info;
3948 	int error;
3949 
3950 	info.mrep = nfsd->nd_mrep;
3951 	info.mreq = NULL;
3952 
3953 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3954 	if (nfsd->nd_repstat)
3955 		error = nfsd->nd_repstat;
3956 	else
3957 		error = EPROCUNAVAIL;
3958 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3959 	error = 0;
3960 nfsmout:
3961 	*mrq = info.mreq;
3962 	return (error);
3963 }
3964 
3965 /*
3966  * Perform access checking for vnodes obtained from file handles that would
3967  * refer to files already opened by a Unix client. You cannot just use
3968  * vn_writechk() and VOP_ACCESS() for two reasons.
3969  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3970  * 2 - The owner is to be given access irrespective of mode bits for some
3971  *     operations, so that processes that chmod after opening a file don't
3972  *     break. I don't like this because it opens a security hole, but since
3973  *     the nfs server opens a security hole the size of a barn door anyhow,
3974  *     what the heck.
3975  *
3976  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3977  * will return EPERM instead of EACCESS. EPERM is always an error.
3978  */
3979 static int
3980 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3981 	     int rdonly, struct thread *td, int override)
3982 {
3983 	struct vattr vattr;
3984 	int error;
3985 
3986 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3987 	if (flags & VWRITE) {
3988 		/* Just vn_writechk() changed to check rdonly */
3989 		/*
3990 		 * Disallow write attempts on read-only file systems;
3991 		 * unless the file is a socket or a block or character
3992 		 * device resident on the file system.
3993 		 */
3994 		if (rdonly ||
3995 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3996 			switch (vp->v_type) {
3997 			case VREG:
3998 			case VDIR:
3999 			case VLNK:
4000 				return (EROFS);
4001 			default:
4002 				break;
4003 			}
4004 		}
4005 		/*
4006 		 * If there's shared text associated with
4007 		 * the inode, we can't allow writing.
4008 		 */
4009 		if (vp->v_flag & VTEXT)
4010 			return (ETXTBSY);
4011 	}
4012 	error = VOP_GETATTR(vp, &vattr);
4013 	if (error)
4014 		return (error);
4015 	error = VOP_ACCESS(vp, flags, cred);	/* XXX ruid/rgid vs uid/gid */
4016 	/*
4017 	 * Allow certain operations for the owner (reads and writes
4018 	 * on files that are already open).
4019 	 */
4020 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4021 		error = 0;
4022 	return error;
4023 }
4024 #endif /* NFS_NOSERVER */
4025 
4026