xref: /freebsd/sys/fs/nfsclient/nfs_clrpcops.c (revision 19261079)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46 
47 #include "opt_inet6.h"
48 
49 #include <fs/nfs/nfsport.h>
50 #include <fs/nfsclient/nfs.h>
51 #include <sys/extattr.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54 
/* Parent sysctl node (_vfs_nfs) that the two knobs below attach to. */
55 SYSCTL_DECL(_vfs_nfs);
56 
/*
 * Read/write integer knob "ignore_eexist" under _vfs_nfs, default 0:
 * "NFS ignore EEXIST replies for mkdir/symlink".
 */
57 static int	nfsignore_eexist = 0;
58 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
59     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
60 
/*
 * Read/write integer knob "dssameconn" under _vfs_nfs, default 0:
 * "Use same TCP connection to multiple DSs".
 */
61 static int	nfscl_dssameconn = 0;
62 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
63     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
64 
65 /*
66  * Global variables
 *
 * The "extern" declarations refer to objects defined in other files; the
 * initialized variables after NFSCLSTATEMUTEX are defined here.
67  */
68 extern struct nfsstatsv1 nfsstatsv1;
	/* nfsrpc_open() only tries for a layout when this is non-zero. */
69 extern int nfs_numnfscbd;
70 extern struct timeval nfsboottime;
71 extern u_int32_t newnfs_false, newnfs_true;
72 extern nfstype nfsv34_type[9];
73 extern int nfsrv_useacl;
74 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
75 extern int nfscl_debuglevel;
76 extern int nfs_pnfsiothreads;
77 extern u_long sb_max_adj;
78 extern int nfs_maxcopyrange;
79 NFSCLSTATEMUTEX;
80 int nfstest_outofseq = 0;
	/* When non-zero, nfsrpc_openrpc() marks every Open as using POSIX locks. */
81 int nfscl_assumeposixlocks = 1;
	/* nfsrpc_open() only tries for a layout when this is non-zero. */
82 int nfscl_enablecallb = 0;
83 short nfsv4_cbport = NFSV4_CBPORT;
84 int nfstest_openallsetattr = 0;
85 
/* Byte offset of d_name within struct dirent. */
86 #define	DIRHDSIZ	offsetof(struct dirent, d_name)
87 
/*
 * Result codes returned by nfscl_getsameserver().  The numeric values are
 * the enumerators' natural positions (0, 1, 2).
 */
enum nfsclds_state {
	/* Use this session for the DS. */
	NFSDSP_USETHISSESSION,
	/* Use the nfsclds_sequence field of this dsp for a new session. */
	NFSDSP_SEQTHISSESSION,
	/* No matching server was found. */
	NFSDSP_NOTFOUND,
};
100 
101 /*
102  * Do a write RPC on a DS data file, using this structure for the arguments,
103  * so that this function can be executed by a separate kernel process.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from the argument lists of the nfsio_*ds*()/nfsrpc_*ds*() prototypes
 * in this file; confirm against the code that fills the structure in.
104  */
105 struct nfsclwritedsdorpc {
106 	int			done;		/* completion flag? - TODO confirm */
107 	int			inprog;		/* in-progress flag? - TODO confirm */
108 	struct task		tsk;		/* presumably the queued task - confirm */
	/* Arguments for the DS RPC (see the comment above the structure). */
109 	struct vnode		*vp;
110 	int			iomode;
111 	int			must_commit;
112 	nfsv4stateid_t		*stateidp;
113 	struct nfsclds		*dsp;
114 	uint64_t		off;
115 	int			len;
116 #ifdef notyet
117 	int			advise;
118 #endif
119 	struct nfsfh		*fhp;
120 	struct mbuf		*m;
121 	int			vers;
122 	int			minorvers;
123 	struct ucred		*cred;
124 	NFSPROC_T		*p;
125 	int			err;		/* presumably the RPC's result - confirm */
126 };
127 
128 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
129     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
130 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
131     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
132 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
133     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
134     void *);
135 static int nfsrpc_deallocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
136     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *);
137 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
138     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
139     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
140 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
141     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
142     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
143     int *, void *, int *);
144 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
145     struct nfscllockowner *, u_int64_t, u_int64_t,
146     u_int32_t, struct ucred *, NFSPROC_T *, int);
147 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
148     struct acl *, nfsv4stateid_t *, void *);
149 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
150     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
151     struct ucred *, NFSPROC_T *);
152 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
153     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
154     NFSPROC_T *);
155 static void nfscl_initsessionslots(struct nfsclsession *);
156 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
157     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
158     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
159     NFSPROC_T *);
160 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
161     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
162     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
163     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
164 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
165     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
166     struct ucred *, NFSPROC_T *);
167 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
168     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
169     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
170 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
171     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
172     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
173 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
174     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
175     struct ucred *, NFSPROC_T *);
176 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
177     struct nfsclds *, struct nfsclds **, uint32_t *);
178 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
179     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
180     NFSPROC_T *);
181 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
182     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
183 #ifdef notyet
184 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
185     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
186     NFSPROC_T *);
187 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
188     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
189 #endif
190 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
191     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *);
192 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
193     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
194 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
195     NFSPROC_T *);
196 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
197     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
198 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
199     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
200     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
201 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
202     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
203     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
204     struct nfsfh **, int *, int *, void *, int *);
205 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
206     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
207     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
208     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
209 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
210     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
211     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
212     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
213     int, int, int, int *, struct nfsclflayouthead *, int *);
214 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
215     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
216     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
217 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
218     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
219     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
220 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
221     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
222     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
223 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
224     int, struct nfsvattr *, int *, struct ucred *);
225 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
226 
227 int nfs_pnfsio(task_fn_t *, void *);
228 
229 /*
230  * nfs null call from vfs.
231  */
232 int
233 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
234 {
235 	int error;
236 	struct nfsrv_descript nfsd, *nd = &nfsd;
237 
238 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
239 	error = nfscl_request(nd, vp, p, cred, NULL);
240 	if (nd->nd_repstat && !error)
241 		error = nd->nd_repstat;
242 	m_freem(nd->nd_mrep);
243 	return (error);
244 }
245 
246 /*
247  * nfs access rpc op.
248  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
249  * modes are changed on the server, accesses might still fail later.
250  */
251 int
252 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
253     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
254 {
255 	int error;
256 	u_int32_t mode, rmode;
257 
258 	if (acmode & VREAD)
259 		mode = NFSACCESS_READ;
260 	else
261 		mode = 0;
262 	if (vnode_vtype(vp) == VDIR) {
263 		if (acmode & VWRITE)
264 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
265 				 NFSACCESS_DELETE);
266 		if (acmode & VEXEC)
267 			mode |= NFSACCESS_LOOKUP;
268 	} else {
269 		if (acmode & VWRITE)
270 			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
271 		if (acmode & VEXEC)
272 			mode |= NFSACCESS_EXECUTE;
273 	}
274 
275 	/*
276 	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
277 	 */
278 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
279 	    NULL);
280 
281 	/*
282 	 * The NFS V3 spec does not clarify whether or not
283 	 * the returned access bits can be a superset of
284 	 * the ones requested, so...
285 	 */
286 	if (!error && (rmode & mode) != mode)
287 		error = EACCES;
288 	return (error);
289 }
290 
291 /*
292  * The actual rpc, separated out for Darwin.
293  */
294 int
295 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
296     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
297     void *stuff)
298 {
299 	u_int32_t *tl;
300 	u_int32_t supported, rmode;
301 	int error;
302 	struct nfsrv_descript nfsd, *nd = &nfsd;
303 	nfsattrbit_t attrbits;
304 
305 	*attrflagp = 0;
306 	supported = mode;
307 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
308 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
309 	*tl = txdr_unsigned(mode);
310 	if (nd->nd_flag & ND_NFSV4) {
311 		/*
312 		 * And do a Getattr op.
313 		 */
314 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
315 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
316 		NFSGETATTR_ATTRBIT(&attrbits);
317 		(void) nfsrv_putattrbit(nd, &attrbits);
318 	}
319 	error = nfscl_request(nd, vp, p, cred, stuff);
320 	if (error)
321 		return (error);
322 	if (nd->nd_flag & ND_NFSV3) {
323 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
324 		if (error)
325 			goto nfsmout;
326 	}
327 	if (!nd->nd_repstat) {
328 		if (nd->nd_flag & ND_NFSV4) {
329 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
330 			supported = fxdr_unsigned(u_int32_t, *tl++);
331 		} else {
332 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
333 		}
334 		rmode = fxdr_unsigned(u_int32_t, *tl);
335 		if (nd->nd_flag & ND_NFSV4)
336 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
337 
338 		/*
339 		 * It's not obvious what should be done about
340 		 * unsupported access modes. For now, be paranoid
341 		 * and clear the unsupported ones.
342 		 */
343 		rmode &= supported;
344 		*rmodep = rmode;
345 	} else
346 		error = nd->nd_repstat;
347 nfsmout:
348 	m_freem(nd->nd_mrep);
349 	return (error);
350 }
351 
352 /*
353  * nfs open rpc
 *
 * Get an NFSv4 Open for vp with the share access implied by amode
 * (FREAD and/or FWRITE).  Vnodes that are not regular files return 0
 * without doing anything.  nfscl_open() finds or creates the open
 * structure and says (via "ret") whether an Open RPC is needed; if so the
 * RPC is nfsrpc_openrpc() or, on the first attempt with pNFS and callbacks
 * enabled, nfsrpc_getopenlayout().
 * The whole sequence is retried after nfs_catnap() while the error is
 * NFSERR_GRACE, NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER,
 * NFSERR_DELAY or NFSERR_BADSESSION.  NFSERR_EXPIRED/NFSERR_BADSTATEID
 * are retried while nfscl_hasexpired() returns 0 and retrycnt is below 4.
 * Returns 0 on success, the nfscl_open() error, the final RPC error, or
 * EIO once retrycnt has reached 4 with an error still pending.
354  */
355 int
356 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
357 {
358 	struct nfsclopen *op;
359 	struct nfscldeleg *dp;
360 	struct nfsfh *nfhp;
361 	struct nfsnode *np = VTONFS(vp);
362 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
363 	u_int32_t mode, clidrev;
364 	int ret, newone, error, expireret = 0, retrycnt;
365 
366 	/*
367 	 * For NFSv4, Open Ops are only done on Regular Files.
368 	 */
369 	if (vnode_vtype(vp) != VREG)
370 		return (0);
	/* Translate the open(2) style flags into NFSv4 share access bits. */
371 	mode = 0;
372 	if (amode & FREAD)
373 		mode |= NFSV4OPEN_ACCESSREAD;
374 	if (amode & FWRITE)
375 		mode |= NFSV4OPEN_ACCESSWRITE;
376 	nfhp = np->n_fhp;
377 
378 	retrycnt = 0;
379 #ifdef notdef
380 { char name[100]; int namel;
381 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
382 bcopy(NFS4NODENAME(np->n_v4), name, namel);
383 name[namel] = '\0';
384 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
385 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
386 else printf(" fhl=0\n");
387 }
388 #endif
389 	do {
390 	    dp = NULL;
391 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
392 		cred, p, NULL, &op, &newone, &ret, 1);
393 	    if (error) {
394 		return (error);
395 	    }
	    /*
	     * Remember the clientid revision seen before the RPC; it is
	     * passed to nfscl_hasexpired() below.  0 means no client
	     * structure, which disables the expired/bad-stateid retry.
	     */
396 	    if (nmp->nm_clp != NULL)
397 		clidrev = nmp->nm_clp->nfsc_clientidrev;
398 	    else
399 		clidrev = 0;
400 	    if (ret == NFSCLOPEN_DOOPEN) {
401 		if (np->n_v4 != NULL) {
402 			/*
403 			 * For the first attempt, try and get a layout, if
404 			 * pNFS is enabled for the mount.
405 			 */
406 			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
407 			    nfs_numnfscbd == 0 ||
408 			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
409 				error = nfsrpc_openrpc(nmp, vp,
410 				    np->n_v4->n4_data,
411 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
412 				    np->n_fhp->nfh_len, mode, op,
413 				    NFS4NODENAME(np->n_v4),
414 				    np->n_v4->n4_namelen,
415 				    &dp, 0, 0x0, cred, p, 0, 0);
416 			else
417 				error = nfsrpc_getopenlayout(nmp, vp,
418 				    np->n_v4->n4_data,
419 				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
420 				    np->n_fhp->nfh_len, mode, op,
421 				    NFS4NODENAME(np->n_v4),
422 				    np->n_v4->n4_namelen, &dp, cred, p);
			/* A delegation came back with the Open. */
423 			if (dp != NULL) {
424 #ifdef APPLE
425 				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
426 #else
427 				NFSLOCKNODE(np);
428 				np->n_flag &= ~NDELEGMOD;
429 				/*
430 				 * Invalidate the attribute cache, so that
431 				 * attributes that pre-date the issue of a
432 				 * delegation are not cached, since the
433 				 * cached attributes will remain valid while
434 				 * the delegation is held.
435 				 */
436 				NFSINVALATTRCACHE(np);
437 				NFSUNLOCKNODE(np);
438 #endif
439 				(void) nfscl_deleg(nmp->nm_mountp,
440 				    op->nfso_own->nfsow_clp,
441 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
442 			}
443 		} else {
			/* No n_v4 data (name etc.) to build the Open from. */
444 			error = EIO;
445 		}
446 		newnfs_copyincred(cred, &op->nfso_cred);
447 	    } else if (ret == NFSCLOPEN_SETCRED)
448 		/*
449 		 * This is a new local open on a delegation. It needs
450 		 * to have credentials so that an open can be done
451 		 * against the server during recovery.
452 		 */
453 		newnfs_copyincred(cred, &op->nfso_cred);
454 
455 	    /*
456 	     * nfso_opencnt is the count of how many VOP_OPEN()s have
457 	     * been done on this Open successfully and a VOP_CLOSE()
458 	     * is expected for each of these.
459 	     * If error is non-zero, don't increment it, since the Open
460 	     * hasn't succeeded yet.
461 	     */
462 	    if (!error) {
463 		op->nfso_opencnt++;
464 		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
465 		    NFSLOCKNODE(np);
466 		    np->n_openstateid = op;
467 		    NFSUNLOCKNODE(np);
468 		}
469 	    }
470 	    nfscl_openrelease(nmp, op, error, newone);
	    /*
	     * Sleep before retrying the transient errors; for an
	     * expired/bad stateid ask nfscl_hasexpired() and count the
	     * attempt so the loop below is bounded.
	     */
471 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
472 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
473 		error == NFSERR_BADSESSION) {
474 		(void) nfs_catnap(PZERO, error, "nfs_open");
475 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
476 		&& clidrev != 0) {
477 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
478 		retrycnt++;
479 	    }
480 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
481 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
482 	    error == NFSERR_BADSESSION ||
483 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
484 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* Out of expired/bad-stateid retries: report a generic I/O error. */
485 	if (error && retrycnt >= 4)
486 		error = EIO;
487 	return (error);
488 }
489 
490 /*
491  * the actual open rpc
 *
 * Builds and sends an NFSv4 Open (NFSV4OPEN_NOCREATE) followed by a Getattr
 * of the change and modify-time attributes for the open "op", then parses
 * the reply.
 *
 * nfhp/fhlen    - file handle given to nfscl_reqstart() for the request.
 * newfhp/newfhlen - file handle copied into a returned delegation and
 *                 passed to nfsrpc_openconfirm().
 * mode          - share access bits, with the deny bits above NFSLCK_SHIFT.
 * name/namelen  - component name sent for the non-reclaim claim types.
 * dpp           - on entry *dpp, if non-NULL, is a delegation to claim
 *                 (NFSV4OPEN_CLAIMDELEGATECUR).  *dpp is set to NULL at
 *                 entry and, on success, to a newly malloc'd delegation
 *                 when the server issued one.
 * reclaim/delegtype - non-zero reclaim sends NFSV4OPEN_CLAIMPREVIOUS with
 *                 delegtype instead of a name.
 * syscred       - non-zero sets ND_USEGSSNAME on the request.
 * recursed      - non-zero on the internal second Open done to obtain a
 *                 delegation; stops that retry from recursing again.
 *
 * Returns 0, a request/parse error, or the reply status.
 * NFSERR_STALECLIENTID additionally triggers nfscl_initiate_recovery().
492  */
493 int
494 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
495     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
496     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
497     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
498     int syscred, int recursed)
499 {
500 	u_int32_t *tl;
501 	struct nfsrv_descript nfsd, *nd = &nfsd;
502 	struct nfscldeleg *dp, *ndp = NULL;
503 	struct nfsvattr nfsva;
504 	u_int32_t rflags, deleg;
505 	nfsattrbit_t attrbits;
506 	int error, ret, acesize, limitby;
507 	struct nfsclsession *tsep;
508 
	/* Take the delegation to claim (if any); default the result to none. */
509 	dp = *dpp;
510 	*dpp = NULL;
511 	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
	/* Open args: seqid, share access, share deny, two clientid words. */
512 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
513 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
514 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
515 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
516 	tsep = nfsmnt_mdssession(nmp);
517 	*tl++ = tsep->nfsess_clientid.lval[0];
518 	*tl = tsep->nfsess_clientid.lval[1];
519 	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	/* Create type, then the claim type and its claim-specific data. */
520 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
521 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
522 	if (reclaim) {
523 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
524 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
525 		*tl = txdr_unsigned(delegtype);
526 	} else {
527 		if (dp != NULL) {
528 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
529 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* NFSv4.1+ mounts send a zero stateid seqid. */
530 			if (NFSHASNFSV4N(nmp))
531 				*tl++ = 0;
532 			else
533 				*tl++ = dp->nfsdl_stateid.seqid;
534 			*tl++ = dp->nfsdl_stateid.other[0];
535 			*tl++ = dp->nfsdl_stateid.other[1];
536 			*tl = dp->nfsdl_stateid.other[2];
537 		} else {
538 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
539 		}
540 		(void) nfsm_strtom(nd, name, namelen);
541 	}
	/* Append a Getattr for the change and modify-time attributes. */
542 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
543 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
544 	NFSZERO_ATTRBIT(&attrbits);
545 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
546 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
547 	(void) nfsrv_putattrbit(nd, &attrbits);
548 	if (syscred)
549 		nd->nd_flag |= ND_USEGSSNAME;
550 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
551 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
552 	if (error)
553 		return (error);
554 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
555 	if (!nd->nd_repstat) {
556 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
557 		    6 * NFSX_UNSIGNED);
558 		op->nfso_stateid.seqid = *tl++;
559 		op->nfso_stateid.other[0] = *tl++;
560 		op->nfso_stateid.other[1] = *tl++;
561 		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl still points at the last stateid word; the result flags
		 * are the last of the six words dissected after the stateid.
		 * The five words in between are not examined here.
		 */
562 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
563 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
564 		if (error)
565 			goto nfsmout;
566 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
567 		deleg = fxdr_unsigned(u_int32_t, *tl);
568 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
569 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
570 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
571 			      NFSCLFLAGS_FIRSTDELEG))
572 				op->nfso_own->nfsow_clp->nfsc_flags |=
573 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* The file handle is stored after the structure. */
574 			ndp = malloc(
575 			    sizeof (struct nfscldeleg) + newfhlen,
576 			    M_NFSCLDELEG, M_WAITOK);
577 			LIST_INIT(&ndp->nfsdl_owner);
578 			LIST_INIT(&ndp->nfsdl_lock);
579 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
580 			ndp->nfsdl_fhlen = newfhlen;
581 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
582 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
583 			nfscl_lockinit(&ndp->nfsdl_rwlock);
584 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
585 			    NFSX_UNSIGNED);
586 			ndp->nfsdl_stateid.seqid = *tl++;
587 			ndp->nfsdl_stateid.other[0] = *tl++;
588 			ndp->nfsdl_stateid.other[1] = *tl++;
589 			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero sets NFSCLDL_RECALL on the delegation below. */
590 			ret = fxdr_unsigned(int, *tl);
591 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
592 				ndp->nfsdl_flags = NFSCLDL_WRITE;
593 				/*
594 				 * Indicates how much the file can grow.
595 				 */
596 				NFSM_DISSECT(tl, u_int32_t *,
597 				    3 * NFSX_UNSIGNED);
598 				limitby = fxdr_unsigned(int, *tl++);
599 				switch (limitby) {
600 				case NFSV4OPEN_LIMITSIZE:
601 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
602 					break;
603 				case NFSV4OPEN_LIMITBLOCKS:
					/* Limit is the product of the two words. */
604 					ndp->nfsdl_sizelimit =
605 					    fxdr_unsigned(u_int64_t, *tl++);
606 					ndp->nfsdl_sizelimit *=
607 					    fxdr_unsigned(u_int64_t, *tl);
608 					break;
609 				default:
610 					error = NFSERR_BADXDR;
611 					goto nfsmout;
612 				}
613 			} else {
614 				ndp->nfsdl_flags = NFSCLDL_READ;
615 			}
616 			if (ret)
617 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
618 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
619 			    &acesize, p);
620 			if (error)
621 				goto nfsmout;
622 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
623 			error = NFSERR_BADXDR;
624 			goto nfsmout;
625 		}
		/*
		 * Step over two words whose values are not used (presumably
		 * the Getattr op number and status - confirm), then load the
		 * requested attributes.
		 */
626 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
627 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
628 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
629 		    NULL, NULL, NULL, p, cred);
630 		if (error)
631 			goto nfsmout;
632 		if (ndp != NULL) {
633 			ndp->nfsdl_change = nfsva.na_filerev;
634 			ndp->nfsdl_modtime = nfsva.na_mtime;
635 			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
636 		}
		/* The server wants the Open confirmed; retry while it says DELAY. */
637 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
638 		    do {
639 			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
640 			    cred, p);
641 			if (ret == NFSERR_DELAY)
642 			    (void) nfs_catnap(PZERO, ret, "nfs_open");
643 		    } while (ret == NFSERR_DELAY);
644 		    error = ret;
645 		}
646 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
647 		    nfscl_assumeposixlocks)
648 		    op->nfso_posixlock = 1;
649 		else
650 		    op->nfso_posixlock = 0;
651 
652 		/*
653 		 * If the server is handing out delegations, but we didn't
654 		 * get one because an OpenConfirm was required, try the
655 		 * Open again, to get a delegation. This is a harmless no-op,
656 		 * from a server's point of view.
657 		 */
658 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
659 		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
660 		    && !error && dp == NULL && ndp == NULL && !recursed) {
661 		    do {
662 			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
663 			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
664 			    cred, p, syscred, 1);
665 			if (ret == NFSERR_DELAY)
666 			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
667 		    } while (ret == NFSERR_DELAY);
		    /*
		     * A failure of this second Open is ignored, except for
		     * the three errors below, which are passed up.
		     */
668 		    if (ret) {
669 			if (ndp != NULL) {
670 				free(ndp, M_NFSCLDELEG);
671 				ndp = NULL;
672 			}
673 			if (ret == NFSERR_STALECLIENTID ||
674 			    ret == NFSERR_STALEDONTRECOVER ||
675 			    ret == NFSERR_BADSESSION)
676 				error = ret;
677 		    }
678 		}
679 	}
680 	if (nd->nd_repstat != 0 && error == 0)
681 		error = nd->nd_repstat;
682 	if (error == NFSERR_STALECLIENTID)
683 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
684 nfsmout:
	/* Hand back the new delegation only on success; otherwise free it. */
685 	if (!error)
686 		*dpp = ndp;
687 	else if (ndp != NULL)
688 		free(ndp, M_NFSCLDELEG);
689 	m_freem(nd->nd_mrep);
690 	return (error);
691 }
692 
693 /*
694  * open downgrade rpc
695  */
696 int
697 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
698     struct ucred *cred, NFSPROC_T *p)
699 {
700 	u_int32_t *tl;
701 	struct nfsrv_descript nfsd, *nd = &nfsd;
702 	int error;
703 
704 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
705 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
706 	if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
707 		*tl++ = 0;
708 	else
709 		*tl++ = op->nfso_stateid.seqid;
710 	*tl++ = op->nfso_stateid.other[0];
711 	*tl++ = op->nfso_stateid.other[1];
712 	*tl++ = op->nfso_stateid.other[2];
713 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
714 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
715 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
716 	error = nfscl_request(nd, vp, p, cred, NULL);
717 	if (error)
718 		return (error);
719 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
720 	if (!nd->nd_repstat) {
721 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
722 		op->nfso_stateid.seqid = *tl++;
723 		op->nfso_stateid.other[0] = *tl++;
724 		op->nfso_stateid.other[1] = *tl++;
725 		op->nfso_stateid.other[2] = *tl;
726 	}
727 	if (nd->nd_repstat && error == 0)
728 		error = nd->nd_repstat;
729 	if (error == NFSERR_STALESTATEID)
730 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
731 nfsmout:
732 	m_freem(nd->nd_mrep);
733 	return (error);
734 }
735 
736 /*
737  * V4 Close operation.
738  */
739 int
740 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
741 {
742 	struct nfsclclient *clp;
743 	int error;
744 
745 	if (vnode_vtype(vp) != VREG)
746 		return (0);
747 	if (doclose)
748 		error = nfscl_doclose(vp, &clp, p);
749 	else
750 		error = nfscl_getclose(vp, &clp);
751 	if (error)
752 		return (error);
753 
754 	nfscl_clientrelease(clp);
755 	return (0);
756 }
757 
758 /*
759  * Close the open.
 *
 * Using a copy of the credentials saved in the open:
 *  1. for every lock owner on the open, unlock its byte range locks with
 *     nfsrpc_locku() (retried on NFSERR_GRACE/NFSERR_DELAY, bounded by
 *     trycnt) and free the local lock records, then do a ReleaseLockOwner;
 *  2. with the open owner's rwlock held exclusively, call nfscl_tryclose(),
 *     repeating while it returns NFSERR_GRACE;
 *  3. free the lock owners and the open itself.
 * Errors from the RPCs are not returned to the caller.
760  */
761 void
762 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
763 {
764 	struct nfsrv_descript nfsd, *nd = &nfsd;
765 	struct nfscllockowner *lp, *nlp;
766 	struct nfscllock *lop, *nlop;
767 	struct ucred *tcred;
768 	u_int64_t off = 0, len = 0;
769 	u_int32_t type = NFSV4LOCKT_READ;
770 	int error, do_unlock, trycnt;
771 
772 	tcred = newnfs_getcred();
773 	newnfs_copycred(&op->nfso_cred, tcred);
774 	/*
775 	 * (Theoretically this could be done in the same
776 	 *  compound as the close, but having multiple
777 	 *  sequenced Ops in the same compound might be
778 	 *  too scary for some servers.)
779 	 */
	/* POSIX locks: one unlock covering the whole file per lock owner. */
780 	if (op->nfso_posixlock) {
781 		off = 0;
782 		len = NFS64BITSSET;
783 		type = NFSV4LOCKT_READ;
784 	}
785 
786 	/*
787 	 * Since this function is only called from VOP_INACTIVE(), no
788 	 * other thread will be manipulating this Open. As such, the
789 	 * lock lists are not being changed by other threads, so it should
790 	 * be safe to do this without locking.
791 	 */
792 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
793 		do_unlock = 1;
794 		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
			/* Non-POSIX locks: unlock exactly this lock's range. */
795 			if (op->nfso_posixlock == 0) {
796 				off = lop->nfslo_first;
797 				len = lop->nfslo_end - lop->nfslo_first;
798 				if (lop->nfslo_type == F_WRLCK)
799 					type = NFSV4LOCKT_WRITE;
800 				else
801 					type = NFSV4LOCKT_READ;
802 			}
803 			if (do_unlock) {
804 				trycnt = 0;
805 				do {
806 					error = nfsrpc_locku(nd, nmp, lp, off,
807 					    len, type, tcred, p, 0);
808 					if ((nd->nd_repstat == NFSERR_GRACE ||
809 					    nd->nd_repstat == NFSERR_DELAY) &&
810 					    error == 0)
811 						(void) nfs_catnap(PZERO,
812 						    (int)nd->nd_repstat,
813 						    "nfs_close");
814 				} while ((nd->nd_repstat == NFSERR_GRACE ||
815 				    nd->nd_repstat == NFSERR_DELAY) &&
816 				    error == 0 && trycnt++ < 5);
				/* The whole-file unlock is only sent once. */
817 				if (op->nfso_posixlock)
818 					do_unlock = 0;
819 			}
820 			nfscl_freelock(lop, 0);
821 		}
822 		/*
823 		 * Do a ReleaseLockOwner.
824 		 * The lock owner name nfsl_owner may be used by other opens for
825 		 * other files but the lock_owner4 name that nfsrpc_rellockown()
826 		 * puts on the wire has the file handle for this file appended
827 		 * to it, so it can be done now.
828 		 */
829 		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
830 		    lp->nfsl_open->nfso_fhlen, tcred, p);
831 	}
832 
833 	/*
834 	 * There could be other Opens for different files on the same
835 	 * OpenOwner, so locking is required.
836 	 */
837 	NFSLOCKCLSTATE();
838 	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
839 	NFSUNLOCKCLSTATE();
	/* The state mutex is dropped while the close is attempted. */
840 	do {
841 		error = nfscl_tryclose(op, tcred, nmp, p);
842 		if (error == NFSERR_GRACE)
843 			(void) nfs_catnap(PZERO, error, "nfs_close");
844 	} while (error == NFSERR_GRACE);
845 	NFSLOCKCLSTATE();
846 	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
847 
	/* Tear down the local state with the state mutex held. */
848 	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
849 		nfscl_freelockowner(lp, 0);
850 	nfscl_freeopen(op, 0);
851 	NFSUNLOCKCLSTATE();
852 	NFSFREECRED(tcred);
853 }
854 
855 /*
856  * The actual Close RPC.
857  */
858 int
859 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
860     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
861     int syscred)
862 {
863 	u_int32_t *tl;
864 	int error;
865 
866 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
867 	    op->nfso_fhlen, NULL, NULL, 0, 0);
868 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
869 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
870 	if (NFSHASNFSV4N(nmp))
871 		*tl++ = 0;
872 	else
873 		*tl++ = op->nfso_stateid.seqid;
874 	*tl++ = op->nfso_stateid.other[0];
875 	*tl++ = op->nfso_stateid.other[1];
876 	*tl = op->nfso_stateid.other[2];
877 	if (syscred)
878 		nd->nd_flag |= ND_USEGSSNAME;
879 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
880 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
881 	if (error)
882 		return (error);
883 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
884 	if (nd->nd_repstat == 0)
885 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
886 	error = nd->nd_repstat;
887 	if (error == NFSERR_STALESTATEID)
888 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
889 nfsmout:
890 	m_freem(nd->nd_mrep);
891 	return (error);
892 }
893 
894 /*
895  * V4 Open Confirm RPC.
896  */
897 int
898 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
899     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
900 {
901 	u_int32_t *tl;
902 	struct nfsrv_descript nfsd, *nd = &nfsd;
903 	struct nfsmount *nmp;
904 	int error;
905 
906 	nmp = VFSTONFS(vp->v_mount);
907 	if (NFSHASNFSV4N(nmp))
908 		return (0);		/* No confirmation for NFSv4.1. */
909 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
910 	    0, 0);
911 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
912 	*tl++ = op->nfso_stateid.seqid;
913 	*tl++ = op->nfso_stateid.other[0];
914 	*tl++ = op->nfso_stateid.other[1];
915 	*tl++ = op->nfso_stateid.other[2];
916 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
917 	error = nfscl_request(nd, vp, p, cred, NULL);
918 	if (error)
919 		return (error);
920 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
921 	if (!nd->nd_repstat) {
922 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
923 		op->nfso_stateid.seqid = *tl++;
924 		op->nfso_stateid.other[0] = *tl++;
925 		op->nfso_stateid.other[1] = *tl++;
926 		op->nfso_stateid.other[2] = *tl;
927 	}
928 	error = nd->nd_repstat;
929 	if (error == NFSERR_STALESTATEID)
930 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
931 nfsmout:
932 	m_freem(nd->nd_mrep);
933 	return (error);
934 }
935 
/*
 * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
 * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
 *
 * For NFSv4.1 and later mounts (NFSHASNFSV4N()) no SetClientID is done.
 * Instead:
 * - when retokp is not NULL and the mount already has a session, a
 *   CreateSession using the extant ClientID is tried first;
 * - if there was no such session or that attempt failed, an ExchangeID
 *   followed by a CreateSession is done;
 * - on success the new session is inserted at the head of nmp->nm_sess and
 *   waiters are woken up;
 * - when reclaim is zero, a ReclaimComplete is done afterwards, with
 *   NFSERR_COMPLETEALREADY and NFSERR_NOTSUPP being ignored.
 *
 * For NFSv4.0 a session structure is allocated and inserted at the head of
 * nmp->nm_sess, then SetClientID and SetClientIDConfirm are done.
 *
 * retokp may be NULL.  When it is not NULL and *retokp is already true on
 * entry, NFSERR_IO is returned on the paths where the CreateSession with the
 * extant ClientID cannot be done or fails.  *retokp is set to true when that
 * CreateSession succeeds.
 *
 * Returns 0 on success or an error from the RPC layer or the server.
 */
int
nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
    bool *retokp, struct ucred *cred, NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
	u_short port;
	int error, isinet6 = 0, callblen;
	nfsquad_t confirm;
	/* Shared by all calls; copied into clp->nfsc_rev and incremented. */
	static u_int32_t rev = 0;
	struct nfsclds *dsp, *odsp;
	struct in6_addr a6;
	struct nfsclsession *tsep;
	struct rpc_reconupcall recon;
	struct nfscl_reconarg *rcp;

	/* nfsboottime.tv_sec is put on the wire by the SetClientID below. */
	if (nfsboottime.tv_sec == 0)
		NFSSETBOOTTIME(nfsboottime);
	if (NFSHASNFSV4N(nmp)) {
		/*
		 * Preset to a non-zero value, so that the ExchangeID case
		 * below is done unless the CreateSession for the extant
		 * ClientID succeeds.
		 */
		error = NFSERR_BADSESSION;
		odsp = dsp = NULL;
		if (retokp != NULL) {
			NFSLOCKMNT(nmp);
			odsp = TAILQ_FIRST(&nmp->nm_sess);
			NFSUNLOCKMNT(nmp);
		}
		if (odsp != NULL) {
			/*
			 * When a session already exists, first try a
			 * CreateSession with the extant ClientID.
			 */
			dsp = malloc(sizeof(struct nfsclds) +
			    odsp->nfsclds_servownlen + 1, M_NFSCLDS,
			    M_WAITOK | M_ZERO);
			dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
			dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
			dsp->nfsclds_sess.nfsess_clientid =
			    odsp->nfsclds_sess.nfsess_clientid;
			dsp->nfsclds_sess.nfsess_sequenceid =
			    odsp->nfsclds_sess.nfsess_sequenceid;
			dsp->nfsclds_flags = odsp->nfsclds_flags;
			if (dsp->nfsclds_servownlen > 0)
				memcpy(dsp->nfsclds_serverown,
				    odsp->nfsclds_serverown,
				    dsp->nfsclds_servownlen + 1);
			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
			    NULL, MTX_DEF);
			nfscl_initsessionslots(&dsp->nfsclds_sess);
			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
			    &nmp->nm_sockreq, NULL,
			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
			NFSCL_DEBUG(1, "create session for extant "
			    "ClientID=%d\n", error);
			if (error != 0) {
				nfscl_freenfsclds(dsp);
				dsp = NULL;
				/*
				 * If *retokp is true, return any error other
				 * than NFSERR_STALECLIENTID,
				 * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
				 * so that nfscl_recover() will not loop.
				 */
				if (*retokp)
					return (NFSERR_IO);
			} else
				*retokp = true;
		} else if (retokp != NULL && *retokp)
			return (NFSERR_IO);
		if (error != 0) {
			/*
			 * Either there was no previous session or the
			 * CreateSession attempt failed, so...
			 * do an ExchangeID followed by the CreateSession.
			 */
			clp->nfsc_rev = rev++;
			error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
			    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
			    cred, p);
			NFSCL_DEBUG(1, "aft exch=%d\n", error);
			if (error == 0)
				error = nfsrpc_createsession(nmp,
				    &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
				    dsp->nfsclds_sess.nfsess_sequenceid, 1,
				    cred, p);
			NFSCL_DEBUG(1, "aft createsess=%d\n", error);
		}
		if (error == 0) {
			/*
			 * If the session supports a backchannel, set up
			 * the BindConnectionToSession call in the krpc
			 * so that it is done on a reconnection.
			 */
			if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
				/*
				 * NOTE(review): rcp is handed to the krpc via
				 * recon.arg and is not freed here; presumably
				 * the krpc takes ownership - confirm.
				 */
				rcp = mem_alloc(sizeof(*rcp));
				rcp->minorvers = nmp->nm_minorvers;
				memcpy(rcp->sessionid,
				    dsp->nfsclds_sess.nfsess_sessionid,
				    NFSX_V4SESSIONID);
				recon.call = nfsrpc_bindconnsess;
				recon.arg = rcp;
				CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
				    &recon);
			}

			NFSLOCKMNT(nmp);
			/*
			 * The old sessions cannot be safely free'd
			 * here, since they may still be used by
			 * in-progress RPCs.
			 */
			tsep = NULL;
			if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
				tsep = NFSMNT_MDSSESSION(nmp);
			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
			    nfsclds_list);
			/*
			 * Wake up RPCs waiting for a slot on the
			 * old session. These will then fail with
			 * NFSERR_BADSESSION and be retried with the
			 * new session by nfsv4_setsequence().
			 * Also wakeup() processes waiting for the
			 * new session.
			 */
			if (tsep != NULL)
				wakeup(&tsep->nfsess_slots);
			wakeup(&nmp->nm_sess);
			NFSUNLOCKMNT(nmp);
		} else if (dsp != NULL)
			nfscl_freenfsclds(dsp);
		if (error == 0 && reclaim == 0) {
			error = nfsrpc_reclaimcomplete(nmp, cred, p);
			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
			if (error == NFSERR_COMPLETEALREADY ||
			    error == NFSERR_NOTSUPP)
				/* Ignore this error. */
				error = 0;
		}
		return (error);
	} else if (retokp != NULL && *retokp)
		return (NFSERR_IO);
	clp->nfsc_rev = rev++;

	/*
	 * Allocate a single session structure for NFSv4.0, because some of
	 * the fields are used by NFSv4.0 although it doesn't do a session.
	 */
	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
	NFSLOCKMNT(nmp);
	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
	/*
	 * NOTE(review): tsep is presumably the session of the structure just
	 * inserted at the head of nm_sess - confirm against the definition
	 * of NFSMNT_MDSSESSION().
	 */
	tsep = NFSMNT_MDSSESSION(nmp);
	NFSUNLOCKMNT(nmp);

	/* The request starts with the boot time and nfsc_rev, then the id. */
	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
	*tl = txdr_unsigned(clp->nfsc_rev);
	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);

	/*
	 * set up the callback address
	 */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFS_CALLBCKPROG);
	/* Only ask for the local address when none has been configured. */
	callblen = strlen(nfsv4_callbackaddr);
	if (callblen == 0)
		cp = nfscl_getmyip(nmp, &a6, &isinet6);
	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
	    (callblen > 0 || cp != NULL)) {
		/*
		 * The address string ends in ".p1.p2", where p1 and p2 are
		 * the two bytes of the callback port in network byte order.
		 */
		port = htons(nfsv4_cbport);
		cp2 = (u_int8_t *)&port;
#ifdef INET6
		if ((callblen > 0 &&
		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
			char ip6buf[INET6_ADDRSTRLEN], *ip6add;

			(void) nfsm_strtom(nd, "tcp6", 4);
			if (callblen == 0) {
				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
				ip6add = ip6buf;
			} else {
				ip6add = nfsv4_callbackaddr;
			}
			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
			    ip6add, cp2[0], cp2[1]);
		} else
#endif
		{
			(void) nfsm_strtom(nd, "tcp", 3);
			if (callblen == 0)
				snprintf(addr, INET6_ADDRSTRLEN + 9,
				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
				    cp[2], cp[3], cp2[0], cp2[1]);
			else
				snprintf(addr, INET6_ADDRSTRLEN + 9,
				    "%s.%d.%d", nfsv4_callbackaddr,
				    cp2[0], cp2[1]);
		}
		(void) nfsm_strtom(nd, addr, strlen(addr));
	} else {
		/* Callbacks are not enabled or no address is available. */
		(void) nfsm_strtom(nd, "tcp", 3);
		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(clp->nfsc_cbident);
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
	    /* The reply holds the clientid and the confirm verifier. */
	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	    tsep->nfsess_clientid.lval[0] = *tl++;
	    tsep->nfsess_clientid.lval[1] = *tl++;
	    confirm.lval[0] = *tl++;
	    confirm.lval[1] = *tl;
	    m_freem(nd->nd_mrep);
	    nd->nd_mrep = NULL;

	    /*
	     * and confirm it.
	     */
	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
		NULL, 0, 0);
	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	    *tl++ = tsep->nfsess_clientid.lval[0];
	    *tl++ = tsep->nfsess_clientid.lval[1];
	    *tl++ = confirm.lval[0];
	    *tl = confirm.lval[1];
	    nd->nd_flag |= ND_USEGSSNAME;
	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	    if (error)
		return (error);
	    /* nd_mrep is cleared so the m_freem() at nfsmout frees nothing. */
	    m_freem(nd->nd_mrep);
	    nd->nd_mrep = NULL;
	}
	error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
1186 
1187 /*
1188  * nfs getattr call.
1189  */
1190 int
1191 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1192     struct nfsvattr *nap, void *stuff)
1193 {
1194 	struct nfsrv_descript nfsd, *nd = &nfsd;
1195 	int error;
1196 	nfsattrbit_t attrbits;
1197 
1198 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1199 	if (nd->nd_flag & ND_NFSV4) {
1200 		NFSGETATTR_ATTRBIT(&attrbits);
1201 		(void) nfsrv_putattrbit(nd, &attrbits);
1202 	}
1203 	error = nfscl_request(nd, vp, p, cred, stuff);
1204 	if (error)
1205 		return (error);
1206 	if (!nd->nd_repstat)
1207 		error = nfsm_loadattr(nd, nap);
1208 	else
1209 		error = nd->nd_repstat;
1210 	m_freem(nd->nd_mrep);
1211 	return (error);
1212 }
1213 
1214 /*
1215  * nfs getattr call with non-vnode arguments.
1216  */
1217 int
1218 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1219     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1220     uint32_t *leasep)
1221 {
1222 	struct nfsrv_descript nfsd, *nd = &nfsd;
1223 	int error, vers = NFS_VER2;
1224 	nfsattrbit_t attrbits;
1225 
1226 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
1227 	if (nd->nd_flag & ND_NFSV4) {
1228 		vers = NFS_VER4;
1229 		NFSGETATTR_ATTRBIT(&attrbits);
1230 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1231 		(void) nfsrv_putattrbit(nd, &attrbits);
1232 	} else if (nd->nd_flag & ND_NFSV3) {
1233 		vers = NFS_VER3;
1234 	}
1235 	if (syscred)
1236 		nd->nd_flag |= ND_USEGSSNAME;
1237 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1238 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1239 	if (error)
1240 		return (error);
1241 	if (nd->nd_repstat == 0) {
1242 		if ((nd->nd_flag & ND_NFSV4) != 0)
1243 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1244 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1245 			    NULL, NULL);
1246 		else
1247 			error = nfsm_loadattr(nd, nap);
1248 	} else
1249 		error = nd->nd_repstat;
1250 	m_freem(nd->nd_mrep);
1251 	return (error);
1252 }
1253 
1254 /*
1255  * Do an nfs setattr operation.
1256  */
1257 int
1258 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1259     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1260     void *stuff)
1261 {
1262 	int error, expireret = 0, openerr, retrycnt;
1263 	u_int32_t clidrev = 0, mode;
1264 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1265 	struct nfsfh *nfhp;
1266 	nfsv4stateid_t stateid;
1267 	void *lckp;
1268 
1269 	if (nmp->nm_clp != NULL)
1270 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1271 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1272 		mode = NFSV4OPEN_ACCESSWRITE;
1273 	else
1274 		mode = NFSV4OPEN_ACCESSREAD;
1275 	retrycnt = 0;
1276 	do {
1277 		lckp = NULL;
1278 		openerr = 1;
1279 		if (NFSHASNFSV4(nmp)) {
1280 			nfhp = VTONFS(vp)->n_fhp;
1281 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1282 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1283 			if (error && vnode_vtype(vp) == VREG &&
1284 			    (mode == NFSV4OPEN_ACCESSWRITE ||
1285 			     nfstest_openallsetattr)) {
1286 				/*
1287 				 * No Open stateid, so try and open the file
1288 				 * now.
1289 				 */
1290 				if (mode == NFSV4OPEN_ACCESSWRITE)
1291 					openerr = nfsrpc_open(vp, FWRITE, cred,
1292 					    p);
1293 				else
1294 					openerr = nfsrpc_open(vp, FREAD, cred,
1295 					    p);
1296 				if (!openerr)
1297 					(void) nfscl_getstateid(vp,
1298 					    nfhp->nfh_fh, nfhp->nfh_len,
1299 					    mode, 0, cred, p, &stateid, &lckp);
1300 			}
1301 		}
1302 		if (vap != NULL)
1303 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1304 			    rnap, attrflagp, stuff);
1305 		else
1306 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1307 			    stuff);
1308 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1309 			NFSLOCKMNT(nmp);
1310 			nmp->nm_state |= NFSSTA_OPENMODE;
1311 			NFSUNLOCKMNT(nmp);
1312 		}
1313 		if (error == NFSERR_STALESTATEID)
1314 			nfscl_initiate_recovery(nmp->nm_clp);
1315 		if (lckp != NULL)
1316 			nfscl_lockderef(lckp);
1317 		if (!openerr)
1318 			(void) nfsrpc_close(vp, 0, p);
1319 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1320 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1321 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1322 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1323 		} else if ((error == NFSERR_EXPIRED ||
1324 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1325 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1326 		}
1327 		retrycnt++;
1328 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1329 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1330 	    error == NFSERR_BADSESSION ||
1331 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1332 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1333 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1334 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1335 	     retrycnt < 4));
1336 	if (error && retrycnt >= 4)
1337 		error = EIO;
1338 	return (error);
1339 }
1340 
1341 static int
1342 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1343     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1344     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1345 {
1346 	u_int32_t *tl;
1347 	struct nfsrv_descript nfsd, *nd = &nfsd;
1348 	int error;
1349 	nfsattrbit_t attrbits;
1350 
1351 	*attrflagp = 0;
1352 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1353 	if (nd->nd_flag & ND_NFSV4)
1354 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1355 	vap->va_type = vnode_vtype(vp);
1356 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1357 	if (nd->nd_flag & ND_NFSV3) {
1358 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1359 		*tl = newnfs_false;
1360 	} else if (nd->nd_flag & ND_NFSV4) {
1361 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1362 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1363 		NFSGETATTR_ATTRBIT(&attrbits);
1364 		(void) nfsrv_putattrbit(nd, &attrbits);
1365 	}
1366 	error = nfscl_request(nd, vp, p, cred, stuff);
1367 	if (error)
1368 		return (error);
1369 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1370 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1371 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1372 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1373 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1374 		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1375 	m_freem(nd->nd_mrep);
1376 	if (nd->nd_repstat && !error)
1377 		error = nd->nd_repstat;
1378 	return (error);
1379 }
1380 
/*
 * nfs lookup rpc
 *
 * Looks up name (of length len) in the directory dvp.
 * - ENOTDIR is returned when dvp is not a directory and ENAMETOOLONG when
 *   len is greater than NFS_MAXNAMLEN.
 * - For NFSv4, "." is handled locally by returning a copy of dvp's file
 *   handle and ".." is done as a Lookupp.  A Lookupp that fails with
 *   NFSERR_NOENT also returns a copy of dvp's file handle.
 * - A non-zero openmode (ignored for "..") uses NFSPROC_LOOKUPOPEN, which
 *   adds a Verify that the file is VREG followed by an Open to the
 *   request, and records the resulting open via nfscl_open().
 *
 * On success *nfhpp is set to a newly allocated file handle.
 * NOTE(review): freeing *nfhpp is presumably the caller's job - confirm.
 * *attrflagp and *dattrflagp are cleared on entry and are set when
 * attributes for the file (nap) or the directory (dnap) were loaded.
 */
int
nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
    struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff,
    uint32_t openmode)
{
	uint32_t deleg, rflags, *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	nfsattrbit_t attrbits;
	int error = 0, lookupp = 0, newone, ret, retop;
	uint8_t own[NFSV4CL_LOCKNAMELEN];
	struct nfsclopen *op;
	struct nfscldeleg *ndp;
	nfsv4stateid_t stateid;

	*attrflagp = 0;
	*dattrflagp = 0;
	if (vnode_vtype(dvp) != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(dvp->v_mount);
	if (len > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	if (NFSHASNFSV4(nmp) && len == 1 &&
		name[0] == '.') {
		/*
		 * Just return the current dir's fh.
		 */
		np = VTONFS(dvp);
		nfhp = malloc(sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		nfhp->nfh_len = np->n_fhp->nfh_len;
		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		*nfhpp = nfhp;
		return (0);
	}
	if (NFSHASNFSV4(nmp) && len == 2 &&
		name[0] == '.' && name[1] == '.') {
		/* The Open is never attempted for "..". */
		lookupp = 1;
		openmode = 0;
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
	} else if (openmode != 0) {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPOPEN, dvp);
		nfsm_strtom(nd, name, len);
	} else {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
		(void) nfsm_strtom(nd, name, len);
	}
	if (nd->nd_flag & ND_NFSV4) {
		/* Append GetFH and Getattr for the looked up object. */
		NFSGETATTR_ATTRBIT(&attrbits);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		if (openmode != 0) {
			/* Test for a VREG file. */
			NFSZERO_ATTRBIT(&attrbits);
			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_VERIFY);
			nfsrv_putattrbit(nd, &attrbits);
			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSX_UNSIGNED);
			*tl = vtonfsv34_type(VREG);

			/* Attempt the Open for VREG. */
			nfscl_filllockowner(NULL, own, F_POSIX);
			NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OP_OPEN);
			*tl++ = 0;		/* seqid, ignored. */
			*tl++ = txdr_unsigned(openmode);
			*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
			*tl++ = 0;		/* ClientID, ignored. */
			*tl = 0;
			nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
		}
	}
	error = nfscl_request(nd, dvp, p, cred, stuff);
	if (error)
		return (error);
	/* ndp stays NULL unless a delegation is issued; it is freed at nfsmout. */
	ndp = NULL;
	if (nd->nd_repstat) {
		/*
		 * When an NFSv4 Lookupp returns ENOENT, it means that
		 * the lookup is at the root of an fs, so return this dir.
		 */
		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
		    np = VTONFS(dvp);
		    nfhp = malloc(sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		    nfhp->nfh_len = np->n_fhp->nfh_len;
		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		    *nfhpp = nfhp;
		    m_freem(nd->nd_mrep);
		    return (0);
		}
		if (nd->nd_flag & ND_NFSV3)
		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
		    ND_NFSV4) {
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			if (error == 0)
				*dattrflagp = 1;
			else
				goto nfsmout;
		}
		/*
		 * For the openmode != 0 case, the failure may have been in
		 * the Verify or Open operation.  Walk the operation replies
		 * and, if Lookup, GetFH and Getattr all succeeded, clear
		 * nd_repstat so that the Lookup itself is reported as done.
		 */
		/* Check Lookup operation reply status. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
		}
		/* Look for GetFH reply. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
			error = nfsm_getfh(nd, nfhpp);
			if (error)
				goto nfsmout;
		}
		/* Look for Getattr reply. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
			if (error == 0)
				/* Successfully got Lookup done. */
				nd->nd_repstat = 0;
		}
		goto nfsmout;
	}
	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
		/* Load the directory attributes. */
		error = nfsm_loadattr(nd, dnap);
		if (error != 0)
			goto nfsmout;
		*dattrflagp = 1;
		/* Skip over the Lookup and GetFH operation status values. */
		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	}
	error = nfsm_getfh(nd, nfhpp);
	if (error)
		goto nfsmout;

	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
	if (openmode != 0 && error == 0) {
		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID +
		    10 * NFSX_UNSIGNED);
		tl += 4;	/* Skip over Verify+Open status. */
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		/*
		 * tl is left pointing at other[2]; the result flags are six
		 * words further on (presumably after the change info of the
		 * Open reply - TODO confirm against the XDR).
		 */
		rflags = fxdr_unsigned(uint32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(uint32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/*
			 * Just need to fill in the fields used by
			 * nfscl_trydelegreturn().
			 * Mark the mount point as acquiring
			 * delegations, so NFSPROC_LOOKUPOPEN will
			 * no longer be done.
			 */
			NFSLOCKMNT(nmp);
			nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
			NFSUNLOCKMNT(nmp);
			ndp = malloc(sizeof(struct nfscldeleg) +
			    (*nfhpp)->nfh_len, M_NFSCLDELEG, M_WAITOK);
			ndp->nfsdl_fhlen = (*nfhpp)->nfh_len;
			NFSBCOPY((*nfhpp)->nfh_fh, ndp->nfsdl_fh,
			    ndp->nfsdl_fhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/*
		 * Record the open.  A failure here is not reported to the
		 * caller, since error is still 0 at nfsmout.
		 */
		ret = nfscl_open(dvp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len,
		    openmode, 0, cred, p, NULL, &op, &newone, &retop, 1);
		if (ret != 0)
			goto nfsmout;
		if (newone != 0) {
			op->nfso_stateid.seqid = stateid.seqid;
			op->nfso_stateid.other[0] = stateid.other[0];
			op->nfso_stateid.other[1] = stateid.other[1];
			op->nfso_stateid.other[2] = stateid.other[2];
			op->nfso_mode = openmode;
		} else {
			/* Extant open: only the seqid (and mode) is updated. */
			op->nfso_stateid.seqid = stateid.seqid;
			if (retop == NFSCLOPEN_DOOPEN)
				op->nfso_mode |= openmode;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
		    nfscl_assumeposixlocks)
			op->nfso_posixlock = 1;
		else
			op->nfso_posixlock = 0;
		nfscl_openrelease(nmp, op, 0, 0);
		if (ndp != NULL) {
			/*
			 * Since we do not have the vnode, we
			 * cannot invalidate cached attributes.
			 * Just return the delegation.
			 */
			nfscl_trydelegreturn(ndp, cred, nmp, p);
		}
	}
	if ((nd->nd_flag & ND_NFSV3) && !error)
		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
nfsmout:
	m_freem(nd->nd_mrep);
	if (!error && nd->nd_repstat)
		error = nd->nd_repstat;
	free(ndp, M_NFSCLDELEG);
	return (error);
}
1617 
1618 /*
1619  * Do a readlink rpc.
1620  */
1621 int
1622 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1623     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1624 {
1625 	u_int32_t *tl;
1626 	struct nfsrv_descript nfsd, *nd = &nfsd;
1627 	struct nfsnode *np = VTONFS(vp);
1628 	nfsattrbit_t attrbits;
1629 	int error, len, cangetattr = 1;
1630 
1631 	*attrflagp = 0;
1632 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1633 	if (nd->nd_flag & ND_NFSV4) {
1634 		/*
1635 		 * And do a Getattr op.
1636 		 */
1637 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1638 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1639 		NFSGETATTR_ATTRBIT(&attrbits);
1640 		(void) nfsrv_putattrbit(nd, &attrbits);
1641 	}
1642 	error = nfscl_request(nd, vp, p, cred, stuff);
1643 	if (error)
1644 		return (error);
1645 	if (nd->nd_flag & ND_NFSV3)
1646 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1647 	if (!nd->nd_repstat && !error) {
1648 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1649 		/*
1650 		 * This seems weird to me, but must have been added to
1651 		 * FreeBSD for some reason. The only thing I can think of
1652 		 * is that there was/is some server that replies with
1653 		 * more link data than it should?
1654 		 */
1655 		if (len == NFS_MAXPATHLEN) {
1656 			NFSLOCKNODE(np);
1657 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1658 				len = np->n_size;
1659 				cangetattr = 0;
1660 			}
1661 			NFSUNLOCKNODE(np);
1662 		}
1663 		error = nfsm_mbufuio(nd, uiop, len);
1664 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1665 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1666 	}
1667 	if (nd->nd_repstat && !error)
1668 		error = nd->nd_repstat;
1669 nfsmout:
1670 	m_freem(nd->nd_mrep);
1671 	return (error);
1672 }
1673 
1674 /*
1675  * Read operation.
1676  */
1677 int
1678 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1679     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1680 {
1681 	int error, expireret = 0, retrycnt;
1682 	u_int32_t clidrev = 0;
1683 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1684 	struct nfsnode *np = VTONFS(vp);
1685 	struct ucred *newcred;
1686 	struct nfsfh *nfhp = NULL;
1687 	nfsv4stateid_t stateid;
1688 	void *lckp;
1689 
1690 	if (nmp->nm_clp != NULL)
1691 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1692 	newcred = cred;
1693 	if (NFSHASNFSV4(nmp)) {
1694 		nfhp = np->n_fhp;
1695 		newcred = NFSNEWCRED(cred);
1696 	}
1697 	retrycnt = 0;
1698 	do {
1699 		lckp = NULL;
1700 		if (NFSHASNFSV4(nmp))
1701 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1702 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1703 			    &lckp);
1704 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1705 		    attrflagp, stuff);
1706 		if (error == NFSERR_OPENMODE) {
1707 			NFSLOCKMNT(nmp);
1708 			nmp->nm_state |= NFSSTA_OPENMODE;
1709 			NFSUNLOCKMNT(nmp);
1710 		}
1711 		if (error == NFSERR_STALESTATEID)
1712 			nfscl_initiate_recovery(nmp->nm_clp);
1713 		if (lckp != NULL)
1714 			nfscl_lockderef(lckp);
1715 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1716 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1717 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1718 			(void) nfs_catnap(PZERO, error, "nfs_read");
1719 		} else if ((error == NFSERR_EXPIRED ||
1720 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1721 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1722 		}
1723 		retrycnt++;
1724 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1725 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1726 	    error == NFSERR_BADSESSION ||
1727 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1728 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1729 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1730 	    (error == NFSERR_OPENMODE && retrycnt < 4));
1731 	if (error && retrycnt >= 4)
1732 		error = EIO;
1733 	if (NFSHASNFSV4(nmp))
1734 		NFSFREECRED(newcred);
1735 	return (error);
1736 }
1737 
1738 /*
1739  * The actual read RPC.
 *
 * Reads uiop->uio_resid bytes starting at uiop->uio_offset by issuing one
 * NFSPROC_READ request per piece of at most nm_rsize bytes and copying
 * each reply into uiop with nfsm_mbufuio().  The loop ends when the
 * requested count has been satisfied, or early on eof / a zero length
 * reply (NFSv3 and NFSv4) or a short reply (NFSv2).
 *
 * Returns 0 on success, EFBIG if the end offset is beyond nm_maxfilesize
 * or wraps around, otherwise the error from nfscl_request(), from parsing
 * the reply, or the server status found in nd_repstat.
 * *attrflagp is cleared before every request; it is only set again by the
 * attribute parsing code that runs on the reply.
1740  */
1741 static int
1742 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1743     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1744     int *attrflagp, void *stuff)
1745 {
1746 	u_int32_t *tl;
1747 	int error = 0, len, retlen, tsiz, eof = 0;
1748 	struct nfsrv_descript nfsd;
1749 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1750 	struct nfsrv_descript *nd = &nfsd;
1751 	int rsize;
1752 	off_t tmp_off;
1753 
1754 	*attrflagp = 0;
1755 	tsiz = uiop->uio_resid;
1756 	tmp_off = uiop->uio_offset + tsiz;
	/*
	 * Refuse a read that would end past the mount's maximum file size;
	 * tmp_off < uio_offset catches the end offset wrapping around.
	 * The read size is sampled while the mount lock is held.
	 */
1757 	NFSLOCKMNT(nmp);
1758 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1759 		NFSUNLOCKMNT(nmp);
1760 		return (EFBIG);
1761 	}
1762 	rsize = nmp->nm_rsize;
1763 	NFSUNLOCKMNT(nmp);
1764 	nd->nd_mrep = NULL;
1765 	while (tsiz > 0) {
1766 		*attrflagp = 0;
		/* Never ask for more than rsize bytes in one request. */
1767 		len = (tsiz > rsize) ? rsize : tsiz;
1768 		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1769 		if (nd->nd_flag & ND_NFSV4)
1770 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		/*
		 * Three XDR words of arguments: NFSv2 sends a 32 bit offset,
		 * the count and a zero word; NFSv3/4 send a 64 bit offset
		 * followed by the count.
		 */
1771 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1772 		if (nd->nd_flag & ND_NFSV2) {
1773 			*tl++ = txdr_unsigned(uiop->uio_offset);
1774 			*tl++ = txdr_unsigned(len);
1775 			*tl = 0;
1776 		} else {
1777 			txdr_hyper(uiop->uio_offset, tl);
1778 			*(tl + 2) = txdr_unsigned(len);
1779 		}
1780 		/*
1781 		 * Since I can't do a Getattr for NFSv4 for Write, there
1782 		 * doesn't seem any point in doing one here, either.
1783 		 * (See the comment in nfsrpc_writerpc() for more info.)
1784 		 */
1785 		error = nfscl_request(nd, vp, p, cred, stuff);
1786 		if (error)
1787 			return (error);
		/*
		 * NFSv3 attributes are parsed whether or not the server
		 * reported success; NFSv2 attributes are only loaded when
		 * nd_repstat is zero.
		 */
1788 		if (nd->nd_flag & ND_NFSV3) {
1789 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1790 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1791 			error = nfsm_loadattr(nd, nap);
1792 			if (!error)
1793 				*attrflagp = 1;
1794 		}
		/* A parse error takes precedence over the server's status. */
1795 		if (nd->nd_repstat || error) {
1796 			if (!error)
1797 				error = nd->nd_repstat;
1798 			goto nfsmout;
1799 		}
		/*
		 * Pick up the eof flag: for NFSv3 it is the second of two
		 * words (the first is not used here), for NFSv4 it is a
		 * single word.  NFSv2 has no eof flag.
		 */
1800 		if (nd->nd_flag & ND_NFSV3) {
1801 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1802 			eof = fxdr_unsigned(int, *(tl + 1));
1803 		} else if (nd->nd_flag & ND_NFSV4) {
1804 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1805 			eof = fxdr_unsigned(int, *tl);
1806 		}
		/*
		 * NOTE(review): NFSM_STRSIZ is defined elsewhere; it appears
		 * to fetch the returned data length into retlen, using len
		 * as the upper bound -- confirm against the macro.
		 */
1807 		NFSM_STRSIZ(retlen, len);
1808 		error = nfsm_mbufuio(nd, uiop, retlen);
1809 		if (error)
1810 			goto nfsmout;
1811 		m_freem(nd->nd_mrep);
1812 		nd->nd_mrep = NULL;
1813 		tsiz -= retlen;
		/*
		 * Stop early: NFSv3/4 on eof or an empty reply, NFSv2 when
		 * fewer bytes than requested came back.
		 */
1814 		if (!(nd->nd_flag & ND_NFSV2)) {
1815 			if (eof || retlen == 0)
1816 				tsiz = 0;
1817 		} else if (retlen < len)
1818 			tsiz = 0;
1819 	}
1820 	return (0);
1821 nfsmout:
	/* Error exit: release the reply, if one is still held. */
1822 	if (nd->nd_mrep != NULL)
1823 		m_freem(nd->nd_mrep);
1824 	return (error);
1825 }
1826 
1827 /*
1828  * nfs write operation
1829  * When called_from_strategy != 0, it should return EIO for an error that
1830  * indicates recovery is in progress, so that the buffer will be left
1831  * dirty and be written back to the server later. If it loops around,
1832  * the recovery thread could get stuck waiting for the buffer and recovery
1833  * will then deadlock.
1834  */
1835 int
1836 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1837     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1838     void *stuff, int called_from_strategy)
1839 {
1840 	int error, expireret = 0, retrycnt, nostateid;
1841 	u_int32_t clidrev = 0;
1842 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1843 	struct nfsnode *np = VTONFS(vp);
1844 	struct ucred *newcred;
1845 	struct nfsfh *nfhp = NULL;
1846 	nfsv4stateid_t stateid;
1847 	void *lckp;
1848 
1849 	*must_commit = 0;
1850 	if (nmp->nm_clp != NULL)
1851 		clidrev = nmp->nm_clp->nfsc_clientidrev;
1852 	newcred = cred;
1853 	if (NFSHASNFSV4(nmp)) {
1854 		newcred = NFSNEWCRED(cred);
1855 		nfhp = np->n_fhp;
1856 	}
1857 	retrycnt = 0;
1858 	do {
1859 		lckp = NULL;
1860 		nostateid = 0;
1861 		if (NFSHASNFSV4(nmp)) {
1862 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1863 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1864 			    &lckp);
1865 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1866 			    stateid.other[2] == 0) {
1867 				nostateid = 1;
1868 				NFSCL_DEBUG(1, "stateid0 in write\n");
1869 			}
1870 		}
1871 
1872 		/*
1873 		 * If there is no stateid for NFSv4, it means this is an
1874 		 * extraneous write after close. Basically a poorly
1875 		 * implemented buffer cache. Just don't do the write.
1876 		 */
1877 		if (nostateid)
1878 			error = 0;
1879 		else
1880 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1881 			    newcred, &stateid, p, nap, attrflagp, stuff);
1882 		if (error == NFSERR_STALESTATEID)
1883 			nfscl_initiate_recovery(nmp->nm_clp);
1884 		if (lckp != NULL)
1885 			nfscl_lockderef(lckp);
1886 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1887 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1888 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1889 			(void) nfs_catnap(PZERO, error, "nfs_write");
1890 		} else if ((error == NFSERR_EXPIRED ||
1891 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1892 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1893 		}
1894 		retrycnt++;
1895 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1896 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1897 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1898 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1899 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1900 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
1901 	if (error != 0 && (retrycnt >= 4 ||
1902 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1903 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1904 		error = EIO;
1905 	if (NFSHASNFSV4(nmp))
1906 		NFSFREECRED(newcred);
1907 	return (error);
1908 }
1909 
1910 /*
1911  * The actual write RPC.
 *
 * Writes uiop->uio_resid bytes starting at uiop->uio_offset, one
 * NFSPROC_WRITE request per piece of at most nm_wsize bytes.  The uio must
 * have exactly one iovec, because the roll back code below adjusts
 * uio_iov[0] directly.
 *
 * On entry *iomode is the stability level requested from the server; on
 * return through nfsmout it is replaced by the lowest commitment level any
 * of the replies reported.  *must_commit is set to 1 when a reply carries
 * a write verifier that differs from the one saved in the mount.
 *
 * Returns 0 on success, EFBIG if the end offset is beyond nm_maxfilesize
 * or wraps around, otherwise the error from nfscl_request(), from parsing
 * the reply, or the server status found in nd_repstat.
1912  */
1913 static int
1914 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1915     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1916     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1917 {
1918 	u_int32_t *tl;
1919 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1920 	struct nfsnode *np = VTONFS(vp);
1921 	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1922 	int wccflag = 0, wsize;
1923 	int32_t backup;
1924 	struct nfsrv_descript nfsd;
1925 	struct nfsrv_descript *nd = &nfsd;
1926 	nfsattrbit_t attrbits;
1927 	off_t tmp_off;
1928 
1929 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1930 	*attrflagp = 0;
1931 	tsiz = uiop->uio_resid;
1932 	tmp_off = uiop->uio_offset + tsiz;
	/*
	 * Refuse a write that would end past the mount's maximum file size;
	 * tmp_off < uio_offset catches the end offset wrapping around.
	 * The write size is sampled while the mount lock is held.
	 */
1933 	NFSLOCKMNT(nmp);
1934 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1935 		NFSUNLOCKMNT(nmp);
1936 		return (EFBIG);
1937 	}
1938 	wsize = nmp->nm_wsize;
1939 	NFSUNLOCKMNT(nmp);
1940 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
1941 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
1942 	while (tsiz > 0) {
1943 		*attrflagp = 0;
		/* Never send more than wsize bytes in one request. */
1944 		len = (tsiz > wsize) ? wsize : tsiz;
1945 		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
		/*
		 * Build the version specific arguments:
		 *  NFSv4: stateid, 64 bit offset, stability, data length.
		 *  NFSv3: 64 bit offset, count, stability, data length.
		 *  NFSv2: four 32 bit words, see below.
		 */
1946 		if (nd->nd_flag & ND_NFSV4) {
1947 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1948 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1949 			txdr_hyper(uiop->uio_offset, tl);
1950 			tl += 2;
1951 			*tl++ = txdr_unsigned(*iomode);
1952 			*tl = txdr_unsigned(len);
1953 		} else if (nd->nd_flag & ND_NFSV3) {
1954 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1955 			txdr_hyper(uiop->uio_offset, tl);
1956 			tl += 2;
1957 			*tl++ = txdr_unsigned(len);
1958 			*tl++ = txdr_unsigned(*iomode);
1959 			*tl = txdr_unsigned(len);
1960 		} else {
1961 			u_int32_t x;
1962 
1963 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1964 			/*
1965 			 * Not sure why someone changed this, since the
1966 			 * RFC clearly states that "beginoffset" and
1967 			 * "totalcount" are ignored, but it wouldn't
1968 			 * surprise me if there's a busted server out there.
1969 			 */
1970 			/* Set both "begin" and "current" to non-garbage. */
1971 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1972 			*tl++ = x;      /* "begin offset" */
1973 			*tl++ = x;      /* "current offset" */
1974 			x = txdr_unsigned(len);
1975 			*tl++ = x;      /* total to this offset */
1976 			*tl = x;        /* size of this write */
1977 		}
		/*
		 * Append len bytes of data from the uio; the roll back code
		 * below relies on this having advanced the uio by len.
		 */
1978 		nfsm_uiombuf(nd, uiop, len);
1979 		/*
1980 		 * Although it is tempting to do a normal Getattr Op in the
1981 		 * NFSv4 compound, the result can be a nearly hung client
1982 		 * system if the Getattr asks for Owner and/or OwnerGroup.
1983 		 * It occurs when the client can't map either the Owner or
1984 		 * Owner_group name in the Getattr reply to a uid/gid. When
1985 		 * there is a cache miss, the kernel does an upcall to the
1986 		 * nfsuserd. Then, it can try and read the local /etc/passwd
1987 		 * or /etc/group file. It can then block in getnewbuf(),
1988 		 * waiting for dirty writes to be pushed to the NFS server.
1989 		 * The only reason this doesn't result in a complete
1990 		 * deadlock, is that the upcall times out and allows
1991 		 * the write to complete. However, progress is so slow
1992 		 * that it might just as well be deadlocked.
1993 		 * As such, we get the rest of the attributes, but not
1994 		 * Owner or Owner_group.
1995 		 * nb: nfscl_loadattrcache() needs to be told that these
1996 		 *     partial attributes from a write rpc are being
1997 		 *     passed in, via a argument flag.
1998 		 */
1999 		if (nd->nd_flag & ND_NFSV4) {
2000 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
2001 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2002 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2003 			(void) nfsrv_putattrbit(nd, &attrbits);
2004 		}
		/*
		 * Note that a transport level failure returns directly:
		 * *iomode is left untouched and the uio is not rolled back.
		 */
2005 		error = nfscl_request(nd, vp, p, cred, stuff);
2006 		if (error)
2007 			return (error);
2008 		if (nd->nd_repstat) {
2009 			/*
2010 			 * In case the rpc gets retried, roll
2011 			 * the uio fields changed by nfsm_uiombuf()
2012 			 * back.
2013 			 */
2014 			uiop->uio_offset -= len;
2015 			uiop->uio_resid += len;
2016 			uiop->uio_iov->iov_base =
2017 			    (char *)uiop->uio_iov->iov_base - len;
2018 			uiop->uio_iov->iov_len += len;
2019 		}
		/* NFSv3/4 replies carry wcc data even when the write failed. */
2020 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2021 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
2022 			    &wccflag, stuff);
2023 			if (error)
2024 				goto nfsmout;
2025 		}
2026 		if (!nd->nd_repstat) {
2027 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
				/* Bytes written, commit level and verifier. */
2028 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2029 					+ NFSX_VERF);
2030 				rlen = fxdr_unsigned(int, *tl++);
				/*
				 * A reply claiming that nothing was written
				 * is treated as an I/O error.  For a short
				 * write, back the uio up by the unwritten
				 * part so the next pass resends it.
				 */
2031 				if (rlen == 0) {
2032 					error = NFSERR_IO;
2033 					goto nfsmout;
2034 				} else if (rlen < len) {
2035 					backup = len - rlen;
2036 					uiop->uio_iov->iov_base =
2037 					    (char *)uiop->uio_iov->iov_base -
2038 					    backup;
2039 					uiop->uio_iov->iov_len += backup;
2040 					uiop->uio_offset -= backup;
2041 					uiop->uio_resid += backup;
2042 					len = rlen;
2043 				}
2044 				commit = fxdr_unsigned(int, *tl++);
2045 
2046 				/*
2047 				 * Return the lowest commitment level
2048 				 * obtained by any of the RPCs.
2049 				 */
2050 				if (committed == NFSWRITE_FILESYNC)
2051 					committed = commit;
2052 				else if (committed == NFSWRITE_DATASYNC &&
2053 					commit == NFSWRITE_UNSTABLE)
2054 					committed = commit;
				/*
				 * The first verifier seen is saved in the
				 * mount.  A later verifier that differs sets
				 * *must_commit and replaces the saved one.
				 */
2055 				NFSLOCKMNT(nmp);
2056 				if (!NFSHASWRITEVERF(nmp)) {
2057 					NFSBCOPY((caddr_t)tl,
2058 					    (caddr_t)&nmp->nm_verf[0],
2059 					    NFSX_VERF);
2060 					NFSSETWRITEVERF(nmp);
2061 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
2062 				    NFSX_VERF)) {
2063 					*must_commit = 1;
2064 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2065 				}
2066 				NFSUNLOCKMNT(nmp);
2067 			}
			/*
			 * NFSv4: step over two words before the attributes
			 * (presumably the Getattr op number and status --
			 * confirm).  NFSv2/4 then load the attributes and
			 * mark them NFS_LATTR_NOSHRINK.
			 */
2068 			if (nd->nd_flag & ND_NFSV4)
2069 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2070 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2071 				error = nfsm_loadattr(nd, nap);
2072 				if (!error)
2073 					*attrflagp = NFS_LATTR_NOSHRINK;
2074 			}
2075 		} else {
2076 			error = nd->nd_repstat;
2077 		}
2078 		if (error)
2079 			goto nfsmout;
2080 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2081 		m_freem(nd->nd_mrep);
2082 		nd->nd_mrep = NULL;
		/* len was reduced above if the server wrote less. */
2083 		tsiz -= len;
2084 	}
	/* The success path falls through to the common exit as well. */
2085 nfsmout:
2086 	if (nd->nd_mrep != NULL)
2087 		m_freem(nd->nd_mrep);
2088 	*iomode = committed;
2089 	if (nd->nd_repstat && !error)
2090 		error = nd->nd_repstat;
2091 	return (error);
2092 }
2093 
2094 /*
2095  * Do an nfs deallocate operation.
2096  */
2097 int
2098 nfsrpc_deallocate(vnode_t vp, off_t offs, off_t len, struct nfsvattr *nap,
2099     int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff)
2100 {
2101 	int error, expireret = 0, openerr, retrycnt;
2102 	uint32_t clidrev = 0;
2103 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2104 	struct nfsfh *nfhp;
2105 	nfsv4stateid_t stateid;
2106 	void *lckp;
2107 
2108 	if (nmp->nm_clp != NULL)
2109 		clidrev = nmp->nm_clp->nfsc_clientidrev;
2110 	retrycnt = 0;
2111 	do {
2112 		lckp = NULL;
2113 		openerr = 1;
2114 		nfhp = VTONFS(vp)->n_fhp;
2115 		error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
2116 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
2117 		if (error != 0) {
2118 			/*
2119 			 * No Open stateid, so try and open the file
2120 			 * now.
2121 			 */
2122 			openerr = nfsrpc_open(vp, FWRITE, cred, p);
2123 			if (openerr == 0)
2124 				nfscl_getstateid(vp, nfhp->nfh_fh,
2125 				    nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0,
2126 				    cred, p, &stateid, &lckp);
2127 		}
2128 		error = nfsrpc_deallocaterpc(vp, offs, len, &stateid, nap,
2129 		    attrflagp, cred, p, stuff);
2130 		if (error == NFSERR_STALESTATEID)
2131 			nfscl_initiate_recovery(nmp->nm_clp);
2132 		if (lckp != NULL)
2133 			nfscl_lockderef(lckp);
2134 		if (openerr == 0)
2135 			nfsrpc_close(vp, 0, p);
2136 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2137 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2138 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
2139 			(void) nfs_catnap(PZERO, error, "nfs_deallocate");
2140 		} else if ((error == NFSERR_EXPIRED ||
2141 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
2142 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2143 		}
2144 		retrycnt++;
2145 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2146 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2147 	    error == NFSERR_BADSESSION ||
2148 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2149 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2150 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
2151 	if (error && retrycnt >= 4)
2152 		error = EIO;
2153 	return (error);
2154 }
2155 
2156 /*
2157  * The actual deallocate RPC.
 *
 * Sends one NFSPROC_DEALLOCATE request made of the stateid, the 64 bit
 * offset and the 64 bit length, followed by a Getattr for the attribute
 * set chosen by NFSWRITEGETATTR_ATTRBIT().
 *
 * On success the attributes are loaded into *nap and *attrflagp is set to
 * NFS_LATTR_NOSHRINK.  Returns 0, the error from nfscl_request() or from
 * parsing the reply, or the server status found in nd_repstat.
2158  */
2159 static int
2160 nfsrpc_deallocaterpc(vnode_t vp, off_t offs, off_t len,
2161     nfsv4stateid_t *stateidp, struct nfsvattr *nap, int *attrflagp,
2162     struct ucred *cred, NFSPROC_T *p, void *stuff)
2163 {
2164 	uint32_t *tl;
2165 	struct nfsnode *np = VTONFS(vp);
2166 	int error, wccflag;
2167 	struct nfsrv_descript nfsd;
2168 	struct nfsrv_descript *nd = &nfsd;
2169 	nfsattrbit_t attrbits;
2170 
2171 	*attrflagp = 0;
2172 	NFSCL_REQSTART(nd, NFSPROC_DEALLOCATE, vp);
2173 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
	/* Two 64 bit arguments: the offset, then the length. */
2174 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
2175 	txdr_hyper(offs, tl);
2176 	tl += 2;
2177 	txdr_hyper(len, tl);
	/* Ask for the same attribute set that the write RPC requests. */
2178 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
2179 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
2180 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2181 	nfsrv_putattrbit(nd, &attrbits);
2182 	error = nfscl_request(nd, vp, p, cred, stuff);
2183 	if (error != 0)
2184 		return (error);
2185 	wccflag = 0;
2186 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, stuff);
2187 	if (error != 0)
2188 		goto nfsmout;
2189 	if (nd->nd_repstat == 0) {
		/*
		 * Step over two words before the attributes (presumably the
		 * Getattr op number and status -- confirm), then load them.
		 */
2190 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2191 		error = nfsm_loadattr(nd, nap);
2192 		if (error != 0)
2193 			goto nfsmout;
2194 		*attrflagp = NFS_LATTR_NOSHRINK;
2195 	}
2196 	NFSWRITERPC_SETTIME(wccflag, np, nap, 1);
2197 nfsmout:
	/* Common exit: free the reply; a parse error wins over nd_repstat. */
2198 	m_freem(nd->nd_mrep);
2199 	if (nd->nd_repstat != 0 && error == 0)
2200 		error = nd->nd_repstat;
2201 	return (error);
2202 }
2203 
2204 /*
2205  * nfs mknod rpc
2206  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2207  * mode set to specify the file type and the size field for rdev.
2208  */
2209 int
2210 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2211     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
2212     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2213     int *attrflagp, int *dattrflagp, void *dstuff)
2214 {
2215 	u_int32_t *tl;
2216 	int error = 0;
2217 	struct nfsrv_descript nfsd, *nd = &nfsd;
2218 	nfsattrbit_t attrbits;
2219 
2220 	*nfhpp = NULL;
2221 	*attrflagp = 0;
2222 	*dattrflagp = 0;
2223 	if (namelen > NFS_MAXNAMLEN)
2224 		return (ENAMETOOLONG);
2225 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
2226 	if (nd->nd_flag & ND_NFSV4) {
2227 		if (vtyp == VBLK || vtyp == VCHR) {
2228 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2229 			*tl++ = vtonfsv34_type(vtyp);
2230 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2231 			*tl = txdr_unsigned(NFSMINOR(rdev));
2232 		} else {
2233 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2234 			*tl = vtonfsv34_type(vtyp);
2235 		}
2236 	}
2237 	(void) nfsm_strtom(nd, name, namelen);
2238 	if (nd->nd_flag & ND_NFSV3) {
2239 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2240 		*tl = vtonfsv34_type(vtyp);
2241 	}
2242 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2243 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2244 	if ((nd->nd_flag & ND_NFSV3) &&
2245 	    (vtyp == VCHR || vtyp == VBLK)) {
2246 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2247 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2248 		*tl = txdr_unsigned(NFSMINOR(rdev));
2249 	}
2250 	if (nd->nd_flag & ND_NFSV4) {
2251 		NFSGETATTR_ATTRBIT(&attrbits);
2252 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2253 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2254 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2255 		(void) nfsrv_putattrbit(nd, &attrbits);
2256 	}
2257 	if (nd->nd_flag & ND_NFSV2)
2258 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2259 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2260 	if (error)
2261 		return (error);
2262 	if (nd->nd_flag & ND_NFSV4)
2263 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2264 	if (!nd->nd_repstat) {
2265 		if (nd->nd_flag & ND_NFSV4) {
2266 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2267 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2268 			if (error)
2269 				goto nfsmout;
2270 		}
2271 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2272 		if (error)
2273 			goto nfsmout;
2274 	}
2275 	if (nd->nd_flag & ND_NFSV3)
2276 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2277 	if (!error && nd->nd_repstat)
2278 		error = nd->nd_repstat;
2279 nfsmout:
2280 	m_freem(nd->nd_mrep);
2281 	return (error);
2282 }
2283 
2284 /*
2285  * nfs file create call
2286  * Mostly just call the approriate routine. (I separated out v4, so that
2287  * error recovery wouldn't be as difficult.)
2288  */
2289 int
2290 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2291     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2292     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2293     int *attrflagp, int *dattrflagp, void *dstuff)
2294 {
2295 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
2296 	struct nfsclowner *owp;
2297 	struct nfscldeleg *dp;
2298 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2299 	u_int32_t clidrev;
2300 
2301 	if (NFSHASNFSV4(nmp)) {
2302 	    retrycnt = 0;
2303 	    do {
2304 		dp = NULL;
2305 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2306 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2307 		    NULL, 1);
2308 		if (error)
2309 			return (error);
2310 		if (nmp->nm_clp != NULL)
2311 			clidrev = nmp->nm_clp->nfsc_clientidrev;
2312 		else
2313 			clidrev = 0;
2314 		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2315 		    nfs_numnfscbd == 0 || retrycnt > 0)
2316 			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2317 			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2318 			  attrflagp, dattrflagp, dstuff, &unlocked);
2319 		else
2320 			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2321 			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2322 			  attrflagp, dattrflagp, dstuff, &unlocked);
2323 		/*
2324 		 * There is no need to invalidate cached attributes here,
2325 		 * since new post-delegation issue attributes are always
2326 		 * returned by nfsrpc_createv4() and these will update the
2327 		 * attribute cache.
2328 		 */
2329 		if (dp != NULL)
2330 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2331 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2332 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2333 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2334 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2335 		    error == NFSERR_BADSESSION) {
2336 			(void) nfs_catnap(PZERO, error, "nfs_open");
2337 		} else if ((error == NFSERR_EXPIRED ||
2338 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
2339 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2340 			retrycnt++;
2341 		}
2342 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2343 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2344 		error == NFSERR_BADSESSION ||
2345 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2346 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
2347 	    if (error && retrycnt >= 4)
2348 		    error = EIO;
2349 	} else {
2350 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2351 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2352 		    dstuff);
2353 	}
2354 	return (error);
2355 }
2356 
2357 /*
2358  * The create rpc for v2 and 3.
 *
 * Sends NFSPROC_CREATE for "name" (namelen bytes) in directory dvp.
 * For NFSv3, O_EXCL in fmode selects an exclusive create that carries the
 * verifier cverf and no attributes; otherwise an unchecked create with
 * the attributes from vap is sent.  NFSv2 always sends the attributes.
 *
 * On success *nfhpp and nnap/*attrflagp describe the new file, as filled
 * in by nfscl_mtofh(); for NFSv3 dnap/*dattrflagp receive the directory
 * attributes parsed by nfscl_wcc_data().
 *
 * Returns 0, ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the error from
 * nfscl_request() or from parsing the reply, or the server status found in
 * nd_repstat.
2359  */
2360 static int
2361 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2362     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2363     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2364     int *attrflagp, int *dattrflagp, void *dstuff)
2365 {
2366 	u_int32_t *tl;
2367 	int error = 0;
2368 	struct nfsrv_descript nfsd, *nd = &nfsd;
2369 
2370 	*nfhpp = NULL;
2371 	*attrflagp = 0;
2372 	*dattrflagp = 0;
2373 	if (namelen > NFS_MAXNAMLEN)
2374 		return (ENAMETOOLONG);
2375 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2376 	(void) nfsm_strtom(nd, name, namelen);
2377 	if (nd->nd_flag & ND_NFSV3) {
		/* One word for the create mode, filled in below. */
2378 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2379 		if (fmode & O_EXCL) {
2380 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
			/* cverf is copied as is, without XDR conversion. */
2381 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2382 			*tl++ = cverf.lval[0];
2383 			*tl = cverf.lval[1];
2384 		} else {
2385 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2386 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2387 		}
2388 	} else {
2389 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2390 	}
2391 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2392 	if (error)
2393 		return (error);
	/* The new file's handle and attributes are only there on success. */
2394 	if (nd->nd_repstat == 0) {
2395 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2396 		if (error)
2397 			goto nfsmout;
2398 	}
	/* NFSv3 returns directory wcc data whether or not the create worked. */
2399 	if (nd->nd_flag & ND_NFSV3)
2400 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2401 	if (nd->nd_repstat != 0 && error == 0)
2402 		error = nd->nd_repstat;
2403 nfsmout:
2404 	m_freem(nd->nd_mrep);
2405 	return (error);
2406 }
2407 
/*
 * The create rpc for NFSv4, which is done as an Open with the create flag.
 *
 * The compound built here is: Open (create "name" in dvp for open owner
 * owp), Getfh, Getattr on the new file, then Putfh of the directory and a
 * Getattr on it.
 *
 * On success *nfhpp and nnap/*attrflagp describe the new file, dnap holds
 * the directory attributes (*dattrflagp is set to 1) and *dpp is the
 * delegation issued by the server, or NULL if none was issued.  On failure
 * *dpp stays NULL and any delegation structure allocated here is freed.
 * *unlockedp is set to 1 once nfscl_openrelease() has been called.
 *
 * Returns 0, ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the error from
 * nfscl_request(), from parsing the reply or from the follow-up RPCs, or
 * the server status found in nd_repstat.
 */
2408 static int
2409 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2410     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2411     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2412     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2413     int *dattrflagp, void *dstuff, int *unlockedp)
2414 {
2415 	u_int32_t *tl;
2416 	int error = 0, deleg, newone, ret, acesize, limitby;
2417 	struct nfsrv_descript nfsd, *nd = &nfsd;
2418 	struct nfsclopen *op;
2419 	struct nfscldeleg *dp = NULL;
2420 	struct nfsnode *np;
2421 	struct nfsfh *nfhp;
2422 	nfsattrbit_t attrbits;
2423 	nfsv4stateid_t stateid;
2424 	u_int32_t rflags;
2425 	struct nfsmount *nmp;
2426 	struct nfsclsession *tsep;
2427 
2428 	nmp = VFSTONFS(dvp->v_mount);
2429 	np = VTONFS(dvp);
2430 	*unlockedp = 0;
2431 	*nfhpp = NULL;
2432 	*dpp = NULL;
2433 	*attrflagp = 0;
2434 	*dattrflagp = 0;
2435 	if (namelen > NFS_MAXNAMLEN)
2436 		return (ENAMETOOLONG);
2437 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2438 	/*
2439 	 * For V4, this is actually an Open op.
2440 	 */
	/*
	 * Open arguments: owner seqid, share access, share deny and the two
	 * words of the client id (copied as is), followed by the owner name.
	 */
2441 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2442 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2443 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2444 	    NFSV4OPEN_ACCESSREAD);
2445 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2446 	tsep = nfsmnt_mdssession(nmp);
2447 	*tl++ = tsep->nfsess_clientid.lval[0];
2448 	*tl = tsep->nfsess_clientid.lval[1];
2449 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	/* Open type (create) and the create mode, chosen below. */
2450 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2451 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2452 	if (fmode & O_EXCL) {
2453 		if (NFSHASNFSV4N(nmp)) {
2454 			if (NFSHASSESSPERSIST(nmp)) {
2455 				/* Use GUARDED for persistent sessions. */
2456 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2457 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2458 			} else {
2459 				/* Otherwise, use EXCLUSIVE4_1. */
2460 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2461 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2462 				*tl++ = cverf.lval[0];
2463 				*tl = cverf.lval[1];
2464 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2465 			}
2466 		} else {
2467 			/* NFSv4.0 */
			/* Verifier only; no attributes are sent in this mode. */
2468 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2469 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2470 			*tl++ = cverf.lval[0];
2471 			*tl = cverf.lval[1];
2472 		}
2473 	} else {
2474 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2475 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2476 	}
	/* Claim type and the name of the file to create. */
2477 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2478 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2479 	(void) nfsm_strtom(nd, name, namelen);
2480 	/* Get the new file's handle and attributes. */
2481 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2482 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2483 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2484 	NFSGETATTR_ATTRBIT(&attrbits);
2485 	(void) nfsrv_putattrbit(nd, &attrbits);
2486 	/* Get the directory's post-op attributes. */
2487 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2488 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2489 	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2490 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2491 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2492 	(void) nfsrv_putattrbit(nd, &attrbits);
2493 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2494 	if (error)
2495 		return (error);
	/*
	 * NOTE(review): NFSCL_INCRSEQID is defined elsewhere; presumably it
	 * advances the open owner's seqid depending on the reply -- confirm.
	 */
2496 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2497 	if (nd->nd_repstat == 0) {
		/*
		 * Open reply: the stateid followed by six words.  Only the
		 * last of those six (the result flags) is used here.
		 */
2498 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2499 		    6 * NFSX_UNSIGNED);
2500 		stateid.seqid = *tl++;
2501 		stateid.other[0] = *tl++;
2502 		stateid.other[1] = *tl++;
2503 		stateid.other[2] = *tl;
2504 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		/* The returned bitmap is parsed; a failure here is ignored. */
2505 		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2506 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2507 		deleg = fxdr_unsigned(int, *tl);
2508 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2509 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note on the client that a delegation was issued. */
2510 			if (!(owp->nfsow_clp->nfsc_flags &
2511 			      NFSCLFLAGS_FIRSTDELEG))
2512 				owp->nfsow_clp->nfsc_flags |=
2513 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * Allocate the delegation with room for a file handle
			 * of up to NFSX_V4FHMAX bytes.  From here on, every
			 * error exit goes through nfsmout, which frees dp.
			 */
2514 			dp = malloc(
2515 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2516 			    M_NFSCLDELEG, M_WAITOK);
2517 			LIST_INIT(&dp->nfsdl_owner);
2518 			LIST_INIT(&dp->nfsdl_lock);
2519 			dp->nfsdl_clp = owp->nfsow_clp;
2520 			newnfs_copyincred(cred, &dp->nfsdl_cred);
2521 			nfscl_lockinit(&dp->nfsdl_rwlock);
			/* Delegation stateid, then the recall flag (ret). */
2522 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2523 			    NFSX_UNSIGNED);
2524 			dp->nfsdl_stateid.seqid = *tl++;
2525 			dp->nfsdl_stateid.other[0] = *tl++;
2526 			dp->nfsdl_stateid.other[1] = *tl++;
2527 			dp->nfsdl_stateid.other[2] = *tl++;
2528 			ret = fxdr_unsigned(int, *tl);
2529 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2530 				dp->nfsdl_flags = NFSCLDL_WRITE;
2531 				/*
2532 				 * Indicates how much the file can grow.
2533 				 */
				/*
				 * Three words: the kind of limit, then either
				 * a 64 bit size or two 32 bit values whose
				 * product is the limit.
				 */
2534 				NFSM_DISSECT(tl, u_int32_t *,
2535 				    3 * NFSX_UNSIGNED);
2536 				limitby = fxdr_unsigned(int, *tl++);
2537 				switch (limitby) {
2538 				case NFSV4OPEN_LIMITSIZE:
2539 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2540 					break;
2541 				case NFSV4OPEN_LIMITBLOCKS:
2542 					dp->nfsdl_sizelimit =
2543 					    fxdr_unsigned(u_int64_t, *tl++);
2544 					dp->nfsdl_sizelimit *=
2545 					    fxdr_unsigned(u_int64_t, *tl);
2546 					break;
2547 				default:
2548 					error = NFSERR_BADXDR;
2549 					goto nfsmout;
2550 				}
2551 			} else {
2552 				dp->nfsdl_flags = NFSCLDL_READ;
2553 			}
2554 			if (ret)
2555 				dp->nfsdl_flags |= NFSCLDL_RECALL;
			/* ret is reused as an output argument from here on. */
2556 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2557 			    &acesize, p);
2558 			if (error)
2559 				goto nfsmout;
2560 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			/* Any other delegation type is a malformed reply. */
2561 			error = NFSERR_BADXDR;
2562 			goto nfsmout;
2563 		}
		/* File handle and attributes of the new file. */
2564 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2565 		if (error)
2566 			goto nfsmout;
2567 		/* Get rid of the PutFH and Getattr status values. */
2568 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2569 		/* Load the directory attributes. */
2570 		error = nfsm_loadattr(nd, dnap);
2571 		if (error)
2572 			goto nfsmout;
2573 		*dattrflagp = 1;
		/* Record the new file's change and modify time in dp. */
2574 		if (dp != NULL && *attrflagp) {
2575 			dp->nfsdl_change = nnap->na_filerev;
2576 			dp->nfsdl_modtime = nnap->na_mtime;
2577 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2578 		}
2579 		/*
2580 		 * We can now complete the Open state.
2581 		 */
2582 		nfhp = *nfhpp;
2583 		if (dp != NULL) {
2584 			dp->nfsdl_fhlen = nfhp->nfh_len;
2585 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2586 		}
2587 		/*
2588 		 * Get an Open structure that will be
2589 		 * attached to the OpenOwner, acquired already.
2590 		 */
2591 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2592 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2593 		    cred, p, NULL, &op, &newone, NULL, 0);
2594 		if (error)
2595 			goto nfsmout;
2596 		op->nfso_stateid = stateid;
2597 		newnfs_copyincred(cred, &op->nfso_cred);
		/*
		 * The server asked for the open to be confirmed: do so,
		 * retrying for as long as it answers NFSERR_DELAY.
		 */
2598 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2599 		    do {
2600 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2601 			    nfhp->nfh_len, op, cred, p);
2602 			if (ret == NFSERR_DELAY)
2603 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2604 		    } while (ret == NFSERR_DELAY);
2605 		    error = ret;
2606 		}
2607 
2608 		/*
2609 		 * If the server is handing out delegations, but we didn't
2610 		 * get one because an OpenConfirm was required, try the
2611 		 * Open again, to get a delegation. This is a harmless no-op,
2612 		 * from a server's point of view.
2613 		 */
2614 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2615 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2616 		    !error && dp == NULL) {
2617 		    do {
2618 			ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2619 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2620 			    nfhp->nfh_fh, nfhp->nfh_len,
2621 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2622 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2623 			if (ret == NFSERR_DELAY)
2624 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2625 		    } while (ret == NFSERR_DELAY);
		    /*
		     * A failure of this extra Open drops any delegation it
		     * returned; only the three errors tested below are
		     * passed on to the caller.
		     */
2626 		    if (ret) {
2627 			if (dp != NULL) {
2628 				free(dp, M_NFSCLDELEG);
2629 				dp = NULL;
2630 			}
2631 			if (ret == NFSERR_STALECLIENTID ||
2632 			    ret == NFSERR_STALEDONTRECOVER ||
2633 			    ret == NFSERR_BADSESSION)
2634 				error = ret;
2635 		    }
2636 		}
		/* Tell the caller that the open has been released here. */
2637 		nfscl_openrelease(nmp, op, error, newone);
2638 		*unlockedp = 1;
2639 	}
2640 	if (nd->nd_repstat != 0 && error == 0)
2641 		error = nd->nd_repstat;
2642 	if (error == NFSERR_STALECLIENTID)
2643 		nfscl_initiate_recovery(owp->nfsow_clp);
2644 nfsmout:
	/* The delegation goes to the caller only on success. */
2645 	if (!error)
2646 		*dpp = dp;
2647 	else if (dp != NULL)
2648 		free(dp, M_NFSCLDELEG);
2649 	m_freem(nd->nd_mrep);
2650 	return (error);
2651 }
2652 
2653 /*
2654  * Nfs remove rpc
 *
 * Removes "name" (namelen bytes) from directory dvp; vp is the vnode of
 * the file being removed.  For NFSv4, nfscl_removedeleg() is asked first;
 * when it returns 1 the request is built as NFSPROC_RETDELEGREMOVE on vp,
 * carrying the delegation stateid ahead of a Putfh of the directory and
 * the Remove.  If that compound fails before the Remove was processed,
 * the request is sent again as a plain NFSPROC_REMOVE.
 *
 * For NFSv3 and NFSv4, dnap/*dattrflagp receive the directory attributes
 * parsed by nfscl_wcc_data().  Returns 0, ENAMETOOLONG if namelen exceeds
 * NFS_MAXNAMLEN, the error from nfscl_request() or from parsing the reply,
 * or the server status found in nd_repstat.
2655  */
2656 int
2657 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2658     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2659     void *dstuff)
2660 {
2661 	u_int32_t *tl;
2662 	struct nfsrv_descript nfsd, *nd = &nfsd;
2663 	struct nfsnode *np;
2664 	struct nfsmount *nmp;
2665 	nfsv4stateid_t dstateid;
2666 	int error, ret = 0, i;
2667 
2668 	*dattrflagp = 0;
2669 	if (namelen > NFS_MAXNAMLEN)
2670 		return (ENAMETOOLONG);
2671 	nmp = VFSTONFS(dvp->v_mount);
2672 tryagain:
	/*
	 * ret is 0 on the first pass.  When this point is reached again
	 * through "goto tryagain", ret is still greater than zero, so the
	 * else branch resets it and a plain Remove is built below.
	 */
2673 	if (NFSHASNFSV4(nmp) && ret == 0) {
2674 		ret = nfscl_removedeleg(vp, p, &dstateid);
2675 		if (ret == 1) {
2676 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
			/*
			 * The delegation stateid (its seqid is sent as zero
			 * when NFSHASNFSV4N() is true) followed by the op
			 * number of the Putfh.
			 */
2677 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2678 			    NFSX_UNSIGNED);
2679 			if (NFSHASNFSV4N(nmp))
2680 				*tl++ = 0;
2681 			else
2682 				*tl++ = dstateid.seqid;
2683 			*tl++ = dstateid.other[0];
2684 			*tl++ = dstateid.other[1];
2685 			*tl++ = dstateid.other[2];
2686 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			/* Make the directory current again, then Remove. */
2687 			np = VTONFS(dvp);
2688 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2689 			    np->n_fhp->nfh_len, 0);
2690 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2691 			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2692 		}
2693 	} else {
2694 		ret = 0;
2695 	}
2696 	if (ret == 0)
2697 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
	/* In both forms the name to remove goes last. */
2698 	(void) nfsm_strtom(nd, name, namelen);
2699 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2700 	if (error)
2701 		return (error);
2702 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2703 		/* For NFSv4, parse out any Delereturn replies. */
2704 		if (ret > 0 && nd->nd_repstat != 0 &&
2705 		    (nd->nd_flag & ND_NOMOREDATA)) {
2706 			/*
2707 			 * If the Delegreturn failed, try again without
2708 			 * it. The server will Recall, as required.
2709 			 */
2710 			m_freem(nd->nd_mrep);
2711 			goto tryagain;
2712 		}
		/*
		 * Step over ret * 2 leading (op, status) word pairs in the
		 * NFSv4 reply -- presumably those of the Delegreturn and
		 * the Putfh, confirm -- and stop parsing at the first
		 * non-zero status.
		 */
2713 		for (i = 0; i < (ret * 2); i++) {
2714 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2715 			    ND_NFSV4) {
2716 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2717 			    if (*(tl + 1))
2718 				nd->nd_flag |= ND_NOMOREDATA;
2719 			}
2720 		}
2721 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2722 	}
2723 	if (nd->nd_repstat && !error)
2724 		error = nd->nd_repstat;
	/*
	 * NOTE(review): nothing in this function jumps to nfsmout
	 * explicitly; NFSM_DISSECT presumably does so when it fails --
	 * confirm against the macro.
	 */
2725 nfsmout:
2726 	m_freem(nd->nd_mrep);
2727 	return (error);
2728 }
2729 
2730 /*
2731  * Do an nfs rename rpc.
2732  */
2733 int
2734 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2735     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2736     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2737     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2738 {
2739 	u_int32_t *tl;
2740 	struct nfsrv_descript nfsd, *nd = &nfsd;
2741 	struct nfsmount *nmp;
2742 	struct nfsnode *np;
2743 	nfsattrbit_t attrbits;
2744 	nfsv4stateid_t fdstateid, tdstateid;
2745 	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2746 
2747 	*fattrflagp = 0;
2748 	*tattrflagp = 0;
2749 	nmp = VFSTONFS(fdvp->v_mount);
2750 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2751 		return (ENAMETOOLONG);
2752 tryagain:
2753 	if (NFSHASNFSV4(nmp) && ret == 0) {
2754 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2755 		    &tdstateid, &gottd, p);
2756 		if (gotfd && gottd) {
2757 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2758 		} else if (gotfd) {
2759 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2760 		} else if (gottd) {
2761 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2762 		}
2763 		if (gotfd) {
2764 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2765 			if (NFSHASNFSV4N(nmp))
2766 				*tl++ = 0;
2767 			else
2768 				*tl++ = fdstateid.seqid;
2769 			*tl++ = fdstateid.other[0];
2770 			*tl++ = fdstateid.other[1];
2771 			*tl = fdstateid.other[2];
2772 			if (gottd) {
2773 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2774 				*tl = txdr_unsigned(NFSV4OP_PUTFH);
2775 				np = VTONFS(tvp);
2776 				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2777 				    np->n_fhp->nfh_len, 0);
2778 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2779 				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2780 			}
2781 		}
2782 		if (gottd) {
2783 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2784 			if (NFSHASNFSV4N(nmp))
2785 				*tl++ = 0;
2786 			else
2787 				*tl++ = tdstateid.seqid;
2788 			*tl++ = tdstateid.other[0];
2789 			*tl++ = tdstateid.other[1];
2790 			*tl = tdstateid.other[2];
2791 		}
2792 		if (ret > 0) {
2793 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2794 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2795 			np = VTONFS(fdvp);
2796 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2797 			    np->n_fhp->nfh_len, 0);
2798 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2799 			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
2800 		}
2801 	} else {
2802 		ret = 0;
2803 	}
2804 	if (ret == 0)
2805 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2806 	if (nd->nd_flag & ND_NFSV4) {
2807 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2808 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2809 		NFSWCCATTR_ATTRBIT(&attrbits);
2810 		(void) nfsrv_putattrbit(nd, &attrbits);
2811 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2812 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2813 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2814 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
2815 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2816 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2817 		(void) nfsrv_putattrbit(nd, &attrbits);
2818 		nd->nd_flag |= ND_V4WCCATTR;
2819 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2820 		*tl = txdr_unsigned(NFSV4OP_RENAME);
2821 	}
2822 	(void) nfsm_strtom(nd, fnameptr, fnamelen);
2823 	if (!(nd->nd_flag & ND_NFSV4))
2824 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2825 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
2826 	(void) nfsm_strtom(nd, tnameptr, tnamelen);
2827 	error = nfscl_request(nd, fdvp, p, cred, fstuff);
2828 	if (error)
2829 		return (error);
2830 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2831 		/* For NFSv4, parse out any Delereturn replies. */
2832 		if (ret > 0 && nd->nd_repstat != 0 &&
2833 		    (nd->nd_flag & ND_NOMOREDATA)) {
2834 			/*
2835 			 * If the Delegreturn failed, try again without
2836 			 * it. The server will Recall, as required.
2837 			 */
2838 			m_freem(nd->nd_mrep);
2839 			goto tryagain;
2840 		}
2841 		for (i = 0; i < (ret * 2); i++) {
2842 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2843 			    ND_NFSV4) {
2844 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2845 			    if (*(tl + 1)) {
2846 				if (i == 0 && ret > 1) {
2847 				    /*
2848 				     * If the Delegreturn failed, try again
2849 				     * without it. The server will Recall, as
2850 				     * required.
2851 				     * If ret > 1, the first iteration of this
2852 				     * loop is the second DelegReturn result.
2853 				     */
2854 				    m_freem(nd->nd_mrep);
2855 				    goto tryagain;
2856 				} else {
2857 				    nd->nd_flag |= ND_NOMOREDATA;
2858 				}
2859 			    }
2860 			}
2861 		}
2862 		/* Now, the first wcc attribute reply. */
2863 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2864 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2865 			if (*(tl + 1))
2866 				nd->nd_flag |= ND_NOMOREDATA;
2867 		}
2868 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2869 		    fstuff);
2870 		/* and the second wcc attribute reply. */
2871 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2872 		    !error) {
2873 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2874 			if (*(tl + 1))
2875 				nd->nd_flag |= ND_NOMOREDATA;
2876 		}
2877 		if (!error)
2878 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2879 			    NULL, tstuff);
2880 	}
2881 	if (nd->nd_repstat && !error)
2882 		error = nd->nd_repstat;
2883 nfsmout:
2884 	m_freem(nd->nd_mrep);
2885 	return (error);
2886 }
2887 
2888 /*
2889  * nfs hard link create rpc
2890  */
2891 int
2892 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2893     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2894     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2895 {
2896 	u_int32_t *tl;
2897 	struct nfsrv_descript nfsd, *nd = &nfsd;
2898 	nfsattrbit_t attrbits;
2899 	int error = 0;
2900 
2901 	*attrflagp = 0;
2902 	*dattrflagp = 0;
2903 	if (namelen > NFS_MAXNAMLEN)
2904 		return (ENAMETOOLONG);
2905 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2906 	if (nd->nd_flag & ND_NFSV4) {
2907 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2908 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2909 	}
2910 	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2911 		VTONFS(dvp)->n_fhp->nfh_len, 0);
2912 	if (nd->nd_flag & ND_NFSV4) {
2913 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2914 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2915 		NFSWCCATTR_ATTRBIT(&attrbits);
2916 		(void) nfsrv_putattrbit(nd, &attrbits);
2917 		nd->nd_flag |= ND_V4WCCATTR;
2918 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2919 		*tl = txdr_unsigned(NFSV4OP_LINK);
2920 	}
2921 	(void) nfsm_strtom(nd, name, namelen);
2922 	error = nfscl_request(nd, vp, p, cred, dstuff);
2923 	if (error)
2924 		return (error);
2925 	if (nd->nd_flag & ND_NFSV3) {
2926 		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2927 		if (!error)
2928 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2929 			    NULL, dstuff);
2930 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2931 		/*
2932 		 * First, parse out the PutFH and Getattr result.
2933 		 */
2934 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2935 		if (!(*(tl + 1)))
2936 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2937 		if (*(tl + 1))
2938 			nd->nd_flag |= ND_NOMOREDATA;
2939 		/*
2940 		 * Get the pre-op attributes.
2941 		 */
2942 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2943 	}
2944 	if (nd->nd_repstat && !error)
2945 		error = nd->nd_repstat;
2946 nfsmout:
2947 	m_freem(nd->nd_mrep);
2948 	return (error);
2949 }
2950 
2951 /*
2952  * nfs symbolic link create rpc
2953  */
2954 int
2955 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
2956     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2957     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2958     int *dattrflagp, void *dstuff)
2959 {
2960 	u_int32_t *tl;
2961 	struct nfsrv_descript nfsd, *nd = &nfsd;
2962 	struct nfsmount *nmp;
2963 	int slen, error = 0;
2964 
2965 	*nfhpp = NULL;
2966 	*attrflagp = 0;
2967 	*dattrflagp = 0;
2968 	nmp = VFSTONFS(dvp->v_mount);
2969 	slen = strlen(target);
2970 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2971 		return (ENAMETOOLONG);
2972 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2973 	if (nd->nd_flag & ND_NFSV4) {
2974 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2975 		*tl = txdr_unsigned(NFLNK);
2976 		(void) nfsm_strtom(nd, target, slen);
2977 	}
2978 	(void) nfsm_strtom(nd, name, namelen);
2979 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2980 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2981 	if (!(nd->nd_flag & ND_NFSV4))
2982 		(void) nfsm_strtom(nd, target, slen);
2983 	if (nd->nd_flag & ND_NFSV2)
2984 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2985 	error = nfscl_request(nd, dvp, p, cred, dstuff);
2986 	if (error)
2987 		return (error);
2988 	if (nd->nd_flag & ND_NFSV4)
2989 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2990 	if ((nd->nd_flag & ND_NFSV3) && !error) {
2991 		if (!nd->nd_repstat)
2992 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2993 		if (!error)
2994 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2995 			    NULL, dstuff);
2996 	}
2997 	if (nd->nd_repstat && !error)
2998 		error = nd->nd_repstat;
2999 	m_freem(nd->nd_mrep);
3000 	/*
3001 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3002 	 * Only do this if vfs.nfs.ignore_eexist is set.
3003 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3004 	 * should guarantee "exactly once" RPC semantics.
3005 	 */
3006 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3007 	    nmp->nm_minorvers == 0))
3008 		error = 0;
3009 	return (error);
3010 }
3011 
3012 /*
3013  * nfs make dir rpc
3014  */
3015 int
3016 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
3017     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3018     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3019     int *dattrflagp, void *dstuff)
3020 {
3021 	u_int32_t *tl;
3022 	struct nfsrv_descript nfsd, *nd = &nfsd;
3023 	nfsattrbit_t attrbits;
3024 	int error = 0;
3025 	struct nfsfh *fhp;
3026 	struct nfsmount *nmp;
3027 
3028 	*nfhpp = NULL;
3029 	*attrflagp = 0;
3030 	*dattrflagp = 0;
3031 	nmp = VFSTONFS(dvp->v_mount);
3032 	fhp = VTONFS(dvp)->n_fhp;
3033 	if (namelen > NFS_MAXNAMLEN)
3034 		return (ENAMETOOLONG);
3035 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
3036 	if (nd->nd_flag & ND_NFSV4) {
3037 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3038 		*tl = txdr_unsigned(NFDIR);
3039 	}
3040 	(void) nfsm_strtom(nd, name, namelen);
3041 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3042 	if (nd->nd_flag & ND_NFSV4) {
3043 		NFSGETATTR_ATTRBIT(&attrbits);
3044 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3045 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3046 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3047 		(void) nfsrv_putattrbit(nd, &attrbits);
3048 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3049 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3050 		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
3051 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3052 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3053 		(void) nfsrv_putattrbit(nd, &attrbits);
3054 	}
3055 	error = nfscl_request(nd, dvp, p, cred, dstuff);
3056 	if (error)
3057 		return (error);
3058 	if (nd->nd_flag & ND_NFSV4)
3059 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
3060 	if (!nd->nd_repstat && !error) {
3061 		if (nd->nd_flag & ND_NFSV4) {
3062 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3063 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
3064 		}
3065 		if (!error)
3066 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3067 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
3068 			/* Get rid of the PutFH and Getattr status values. */
3069 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3070 			/* Load the directory attributes. */
3071 			error = nfsm_loadattr(nd, dnap);
3072 			if (error == 0)
3073 				*dattrflagp = 1;
3074 		}
3075 	}
3076 	if ((nd->nd_flag & ND_NFSV3) && !error)
3077 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
3078 	if (nd->nd_repstat && !error)
3079 		error = nd->nd_repstat;
3080 nfsmout:
3081 	m_freem(nd->nd_mrep);
3082 	/*
3083 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3084 	 * Only do this if vfs.nfs.ignore_eexist is set.
3085 	 * Never do this for NFSv4.1 or later minor versions, since sessions
3086 	 * should guarantee "exactly once" RPC semantics.
3087 	 */
3088 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3089 	    nmp->nm_minorvers == 0))
3090 		error = 0;
3091 	return (error);
3092 }
3093 
3094 /*
3095  * nfs remove directory call
3096  */
3097 int
3098 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
3099     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
3100 {
3101 	struct nfsrv_descript nfsd, *nd = &nfsd;
3102 	int error = 0;
3103 
3104 	*dattrflagp = 0;
3105 	if (namelen > NFS_MAXNAMLEN)
3106 		return (ENAMETOOLONG);
3107 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
3108 	(void) nfsm_strtom(nd, name, namelen);
3109 	error = nfscl_request(nd, dvp, p, cred, dstuff);
3110 	if (error)
3111 		return (error);
3112 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3113 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
3114 	if (nd->nd_repstat && !error)
3115 		error = nd->nd_repstat;
3116 	m_freem(nd->nd_mrep);
3117 	/*
3118 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3119 	 */
3120 	if (error == ENOENT)
3121 		error = 0;
3122 	return (error);
3123 }
3124 
3125 /*
3126  * Readdir rpc.
3127  * Always returns with either uio_resid unchanged, if you are at the
3128  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
3129  * filled in.
3130  * I felt this would allow caching of directory blocks more easily
3131  * than returning a partially filled block.
3132  * Directory offset cookies:
3133  * Oh my, what to do with them...
3134  * I can think of three ways to deal with them:
3135  * 1 - have the layer above these RPCs maintain a map between logical
3136  *     directory byte offsets and the NFS directory offset cookies
3137  * 2 - pass the opaque directory offset cookies up into userland
3138  *     and let the libc functions deal with them, via the system call
3139  * 3 - return them to userland in the "struct dirent", so future versions
3140  *     of libc can use them and do whatever is necessary to make things work
3141  *     above these rpc calls, in the meantime
3142  * For now, I do #3 by "hiding" the directory offset cookies after the
3143  * d_name field in struct dirent. This is space inside d_reclen that
3144  * will be ignored by anything that doesn't know about them.
3145  * The directory offset cookies are filled in as the last 8 bytes of
3146  * each directory entry, after d_name. Someday, the userland libc
3147  * functions may be able to use these. In the meantime, it satisfies
3148  * OpenBSD's requirements for cookies being returned.
3149  * It expects the directory offset cookie for the read to be in uio_offset
3150  * and returns the one for the next entry after this directory block in
3151  * there, as well.
 *
 * Arguments, as used below:
 *   vp        - the directory being read.
 *   uiop      - destination; must describe a single iovec whose resid is a
 *               multiple of DIRBLKSIZ (asserted on entry).
 *   cookiep   - supplies the starting directory cookie and is updated to the
 *               cookie of the last entry copied out.
 *   nap, attrflagp - directory attributes from the reply, loaded through
 *               nfscl_postop_attr(); *attrflagp is cleared before each RPC.
 *   eofp      - optional; set as described near the end of the function.
 *   cred, p, stuff - passed through to the request and attribute helpers.
 * Returns 0, the error from nfscl_request(), a reply parse error, EBADRPC
 * for an entry name length outside 1..NFS_MAXNAMLEN, or the reply status.
3152  */
3153 int
3154 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3155     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3156     int *eofp, void *stuff)
3157 {
3158 	int len, left;
3159 	struct dirent *dp = NULL;
3160 	u_int32_t *tl;
3161 	nfsquad_t cookie, ncookie;
3162 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3163 	struct nfsnode *dnp = VTONFS(vp);
3164 	struct nfsvattr nfsva;
3165 	struct nfsrv_descript nfsd, *nd = &nfsd;
3166 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3167 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
3168 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3169 	char *cp;
3170 	nfsattrbit_t attrbits, dattrbits;
3171 	u_int32_t rderr, *tl2 = NULL;
3172 	size_t tresid;
3173 
3174 	KASSERT(uiop->uio_iovcnt == 1 &&
3175 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3176 	    ("nfs readdirrpc bad uio"));
3177 	ncookie.lval[0] = ncookie.lval[1] = 0;
3178 	/*
3179 	 * There is no point in reading a lot more than uio_resid, however
3180 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
3181 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
3182 	 * will never make readsize > nm_readdirsize.
3183 	 */
3184 	readsize = nmp->nm_readdirsize;
3185 	if (readsize > uiop->uio_resid)
3186 		readsize = uiop->uio_resid + DIRBLKSIZ;
3187 
3188 	*attrflagp = 0;
3189 	if (eofp)
3190 		*eofp = 0;
	/* Remember the starting resid so "nothing was copied" is detectable. */
3191 	tresid = uiop->uio_resid;
3192 	cookie.lval[0] = cookiep->nfsuquad[0];
3193 	cookie.lval[1] = cookiep->nfsuquad[1];
	/* No reply is held yet; nfsmout only frees a non-NULL nd_mrep. */
3194 	nd->nd_mrep = NULL;
3195 
3196 	/*
3197 	 * For NFSv4, first create the "." and ".." entries.
3198 	 */
3199 	if (NFSHASNFSV4(nmp)) {
		/* Cookie (2 words), verifier (2 words) and two size words. */
3200 		reqsize = 6 * NFSX_UNSIGNED;
3201 		NFSGETATTR_ATTRBIT(&dattrbits);
		/* Per-entry attributes: fileid and type, plus one of below. */
3202 		NFSZERO_ATTRBIT(&attrbits);
3203 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3204 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
3205 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3206 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3207 			NFSSETBIT_ATTRBIT(&attrbits,
3208 			    NFSATTRBIT_MOUNTEDONFILEID);
3209 			gotmnton = 1;
3210 		} else {
3211 			/*
3212 			 * Must fake it. Use the fileno, except when the
3213 			 * fsid is != to that of the directory. For that
3214 			 * case, generate a fake fileno that is not the same.
3215 			 */
3216 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3217 			gotmnton = 0;
3218 		}
3219 
3220 		/*
3221 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3222 		 */
3223 		if (uiop->uio_offset == 0) {
			/*
			 * Look up the parent and ask for its file handle and
			 * the attributes selected above, to learn the fileid
			 * to report for "..".
			 */
3224 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3225 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3226 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3227 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3228 			(void) nfsrv_putattrbit(nd, &attrbits);
3229 			error = nfscl_request(nd, vp, p, cred, stuff);
3230 			if (error)
3231 			    return (error);
3232 			dotfileid = 0;	/* Fake out the compiler. */
			/* The first attributes in the reply give "."'s fileid. */
3233 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3234 			    error = nfsm_loadattr(nd, &nfsva);
3235 			    if (error != 0)
3236 				goto nfsmout;
3237 			    dotfileid = nfsva.na_fileid;
3238 			}
3239 			if (nd->nd_repstat == 0) {
			    /*
			     * The fifth word is taken as a file handle
			     * length; the handle itself is skipped over.
			     */
3240 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3241 			    len = fxdr_unsigned(int, *(tl + 4));
3242 			    if (len > 0 && len <= NFSX_V4FHMAX)
3243 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3244 			    else
3245 				error = EPERM;
3246 			    if (!error) {
3247 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				/* UINT64_MAX means "mounted-on fileid not set". */
3248 				nfsva.na_mntonfileno = UINT64_MAX;
3249 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3250 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3251 				    NULL, NULL, NULL, p, cred);
3252 				if (error) {
3253 				    dotdotfileid = dotfileid;
3254 				} else if (gotmnton) {
3255 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3256 					dotdotfileid = nfsva.na_mntonfileno;
3257 				    else
3258 					dotdotfileid = nfsva.na_fileid;
3259 				} else if (nfsva.na_filesid[0] ==
3260 				    dnp->n_vattr.na_filesid[0] &&
3261 				    nfsva.na_filesid[1] ==
3262 				    dnp->n_vattr.na_filesid[1]) {
3263 				    dotdotfileid = nfsva.na_fileid;
3264 				} else {
				    /* Different fsid: pick a fake fileno. */
3265 				    do {
3266 					fakefileno--;
3267 				    } while (fakefileno ==
3268 					nfsva.na_fileid);
3269 				    dotdotfileid = fakefileno;
3270 				}
3271 			    }
3272 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3273 			    /*
3274 			     * Lookupp returns NFSERR_NOENT when we are
3275 			     * at the root, so just use the current dir.
3276 			     */
3277 			    nd->nd_repstat = 0;
3278 			    dotdotfileid = dotfileid;
3279 			} else {
3280 			    error = nd->nd_repstat;
3281 			}
3282 			m_freem(nd->nd_mrep);
3283 			if (error)
3284 			    return (error);
3285 			nd->nd_mrep = NULL;
			/* Build the "." record directly in the caller's buffer. */
3286 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3287 			dp->d_pad0 = dp->d_pad1 = 0;
3288 			dp->d_off = 0;
3289 			dp->d_type = DT_DIR;
3290 			dp->d_fileno = dotfileid;
3291 			dp->d_namlen = 1;
3292 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3293 			dp->d_name[0] = '.';
3294 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3295 			/*
3296 			 * Just make these offset cookie 0.
3297 			 */
3298 			tl = (u_int32_t *)&dp->d_name[8];
3299 			*tl++ = 0;
3300 			*tl = 0;
3301 			blksiz += dp->d_reclen;
3302 			uiop->uio_resid -= dp->d_reclen;
3303 			uiop->uio_offset += dp->d_reclen;
3304 			uiop->uio_iov->iov_base =
3305 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3306 			uiop->uio_iov->iov_len -= dp->d_reclen;
			/* And the ".." record immediately after it. */
3307 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3308 			dp->d_pad0 = dp->d_pad1 = 0;
3309 			dp->d_off = 0;
3310 			dp->d_type = DT_DIR;
3311 			dp->d_fileno = dotdotfileid;
3312 			dp->d_namlen = 2;
3313 			*((uint64_t *)dp->d_name) = 0;
3314 			dp->d_name[0] = '.';
3315 			dp->d_name[1] = '.';
3316 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3317 			/*
3318 			 * Just make these offset cookie 0.
3319 			 */
3320 			tl = (u_int32_t *)&dp->d_name[8];
3321 			*tl++ = 0;
3322 			*tl = 0;
3323 			blksiz += dp->d_reclen;
3324 			uiop->uio_resid -= dp->d_reclen;
3325 			uiop->uio_offset += dp->d_reclen;
3326 			uiop->uio_iov->iov_base =
3327 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3328 			uiop->uio_iov->iov_len -= dp->d_reclen;
3329 		}
		/* Only the Readdir requests below also ask for rdattr_error. */
3330 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3331 	} else {
		/* Cookie (2 words), verifier (2 words) and one size word. */
3332 		reqsize = 5 * NFSX_UNSIGNED;
3333 	}
3334 
3335 	/*
3336 	 * Loop around doing readdir rpc's of size readsize.
3337 	 * The stopping criteria is EOF or buffer full.
3338 	 */
3339 	while (more_dirs && bigenough) {
3340 		*attrflagp = 0;
3341 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
3342 		if (nd->nd_flag & ND_NFSV2) {
			/* NFSv2 sends only the low cookie word and a count. */
3343 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3344 			*tl++ = cookie.lval[1];
3345 			*tl = txdr_unsigned(readsize);
3346 		} else {
3347 			NFSM_BUILD(tl, u_int32_t *, reqsize);
3348 			*tl++ = cookie.lval[0];
3349 			*tl++ = cookie.lval[1];
			/* A zero cookie is sent with a zero cookie verifier. */
3350 			if (cookie.qval == 0) {
3351 				*tl++ = 0;
3352 				*tl++ = 0;
3353 			} else {
3354 				NFSLOCKNODE(dnp);
3355 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
3356 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
3357 				NFSUNLOCKNODE(dnp);
3358 			}
3359 			if (nd->nd_flag & ND_NFSV4) {
3360 				*tl++ = txdr_unsigned(readsize);
3361 				*tl = txdr_unsigned(readsize);
3362 				(void) nfsrv_putattrbit(nd, &attrbits);
				/* Follow with a Getattr on the directory. */
3363 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3364 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
3365 				(void) nfsrv_putattrbit(nd, &dattrbits);
3366 			} else {
3367 				*tl = txdr_unsigned(readsize);
3368 			}
3369 		}
3370 		error = nfscl_request(nd, vp, p, cred, stuff);
3371 		if (error)
3372 			return (error);
3373 		if (!(nd->nd_flag & ND_NFSV2)) {
3374 			if (nd->nd_flag & ND_NFSV3)
3375 				error = nfscl_postop_attr(nd, nap, attrflagp,
3376 				    stuff);
3377 			if (!nd->nd_repstat && !error) {
				/* Save the cookie verifier for the next request. */
3378 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3379 				NFSLOCKNODE(dnp);
3380 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
3381 				dnp->n_cookieverf.nfsuquad[1] = *tl;
3382 				NFSUNLOCKNODE(dnp);
3383 			}
3384 		}
3385 		if (nd->nd_repstat || error) {
3386 			if (!error)
3387 				error = nd->nd_repstat;
3388 			goto nfsmout;
3389 		}
		/* First "another entry follows" indicator. */
3390 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3391 		more_dirs = fxdr_unsigned(int, *tl);
		/* An empty reply: do not issue another RPC after this one. */
3392 		if (!more_dirs)
3393 			tryformoredirs = 0;
3394 
3395 		/* loop through the dir entries, doctoring them to 4bsd form */
3396 		while (more_dirs && bigenough) {
			/*
			 * Entry header: NFSv4 leads with the cookie, NFSv3
			 * and NFSv2 lead with the fileid; all end with the
			 * name length.
			 */
3397 			if (nd->nd_flag & ND_NFSV4) {
3398 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3399 				ncookie.lval[0] = *tl++;
3400 				ncookie.lval[1] = *tl++;
3401 				len = fxdr_unsigned(int, *tl);
3402 			} else if (nd->nd_flag & ND_NFSV3) {
3403 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3404 				nfsva.na_fileid = fxdr_hyper(tl);
3405 				tl += 2;
3406 				len = fxdr_unsigned(int, *tl);
3407 			} else {
3408 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3409 				nfsva.na_fileid = fxdr_unsigned(uint64_t,
3410 				    *tl++);
3411 				len = fxdr_unsigned(int, *tl);
3412 			}
3413 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3414 				error = EBADRPC;
3415 				goto nfsmout;
3416 			}
3417 			tlen = roundup2(len, 8);
3418 			if (tlen == len)
3419 				tlen += 8;  /* To ensure null termination. */
			/*
			 * If the record will not fit in what is left of the
			 * current DIRBLKSIZ chunk, zero the remainder, charge
			 * it to the previous record and start a new chunk.
			 */
3420 			left = DIRBLKSIZ - blksiz;
3421 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3422 				NFSBZERO(uiop->uio_iov->iov_base, left);
3423 				dp->d_reclen += left;
3424 				uiop->uio_iov->iov_base =
3425 				    (char *)uiop->uio_iov->iov_base + left;
3426 				uiop->uio_iov->iov_len -= left;
3427 				uiop->uio_resid -= left;
3428 				uiop->uio_offset += left;
3429 				blksiz = 0;
3430 			}
			/* Out of caller buffer: stop copying entries out. */
3431 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3432 			    uiop->uio_resid)
3433 				bigenough = 0;
3434 			if (bigenough) {
3435 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3436 				dp->d_pad0 = dp->d_pad1 = 0;
3437 				dp->d_off = 0;
3438 				dp->d_namlen = len;
3439 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3440 				    NFSX_HYPER;
3441 				dp->d_type = DT_UNKNOWN;
3442 				blksiz += dp->d_reclen;
3443 				if (blksiz == DIRBLKSIZ)
3444 					blksiz = 0;
				/* Step past the header, then copy the name in. */
3445 				uiop->uio_resid -= DIRHDSIZ;
3446 				uiop->uio_offset += DIRHDSIZ;
3447 				uiop->uio_iov->iov_base =
3448 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3449 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3450 				error = nfsm_mbufuio(nd, uiop, len);
3451 				if (error)
3452 					goto nfsmout;
				/* Zero the name padding; tlen becomes pad size. */
3453 				cp = uiop->uio_iov->iov_base;
3454 				tlen -= len;
3455 				NFSBZERO(cp, tlen);
3456 				cp += tlen;	/* points to cookie storage */
3457 				tl2 = (u_int32_t *)cp;
3458 				uiop->uio_iov->iov_base =
3459 				    (char *)uiop->uio_iov->iov_base + tlen +
3460 				    NFSX_HYPER;
3461 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3462 				uiop->uio_resid -= tlen + NFSX_HYPER;
3463 				uiop->uio_offset += (tlen + NFSX_HYPER);
3464 			} else {
				/* Not copying: skip over the name in the reply. */
3465 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3466 				if (error)
3467 					goto nfsmout;
3468 			}
			/*
			 * Entry trailer: NFSv4 carries attributes, NFSv3 and
			 * NFSv2 carry the cookie.  In every case tl ends up
			 * pointing at the "another entry follows" word.
			 */
3469 			if (nd->nd_flag & ND_NFSV4) {
3470 				rderr = 0;
3471 				nfsva.na_mntonfileno = UINT64_MAX;
3472 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3473 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3474 				    NULL, NULL, &rderr, p, cred);
3475 				if (error)
3476 					goto nfsmout;
3477 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3478 			} else if (nd->nd_flag & ND_NFSV3) {
3479 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3480 				ncookie.lval[0] = *tl++;
3481 				ncookie.lval[1] = *tl++;
3482 			} else {
3483 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3484 				ncookie.lval[0] = 0;
3485 				ncookie.lval[1] = *tl++;
3486 			}
3487 			if (bigenough) {
3488 			    if (nd->nd_flag & ND_NFSV4) {
				/* An attribute read error yields fileno 0. */
3489 				if (rderr) {
3490 				    dp->d_fileno = 0;
3491 				} else {
3492 				    if (gotmnton) {
3493 					if (nfsva.na_mntonfileno != UINT64_MAX)
3494 					    dp->d_fileno = nfsva.na_mntonfileno;
3495 					else
3496 					    dp->d_fileno = nfsva.na_fileid;
3497 				    } else if (nfsva.na_filesid[0] ==
3498 					dnp->n_vattr.na_filesid[0] &&
3499 					nfsva.na_filesid[1] ==
3500 					dnp->n_vattr.na_filesid[1]) {
3501 					dp->d_fileno = nfsva.na_fileid;
3502 				    } else {
3503 					do {
3504 					    fakefileno--;
3505 					} while (fakefileno ==
3506 					    nfsva.na_fileid);
3507 					dp->d_fileno = fakefileno;
3508 				    }
3509 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3510 				}
3511 			    } else {
3512 				dp->d_fileno = nfsva.na_fileid;
3513 			    }
			    /*
			     * Store the entry's cookie after its name, and
			     * make it both the caller's cookie and the
			     * starting point for the next request.
			     */
3514 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3515 				ncookie.lval[0];
3516 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3517 				ncookie.lval[1];
3518 			}
3519 			more_dirs = fxdr_unsigned(int, *tl);
3520 		}
3521 		/*
3522 		 * If at end of rpc data, get the eof boolean
3523 		 */
3524 		if (!more_dirs) {
3525 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3526 			eof = fxdr_unsigned(int, *tl);
			/* Ask again unless this reply was empty or at EOF. */
3527 			if (tryformoredirs)
3528 				more_dirs = !eof;
3529 			if (nd->nd_flag & ND_NFSV4) {
3530 				error = nfscl_postop_attr(nd, nap, attrflagp,
3531 				    stuff);
3532 				if (error)
3533 					goto nfsmout;
3534 			}
3535 		}
3536 		m_freem(nd->nd_mrep);
3537 		nd->nd_mrep = NULL;
3538 	}
3539 	/*
3540 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3541 	 * by increasing d_reclen for the last record.
3542 	 */
3543 	if (blksiz > 0) {
3544 		left = DIRBLKSIZ - blksiz;
3545 		NFSBZERO(uiop->uio_iov->iov_base, left);
3546 		dp->d_reclen += left;
3547 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3548 		    left;
3549 		uiop->uio_iov->iov_len -= left;
3550 		uiop->uio_resid -= left;
3551 		uiop->uio_offset += left;
3552 	}
3553 
3554 	/*
3555 	 * If returning no data, assume end of file.
3556 	 * If not bigenough, return not end of file, since you aren't
3557 	 *    returning all the data
3558 	 * Otherwise, return the eof flag from the server.
3559 	 */
3560 	if (eofp) {
3561 		if (tresid == ((size_t)(uiop->uio_resid)))
3562 			*eofp = 1;
3563 		else if (!bigenough)
3564 			*eofp = 0;
3565 		else
3566 			*eofp = eof;
3567 	}
3568 
3569 	/*
3570 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3571 	 */
3572 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
		/*
		 * Each filler is one zeroed DIRBLKSIZ record with the
		 * current cookie stored at d_name[4].
		 */
3573 		dp = (struct dirent *)uiop->uio_iov->iov_base;
3574 		NFSBZERO(dp, DIRBLKSIZ);
3575 		dp->d_type = DT_UNKNOWN;
3576 		tl = (u_int32_t *)&dp->d_name[4];
3577 		*tl++ = cookie.lval[0];
3578 		*tl = cookie.lval[1];
3579 		dp->d_reclen = DIRBLKSIZ;
3580 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3581 		    DIRBLKSIZ;
3582 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
3583 		uiop->uio_resid -= DIRBLKSIZ;
3584 		uiop->uio_offset += DIRBLKSIZ;
3585 	}
3586 
3587 nfsmout:
3588 	if (nd->nd_mrep != NULL)
3589 		m_freem(nd->nd_mrep);
3590 	return (error);
3591 }
3592 
3593 #ifndef APPLE
3594 /*
3595  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3596  * (Also used for NFS V4 when mount flag set.)
3597  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3598  */
3599 int
3600 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3601     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3602     int *eofp, void *stuff)
3603 {
3604 	int len, left;
3605 	struct dirent *dp = NULL;
3606 	u_int32_t *tl;
3607 	vnode_t newvp = NULLVP;
3608 	struct nfsrv_descript nfsd, *nd = &nfsd;
3609 	struct nameidata nami, *ndp = &nami;
3610 	struct componentname *cnp = &ndp->ni_cnd;
3611 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3612 	struct nfsnode *dnp = VTONFS(vp), *np;
3613 	struct nfsvattr nfsva;
3614 	struct nfsfh *nfhp;
3615 	nfsquad_t cookie, ncookie;
3616 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3617 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3618 	int isdotdot = 0, unlocknewvp = 0;
3619 	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3620 	u_int64_t fileno = 0;
3621 	char *cp;
3622 	nfsattrbit_t attrbits, dattrbits;
3623 	size_t tresid;
3624 	u_int32_t *tl2 = NULL, rderr;
3625 	struct timespec dctime;
3626 
3627 	KASSERT(uiop->uio_iovcnt == 1 &&
3628 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3629 	    ("nfs readdirplusrpc bad uio"));
3630 	ncookie.lval[0] = ncookie.lval[1] = 0;
3631 	timespecclear(&dctime);
3632 	*attrflagp = 0;
3633 	if (eofp != NULL)
3634 		*eofp = 0;
3635 	ndp->ni_dvp = vp;
3636 	nd->nd_mrep = NULL;
3637 	cookie.lval[0] = cookiep->nfsuquad[0];
3638 	cookie.lval[1] = cookiep->nfsuquad[1];
3639 	tresid = uiop->uio_resid;
3640 
3641 	/*
3642 	 * For NFSv4, first create the "." and ".." entries.
3643 	 */
3644 	if (NFSHASNFSV4(nmp)) {
3645 		NFSGETATTR_ATTRBIT(&dattrbits);
3646 		NFSZERO_ATTRBIT(&attrbits);
3647 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3648 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3649 		    NFSATTRBIT_MOUNTEDONFILEID)) {
3650 			NFSSETBIT_ATTRBIT(&attrbits,
3651 			    NFSATTRBIT_MOUNTEDONFILEID);
3652 			gotmnton = 1;
3653 		} else {
3654 			/*
3655 			 * Must fake it. Use the fileno, except when the
3656 			 * fsid is != to that of the directory. For that
3657 			 * case, generate a fake fileno that is not the same.
3658 			 */
3659 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3660 			gotmnton = 0;
3661 		}
3662 
3663 		/*
3664 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3665 		 */
3666 		if (uiop->uio_offset == 0) {
3667 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3668 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3669 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3670 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3671 			(void) nfsrv_putattrbit(nd, &attrbits);
3672 			error = nfscl_request(nd, vp, p, cred, stuff);
3673 			if (error)
3674 			    return (error);
3675 			dotfileid = 0;	/* Fake out the compiler. */
3676 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3677 			    error = nfsm_loadattr(nd, &nfsva);
3678 			    if (error != 0)
3679 				goto nfsmout;
3680 			    dctime = nfsva.na_ctime;
3681 			    dotfileid = nfsva.na_fileid;
3682 			}
3683 			if (nd->nd_repstat == 0) {
3684 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3685 			    len = fxdr_unsigned(int, *(tl + 4));
3686 			    if (len > 0 && len <= NFSX_V4FHMAX)
3687 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3688 			    else
3689 				error = EPERM;
3690 			    if (!error) {
3691 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3692 				nfsva.na_mntonfileno = UINT64_MAX;
3693 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3694 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3695 				    NULL, NULL, NULL, p, cred);
3696 				if (error) {
3697 				    dotdotfileid = dotfileid;
3698 				} else if (gotmnton) {
3699 				    if (nfsva.na_mntonfileno != UINT64_MAX)
3700 					dotdotfileid = nfsva.na_mntonfileno;
3701 				    else
3702 					dotdotfileid = nfsva.na_fileid;
3703 				} else if (nfsva.na_filesid[0] ==
3704 				    dnp->n_vattr.na_filesid[0] &&
3705 				    nfsva.na_filesid[1] ==
3706 				    dnp->n_vattr.na_filesid[1]) {
3707 				    dotdotfileid = nfsva.na_fileid;
3708 				} else {
3709 				    do {
3710 					fakefileno--;
3711 				    } while (fakefileno ==
3712 					nfsva.na_fileid);
3713 				    dotdotfileid = fakefileno;
3714 				}
3715 			    }
3716 			} else if (nd->nd_repstat == NFSERR_NOENT) {
3717 			    /*
3718 			     * Lookupp returns NFSERR_NOENT when we are
3719 			     * at the root, so just use the current dir.
3720 			     */
3721 			    nd->nd_repstat = 0;
3722 			    dotdotfileid = dotfileid;
3723 			} else {
3724 			    error = nd->nd_repstat;
3725 			}
3726 			m_freem(nd->nd_mrep);
3727 			if (error)
3728 			    return (error);
3729 			nd->nd_mrep = NULL;
3730 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3731 			dp->d_pad0 = dp->d_pad1 = 0;
3732 			dp->d_off = 0;
3733 			dp->d_type = DT_DIR;
3734 			dp->d_fileno = dotfileid;
3735 			dp->d_namlen = 1;
3736 			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3737 			dp->d_name[0] = '.';
3738 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3739 			/*
3740 			 * Just make these offset cookie 0.
3741 			 */
3742 			tl = (u_int32_t *)&dp->d_name[8];
3743 			*tl++ = 0;
3744 			*tl = 0;
3745 			blksiz += dp->d_reclen;
3746 			uiop->uio_resid -= dp->d_reclen;
3747 			uiop->uio_offset += dp->d_reclen;
3748 			uiop->uio_iov->iov_base =
3749 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3750 			uiop->uio_iov->iov_len -= dp->d_reclen;
3751 			dp = (struct dirent *)uiop->uio_iov->iov_base;
3752 			dp->d_pad0 = dp->d_pad1 = 0;
3753 			dp->d_off = 0;
3754 			dp->d_type = DT_DIR;
3755 			dp->d_fileno = dotdotfileid;
3756 			dp->d_namlen = 2;
3757 			*((uint64_t *)dp->d_name) = 0;
3758 			dp->d_name[0] = '.';
3759 			dp->d_name[1] = '.';
3760 			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3761 			/*
3762 			 * Just make these offset cookie 0.
3763 			 */
3764 			tl = (u_int32_t *)&dp->d_name[8];
3765 			*tl++ = 0;
3766 			*tl = 0;
3767 			blksiz += dp->d_reclen;
3768 			uiop->uio_resid -= dp->d_reclen;
3769 			uiop->uio_offset += dp->d_reclen;
3770 			uiop->uio_iov->iov_base =
3771 			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3772 			uiop->uio_iov->iov_len -= dp->d_reclen;
3773 		}
3774 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3775 		if (gotmnton)
3776 			NFSSETBIT_ATTRBIT(&attrbits,
3777 			    NFSATTRBIT_MOUNTEDONFILEID);
3778 		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3779 		    NFSATTRBIT_TIMECREATE))
3780 			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3781 	}
3782 
3783 	/*
3784 	 * Loop around doing readdir rpc's of size nm_readdirsize.
3785 	 * The stopping criteria is EOF or buffer full.
3786 	 */
3787 	while (more_dirs && bigenough) {
3788 		*attrflagp = 0;
3789 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3790  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3791 		*tl++ = cookie.lval[0];
3792 		*tl++ = cookie.lval[1];
3793 		if (cookie.qval == 0) {
3794 			*tl++ = 0;
3795 			*tl++ = 0;
3796 		} else {
3797 			NFSLOCKNODE(dnp);
3798 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3799 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3800 			NFSUNLOCKNODE(dnp);
3801 		}
3802 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3803 		*tl = txdr_unsigned(nmp->nm_readdirsize);
3804 		if (nd->nd_flag & ND_NFSV4) {
3805 			(void) nfsrv_putattrbit(nd, &attrbits);
3806 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3807 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3808 			(void) nfsrv_putattrbit(nd, &dattrbits);
3809 		}
3810 		error = nfscl_request(nd, vp, p, cred, stuff);
3811 		if (error)
3812 			return (error);
3813 		if (nd->nd_flag & ND_NFSV3)
3814 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3815 		if (nd->nd_repstat || error) {
3816 			if (!error)
3817 				error = nd->nd_repstat;
3818 			goto nfsmout;
3819 		}
3820 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3821 			dctime = nap->na_ctime;
3822 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3823 		NFSLOCKNODE(dnp);
3824 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3825 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3826 		NFSUNLOCKNODE(dnp);
3827 		more_dirs = fxdr_unsigned(int, *tl);
3828 		if (!more_dirs)
3829 			tryformoredirs = 0;
3830 
3831 		/* loop through the dir entries, doctoring them to 4bsd form */
3832 		while (more_dirs && bigenough) {
3833 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3834 			if (nd->nd_flag & ND_NFSV4) {
3835 				ncookie.lval[0] = *tl++;
3836 				ncookie.lval[1] = *tl++;
3837 			} else {
3838 				fileno = fxdr_hyper(tl);
3839 				tl += 2;
3840 			}
3841 			len = fxdr_unsigned(int, *tl);
3842 			if (len <= 0 || len > NFS_MAXNAMLEN) {
3843 				error = EBADRPC;
3844 				goto nfsmout;
3845 			}
3846 			tlen = roundup2(len, 8);
3847 			if (tlen == len)
3848 				tlen += 8;  /* To ensure null termination. */
3849 			left = DIRBLKSIZ - blksiz;
3850 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3851 				NFSBZERO(uiop->uio_iov->iov_base, left);
3852 				dp->d_reclen += left;
3853 				uiop->uio_iov->iov_base =
3854 				    (char *)uiop->uio_iov->iov_base + left;
3855 				uiop->uio_iov->iov_len -= left;
3856 				uiop->uio_resid -= left;
3857 				uiop->uio_offset += left;
3858 				blksiz = 0;
3859 			}
3860 			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3861 			    uiop->uio_resid)
3862 				bigenough = 0;
3863 			if (bigenough) {
3864 				dp = (struct dirent *)uiop->uio_iov->iov_base;
3865 				dp->d_pad0 = dp->d_pad1 = 0;
3866 				dp->d_off = 0;
3867 				dp->d_namlen = len;
3868 				dp->d_reclen = _GENERIC_DIRLEN(len) +
3869 				    NFSX_HYPER;
3870 				dp->d_type = DT_UNKNOWN;
3871 				blksiz += dp->d_reclen;
3872 				if (blksiz == DIRBLKSIZ)
3873 					blksiz = 0;
3874 				uiop->uio_resid -= DIRHDSIZ;
3875 				uiop->uio_offset += DIRHDSIZ;
3876 				uiop->uio_iov->iov_base =
3877 				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3878 				uiop->uio_iov->iov_len -= DIRHDSIZ;
3879 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
3880 				cnp->cn_namelen = len;
3881 				NFSCNHASHZERO(cnp);
3882 				error = nfsm_mbufuio(nd, uiop, len);
3883 				if (error)
3884 					goto nfsmout;
3885 				cp = uiop->uio_iov->iov_base;
3886 				tlen -= len;
3887 				NFSBZERO(cp, tlen);
3888 				cp += tlen;	/* points to cookie storage */
3889 				tl2 = (u_int32_t *)cp;
3890 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3891 				    cnp->cn_nameptr[1] == '.')
3892 					isdotdot = 1;
3893 				else
3894 					isdotdot = 0;
3895 				uiop->uio_iov->iov_base =
3896 				    (char *)uiop->uio_iov->iov_base + tlen +
3897 				    NFSX_HYPER;
3898 				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3899 				uiop->uio_resid -= tlen + NFSX_HYPER;
3900 				uiop->uio_offset += (tlen + NFSX_HYPER);
3901 			} else {
3902 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3903 				if (error)
3904 					goto nfsmout;
3905 			}
3906 			nfhp = NULL;
3907 			if (nd->nd_flag & ND_NFSV3) {
3908 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3909 				ncookie.lval[0] = *tl++;
3910 				ncookie.lval[1] = *tl++;
3911 				attrflag = fxdr_unsigned(int, *tl);
3912 				if (attrflag) {
3913 				  error = nfsm_loadattr(nd, &nfsva);
3914 				  if (error)
3915 					goto nfsmout;
3916 				}
3917 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3918 				if (*tl) {
3919 					error = nfsm_getfh(nd, &nfhp);
3920 					if (error)
3921 					    goto nfsmout;
3922 				}
3923 				if (!attrflag && nfhp != NULL) {
3924 					free(nfhp, M_NFSFH);
3925 					nfhp = NULL;
3926 				}
3927 			} else {
3928 				rderr = 0;
3929 				nfsva.na_mntonfileno = 0xffffffff;
3930 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3931 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3932 				    NULL, NULL, &rderr, p, cred);
3933 				if (error)
3934 					goto nfsmout;
3935 			}
3936 
3937 			if (bigenough) {
3938 			    if (nd->nd_flag & ND_NFSV4) {
3939 				if (rderr) {
3940 				    dp->d_fileno = 0;
3941 				} else if (gotmnton) {
3942 				    if (nfsva.na_mntonfileno != 0xffffffff)
3943 					dp->d_fileno = nfsva.na_mntonfileno;
3944 				    else
3945 					dp->d_fileno = nfsva.na_fileid;
3946 				} else if (nfsva.na_filesid[0] ==
3947 				    dnp->n_vattr.na_filesid[0] &&
3948 				    nfsva.na_filesid[1] ==
3949 				    dnp->n_vattr.na_filesid[1]) {
3950 				    dp->d_fileno = nfsva.na_fileid;
3951 				} else {
3952 				    do {
3953 					fakefileno--;
3954 				    } while (fakefileno ==
3955 					nfsva.na_fileid);
3956 				    dp->d_fileno = fakefileno;
3957 				}
3958 			    } else {
3959 				dp->d_fileno = fileno;
3960 			    }
3961 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3962 				ncookie.lval[0];
3963 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3964 				ncookie.lval[1];
3965 
3966 			    if (nfhp != NULL) {
3967 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3968 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3969 				    VREF(vp);
3970 				    newvp = vp;
3971 				    unlocknewvp = 0;
3972 				    free(nfhp, M_NFSFH);
3973 				    np = dnp;
3974 				} else if (isdotdot != 0) {
3975 				    /*
3976 				     * Skip doing a nfscl_nget() call for "..".
3977 				     * There's a race between acquiring the nfs
3978 				     * node here and lookups that look for the
3979 				     * directory being read (in the parent).
3980 				     * It would try to get a lock on ".." here,
3981 				     * owning the lock on the directory being
3982 				     * read. Lookup will hold the lock on ".."
3983 				     * and try to acquire the lock on the
3984 				     * directory being read.
3985 				     * If the directory is unlocked/relocked,
3986 				     * then there is a LOR with the buflock
3987 				     * vp is relocked.
3988 				     */
3989 				    free(nfhp, M_NFSFH);
3990 				} else {
3991 				    error = nfscl_nget(vp->v_mount, vp,
3992 				      nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3993 				    if (!error) {
3994 					newvp = NFSTOV(np);
3995 					unlocknewvp = 1;
3996 				    }
3997 				}
3998 				nfhp = NULL;
3999 				if (newvp != NULLVP) {
4000 				    error = nfscl_loadattrcache(&newvp,
4001 					&nfsva, NULL, NULL, 0, 0);
4002 				    if (error) {
4003 					if (unlocknewvp)
4004 					    vput(newvp);
4005 					else
4006 					    vrele(newvp);
4007 					goto nfsmout;
4008 				    }
4009 				    dp->d_type =
4010 					vtonfs_dtype(np->n_vattr.na_type);
4011 				    ndp->ni_vp = newvp;
4012 				    NFSCNHASH(cnp, HASHINIT);
4013 				    if (cnp->cn_namelen <= NCHNAMLEN &&
4014 					ndp->ni_dvp != ndp->ni_vp &&
4015 					(newvp->v_type != VDIR ||
4016 					 dctime.tv_sec != 0)) {
4017 					cache_enter_time_flags(ndp->ni_dvp,
4018 					    ndp->ni_vp, cnp,
4019 					    &nfsva.na_ctime,
4020 					    newvp->v_type != VDIR ? NULL :
4021 					    &dctime, VFS_CACHE_DROPOLD);
4022 				    }
4023 				    if (unlocknewvp)
4024 					vput(newvp);
4025 				    else
4026 					vrele(newvp);
4027 				    newvp = NULLVP;
4028 				}
4029 			    }
4030 			} else if (nfhp != NULL) {
4031 			    free(nfhp, M_NFSFH);
4032 			}
4033 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4034 			more_dirs = fxdr_unsigned(int, *tl);
4035 		}
4036 		/*
4037 		 * If at end of rpc data, get the eof boolean
4038 		 */
4039 		if (!more_dirs) {
4040 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4041 			eof = fxdr_unsigned(int, *tl);
4042 			if (tryformoredirs)
4043 				more_dirs = !eof;
4044 			if (nd->nd_flag & ND_NFSV4) {
4045 				error = nfscl_postop_attr(nd, nap, attrflagp,
4046 				    stuff);
4047 				if (error)
4048 					goto nfsmout;
4049 			}
4050 		}
4051 		m_freem(nd->nd_mrep);
4052 		nd->nd_mrep = NULL;
4053 	}
4054 	/*
4055 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4056 	 * by increasing d_reclen for the last record.
4057 	 */
4058 	if (blksiz > 0) {
4059 		left = DIRBLKSIZ - blksiz;
4060 		NFSBZERO(uiop->uio_iov->iov_base, left);
4061 		dp->d_reclen += left;
4062 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4063 		    left;
4064 		uiop->uio_iov->iov_len -= left;
4065 		uiop->uio_resid -= left;
4066 		uiop->uio_offset += left;
4067 	}
4068 
4069 	/*
4070 	 * If returning no data, assume end of file.
4071 	 * If not bigenough, return not end of file, since you aren't
4072 	 *    returning all the data
4073 	 * Otherwise, return the eof flag from the server.
4074 	 */
4075 	if (eofp != NULL) {
4076 		if (tresid == uiop->uio_resid)
4077 			*eofp = 1;
4078 		else if (!bigenough)
4079 			*eofp = 0;
4080 		else
4081 			*eofp = eof;
4082 	}
4083 
4084 	/*
4085 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
4086 	 */
4087 	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
4088 		dp = (struct dirent *)uiop->uio_iov->iov_base;
4089 		NFSBZERO(dp, DIRBLKSIZ);
4090 		dp->d_type = DT_UNKNOWN;
4091 		tl = (u_int32_t *)&dp->d_name[4];
4092 		*tl++ = cookie.lval[0];
4093 		*tl = cookie.lval[1];
4094 		dp->d_reclen = DIRBLKSIZ;
4095 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4096 		    DIRBLKSIZ;
4097 		uiop->uio_iov->iov_len -= DIRBLKSIZ;
4098 		uiop->uio_resid -= DIRBLKSIZ;
4099 		uiop->uio_offset += DIRBLKSIZ;
4100 	}
4101 
4102 nfsmout:
4103 	if (nd->nd_mrep != NULL)
4104 		m_freem(nd->nd_mrep);
4105 	return (error);
4106 }
4107 #endif	/* !APPLE */
4108 
4109 /*
4110  * Nfs commit rpc
4111  */
4112 int
4113 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4114     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4115 {
4116 	u_int32_t *tl;
4117 	struct nfsrv_descript nfsd, *nd = &nfsd;
4118 	nfsattrbit_t attrbits;
4119 	int error;
4120 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4121 
4122 	*attrflagp = 0;
4123 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
4124 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4125 	txdr_hyper(offset, tl);
4126 	tl += 2;
4127 	*tl = txdr_unsigned(cnt);
4128 	if (nd->nd_flag & ND_NFSV4) {
4129 		/*
4130 		 * And do a Getattr op.
4131 		 */
4132 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4133 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
4134 		NFSGETATTR_ATTRBIT(&attrbits);
4135 		(void) nfsrv_putattrbit(nd, &attrbits);
4136 	}
4137 	error = nfscl_request(nd, vp, p, cred, stuff);
4138 	if (error)
4139 		return (error);
4140 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
4141 	if (!error && !nd->nd_repstat) {
4142 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
4143 		NFSLOCKMNT(nmp);
4144 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4145 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4146 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
4147 		}
4148 		NFSUNLOCKMNT(nmp);
4149 		if (nd->nd_flag & ND_NFSV4)
4150 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4151 	}
4152 nfsmout:
4153 	if (!error && nd->nd_repstat)
4154 		error = nd->nd_repstat;
4155 	m_freem(nd->nd_mrep);
4156 	return (error);
4157 }
4158 
4159 /*
4160  * NFS byte range lock rpc.
4161  * (Mostly just calls one of the three lower level RPC routines.)
4162  */
4163 int
4164 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4165     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4166 {
4167 	struct nfscllockowner *lp;
4168 	struct nfsclclient *clp;
4169 	struct nfsfh *nfhp;
4170 	struct nfsrv_descript nfsd, *nd = &nfsd;
4171 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4172 	u_int64_t off, len;
4173 	off_t start, end;
4174 	u_int32_t clidrev = 0;
4175 	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4176 	int callcnt, dorpc;
4177 
4178 	/*
4179 	 * Convert the flock structure into a start and end and do POSIX
4180 	 * bounds checking.
4181 	 */
4182 	switch (fl->l_whence) {
4183 	case SEEK_SET:
4184 	case SEEK_CUR:
4185 		/*
4186 		 * Caller is responsible for adding any necessary offset
4187 		 * when SEEK_CUR is used.
4188 		 */
4189 		start = fl->l_start;
4190 		off = fl->l_start;
4191 		break;
4192 	case SEEK_END:
4193 		start = size + fl->l_start;
4194 		off = size + fl->l_start;
4195 		break;
4196 	default:
4197 		return (EINVAL);
4198 	}
4199 	if (start < 0)
4200 		return (EINVAL);
4201 	if (fl->l_len != 0) {
4202 		end = start + fl->l_len - 1;
4203 		if (end < start)
4204 			return (EINVAL);
4205 	}
4206 
4207 	len = fl->l_len;
4208 	if (len == 0)
4209 		len = NFS64BITSSET;
4210 	retrycnt = 0;
4211 	do {
4212 	    nd->nd_repstat = 0;
4213 	    if (op == F_GETLK) {
4214 		error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
4215 		if (error)
4216 			return (error);
4217 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4218 		if (!error) {
4219 			clidrev = clp->nfsc_clientidrev;
4220 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4221 			    p, id, flags);
4222 		} else if (error == -1) {
4223 			error = 0;
4224 		}
4225 		nfscl_clientrelease(clp);
4226 	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4227 		/*
4228 		 * We must loop around for all lockowner cases.
4229 		 */
4230 		callcnt = 0;
4231 		error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
4232 		if (error)
4233 			return (error);
4234 		do {
4235 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4236 			clp, id, flags, &lp, &dorpc);
4237 		    /*
4238 		     * If it returns a NULL lp, we're done.
4239 		     */
4240 		    if (lp == NULL) {
4241 			if (callcnt == 0)
4242 			    nfscl_clientrelease(clp);
4243 			else
4244 			    nfscl_releasealllocks(clp, vp, p, id, flags);
4245 			return (error);
4246 		    }
4247 		    if (nmp->nm_clp != NULL)
4248 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4249 		    else
4250 			clidrev = 0;
4251 		    /*
4252 		     * If the server doesn't support Posix lock semantics,
4253 		     * only allow locks on the entire file, since it won't
4254 		     * handle overlapping byte ranges.
4255 		     * There might still be a problem when a lock
4256 		     * upgrade/downgrade (read<->write) occurs, since the
4257 		     * server "might" expect an unlock first?
4258 		     */
4259 		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4260 			(off == 0 && len == NFS64BITSSET))) {
4261 			/*
4262 			 * Since the lock records will go away, we must
4263 			 * wait for grace and delay here.
4264 			 */
4265 			do {
4266 			    error = nfsrpc_locku(nd, nmp, lp, off, len,
4267 				NFSV4LOCKT_READ, cred, p, 0);
4268 			    if ((nd->nd_repstat == NFSERR_GRACE ||
4269 				 nd->nd_repstat == NFSERR_DELAY) &&
4270 				error == 0)
4271 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4272 				    "nfs_advlock");
4273 			} while ((nd->nd_repstat == NFSERR_GRACE ||
4274 			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
4275 		    }
4276 		    callcnt++;
4277 		} while (error == 0 && nd->nd_repstat == 0);
4278 		nfscl_releasealllocks(clp, vp, p, id, flags);
4279 	    } else if (op == F_SETLK) {
4280 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4281 		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4282 		if (error || donelocally) {
4283 			return (error);
4284 		}
4285 		if (nmp->nm_clp != NULL)
4286 			clidrev = nmp->nm_clp->nfsc_clientidrev;
4287 		else
4288 			clidrev = 0;
4289 		nfhp = VTONFS(vp)->n_fhp;
4290 		if (!lp->nfsl_open->nfso_posixlock &&
4291 		    (off != 0 || len != NFS64BITSSET)) {
4292 			error = EINVAL;
4293 		} else {
4294 			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4295 			    nfhp->nfh_len, lp, newone, reclaim, off,
4296 			    len, fl->l_type, cred, p, 0);
4297 		}
4298 		if (!error)
4299 			error = nd->nd_repstat;
4300 		nfscl_lockrelease(lp, error, newone);
4301 	    } else {
4302 		error = EINVAL;
4303 	    }
4304 	    if (!error)
4305 	        error = nd->nd_repstat;
4306 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4307 		error == NFSERR_STALEDONTRECOVER ||
4308 		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4309 		error == NFSERR_BADSESSION) {
4310 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
4311 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4312 		&& clidrev != 0) {
4313 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4314 		retrycnt++;
4315 	    }
4316 	} while (error == NFSERR_GRACE ||
4317 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4318 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4319 	    error == NFSERR_BADSESSION ||
4320 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4321 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
4322 	if (error && retrycnt >= 4)
4323 		error = EIO;
4324 	return (error);
4325 }
4326 
4327 /*
4328  * The lower level routine for the LockT case.
4329  */
4330 int
4331 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4332     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4333     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4334 {
4335 	u_int32_t *tl;
4336 	int error, type, size;
4337 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4338 	struct nfsnode *np;
4339 	struct nfsmount *nmp;
4340 	struct nfsclsession *tsep;
4341 
4342 	nmp = VFSTONFS(vp->v_mount);
4343 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
4344 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4345 	if (fl->l_type == F_RDLCK)
4346 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4347 	else
4348 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4349 	txdr_hyper(off, tl);
4350 	tl += 2;
4351 	txdr_hyper(len, tl);
4352 	tl += 2;
4353 	tsep = nfsmnt_mdssession(nmp);
4354 	*tl++ = tsep->nfsess_clientid.lval[0];
4355 	*tl = tsep->nfsess_clientid.lval[1];
4356 	nfscl_filllockowner(id, own, flags);
4357 	np = VTONFS(vp);
4358 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4359 	    np->n_fhp->nfh_len);
4360 	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4361 	error = nfscl_request(nd, vp, p, cred, NULL);
4362 	if (error)
4363 		return (error);
4364 	if (nd->nd_repstat == 0) {
4365 		fl->l_type = F_UNLCK;
4366 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4367 		nd->nd_repstat = 0;
4368 		fl->l_whence = SEEK_SET;
4369 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4370 		fl->l_start = fxdr_hyper(tl);
4371 		tl += 2;
4372 		len = fxdr_hyper(tl);
4373 		tl += 2;
4374 		if (len == NFS64BITSSET)
4375 			fl->l_len = 0;
4376 		else
4377 			fl->l_len = len;
4378 		type = fxdr_unsigned(int, *tl++);
4379 		if (type == NFSV4LOCKT_WRITE)
4380 			fl->l_type = F_WRLCK;
4381 		else
4382 			fl->l_type = F_RDLCK;
4383 		/*
4384 		 * XXX For now, I have no idea what to do with the
4385 		 * conflicting lock_owner, so I'll just set the pid == 0
4386 		 * and skip over the lock_owner.
4387 		 */
4388 		fl->l_pid = (pid_t)0;
4389 		tl += 2;
4390 		size = fxdr_unsigned(int, *tl);
4391 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4392 			error = EBADRPC;
4393 		if (!error)
4394 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4395 	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4396 		nfscl_initiate_recovery(clp);
4397 nfsmout:
4398 	m_freem(nd->nd_mrep);
4399 	return (error);
4400 }
4401 
4402 /*
4403  * Lower level function that performs the LockU RPC.
4404  */
4405 static int
4406 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4407     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4408     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4409 {
4410 	u_int32_t *tl;
4411 	int error;
4412 
4413 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4414 	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
4415 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4416 	*tl++ = txdr_unsigned(type);
4417 	*tl = txdr_unsigned(lp->nfsl_seqid);
4418 	if (nfstest_outofseq &&
4419 	    (arc4random() % nfstest_outofseq) == 0)
4420 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4421 	tl++;
4422 	if (NFSHASNFSV4N(nmp))
4423 		*tl++ = 0;
4424 	else
4425 		*tl++ = lp->nfsl_stateid.seqid;
4426 	*tl++ = lp->nfsl_stateid.other[0];
4427 	*tl++ = lp->nfsl_stateid.other[1];
4428 	*tl++ = lp->nfsl_stateid.other[2];
4429 	txdr_hyper(off, tl);
4430 	tl += 2;
4431 	txdr_hyper(len, tl);
4432 	if (syscred)
4433 		nd->nd_flag |= ND_USEGSSNAME;
4434 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4435 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4436 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4437 	if (error)
4438 		return (error);
4439 	if (nd->nd_repstat == 0) {
4440 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4441 		lp->nfsl_stateid.seqid = *tl++;
4442 		lp->nfsl_stateid.other[0] = *tl++;
4443 		lp->nfsl_stateid.other[1] = *tl++;
4444 		lp->nfsl_stateid.other[2] = *tl;
4445 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4446 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4447 nfsmout:
4448 	m_freem(nd->nd_mrep);
4449 	return (error);
4450 }
4451 
4452 /*
4453  * The actual Lock RPC.
4454  */
4455 int
4456 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4457     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4458     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4459     NFSPROC_T *p, int syscred)
4460 {
4461 	u_int32_t *tl;
4462 	int error, size;
4463 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4464 	struct nfsclsession *tsep;
4465 
4466 	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
4467 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4468 	if (type == F_RDLCK)
4469 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4470 	else
4471 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4472 	*tl++ = txdr_unsigned(reclaim);
4473 	txdr_hyper(off, tl);
4474 	tl += 2;
4475 	txdr_hyper(len, tl);
4476 	tl += 2;
4477 	if (newone) {
4478 	    *tl = newnfs_true;
4479 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4480 		2 * NFSX_UNSIGNED + NFSX_HYPER);
4481 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4482 	    if (NFSHASNFSV4N(nmp))
4483 		*tl++ = 0;
4484 	    else
4485 		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4486 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4487 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4488 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4489 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4490 	    tsep = nfsmnt_mdssession(nmp);
4491 	    *tl++ = tsep->nfsess_clientid.lval[0];
4492 	    *tl = tsep->nfsess_clientid.lval[1];
4493 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4494 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4495 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4496 	} else {
4497 	    *tl = newnfs_false;
4498 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4499 	    if (NFSHASNFSV4N(nmp))
4500 		*tl++ = 0;
4501 	    else
4502 		*tl++ = lp->nfsl_stateid.seqid;
4503 	    *tl++ = lp->nfsl_stateid.other[0];
4504 	    *tl++ = lp->nfsl_stateid.other[1];
4505 	    *tl++ = lp->nfsl_stateid.other[2];
4506 	    *tl = txdr_unsigned(lp->nfsl_seqid);
4507 	    if (nfstest_outofseq &&
4508 		(arc4random() % nfstest_outofseq) == 0)
4509 		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4510 	}
4511 	if (syscred)
4512 		nd->nd_flag |= ND_USEGSSNAME;
4513 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4514 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4515 	if (error)
4516 		return (error);
4517 	if (newone)
4518 	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4519 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4520 	if (nd->nd_repstat == 0) {
4521 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4522 		lp->nfsl_stateid.seqid = *tl++;
4523 		lp->nfsl_stateid.other[0] = *tl++;
4524 		lp->nfsl_stateid.other[1] = *tl++;
4525 		lp->nfsl_stateid.other[2] = *tl;
4526 	} else if (nd->nd_repstat == NFSERR_DENIED) {
4527 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4528 		size = fxdr_unsigned(int, *(tl + 7));
4529 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4530 			error = EBADRPC;
4531 		if (!error)
4532 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4533 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4534 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4535 nfsmout:
4536 	m_freem(nd->nd_mrep);
4537 	return (error);
4538 }
4539 
4540 /*
4541  * nfs statfs rpc
4542  * (always called with the vp for the mount point)
4543  */
4544 int
4545 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4546     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4547     void *stuff)
4548 {
4549 	u_int32_t *tl = NULL;
4550 	struct nfsrv_descript nfsd, *nd = &nfsd;
4551 	struct nfsmount *nmp;
4552 	nfsattrbit_t attrbits;
4553 	int error;
4554 
4555 	*attrflagp = 0;
4556 	nmp = VFSTONFS(vp->v_mount);
4557 	if (NFSHASNFSV4(nmp)) {
4558 		/*
4559 		 * For V4, you actually do a getattr.
4560 		 */
4561 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4562 		NFSSTATFS_GETATTRBIT(&attrbits);
4563 		(void) nfsrv_putattrbit(nd, &attrbits);
4564 		nd->nd_flag |= ND_USEGSSNAME;
4565 		error = nfscl_request(nd, vp, p, cred, stuff);
4566 		if (error)
4567 			return (error);
4568 		if (nd->nd_repstat == 0) {
4569 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4570 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4571 			    cred);
4572 			if (!error) {
4573 				nmp->nm_fsid[0] = nap->na_filesid[0];
4574 				nmp->nm_fsid[1] = nap->na_filesid[1];
4575 				NFSSETHASSETFSID(nmp);
4576 				*attrflagp = 1;
4577 			}
4578 		} else {
4579 			error = nd->nd_repstat;
4580 		}
4581 		if (error)
4582 			goto nfsmout;
4583 	} else {
4584 		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4585 		error = nfscl_request(nd, vp, p, cred, stuff);
4586 		if (error)
4587 			return (error);
4588 		if (nd->nd_flag & ND_NFSV3) {
4589 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4590 			if (error)
4591 				goto nfsmout;
4592 		}
4593 		if (nd->nd_repstat) {
4594 			error = nd->nd_repstat;
4595 			goto nfsmout;
4596 		}
4597 		NFSM_DISSECT(tl, u_int32_t *,
4598 		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4599 	}
4600 	if (NFSHASNFSV3(nmp)) {
4601 		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4602 		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4603 		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4604 		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4605 		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4606 		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4607 		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4608 	} else if (NFSHASNFSV4(nmp) == 0) {
4609 		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4610 		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4611 		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4612 		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4613 		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4614 	}
4615 nfsmout:
4616 	m_freem(nd->nd_mrep);
4617 	return (error);
4618 }
4619 
4620 /*
4621  * nfs pathconf rpc
4622  */
4623 int
4624 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4625     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4626     void *stuff)
4627 {
4628 	struct nfsrv_descript nfsd, *nd = &nfsd;
4629 	struct nfsmount *nmp;
4630 	u_int32_t *tl;
4631 	nfsattrbit_t attrbits;
4632 	int error;
4633 
4634 	*attrflagp = 0;
4635 	nmp = VFSTONFS(vp->v_mount);
4636 	if (NFSHASNFSV4(nmp)) {
4637 		/*
4638 		 * For V4, you actually do a getattr.
4639 		 */
4640 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4641 		NFSPATHCONF_GETATTRBIT(&attrbits);
4642 		(void) nfsrv_putattrbit(nd, &attrbits);
4643 		nd->nd_flag |= ND_USEGSSNAME;
4644 		error = nfscl_request(nd, vp, p, cred, stuff);
4645 		if (error)
4646 			return (error);
4647 		if (nd->nd_repstat == 0) {
4648 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4649 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4650 			    cred);
4651 			if (!error)
4652 				*attrflagp = 1;
4653 		} else {
4654 			error = nd->nd_repstat;
4655 		}
4656 	} else {
4657 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4658 		error = nfscl_request(nd, vp, p, cred, stuff);
4659 		if (error)
4660 			return (error);
4661 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4662 		if (nd->nd_repstat && !error)
4663 			error = nd->nd_repstat;
4664 		if (!error) {
4665 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4666 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4667 			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4668 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4669 			pc->pc_chownrestricted =
4670 			    fxdr_unsigned(u_int32_t, *tl++);
4671 			pc->pc_caseinsensitive =
4672 			    fxdr_unsigned(u_int32_t, *tl++);
4673 			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4674 		}
4675 	}
4676 nfsmout:
4677 	m_freem(nd->nd_mrep);
4678 	return (error);
4679 }
4680 
4681 /*
4682  * nfs version 3 fsinfo rpc call
4683  */
4684 int
4685 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4686     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4687 {
4688 	u_int32_t *tl;
4689 	struct nfsrv_descript nfsd, *nd = &nfsd;
4690 	int error;
4691 
4692 	*attrflagp = 0;
4693 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4694 	error = nfscl_request(nd, vp, p, cred, stuff);
4695 	if (error)
4696 		return (error);
4697 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4698 	if (nd->nd_repstat && !error)
4699 		error = nd->nd_repstat;
4700 	if (!error) {
4701 		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4702 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4703 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4704 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4705 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4706 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4707 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4708 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4709 		fsp->fs_maxfilesize = fxdr_hyper(tl);
4710 		tl += 2;
4711 		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4712 		tl += 2;
4713 		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4714 	}
4715 nfsmout:
4716 	m_freem(nd->nd_mrep);
4717 	return (error);
4718 }
4719 
4720 /*
4721  * This function performs the Renew RPC.
4722  */
4723 int
4724 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4725     NFSPROC_T *p)
4726 {
4727 	u_int32_t *tl;
4728 	struct nfsrv_descript nfsd;
4729 	struct nfsrv_descript *nd = &nfsd;
4730 	struct nfsmount *nmp;
4731 	int error;
4732 	struct nfssockreq *nrp;
4733 	struct nfsclsession *tsep;
4734 
4735 	nmp = clp->nfsc_nmp;
4736 	if (nmp == NULL)
4737 		return (0);
4738 	if (dsp == NULL)
4739 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4740 		    0);
4741 	else
4742 		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4743 		    &dsp->nfsclds_sess, 0, 0);
4744 	if (!NFSHASNFSV4N(nmp)) {
4745 		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4746 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4747 		tsep = nfsmnt_mdssession(nmp);
4748 		*tl++ = tsep->nfsess_clientid.lval[0];
4749 		*tl = tsep->nfsess_clientid.lval[1];
4750 	}
4751 	nrp = NULL;
4752 	if (dsp != NULL)
4753 		nrp = dsp->nfsclds_sockp;
4754 	if (nrp == NULL)
4755 		/* If NULL, use the MDS socket. */
4756 		nrp = &nmp->nm_sockreq;
4757 	nd->nd_flag |= ND_USEGSSNAME;
4758 	if (dsp == NULL)
4759 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4760 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4761 	else {
4762 		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4763 		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4764 		if (error == ENXIO)
4765 			nfscl_cancelreqs(dsp);
4766 	}
4767 	if (error)
4768 		return (error);
4769 	error = nd->nd_repstat;
4770 	m_freem(nd->nd_mrep);
4771 	return (error);
4772 }
4773 
4774 /*
4775  * This function performs the Releaselockowner RPC.
4776  */
4777 int
4778 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4779     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4780 {
4781 	struct nfsrv_descript nfsd, *nd = &nfsd;
4782 	u_int32_t *tl;
4783 	int error;
4784 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4785 	struct nfsclsession *tsep;
4786 
4787 	if (NFSHASNFSV4N(nmp)) {
4788 		/* For NFSv4.1, do a FreeStateID. */
4789 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4790 		    NULL, 0, 0);
4791 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4792 	} else {
4793 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4794 		    NULL, 0, 0);
4795 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4796 		tsep = nfsmnt_mdssession(nmp);
4797 		*tl++ = tsep->nfsess_clientid.lval[0];
4798 		*tl = tsep->nfsess_clientid.lval[1];
4799 		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4800 		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4801 		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4802 	}
4803 	nd->nd_flag |= ND_USEGSSNAME;
4804 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4805 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4806 	if (error)
4807 		return (error);
4808 	error = nd->nd_repstat;
4809 	m_freem(nd->nd_mrep);
4810 	return (error);
4811 }
4812 
4813 /*
4814  * This function performs the Compound to get the mount pt FH.
4815  */
4816 int
4817 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4818     NFSPROC_T *p)
4819 {
4820 	u_int32_t *tl;
4821 	struct nfsrv_descript nfsd;
4822 	struct nfsrv_descript *nd = &nfsd;
4823 	u_char *cp, *cp2;
4824 	int error, cnt, len, setnil;
4825 	u_int32_t *opcntp;
4826 
4827 	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4828 	    0);
4829 	cp = dirpath;
4830 	cnt = 0;
4831 	do {
4832 		setnil = 0;
4833 		while (*cp == '/')
4834 			cp++;
4835 		cp2 = cp;
4836 		while (*cp2 != '\0' && *cp2 != '/')
4837 			cp2++;
4838 		if (*cp2 == '/') {
4839 			setnil = 1;
4840 			*cp2 = '\0';
4841 		}
4842 		if (cp2 != cp) {
4843 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4844 			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4845 			nfsm_strtom(nd, cp, strlen(cp));
4846 			cnt++;
4847 		}
4848 		if (setnil)
4849 			*cp2++ = '/';
4850 		cp = cp2;
4851 	} while (*cp != '\0');
4852 	if (NFSHASNFSV4N(nmp))
4853 		/* Has a Sequence Op done by nfscl_reqstart(). */
4854 		*opcntp = txdr_unsigned(3 + cnt);
4855 	else
4856 		*opcntp = txdr_unsigned(2 + cnt);
4857 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4858 	*tl = txdr_unsigned(NFSV4OP_GETFH);
4859 	nd->nd_flag |= ND_USEGSSNAME;
4860 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4861 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4862 	if (error)
4863 		return (error);
4864 	if (nd->nd_repstat == 0) {
4865 		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4866 		tl += (2 + 2 * cnt);
4867 		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4868 			len > NFSX_FHMAX) {
4869 			nd->nd_repstat = NFSERR_BADXDR;
4870 		} else {
4871 			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4872 			if (nd->nd_repstat == 0)
4873 				nmp->nm_fhsize = len;
4874 		}
4875 	}
4876 	error = nd->nd_repstat;
4877 nfsmout:
4878 	m_freem(nd->nd_mrep);
4879 	return (error);
4880 }
4881 
4882 /*
4883  * This function performs the Delegreturn RPC.
4884  */
4885 int
4886 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4887     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4888 {
4889 	u_int32_t *tl;
4890 	struct nfsrv_descript nfsd;
4891 	struct nfsrv_descript *nd = &nfsd;
4892 	int error;
4893 
4894 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4895 	    dp->nfsdl_fhlen, NULL, NULL, 0, 0);
4896 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4897 	if (NFSHASNFSV4N(nmp))
4898 		*tl++ = 0;
4899 	else
4900 		*tl++ = dp->nfsdl_stateid.seqid;
4901 	*tl++ = dp->nfsdl_stateid.other[0];
4902 	*tl++ = dp->nfsdl_stateid.other[1];
4903 	*tl = dp->nfsdl_stateid.other[2];
4904 	if (syscred)
4905 		nd->nd_flag |= ND_USEGSSNAME;
4906 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4907 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4908 	if (error)
4909 		return (error);
4910 	error = nd->nd_repstat;
4911 	m_freem(nd->nd_mrep);
4912 	return (error);
4913 }
4914 
4915 /*
4916  * nfs getacl call.
4917  */
4918 int
4919 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4920     struct acl *aclp, void *stuff)
4921 {
4922 	struct nfsrv_descript nfsd, *nd = &nfsd;
4923 	int error;
4924 	nfsattrbit_t attrbits;
4925 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4926 
4927 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4928 		return (EOPNOTSUPP);
4929 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4930 	NFSZERO_ATTRBIT(&attrbits);
4931 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4932 	(void) nfsrv_putattrbit(nd, &attrbits);
4933 	error = nfscl_request(nd, vp, p, cred, stuff);
4934 	if (error)
4935 		return (error);
4936 	if (!nd->nd_repstat)
4937 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4938 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4939 	else
4940 		error = nd->nd_repstat;
4941 	m_freem(nd->nd_mrep);
4942 	return (error);
4943 }
4944 
4945 /*
4946  * nfs setacl call.
4947  */
4948 int
4949 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4950     struct acl *aclp, void *stuff)
4951 {
4952 	int error;
4953 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4954 
4955 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4956 		return (EOPNOTSUPP);
4957 	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4958 	return (error);
4959 }
4960 
4961 /*
4962  * nfs setacl call.
4963  */
4964 static int
4965 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4966     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4967 {
4968 	struct nfsrv_descript nfsd, *nd = &nfsd;
4969 	int error;
4970 	nfsattrbit_t attrbits;
4971 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4972 
4973 	if (!NFSHASNFSV4(nmp))
4974 		return (EOPNOTSUPP);
4975 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4976 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4977 	NFSZERO_ATTRBIT(&attrbits);
4978 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4979 	(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
4980 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
4981 	error = nfscl_request(nd, vp, p, cred, stuff);
4982 	if (error)
4983 		return (error);
4984 	/* Don't care about the pre/postop attributes */
4985 	m_freem(nd->nd_mrep);
4986 	return (nd->nd_repstat);
4987 }
4988 
4989 /*
4990  * Do the NFSv4.1 Exchange ID.
4991  */
4992 int
4993 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4994     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
4995     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
4996 {
4997 	uint32_t *tl, v41flags;
4998 	struct nfsrv_descript nfsd;
4999 	struct nfsrv_descript *nd = &nfsd;
5000 	struct nfsclds *dsp;
5001 	struct timespec verstime;
5002 	int error, len;
5003 
5004 	*dspp = NULL;
5005 	if (minorvers == 0)
5006 		minorvers = nmp->nm_minorvers;
5007 	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
5008 	    NFS_VER4, minorvers);
5009 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5010 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
5011 	*tl = txdr_unsigned(clp->nfsc_rev);
5012 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
5013 
5014 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
5015 	*tl++ = txdr_unsigned(exchflags);
5016 	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
5017 
5018 	/* Set the implementation id4 */
5019 	*tl = txdr_unsigned(1);
5020 	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
5021 	(void) nfsm_strtom(nd, version, strlen(version));
5022 	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
5023 	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
5024 	verstime.tv_nsec = 0;
5025 	txdr_nfsv4time(&verstime, tl);
5026 	nd->nd_flag |= ND_USEGSSNAME;
5027 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
5028 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5029 	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
5030 	    (int)nd->nd_repstat);
5031 	if (error != 0)
5032 		return (error);
5033 	if (nd->nd_repstat == 0) {
5034 		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
5035 		len = fxdr_unsigned(int, *(tl + 7));
5036 		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
5037 			error = NFSERR_BADXDR;
5038 			goto nfsmout;
5039 		}
5040 		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
5041 		    M_WAITOK | M_ZERO);
5042 		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
5043 		dsp->nfsclds_servownlen = len;
5044 		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
5045 		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
5046 		dsp->nfsclds_sess.nfsess_sequenceid =
5047 		    fxdr_unsigned(uint32_t, *tl++);
5048 		v41flags = fxdr_unsigned(uint32_t, *tl);
5049 		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
5050 		    NFSHASPNFSOPT(nmp)) {
5051 			NFSCL_DEBUG(1, "set PNFS\n");
5052 			NFSLOCKMNT(nmp);
5053 			nmp->nm_state |= NFSSTA_PNFS;
5054 			NFSUNLOCKMNT(nmp);
5055 			dsp->nfsclds_flags |= NFSCLDS_MDS;
5056 		}
5057 		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
5058 			dsp->nfsclds_flags |= NFSCLDS_DS;
5059 		if (minorvers == NFSV42_MINORVERSION)
5060 			dsp->nfsclds_flags |= NFSCLDS_MINORV2;
5061 		if (len > 0)
5062 			nd->nd_repstat = nfsrv_mtostr(nd,
5063 			    dsp->nfsclds_serverown, len);
5064 		if (nd->nd_repstat == 0) {
5065 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5066 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5067 			    NULL, MTX_DEF);
5068 			nfscl_initsessionslots(&dsp->nfsclds_sess);
5069 			*dspp = dsp;
5070 		} else
5071 			free(dsp, M_NFSCLDS);
5072 	}
5073 	error = nd->nd_repstat;
5074 nfsmout:
5075 	m_freem(nd->nd_mrep);
5076 	return (error);
5077 }
5078 
5079 /*
5080  * Do the NFSv4.1 Create Session.
5081  */
5082 int
5083 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
5084     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
5085     struct ucred *cred, NFSPROC_T *p)
5086 {
5087 	uint32_t crflags, maxval, *tl;
5088 	struct nfsrv_descript nfsd;
5089 	struct nfsrv_descript *nd = &nfsd;
5090 	int error, irdcnt, minorvers;
5091 
5092 	/* Make sure nm_rsize, nm_wsize is set. */
5093 	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
5094 		nmp->nm_rsize = NFS_MAXBSIZE;
5095 	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
5096 		nmp->nm_wsize = NFS_MAXBSIZE;
5097 	if (dsp == NULL)
5098 		minorvers = nmp->nm_minorvers;
5099 	else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
5100 		minorvers = NFSV42_MINORVERSION;
5101 	else
5102 		minorvers = NFSV41_MINORVERSION;
5103 	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
5104 	    NFS_VER4, minorvers);
5105 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5106 	*tl++ = sep->nfsess_clientid.lval[0];
5107 	*tl++ = sep->nfsess_clientid.lval[1];
5108 	*tl++ = txdr_unsigned(sequenceid);
5109 	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
5110 	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
5111 		crflags |= NFSV4CRSESS_CONNBACKCHAN;
5112 	*tl = txdr_unsigned(crflags);
5113 
5114 	/* Fill in fore channel attributes. */
5115 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5116 	*tl++ = 0;				/* Header pad size */
5117 	if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
5118 	    nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
5119 		/*
5120 		 * NFSv4.2 Extended Attribute operations may want to do
5121 		 * requests/replies that are larger than nm_rsize/nm_wsize.
5122 		 */
5123 		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5124 		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5125 	} else {
5126 		*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
5127 		*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
5128 	}
5129 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5130 	*tl++ = txdr_unsigned(20);		/* Max operations */
5131 	*tl++ = txdr_unsigned(64);		/* Max slots */
5132 	*tl = 0;				/* No rdma ird */
5133 
5134 	/* Fill in back channel attributes. */
5135 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5136 	*tl++ = 0;				/* Header pad size */
5137 	*tl++ = txdr_unsigned(10000);		/* Max request size */
5138 	*tl++ = txdr_unsigned(10000);		/* Max response size */
5139 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5140 	*tl++ = txdr_unsigned(4);		/* Max operations */
5141 	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
5142 	*tl = 0;				/* No rdma ird */
5143 
5144 	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
5145 	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
5146 
5147 	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
5148 	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
5149 	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
5150 	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
5151 	*tl++ = 0;				/* Null machine name */
5152 	*tl++ = 0;				/* Uid == 0 */
5153 	*tl++ = 0;				/* Gid == 0 */
5154 	*tl = 0;				/* No additional gids */
5155 	nd->nd_flag |= ND_USEGSSNAME;
5156 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
5157 	    NFS_VER4, NULL, 1, NULL, NULL);
5158 	if (error != 0)
5159 		return (error);
5160 	if (nd->nd_repstat == 0) {
5161 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
5162 		    2 * NFSX_UNSIGNED);
5163 		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
5164 		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5165 		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
5166 		crflags = fxdr_unsigned(uint32_t, *tl);
5167 		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
5168 			NFSLOCKMNT(nmp);
5169 			nmp->nm_state |= NFSSTA_SESSPERSIST;
5170 			NFSUNLOCKMNT(nmp);
5171 		}
5172 
5173 		/* Get the fore channel slot count. */
5174 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5175 		tl++;			/* Skip the header pad size. */
5176 
5177 		/* Make sure nm_wsize is small enough. */
5178 		maxval = fxdr_unsigned(uint32_t, *tl++);
5179 		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
5180 			if (nmp->nm_wsize > 8096)
5181 				nmp->nm_wsize /= 2;
5182 			else
5183 				break;
5184 		}
5185 		sep->nfsess_maxreq = maxval;
5186 
5187 		/* Make sure nm_rsize is small enough. */
5188 		maxval = fxdr_unsigned(uint32_t, *tl++);
5189 		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
5190 			if (nmp->nm_rsize > 8096)
5191 				nmp->nm_rsize /= 2;
5192 			else
5193 				break;
5194 		}
5195 		sep->nfsess_maxresp = maxval;
5196 
5197 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
5198 		tl++;
5199 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
5200 		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
5201 		irdcnt = fxdr_unsigned(int, *tl);
5202 		if (irdcnt > 0)
5203 			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
5204 
5205 		/* and the back channel slot count. */
5206 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5207 		tl += 5;
5208 		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
5209 		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
5210 	}
5211 	error = nd->nd_repstat;
5212 nfsmout:
5213 	m_freem(nd->nd_mrep);
5214 	return (error);
5215 }
5216 
5217 /*
5218  * Do the NFSv4.1 Destroy Session.
5219  */
5220 int
5221 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
5222     struct ucred *cred, NFSPROC_T *p)
5223 {
5224 	uint32_t *tl;
5225 	struct nfsrv_descript nfsd;
5226 	struct nfsrv_descript *nd = &nfsd;
5227 	int error;
5228 	struct nfsclsession *tsep;
5229 
5230 	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
5231 	    0);
5232 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
5233 	tsep = nfsmnt_mdssession(nmp);
5234 	bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
5235 	nd->nd_flag |= ND_USEGSSNAME;
5236 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5237 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5238 	if (error != 0)
5239 		return (error);
5240 	error = nd->nd_repstat;
5241 	m_freem(nd->nd_mrep);
5242 	return (error);
5243 }
5244 
5245 /*
5246  * Do the NFSv4.1 Destroy Client.
5247  */
5248 int
5249 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5250     struct ucred *cred, NFSPROC_T *p)
5251 {
5252 	uint32_t *tl;
5253 	struct nfsrv_descript nfsd;
5254 	struct nfsrv_descript *nd = &nfsd;
5255 	int error;
5256 	struct nfsclsession *tsep;
5257 
5258 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5259 	    0);
5260 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5261 	tsep = nfsmnt_mdssession(nmp);
5262 	*tl++ = tsep->nfsess_clientid.lval[0];
5263 	*tl = tsep->nfsess_clientid.lval[1];
5264 	nd->nd_flag |= ND_USEGSSNAME;
5265 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5266 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5267 	if (error != 0)
5268 		return (error);
5269 	error = nd->nd_repstat;
5270 	m_freem(nd->nd_mrep);
5271 	return (error);
5272 }
5273 
5274 /*
5275  * Do the NFSv4.1 LayoutGet.
5276  */
5277 static int
5278 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5279     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5280     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5281     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
5282     void *stuff)
5283 {
5284 	struct nfsrv_descript nfsd, *nd = &nfsd;
5285 	int error;
5286 
5287 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5288 	    0);
5289 	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5290 	    layouttype, layoutlen, 0);
5291 	nd->nd_flag |= ND_USEGSSNAME;
5292 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5293 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5294 	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5295 	if (error != 0)
5296 		return (error);
5297 	if (nd->nd_repstat == 0)
5298 		error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5299 		    flhp);
5300 	if (error == 0 && nd->nd_repstat != 0)
5301 		error = nd->nd_repstat;
5302 	m_freem(nd->nd_mrep);
5303 	return (error);
5304 }
5305 
5306 /*
5307  * Do the NFSv4.1 Get Device Info.
5308  */
5309 int
5310 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5311     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5312     NFSPROC_T *p)
5313 {
5314 	uint32_t cnt, *tl, vers, minorvers;
5315 	struct nfsrv_descript nfsd;
5316 	struct nfsrv_descript *nd = &nfsd;
5317 	struct sockaddr_in sin, ssin;
5318 	struct sockaddr_in6 sin6, ssin6;
5319 	struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5320 	struct nfscldevinfo *ndi;
5321 	int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5322 	int stripecnt;
5323 	uint8_t stripeindex;
5324 	sa_family_t af, safilled;
5325 
5326 	ssin.sin_port = 0;		/* To shut up compiler. */
5327 	ssin.sin_addr.s_addr = 0;	/* ditto */
5328 	*ndip = NULL;
5329 	ndi = NULL;
5330 	gotdspp = NULL;
5331 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5332 	    0);
5333 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5334 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5335 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5336 	*tl++ = txdr_unsigned(layouttype);
5337 	*tl++ = txdr_unsigned(100000);
5338 	if (notifybitsp != NULL && *notifybitsp != 0) {
5339 		*tl = txdr_unsigned(1);		/* One word of bits. */
5340 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5341 		*tl = txdr_unsigned(*notifybitsp);
5342 	} else
5343 		*tl = txdr_unsigned(0);
5344 	nd->nd_flag |= ND_USEGSSNAME;
5345 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5346 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5347 	if (error != 0)
5348 		return (error);
5349 	if (nd->nd_repstat == 0) {
5350 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5351 		if (layouttype != fxdr_unsigned(int, *tl))
5352 			printf("EEK! devinfo layout type not same!\n");
5353 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5354 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5355 			stripecnt = fxdr_unsigned(int, *tl);
5356 			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5357 			if (stripecnt < 1 || stripecnt > 4096) {
5358 				printf("pNFS File layout devinfo stripecnt %d:"
5359 				    " out of range\n", stripecnt);
5360 				error = NFSERR_BADXDR;
5361 				goto nfsmout;
5362 			}
5363 			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5364 			    NFSX_UNSIGNED);
5365 			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5366 			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5367 			if (addrcnt < 1 || addrcnt > 128) {
5368 				printf("NFS devinfo addrcnt %d: out of range\n",
5369 				    addrcnt);
5370 				error = NFSERR_BADXDR;
5371 				goto nfsmout;
5372 			}
5373 
5374 			/*
5375 			 * Now we know how many stripe indices and addresses, so
5376 			 * we can allocate the structure the correct size.
5377 			 */
5378 			i = (stripecnt * sizeof(uint8_t)) /
5379 			    sizeof(struct nfsclds *) + 1;
5380 			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5381 			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5382 			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5383 			    M_ZERO);
5384 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5385 			    NFSX_V4DEVICEID);
5386 			ndi->nfsdi_refcnt = 0;
5387 			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5388 			ndi->nfsdi_stripecnt = stripecnt;
5389 			ndi->nfsdi_addrcnt = addrcnt;
5390 			/* Fill in the stripe indices. */
5391 			for (i = 0; i < stripecnt; i++) {
5392 				stripeindex = fxdr_unsigned(uint8_t, *tl++);
5393 				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5394 				if (stripeindex >= addrcnt) {
5395 					printf("pNFS File Layout devinfo"
5396 					    " stripeindex %d: too big\n",
5397 					    (int)stripeindex);
5398 					error = NFSERR_BADXDR;
5399 					goto nfsmout;
5400 				}
5401 				nfsfldi_setstripeindex(ndi, i, stripeindex);
5402 			}
5403 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
5404 			/* For Flex File, we only get one address list. */
5405 			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5406 			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
5407 			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5408 			    NFSX_V4DEVICEID);
5409 			ndi->nfsdi_refcnt = 0;
5410 			ndi->nfsdi_flags = NFSDI_FLEXFILE;
5411 			addrcnt = ndi->nfsdi_addrcnt = 1;
5412 		}
5413 
5414 		/* Now, dissect the server address(es). */
5415 		safilled = AF_UNSPEC;
5416 		for (i = 0; i < addrcnt; i++) {
5417 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5418 			cnt = fxdr_unsigned(uint32_t, *tl);
5419 			if (cnt == 0) {
5420 				printf("NFS devinfo 0 len addrlist\n");
5421 				error = NFSERR_BADXDR;
5422 				goto nfsmout;
5423 			}
5424 			dspp = nfsfldi_addr(ndi, i);
5425 			safilled = AF_UNSPEC;
5426 			for (j = 0; j < cnt; j++) {
5427 				error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5428 				    &isudp);
5429 				if (error != 0 && error != EPERM) {
5430 					error = NFSERR_BADXDR;
5431 					goto nfsmout;
5432 				}
5433 				if (error == 0 && isudp == 0) {
5434 					/*
5435 					 * The priority is:
5436 					 * - Same address family.
5437 					 * Save the address and dspp, so that
5438 					 * the connection can be done after
5439 					 * parsing is complete.
5440 					 */
5441 					if (safilled == AF_UNSPEC ||
5442 					    (af == nmp->nm_nam->sa_family &&
5443 					     safilled != nmp->nm_nam->sa_family)
5444 					   ) {
5445 						if (af == AF_INET)
5446 							ssin = sin;
5447 						else
5448 							ssin6 = sin6;
5449 						safilled = af;
5450 						gotdspp = dspp;
5451 					}
5452 				}
5453 			}
5454 		}
5455 
5456 		gotvers = NFS_VER4;	/* Default NFSv4.1 for File Layout. */
5457 		gotminor = NFSV41_MINORVERSION;
5458 		/* For Flex File, we will take one of the versions to use. */
5459 		if (layouttype == NFSLAYOUT_FLEXFILE) {
5460 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5461 			j = fxdr_unsigned(int, *tl);
5462 			if (j < 1 || j > NFSDEV_MAXVERS) {
5463 				printf("pNFS: too many versions\n");
5464 				error = NFSERR_BADXDR;
5465 				goto nfsmout;
5466 			}
5467 			gotvers = 0;
5468 			gotminor = 0;
5469 			for (i = 0; i < j; i++) {
5470 				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5471 				vers = fxdr_unsigned(uint32_t, *tl++);
5472 				minorvers = fxdr_unsigned(uint32_t, *tl++);
5473 				if (vers == NFS_VER3)
5474 					minorvers = 0;
5475 				if ((vers == NFS_VER4 && ((minorvers ==
5476 				    NFSV41_MINORVERSION && gotminor == 0) ||
5477 				    minorvers == NFSV42_MINORVERSION)) ||
5478 				    (vers == NFS_VER3 && gotvers == 0)) {
5479 					gotvers = vers;
5480 					gotminor = minorvers;
5481 					/* We'll take this one. */
5482 					ndi->nfsdi_versindex = i;
5483 					ndi->nfsdi_vers = vers;
5484 					ndi->nfsdi_minorvers = minorvers;
5485 					ndi->nfsdi_rsize = fxdr_unsigned(
5486 					    uint32_t, *tl++);
5487 					ndi->nfsdi_wsize = fxdr_unsigned(
5488 					    uint32_t, *tl++);
5489 					if (*tl == newnfs_true)
5490 						ndi->nfsdi_flags |=
5491 						    NFSDI_TIGHTCOUPLED;
5492 					else
5493 						ndi->nfsdi_flags &=
5494 						    ~NFSDI_TIGHTCOUPLED;
5495 				}
5496 			}
5497 			if (gotvers == 0) {
5498 				printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5499 				error = NFSERR_BADXDR;
5500 				goto nfsmout;
5501 			}
5502 		}
5503 
5504 		/* And the notify bits. */
5505 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5506 		bitcnt = fxdr_unsigned(int, *tl);
5507 		if (bitcnt > 0) {
5508 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5509 			if (notifybitsp != NULL)
5510 				*notifybitsp =
5511 				    fxdr_unsigned(uint32_t, *tl);
5512 		}
5513 		if (safilled != AF_UNSPEC) {
5514 			KASSERT(ndi != NULL, ("ndi is NULL"));
5515 			*ndip = ndi;
5516 		} else
5517 			error = EPERM;
5518 		if (error == 0) {
5519 			/*
5520 			 * Now we can do a TCP connection for the correct
5521 			 * NFS version and IP address.
5522 			 */
5523 			error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5524 			    gotvers, gotminor, &dsp, p);
5525 		}
5526 		if (error == 0) {
5527 			KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5528 			*gotdspp = dsp;
5529 		}
5530 	}
5531 	if (nd->nd_repstat != 0 && error == 0)
5532 		error = nd->nd_repstat;
5533 nfsmout:
5534 	if (error != 0 && ndi != NULL)
5535 		nfscl_freedevinfo(ndi);
5536 	m_freem(nd->nd_mrep);
5537 	return (error);
5538 }
5539 
5540 /*
5541  * Do the NFSv4.1 LayoutCommit.
5542  */
5543 int
5544 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5545     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5546     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5547 {
5548 	uint32_t *tl;
5549 	struct nfsrv_descript nfsd, *nd = &nfsd;
5550 	int error;
5551 
5552 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5553 	    0, 0);
5554 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5555 	    NFSX_STATEID);
5556 	txdr_hyper(off, tl);
5557 	tl += 2;
5558 	txdr_hyper(len, tl);
5559 	tl += 2;
5560 	if (reclaim != 0)
5561 		*tl++ = newnfs_true;
5562 	else
5563 		*tl++ = newnfs_false;
5564 	*tl++ = txdr_unsigned(stateidp->seqid);
5565 	*tl++ = stateidp->other[0];
5566 	*tl++ = stateidp->other[1];
5567 	*tl++ = stateidp->other[2];
5568 	*tl++ = newnfs_true;
5569 	if (lastbyte < off)
5570 		lastbyte = off;
5571 	else if (lastbyte >= (off + len))
5572 		lastbyte = off + len - 1;
5573 	txdr_hyper(lastbyte, tl);
5574 	tl += 2;
5575 	*tl++ = newnfs_false;
5576 	*tl++ = txdr_unsigned(layouttype);
5577 	/* All supported layouts are 0 length. */
5578 	*tl = txdr_unsigned(0);
5579 	nd->nd_flag |= ND_USEGSSNAME;
5580 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5581 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5582 	if (error != 0)
5583 		return (error);
5584 	error = nd->nd_repstat;
5585 	m_freem(nd->nd_mrep);
5586 	return (error);
5587 }
5588 
5589 /*
5590  * Do the NFSv4.1 LayoutReturn.
5591  */
5592 int
5593 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5594     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5595     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5596     uint32_t stat, uint32_t op, char *devid)
5597 {
5598 	uint32_t *tl;
5599 	struct nfsrv_descript nfsd, *nd = &nfsd;
5600 	uint64_t tu64;
5601 	int error;
5602 
5603 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5604 	    0, 0);
5605 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5606 	if (reclaim != 0)
5607 		*tl++ = newnfs_true;
5608 	else
5609 		*tl++ = newnfs_false;
5610 	*tl++ = txdr_unsigned(layouttype);
5611 	*tl++ = txdr_unsigned(iomode);
5612 	*tl = txdr_unsigned(layoutreturn);
5613 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5614 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5615 		    NFSX_UNSIGNED);
5616 		txdr_hyper(offset, tl);
5617 		tl += 2;
5618 		txdr_hyper(len, tl);
5619 		tl += 2;
5620 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5621 		*tl++ = txdr_unsigned(stateidp->seqid);
5622 		*tl++ = stateidp->other[0];
5623 		*tl++ = stateidp->other[1];
5624 		*tl++ = stateidp->other[2];
5625 		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5626 			*tl = txdr_unsigned(0);
5627 		else if (layouttype == NFSLAYOUT_FLEXFILE) {
5628 			if (stat != 0) {
5629 				*tl = txdr_unsigned(2 * NFSX_HYPER +
5630 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5631 				    NFSX_UNSIGNED);
5632 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5633 				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5634 				    NFSX_UNSIGNED);
5635 				*tl++ = txdr_unsigned(1);	/* One error. */
5636 				tu64 = 0;			/* Offset. */
5637 				txdr_hyper(tu64, tl); tl += 2;
5638 				tu64 = UINT64_MAX;		/* Length. */
5639 				txdr_hyper(tu64, tl); tl += 2;
5640 				NFSBCOPY(stateidp, tl, NFSX_STATEID);
5641 				tl += (NFSX_STATEID / NFSX_UNSIGNED);
5642 				*tl++ = txdr_unsigned(1);	/* One error. */
5643 				NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5644 				tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5645 				*tl++ = txdr_unsigned(stat);
5646 				*tl++ = txdr_unsigned(op);
5647 			} else {
5648 				*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5649 				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5650 				/* No ioerrs. */
5651 				*tl++ = 0;
5652 			}
5653 			*tl = 0;	/* No stats yet. */
5654 		}
5655 	}
5656 	nd->nd_flag |= ND_USEGSSNAME;
5657 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5658 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5659 	if (error != 0)
5660 		return (error);
5661 	if (nd->nd_repstat == 0) {
5662 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5663 		if (*tl != 0) {
5664 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5665 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5666 			stateidp->other[0] = *tl++;
5667 			stateidp->other[1] = *tl++;
5668 			stateidp->other[2] = *tl;
5669 		}
5670 	} else
5671 		error = nd->nd_repstat;
5672 nfsmout:
5673 	m_freem(nd->nd_mrep);
5674 	return (error);
5675 }
5676 
5677 /*
5678  * Acquire a layout and devinfo, if possible. The caller must have acquired
5679  * a reference count on the nfsclclient structure before calling this.
5680  * Return the layout in lypp with a reference count on it, if successful.
5681  */
5682 static int
5683 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5684     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5685     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5686 {
5687 	struct nfscllayout *lyp;
5688 	struct nfsclflayout *flp;
5689 	struct nfsclflayouthead flh;
5690 	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5691 	nfsv4stateid_t stateid;
5692 	struct nfsclsession *tsep;
5693 
5694 	*lypp = NULL;
5695 	if (NFSHASFLEXFILE(nmp))
5696 		layouttype = NFSLAYOUT_FLEXFILE;
5697 	else
5698 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5699 	/*
5700 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5701 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5702 	 * flp == NULL.
5703 	 */
5704 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5705 	    off, &flp, &recalled);
5706 	islocked = 0;
5707 	if (lyp == NULL || flp == NULL) {
5708 		if (recalled != 0)
5709 			return (EIO);
5710 		LIST_INIT(&flh);
5711 		tsep = nfsmnt_mdssession(nmp);
5712 		layoutlen = tsep->nfsess_maxcache -
5713 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5714 		if (lyp == NULL) {
5715 			stateid.seqid = 0;
5716 			stateid.other[0] = stateidp->other[0];
5717 			stateid.other[1] = stateidp->other[1];
5718 			stateid.other[2] = stateidp->other[2];
5719 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5720 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5721 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5722 			    &retonclose, &flh, cred, p, NULL);
5723 		} else {
5724 			islocked = 1;
5725 			stateid.seqid = lyp->nfsly_stateid.seqid;
5726 			stateid.other[0] = lyp->nfsly_stateid.other[0];
5727 			stateid.other[1] = lyp->nfsly_stateid.other[1];
5728 			stateid.other[2] = lyp->nfsly_stateid.other[2];
5729 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5730 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5731 			    (uint64_t)0, layouttype, layoutlen, &stateid,
5732 			    &retonclose, &flh, cred, p, NULL);
5733 		}
5734 		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5735 		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5736 		    &flh, layouttype, error, NULL, cred, p);
5737 		if (error == 0)
5738 			*lypp = lyp;
5739 		else if (islocked != 0)
5740 			nfscl_rellayout(lyp, 1);
5741 	} else
5742 		*lypp = lyp;
5743 	return (error);
5744 }
5745 
5746 /*
5747  * Do a TCP connection plus exchange id and create session.
5748  * If successful, a "struct nfsclds" is linked into the list for the
5749  * mount point and a pointer to it is returned.
5750  */
5751 static int
5752 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5753     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5754     struct nfsclds **dspp, NFSPROC_T *p)
5755 {
5756 	struct sockaddr_in *msad, *sad;
5757 	struct sockaddr_in6 *msad6, *sad6;
5758 	struct nfsclclient *clp;
5759 	struct nfssockreq *nrp;
5760 	struct nfsclds *dsp, *tdsp;
5761 	int error, firsttry;
5762 	enum nfsclds_state retv;
5763 	uint32_t sequenceid = 0;
5764 
5765 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5766 	    ("nfsrpc_fillsa: NULL nr_cred"));
5767 	NFSLOCKCLSTATE();
5768 	clp = nmp->nm_clp;
5769 	NFSUNLOCKCLSTATE();
5770 	if (clp == NULL)
5771 		return (EPERM);
5772 	if (af == AF_INET) {
5773 		NFSLOCKMNT(nmp);
5774 		/*
5775 		 * Check to see if we already have a session for this
5776 		 * address that is usable for a DS.
5777 		 * Note that the MDS's address is in a different place
5778 		 * than the sessions already acquired for DS's.
5779 		 */
5780 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5781 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5782 		while (tdsp != NULL) {
5783 			if (msad != NULL && msad->sin_family == AF_INET &&
5784 			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5785 			    sin->sin_port == msad->sin_port &&
5786 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5787 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5788 				*dspp = tdsp;
5789 				NFSUNLOCKMNT(nmp);
5790 				NFSCL_DEBUG(4, "fnd same addr\n");
5791 				return (0);
5792 			}
5793 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5794 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5795 				msad = (struct sockaddr_in *)
5796 				    tdsp->nfsclds_sockp->nr_nam;
5797 			else
5798 				msad = NULL;
5799 		}
5800 		NFSUNLOCKMNT(nmp);
5801 
5802 		/* No IP address match, so look for new/trunked one. */
5803 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5804 		sad->sin_len = sizeof(*sad);
5805 		sad->sin_family = AF_INET;
5806 		sad->sin_port = sin->sin_port;
5807 		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5808 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5809 		nrp->nr_nam = (struct sockaddr *)sad;
5810 	} else if (af == AF_INET6) {
5811 		NFSLOCKMNT(nmp);
5812 		/*
5813 		 * Check to see if we already have a session for this
5814 		 * address that is usable for a DS.
5815 		 * Note that the MDS's address is in a different place
5816 		 * than the sessions already acquired for DS's.
5817 		 */
5818 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5819 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5820 		while (tdsp != NULL) {
5821 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5822 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5823 			    &msad6->sin6_addr) &&
5824 			    sin6->sin6_port == msad6->sin6_port &&
5825 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5826 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5827 				*dspp = tdsp;
5828 				NFSUNLOCKMNT(nmp);
5829 				return (0);
5830 			}
5831 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5832 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5833 				msad6 = (struct sockaddr_in6 *)
5834 				    tdsp->nfsclds_sockp->nr_nam;
5835 			else
5836 				msad6 = NULL;
5837 		}
5838 		NFSUNLOCKMNT(nmp);
5839 
5840 		/* No IP address match, so look for new/trunked one. */
5841 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5842 		sad6->sin6_len = sizeof(*sad6);
5843 		sad6->sin6_family = AF_INET6;
5844 		sad6->sin6_port = sin6->sin6_port;
5845 		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5846 		    sizeof(struct in6_addr));
5847 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5848 		nrp->nr_nam = (struct sockaddr *)sad6;
5849 	} else
5850 		return (EPERM);
5851 
5852 	nrp->nr_sotype = SOCK_STREAM;
5853 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5854 	nrp->nr_prog = NFS_PROG;
5855 	nrp->nr_vers = vers;
5856 
5857 	/*
5858 	 * Use the credentials that were used for the mount, which are
5859 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5860 	 * Ref. counting the credentials with crhold() is probably not
5861 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5862 	 * unmount, but I did it anyhow.
5863 	 */
5864 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5865 	error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
5866 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
5867 
5868 	dsp = NULL;
5869 	/* Now, do the exchangeid and create session. */
5870 	if (error == 0) {
5871 		if (vers == NFS_VER4) {
5872 			firsttry = 0;
5873 			do {
5874 				error = nfsrpc_exchangeid(nmp, clp, nrp,
5875 				    minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
5876 				    nrp->nr_cred, p);
5877 				NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5878 				if (error == NFSERR_MINORVERMISMATCH)
5879 					minorvers = NFSV42_MINORVERSION;
5880 			} while (error == NFSERR_MINORVERMISMATCH &&
5881 			    firsttry++ == 0);
5882 			if (error != 0)
5883 				newnfs_disconnect(NULL, nrp);
5884 		} else {
5885 			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5886 			    M_WAITOK | M_ZERO);
5887 			dsp->nfsclds_flags |= NFSCLDS_DS;
5888 			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5889 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5890 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5891 			    NULL, MTX_DEF);
5892 		}
5893 	}
5894 	if (error == 0) {
5895 		dsp->nfsclds_sockp = nrp;
5896 		if (vers == NFS_VER4) {
5897 			NFSLOCKMNT(nmp);
5898 			retv = nfscl_getsameserver(nmp, dsp, &tdsp,
5899 			    &sequenceid);
5900 			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5901 			if (retv == NFSDSP_USETHISSESSION &&
5902 			    nfscl_dssameconn != 0) {
5903 				NFSLOCKDS(tdsp);
5904 				tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
5905 				NFSUNLOCKDS(tdsp);
5906 				NFSUNLOCKMNT(nmp);
5907 				/*
5908 				 * If there is already a session for this
5909 				 * server, use it.
5910 				 */
5911 				newnfs_disconnect(NULL, nrp);
5912 				nfscl_freenfsclds(dsp);
5913 				*dspp = tdsp;
5914 				return (0);
5915 			}
5916 			if (retv == NFSDSP_NOTFOUND)
5917 				sequenceid =
5918 				    dsp->nfsclds_sess.nfsess_sequenceid;
5919 			NFSUNLOCKMNT(nmp);
5920 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5921 			    nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
5922 			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5923 		}
5924 	} else {
5925 		NFSFREECRED(nrp->nr_cred);
5926 		NFSFREEMUTEX(&nrp->nr_mtx);
5927 		free(nrp->nr_nam, M_SONAME);
5928 		free(nrp, M_NFSSOCKREQ);
5929 	}
5930 	if (error == 0) {
5931 		NFSCL_DEBUG(3, "add DS session\n");
5932 		/*
5933 		 * Put it at the end of the list. That way the list
5934 		 * is ordered by when the entry was added. This matters
5935 		 * since the one done first is the one that should be
5936 		 * used for sequencid'ing any subsequent create sessions.
5937 		 */
5938 		NFSLOCKMNT(nmp);
5939 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5940 		NFSUNLOCKMNT(nmp);
5941 		*dspp = dsp;
5942 	} else if (dsp != NULL) {
5943 		newnfs_disconnect(NULL, nrp);
5944 		nfscl_freenfsclds(dsp);
5945 	}
5946 	return (error);
5947 }
5948 
5949 /*
5950  * Do the NFSv4.1 Reclaim Complete.
5951  */
5952 int
5953 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5954 {
5955 	uint32_t *tl;
5956 	struct nfsrv_descript nfsd;
5957 	struct nfsrv_descript *nd = &nfsd;
5958 	int error;
5959 
5960 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5961 	    0);
5962 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5963 	*tl = newnfs_false;
5964 	nd->nd_flag |= ND_USEGSSNAME;
5965 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5966 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5967 	if (error != 0)
5968 		return (error);
5969 	error = nd->nd_repstat;
5970 	m_freem(nd->nd_mrep);
5971 	return (error);
5972 }
5973 
5974 /*
5975  * Initialize the slot tables for a session.
5976  */
5977 static void
5978 nfscl_initsessionslots(struct nfsclsession *sep)
5979 {
5980 	int i;
5981 
5982 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5983 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5984 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5985 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5986 	}
5987 	for (i = 0; i < 64; i++)
5988 		sep->nfsess_slotseq[i] = 0;
5989 	sep->nfsess_slots = 0;
5990 }
5991 
5992 /*
5993  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5994  */
5995 int
5996 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5997     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5998 {
5999 	struct nfsnode *np = VTONFS(vp);
6000 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6001 	struct nfscllayout *layp;
6002 	struct nfscldevinfo *dip;
6003 	struct nfsclflayout *rflp;
6004 	struct mbuf *m, *m2;
6005 	struct nfsclwritedsdorpc *drpc, *tdrpc;
6006 	nfsv4stateid_t stateid;
6007 	struct ucred *newcred;
6008 	uint64_t lastbyte, len, off, oresid, xfer;
6009 	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
6010 	void *lckp;
6011 	uint8_t *dev;
6012 	void *iovbase = NULL;
6013 	size_t iovlen = 0;
6014 	off_t offs = 0;
6015 	ssize_t resid = 0;
6016 
6017 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
6018 	    (np->n_flag & NNOLAYOUT) != 0)
6019 		return (EIO);
6020 	/* Now, get a reference cnt on the clientid for this mount. */
6021 	if (nfscl_getref(nmp) == 0)
6022 		return (EIO);
6023 
6024 	/* Find an appropriate stateid. */
6025 	newcred = NFSNEWCRED(cred);
6026 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
6027 	    rwaccess, 1, newcred, p, &stateid, &lckp);
6028 	if (error != 0) {
6029 		NFSFREECRED(newcred);
6030 		nfscl_relref(nmp);
6031 		return (error);
6032 	}
6033 	/* Search for a layout for this file. */
6034 	off = uiop->uio_offset;
6035 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
6036 	    np->n_fhp->nfh_len, off, &rflp, &recalled);
6037 	if (layp == NULL || rflp == NULL) {
6038 		if (recalled != 0) {
6039 			NFSFREECRED(newcred);
6040 			nfscl_relref(nmp);
6041 			return (EIO);
6042 		}
6043 		if (layp != NULL) {
6044 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
6045 			layp = NULL;
6046 		}
6047 		/* Try and get a Layout, if it is supported. */
6048 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
6049 		    (np->n_flag & NWRITEOPENED) != 0)
6050 			iolaymode = NFSLAYOUTIOMODE_RW;
6051 		else
6052 			iolaymode = NFSLAYOUTIOMODE_READ;
6053 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
6054 		    NULL, &stateid, off, &layp, newcred, p);
6055 		if (error != 0) {
6056 			NFSLOCKNODE(np);
6057 			np->n_flag |= NNOLAYOUT;
6058 			NFSUNLOCKNODE(np);
6059 			if (lckp != NULL)
6060 				nfscl_lockderef(lckp);
6061 			NFSFREECRED(newcred);
6062 			if (layp != NULL)
6063 				nfscl_rellayout(layp, 0);
6064 			nfscl_relref(nmp);
6065 			return (error);
6066 		}
6067 	}
6068 
6069 	/*
6070 	 * Loop around finding a layout that works for the first part of
6071 	 * this I/O operation, and then call the function that actually
6072 	 * does the RPC.
6073 	 */
6074 	eof = 0;
6075 	len = (uint64_t)uiop->uio_resid;
6076 	while (len > 0 && error == 0 && eof == 0) {
6077 		off = uiop->uio_offset;
6078 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6079 		if (error == 0) {
6080 			oresid = xfer = (uint64_t)uiop->uio_resid;
6081 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6082 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6083 			/*
6084 			 * For Flex File layout with mirrored DSs, select one
6085 			 * of them at random for reads. For writes and commits,
6086 			 * do all mirrors.
6087 			 */
6088 			m = NULL;
6089 			tdrpc = drpc = NULL;
6090 			firstmirror = 0;
6091 			mirrorcnt = 1;
6092 			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6093 			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6094 				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6095 					firstmirror = arc4random() % mirrorcnt;
6096 					mirrorcnt = firstmirror + 1;
6097 				} else {
6098 					if (docommit == 0) {
6099 						/*
6100 						 * Save values, so uiop can be
6101 						 * rolled back upon a write
6102 						 * error.
6103 						 */
6104 						offs = uiop->uio_offset;
6105 						resid = uiop->uio_resid;
6106 						iovbase =
6107 						    uiop->uio_iov->iov_base;
6108 						iovlen = uiop->uio_iov->iov_len;
6109 						m = nfsm_uiombuflist(uiop, len,
6110 						    0);
6111 					}
6112 					tdrpc = drpc = malloc(sizeof(*drpc) *
6113 					    (mirrorcnt - 1), M_TEMP, M_WAITOK |
6114 					    M_ZERO);
6115 				}
6116 			}
6117 			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6118 				m2 = NULL;
6119 				if (m != NULL && i < mirrorcnt - 1)
6120 					m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6121 				else {
6122 					m2 = m;
6123 					m = NULL;
6124 				}
6125 				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6126 					dev = rflp->nfsfl_ffm[i].dev;
6127 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6128 					    rflp->nfsfl_ffm[i].devp);
6129 				} else {
6130 					dev = rflp->nfsfl_dev;
6131 					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6132 					    rflp->nfsfl_devp);
6133 				}
6134 				if (dip != NULL) {
6135 					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6136 					    != 0)
6137 						error = nfscl_dofflayoutio(vp,
6138 						    uiop, iomode, must_commit,
6139 						    &eof, &stateid, rwaccess,
6140 						    dip, layp, rflp, off, xfer,
6141 						    i, docommit, m2, tdrpc,
6142 						    newcred, p);
6143 					else
6144 						error = nfscl_doflayoutio(vp,
6145 						    uiop, iomode, must_commit,
6146 						    &eof, &stateid, rwaccess,
6147 						    dip, layp, rflp, off, xfer,
6148 						    docommit, newcred, p);
6149 					nfscl_reldevinfo(dip);
6150 				} else {
6151 					if (m2 != NULL)
6152 						m_freem(m2);
6153 					error = EIO;
6154 				}
6155 				tdrpc++;
6156 			}
6157 			if (m != NULL)
6158 				m_freem(m);
6159 			tdrpc = drpc;
6160 			timo = hz / 50;		/* Wait for 20msec. */
6161 			if (timo < 1)
6162 				timo = 1;
6163 			for (i = firstmirror; i < mirrorcnt - 1 &&
6164 			    tdrpc != NULL; i++, tdrpc++) {
6165 				/*
6166 				 * For the unused drpc entries, both inprog and
6167 				 * err == 0, so this loop won't break.
6168 				 */
6169 				while (tdrpc->inprog != 0 && tdrpc->done == 0)
6170 					tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6171 					    timo);
6172 				if (error == 0 && tdrpc->err != 0)
6173 					error = tdrpc->err;
6174 			}
6175 			free(drpc, M_TEMP);
6176 			if (error == 0) {
6177 				if (mirrorcnt > 1 && rwaccess ==
6178 				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6179 					NFSLOCKCLSTATE();
6180 					layp->nfsly_flags |= NFSLY_WRITTEN;
6181 					NFSUNLOCKCLSTATE();
6182 				}
6183 				lastbyte = off + xfer - 1;
6184 				NFSLOCKCLSTATE();
6185 				if (lastbyte > layp->nfsly_lastbyte)
6186 					layp->nfsly_lastbyte = lastbyte;
6187 				NFSUNLOCKCLSTATE();
6188 			} else if (error == NFSERR_OPENMODE &&
6189 			    rwaccess == NFSV4OPEN_ACCESSREAD) {
6190 				NFSLOCKMNT(nmp);
6191 				nmp->nm_state |= NFSSTA_OPENMODE;
6192 				NFSUNLOCKMNT(nmp);
6193 			} else
6194 				error = EIO;
6195 			if (error == 0)
6196 				len -= (oresid - (uint64_t)uiop->uio_resid);
6197 			else if (mirrorcnt > 1 && rwaccess ==
6198 			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6199 				/*
6200 				 * In case the rpc gets retried, roll the
6201 				 * uio fields changed by nfsm_uiombuflist()
6202 				 * back.
6203 				 */
6204 				uiop->uio_offset = offs;
6205 				uiop->uio_resid = resid;
6206 				uiop->uio_iov->iov_base = iovbase;
6207 				uiop->uio_iov->iov_len = iovlen;
6208 			}
6209 		}
6210 	}
6211 	if (lckp != NULL)
6212 		nfscl_lockderef(lckp);
6213 	NFSFREECRED(newcred);
6214 	nfscl_rellayout(layp, 0);
6215 	nfscl_relref(nmp);
6216 	return (error);
6217 }
6218 
6219 /*
6220  * Find a file layout that will handle the first bytes of the requested
6221  * range and return the information from it needed to the I/O operation.
6222  */
6223 int
6224 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6225     struct nfsclflayout **retflpp)
6226 {
6227 	struct nfsclflayout *flp, *nflp, *rflp;
6228 	uint32_t rw;
6229 
6230 	rflp = NULL;
6231 	rw = rwaccess;
6232 	/* For reading, do the Read list first and then the Write list. */
6233 	do {
6234 		if (rw == NFSV4OPEN_ACCESSREAD)
6235 			flp = LIST_FIRST(&lyp->nfsly_flayread);
6236 		else
6237 			flp = LIST_FIRST(&lyp->nfsly_flayrw);
6238 		while (flp != NULL) {
6239 			nflp = LIST_NEXT(flp, nfsfl_list);
6240 			if (flp->nfsfl_off > off)
6241 				break;
6242 			if (flp->nfsfl_end > off &&
6243 			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6244 				rflp = flp;
6245 			flp = nflp;
6246 		}
6247 		if (rw == NFSV4OPEN_ACCESSREAD)
6248 			rw = NFSV4OPEN_ACCESSWRITE;
6249 		else
6250 			rw = 0;
6251 	} while (rw != 0);
6252 	if (rflp != NULL) {
6253 		/* This one covers the most bytes starting at off. */
6254 		*retflpp = rflp;
6255 		return (0);
6256 	}
6257 	return (EIO);
6258 }
6259 
6260 /*
6261  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6262  */
6263 static int
6264 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6265     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6266     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6267     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6268 {
6269 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6270 	int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6271 	struct nfsnode *np;
6272 	struct nfsfh *fhp;
6273 	struct nfsclds **dspp;
6274 
6275 	np = VTONFS(vp);
6276 	rel_off = off - flp->nfsfl_patoff;
6277 	stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6278 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6279 	    dp->nfsdi_stripecnt;
6280 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6281 	error = 0;
6282 
6283 	/* Loop around, doing I/O for each stripe unit. */
6284 	while (len > 0 && error == 0) {
6285 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6286 		dspp = nfsfldi_addr(dp, stripe_index);
6287 		if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6288 			minorvers = NFSV42_MINORVERSION;
6289 		else
6290 			minorvers = NFSV41_MINORVERSION;
6291 		if (len > transfer && docommit == 0)
6292 			xfer = transfer;
6293 		else
6294 			xfer = len;
6295 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6296 			/* Dense layout. */
6297 			if (stripe_pos >= flp->nfsfl_fhcnt)
6298 				return (EIO);
6299 			fhp = flp->nfsfl_fh[stripe_pos];
6300 			io_off = (rel_off / (stripe_unit_size *
6301 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
6302 			    rel_off % stripe_unit_size;
6303 		} else {
6304 			/* Sparse layout. */
6305 			if (flp->nfsfl_fhcnt > 1) {
6306 				if (stripe_index >= flp->nfsfl_fhcnt)
6307 					return (EIO);
6308 				fhp = flp->nfsfl_fh[stripe_index];
6309 			} else if (flp->nfsfl_fhcnt == 1)
6310 				fhp = flp->nfsfl_fh[0];
6311 			else
6312 				fhp = np->n_fhp;
6313 			io_off = off;
6314 		}
6315 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6316 			commit_thru_mds = 1;
6317 			if (docommit != 0)
6318 				error = EIO;
6319 		} else {
6320 			commit_thru_mds = 0;
6321 			NFSLOCKNODE(np);
6322 			np->n_flag |= NDSCOMMIT;
6323 			NFSUNLOCKNODE(np);
6324 		}
6325 		if (docommit != 0) {
6326 			if (error == 0)
6327 				error = nfsrpc_commitds(vp, io_off, xfer,
6328 				    *dspp, fhp, NFS_VER4, minorvers, cred, p);
6329 			if (error == 0) {
6330 				/*
6331 				 * Set both eof and uio_resid = 0 to end any
6332 				 * loops.
6333 				 */
6334 				*eofp = 1;
6335 				uiop->uio_resid = 0;
6336 			} else {
6337 				NFSLOCKNODE(np);
6338 				np->n_flag &= ~NDSCOMMIT;
6339 				NFSUNLOCKNODE(np);
6340 			}
6341 		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
6342 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6343 			    io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6344 		else {
6345 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6346 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6347 			    0, NFS_VER4, minorvers, cred, p);
6348 			if (error == 0) {
6349 				NFSLOCKCLSTATE();
6350 				lyp->nfsly_flags |= NFSLY_WRITTEN;
6351 				NFSUNLOCKCLSTATE();
6352 			}
6353 		}
6354 		if (error == 0) {
6355 			transfer = stripe_unit_size;
6356 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6357 			len -= xfer;
6358 			off += xfer;
6359 		}
6360 	}
6361 	return (error);
6362 }
6363 
/*
 * Do I/O using an NFSv4.1 flex file layout.
 *
 * Loops until "len" bytes starting at "off" have been handled or an error
 * occurs, doing one DS RPC per iteration against the DS returned by
 * nfsfldi_addr(dp, 0), using the file handle and stateid of entry "mirror"
 * in flp->nfsfl_ffm[].  Note that the stateidp argument is overwritten
 * with the mirror's stateid before it is used.
 *
 * docommit != 0: a Commit covering all of "len" is done.  On success,
 *     *eofp is set to 1 and uiop->uio_resid to 0 so callers stop looping.
 * rwflag == NFSV4OPEN_ACCESSREAD: Reads of at most dp->nfsdi_rsize bytes
 *     are done into uiop.
 * otherwise: Writes of at most dp->nfsdi_wsize bytes are done.  With a
 *     single mirror the data comes from uiop; with more than one mirror
 *     it comes from the mbuf list mp, which is split as required.
 *
 * For every mirror except the last one, the Commit or mirrored Write is
 * passed to nfsio_commitds()/nfsio_writedsmir() along with drpc.  The last
 * mirror's RPC is done directly by this thread.
 *
 * Returns 0 or the error from the failing RPC (EIO if the mbuf list split
 * fails).
 */
static int
nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
    uint64_t len, int mirror, int docommit, struct mbuf *mp,
    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
{
	uint64_t xfer;
	int error;
	struct nfsnode *np;
	struct nfsfh *fhp;
	struct nfsclds **dspp;
	struct ucred *tcred;
	struct mbuf *m, *m2;
	uint32_t copylen;

	np = VTONFS(vp);
	error = 0;
	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
	    (uintmax_t)len);
	/* Loop around, doing I/O for each stripe unit. */
	while (len > 0 && error == 0) {
		/* Pick up the DS, file handle and stateid for this mirror. */
		dspp = nfsfldi_addr(dp, 0);
		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
		stateidp = &flp->nfsfl_ffm[mirror].st;
		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
		/*
		 * When the DS is not tightly coupled, do the RPC with a copy
		 * of the credentials that has the uid/gid from the layout's
		 * mirror entry.  The copy is released at the end of each
		 * loop iteration.
		 */
		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
			tcred = NFSNEWCRED(cred);
			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
			tcred->cr_ngroups = 1;
		} else
			tcred = cred;
		/* Choose the largest transfer size for a single RPC. */
		if (rwflag == NFSV4OPEN_ACCESSREAD)
			copylen = dp->nfsdi_rsize;
		else {
			copylen = dp->nfsdi_wsize;
			if (len > copylen && mp != NULL) {
				/*
				 * When a mirrored configuration needs to do
				 * multiple writes to each mirror, all writes
				 * except the last one must be a multiple of
				 * 4 bytes.  This is required so that the XDR
				 * does not need padding.
				 * If possible, clip the size to an exact
				 * multiple of the mbuf length, so that the
				 * split will be on an mbuf boundary.
				 */
				copylen &= 0xfffffffc;
				if (copylen > mp->m_len)
					copylen = copylen / mp->m_len *
					    mp->m_len;
			}
		}
		/* Flag the nfsnode as having done DS I/O. */
		NFSLOCKNODE(np);
		np->n_flag |= NDSCOMMIT;
		NFSUNLOCKNODE(np);
		/* A Commit is never split; reads/writes are clipped. */
		if (len > copylen && docommit == 0)
			xfer = copylen;
		else
			xfer = len;
		if (docommit != 0) {
			if (error == 0) {
				/*
				 * Do last mirrored DS commit with this thread.
				 */
				if (mirror < flp->nfsfl_mirrorcnt - 1)
					error = nfsio_commitds(vp, off, xfer,
					    *dspp, fhp, dp->nfsdi_vers,
					    dp->nfsdi_minorvers, drpc, tcred,
					    p);
				else
					error = nfsrpc_commitds(vp, off, xfer,
					    *dspp, fhp, dp->nfsdi_vers,
					    dp->nfsdi_minorvers, tcred, p);
				NFSCL_DEBUG(4, "commitds=%d\n", error);
				/*
				 * Errors other than EACCES and ESTALE are
				 * reported via nfscl_dserr().
				 */
				if (error != 0 && error != EACCES && error !=
				    ESTALE) {
					NFSCL_DEBUG(4,
					    "DS layreterr for commit\n");
					nfscl_dserr(NFSV4OP_COMMIT, error, dp,
					    lyp, *dspp);
				}
			}
			NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
			if (error == 0) {
				/*
				 * Set both eof and uio_resid = 0 to end any
				 * loops.
				 */
				*eofp = 1;
				uiop->uio_resid = 0;
			} else {
				/* The commit failed, so undo the flag. */
				NFSLOCKNODE(np);
				np->n_flag &= ~NDSCOMMIT;
				NFSUNLOCKNODE(np);
			}
		} else if (rwflag == NFSV4OPEN_ACCESSREAD) {
			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
			    off, xfer, fhp, 1, dp->nfsdi_vers,
			    dp->nfsdi_minorvers, tcred, p);
			NFSCL_DEBUG(4, "readds=%d\n", error);
			if (error != 0 && error != EACCES && error != ESTALE) {
				NFSCL_DEBUG(4, "DS layreterr for read\n");
				nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
				    *dspp);
			}
		} else {
			if (flp->nfsfl_mirrorcnt == 1) {
				/* Not mirrored: write straight from the uio. */
				error = nfsrpc_writeds(vp, uiop, iomode,
				    must_commit, stateidp, *dspp, off, xfer,
				    fhp, 0, 1, dp->nfsdi_vers,
				    dp->nfsdi_minorvers, tcred, p);
				if (error == 0) {
					NFSLOCKCLSTATE();
					lyp->nfsly_flags |= NFSLY_WRITTEN;
					NFSUNLOCKCLSTATE();
				}
			} else {
				/*
				 * Mirrored: "m" is the mbuf list for this
				 * RPC and "mp" is advanced to the remainder
				 * when the list has to be split.
				 */
				m = mp;
				if (xfer < len) {
					/* The mbuf list must be split. */
					m2 = nfsm_split(mp, xfer);
					if (m2 != NULL)
						mp = m2;
					else {
						m_freem(mp);
						error = EIO;
					}
				}
				NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
				    (uintmax_t)len, (uintmax_t)xfer);
				/*
				 * Do last write to a mirrored DS with this
				 * thread.
				 */
				if (error == 0) {
					if (mirror < flp->nfsfl_mirrorcnt - 1)
						error = nfsio_writedsmir(vp,
						    iomode, must_commit,
						    stateidp, *dspp, off,
						    xfer, fhp, m,
						    dp->nfsdi_vers,
						    dp->nfsdi_minorvers, drpc,
						    tcred, p);
					else
						error = nfsrpc_writedsmir(vp,
						    iomode, must_commit,
						    stateidp, *dspp, off,
						    xfer, fhp, m,
						    dp->nfsdi_vers,
						    dp->nfsdi_minorvers, tcred,
						    p);
				}
				NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
				if (error != 0 && error != EACCES && error !=
				    ESTALE) {
					NFSCL_DEBUG(4,
					    "DS layreterr for write\n");
					nfscl_dserr(NFSV4OP_WRITE, error, dp,
					    lyp, *dspp);
				}
			}
		}
		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
		/* Only advance past the range when the RPC succeeded. */
		if (error == 0) {
			len -= xfer;
			off += xfer;
		}
		/* Release the per-iteration credential copy, if one was made. */
		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
			NFSFREECRED(tcred);
	}
	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
	return (error);
}
6543 
6544 /*
6545  * The actual read RPC done to a DS.
6546  */
6547 static int
6548 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6549     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6550     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6551 {
6552 	uint32_t *tl;
6553 	int attrflag, error, retlen;
6554 	struct nfsrv_descript nfsd;
6555 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6556 	struct nfsrv_descript *nd = &nfsd;
6557 	struct nfssockreq *nrp;
6558 	struct nfsvattr na;
6559 
6560 	nd->nd_mrep = NULL;
6561 	if (vers == 0 || vers == NFS_VER4) {
6562 		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6563 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6564 		vers = NFS_VER4;
6565 		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6566 		if (flex != 0)
6567 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6568 		else
6569 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6570 	} else {
6571 		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6572 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6573 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
6574 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
6575 		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6576 	}
6577 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6578 	txdr_hyper(io_off, tl);
6579 	*(tl + 2) = txdr_unsigned(len);
6580 	nrp = dsp->nfsclds_sockp;
6581 	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6582 	if (nrp == NULL)
6583 		/* If NULL, use the MDS socket. */
6584 		nrp = &nmp->nm_sockreq;
6585 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6586 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6587 	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6588 	    error);
6589 	if (error != 0)
6590 		return (error);
6591 	if (vers == NFS_VER3) {
6592 		error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6593 		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6594 		if (error != 0)
6595 			goto nfsmout;
6596 	}
6597 	if (nd->nd_repstat != 0) {
6598 		error = nd->nd_repstat;
6599 		goto nfsmout;
6600 	}
6601 	if (vers == NFS_VER3) {
6602 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6603 		*eofp = fxdr_unsigned(int, *(tl + 1));
6604 	} else {
6605 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6606 		*eofp = fxdr_unsigned(int, *tl);
6607 	}
6608 	NFSM_STRSIZ(retlen, len);
6609 	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6610 	error = nfsm_mbufuio(nd, uiop, retlen);
6611 nfsmout:
6612 	if (nd->nd_mrep != NULL)
6613 		m_freem(nd->nd_mrep);
6614 	return (error);
6615 }
6616 
/*
 * The actual write RPC done to a DS.
 *
 * Writes "len" bytes from uiop (which must have exactly one iovec) at
 * offset io_off.  When vers is 0 or NFS_VER4 an NFSv4 WRITEDS compound is
 * built, otherwise a plain Write that is counted as a WRITEDS in the RPC
 * statistics.  The request is sent on the DS's socket or, if the DS has
 * none, on the MDS socket.
 *
 * On a server error status, the uio is rolled back by "len" so the RPC
 * can be retried.  On a short write, the uio is backed up by the number
 * of bytes not written.  A reply with a written count of 0 yields
 * NFSERR_IO.
 * The write verifier in the reply is compared against the one saved in
 * the mount structure (commit_thru_mds != 0) or in the DS structure and,
 * if it has changed, *must_commit is set to 1.
 * Except when newnfs_request() itself fails, *iomode is set to the commit
 * level on return; that is the level from the reply when it was parsed
 * and NFSWRITE_FILESYNC otherwise.
 * Returns 0 or an error (the RPC error or the server's reply status).
 */
static int
nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
    struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
    struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
	int32_t backup;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfssockreq *nrp;
	struct nfsvattr na;

	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
	nd->nd_mrep = NULL;
	if (vers == 0 || vers == NFS_VER4) {
		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
		vers = NFS_VER4;
		/* Flex file layouts use the stateid as is. */
		if (flex != 0)
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		else
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	} else {
		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
		/* Count this as a DS write instead of a regular write. */
		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
		/* The NFSv3 arguments have one more word (the count). */
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
	}
	txdr_hyper(io_off, tl);
	tl += 2;
	if (vers == NFS_VER3)
		*tl++ = txdr_unsigned(len);
	*tl++ = txdr_unsigned(*iomode);
	*tl = txdr_unsigned(len);
	/* Append the data; this advances the uio by len bytes. */
	nfsm_uiombuf(nd, uiop, len);
	nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
	    nd->nd_repstat);
	if (error != 0)
		return (error);
	if (nd->nd_repstat != 0) {
		/*
		 * In case the rpc gets retried, roll
		 * the uio fields changed by nfsm_uiombuf()
		 * back.
		 */
		uiop->uio_offset -= len;
		uiop->uio_resid += len;
		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
		uiop->uio_iov->iov_len += len;
		error = nd->nd_repstat;
	} else {
		if (vers == NFS_VER3) {
			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
			    NULL);
			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
			if (error != 0)
				goto nfsmout;
		}
		/* Reply: bytes written, commit level and write verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		rlen = fxdr_unsigned(int, *tl++);
		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
		if (rlen == 0) {
			error = NFSERR_IO;
			goto nfsmout;
		} else if (rlen < len) {
			/* Short write, so back the uio up to what was done. */
			backup = len - rlen;
			uiop->uio_iov->iov_base =
			    (char *)uiop->uio_iov->iov_base - backup;
			uiop->uio_iov->iov_len += backup;
			uiop->uio_offset -= backup;
			uiop->uio_resid += backup;
			len = rlen;
		}
		commit = fxdr_unsigned(int, *tl++);

		/*
		 * Return the lowest commitment level
		 * obtained by any of the RPCs.
		 * (Only one RPC is done here and committed still has its
		 * initial value of NFSWRITE_FILESYNC, so the first test is
		 * always true.)
		 */
		if (committed == NFSWRITE_FILESYNC)
			committed = commit;
		else if (committed == NFSWRITE_DATASYNC &&
		    commit == NFSWRITE_UNSTABLE)
			committed = commit;
		/* tl now points at the write verifier in the reply. */
		if (commit_thru_mds != 0) {
			/* The verifier is tracked in the mount structure. */
			NFSLOCKMNT(nmp);
			if (!NFSHASWRITEVERF(nmp)) {
				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
				NFSSETWRITEVERF(nmp);
	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
				*must_commit = 1;
				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
			}
			NFSUNLOCKMNT(nmp);
		} else {
			/* The verifier is tracked in the DS structure. */
			NFSLOCKDS(dsp);
			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
				*must_commit = 1;
				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
			}
			NFSUNLOCKDS(dsp);
		}
	}
nfsmout:
	if (nd->nd_mrep != NULL)
		m_freem(nd->nd_mrep);
	*iomode = committed;
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	return (error);
}
6747 
/*
 * The actual write RPC done to a DS, for the mirrored case.
 * The data to be written is supplied as the mbuf list "m" instead of
 * a uio.
 * This variant is called from a pNFS I/O thread via start_writedsmir()
 * and is also called directly by nfscl_dofflayoutio() and
 * nfsio_writedsmir().
 * Any short write is considered an IO error.
 *
 * The write verifier in the reply is compared against the one saved in
 * the DS structure and, if it has changed, *must_commit is set to 1.
 * Except when newnfs_request() itself fails, *iomode is set to the commit
 * level on return; that is the level from the reply when it was parsed
 * and NFSWRITE_FILESYNC otherwise.
 * Returns 0 or an error (the RPC error or the server's reply status).
 */
static int
nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
    struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
    struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfssockreq *nrp;
	struct nfsvattr na;

	nd->nd_mrep = NULL;
	if (vers == 0 || vers == NFS_VER4) {
		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
		vers = NFS_VER4;
		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
		    minorvers);
		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	} else {
		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
		/* Count this as a DS write instead of a regular write. */
		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
		/* The NFSv3 arguments have one more word (the count). */
		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
	}
	txdr_hyper(io_off, tl);
	tl += 2;
	if (vers == NFS_VER3)
		*tl++ = txdr_unsigned(len);
	*tl++ = txdr_unsigned(*iomode);
	*tl = txdr_unsigned(len);
	if (len > 0) {
		/* Put data in mbuf chain. */
		nd->nd_mb->m_next = m;
	}
	nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
	    nd->nd_repstat);
	if (error != 0)
		return (error);
	if (nd->nd_repstat != 0)
		error = nd->nd_repstat;
	else {
		if (vers == NFS_VER3) {
			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
			    NULL);
			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
			    error);
			if (error != 0)
				goto nfsmout;
		}
		/* Reply: bytes written, commit level and write verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		rlen = fxdr_unsigned(int, *tl++);
		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
		    rlen);
		/* Unlike nfsrpc_writeds(), a partial write is not handled. */
		if (rlen != len) {
			error = NFSERR_IO;
			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
			    len, rlen);
			goto nfsmout;
		}
		commit = fxdr_unsigned(int, *tl++);

		/*
		 * Return the lowest commitment level
		 * obtained by any of the RPCs.
		 * (Only one RPC is done here and committed still has its
		 * initial value of NFSWRITE_FILESYNC, so the first test is
		 * always true.)
		 */
		if (committed == NFSWRITE_FILESYNC)
			committed = commit;
		else if (committed == NFSWRITE_DATASYNC &&
		    commit == NFSWRITE_UNSTABLE)
			committed = commit;
		/* tl now points at the write verifier in the reply. */
		NFSLOCKDS(dsp);
		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
			*must_commit = 1;
			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
		}
		NFSUNLOCKDS(dsp);
	}
nfsmout:
	if (nd->nd_mrep != NULL)
		m_freem(nd->nd_mrep);
	*iomode = committed;
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	return (error);
}
6854 
6855 /*
6856  * Start up the thread that will execute nfsrpc_writedsmir().
6857  */
6858 static void
6859 start_writedsmir(void *arg, int pending)
6860 {
6861 	struct nfsclwritedsdorpc *drpc;
6862 
6863 	drpc = (struct nfsclwritedsdorpc *)arg;
6864 	drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6865 	    &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6866 	    drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6867 	    drpc->p);
6868 	drpc->done = 1;
6869 	NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6870 }
6871 
6872 /*
6873  * Set up the write DS mirror call for the pNFS I/O thread.
6874  */
6875 static int
6876 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6877     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6878     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6879     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6880 {
6881 	int error, ret;
6882 
6883 	error = 0;
6884 	drpc->done = 0;
6885 	drpc->vp = vp;
6886 	drpc->iomode = *iomode;
6887 	drpc->must_commit = *must_commit;
6888 	drpc->stateidp = stateidp;
6889 	drpc->dsp = dsp;
6890 	drpc->off = off;
6891 	drpc->len = len;
6892 	drpc->fhp = fhp;
6893 	drpc->m = m;
6894 	drpc->vers = vers;
6895 	drpc->minorvers = minorvers;
6896 	drpc->cred = cred;
6897 	drpc->p = p;
6898 	drpc->inprog = 0;
6899 	ret = EIO;
6900 	if (nfs_pnfsiothreads != 0) {
6901 		ret = nfs_pnfsio(start_writedsmir, drpc);
6902 		NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6903 	}
6904 	if (ret != 0)
6905 		error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
6906 		    dsp, off, len, fhp, m, vers, minorvers, cred, p);
6907 	NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6908 	return (error);
6909 }
6910 
6911 /*
6912  * Free up the nfsclds structure.
6913  */
6914 void
6915 nfscl_freenfsclds(struct nfsclds *dsp)
6916 {
6917 	int i;
6918 
6919 	if (dsp == NULL)
6920 		return;
6921 	if (dsp->nfsclds_sockp != NULL) {
6922 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6923 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6924 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6925 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6926 	}
6927 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
6928 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6929 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
6930 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6931 			m_freem(
6932 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6933 	}
6934 	free(dsp, M_NFSCLDS);
6935 }
6936 
6937 static enum nfsclds_state
6938 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6939     struct nfsclds **retdspp, uint32_t *sequencep)
6940 {
6941 	struct nfsclds *dsp;
6942 	int fndseq;
6943 
6944 	/*
6945 	 * Search the list of nfsclds structures for one with the same
6946 	 * server.
6947 	 */
6948 	fndseq = 0;
6949 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6950 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6951 		    dsp->nfsclds_servownlen != 0 &&
6952 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6953 		    dsp->nfsclds_servownlen) &&
6954 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
6955 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6956 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
6957 			    dsp->nfsclds_flags);
6958 			if (fndseq == 0) {
6959 				/* Get sequenceid# from first entry. */
6960 				*sequencep =
6961 				    dsp->nfsclds_sess.nfsess_sequenceid;
6962 				fndseq = 1;
6963 			}
6964 			/* Server major id matches. */
6965 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6966 				*retdspp = dsp;
6967 				return (NFSDSP_USETHISSESSION);
6968 			}
6969 		}
6970 	}
6971 	if (fndseq != 0)
6972 		return (NFSDSP_SEQTHISSESSION);
6973 	return (NFSDSP_NOTFOUND);
6974 }
6975 
6976 /*
6977  * NFS commit rpc to a NFSv4.1 DS.
6978  */
6979 static int
6980 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6981     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6982     NFSPROC_T *p)
6983 {
6984 	uint32_t *tl;
6985 	struct nfsrv_descript nfsd, *nd = &nfsd;
6986 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6987 	struct nfssockreq *nrp;
6988 	struct nfsvattr na;
6989 	int attrflag, error;
6990 
6991 	nd->nd_mrep = NULL;
6992 	if (vers == 0 || vers == NFS_VER4) {
6993 		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6994 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6995 		vers = NFS_VER4;
6996 	} else {
6997 		nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6998 		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6999 		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
7000 		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
7001 	}
7002 	NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
7003 	    minorvers);
7004 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7005 	txdr_hyper(offset, tl);
7006 	tl += 2;
7007 	*tl = txdr_unsigned(cnt);
7008 	nrp = dsp->nfsclds_sockp;
7009 	if (nrp == NULL)
7010 		/* If NULL, use the MDS socket. */
7011 		nrp = &nmp->nm_sockreq;
7012 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7013 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7014 	NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
7015 	    nd->nd_repstat);
7016 	if (error != 0)
7017 		return (error);
7018 	if (nd->nd_repstat == 0) {
7019 		if (vers == NFS_VER3) {
7020 			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7021 			    NULL);
7022 			NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
7023 			if (error != 0)
7024 				goto nfsmout;
7025 		}
7026 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
7027 		NFSLOCKDS(dsp);
7028 		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
7029 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7030 			error = NFSERR_STALEWRITEVERF;
7031 		}
7032 		NFSUNLOCKDS(dsp);
7033 	}
7034 nfsmout:
7035 	if (error == 0 && nd->nd_repstat != 0)
7036 		error = nd->nd_repstat;
7037 	m_freem(nd->nd_mrep);
7038 	return (error);
7039 }
7040 
7041 /*
7042  * Start up the thread that will execute nfsrpc_commitds().
7043  */
7044 static void
7045 start_commitds(void *arg, int pending)
7046 {
7047 	struct nfsclwritedsdorpc *drpc;
7048 
7049 	drpc = (struct nfsclwritedsdorpc *)arg;
7050 	drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7051 	    drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7052 	    drpc->p);
7053 	drpc->done = 1;
7054 	NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7055 }
7056 
7057 /*
7058  * Set up the commit DS mirror call for the pNFS I/O thread.
7059  */
7060 static int
7061 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7062     struct nfsfh *fhp, int vers, int minorvers,
7063     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7064 {
7065 	int error, ret;
7066 
7067 	error = 0;
7068 	drpc->done = 0;
7069 	drpc->vp = vp;
7070 	drpc->off = offset;
7071 	drpc->len = cnt;
7072 	drpc->dsp = dsp;
7073 	drpc->fhp = fhp;
7074 	drpc->vers = vers;
7075 	drpc->minorvers = minorvers;
7076 	drpc->cred = cred;
7077 	drpc->p = p;
7078 	drpc->inprog = 0;
7079 	ret = EIO;
7080 	if (nfs_pnfsiothreads != 0) {
7081 		ret = nfs_pnfsio(start_commitds, drpc);
7082 		NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7083 	}
7084 	if (ret != 0)
7085 		error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7086 		    minorvers, cred, p);
7087 	NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7088 	return (error);
7089 }
7090 
7091 /*
7092  * NFS Advise rpc
7093  */
7094 int
7095 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7096     struct ucred *cred, NFSPROC_T *p)
7097 {
7098 	u_int32_t *tl;
7099 	struct nfsrv_descript nfsd, *nd = &nfsd;
7100 	nfsattrbit_t hints;
7101 	int error;
7102 
7103 	NFSZERO_ATTRBIT(&hints);
7104 	if (advise == POSIX_FADV_WILLNEED)
7105 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7106 	else if (advise == POSIX_FADV_DONTNEED)
7107 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7108 	else
7109 		return (0);
7110 	NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp);
7111 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7112 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7113 	txdr_hyper(offset, tl);
7114 	tl += 2;
7115 	txdr_hyper(cnt, tl);
7116 	nfsrv_putattrbit(nd, &hints);
7117 	error = nfscl_request(nd, vp, p, cred, NULL);
7118 	if (error != 0)
7119 		return (error);
7120 	if (nd->nd_repstat != 0)
7121 		error = nd->nd_repstat;
7122 	m_freem(nd->nd_mrep);
7123 	return (error);
7124 }
7125 
7126 #ifdef notyet
7127 /*
7128  * NFS advise rpc to a NFSv4.2 DS.
7129  */
7130 static int
7131 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7132     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7133     struct ucred *cred, NFSPROC_T *p)
7134 {
7135 	uint32_t *tl;
7136 	struct nfsrv_descript nfsd, *nd = &nfsd;
7137 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7138 	struct nfssockreq *nrp;
7139 	nfsattrbit_t hints;
7140 	int error;
7141 
7142 	/* For NFS DSs prior to NFSv4.2, just return OK. */
7143 	if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7144 		return (0);
7145 	NFSZERO_ATTRBIT(&hints);
7146 	if (advise == POSIX_FADV_WILLNEED)
7147 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7148 	else if (advise == POSIX_FADV_DONTNEED)
7149 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7150 	else
7151 		return (0);
7152 	nd->nd_mrep = NULL;
7153 	nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7154 	    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
7155 	vers = NFS_VER4;
7156 	NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7157 	    minorvers);
7158 	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7159 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7160 	txdr_hyper(offset, tl);
7161 	tl += 2;
7162 	*tl = txdr_unsigned(cnt);
7163 	nfsrv_putattrbit(nd, &hints);
7164 	nrp = dsp->nfsclds_sockp;
7165 	if (nrp == NULL)
7166 		/* If NULL, use the MDS socket. */
7167 		nrp = &nmp->nm_sockreq;
7168 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7169 	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7170 	NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7171 	    nd->nd_repstat);
7172 	if (error != 0)
7173 		return (error);
7174 	if (nd->nd_repstat != 0)
7175 		error = nd->nd_repstat;
7176 	m_freem(nd->nd_mrep);
7177 	return (error);
7178 }
7179 
7180 /*
7181  * Start up the thread that will execute nfsrpc_commitds().
7182  */
7183 static void
7184 start_adviseds(void *arg, int pending)
7185 {
7186 	struct nfsclwritedsdorpc *drpc;
7187 
7188 	drpc = (struct nfsclwritedsdorpc *)arg;
7189 	drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7190 	    drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7191 	    drpc->cred, drpc->p);
7192 	drpc->done = 1;
7193 	NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7194 }
7195 
7196 /*
7197  * Set up the commit DS mirror call for the pNFS I/O thread.
7198  */
7199 static int
7200 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7201     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7202     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7203 {
7204 	int error, ret;
7205 
7206 	error = 0;
7207 	drpc->done = 0;
7208 	drpc->vp = vp;
7209 	drpc->off = offset;
7210 	drpc->len = cnt;
7211 	drpc->advise = advise;
7212 	drpc->dsp = dsp;
7213 	drpc->fhp = fhp;
7214 	drpc->vers = vers;
7215 	drpc->minorvers = minorvers;
7216 	drpc->cred = cred;
7217 	drpc->p = p;
7218 	drpc->inprog = 0;
7219 	ret = EIO;
7220 	if (nfs_pnfsiothreads != 0) {
7221 		ret = nfs_pnfsio(start_adviseds, drpc);
7222 		NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7223 	}
7224 	if (ret != 0)
7225 		error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7226 		    minorvers, cred, p);
7227 	NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7228 	return (error);
7229 }
7230 #endif	/* notyet */
7231 
7232 /*
7233  * Do the Allocate operation, retrying for recovery.
7234  */
7235 int
7236 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7237     int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff)
7238 {
7239 	int error, expireret = 0, retrycnt, nostateid;
7240 	uint32_t clidrev = 0;
7241 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7242 	struct nfsfh *nfhp = NULL;
7243 	nfsv4stateid_t stateid;
7244 	off_t tmp_off;
7245 	void *lckp;
7246 
7247 	if (len < 0)
7248 		return (EINVAL);
7249 	if (len == 0)
7250 		return (0);
7251 	tmp_off = off + len;
7252 	NFSLOCKMNT(nmp);
7253 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7254 		NFSUNLOCKMNT(nmp);
7255 		return (EFBIG);
7256 	}
7257 	if (nmp->nm_clp != NULL)
7258 		clidrev = nmp->nm_clp->nfsc_clientidrev;
7259 	NFSUNLOCKMNT(nmp);
7260 	nfhp = VTONFS(vp)->n_fhp;
7261 	retrycnt = 0;
7262 	do {
7263 		lckp = NULL;
7264 		nostateid = 0;
7265 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7266 		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7267 		if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7268 		    stateid.other[2] == 0) {
7269 			nostateid = 1;
7270 			NFSCL_DEBUG(1, "stateid0 in allocate\n");
7271 		}
7272 
7273 		/*
7274 		 * Not finding a stateid should probably never happen,
7275 		 * but just return an error for this case.
7276 		 */
7277 		if (nostateid != 0)
7278 			error = EIO;
7279 		else
7280 			error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7281 			    nap, attrflagp, cred, p, stuff);
7282 		if (error == NFSERR_STALESTATEID)
7283 			nfscl_initiate_recovery(nmp->nm_clp);
7284 		if (lckp != NULL)
7285 			nfscl_lockderef(lckp);
7286 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7287 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7288 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7289 			(void) nfs_catnap(PZERO, error, "nfs_allocate");
7290 		} else if ((error == NFSERR_EXPIRED ||
7291 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
7292 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7293 		}
7294 		retrycnt++;
7295 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7296 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7297 	    error == NFSERR_STALEDONTRECOVER ||
7298 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7299 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7300 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
7301 	if (error != 0 && retrycnt >= 4)
7302 		error = EIO;
7303 	return (error);
7304 }
7305 
7306 /*
7307  * The allocate RPC.
7308  */
7309 static int
7310 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7311     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p,
7312     void *stuff)
7313 {
7314 	uint32_t *tl;
7315 	int error;
7316 	struct nfsrv_descript nfsd;
7317 	struct nfsrv_descript *nd = &nfsd;
7318 	nfsattrbit_t attrbits;
7319 
7320 	*attrflagp = 0;
7321 	NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp);
7322 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7323 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7324 	txdr_hyper(off, tl); tl += 2;
7325 	txdr_hyper(len, tl); tl += 2;
7326 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7327 	NFSGETATTR_ATTRBIT(&attrbits);
7328 	nfsrv_putattrbit(nd, &attrbits);
7329 	error = nfscl_request(nd, vp, p, cred, stuff);
7330 	if (error != 0)
7331 		return (error);
7332 	if (nd->nd_repstat == 0) {
7333 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7334 		error = nfsm_loadattr(nd, nap);
7335 		if (error == 0)
7336 			*attrflagp = NFS_LATTR_NOSHRINK;
7337 	} else
7338 		error = nd->nd_repstat;
7339 nfsmout:
7340 	m_freem(nd->nd_mrep);
7341 	return (error);
7342 }
7343 
7344 /*
7345  * Set up the XDR arguments for the LayoutGet operation.
7346  */
7347 static void
7348 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7349     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7350     int layoutlen, int usecurstateid)
7351 {
7352 	uint32_t *tl;
7353 
7354 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7355 	    NFSX_STATEID);
7356 	*tl++ = newnfs_false;		/* Don't signal availability. */
7357 	*tl++ = txdr_unsigned(layouttype);
7358 	*tl++ = txdr_unsigned(iomode);
7359 	txdr_hyper(offset, tl);
7360 	tl += 2;
7361 	txdr_hyper(len, tl);
7362 	tl += 2;
7363 	txdr_hyper(minlen, tl);
7364 	tl += 2;
7365 	if (usecurstateid != 0) {
7366 		/* Special stateid for Current stateid. */
7367 		*tl++ = txdr_unsigned(1);
7368 		*tl++ = 0;
7369 		*tl++ = 0;
7370 		*tl++ = 0;
7371 	} else {
7372 		*tl++ = txdr_unsigned(stateidp->seqid);
7373 		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7374 		*tl++ = stateidp->other[0];
7375 		*tl++ = stateidp->other[1];
7376 		*tl++ = stateidp->other[2];
7377 	}
7378 	*tl = txdr_unsigned(layoutlen);
7379 }
7380 
7381 /*
7382  * Parse the reply for a successful LayoutGet operation.
7383  */
7384 static int
7385 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7386     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7387 {
7388 	uint32_t *tl;
7389 	struct nfsclflayout *flp, *prevflp, *tflp;
7390 	int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7391 	int m, mirrorcnt;
7392 	uint64_t retlen, off;
7393 	struct nfsfh *nfhp;
7394 	uint8_t *cp;
7395 	uid_t user;
7396 	gid_t grp;
7397 
7398 	NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7399 	error = 0;
7400 	flp = NULL;
7401 	gotiomode = -1;
7402 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7403 	if (*tl++ != 0)
7404 		*retonclosep = 1;
7405 	else
7406 		*retonclosep = 0;
7407 	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7408 	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7409 	    (int)stateidp->seqid);
7410 	stateidp->other[0] = *tl++;
7411 	stateidp->other[1] = *tl++;
7412 	stateidp->other[2] = *tl++;
7413 	cnt = fxdr_unsigned(int, *tl);
7414 	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7415 	if (cnt <= 0 || cnt > 10000) {
7416 		/* Don't accept more than 10000 layouts in reply. */
7417 		error = NFSERR_BADXDR;
7418 		goto nfsmout;
7419 	}
7420 	for (i = 0; i < cnt; i++) {
7421 		/* Dissect to the layout type. */
7422 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7423 		    3 * NFSX_UNSIGNED);
7424 		off = fxdr_hyper(tl); tl += 2;
7425 		retlen = fxdr_hyper(tl); tl += 2;
7426 		iomode = fxdr_unsigned(int, *tl++);
7427 		laytype = fxdr_unsigned(int, *tl);
7428 		NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7429 		    (uintmax_t)off, (uintmax_t)retlen, iomode);
7430 		/* Ignore length of layout body for now. */
7431 		if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7432 			/* Parse the File layout up to fhcnt. */
7433 			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7434 			    NFSX_HYPER + NFSX_V4DEVICEID);
7435 			fhcnt = fxdr_unsigned(int, *(tl + 4 +
7436 			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
7437 			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7438 			if (fhcnt < 0 || fhcnt > 100) {
7439 				/* Don't accept more than 100 file handles. */
7440 				error = NFSERR_BADXDR;
7441 				goto nfsmout;
7442 			}
7443 			if (fhcnt > 0)
7444 				flp = malloc(sizeof(*flp) + fhcnt *
7445 				    sizeof(struct nfsfh *), M_NFSFLAYOUT,
7446 				    M_WAITOK);
7447 			else
7448 				flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7449 				    M_WAITOK);
7450 			flp->nfsfl_flags = NFSFL_FILE;
7451 			flp->nfsfl_fhcnt = 0;
7452 			flp->nfsfl_devp = NULL;
7453 			flp->nfsfl_off = off;
7454 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7455 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7456 			else
7457 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7458 			flp->nfsfl_iomode = iomode;
7459 			if (gotiomode == -1)
7460 				gotiomode = flp->nfsfl_iomode;
7461 			/* Ignore layout body length for now. */
7462 			NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7463 			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7464 			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7465 			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7466 			mtx_lock(&nmp->nm_mtx);
7467 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7468 			    NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7469 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7470 			mtx_unlock(&nmp->nm_mtx);
7471 			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7472 			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7473 			NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7474 			    flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7475 			for (j = 0; j < fhcnt; j++) {
7476 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7477 				nfhlen = fxdr_unsigned(int, *tl);
7478 				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7479 					error = NFSERR_BADXDR;
7480 					goto nfsmout;
7481 				}
7482 				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7483 				    M_NFSFH, M_WAITOK);
7484 				flp->nfsfl_fh[j] = nfhp;
7485 				flp->nfsfl_fhcnt++;
7486 				nfhp->nfh_len = nfhlen;
7487 				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7488 				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7489 			}
7490 		} else if (laytype == NFSLAYOUT_FLEXFILE) {
7491 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7492 			    NFSX_HYPER);
7493 			mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7494 			NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7495 			if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7496 				error = NFSERR_BADXDR;
7497 				goto nfsmout;
7498 			}
7499 			flp = malloc(sizeof(*flp) + mirrorcnt *
7500 			    sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7501 			flp->nfsfl_flags = NFSFL_FLEXFILE;
7502 			flp->nfsfl_mirrorcnt = mirrorcnt;
7503 			for (j = 0; j < mirrorcnt; j++)
7504 				flp->nfsfl_ffm[j].devp = NULL;
7505 			flp->nfsfl_off = off;
7506 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7507 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7508 			else
7509 				flp->nfsfl_end = flp->nfsfl_off + retlen;
7510 			flp->nfsfl_iomode = iomode;
7511 			if (gotiomode == -1)
7512 				gotiomode = flp->nfsfl_iomode;
7513 			flp->nfsfl_stripeunit = fxdr_hyper(tl);
7514 			NFSCL_DEBUG(4, "stripeunit=%ju\n",
7515 			    (uintmax_t)flp->nfsfl_stripeunit);
7516 			for (j = 0; j < mirrorcnt; j++) {
7517 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7518 				k = fxdr_unsigned(int, *tl);
7519 				if (k < 1 || k > 128) {
7520 					error = NFSERR_BADXDR;
7521 					goto nfsmout;
7522 				}
7523 				NFSCL_DEBUG(4, "servercnt=%d\n", k);
7524 				for (l = 0; l < k; l++) {
7525 					NFSM_DISSECT(tl, uint32_t *,
7526 					    NFSX_V4DEVICEID + NFSX_STATEID +
7527 					    2 * NFSX_UNSIGNED);
7528 					if (l == 0) {
7529 						/* Just use the first server. */
7530 						NFSBCOPY(tl,
7531 						    flp->nfsfl_ffm[j].dev,
7532 						    NFSX_V4DEVICEID);
7533 						tl += (NFSX_V4DEVICEID /
7534 						    NFSX_UNSIGNED);
7535 						tl++;
7536 						flp->nfsfl_ffm[j].st.seqid =
7537 						    *tl++;
7538 						flp->nfsfl_ffm[j].st.other[0] =
7539 						    *tl++;
7540 						flp->nfsfl_ffm[j].st.other[1] =
7541 						    *tl++;
7542 						flp->nfsfl_ffm[j].st.other[2] =
7543 						    *tl++;
7544 						NFSCL_DEBUG(4, "st.seqid=%u "
7545 						 "st.o0=0x%x st.o1=0x%x "
7546 						 "st.o2=0x%x\n",
7547 						 flp->nfsfl_ffm[j].st.seqid,
7548 						 flp->nfsfl_ffm[j].st.other[0],
7549 						 flp->nfsfl_ffm[j].st.other[1],
7550 						 flp->nfsfl_ffm[j].st.other[2]);
7551 					} else
7552 						tl += ((NFSX_V4DEVICEID +
7553 						    NFSX_STATEID +
7554 						    NFSX_UNSIGNED) /
7555 						    NFSX_UNSIGNED);
7556 					fhcnt = fxdr_unsigned(int, *tl);
7557 					NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7558 					if (fhcnt < 1 ||
7559 					    fhcnt > NFSDEV_MAXVERS) {
7560 						error = NFSERR_BADXDR;
7561 						goto nfsmout;
7562 					}
7563 					for (m = 0; m < fhcnt; m++) {
7564 						NFSM_DISSECT(tl, uint32_t *,
7565 						    NFSX_UNSIGNED);
7566 						nfhlen = fxdr_unsigned(int,
7567 						    *tl);
7568 						NFSCL_DEBUG(4, "nfhlen=%d\n",
7569 						    nfhlen);
7570 						if (nfhlen <= 0 || nfhlen >
7571 						    NFSX_V4FHMAX) {
7572 							error = NFSERR_BADXDR;
7573 							goto nfsmout;
7574 						}
7575 						NFSM_DISSECT(cp, uint8_t *,
7576 						    NFSM_RNDUP(nfhlen));
7577 						if (l == 0) {
7578 							flp->nfsfl_ffm[j].fhcnt
7579 							    = fhcnt;
7580 							nfhp = malloc(
7581 							    sizeof(*nfhp) +
7582 							    nfhlen - 1, M_NFSFH,
7583 							    M_WAITOK);
7584 							flp->nfsfl_ffm[j].fh[m]
7585 							    = nfhp;
7586 							nfhp->nfh_len = nfhlen;
7587 							NFSBCOPY(cp,
7588 							    nfhp->nfh_fh,
7589 							    nfhlen);
7590 							NFSCL_DEBUG(4,
7591 							    "got fh\n");
7592 						}
7593 					}
7594 					/* Now, get the ffsd_user/ffds_group. */
7595 					error = nfsrv_parseug(nd, 0, &user,
7596 					    &grp, curthread);
7597 					NFSCL_DEBUG(4, "after parseu=%d\n",
7598 					    error);
7599 					if (error == 0)
7600 						error = nfsrv_parseug(nd, 1,
7601 						    &user, &grp, curthread);
7602 					NFSCL_DEBUG(4, "aft parseg=%d\n",
7603 					    grp);
7604 					if (error != 0)
7605 						goto nfsmout;
7606 					NFSCL_DEBUG(4, "user=%d group=%d\n",
7607 					    user, grp);
7608 					if (l == 0) {
7609 						flp->nfsfl_ffm[j].user = user;
7610 						flp->nfsfl_ffm[j].group = grp;
7611 						NFSCL_DEBUG(4,
7612 						    "usr=%d grp=%d\n", user,
7613 						    grp);
7614 					}
7615 				}
7616 			}
7617 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7618 			flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7619 #ifdef notnow
7620 			/*
7621 			 * At this time, there is no flag.
7622 			 * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7623 			 * added, or it may never exist?
7624 			 */
7625 			mtx_lock(&nmp->nm_mtx);
7626 			if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7627 			    NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7628 				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7629 			mtx_unlock(&nmp->nm_mtx);
7630 #endif
7631 			flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7632 			NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7633 			    flp->nfsfl_fflags, flp->nfsfl_statshint);
7634 		} else {
7635 			error = NFSERR_BADXDR;
7636 			goto nfsmout;
7637 		}
7638 		if (flp->nfsfl_iomode == gotiomode) {
7639 			/* Keep the list in increasing offset order. */
7640 			tflp = LIST_FIRST(flhp);
7641 			prevflp = NULL;
7642 			while (tflp != NULL &&
7643 			    tflp->nfsfl_off < flp->nfsfl_off) {
7644 				prevflp = tflp;
7645 				tflp = LIST_NEXT(tflp, nfsfl_list);
7646 			}
7647 			if (prevflp == NULL)
7648 				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7649 			else
7650 				LIST_INSERT_AFTER(prevflp, flp,
7651 				    nfsfl_list);
7652 			NFSCL_DEBUG(4, "flp inserted\n");
7653 		} else {
7654 			printf("nfscl_layoutget(): got wrong iomode\n");
7655 			nfscl_freeflayout(flp);
7656 		}
7657 		flp = NULL;
7658 	}
7659 nfsmout:
7660 	NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7661 	if (error != 0 && flp != NULL)
7662 		nfscl_freeflayout(flp);
7663 	return (error);
7664 }
7665 
7666 /*
7667  * Parse a user/group digit string.
7668  */
7669 static int
7670 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7671     NFSPROC_T *p)
7672 {
7673 	uint32_t *tl;
7674 	char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7675 	uint32_t len = 0;
7676 	int error = 0;
7677 
7678 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7679 	len = fxdr_unsigned(uint32_t, *tl);
7680 	str = NULL;
7681 	if (len > NFSV4_OPAQUELIMIT) {
7682 		error = NFSERR_BADXDR;
7683 		goto nfsmout;
7684 	}
7685 	NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7686 	if (len == 0) {
7687 		if (dogrp != 0)
7688 			*gidp = GID_NOGROUP;
7689 		else
7690 			*uidp = UID_NOBODY;
7691 		return (0);
7692 	}
7693 	if (len > NFSV4_SMALLSTR)
7694 		str = malloc(len + 1, M_TEMP, M_WAITOK);
7695 	else
7696 		str = str0;
7697 	NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7698 	NFSBCOPY(cp, str, len);
7699 	str[len] = '\0';
7700 	NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7701 	if (dogrp != 0)
7702 		error = nfsv4_strtogid(nd, str, len, gidp);
7703 	else
7704 		error = nfsv4_strtouid(nd, str, len, uidp);
7705 nfsmout:
7706 	if (len > NFSV4_SMALLSTR)
7707 		free(str, M_TEMP);
7708 	NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7709 	return (error);
7710 }
7711 
7712 /*
7713  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7714  * so that it does both an Open and a Layoutget.
7715  */
7716 static int
7717 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7718     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7719     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7720     struct ucred *cred, NFSPROC_T *p)
7721 {
7722 	struct nfscllayout *lyp;
7723 	struct nfsclflayout *flp;
7724 	struct nfsclflayouthead flh;
7725 	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7726 	int layouttype, laystat;
7727 	nfsv4stateid_t stateid;
7728 	struct nfsclsession *tsep;
7729 
7730 	error = 0;
7731 	if (NFSHASFLEXFILE(nmp))
7732 		layouttype = NFSLAYOUT_FLEXFILE;
7733 	else
7734 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
7735 	/*
7736 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7737 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7738 	 * flp == NULL.
7739 	 */
7740 	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
7741 	    &recalled);
7742 	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7743 	if (lyp == NULL)
7744 		islocked = 0;
7745 	else if (flp != NULL)
7746 		islocked = 1;
7747 	else
7748 		islocked = 2;
7749 	if ((lyp == NULL || flp == NULL) && recalled == 0) {
7750 		LIST_INIT(&flh);
7751 		tsep = nfsmnt_mdssession(nmp);
7752 		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7753 		    3 * NFSX_UNSIGNED);
7754 		if (lyp == NULL)
7755 			usecurstateid = 1;
7756 		else {
7757 			usecurstateid = 0;
7758 			stateid.seqid = lyp->nfsly_stateid.seqid;
7759 			stateid.other[0] = lyp->nfsly_stateid.other[0];
7760 			stateid.other[1] = lyp->nfsly_stateid.other[1];
7761 			stateid.other[2] = lyp->nfsly_stateid.other[2];
7762 		}
7763 		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7764 		    newfhp, newfhlen, mode, op, name, namelen,
7765 		    dpp, &stateid, usecurstateid, layouttype, layoutlen,
7766 		    &retonclose, &flh, &laystat, cred, p);
7767 		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7768 		    laystat, error);
7769 		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7770 		    &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7771 		    &islocked, cred, p);
7772 	} else
7773 		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7774 		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7775 	if (islocked == 2)
7776 		nfscl_rellayout(lyp, 1);
7777 	else if (islocked == 1)
7778 		nfscl_rellayout(lyp, 0);
7779 	return (error);
7780 }
7781 
7782 /*
7783  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7784  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7785  * handled by nfsrpc_openrpc().
7786  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7787  * can be NULL.
7788  */
7789 static int
7790 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7791     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7792     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7793     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7794     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7795     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7796 {
7797 	uint32_t *tl;
7798 	struct nfsrv_descript nfsd, *nd = &nfsd;
7799 	struct nfscldeleg *ndp = NULL;
7800 	struct nfsvattr nfsva;
7801 	struct nfsclsession *tsep;
7802 	uint32_t rflags, deleg;
7803 	nfsattrbit_t attrbits;
7804 	int error, ret, acesize, limitby, iomode;
7805 
7806 	*dpp = NULL;
7807 	*laystatp = ENXIO;
7808 	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7809 	    0, 0);
7810 	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7811 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7812 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7813 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7814 	tsep = nfsmnt_mdssession(nmp);
7815 	*tl++ = tsep->nfsess_clientid.lval[0];
7816 	*tl = tsep->nfsess_clientid.lval[1];
7817 	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7818 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7819 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7820 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7821 	nfsm_strtom(nd, name, namelen);
7822 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7823 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7824 	NFSZERO_ATTRBIT(&attrbits);
7825 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7826 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7827 	nfsrv_putattrbit(nd, &attrbits);
7828 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7829 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7830 	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7831 		iomode = NFSLAYOUTIOMODE_RW;
7832 	else
7833 		iomode = NFSLAYOUTIOMODE_READ;
7834 	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7835 	    layouttype, layoutlen, usecurstateid);
7836 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7837 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7838 	if (error != 0)
7839 		return (error);
7840 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7841 	if (nd->nd_repstat != 0)
7842 		*laystatp = nd->nd_repstat;
7843 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7844 		/* ND_NOMOREDATA will be set if the Open operation failed. */
7845 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7846 		    6 * NFSX_UNSIGNED);
7847 		op->nfso_stateid.seqid = *tl++;
7848 		op->nfso_stateid.other[0] = *tl++;
7849 		op->nfso_stateid.other[1] = *tl++;
7850 		op->nfso_stateid.other[2] = *tl;
7851 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7852 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7853 		if (error != 0)
7854 			goto nfsmout;
7855 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7856 		deleg = fxdr_unsigned(u_int32_t, *tl);
7857 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
7858 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
7859 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7860 			      NFSCLFLAGS_FIRSTDELEG))
7861 				op->nfso_own->nfsow_clp->nfsc_flags |=
7862 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7863 			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7864 			    M_NFSCLDELEG, M_WAITOK);
7865 			LIST_INIT(&ndp->nfsdl_owner);
7866 			LIST_INIT(&ndp->nfsdl_lock);
7867 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7868 			ndp->nfsdl_fhlen = newfhlen;
7869 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7870 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
7871 			nfscl_lockinit(&ndp->nfsdl_rwlock);
7872 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7873 			    NFSX_UNSIGNED);
7874 			ndp->nfsdl_stateid.seqid = *tl++;
7875 			ndp->nfsdl_stateid.other[0] = *tl++;
7876 			ndp->nfsdl_stateid.other[1] = *tl++;
7877 			ndp->nfsdl_stateid.other[2] = *tl++;
7878 			ret = fxdr_unsigned(int, *tl);
7879 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7880 				ndp->nfsdl_flags = NFSCLDL_WRITE;
7881 				/*
7882 				 * Indicates how much the file can grow.
7883 				 */
7884 				NFSM_DISSECT(tl, u_int32_t *,
7885 				    3 * NFSX_UNSIGNED);
7886 				limitby = fxdr_unsigned(int, *tl++);
7887 				switch (limitby) {
7888 				case NFSV4OPEN_LIMITSIZE:
7889 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7890 					break;
7891 				case NFSV4OPEN_LIMITBLOCKS:
7892 					ndp->nfsdl_sizelimit =
7893 					    fxdr_unsigned(u_int64_t, *tl++);
7894 					ndp->nfsdl_sizelimit *=
7895 					    fxdr_unsigned(u_int64_t, *tl);
7896 					break;
7897 				default:
7898 					error = NFSERR_BADXDR;
7899 					goto nfsmout;
7900 				};
7901 			} else
7902 				ndp->nfsdl_flags = NFSCLDL_READ;
7903 			if (ret != 0)
7904 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
7905 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
7906 			    &acesize, p);
7907 			if (error != 0)
7908 				goto nfsmout;
7909 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
7910 			error = NFSERR_BADXDR;
7911 			goto nfsmout;
7912 		}
7913 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7914 		    nfscl_assumeposixlocks)
7915 			op->nfso_posixlock = 1;
7916 		else
7917 			op->nfso_posixlock = 0;
7918 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7919 		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
7920 		if (*++tl == 0) {
7921 			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7922 			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7923 			    NULL, NULL, NULL, p, cred);
7924 			if (error != 0)
7925 				goto nfsmout;
7926 			if (ndp != NULL) {
7927 				ndp->nfsdl_change = nfsva.na_filerev;
7928 				ndp->nfsdl_modtime = nfsva.na_mtime;
7929 				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7930 				*dpp = ndp;
7931 				ndp = NULL;
7932 			}
7933 			/*
7934 			 * At this point, the Open has succeeded, so set
7935 			 * nd_repstat = NFS_OK.  If the Layoutget failed,
7936 			 * this function just won't return a layout.
7937 			 */
7938 			if (nd->nd_repstat == 0) {
7939 				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7940 				*laystatp = fxdr_unsigned(int, *++tl);
7941 				if (*laystatp == 0) {
7942 					error = nfsrv_parselayoutget(nmp, nd,
7943 					    stateidp, retonclosep, flhp);
7944 					if (error != 0)
7945 						*laystatp = error;
7946 				}
7947 			} else
7948 				nd->nd_repstat = 0;	/* Return 0 for Open. */
7949 		}
7950 	}
7951 	if (nd->nd_repstat != 0 && error == 0)
7952 		error = nd->nd_repstat;
7953 nfsmout:
7954 	free(ndp, M_NFSCLDELEG);
7955 	m_freem(nd->nd_mrep);
7956 	return (error);
7957 }
7958 
7959 /*
7960  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
7961  * Used only for mounts with pNFS enabled.
7962  */
7963 static int
7964 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7965     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7966     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7967     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7968     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7969     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7970     struct nfsclflayouthead *flhp, int *laystatp)
7971 {
7972 	uint32_t *tl;
7973 	int error = 0, deleg, newone, ret, acesize, limitby;
7974 	struct nfsrv_descript nfsd, *nd = &nfsd;
7975 	struct nfsclopen *op;
7976 	struct nfscldeleg *dp = NULL;
7977 	struct nfsnode *np;
7978 	struct nfsfh *nfhp;
7979 	struct nfsclsession *tsep;
7980 	nfsattrbit_t attrbits;
7981 	nfsv4stateid_t stateid;
7982 	struct nfsmount *nmp;
7983 
7984 	nmp = VFSTONFS(dvp->v_mount);
7985 	np = VTONFS(dvp);
7986 	*laystatp = ENXIO;
7987 	*unlockedp = 0;
7988 	*nfhpp = NULL;
7989 	*dpp = NULL;
7990 	*attrflagp = 0;
7991 	*dattrflagp = 0;
7992 	if (namelen > NFS_MAXNAMLEN)
7993 		return (ENAMETOOLONG);
7994 	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7995 	/*
7996 	 * For V4, this is actually an Open op.
7997 	 */
7998 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7999 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
8000 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
8001 	    NFSV4OPEN_ACCESSREAD);
8002 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
8003 	tsep = nfsmnt_mdssession(nmp);
8004 	*tl++ = tsep->nfsess_clientid.lval[0];
8005 	*tl = tsep->nfsess_clientid.lval[1];
8006 	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8007 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8008 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
8009 	if ((fmode & O_EXCL) != 0) {
8010 		if (NFSHASSESSPERSIST(nmp)) {
8011 			/* Use GUARDED for persistent sessions. */
8012 			*tl = txdr_unsigned(NFSCREATE_GUARDED);
8013 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8014 		} else {
8015 			/* Otherwise, use EXCLUSIVE4_1. */
8016 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
8017 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
8018 			*tl++ = cverf.lval[0];
8019 			*tl = cverf.lval[1];
8020 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
8021 		}
8022 	} else {
8023 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
8024 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
8025 	}
8026 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8027 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8028 	nfsm_strtom(nd, name, namelen);
8029 	/* Get the new file's handle and attributes, plus save the FH. */
8030 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
8031 	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
8032 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
8033 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8034 	NFSGETATTR_ATTRBIT(&attrbits);
8035 	nfsrv_putattrbit(nd, &attrbits);
8036 	/* Get the directory's post-op attributes. */
8037 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8038 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8039 	nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
8040 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8041 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8042 	nfsrv_putattrbit(nd, &attrbits);
8043 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8044 	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
8045 	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8046 	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
8047 	    layouttype, layoutlen, usecurstateid);
8048 	error = nfscl_request(nd, dvp, p, cred, dstuff);
8049 	if (error != 0)
8050 		return (error);
8051 	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
8052 	    error);
8053 	if (nd->nd_repstat != 0)
8054 		*laystatp = nd->nd_repstat;
8055 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
8056 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8057 		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
8058 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8059 		    6 * NFSX_UNSIGNED);
8060 		stateid.seqid = *tl++;
8061 		stateid.other[0] = *tl++;
8062 		stateid.other[1] = *tl++;
8063 		stateid.other[2] = *tl;
8064 		nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8065 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8066 		deleg = fxdr_unsigned(int, *tl);
8067 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
8068 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
8069 			if (!(owp->nfsow_clp->nfsc_flags &
8070 			      NFSCLFLAGS_FIRSTDELEG))
8071 				owp->nfsow_clp->nfsc_flags |=
8072 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8073 			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
8074 			    M_NFSCLDELEG, M_WAITOK);
8075 			LIST_INIT(&dp->nfsdl_owner);
8076 			LIST_INIT(&dp->nfsdl_lock);
8077 			dp->nfsdl_clp = owp->nfsow_clp;
8078 			newnfs_copyincred(cred, &dp->nfsdl_cred);
8079 			nfscl_lockinit(&dp->nfsdl_rwlock);
8080 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8081 			    NFSX_UNSIGNED);
8082 			dp->nfsdl_stateid.seqid = *tl++;
8083 			dp->nfsdl_stateid.other[0] = *tl++;
8084 			dp->nfsdl_stateid.other[1] = *tl++;
8085 			dp->nfsdl_stateid.other[2] = *tl++;
8086 			ret = fxdr_unsigned(int, *tl);
8087 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8088 				dp->nfsdl_flags = NFSCLDL_WRITE;
8089 				/*
8090 				 * Indicates how much the file can grow.
8091 				 */
8092 				NFSM_DISSECT(tl, u_int32_t *,
8093 				    3 * NFSX_UNSIGNED);
8094 				limitby = fxdr_unsigned(int, *tl++);
8095 				switch (limitby) {
8096 				case NFSV4OPEN_LIMITSIZE:
8097 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
8098 					break;
8099 				case NFSV4OPEN_LIMITBLOCKS:
8100 					dp->nfsdl_sizelimit =
8101 					    fxdr_unsigned(u_int64_t, *tl++);
8102 					dp->nfsdl_sizelimit *=
8103 					    fxdr_unsigned(u_int64_t, *tl);
8104 					break;
8105 				default:
8106 					error = NFSERR_BADXDR;
8107 					goto nfsmout;
8108 				};
8109 			} else {
8110 				dp->nfsdl_flags = NFSCLDL_READ;
8111 			}
8112 			if (ret != 0)
8113 				dp->nfsdl_flags |= NFSCLDL_RECALL;
8114 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
8115 			    &acesize, p);
8116 			if (error != 0)
8117 				goto nfsmout;
8118 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
8119 			error = NFSERR_BADXDR;
8120 			goto nfsmout;
8121 		}
8122 
8123 		/* Now, we should have the status for the SaveFH. */
8124 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8125 		if (*++tl == 0) {
8126 			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
8127 			/*
8128 			 * Now, process the GetFH and Getattr for the newly
8129 			 * created file. nfscl_mtofh() will set
8130 			 * ND_NOMOREDATA if these weren't successful.
8131 			 */
8132 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
8133 			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
8134 			if (error != 0)
8135 				goto nfsmout;
8136 		} else
8137 			nd->nd_flag |= ND_NOMOREDATA;
8138 		/* Now we have the PutFH and Getattr for the directory. */
8139 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8140 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8141 			if (*++tl != 0)
8142 				nd->nd_flag |= ND_NOMOREDATA;
8143 			else {
8144 				NFSM_DISSECT(tl, uint32_t *, 2 *
8145 				    NFSX_UNSIGNED);
8146 				if (*++tl != 0)
8147 					nd->nd_flag |= ND_NOMOREDATA;
8148 			}
8149 		}
8150 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8151 			/* Load the directory attributes. */
8152 			error = nfsm_loadattr(nd, dnap);
8153 			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
8154 			if (error != 0)
8155 				goto nfsmout;
8156 			*dattrflagp = 1;
8157 			if (dp != NULL && *attrflagp != 0) {
8158 				dp->nfsdl_change = nnap->na_filerev;
8159 				dp->nfsdl_modtime = nnap->na_mtime;
8160 				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8161 			}
8162 			/*
8163 			 * We can now complete the Open state.
8164 			 */
8165 			nfhp = *nfhpp;
8166 			if (dp != NULL) {
8167 				dp->nfsdl_fhlen = nfhp->nfh_len;
8168 				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
8169 				    nfhp->nfh_len);
8170 			}
8171 			/*
8172 			 * Get an Open structure that will be
8173 			 * attached to the OpenOwner, acquired already.
8174 			 */
8175 			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
8176 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
8177 			    cred, p, NULL, &op, &newone, NULL, 0);
8178 			if (error != 0)
8179 				goto nfsmout;
8180 			op->nfso_stateid = stateid;
8181 			newnfs_copyincred(cred, &op->nfso_cred);
8182 
8183 			nfscl_openrelease(nmp, op, error, newone);
8184 			*unlockedp = 1;
8185 
8186 			/* Now, handle the RestoreFH and LayoutGet. */
8187 			if (nd->nd_repstat == 0) {
8188 				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
8189 				*laystatp = fxdr_unsigned(int, *(tl + 3));
8190 				if (*laystatp == 0) {
8191 					error = nfsrv_parselayoutget(nmp, nd,
8192 					    stateidp, retonclosep, flhp);
8193 					if (error != 0)
8194 						*laystatp = error;
8195 				}
8196 				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
8197 				    error);
8198 			} else
8199 				nd->nd_repstat = 0;
8200 		}
8201 	}
8202 	if (nd->nd_repstat != 0 && error == 0)
8203 		error = nd->nd_repstat;
8204 	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
8205 		nfscl_initiate_recovery(owp->nfsow_clp);
8206 nfsmout:
8207 	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
8208 	if (error == 0)
8209 		*dpp = dp;
8210 	else
8211 		free(dp, M_NFSCLDELEG);
8212 	m_freem(nd->nd_mrep);
8213 	return (error);
8214 }
8215 
8216 /*
8217  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
8218  */
8219 static int
8220 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8221     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8222     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8223     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8224     int *dattrflagp, void *dstuff, int *unlockedp)
8225 {
8226 	struct nfscllayout *lyp;
8227 	struct nfsclflayouthead flh;
8228 	struct nfsfh *nfhp;
8229 	struct nfsclsession *tsep;
8230 	struct nfsmount *nmp;
8231 	nfsv4stateid_t stateid;
8232 	int error, layoutlen, layouttype, retonclose, laystat;
8233 
8234 	error = 0;
8235 	nmp = VFSTONFS(dvp->v_mount);
8236 	if (NFSHASFLEXFILE(nmp))
8237 		layouttype = NFSLAYOUT_FLEXFILE;
8238 	else
8239 		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8240 	LIST_INIT(&flh);
8241 	tsep = nfsmnt_mdssession(nmp);
8242 	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
8243 	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8244 	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8245 	    dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8246 	    &flh, &laystat);
8247 	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8248 	    laystat, error);
8249 	lyp = NULL;
8250 	if (laystat == 0) {
8251 		nfhp = *nfhpp;
8252 		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8253 		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8254 		    layouttype, laystat, NULL, cred, p);
8255 	} else
8256 		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8257 		    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8258 		    cred, p);
8259 	if (laystat == 0)
8260 		nfscl_rellayout(lyp, 0);
8261 	return (error);
8262 }
8263 
8264 /*
8265  * Process the results of a layoutget() operation.
8266  */
8267 static int
8268 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8269     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8270     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8271     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8272 {
8273 	struct nfsclflayout *tflp;
8274 	struct nfscldevinfo *dip;
8275 	uint8_t *dev;
8276 	int i, mirrorcnt;
8277 
8278 	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8279 		NFSLOCKMNT(nmp);
8280 		if (!NFSHASFLEXFILE(nmp)) {
8281 			/* Switch to using Flex File Layout. */
8282 			nmp->nm_state |= NFSSTA_FLEXFILE;
8283 		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
8284 			/* Disable pNFS. */
8285 			NFSCL_DEBUG(1, "disable PNFS\n");
8286 			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8287 		}
8288 		NFSUNLOCKMNT(nmp);
8289 	}
8290 	if (laystat == 0) {
8291 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8292 		LIST_FOREACH(tflp, flhp, nfsfl_list) {
8293 			if (layouttype == NFSLAYOUT_FLEXFILE)
8294 				mirrorcnt = tflp->nfsfl_mirrorcnt;
8295 			else
8296 				mirrorcnt = 1;
8297 			for (i = 0; i < mirrorcnt; i++) {
8298 				laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8299 				NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8300 				if (laystat != 0) {
8301 					if (layouttype == NFSLAYOUT_FLEXFILE)
8302 						dev = tflp->nfsfl_ffm[i].dev;
8303 					else
8304 						dev = tflp->nfsfl_dev;
8305 					laystat = nfsrpc_getdeviceinfo(nmp, dev,
8306 					    layouttype, notifybit, &dip, cred,
8307 					    p);
8308 					NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8309 					    laystat);
8310 					if (laystat != 0)
8311 						goto out;
8312 					laystat = nfscl_adddevinfo(nmp, dip, i,
8313 					    tflp);
8314 					if (laystat != 0)
8315 						printf("nfsrpc_layoutgetresout"
8316 						    ": cannot add\n");
8317 				}
8318 			}
8319 		}
8320 	}
8321 out:
8322 	if (laystat == 0) {
8323 		/*
8324 		 * nfscl_layout() always returns with the nfsly_lock
8325 		 * set to a refcnt (shared lock).
8326 		 * Passing in dvp is sufficient, since it is only used to
8327 		 * get the fsid for the file system.
8328 		 */
8329 		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8330 		    layouttype, retonclose, flhp, lypp, cred, p);
8331 		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8332 		    laystat);
8333 		if (laystat == 0 && islockedp != NULL)
8334 			*islockedp = 1;
8335 	}
8336 	return (laystat);
8337 }
8338 
8339 /*
8340  * nfs copy_file_range operation.
8341  */
8342 int
8343 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8344     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8345     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8346     struct ucred *cred, bool consecutive, bool *must_commitp)
8347 {
8348 	int commit, error, expireret = 0, retrycnt;
8349 	u_int32_t clidrev = 0;
8350 	struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8351 	struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8352 	nfsv4stateid_t instateid, outstateid;
8353 	void *inlckp, *outlckp;
8354 
8355 	if (nmp->nm_clp != NULL)
8356 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8357 	innfhp = VTONFS(invp)->n_fhp;
8358 	outnfhp = VTONFS(outvp)->n_fhp;
8359 	retrycnt = 0;
8360 	do {
8361 		/* Get both stateids. */
8362 		inlckp = NULL;
8363 		nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8364 		    NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8365 		    &inlckp);
8366 		outlckp = NULL;
8367 		nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8368 		    NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8369 		    &outlckp);
8370 
8371 		error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8372 		    &instateid, &outstateid, innap, inattrflagp, outnap,
8373 		    outattrflagp, consecutive, &commit, cred, curthread);
8374 		if (error == 0) {
8375 			if (commit != NFSWRITE_FILESYNC)
8376 				*must_commitp = true;
8377 			*inoffp += *lenp;
8378 			*outoffp += *lenp;
8379 		} else if (error == NFSERR_STALESTATEID)
8380 			nfscl_initiate_recovery(nmp->nm_clp);
8381 		if (inlckp != NULL)
8382 			nfscl_lockderef(inlckp);
8383 		if (outlckp != NULL)
8384 			nfscl_lockderef(outlckp);
8385 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8386 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8387 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8388 			(void) nfs_catnap(PZERO, error, "nfs_cfr");
8389 		} else if ((error == NFSERR_EXPIRED ||
8390 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
8391 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8392 			    curthread);
8393 		}
8394 		retrycnt++;
8395 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8396 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8397 	      error == NFSERR_STALEDONTRECOVER ||
8398 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8399 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8400 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
8401 	if (error != 0 && (retrycnt >= 4 ||
8402 	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8403 	      error == NFSERR_STALEDONTRECOVER))
8404 		error = EIO;
8405 	return (error);
8406 }
8407 
8408 /*
8409  * The copy RPC.
8410  */
8411 static int
8412 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8413     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8414     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8415     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8416     NFSPROC_T *p)
8417 {
8418 	uint32_t *tl;
8419 	int error;
8420 	struct nfsrv_descript nfsd;
8421 	struct nfsrv_descript *nd = &nfsd;
8422 	struct nfsmount *nmp;
8423 	nfsattrbit_t attrbits;
8424 	uint64_t len;
8425 
8426 	nmp = VFSTONFS(outvp->v_mount);
8427 	*inattrflagp = *outattrflagp = 0;
8428 	*commitp = NFSWRITE_UNSTABLE;
8429 	len = *lenp;
8430 	*lenp = 0;
8431 	if (len > nfs_maxcopyrange)
8432 		len = nfs_maxcopyrange;
8433 	NFSCL_REQSTART(nd, NFSPROC_COPY, invp);
8434 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8435 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8436 	NFSGETATTR_ATTRBIT(&attrbits);
8437 	nfsrv_putattrbit(nd, &attrbits);
8438 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8439 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8440 	nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh,
8441 	    VTONFS(outvp)->n_fhp->nfh_len, 0);
8442 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8443 	*tl = txdr_unsigned(NFSV4OP_COPY);
8444 	nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8445 	nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8446 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8447 	txdr_hyper(inoff, tl); tl += 2;
8448 	txdr_hyper(outoff, tl); tl += 2;
8449 	txdr_hyper(len, tl); tl += 2;
8450 	if (consecutive)
8451 		*tl++ = newnfs_true;
8452 	else
8453 		*tl++ = newnfs_false;
8454 	*tl++ = newnfs_true;
8455 	*tl++ = 0;
8456 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8457 	NFSWRITEGETATTR_ATTRBIT(&attrbits);
8458 	nfsrv_putattrbit(nd, &attrbits);
8459 	error = nfscl_request(nd, invp, p, cred, NULL);
8460 	if (error != 0)
8461 		return (error);
8462 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8463 		/* Get the input file's attributes. */
8464 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8465 		if (*(tl + 1) == 0) {
8466 			error = nfsm_loadattr(nd, innap);
8467 			if (error != 0)
8468 				goto nfsmout;
8469 			*inattrflagp = 1;
8470 		} else
8471 			nd->nd_flag |= ND_NOMOREDATA;
8472 	}
8473 	/* Skip over return stat for PutFH. */
8474 	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8475 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8476 		if (*++tl != 0)
8477 			nd->nd_flag |= ND_NOMOREDATA;
8478 	}
8479 	/* Skip over return stat for Copy. */
8480 	if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8481 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8482 	if (nd->nd_repstat == 0) {
8483 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8484 		if (*tl != 0) {
8485 			/* There should be no callback ids. */
8486 			error = NFSERR_BADXDR;
8487 			goto nfsmout;
8488 		}
8489 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8490 		    NFSX_VERF);
8491 		len = fxdr_hyper(tl); tl += 2;
8492 		*commitp = fxdr_unsigned(int, *tl++);
8493 		NFSLOCKMNT(nmp);
8494 		if (!NFSHASWRITEVERF(nmp)) {
8495 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8496 			NFSSETWRITEVERF(nmp);
8497 	    	} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8498 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8499 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
8500 		}
8501 		NFSUNLOCKMNT(nmp);
8502 		tl += (NFSX_VERF / NFSX_UNSIGNED);
8503 		if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8504 			/* Must be a synchronous copy. */
8505 			nd->nd_repstat = NFSERR_NOTSUPP;
8506 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8507 		error = nfsm_loadattr(nd, outnap);
8508 		if (error == 0)
8509 			*outattrflagp = NFS_LATTR_NOSHRINK;
8510 		if (nd->nd_repstat == 0)
8511 			*lenp = len;
8512 	} else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8513 		/*
8514 		 * For the case where consecutive is not supported, but
8515 		 * synchronous is supported, we can try consecutive == false
8516 		 * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8517 		 * since Copy cannot be done.
8518 		 */
8519 		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8520 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8521 			if (!consecutive || *++tl == newnfs_false)
8522 				nd->nd_repstat = NFSERR_NOTSUPP;
8523 		} else
8524 			nd->nd_repstat = NFSERR_BADXDR;
8525 	}
8526 	if (error == 0)
8527 		error = nd->nd_repstat;
8528 nfsmout:
8529 	m_freem(nd->nd_mrep);
8530 	return (error);
8531 }
8532 
8533 /*
8534  * Seek operation.
8535  */
8536 int
8537 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8538     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8539 {
8540 	int error, expireret = 0, retrycnt;
8541 	u_int32_t clidrev = 0;
8542 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8543 	struct nfsnode *np = VTONFS(vp);
8544 	struct nfsfh *nfhp = NULL;
8545 	nfsv4stateid_t stateid;
8546 	void *lckp;
8547 
8548 	if (nmp->nm_clp != NULL)
8549 		clidrev = nmp->nm_clp->nfsc_clientidrev;
8550 	nfhp = np->n_fhp;
8551 	retrycnt = 0;
8552 	do {
8553 		lckp = NULL;
8554 		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8555 		    NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8556 		error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8557 		    nap, attrflagp, cred);
8558 		if (error == NFSERR_STALESTATEID)
8559 			nfscl_initiate_recovery(nmp->nm_clp);
8560 		if (lckp != NULL)
8561 			nfscl_lockderef(lckp);
8562 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8563 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8564 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8565 			(void) nfs_catnap(PZERO, error, "nfs_seek");
8566 		} else if ((error == NFSERR_EXPIRED ||
8567 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
8568 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8569 			    curthread);
8570 		}
8571 		retrycnt++;
8572 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8573 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8574 	    error == NFSERR_BADSESSION ||
8575 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8576 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8577 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8578 	    (error == NFSERR_OPENMODE && retrycnt < 4));
8579 	if (error && retrycnt >= 4)
8580 		error = EIO;
8581 	return (error);
8582 }
8583 
8584 /*
8585  * The seek RPC.
8586  */
8587 static int
8588 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8589     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8590 {
8591 	uint32_t *tl;
8592 	int error;
8593 	struct nfsrv_descript nfsd;
8594 	struct nfsrv_descript *nd = &nfsd;
8595 	nfsattrbit_t attrbits;
8596 
8597 	*attrflagp = 0;
8598 	NFSCL_REQSTART(nd, NFSPROC_SEEK, vp);
8599 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8600 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8601 	txdr_hyper(*offp, tl); tl += 2;
8602 	*tl++ = txdr_unsigned(content);
8603 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8604 	NFSGETATTR_ATTRBIT(&attrbits);
8605 	nfsrv_putattrbit(nd, &attrbits);
8606 	error = nfscl_request(nd, vp, curthread, cred, NULL);
8607 	if (error != 0)
8608 		return (error);
8609 	if (nd->nd_repstat == 0) {
8610 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8611 		if (*tl++ == newnfs_true)
8612 			*eofp = true;
8613 		else
8614 			*eofp = false;
8615 		*offp = fxdr_hyper(tl);
8616 		/* Just skip over Getattr op status. */
8617 		error = nfsm_loadattr(nd, nap);
8618 		if (error == 0)
8619 			*attrflagp = 1;
8620 	}
8621 	error = nd->nd_repstat;
8622 nfsmout:
8623 	m_freem(nd->nd_mrep);
8624 	return (error);
8625 }
8626 
8627 /*
8628  * The getextattr RPC.
8629  */
8630 int
8631 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8632     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8633 {
8634 	uint32_t *tl;
8635 	int error;
8636 	struct nfsrv_descript nfsd;
8637 	struct nfsrv_descript *nd = &nfsd;
8638 	nfsattrbit_t attrbits;
8639 	uint32_t len, len2;
8640 
8641 	*attrflagp = 0;
8642 	NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp);
8643 	nfsm_strtom(nd, name, strlen(name));
8644 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8645 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8646 	NFSGETATTR_ATTRBIT(&attrbits);
8647 	nfsrv_putattrbit(nd, &attrbits);
8648 	error = nfscl_request(nd, vp, p, cred, NULL);
8649 	if (error != 0)
8650 		return (error);
8651 	if (nd->nd_repstat == 0) {
8652 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8653 		len = fxdr_unsigned(uint32_t, *tl);
8654 		/* Sanity check lengths. */
8655 		if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8656 		    uiop->uio_resid <= UINT32_MAX) {
8657 			len2 = uiop->uio_resid;
8658 			if (len2 >= len)
8659 				error = nfsm_mbufuio(nd, uiop, len);
8660 			else {
8661 				error = nfsm_mbufuio(nd, uiop, len2);
8662 				if (error == 0) {
8663 					/*
8664 					 * nfsm_mbufuio() advances to a multiple
8665 					 * of 4, so round up len2 as well.  Then
8666 					 * we need to advance over the rest of
8667 					 * the data, rounding up the remaining
8668 					 * length.
8669 					 */
8670 					len2 = NFSM_RNDUP(len2);
8671 					len2 = NFSM_RNDUP(len - len2);
8672 					if (len2 > 0)
8673 						error = nfsm_advance(nd, len2,
8674 						    -1);
8675 				}
8676 			}
8677 		} else if (uiop == NULL && len > 0) {
8678 			/* Just wants the length and not the data. */
8679 			error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8680 		} else if (len > 0)
8681 			error = ENOATTR;
8682 		if (error != 0)
8683 			goto nfsmout;
8684 		*lenp = len;
8685 		/* Just skip over Getattr op status. */
8686 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8687 		error = nfsm_loadattr(nd, nap);
8688 		if (error == 0)
8689 			*attrflagp = 1;
8690 	}
8691 	if (error == 0)
8692 		error = nd->nd_repstat;
8693 nfsmout:
8694 	m_freem(nd->nd_mrep);
8695 	return (error);
8696 }
8697 
8698 /*
8699  * The setextattr RPC.
8700  */
8701 int
8702 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8703     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8704 {
8705 	uint32_t *tl;
8706 	int error;
8707 	struct nfsrv_descript nfsd;
8708 	struct nfsrv_descript *nd = &nfsd;
8709 	nfsattrbit_t attrbits;
8710 
8711 	*attrflagp = 0;
8712 	NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp);
8713 	if (uiop->uio_resid > nd->nd_maxreq) {
8714 		/* nd_maxreq is set by NFSCL_REQSTART(). */
8715 		m_freem(nd->nd_mreq);
8716 		return (EINVAL);
8717 	}
8718 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8719 	*tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8720 	nfsm_strtom(nd, name, strlen(name));
8721 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8722 	*tl = txdr_unsigned(uiop->uio_resid);
8723 	nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8724 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8725 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8726 	NFSGETATTR_ATTRBIT(&attrbits);
8727 	nfsrv_putattrbit(nd, &attrbits);
8728 	error = nfscl_request(nd, vp, p, cred, NULL);
8729 	if (error != 0)
8730 		return (error);
8731 	if (nd->nd_repstat == 0) {
8732 		/* Just skip over the reply and Getattr op status. */
8733 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8734 		    NFSX_UNSIGNED);
8735 		error = nfsm_loadattr(nd, nap);
8736 		if (error == 0)
8737 			*attrflagp = 1;
8738 	}
8739 	if (error == 0)
8740 		error = nd->nd_repstat;
8741 nfsmout:
8742 	m_freem(nd->nd_mrep);
8743 	return (error);
8744 }
8745 
8746 /*
8747  * The removeextattr RPC.
8748  */
8749 int
8750 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8751     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8752 {
8753 	uint32_t *tl;
8754 	int error;
8755 	struct nfsrv_descript nfsd;
8756 	struct nfsrv_descript *nd = &nfsd;
8757 	nfsattrbit_t attrbits;
8758 
8759 	*attrflagp = 0;
8760 	NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp);
8761 	nfsm_strtom(nd, name, strlen(name));
8762 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8763 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8764 	NFSGETATTR_ATTRBIT(&attrbits);
8765 	nfsrv_putattrbit(nd, &attrbits);
8766 	error = nfscl_request(nd, vp, p, cred, NULL);
8767 	if (error != 0)
8768 		return (error);
8769 	if (nd->nd_repstat == 0) {
8770 		/* Just skip over the reply and Getattr op status. */
8771 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8772 		    NFSX_UNSIGNED);
8773 		error = nfsm_loadattr(nd, nap);
8774 		if (error == 0)
8775 			*attrflagp = 1;
8776 	}
8777 	if (error == 0)
8778 		error = nd->nd_repstat;
8779 nfsmout:
8780 	m_freem(nd->nd_mrep);
8781 	return (error);
8782 }
8783 
8784 /*
8785  * The listextattr RPC.
8786  */
8787 int
8788 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8789     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8790     struct ucred *cred, NFSPROC_T *p)
8791 {
8792 	uint32_t *tl;
8793 	int cnt, error, i, len;
8794 	struct nfsrv_descript nfsd;
8795 	struct nfsrv_descript *nd = &nfsd;
8796 	nfsattrbit_t attrbits;
8797 	u_char c;
8798 
8799 	*attrflagp = 0;
8800 	NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp);
8801 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8802 	txdr_hyper(*cookiep, tl); tl += 2;
8803 	*tl++ = txdr_unsigned(*lenp);
8804 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8805 	NFSGETATTR_ATTRBIT(&attrbits);
8806 	nfsrv_putattrbit(nd, &attrbits);
8807 	error = nfscl_request(nd, vp, p, cred, NULL);
8808 	if (error != 0)
8809 		return (error);
8810 	*eofp = true;
8811 	*lenp = 0;
8812 	if (nd->nd_repstat == 0) {
8813 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
8814 		*cookiep = fxdr_hyper(tl); tl += 2;
8815 		cnt = fxdr_unsigned(int, *tl);
8816 		if (cnt < 0) {
8817 			error = EBADRPC;
8818 			goto nfsmout;
8819 		}
8820 		for (i = 0; i < cnt; i++) {
8821 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8822 			len = fxdr_unsigned(int, *tl);
8823 			if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
8824 				error = EBADRPC;
8825 				goto nfsmout;
8826 			}
8827 			if (uiop == NULL)
8828 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8829 			else if (uiop->uio_resid >= len + 1) {
8830 				c = len;
8831 				error = uiomove(&c, sizeof(c), uiop);
8832 				if (error == 0)
8833 					error = nfsm_mbufuio(nd, uiop, len);
8834 			} else {
8835 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8836 				*eofp = false;
8837 			}
8838 			if (error != 0)
8839 				goto nfsmout;
8840 			*lenp += (len + 1);
8841 		}
8842 		/* Get the eof and skip over the Getattr op status. */
8843 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
8844 		/*
8845 		 * *eofp is set false above, because it wasn't able to copy
8846 		 * all of the reply.
8847 		 */
8848 		if (*eofp && *tl == 0)
8849 			*eofp = false;
8850 		error = nfsm_loadattr(nd, nap);
8851 		if (error == 0)
8852 			*attrflagp = 1;
8853 	}
8854 	if (error == 0)
8855 		error = nd->nd_repstat;
8856 nfsmout:
8857 	m_freem(nd->nd_mrep);
8858 	return (error);
8859 }
8860 
8861 /*
8862  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
8863  */
8864 static struct mbuf *
8865 nfsm_split(struct mbuf *mp, uint64_t xfer)
8866 {
8867 	struct mbuf *m, *m2;
8868 	vm_page_t pg;
8869 	int i, j, left, pgno, plen, trim;
8870 	char *cp, *cp2;
8871 
8872 	if ((mp->m_flags & M_EXTPG) == 0) {
8873 		m = m_split(mp, xfer, M_WAITOK);
8874 		return (m);
8875 	}
8876 
8877 	/* Find the correct mbuf to split at. */
8878 	for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
8879 		xfer -= m->m_len;
8880 	if (m == NULL)
8881 		return (NULL);
8882 
8883 	/* If xfer == m->m_len, we can just split the mbuf list. */
8884 	if (xfer == m->m_len) {
8885 		m2 = m->m_next;
8886 		m->m_next = NULL;
8887 		return (m2);
8888 	}
8889 
8890 	/* Find the page to split at. */
8891 	pgno = 0;
8892 	left = xfer;
8893 	do {
8894 		if (pgno == 0)
8895 			plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
8896 		else
8897 			plen = m_epg_pagelen(m, pgno, 0);
8898 		if (left <= plen)
8899 			break;
8900 		left -= plen;
8901 		pgno++;
8902 	} while (pgno < m->m_epg_npgs);
8903 	if (pgno == m->m_epg_npgs)
8904 		panic("nfsm_split: eroneous ext_pgs mbuf");
8905 
8906 	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
8907 	m2->m_epg_flags |= EPG_FLAG_ANON;
8908 
8909 	/*
8910 	 * If left < plen, allocate a new page for the new mbuf
8911 	 * and copy the data after left in the page to this new
8912 	 * page.
8913 	 */
8914 	if (left < plen) {
8915 		do {
8916 			pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
8917 			    VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
8918 			    VM_ALLOC_WIRED);
8919 			if (pg == NULL)
8920 				vm_wait(NULL);
8921 		} while (pg == NULL);
8922 		m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
8923 		m2->m_epg_npgs = 1;
8924 
8925 		/* Copy the data after left to the new page. */
8926 		trim = plen - left;
8927 		cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
8928 		if (pgno == 0)
8929 			cp += m->m_epg_1st_off;
8930 		cp += left;
8931 		cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
8932 		if (pgno == m->m_epg_npgs - 1)
8933 			m2->m_epg_last_len = trim;
8934 		else {
8935 			cp2 += PAGE_SIZE - trim;
8936 			m2->m_epg_1st_off = PAGE_SIZE - trim;
8937 			m2->m_epg_last_len = m->m_epg_last_len;
8938 		}
8939 		memcpy(cp2, cp, trim);
8940 		m2->m_len = trim;
8941 	} else {
8942 		m2->m_len = 0;
8943 		m2->m_epg_last_len = m->m_epg_last_len;
8944 	}
8945 
8946 	/* Move the pages beyond pgno to the new mbuf. */
8947 	for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
8948 		m2->m_epg_pa[j] = m->m_epg_pa[i];
8949 		/* Never moves page 0. */
8950 		m2->m_len += m_epg_pagelen(m, i, 0);
8951 	}
8952 	m2->m_epg_npgs = j;
8953 	m->m_epg_npgs = pgno + 1;
8954 	m->m_epg_last_len = left;
8955 	m->m_len = xfer;
8956 
8957 	m2->m_next = m->m_next;
8958 	m->m_next = NULL;
8959 	return (m2);
8960 }
8961 
8962 /*
8963  * Do the NFSv4.1 Bind Connection to Session.
8964  * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
8965  */
8966 void
8967 nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
8968 {
8969 	struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
8970 	uint32_t res, *tl;
8971 	struct nfsrv_descript nfsd;
8972 	struct nfsrv_descript *nd = &nfsd;
8973 	struct rpc_callextra ext;
8974 	struct timeval utimeout;
8975 	enum clnt_stat stat;
8976 	int error;
8977 
8978 	nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
8979 	    NFS_VER4, rcp->minorvers);
8980 	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
8981 	memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
8982 	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
8983 	*tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
8984 	*tl = newnfs_false;
8985 
8986 	memset(&ext, 0, sizeof(ext));
8987 	utimeout.tv_sec = 30;
8988 	utimeout.tv_usec = 0;
8989 	ext.rc_auth = authunix_create(cr);
8990 	nd->nd_mrep = NULL;
8991 	stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
8992 	    &nd->nd_mrep, utimeout);
8993 	AUTH_DESTROY(ext.rc_auth);
8994 	if (stat != RPC_SUCCESS) {
8995 		printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
8996 		return;
8997 	}
8998 	if (nd->nd_mrep == NULL) {
8999 		printf("nfsrpc_bindconnsess: no reply args\n");
9000 		return;
9001 	}
9002 	error = 0;
9003 	newnfs_realign(&nd->nd_mrep, M_WAITOK);
9004 	nd->nd_md = nd->nd_mrep;
9005 	nd->nd_dpos = mtod(nd->nd_md, char *);
9006 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9007 	nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9008 	if (nd->nd_repstat == NFSERR_OK) {
9009 		res = fxdr_unsigned(uint32_t, *tl);
9010 		if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9011 		    -1)) != 0)
9012 			goto nfsmout;
9013 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9014 		    4 * NFSX_UNSIGNED);
9015 		tl += 3;
9016 		if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9017 			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9018 			res = fxdr_unsigned(uint32_t, *tl);
9019 			if (res != NFSCDFS4_BOTH)
9020 				printf("nfsrpc_bindconnsess: did not "
9021 				    "return FS4_BOTH\n");
9022 		} else
9023 			printf("nfsrpc_bindconnsess: not same "
9024 			    "sessionid\n");
9025 	} else if (nd->nd_repstat != NFSERR_BADSESSION)
9026 		printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9027 nfsmout:
9028 	if (error != 0)
9029 		printf("nfsrpc_bindconnsess: reply bad xdr\n");
9030 	m_freem(nd->nd_mrep);
9031 }
9032