xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_state.c (revision d17be682)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2018 Nexenta Systems, Inc.
28  * Copyright 2019 Nexenta by DDN, Inc.
29  * Copyright 2023 MNX Cloud, Inc.
30  */
31 
32 #include <sys/systm.h>
33 #include <sys/kmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/atomic.h>
36 #include <sys/clconf.h>
37 #include <sys/cladm.h>
38 #include <sys/flock.h>
39 #include <nfs/export.h>
40 #include <nfs/nfs.h>
41 #include <nfs/nfs4.h>
42 #include <nfs/nfssys.h>
43 #include <nfs/lm.h>
44 #include <sys/pathname.h>
45 #include <sys/sdt.h>
46 #include <sys/nvpair.h>
47 
48 extern u_longlong_t nfs4_srv_caller_id;
49 
50 extern uint_t nfs4_srv_vkey;
51 
/*
 * The two stateid values that ISSPECIAL() checks for: special0 has every
 * member zero, special1 has every bit of every member set.
 */
stateid4 special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

stateid4 special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};
65 
66 
/*
 * Evaluates to true when stateid4_cmp() returns nonzero for id against
 * special0 or special1.
 * NOTE(review): presumably a nonzero result means the two stateids are
 * equal -- confirm against stateid4_cmp().
 */
#define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
			stateid4_cmp(id, &special1))
69 
70 /* For embedding the cluster nodeid into our clientid */
71 #define	CLUSTER_NODEID_SHIFT	24
72 #define	CLUSTER_MAX_NODEID	255
73 
74 #ifdef DEBUG
75 int rfs4_debug;
76 #endif
77 
78 rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
79 static uint32_t rfs4_database_debug = 0x00;
80 
81 /* CSTYLED */
82 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
83 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
84 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
85 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
86 
87 /*
88  * Couple of simple init/destroy functions for a general waiter
89  */
90 void
91 rfs4_sw_init(rfs4_state_wait_t *swp)
92 {
93 	mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
94 	cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
95 	swp->sw_active = FALSE;
96 	swp->sw_wait_count = 0;
97 }
98 
99 void
100 rfs4_sw_destroy(rfs4_state_wait_t *swp)
101 {
102 	mutex_destroy(swp->sw_cv_lock);
103 	cv_destroy(swp->sw_cv);
104 }
105 
106 void
107 rfs4_sw_enter(rfs4_state_wait_t *swp)
108 {
109 	mutex_enter(swp->sw_cv_lock);
110 	while (swp->sw_active) {
111 		swp->sw_wait_count++;
112 		cv_wait(swp->sw_cv, swp->sw_cv_lock);
113 		swp->sw_wait_count--;
114 	}
115 	ASSERT(swp->sw_active == FALSE);
116 	swp->sw_active = TRUE;
117 	mutex_exit(swp->sw_cv_lock);
118 }
119 
120 void
121 rfs4_sw_exit(rfs4_state_wait_t *swp)
122 {
123 	mutex_enter(swp->sw_cv_lock);
124 	ASSERT(swp->sw_active == TRUE);
125 	swp->sw_active = FALSE;
126 	if (swp->sw_wait_count != 0)
127 		cv_broadcast(swp->sw_cv);
128 	mutex_exit(swp->sw_cv_lock);
129 }
130 
131 static void
132 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
133 {
134 	lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
135 	lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
136 
137 	if (sres->status == NFS4ERR_DENIED) {
138 		dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
139 		bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
140 	}
141 }
142 
143 /*
144  * CPR callback id -- not related to v4 callbacks
145  */
146 static callb_id_t cpr_id = 0;
147 
148 static void
149 deep_lock_free(LOCK4res *res)
150 {
151 	lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
152 
153 	if (res->status == NFS4ERR_DENIED)
154 		kmem_free(lo->owner_val, lo->owner_len);
155 }
156 
157 static void
158 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
159 {
160 	nfsace4 *sacep, *dacep;
161 
162 	if (sres->status != NFS4_OK) {
163 		return;
164 	}
165 
166 	dres->attrset = sres->attrset;
167 
168 	switch (sres->delegation.delegation_type) {
169 	case OPEN_DELEGATE_NONE:
170 		return;
171 	case OPEN_DELEGATE_READ:
172 		sacep = &sres->delegation.open_delegation4_u.read.permissions;
173 		dacep = &dres->delegation.open_delegation4_u.read.permissions;
174 		break;
175 	case OPEN_DELEGATE_WRITE:
176 		sacep = &sres->delegation.open_delegation4_u.write.permissions;
177 		dacep = &dres->delegation.open_delegation4_u.write.permissions;
178 		break;
179 	}
180 	dacep->who.utf8string_val =
181 	    kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
182 	bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
183 	    sacep->who.utf8string_len);
184 }
185 
186 static void
187 deep_open_free(OPEN4res *res)
188 {
189 	nfsace4 *acep;
190 	if (res->status != NFS4_OK)
191 		return;
192 
193 	switch (res->delegation.delegation_type) {
194 	case OPEN_DELEGATE_NONE:
195 		return;
196 	case OPEN_DELEGATE_READ:
197 		acep = &res->delegation.open_delegation4_u.read.permissions;
198 		break;
199 	case OPEN_DELEGATE_WRITE:
200 		acep = &res->delegation.open_delegation4_u.write.permissions;
201 		break;
202 	}
203 
204 	if (acep->who.utf8string_val) {
205 		kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
206 		acep->who.utf8string_val = NULL;
207 	}
208 }
209 
210 void
211 rfs4_free_reply(nfs_resop4 *rp)
212 {
213 	switch (rp->resop) {
214 	case OP_LOCK:
215 		deep_lock_free(&rp->nfs_resop4_u.oplock);
216 		break;
217 	case OP_OPEN:
218 		deep_open_free(&rp->nfs_resop4_u.opopen);
219 	default:
220 		break;
221 	}
222 }
223 
224 void
225 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
226 {
227 	*dst = *src;
228 
229 	/* Handle responses that need deep copy */
230 	switch (src->resop) {
231 	case OP_LOCK:
232 		deep_lock_copy(&dst->nfs_resop4_u.oplock,
233 		    &src->nfs_resop4_u.oplock);
234 		break;
235 	case OP_OPEN:
236 		deep_open_copy(&dst->nfs_resop4_u.opopen,
237 		    &src->nfs_resop4_u.opopen);
238 		break;
239 	default:
240 		break;
241 	};
242 }
243 
244 /*
245  * This is the implementation of the underlying state engine. The
246  * public interface to this engine is described by
247  * nfs4_state.h. Callers to the engine should hold no state engine
248  * locks when they call in to it. If the protocol needs to lock data
249  * structures it should do so after acquiring all references to them
250  * first and then follow the following lock order:
251  *
252  *	client > openowner > state > lo_state > lockowner > file.
253  *
254  * Internally we only allow a thread to hold one hash bucket lock at a
255  * time and the lock is higher in the lock order (must be acquired
256  * first) than the data structure that is on that hash list.
257  *
258  * If a new reference was acquired by the caller, that reference needs
259  * to be released after releasing all acquired locks with the
260  * corresponding rfs4_*_rele routine.
261  */
262 
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizeable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much.  The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader-writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
274 
/* TABSIZE is deliberately tiny in DEBUG builds. */
#ifdef DEBUG
#define	TABSIZE 17
#else
#define	TABSIZE 2047
#endif

/* Drop the low three bits of an address-valued key before hashing. */
#define	ADDRHASH(key) ((unsigned long)(key) >> 3)

/* Parenthesized so the product survives use inside larger expressions. */
#define	MAXTABSZ (1024*1024)
284 
285 /* The values below are rfs4_lease_time units */
286 
287 #ifdef DEBUG
288 #define	CLIENT_CACHE_TIME 1
289 #define	OPENOWNER_CACHE_TIME 1
290 #define	STATE_CACHE_TIME 1
291 #define	LO_STATE_CACHE_TIME 1
292 #define	LOCKOWNER_CACHE_TIME 1
293 #define	FILE_CACHE_TIME 3
294 #define	DELEG_STATE_CACHE_TIME 1
295 #else
296 #define	CLIENT_CACHE_TIME 10
297 #define	OPENOWNER_CACHE_TIME 5
298 #define	STATE_CACHE_TIME 1
299 #define	LO_STATE_CACHE_TIME 1
300 #define	LOCKOWNER_CACHE_TIME 3
301 #define	FILE_CACHE_TIME 40
302 #define	DELEG_STATE_CACHE_TIME 1
303 #endif
304 
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global; the tables that make use of
 * them are per zone.
 */
312 kmem_cache_t *rfs4_client_mem_cache;
313 kmem_cache_t *rfs4_clntIP_mem_cache;
314 kmem_cache_t *rfs4_openown_mem_cache;
315 kmem_cache_t *rfs4_openstID_mem_cache;
316 kmem_cache_t *rfs4_lockstID_mem_cache;
317 kmem_cache_t *rfs4_lockown_mem_cache;
318 kmem_cache_t *rfs4_file_mem_cache;
319 kmem_cache_t *rfs4_delegstID_mem_cache;
320 
321 /*
322  * NFSv4 state table functions
323  */
324 static bool_t rfs4_client_create(rfs4_entry_t, void *);
325 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
326 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
327 static void rfs4_client_destroy(rfs4_entry_t);
328 static bool_t rfs4_client_expiry(rfs4_entry_t);
329 static uint32_t clientid_hash(void *);
330 static bool_t clientid_compare(rfs4_entry_t, void *);
331 static void *clientid_mkkey(rfs4_entry_t);
332 static uint32_t nfsclnt_hash(void *);
333 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
334 static void *nfsclnt_mkkey(rfs4_entry_t);
335 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
336 static void rfs4_clntip_destroy(rfs4_entry_t);
337 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
338 static uint32_t clntip_hash(void *);
339 static bool_t clntip_compare(rfs4_entry_t, void *);
340 static void *clntip_mkkey(rfs4_entry_t);
341 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
342 static void rfs4_openowner_destroy(rfs4_entry_t);
343 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
344 static uint32_t openowner_hash(void *);
345 static bool_t openowner_compare(rfs4_entry_t, void *);
346 static void *openowner_mkkey(rfs4_entry_t);
347 static bool_t rfs4_state_create(rfs4_entry_t, void *);
348 static void rfs4_state_destroy(rfs4_entry_t);
349 static bool_t rfs4_state_expiry(rfs4_entry_t);
350 static uint32_t state_hash(void *);
351 static bool_t state_compare(rfs4_entry_t, void *);
352 static void *state_mkkey(rfs4_entry_t);
353 static uint32_t state_owner_file_hash(void *);
354 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
355 static void *state_owner_file_mkkey(rfs4_entry_t);
356 static uint32_t state_file_hash(void *);
357 static bool_t state_file_compare(rfs4_entry_t, void *);
358 static void *state_file_mkkey(rfs4_entry_t);
359 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
360 static void rfs4_lo_state_destroy(rfs4_entry_t);
361 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
362 static uint32_t lo_state_hash(void *);
363 static bool_t lo_state_compare(rfs4_entry_t, void *);
364 static void *lo_state_mkkey(rfs4_entry_t);
365 static uint32_t lo_state_lo_hash(void *);
366 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
367 static void *lo_state_lo_mkkey(rfs4_entry_t);
368 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
369 static void rfs4_lockowner_destroy(rfs4_entry_t);
370 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
371 static uint32_t lockowner_hash(void *);
372 static bool_t lockowner_compare(rfs4_entry_t, void *);
373 static void *lockowner_mkkey(rfs4_entry_t);
374 static uint32_t pid_hash(void *);
375 static bool_t pid_compare(rfs4_entry_t, void *);
376 static void *pid_mkkey(rfs4_entry_t);
377 static bool_t rfs4_file_create(rfs4_entry_t, void *);
378 static void rfs4_file_destroy(rfs4_entry_t);
379 static uint32_t file_hash(void *);
380 static bool_t file_compare(rfs4_entry_t, void *);
381 static void *file_mkkey(rfs4_entry_t);
382 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
383 static void rfs4_deleg_state_destroy(rfs4_entry_t);
384 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
385 static uint32_t deleg_hash(void *);
386 static bool_t deleg_compare(rfs4_entry_t, void *);
387 static void *deleg_mkkey(rfs4_entry_t);
388 static uint32_t deleg_state_hash(void *);
389 static bool_t deleg_state_compare(rfs4_entry_t, void *);
390 static void *deleg_state_mkkey(rfs4_entry_t);
391 
392 static void rfs4_state_rele_nounlock(rfs4_state_t *);
393 
/* Set to 1 by rfs4_ss_init(); rfs4_ss_clid() does nothing while it is 0. */
static int rfs4_ss_enabled = 0;
395 
396 void
397 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
398 {
399 	kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
400 }
401 
402 static rfs4_ss_pn_t *
403 rfs4_ss_pnalloc(char *dir, char *leaf)
404 {
405 	rfs4_ss_pn_t *ss_pn;
406 	int dir_len, leaf_len;
407 
408 	/*
409 	 * validate we have a resonable path
410 	 * (account for the '/' and trailing null)
411 	 */
412 	if ((dir_len = strlen(dir)) > MAXPATHLEN ||
413 	    (leaf_len = strlen(leaf)) > MAXNAMELEN ||
414 	    (dir_len + leaf_len + 2) > MAXPATHLEN) {
415 		return (NULL);
416 	}
417 
418 	ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
419 
420 	(void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
421 	/* Handy pointer to just the leaf name */
422 	ss_pn->leaf = ss_pn->pn + dir_len + 1;
423 	return (ss_pn);
424 }
425 
426 
427 /*
428  * Move the "leaf" filename from "sdir" directory
429  * to the "ddir" directory. Return the pathname of
430  * the destination unless the rename fails in which
431  * case we need to return the source pathname.
432  */
433 static rfs4_ss_pn_t *
434 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
435 {
436 	rfs4_ss_pn_t *src, *dst;
437 
438 	if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
439 		return (NULL);
440 
441 	if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
442 		rfs4_ss_pnfree(src);
443 		return (NULL);
444 	}
445 
446 	/*
447 	 * If the rename fails we shall return the src
448 	 * pathname and free the dst. Otherwise we need
449 	 * to free the src and return the dst pathanme.
450 	 */
451 	if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
452 		rfs4_ss_pnfree(dst);
453 		return (src);
454 	}
455 	rfs4_ss_pnfree(src);
456 	return (dst);
457 }
458 
459 
460 static rfs4_oldstate_t *
461 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
462 {
463 	struct uio uio;
464 	struct iovec iov[3];
465 
466 	rfs4_oldstate_t *cl_ss = NULL;
467 	vnode_t *vp;
468 	vattr_t va;
469 	uint_t id_len;
470 	int err, kill_file, file_vers;
471 
472 	if (ss_pn == NULL)
473 		return (NULL);
474 
475 	/*
476 	 * open the state file.
477 	 */
478 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
479 		return (NULL);
480 	}
481 
482 	if (vp->v_type != VREG) {
483 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
484 		VN_RELE(vp);
485 		return (NULL);
486 	}
487 
488 	err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
489 	if (err) {
490 		/*
491 		 * We don't have read access? better get the heck out.
492 		 */
493 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
494 		VN_RELE(vp);
495 		return (NULL);
496 	}
497 
498 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
499 	/*
500 	 * get the file size to do some basic validation
501 	 */
502 	va.va_mask = AT_SIZE;
503 	err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
504 
505 	kill_file = (va.va_size == 0 || va.va_size <
506 	    (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
507 
508 	if (err || kill_file) {
509 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
510 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
511 		VN_RELE(vp);
512 		if (kill_file) {
513 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
514 		}
515 		return (NULL);
516 	}
517 
518 	cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
519 
520 	/*
521 	 * build iovecs to read in the file_version, verifier and id_len
522 	 */
523 	iov[0].iov_base = (caddr_t)&file_vers;
524 	iov[0].iov_len = sizeof (int);
525 	iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
526 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
527 	iov[2].iov_base = (caddr_t)&id_len;
528 	iov[2].iov_len = sizeof (uint_t);
529 
530 	uio.uio_iov = iov;
531 	uio.uio_iovcnt = 3;
532 	uio.uio_segflg = UIO_SYSSPACE;
533 	uio.uio_loffset = 0;
534 	uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
535 
536 	if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
537 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
538 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
539 		VN_RELE(vp);
540 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
541 		return (NULL);
542 	}
543 
544 	/*
545 	 * if the file_version doesn't match or if the
546 	 * id_len is zero or the combination of the verifier,
547 	 * id_len and id_val is bigger than the file we have
548 	 * a problem. If so ditch the file.
549 	 */
550 	kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
551 	    (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
552 
553 	if (err || kill_file) {
554 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
555 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
556 		VN_RELE(vp);
557 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
558 		if (kill_file) {
559 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
560 		}
561 		return (NULL);
562 	}
563 
564 	/*
565 	 * now get the client id value
566 	 */
567 	cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
568 	iov[0].iov_base = cl_ss->cl_id4.id_val;
569 	iov[0].iov_len = id_len;
570 
571 	uio.uio_iov = iov;
572 	uio.uio_iovcnt = 1;
573 	uio.uio_segflg = UIO_SYSSPACE;
574 	uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
575 
576 	if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
577 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
578 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
579 		VN_RELE(vp);
580 		kmem_free(cl_ss->cl_id4.id_val, id_len);
581 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
582 		return (NULL);
583 	}
584 
585 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
586 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
587 	VN_RELE(vp);
588 	return (cl_ss);
589 }
590 
/*
 * Step from one directory entry to the one that follows it in a readdir
 * buffer, by advancing d_reclen bytes.  Any earlier definition of nextdp
 * is discarded first.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
595 
596 /*
597  * Add entries from statedir to supplied oldstate list.
598  * Optionally, move all entries from statedir -> destdir.
599  */
600 void
601 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
602 {
603 	rfs4_ss_pn_t *ss_pn;
604 	rfs4_oldstate_t *cl_ss = NULL;
605 	char	*dirt = NULL;
606 	int	err, dir_eof = 0, size = 0;
607 	vnode_t *dvp;
608 	struct iovec iov;
609 	struct uio uio;
610 	struct dirent64 *dep;
611 	offset_t dirchunk_offset = 0;
612 
613 	/*
614 	 * open the state directory
615 	 */
616 	if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
617 		return;
618 
619 	if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
620 		goto out;
621 
622 	dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
623 
624 	/*
625 	 * Get and process the directory entries
626 	 */
627 	while (!dir_eof) {
628 		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
629 		iov.iov_base = dirt;
630 		iov.iov_len = RFS4_SS_DIRSIZE;
631 		uio.uio_iov = &iov;
632 		uio.uio_iovcnt = 1;
633 		uio.uio_segflg = UIO_SYSSPACE;
634 		uio.uio_loffset = dirchunk_offset;
635 		uio.uio_resid = RFS4_SS_DIRSIZE;
636 
637 		err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
638 		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
639 		if (err)
640 			goto out;
641 
642 		size = RFS4_SS_DIRSIZE - uio.uio_resid;
643 
644 		/*
645 		 * Process all the directory entries in this
646 		 * readdir chunk
647 		 */
648 		for (dep = (struct dirent64 *)dirt; size > 0;
649 		    dep = nextdp(dep)) {
650 
651 			size -= dep->d_reclen;
652 			dirchunk_offset = dep->d_off;
653 
654 			/*
655 			 * Skip '.' and '..'
656 			 */
657 			if (NFS_IS_DOTNAME(dep->d_name))
658 				continue;
659 
660 			ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
661 			if (ss_pn == NULL)
662 				continue;
663 
664 			if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
665 				if (destdir != NULL) {
666 					rfs4_ss_pnfree(ss_pn);
667 					cl_ss->ss_pn = rfs4_ss_movestate(
668 					    statedir, destdir, dep->d_name);
669 				} else {
670 					cl_ss->ss_pn = ss_pn;
671 				}
672 				insque(cl_ss, oldstate);
673 			} else {
674 				rfs4_ss_pnfree(ss_pn);
675 			}
676 		}
677 	}
678 
679 out:
680 	(void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
681 	VN_RELE(dvp);
682 	if (dirt)
683 		kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
684 }
685 
686 static void
687 rfs4_ss_init(nfs4_srv_t *nsrv4)
688 {
689 	int npaths = 1;
690 	char *default_dss_path = NFS4_DSS_VAR_DIR;
691 
692 	/* read the default stable storage state */
693 	rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
694 
695 	rfs4_ss_enabled = 1;
696 }
697 
698 static void
699 rfs4_ss_fini(nfs4_srv_t *nsrv4)
700 {
701 	rfs4_servinst_t *sip;
702 
703 	mutex_enter(&nsrv4->servinst_lock);
704 	sip = nsrv4->nfs4_cur_servinst;
705 	while (sip != NULL) {
706 		rfs4_dss_clear_oldstate(sip);
707 		sip = sip->next;
708 	}
709 	mutex_exit(&nsrv4->servinst_lock);
710 }
711 
712 /*
713  * Remove all oldstate files referenced by this servinst.
714  */
715 static void
716 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
717 {
718 	rfs4_oldstate_t *os_head, *osp;
719 
720 	rw_enter(&sip->oldstate_lock, RW_WRITER);
721 	os_head = sip->oldstate;
722 
723 	if (os_head == NULL) {
724 		rw_exit(&sip->oldstate_lock);
725 		return;
726 	}
727 
728 	/* skip dummy entry */
729 	osp = os_head->next;
730 	while (osp != os_head) {
731 		char *leaf = osp->ss_pn->leaf;
732 		rfs4_oldstate_t *os_next;
733 
734 		rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
735 
736 		if (osp->cl_id4.id_val)
737 			kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
738 		rfs4_ss_pnfree(osp->ss_pn);
739 
740 		os_next = osp->next;
741 		remque(osp);
742 		kmem_free(osp, sizeof (rfs4_oldstate_t));
743 		osp = os_next;
744 	}
745 
746 	rw_exit(&sip->oldstate_lock);
747 }
748 
749 /*
750  * Form the state and oldstate paths, and read in the stable storage files.
751  */
752 void
753 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
754 {
755 	int i;
756 	char *state, *oldstate;
757 
758 	state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
759 	oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
760 
761 	for (i = 0; i < npaths; i++) {
762 		char *path = paths[i];
763 
764 		(void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
765 		(void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
766 
767 		/*
768 		 * Populate the current server instance's oldstate list.
769 		 *
770 		 * 1. Read stable storage data from old state directory,
771 		 *    leaving its contents alone.
772 		 *
773 		 * 2. Read stable storage data from state directory,
774 		 *    and move the latter's contents to old state
775 		 *    directory.
776 		 */
777 		/* CSTYLED */
778 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
779 		/* CSTYLED */
780 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
781 	}
782 
783 	kmem_free(state, MAXPATHLEN);
784 	kmem_free(oldstate, MAXPATHLEN);
785 }
786 
787 
788 /*
789  * Check if we are still in grace and if the client can be
790  * granted permission to perform reclaims.
791  */
792 void
793 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
794 {
795 	rfs4_servinst_t *sip;
796 
797 	/*
798 	 * It should be sufficient to check the oldstate data for just
799 	 * this client's instance. However, since our per-instance
800 	 * client grouping is solely temporal, HA-NFSv4 RG failover
801 	 * might result in clients of the same RG being partitioned into
802 	 * separate instances.
803 	 *
804 	 * Until the client grouping is improved, we must check the
805 	 * oldstate data for all instances with an active grace period.
806 	 *
807 	 * This also serves as the mechanism to remove stale oldstate data.
808 	 * The first time we check an instance after its grace period has
809 	 * expired, the oldstate data should be cleared.
810 	 *
811 	 * Start at the current instance, and walk the list backwards
812 	 * to the first.
813 	 */
814 	mutex_enter(&nsrv4->servinst_lock);
815 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
816 		rfs4_ss_chkclid_sip(cp, sip);
817 
818 		/* if the above check found this client, we're done */
819 		if (cp->rc_can_reclaim)
820 			break;
821 	}
822 	mutex_exit(&nsrv4->servinst_lock);
823 }
824 
825 static void
826 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
827 {
828 	rfs4_oldstate_t *osp, *os_head;
829 
830 	/* short circuit everything if this server instance has no oldstate */
831 	rw_enter(&sip->oldstate_lock, RW_READER);
832 	os_head = sip->oldstate;
833 	rw_exit(&sip->oldstate_lock);
834 	if (os_head == NULL)
835 		return;
836 
837 	/*
838 	 * If this server instance is no longer in a grace period then
839 	 * the client won't be able to reclaim. No further need for this
840 	 * instance's oldstate data, so it can be cleared.
841 	 */
842 	if (!rfs4_servinst_in_grace(sip))
843 		return;
844 
845 	/* this instance is still in grace; search for the clientid */
846 
847 	rw_enter(&sip->oldstate_lock, RW_READER);
848 
849 	os_head = sip->oldstate;
850 	/* skip dummy entry */
851 	osp = os_head->next;
852 	while (osp != os_head) {
853 		if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
854 			if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
855 			    osp->cl_id4.id_len) == 0) {
856 				cp->rc_can_reclaim = 1;
857 				break;
858 			}
859 		}
860 		osp = osp->next;
861 	}
862 
863 	rw_exit(&sip->oldstate_lock);
864 }
865 
866 /*
867  * Place client information into stable storage: 1/3.
868  * First, generate the leaf filename, from the client's IP address and
869  * the server-generated short-hand clientid.
870  */
871 void
872 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
873 {
874 	const char *kinet_ntop6(uchar_t *, char *, size_t);
875 	char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
876 	struct sockaddr *ca;
877 	uchar_t *b;
878 
879 	if (rfs4_ss_enabled == 0) {
880 		return;
881 	}
882 
883 	buf[0] = 0;
884 
885 	ca = (struct sockaddr *)&cp->rc_addr;
886 
887 	/*
888 	 * Convert the caller's IP address to a dotted string
889 	 */
890 	if (ca->sa_family == AF_INET) {
891 		b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
892 		(void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
893 		    b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
894 	} else if (ca->sa_family == AF_INET6) {
895 		struct sockaddr_in6 *sin6;
896 
897 		sin6 = (struct sockaddr_in6 *)ca;
898 		(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
899 		    buf, INET6_ADDRSTRLEN);
900 	}
901 
902 	(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
903 	    (longlong_t)cp->rc_clientid);
904 	rfs4_ss_clid_write(nsrv4, cp, leaf);
905 }
906 
907 /*
908  * Place client information into stable storage: 2/3.
909  * DSS: distributed stable storage: the file may need to be written to
910  * multiple directories.
911  */
912 static void
913 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
914 {
915 	rfs4_servinst_t *sip;
916 
917 	/*
918 	 * It should be sufficient to write the leaf file to (all) DSS paths
919 	 * associated with just this client's instance. However, since our
920 	 * per-instance client grouping is solely temporal, HA-NFSv4 RG
921 	 * failover might result in us losing DSS data.
922 	 *
923 	 * Until the client grouping is improved, we must write the DSS data
924 	 * to all instances' paths. Start at the current instance, and
925 	 * walk the list backwards to the first.
926 	 */
927 	mutex_enter(&nsrv4->servinst_lock);
928 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
929 		int i, npaths = sip->dss_npaths;
930 
931 		/* write the leaf file to all DSS paths */
932 		for (i = 0; i < npaths; i++) {
933 			rfs4_dss_path_t *dss_path = sip->dss_paths[i];
934 
935 			/* HA-NFSv4 path might have been failed-away from us */
936 			if (dss_path == NULL)
937 				continue;
938 
939 			rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
940 		}
941 	}
942 	mutex_exit(&nsrv4->servinst_lock);
943 }
944 
945 /*
946  * Place client information into stable storage: 3/3.
947  * Write the stable storage data to the requested file.
948  */
949 static void
950 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
951 {
952 	int ioflag;
953 	int file_vers = NFS4_SS_VERSION;
954 	size_t dirlen;
955 	struct uio uio;
956 	struct iovec iov[4];
957 	char *dir;
958 	rfs4_ss_pn_t *ss_pn;
959 	vnode_t *vp;
960 	nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
961 
962 	/* allow 2 extra bytes for '/' & NUL */
963 	dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
964 	dir = kmem_alloc(dirlen, KM_SLEEP);
965 	(void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
966 
967 	ss_pn = rfs4_ss_pnalloc(dir, leaf);
968 	/* rfs4_ss_pnalloc takes its own copy */
969 	kmem_free(dir, dirlen);
970 	if (ss_pn == NULL)
971 		return;
972 
973 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
974 	    CRCREAT, 0)) {
975 		rfs4_ss_pnfree(ss_pn);
976 		return;
977 	}
978 
979 	/*
980 	 * We need to record leaf - i.e. the filename - so that we know
981 	 * what to remove, in the future. However, the dir part of cp->ss_pn
982 	 * should never be referenced directly, since it's potentially only
983 	 * one of several paths with this leaf in it.
984 	 */
985 	if (cp->rc_ss_pn != NULL) {
986 		if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
987 			/* we've already recorded *this* leaf */
988 			rfs4_ss_pnfree(ss_pn);
989 		} else {
990 			/* replace with this leaf */
991 			rfs4_ss_pnfree(cp->rc_ss_pn);
992 			cp->rc_ss_pn = ss_pn;
993 		}
994 	} else {
995 		cp->rc_ss_pn = ss_pn;
996 	}
997 
998 	/*
999 	 * Build a scatter list that points to the nfs_client_id4
1000 	 */
1001 	iov[0].iov_base = (caddr_t)&file_vers;
1002 	iov[0].iov_len = sizeof (int);
1003 	iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1004 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
1005 	iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1006 	iov[2].iov_len = sizeof (uint_t);
1007 	iov[3].iov_base = (caddr_t)cl_id4->id_val;
1008 	iov[3].iov_len = cl_id4->id_len;
1009 
1010 	uio.uio_iov = iov;
1011 	uio.uio_iovcnt = 4;
1012 	uio.uio_loffset = 0;
1013 	uio.uio_segflg = UIO_SYSSPACE;
1014 	uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1015 	uio.uio_resid = cl_id4->id_len + sizeof (int) +
1016 	    NFS4_VERIFIER_SIZE + sizeof (uint_t);
1017 
1018 	ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1019 	uio.uio_extflg = UIO_COPY_DEFAULT;
1020 
1021 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1022 	/* write the full client id to the file. */
1023 	(void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1024 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1025 
1026 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1027 	VN_RELE(vp);
1028 }
1029 
1030 /*
1031  * DSS: distributed stable storage.
1032  * Unpack the list of paths passed by nfsd.
1033  * Use nvlist_alloc(9F) to manage the data.
1034  * The caller is responsible for allocating and freeing the buffer.
1035  */
1036 int
1037 rfs4_dss_setpaths(char *buf, size_t buflen)
1038 {
1039 	int error;
1040 
1041 	/*
1042 	 * If this is a "warm start", i.e. we previously had DSS paths,
1043 	 * preserve the old paths.
1044 	 */
1045 	if (rfs4_dss_paths != NULL) {
1046 		/*
1047 		 * Before we lose the ptr, destroy the nvlist and pathnames
1048 		 * array from the warm start before this one.
1049 		 */
1050 		nvlist_free(rfs4_dss_oldpaths);
1051 		rfs4_dss_oldpaths = rfs4_dss_paths;
1052 	}
1053 
1054 	/* unpack the buffer into a searchable nvlist */
1055 	error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1056 	if (error)
1057 		return (error);
1058 
1059 	/*
1060 	 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1061 	 * in the list, and record its location.
1062 	 */
1063 	error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1064 	    &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1065 	return (error);
1066 }
1067 
1068 /*
1069  * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1070  * to find and mark the client for forced expire.
1071  */
1072 static void
1073 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1074 {
1075 	rfs4_client_t *cp = (rfs4_client_t *)ent;
1076 	struct nfs4clrst_args *clr = arg;
1077 	struct sockaddr_in6 *ent_sin6;
1078 	struct in6_addr  clr_in6;
1079 	struct sockaddr_in  *ent_sin;
1080 	struct in_addr   clr_in;
1081 
1082 	if (clr->addr_type != cp->rc_addr.ss_family) {
1083 		return;
1084 	}
1085 
1086 	switch (clr->addr_type) {
1087 
1088 	case AF_INET6:
1089 		/* copyin the address from user space */
1090 		if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1091 			break;
1092 		}
1093 
1094 		ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1095 
1096 		/*
1097 		 * now compare, and if equivalent mark entry
1098 		 * for forced expiration
1099 		 */
1100 		if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1101 			cp->rc_forced_expire = 1;
1102 		}
1103 		break;
1104 
1105 	case AF_INET:
1106 		/* copyin the address from user space */
1107 		if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1108 			break;
1109 		}
1110 
1111 		ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1112 
1113 		/*
1114 		 * now compare, and if equivalent mark entry
1115 		 * for forced expiration
1116 		 */
1117 		if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1118 			cp->rc_forced_expire = 1;
1119 		}
1120 		break;
1121 
1122 	default:
1123 		/* force this assert to fail */
1124 		ASSERT(clr->addr_type != clr->addr_type);
1125 	}
1126 }
1127 
1128 /*
1129  * This is called from nfssys() in order to clear server state
1130  * for the specified client IP Address.
1131  */
1132 int
1133 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1134 {
1135 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1136 	int rc;
1137 
1138 	/* Once nfssrv is loaded, every zone should have one of these. */
1139 	VERIFY(nsrv4 != NULL);
1140 
1141 	mutex_enter(&nsrv4->state_lock);
1142 	/*
1143 	 * But only after NFS service is running is the nfs4_server_state
1144 	 * around. It's dirty (and needs the state_lock held), but all of the
1145 	 * databases live deep in the nfs4_server_state, so it's the only thing
1146 	 * to legitimately check prior to using anything. The pointers
1147 	 * themselves may be stale.
1148 	 */
1149 	if (nsrv4->nfs4_server_state != NULL) {
1150 		VERIFY(nsrv4->rfs4_client_tab != NULL);
1151 		rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1152 		rc = 0;
1153 	} else {
1154 		rc = ENXIO;
1155 	}
1156 	mutex_exit(&nsrv4->state_lock);
1157 	return (rc);
1158 }
1159 
1160 /*
1161  * Used to initialize the NFSv4 server's state or database.  All of
1162  * the tables are created and timers are set.
1163  */
1164 void
1165 rfs4_state_g_init()
1166 {
1167 	extern boolean_t rfs4_cpr_callb(void *, int);
1168 	/*
1169 	 * Add a CPR callback so that we can update client
1170 	 * access times to extend the lease after a suspend
1171 	 * and resume (using the same class as rpcmod/connmgr)
1172 	 */
1173 	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1174 
1175 	/*
1176 	 * NFSv4 server state databases
1177 	 *
1178 	 * Initialized when the module is loaded and used by NFSv4 state
1179 	 * tables.  These kmem_cache free pools are used globally, the NFSv4
1180 	 * state tables which make use of these kmem_cache free pools are per
1181 	 * zone.
1182 	 *
1183 	 * initialize the global kmem_cache free pools which will be used by
1184 	 * the NFSv4 state tables.
1185 	 */
1186 	/* CSTYLED */
1187 	rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1188 	/* CSTYLED */
1189 	rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1190 	/* CSTYLED */
1191 	rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1192 	/* CSTYLED */
1193 	rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1194 	/* CSTYLED */
1195 	rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1196 	/* CSTYLED */
1197 	rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1198 	/* CSTYLED */
1199 	rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1200 	/* CSTYLED */
1201 	rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1202 }
1203 
1204 
1205 /*
1206  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1207  * and other state.
1208  */
1209 void
1210 rfs4_state_g_fini()
1211 {
1212 	int i;
1213 	/*
1214 	 * Cleanup the CPR callback.
1215 	 */
1216 	if (cpr_id)
1217 		(void) callb_delete(cpr_id);
1218 
1219 	/* free the NFSv4 state databases */
1220 	for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1221 		kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1222 		rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1223 	}
1224 
1225 	rfs4_client_mem_cache = NULL;
1226 	rfs4_clntIP_mem_cache = NULL;
1227 	rfs4_openown_mem_cache = NULL;
1228 	rfs4_openstID_mem_cache = NULL;
1229 	rfs4_lockstID_mem_cache = NULL;
1230 	rfs4_lockown_mem_cache = NULL;
1231 	rfs4_file_mem_cache = NULL;
1232 	rfs4_delegstID_mem_cache = NULL;
1233 
1234 	/* DSS: distributed stable storage */
1235 	nvlist_free(rfs4_dss_oldpaths);
1236 	nvlist_free(rfs4_dss_paths);
1237 	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1238 }
1239 
1240 /*
1241  * Used to initialize the per zone NFSv4 server's state
1242  */
1243 void
1244 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1245 {
1246 	time_t start_time;
1247 	int start_grace;
1248 	char *dss_path = NFS4_DSS_VAR_DIR;
1249 
1250 	/* DSS: distributed stable storage: initialise served paths list */
1251 	nsrv4->dss_pathlist = NULL;
1252 
1253 	/*
1254 	 * Set the boot time.  If the server
1255 	 * has been restarted quickly and has had the opportunity to
1256 	 * service clients, then the start_time needs to be bumped
1257 	 * regardless.  A small window but it exists...
1258 	 */
1259 	start_time = gethrestime_sec();
1260 	if (nsrv4->rfs4_start_time < start_time)
1261 		nsrv4->rfs4_start_time = start_time;
1262 	else
1263 		nsrv4->rfs4_start_time++;
1264 
1265 	/*
1266 	 * Create the first server instance, or a new one if the server has
1267 	 * been restarted; see above comments on rfs4_start_time. Don't
1268 	 * start its grace period; that will be done later, to maximise the
1269 	 * clients' recovery window.
1270 	 */
1271 	start_grace = 0;
1272 	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1273 		int i;
1274 		char **dss_allpaths = NULL;
1275 		dss_allpaths = kmem_alloc(sizeof (char *) *
1276 		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1277 		/*
1278 		 * Add the default path into the list of paths for saving
1279 		 * state informantion.
1280 		 */
1281 		dss_allpaths[0] = dss_path;
1282 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
1283 			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1284 		}
1285 		rfs4_servinst_create(nsrv4, start_grace,
1286 		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
1287 		kmem_free(dss_allpaths,
1288 		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1289 	} else {
1290 		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1291 	}
1292 
1293 	/* reset the "first NFSv4 request" status */
1294 	nsrv4->seen_first_compound = 0;
1295 
1296 	mutex_enter(&nsrv4->state_lock);
1297 
1298 	/*
1299 	 * If the server state database has already been initialized,
1300 	 * skip it
1301 	 */
1302 	if (nsrv4->nfs4_server_state != NULL) {
1303 		mutex_exit(&nsrv4->state_lock);
1304 		return;
1305 	}
1306 
1307 	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1308 
1309 	/* set the various cache timers for table creation */
1310 	if (nsrv4->rfs4_client_cache_time == 0)
1311 		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1312 	if (nsrv4->rfs4_openowner_cache_time == 0)
1313 		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1314 	if (nsrv4->rfs4_state_cache_time == 0)
1315 		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1316 	if (nsrv4->rfs4_lo_state_cache_time == 0)
1317 		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1318 	if (nsrv4->rfs4_lockowner_cache_time == 0)
1319 		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1320 	if (nsrv4->rfs4_file_cache_time == 0)
1321 		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1322 	if (nsrv4->rfs4_deleg_state_cache_time == 0)
1323 		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1324 
1325 	/* Create the overall database to hold all server state */
1326 	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1327 
1328 	/* Now create the individual tables */
1329 	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1330 	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1331 	    "Client",
1332 	    nsrv4->rfs4_client_cache_time,
1333 	    2,
1334 	    rfs4_client_create,
1335 	    rfs4_client_destroy,
1336 	    rfs4_client_expiry,
1337 	    sizeof (rfs4_client_t),
1338 	    TABSIZE,
1339 	    MAXTABSZ/8, 100);
1340 	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1341 	    "nfs_client_id4", nfsclnt_hash,
1342 	    nfsclnt_compare, nfsclnt_mkkey,
1343 	    TRUE);
1344 	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1345 	    "client_id", clientid_hash,
1346 	    clientid_compare, clientid_mkkey,
1347 	    FALSE);
1348 
1349 	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
1350 	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1351 	    "ClntIP",
1352 	    nsrv4->rfs4_clntip_cache_time,
1353 	    1,
1354 	    rfs4_clntip_create,
1355 	    rfs4_clntip_destroy,
1356 	    rfs4_clntip_expiry,
1357 	    sizeof (rfs4_clntip_t),
1358 	    TABSIZE,
1359 	    MAXTABSZ, 100);
1360 	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1361 	    "client_ip", clntip_hash,
1362 	    clntip_compare, clntip_mkkey,
1363 	    TRUE);
1364 
1365 	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1366 	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1367 	    "OpenOwner",
1368 	    nsrv4->rfs4_openowner_cache_time,
1369 	    1,
1370 	    rfs4_openowner_create,
1371 	    rfs4_openowner_destroy,
1372 	    rfs4_openowner_expiry,
1373 	    sizeof (rfs4_openowner_t),
1374 	    TABSIZE,
1375 	    MAXTABSZ, 100);
1376 	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1377 	    "open_owner4", openowner_hash,
1378 	    openowner_compare,
1379 	    openowner_mkkey, TRUE);
1380 
1381 	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1382 	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1383 	    "OpenStateID",
1384 	    nsrv4->rfs4_state_cache_time,
1385 	    3,
1386 	    rfs4_state_create,
1387 	    rfs4_state_destroy,
1388 	    rfs4_state_expiry,
1389 	    sizeof (rfs4_state_t),
1390 	    TABSIZE,
1391 	    MAXTABSZ, 100);
1392 
1393 	/* CSTYLED */
1394 	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1395 	    "Openowner-File",
1396 	    state_owner_file_hash,
1397 	    state_owner_file_compare,
1398 	    state_owner_file_mkkey, TRUE);
1399 
1400 	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1401 	    "State-id", state_hash,
1402 	    state_compare, state_mkkey, FALSE);
1403 
1404 	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1405 	    "File", state_file_hash,
1406 	    state_file_compare, state_file_mkkey,
1407 	    FALSE);
1408 
1409 	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1410 	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1411 	    "LockStateID",
1412 	    nsrv4->rfs4_lo_state_cache_time,
1413 	    2,
1414 	    rfs4_lo_state_create,
1415 	    rfs4_lo_state_destroy,
1416 	    rfs4_lo_state_expiry,
1417 	    sizeof (rfs4_lo_state_t),
1418 	    TABSIZE,
1419 	    MAXTABSZ, 100);
1420 
1421 	/* CSTYLED */
1422 	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1423 	    "lockownerxstate",
1424 	    lo_state_lo_hash,
1425 	    lo_state_lo_compare,
1426 	    lo_state_lo_mkkey, TRUE);
1427 
1428 	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1429 	    "State-id",
1430 	    lo_state_hash, lo_state_compare,
1431 	    lo_state_mkkey, FALSE);
1432 
1433 	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1434 
1435 	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1436 	    "Lockowner",
1437 	    nsrv4->rfs4_lockowner_cache_time,
1438 	    2,
1439 	    rfs4_lockowner_create,
1440 	    rfs4_lockowner_destroy,
1441 	    rfs4_lockowner_expiry,
1442 	    sizeof (rfs4_lockowner_t),
1443 	    TABSIZE,
1444 	    MAXTABSZ, 100);
1445 
1446 	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1447 	    "lock_owner4", lockowner_hash,
1448 	    lockowner_compare,
1449 	    lockowner_mkkey, TRUE);
1450 
1451 	/* CSTYLED */
1452 	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1453 	    "pid", pid_hash,
1454 	    pid_compare, pid_mkkey,
1455 	    FALSE);
1456 
1457 	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1458 	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1459 	    "File",
1460 	    nsrv4->rfs4_file_cache_time,
1461 	    1,
1462 	    rfs4_file_create,
1463 	    rfs4_file_destroy,
1464 	    NULL,
1465 	    sizeof (rfs4_file_t),
1466 	    TABSIZE,
1467 	    MAXTABSZ, -1);
1468 
1469 	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1470 	    "Filehandle", file_hash,
1471 	    file_compare, file_mkkey, TRUE);
1472 
1473 	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1474 	/* CSTYLED */
1475 	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1476 	    "DelegStateID",
1477 	    nsrv4->rfs4_deleg_state_cache_time,
1478 	    2,
1479 	    rfs4_deleg_state_create,
1480 	    rfs4_deleg_state_destroy,
1481 	    rfs4_deleg_state_expiry,
1482 	    sizeof (rfs4_deleg_state_t),
1483 	    TABSIZE,
1484 	    MAXTABSZ, 100);
1485 	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1486 	    "DelegByFileClient",
1487 	    deleg_hash,
1488 	    deleg_compare,
1489 	    deleg_mkkey, TRUE);
1490 
1491 	/* CSTYLED */
1492 	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1493 	    "DelegState",
1494 	    deleg_state_hash,
1495 	    deleg_state_compare,
1496 	    deleg_state_mkkey, FALSE);
1497 
1498 	mutex_exit(&nsrv4->state_lock);
1499 
1500 	/*
1501 	 * Init the stable storage.
1502 	 */
1503 	rfs4_ss_init(nsrv4);
1504 }
1505 
1506 /*
1507  * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1508  * and state.
1509  */
1510 void
1511 rfs4_state_zone_fini()
1512 {
1513 	rfs4_database_t *dbp;
1514 	nfs4_srv_t *nsrv4;
1515 	nsrv4 = nfs4_get_srv();
1516 
1517 	rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1518 
1519 	/*
1520 	 * Clean up any dangling stable storage structures BEFORE calling
1521 	 * rfs4_servinst_destroy_all() so there are no dangling structures
1522 	 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1523 	 * freed).
1524 	 */
1525 	rfs4_ss_fini(nsrv4);
1526 
1527 	mutex_enter(&nsrv4->state_lock);
1528 
1529 	if (nsrv4->nfs4_server_state == NULL) {
1530 		mutex_exit(&nsrv4->state_lock);
1531 		return;
1532 	}
1533 
1534 	/* destroy server instances and current instance ptr */
1535 	rfs4_servinst_destroy_all(nsrv4);
1536 
1537 	/* reset the "first NFSv4 request" status */
1538 	nsrv4->seen_first_compound = 0;
1539 
1540 	dbp = nsrv4->nfs4_server_state;
1541 	nsrv4->nfs4_server_state = NULL;
1542 
1543 	rw_destroy(&nsrv4->rfs4_findclient_lock);
1544 
1545 	/* First stop all of the reaper threads in the database */
1546 	rfs4_database_shutdown(dbp);
1547 
1548 	/*
1549 	 * WARNING: There may be consumers of the rfs4 database still
1550 	 * active as we destroy these.  IF that's the case, consider putting
1551 	 * some of their _zone_fini()-like functions into the zsd key as
1552 	 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
1553 	 * maintain some ordering guarantees better that way.
1554 	 */
1555 	/* Now destroy/release the database tables */
1556 	rfs4_database_destroy(dbp);
1557 
1558 	/* Reset the cache timers for next time */
1559 	nsrv4->rfs4_client_cache_time = 0;
1560 	nsrv4->rfs4_openowner_cache_time = 0;
1561 	nsrv4->rfs4_state_cache_time = 0;
1562 	nsrv4->rfs4_lo_state_cache_time = 0;
1563 	nsrv4->rfs4_lockowner_cache_time = 0;
1564 	nsrv4->rfs4_file_cache_time = 0;
1565 	nsrv4->rfs4_deleg_state_cache_time = 0;
1566 
1567 	mutex_exit(&nsrv4->state_lock);
1568 }
1569 
/*
 * Overlay used to build and pick apart a clientid4: the same storage can
 * be accessed either as the clientid4 itself or as the two fields that
 * rfs4_client_create() fills in.
 */
typedef union {
	struct {
		uint32_t start_time;	/* set from nsrv4->rfs4_start_time */
		uint32_t c_id;		/* set from rfs4_dbe_getid() */
	} impl_id;
	clientid4 id4;			/* the id viewed as a clientid4 */
} cid;
1577 
/*
 * Forward declarations.  In the code that follows, foreign_clientid() and
 * embed_nodeid() are only called when CLUSTER_BOOTED is set in
 * cluster_bootflags.
 */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);
1581 
/*
 * Overlay for the SETCLIENTID_CONFIRM verifier: the same storage can be
 * accessed either as a verifier4 or as the two fields below.
 */
typedef union {
	struct {
		uint32_t c_id;		/* copied from the client's c_id */
		uint32_t gen_num;	/* starts at 0; bumped by */
					/* rfs4_client_scv_next() */
	} cv_impl;
	verifier4	confirm_verf;
} scid_confirm_verf;
1589 
1590 static uint32_t
1591 clientid_hash(void *key)
1592 {
1593 	cid *idp = key;
1594 
1595 	return (idp->impl_id.c_id);
1596 }
1597 
1598 static bool_t
1599 clientid_compare(rfs4_entry_t entry, void *key)
1600 {
1601 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1602 	clientid4 *idp = key;
1603 
1604 	return (*idp == cp->rc_clientid);
1605 }
1606 
1607 static void *
1608 clientid_mkkey(rfs4_entry_t entry)
1609 {
1610 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1611 
1612 	return (&cp->rc_clientid);
1613 }
1614 
1615 static uint32_t
1616 nfsclnt_hash(void *key)
1617 {
1618 	nfs_client_id4 *client = key;
1619 	int i;
1620 	uint32_t hash = 0;
1621 
1622 	for (i = 0; i < client->id_len; i++) {
1623 		hash <<= 1;
1624 		hash += (uint_t)client->id_val[i];
1625 	}
1626 	return (hash);
1627 }
1628 
1629 
1630 static bool_t
1631 nfsclnt_compare(rfs4_entry_t entry, void *key)
1632 {
1633 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1634 	nfs_client_id4 *nfs_client = key;
1635 
1636 	if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1637 		return (FALSE);
1638 
1639 	return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1640 	    nfs_client->id_len) == 0);
1641 }
1642 
1643 static void *
1644 nfsclnt_mkkey(rfs4_entry_t entry)
1645 {
1646 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1647 
1648 	return (&cp->rc_nfs_client);
1649 }
1650 
1651 static bool_t
1652 rfs4_client_expiry(rfs4_entry_t u_entry)
1653 {
1654 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1655 	bool_t cp_expired;
1656 
1657 	if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1658 		cp->rc_ss_remove = 1;
1659 		return (TRUE);
1660 	}
1661 	/*
1662 	 * If the sysadmin has used clear_locks for this
1663 	 * entry then forced_expire will be set and we
1664 	 * want this entry to be reaped. Or the entry
1665 	 * has exceeded its lease period.
1666 	 */
1667 	cp_expired = (cp->rc_forced_expire ||
1668 	    (gethrestime_sec() - cp->rc_last_access
1669 	    > rfs4_lease_time));
1670 
1671 	if (!cp->rc_ss_remove && cp_expired)
1672 		cp->rc_ss_remove = 1;
1673 	return (cp_expired);
1674 }
1675 
1676 /*
1677  * Remove the leaf file from all distributed stable storage paths.
1678  */
1679 static void
1680 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1681 {
1682 	nfs4_srv_t *nsrv4;
1683 	rfs4_servinst_t *sip;
1684 	char *leaf = cp->rc_ss_pn->leaf;
1685 
1686 	/*
1687 	 * since the state files are written to all DSS
1688 	 * paths we must remove this leaf file instance
1689 	 * from all server instances.
1690 	 */
1691 
1692 	nsrv4 = nfs4_get_srv();
1693 	mutex_enter(&nsrv4->servinst_lock);
1694 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1695 		/* remove the leaf file associated with this server instance */
1696 		rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1697 	}
1698 	mutex_exit(&nsrv4->servinst_lock);
1699 }
1700 
1701 static void
1702 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1703 {
1704 	int i, npaths = sip->dss_npaths;
1705 
1706 	for (i = 0; i < npaths; i++) {
1707 		rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1708 		char *path, *dir;
1709 		size_t pathlen;
1710 
1711 		/* the HA-NFSv4 path might have been failed-over away from us */
1712 		if (dss_path == NULL)
1713 			continue;
1714 
1715 		dir = dss_path->path;
1716 
1717 		/* allow 3 extra bytes for two '/' & a NUL */
1718 		pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1719 		path = kmem_alloc(pathlen, KM_SLEEP);
1720 		(void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1721 
1722 		(void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1723 
1724 		kmem_free(path, pathlen);
1725 	}
1726 }
1727 
1728 static void
1729 rfs4_client_destroy(rfs4_entry_t u_entry)
1730 {
1731 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1732 
1733 	mutex_destroy(cp->rc_cbinfo.cb_lock);
1734 	cv_destroy(cp->rc_cbinfo.cb_cv);
1735 	cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1736 	list_destroy(&cp->rc_openownerlist);
1737 
1738 	/* free callback info */
1739 	rfs4_cbinfo_free(&cp->rc_cbinfo);
1740 
1741 	if (cp->rc_cp_confirmed)
1742 		rfs4_client_rele(cp->rc_cp_confirmed);
1743 
1744 	if (cp->rc_ss_pn) {
1745 		/* check if the stable storage files need to be removed */
1746 		if (cp->rc_ss_remove)
1747 			rfs4_dss_remove_cpleaf(cp);
1748 		rfs4_ss_pnfree(cp->rc_ss_pn);
1749 	}
1750 
1751 	/* Free the client supplied client id */
1752 	kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1753 
1754 	if (cp->rc_sysidt != LM_NOSYSID)
1755 		lm_free_sysidt(cp->rc_sysidt);
1756 }
1757 
1758 static bool_t
1759 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1760 {
1761 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1762 	nfs_client_id4 *client = (nfs_client_id4 *)arg;
1763 	struct sockaddr *ca;
1764 	cid *cidp;
1765 	scid_confirm_verf *scvp;
1766 	nfs4_srv_t *nsrv4;
1767 
1768 	nsrv4 = nfs4_get_srv();
1769 
1770 	/* Get a clientid to give to the client */
1771 	cidp = (cid *)&cp->rc_clientid;
1772 	cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1773 	cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1774 
1775 	/* If we are booted as a cluster node, embed our nodeid */
1776 	if (cluster_bootflags & CLUSTER_BOOTED)
1777 		embed_nodeid(cidp);
1778 
1779 	/* Allocate and copy client's client id value */
1780 	cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1781 	cp->rc_nfs_client.id_len = client->id_len;
1782 	bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1783 	cp->rc_nfs_client.verifier = client->verifier;
1784 
1785 	/* Copy client's IP address */
1786 	ca = client->cl_addr;
1787 	if (ca->sa_family == AF_INET)
1788 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1789 	else if (ca->sa_family == AF_INET6)
1790 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1791 	cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1792 
1793 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1794 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1795 	scvp->cv_impl.c_id = cidp->impl_id.c_id;
1796 	scvp->cv_impl.gen_num = 0;
1797 
1798 	/* An F_UNLKSYS has been done for this client */
1799 	cp->rc_unlksys_completed = FALSE;
1800 
1801 	/* We need the client to ack us */
1802 	cp->rc_need_confirm = TRUE;
1803 	cp->rc_cp_confirmed = NULL;
1804 
1805 	/* TRUE all the time until the callback path actually fails */
1806 	cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1807 
1808 	/* Initialize the access time to now */
1809 	cp->rc_last_access = gethrestime_sec();
1810 
1811 	cp->rc_cr_set = NULL;
1812 
1813 	cp->rc_sysidt = LM_NOSYSID;
1814 
1815 	list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1816 	    offsetof(rfs4_openowner_t, ro_node));
1817 
1818 	/* set up the callback control structure */
1819 	cp->rc_cbinfo.cb_state = CB_UNINIT;
1820 	mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1821 	cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1822 	cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1823 
1824 	/*
1825 	 * Associate the client_t with the current server instance.
1826 	 * The hold is solely to satisfy the calling requirement of
1827 	 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1828 	 */
1829 	rfs4_dbe_hold(cp->rc_dbe);
1830 	rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1831 	rfs4_dbe_rele(cp->rc_dbe);
1832 
1833 	return (TRUE);
1834 }
1835 
1836 /*
1837  * Caller wants to generate/update the setclientid_confirm verifier
1838  * associated with a client.  This is done during the SETCLIENTID
1839  * processing.
1840  */
1841 void
1842 rfs4_client_scv_next(rfs4_client_t *cp)
1843 {
1844 	scid_confirm_verf *scvp;
1845 
1846 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1847 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1848 	scvp->cv_impl.gen_num++;
1849 }
1850 
1851 void
1852 rfs4_client_rele(rfs4_client_t *cp)
1853 {
1854 	rfs4_dbe_rele(cp->rc_dbe);
1855 }
1856 
1857 rfs4_client_t *
1858 rfs4_findclient(nfs_client_id4 *client, bool_t *create,	rfs4_client_t *oldcp)
1859 {
1860 	rfs4_client_t *cp;
1861 	nfs4_srv_t *nsrv4;
1862 	nsrv4 = nfs4_get_srv();
1863 
1864 
1865 	if (oldcp) {
1866 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1867 		rfs4_dbe_hide(oldcp->rc_dbe);
1868 	} else {
1869 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1870 	}
1871 
1872 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1873 	    create, (void *)client, RFS4_DBS_VALID);
1874 
1875 	if (oldcp)
1876 		rfs4_dbe_unhide(oldcp->rc_dbe);
1877 
1878 	rw_exit(&nsrv4->rfs4_findclient_lock);
1879 
1880 	return (cp);
1881 }
1882 
1883 rfs4_client_t *
1884 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1885 {
1886 	rfs4_client_t *cp;
1887 	bool_t create = FALSE;
1888 	cid *cidp = (cid *)&clientid;
1889 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1890 
1891 	/* If we're a cluster and the nodeid isn't right, short-circuit */
1892 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1893 		return (NULL);
1894 
1895 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1896 
1897 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1898 	    &create, NULL, RFS4_DBS_VALID);
1899 
1900 	rw_exit(&nsrv4->rfs4_findclient_lock);
1901 
1902 	if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1903 		rfs4_client_rele(cp);
1904 		return (NULL);
1905 	} else {
1906 		return (cp);
1907 	}
1908 }
1909 
1910 static uint32_t
1911 clntip_hash(void *key)
1912 {
1913 	struct sockaddr *addr = key;
1914 	int i, len = 0;
1915 	uint32_t hash = 0;
1916 	char *ptr;
1917 
1918 	if (addr->sa_family == AF_INET) {
1919 		struct sockaddr_in *a = (struct sockaddr_in *)addr;
1920 		len = sizeof (struct in_addr);
1921 		ptr = (char *)&a->sin_addr;
1922 	} else if (addr->sa_family == AF_INET6) {
1923 		struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
1924 		len = sizeof (struct in6_addr);
1925 		ptr = (char *)&a->sin6_addr;
1926 	} else
1927 		return (0);
1928 
1929 	for (i = 0; i < len; i++) {
1930 		hash <<= 1;
1931 		hash += (uint_t)ptr[i];
1932 	}
1933 	return (hash);
1934 }
1935 
1936 static bool_t
1937 clntip_compare(rfs4_entry_t entry, void *key)
1938 {
1939 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1940 	struct sockaddr *addr = key;
1941 	int len = 0;
1942 	char *p1, *p2;
1943 
1944 	if (addr->sa_family == AF_INET) {
1945 		struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1946 		struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1947 		len = sizeof (struct in_addr);
1948 		p1 = (char *)&a1->sin_addr;
1949 		p2 = (char *)&a2->sin_addr;
1950 	} else if (addr->sa_family == AF_INET6) {
1951 		struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1952 		struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1953 		len = sizeof (struct in6_addr);
1954 		p1 = (char *)&a1->sin6_addr;
1955 		p2 = (char *)&a2->sin6_addr;
1956 	} else
1957 		return (0);
1958 
1959 	return (bcmp(p1, p2, len) == 0);
1960 }
1961 
1962 static void *
1963 clntip_mkkey(rfs4_entry_t entry)
1964 {
1965 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1966 
1967 	return (&cp->ri_addr);
1968 }
1969 
1970 static bool_t
1971 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1972 {
1973 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1974 
1975 	if (rfs4_dbe_is_invalid(cp->ri_dbe))
1976 		return (TRUE);
1977 	return (FALSE);
1978 }
1979 
/*
 * Destroy callback for the client IP table.  Intentionally empty:
 * rfs4_clntip_create() only copies an address into the entry and sets a
 * flag, so there is nothing to release here.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
1985 
1986 static bool_t
1987 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1988 {
1989 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1990 	struct sockaddr *ca = (struct sockaddr *)arg;
1991 
1992 	/* Copy client's IP address */
1993 	if (ca->sa_family == AF_INET)
1994 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1995 	else if (ca->sa_family == AF_INET6)
1996 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1997 	else
1998 		return (FALSE);
1999 	cp->ri_no_referrals = 1;
2000 
2001 	return (TRUE);
2002 }
2003 
2004 rfs4_clntip_t *
2005 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
2006 {
2007 	rfs4_clntip_t *cp;
2008 	nfs4_srv_t *nsrv4;
2009 
2010 	nsrv4 = nfs4_get_srv();
2011 
2012 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2013 
2014 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2015 	    create, addr, RFS4_DBS_VALID);
2016 
2017 	rw_exit(&nsrv4->rfs4_findclient_lock);
2018 
2019 	return (cp);
2020 }
2021 
2022 void
2023 rfs4_invalidate_clntip(struct sockaddr *addr)
2024 {
2025 	rfs4_clntip_t *cp;
2026 	bool_t create = FALSE;
2027 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2028 
2029 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2030 
2031 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2032 	    &create, NULL, RFS4_DBS_VALID);
2033 	if (cp == NULL) {
2034 		rw_exit(&nsrv4->rfs4_findclient_lock);
2035 		return;
2036 	}
2037 	rfs4_dbe_invalidate(cp->ri_dbe);
2038 	rfs4_dbe_rele(cp->ri_dbe);
2039 
2040 	rw_exit(&nsrv4->rfs4_findclient_lock);
2041 }
2042 
2043 bool_t
2044 rfs4_lease_expired(rfs4_client_t *cp)
2045 {
2046 	bool_t rc;
2047 
2048 	rfs4_dbe_lock(cp->rc_dbe);
2049 
2050 	/*
2051 	 * If the admin has executed clear_locks for this
2052 	 * client id, force expire will be set, so no need
2053 	 * to calculate anything because it's "outa here".
2054 	 */
2055 	if (cp->rc_forced_expire) {
2056 		rc = TRUE;
2057 	} else {
2058 		rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2059 	}
2060 
2061 	/*
2062 	 * If the lease has expired we will also want
2063 	 * to remove any stable storage state data. So
2064 	 * mark the client id accordingly.
2065 	 */
2066 	if (!cp->rc_ss_remove)
2067 		cp->rc_ss_remove = (rc == TRUE);
2068 
2069 	rfs4_dbe_unlock(cp->rc_dbe);
2070 
2071 	return (rc);
2072 }
2073 
2074 void
2075 rfs4_update_lease(rfs4_client_t *cp)
2076 {
2077 	rfs4_dbe_lock(cp->rc_dbe);
2078 	if (!cp->rc_forced_expire)
2079 		cp->rc_last_access = gethrestime_sec();
2080 	rfs4_dbe_unlock(cp->rc_dbe);
2081 }
2082 
2083 
2084 static bool_t
2085 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2086 {
2087 	bool_t rc;
2088 
2089 	if (a->clientid != b->clientid)
2090 		return (FALSE);
2091 
2092 	if (a->owner_len != b->owner_len)
2093 		return (FALSE);
2094 
2095 	rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2096 
2097 	return (rc);
2098 }
2099 
2100 static uint_t
2101 openowner_hash(void *key)
2102 {
2103 	int i;
2104 	open_owner4 *openowner = key;
2105 	uint_t hash = 0;
2106 
2107 	for (i = 0; i < openowner->owner_len; i++) {
2108 		hash <<= 4;
2109 		hash += (uint_t)openowner->owner_val[i];
2110 	}
2111 	hash += (uint_t)openowner->clientid;
2112 	hash |= (openowner->clientid >> 32);
2113 
2114 	return (hash);
2115 }
2116 
2117 static bool_t
2118 openowner_compare(rfs4_entry_t u_entry, void *key)
2119 {
2120 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2121 	open_owner4 *arg = key;
2122 
2123 	return (EQOPENOWNER(&oo->ro_owner, arg));
2124 }
2125 
2126 void *
2127 openowner_mkkey(rfs4_entry_t u_entry)
2128 {
2129 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2130 
2131 	return (&oo->ro_owner);
2132 }
2133 
2134 /* ARGSUSED */
2135 static bool_t
2136 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2137 {
2138 	/* openstateid held us and did all needed delay */
2139 	return (TRUE);
2140 }
2141 
2142 static void
2143 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2144 {
2145 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2146 
2147 	/* Remove open owner from client's lists of open owners */
2148 	rfs4_dbe_lock(oo->ro_client->rc_dbe);
2149 	list_remove(&oo->ro_client->rc_openownerlist, oo);
2150 	rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2151 
2152 	/* One less reference to the client */
2153 	rfs4_client_rele(oo->ro_client);
2154 	oo->ro_client = NULL;
2155 
2156 	/* Free the last reply for this lock owner */
2157 	rfs4_free_reply(&oo->ro_reply);
2158 
2159 	if (oo->ro_reply_fh.nfs_fh4_val) {
2160 		kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2161 		    oo->ro_reply_fh.nfs_fh4_len);
2162 		oo->ro_reply_fh.nfs_fh4_val = NULL;
2163 		oo->ro_reply_fh.nfs_fh4_len = 0;
2164 	}
2165 
2166 	rfs4_sw_destroy(&oo->ro_sw);
2167 	list_destroy(&oo->ro_statelist);
2168 
2169 	/* Free the lock owner id */
2170 	kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2171 }
2172 
2173 void
2174 rfs4_openowner_rele(rfs4_openowner_t *oo)
2175 {
2176 	rfs4_dbe_rele(oo->ro_dbe);
2177 }
2178 
2179 static bool_t
2180 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2181 {
2182 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2183 	rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2184 	open_owner4 *openowner = &argp->ro_owner;
2185 	seqid4 seqid = argp->ro_open_seqid;
2186 	rfs4_client_t *cp;
2187 	bool_t create = FALSE;
2188 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2189 
2190 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2191 
2192 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2193 	    &openowner->clientid,
2194 	    &create, NULL, RFS4_DBS_VALID);
2195 
2196 	rw_exit(&nsrv4->rfs4_findclient_lock);
2197 
2198 	if (cp == NULL)
2199 		return (FALSE);
2200 
2201 	oo->ro_reply_fh.nfs_fh4_len = 0;
2202 	oo->ro_reply_fh.nfs_fh4_val = NULL;
2203 
2204 	oo->ro_owner.clientid = openowner->clientid;
2205 	oo->ro_owner.owner_val =
2206 	    kmem_alloc(openowner->owner_len, KM_SLEEP);
2207 
2208 	bcopy(openowner->owner_val,
2209 	    oo->ro_owner.owner_val, openowner->owner_len);
2210 
2211 	oo->ro_owner.owner_len = openowner->owner_len;
2212 
2213 	oo->ro_need_confirm = TRUE;
2214 
2215 	rfs4_sw_init(&oo->ro_sw);
2216 
2217 	oo->ro_open_seqid = seqid;
2218 	bzero(&oo->ro_reply, sizeof (nfs_resop4));
2219 	oo->ro_client = cp;
2220 	oo->ro_cr_set = NULL;
2221 
2222 	list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2223 	    offsetof(rfs4_state_t, rs_node));
2224 
2225 	/* Insert openowner into client's open owner list */
2226 	rfs4_dbe_lock(cp->rc_dbe);
2227 	list_insert_tail(&cp->rc_openownerlist, oo);
2228 	rfs4_dbe_unlock(cp->rc_dbe);
2229 
2230 	return (TRUE);
2231 }
2232 
2233 rfs4_openowner_t *
2234 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2235 {
2236 	rfs4_openowner_t *oo;
2237 	rfs4_openowner_t arg;
2238 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2239 
2240 	arg.ro_owner = *openowner;
2241 	arg.ro_open_seqid = seqid;
2242 	/* CSTYLED */
2243 	oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2244 	    create, &arg, RFS4_DBS_VALID);
2245 
2246 	return (oo);
2247 }
2248 
2249 void
2250 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2251 {
2252 
2253 	rfs4_dbe_lock(oo->ro_dbe);
2254 
2255 	oo->ro_open_seqid++;
2256 
2257 	rfs4_dbe_unlock(oo->ro_dbe);
2258 }
2259 
2260 void
2261 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2262 {
2263 
2264 	rfs4_dbe_lock(oo->ro_dbe);
2265 
2266 	rfs4_free_reply(&oo->ro_reply);
2267 
2268 	rfs4_copy_reply(&oo->ro_reply, resp);
2269 
2270 	/* Save the filehandle if provided and free if not used */
2271 	if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2272 	    fh && fh->nfs_fh4_len) {
2273 		if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2274 			oo->ro_reply_fh.nfs_fh4_val =
2275 			    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2276 		nfs_fh4_copy(fh, &oo->ro_reply_fh);
2277 	} else {
2278 		if (oo->ro_reply_fh.nfs_fh4_val) {
2279 			kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2280 			    oo->ro_reply_fh.nfs_fh4_len);
2281 			oo->ro_reply_fh.nfs_fh4_val = NULL;
2282 			oo->ro_reply_fh.nfs_fh4_len = 0;
2283 		}
2284 	}
2285 
2286 	rfs4_dbe_unlock(oo->ro_dbe);
2287 }
2288 
2289 static bool_t
2290 lockowner_compare(rfs4_entry_t u_entry, void *key)
2291 {
2292 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2293 	lock_owner4 *b = (lock_owner4 *)key;
2294 
2295 	if (lo->rl_owner.clientid != b->clientid)
2296 		return (FALSE);
2297 
2298 	if (lo->rl_owner.owner_len != b->owner_len)
2299 		return (FALSE);
2300 
2301 	return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2302 	    lo->rl_owner.owner_len) == 0);
2303 }
2304 
2305 void *
2306 lockowner_mkkey(rfs4_entry_t u_entry)
2307 {
2308 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2309 
2310 	return (&lo->rl_owner);
2311 }
2312 
2313 static uint32_t
2314 lockowner_hash(void *key)
2315 {
2316 	int i;
2317 	lock_owner4 *lockowner = key;
2318 	uint_t hash = 0;
2319 
2320 	for (i = 0; i < lockowner->owner_len; i++) {
2321 		hash <<= 4;
2322 		hash += (uint_t)lockowner->owner_val[i];
2323 	}
2324 	hash += (uint_t)lockowner->clientid;
2325 	hash |= (lockowner->clientid >> 32);
2326 
2327 	return (hash);
2328 }
2329 
/*
 * Hash for pid keys.  The key is the pid value itself carried in a
 * pointer, so the hash is simply that value truncated to 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2335 
2336 static void *
2337 pid_mkkey(rfs4_entry_t u_entry)
2338 {
2339 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2340 
2341 	return ((void *)(uintptr_t)lo->rl_pid);
2342 }
2343 
2344 static bool_t
2345 pid_compare(rfs4_entry_t u_entry, void *key)
2346 {
2347 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2348 
2349 	return (lo->rl_pid == (pid_t)(uintptr_t)key);
2350 }
2351 
2352 static void
2353 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2354 {
2355 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2356 
2357 	/* Free the lock owner id */
2358 	kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2359 	rfs4_client_rele(lo->rl_client);
2360 }
2361 
2362 void
2363 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2364 {
2365 	rfs4_dbe_rele(lo->rl_dbe);
2366 }
2367 
2368 /* ARGSUSED */
2369 static bool_t
2370 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2371 {
2372 	/*
2373 	 * Since expiry is called with no other references on
2374 	 * this struct, go ahead and have it removed.
2375 	 */
2376 	return (TRUE);
2377 }
2378 
2379 static bool_t
2380 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2381 {
2382 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2383 	lock_owner4 *lockowner = (lock_owner4 *)arg;
2384 	rfs4_client_t *cp;
2385 	bool_t create = FALSE;
2386 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2387 
2388 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2389 
2390 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2391 	    &lockowner->clientid,
2392 	    &create, NULL, RFS4_DBS_VALID);
2393 
2394 	rw_exit(&nsrv4->rfs4_findclient_lock);
2395 
2396 	if (cp == NULL)
2397 		return (FALSE);
2398 
2399 	/* Reference client */
2400 	lo->rl_client = cp;
2401 	lo->rl_owner.clientid = lockowner->clientid;
2402 	lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2403 	bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2404 	    lockowner->owner_len);
2405 	lo->rl_owner.owner_len = lockowner->owner_len;
2406 	lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2407 
2408 	return (TRUE);
2409 }
2410 
2411 rfs4_lockowner_t *
2412 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2413 {
2414 	rfs4_lockowner_t *lo;
2415 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2416 
2417 	/* CSTYLED */
2418 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2419 	    create, lockowner, RFS4_DBS_VALID);
2420 
2421 	return (lo);
2422 }
2423 
2424 rfs4_lockowner_t *
2425 rfs4_findlockowner_by_pid(pid_t pid)
2426 {
2427 	rfs4_lockowner_t *lo;
2428 	bool_t create = FALSE;
2429 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2430 
2431 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2432 	    (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2433 
2434 	return (lo);
2435 }
2436 
2437 
2438 static uint32_t
2439 file_hash(void *key)
2440 {
2441 	return (ADDRHASH(key));
2442 }
2443 
2444 static void *
2445 file_mkkey(rfs4_entry_t u_entry)
2446 {
2447 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2448 
2449 	return (fp->rf_vp);
2450 }
2451 
2452 static bool_t
2453 file_compare(rfs4_entry_t u_entry, void *key)
2454 {
2455 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2456 
2457 	return (fp->rf_vp == (vnode_t *)key);
2458 }
2459 
2460 static void
2461 rfs4_file_destroy(rfs4_entry_t u_entry)
2462 {
2463 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2464 
2465 	list_destroy(&fp->rf_delegstatelist);
2466 
2467 	if (fp->rf_filehandle.nfs_fh4_val)
2468 		kmem_free(fp->rf_filehandle.nfs_fh4_val,
2469 		    fp->rf_filehandle.nfs_fh4_len);
2470 	cv_destroy(fp->rf_dinfo.rd_recall_cv);
2471 	if (fp->rf_vp) {
2472 		vnode_t *vp = fp->rf_vp;
2473 
2474 		mutex_enter(&vp->v_vsd_lock);
2475 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
2476 		mutex_exit(&vp->v_vsd_lock);
2477 		VN_RELE(vp);
2478 		fp->rf_vp = NULL;
2479 	}
2480 	rw_destroy(&fp->rf_file_rwlock);
2481 }
2482 
2483 /*
2484  * Used to unlock the underlying dbe struct only
2485  */
2486 void
2487 rfs4_file_rele(rfs4_file_t *fp)
2488 {
2489 	rfs4_dbe_rele(fp->rf_dbe);
2490 }
2491 
2492 typedef struct {
2493     vnode_t *vp;
2494     nfs_fh4 *fh;
2495 } rfs4_fcreate_arg;
2496 
2497 static bool_t
2498 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2499 {
2500 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2501 	rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2502 	vnode_t *vp = ap->vp;
2503 	nfs_fh4 *fh = ap->fh;
2504 
2505 	VN_HOLD(vp);
2506 
2507 	fp->rf_filehandle.nfs_fh4_len = 0;
2508 	fp->rf_filehandle.nfs_fh4_val = NULL;
2509 	ASSERT(fh && fh->nfs_fh4_len);
2510 	if (fh && fh->nfs_fh4_len) {
2511 		fp->rf_filehandle.nfs_fh4_val =
2512 		    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2513 		nfs_fh4_copy(fh, &fp->rf_filehandle);
2514 	}
2515 	fp->rf_vp = vp;
2516 
2517 	list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2518 	    offsetof(rfs4_deleg_state_t, rds_node));
2519 
2520 	fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2521 	fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2522 
2523 	mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2524 	cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2525 
2526 	fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2527 
2528 	rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2529 
2530 	mutex_enter(&vp->v_vsd_lock);
2531 	VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2532 	mutex_exit(&vp->v_vsd_lock);
2533 
2534 	return (TRUE);
2535 }
2536 
2537 rfs4_file_t *
2538 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2539 {
2540 	rfs4_file_t *fp;
2541 	rfs4_fcreate_arg arg;
2542 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2543 
2544 	arg.vp = vp;
2545 	arg.fh = fh;
2546 
2547 	if (*create == TRUE)
2548 		/* CSTYLED */
2549 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2550 		    &arg, RFS4_DBS_VALID);
2551 	else {
2552 		mutex_enter(&vp->v_vsd_lock);
2553 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2554 		if (fp) {
2555 			rfs4_dbe_lock(fp->rf_dbe);
2556 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2557 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2558 				rfs4_dbe_unlock(fp->rf_dbe);
2559 				fp = NULL;
2560 			} else {
2561 				rfs4_dbe_hold(fp->rf_dbe);
2562 				rfs4_dbe_unlock(fp->rf_dbe);
2563 			}
2564 		}
2565 		mutex_exit(&vp->v_vsd_lock);
2566 	}
2567 	return (fp);
2568 }
2569 
2570 /*
2571  * Find a file in the db and once it is located, take the rw lock.
2572  * Need to check the vnode pointer and if it does not exist (it was
2573  * removed between the db location and check) redo the find.  This
2574  * assumes that a file struct that has a NULL vnode pointer is marked
2575  * at 'invalid' and will not be found in the db the second time
2576  * around.
2577  */
2578 rfs4_file_t *
2579 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2580 {
2581 	rfs4_file_t *fp;
2582 	rfs4_fcreate_arg arg;
2583 	bool_t screate = *create;
2584 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2585 
2586 	if (screate == FALSE) {
2587 		mutex_enter(&vp->v_vsd_lock);
2588 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2589 		if (fp) {
2590 			rfs4_dbe_lock(fp->rf_dbe);
2591 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2592 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2593 				rfs4_dbe_unlock(fp->rf_dbe);
2594 				mutex_exit(&vp->v_vsd_lock);
2595 				fp = NULL;
2596 			} else {
2597 				rfs4_dbe_hold(fp->rf_dbe);
2598 				rfs4_dbe_unlock(fp->rf_dbe);
2599 				mutex_exit(&vp->v_vsd_lock);
2600 				rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2601 				if (fp->rf_vp == NULL) {
2602 					rw_exit(&fp->rf_file_rwlock);
2603 					rfs4_file_rele(fp);
2604 					fp = NULL;
2605 				}
2606 			}
2607 		} else {
2608 			mutex_exit(&vp->v_vsd_lock);
2609 		}
2610 	} else {
2611 retry:
2612 		arg.vp = vp;
2613 		arg.fh = fh;
2614 
2615 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2616 		    create, &arg, RFS4_DBS_VALID);
2617 		if (fp != NULL) {
2618 			rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2619 			if (fp->rf_vp == NULL) {
2620 				rw_exit(&fp->rf_file_rwlock);
2621 				rfs4_file_rele(fp);
2622 				*create = screate;
2623 				goto retry;
2624 			}
2625 		}
2626 	}
2627 
2628 	return (fp);
2629 }
2630 
2631 static uint32_t
2632 lo_state_hash(void *key)
2633 {
2634 	stateid_t *id = key;
2635 
2636 	return (id->bits.ident+id->bits.pid);
2637 }
2638 
2639 static bool_t
2640 lo_state_compare(rfs4_entry_t u_entry, void *key)
2641 {
2642 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2643 	stateid_t *id = key;
2644 	bool_t rc;
2645 
2646 	rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2647 	    lsp->rls_lockid.bits.type == id->bits.type &&
2648 	    lsp->rls_lockid.bits.ident == id->bits.ident &&
2649 	    lsp->rls_lockid.bits.pid == id->bits.pid);
2650 
2651 	return (rc);
2652 }
2653 
2654 static void *
2655 lo_state_mkkey(rfs4_entry_t u_entry)
2656 {
2657 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2658 
2659 	return (&lsp->rls_lockid);
2660 }
2661 
2662 static bool_t
2663 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2664 {
2665 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2666 
2667 	if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2668 		return (TRUE);
2669 	if (lsp->rls_state->rs_closed)
2670 		return (TRUE);
2671 	return ((gethrestime_sec() -
2672 	    lsp->rls_state->rs_owner->ro_client->rc_last_access
2673 	    > rfs4_lease_time));
2674 }
2675 
2676 static void
2677 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2678 {
2679 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2680 
2681 	rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2682 	list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2683 	rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2684 
2685 	rfs4_sw_destroy(&lsp->rls_sw);
2686 
2687 	/* Make sure to release the file locks */
2688 	if (lsp->rls_locks_cleaned == FALSE) {
2689 		lsp->rls_locks_cleaned = TRUE;
2690 		if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2691 			/* Is the PxFS kernel module loaded? */
2692 			if (lm_remove_file_locks != NULL) {
2693 				int new_sysid;
2694 
2695 				/* Encode the cluster nodeid in new sysid */
2696 				new_sysid =
2697 				    lsp->rls_locker->rl_client->rc_sysidt;
2698 				lm_set_nlmid_flk(&new_sysid);
2699 
2700 				/*
2701 				 * This PxFS routine removes file locks for a
2702 				 * client over all nodes of a cluster.
2703 				 */
2704 				DTRACE_PROBE1(nfss_i_clust_rm_lck,
2705 				    int, new_sysid);
2706 				(*lm_remove_file_locks)(new_sysid);
2707 			} else {
2708 				(void) cleanlocks(
2709 				    lsp->rls_state->rs_finfo->rf_vp,
2710 				    lsp->rls_locker->rl_pid,
2711 				    lsp->rls_locker->rl_client->rc_sysidt);
2712 			}
2713 		}
2714 	}
2715 
2716 	/* Free the last reply for this state */
2717 	rfs4_free_reply(&lsp->rls_reply);
2718 
2719 	rfs4_lockowner_rele(lsp->rls_locker);
2720 	lsp->rls_locker = NULL;
2721 
2722 	rfs4_state_rele_nounlock(lsp->rls_state);
2723 	lsp->rls_state = NULL;
2724 }
2725 
2726 static bool_t
2727 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2728 {
2729 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2730 	rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2731 	rfs4_lockowner_t *lo = argp->rls_locker;
2732 	rfs4_state_t *sp = argp->rls_state;
2733 
2734 	lsp->rls_state = sp;
2735 
2736 	lsp->rls_lockid = sp->rs_stateid;
2737 	lsp->rls_lockid.bits.type = LOCKID;
2738 	lsp->rls_lockid.bits.chgseq = 0;
2739 	lsp->rls_lockid.bits.pid = lo->rl_pid;
2740 
2741 	lsp->rls_locks_cleaned = FALSE;
2742 	lsp->rls_lock_completed = FALSE;
2743 
2744 	rfs4_sw_init(&lsp->rls_sw);
2745 
2746 	/* Attached the supplied lock owner */
2747 	rfs4_dbe_hold(lo->rl_dbe);
2748 	lsp->rls_locker = lo;
2749 
2750 	rfs4_dbe_lock(sp->rs_dbe);
2751 	list_insert_tail(&sp->rs_lostatelist, lsp);
2752 	rfs4_dbe_hold(sp->rs_dbe);
2753 	rfs4_dbe_unlock(sp->rs_dbe);
2754 
2755 	return (TRUE);
2756 }
2757 
2758 void
2759 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2760 {
2761 	if (unlock_fp == TRUE)
2762 		rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2763 	rfs4_dbe_rele(lsp->rls_dbe);
2764 }
2765 
2766 static rfs4_lo_state_t *
2767 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2768 {
2769 	rfs4_lo_state_t *lsp;
2770 	bool_t create = FALSE;
2771 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2772 
2773 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2774 	    &create, NULL, RFS4_DBS_VALID);
2775 	if (lock_fp == TRUE && lsp != NULL)
2776 		rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2777 
2778 	return (lsp);
2779 }
2780 
2781 
2782 static uint32_t
2783 lo_state_lo_hash(void *key)
2784 {
2785 	rfs4_lo_state_t *lsp = key;
2786 
2787 	return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2788 }
2789 
2790 static bool_t
2791 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2792 {
2793 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2794 	rfs4_lo_state_t *keyp = key;
2795 
2796 	return (keyp->rls_locker == lsp->rls_locker &&
2797 	    keyp->rls_state == lsp->rls_state);
2798 }
2799 
2800 static void *
2801 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2802 {
2803 	return (u_entry);
2804 }
2805 
2806 rfs4_lo_state_t *
2807 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2808     bool_t *create)
2809 {
2810 	rfs4_lo_state_t *lsp;
2811 	rfs4_lo_state_t arg;
2812 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2813 
2814 	arg.rls_locker = lo;
2815 	arg.rls_state = sp;
2816 
2817 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2818 	    &arg, create, &arg, RFS4_DBS_VALID);
2819 
2820 	return (lsp);
2821 }
2822 
2823 static stateid_t
2824 get_stateid(id_t eid)
2825 {
2826 	stateid_t id;
2827 	nfs4_srv_t *nsrv4;
2828 
2829 	nsrv4 = nfs4_get_srv();
2830 
2831 	id.bits.boottime = nsrv4->rfs4_start_time;
2832 	id.bits.ident = eid;
2833 	id.bits.chgseq = 0;
2834 	id.bits.type = 0;
2835 	id.bits.pid = 0;
2836 
2837 	/*
2838 	 * If we are booted as a cluster node, embed our nodeid.
2839 	 * We've already done sanity checks in rfs4_client_create() so no
2840 	 * need to repeat them here.
2841 	 */
2842 	id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2843 	    clconf_get_nodeid() : 0;
2844 
2845 	return (id);
2846 }
2847 
2848 /*
2849  * For use only when booted as a cluster node.
2850  * Returns TRUE if the embedded nodeid indicates that this stateid was
2851  * generated on another node.
2852  */
2853 static int
2854 foreign_stateid(stateid_t *id)
2855 {
2856 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2857 	return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2858 }
2859 
2860 /*
2861  * For use only when booted as a cluster node.
2862  * Returns TRUE if the embedded nodeid indicates that this clientid was
2863  * generated on another node.
2864  */
2865 static int
2866 foreign_clientid(cid *cidp)
2867 {
2868 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2869 	return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2870 	    (uint32_t)clconf_get_nodeid());
2871 }
2872 
2873 /*
2874  * For use only when booted as a cluster node.
2875  * Embed our cluster nodeid into the clientid.
2876  */
2877 static void
2878 embed_nodeid(cid *cidp)
2879 {
2880 	int clnodeid;
2881 	/*
2882 	 * Currently, our state tables are small enough that their
2883 	 * ids will leave enough bits free for the nodeid. If the
2884 	 * tables become larger, we mustn't overwrite the id.
2885 	 * Equally, we only have room for so many bits of nodeid, so
2886 	 * must check that too.
2887 	 */
2888 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2889 	ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2890 	clnodeid = clconf_get_nodeid();
2891 	ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2892 	ASSERT(clnodeid != NODEID_UNKNOWN);
2893 	cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2894 }
2895 
2896 static uint32_t
2897 state_hash(void *key)
2898 {
2899 	stateid_t *ip = (stateid_t *)key;
2900 
2901 	return (ip->bits.ident);
2902 }
2903 
2904 static bool_t
2905 state_compare(rfs4_entry_t u_entry, void *key)
2906 {
2907 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2908 	stateid_t *id = (stateid_t *)key;
2909 	bool_t rc;
2910 
2911 	rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2912 	    sp->rs_stateid.bits.ident == id->bits.ident);
2913 
2914 	return (rc);
2915 }
2916 
2917 static void *
2918 state_mkkey(rfs4_entry_t u_entry)
2919 {
2920 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2921 
2922 	return (&sp->rs_stateid);
2923 }
2924 
2925 static void
2926 rfs4_state_destroy(rfs4_entry_t u_entry)
2927 {
2928 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2929 
2930 	/* remove from openowner list */
2931 	rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2932 	list_remove(&sp->rs_owner->ro_statelist, sp);
2933 	rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2934 
2935 	list_destroy(&sp->rs_lostatelist);
2936 
2937 	/* release any share locks for this stateid if it's still open */
2938 	if (!sp->rs_closed) {
2939 		rfs4_dbe_lock(sp->rs_dbe);
2940 		(void) rfs4_unshare(sp);
2941 		rfs4_dbe_unlock(sp->rs_dbe);
2942 	}
2943 
2944 	/* Were done with the file */
2945 	rfs4_file_rele(sp->rs_finfo);
2946 	sp->rs_finfo = NULL;
2947 
2948 	/* And now with the openowner */
2949 	rfs4_openowner_rele(sp->rs_owner);
2950 	sp->rs_owner = NULL;
2951 }
2952 
2953 static void
2954 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2955 {
2956 	rfs4_dbe_rele(sp->rs_dbe);
2957 }
2958 
2959 void
2960 rfs4_state_rele(rfs4_state_t *sp)
2961 {
2962 	rw_exit(&sp->rs_finfo->rf_file_rwlock);
2963 	rfs4_dbe_rele(sp->rs_dbe);
2964 }
2965 
2966 static uint32_t
2967 deleg_hash(void *key)
2968 {
2969 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2970 
2971 	return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2972 }
2973 
2974 static bool_t
2975 deleg_compare(rfs4_entry_t u_entry, void *key)
2976 {
2977 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2978 	rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2979 
2980 	return (dsp->rds_client == kdsp->rds_client &&
2981 	    dsp->rds_finfo == kdsp->rds_finfo);
2982 }
2983 
2984 static void *
2985 deleg_mkkey(rfs4_entry_t u_entry)
2986 {
2987 	return (u_entry);
2988 }
2989 
2990 static uint32_t
2991 deleg_state_hash(void *key)
2992 {
2993 	stateid_t *ip = (stateid_t *)key;
2994 
2995 	return (ip->bits.ident);
2996 }
2997 
2998 static bool_t
2999 deleg_state_compare(rfs4_entry_t u_entry, void *key)
3000 {
3001 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3002 	stateid_t *id = (stateid_t *)key;
3003 	bool_t rc;
3004 
3005 	if (id->bits.type != DELEGID)
3006 		return (FALSE);
3007 
3008 	rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
3009 	    dsp->rds_delegid.bits.ident == id->bits.ident);
3010 
3011 	return (rc);
3012 }
3013 
3014 static void *
3015 deleg_state_mkkey(rfs4_entry_t u_entry)
3016 {
3017 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3018 
3019 	return (&dsp->rds_delegid);
3020 }
3021 
3022 static bool_t
3023 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3024 {
3025 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3026 
3027 	if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3028 		return (TRUE);
3029 
3030 	if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3031 		return (TRUE);
3032 
3033 	if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3034 	    > rfs4_lease_time)) {
3035 		rfs4_dbe_invalidate(dsp->rds_dbe);
3036 		return (TRUE);
3037 	}
3038 
3039 	return (FALSE);
3040 }
3041 
3042 static bool_t
3043 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3044 {
3045 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3046 	rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3047 	rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3048 
3049 	rfs4_dbe_hold(fp->rf_dbe);
3050 	rfs4_dbe_hold(cp->rc_dbe);
3051 
3052 	dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3053 	dsp->rds_delegid.bits.type = DELEGID;
3054 	dsp->rds_finfo = fp;
3055 	dsp->rds_client = cp;
3056 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
3057 
3058 	dsp->rds_time_granted = gethrestime_sec();	/* observability */
3059 	dsp->rds_time_revoked = 0;
3060 
3061 	list_link_init(&dsp->rds_node);
3062 
3063 	return (TRUE);
3064 }
3065 
3066 static void
3067 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3068 {
3069 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3070 
3071 	/* return delegation if necessary */
3072 	rfs4_return_deleg(dsp, FALSE);
3073 
3074 	/* Were done with the file */
3075 	rfs4_file_rele(dsp->rds_finfo);
3076 	dsp->rds_finfo = NULL;
3077 
3078 	/* And now with the openowner */
3079 	rfs4_client_rele(dsp->rds_client);
3080 	dsp->rds_client = NULL;
3081 }
3082 
3083 rfs4_deleg_state_t *
3084 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3085 {
3086 	rfs4_deleg_state_t ds, *dsp;
3087 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3088 
3089 	ds.rds_client = sp->rs_owner->ro_client;
3090 	ds.rds_finfo = sp->rs_finfo;
3091 
3092 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3093 	    create, &ds, RFS4_DBS_VALID);
3094 
3095 	return (dsp);
3096 }
3097 
3098 rfs4_deleg_state_t *
3099 rfs4_finddelegstate(stateid_t *id)
3100 {
3101 	rfs4_deleg_state_t *dsp;
3102 	bool_t create = FALSE;
3103 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3104 
3105 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3106 	    id, &create, NULL, RFS4_DBS_VALID);
3107 
3108 	return (dsp);
3109 }
3110 
3111 void
3112 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3113 {
3114 	rfs4_dbe_rele(dsp->rds_dbe);
3115 }
3116 
3117 void
3118 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3119 {
3120 
3121 	rfs4_dbe_lock(lsp->rls_dbe);
3122 
3123 	/*
3124 	 * If we are skipping sequence id checking, this means that
3125 	 * this is the first lock request and therefore the sequence
3126 	 * id does not need to be updated.  This only happens on the
3127 	 * first lock request for a lockowner
3128 	 */
3129 	if (!lsp->rls_skip_seqid_check)
3130 		lsp->rls_seqid++;
3131 
3132 	rfs4_dbe_unlock(lsp->rls_dbe);
3133 }
3134 
3135 void
3136 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3137 {
3138 
3139 	rfs4_dbe_lock(lsp->rls_dbe);
3140 
3141 	rfs4_free_reply(&lsp->rls_reply);
3142 
3143 	rfs4_copy_reply(&lsp->rls_reply, resp);
3144 
3145 	rfs4_dbe_unlock(lsp->rls_dbe);
3146 }
3147 
3148 void
3149 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3150     bool_t close_of_client)
3151 {
3152 	rfs4_state_t *sp;
3153 
3154 	rfs4_dbe_lock(oo->ro_dbe);
3155 
3156 	for (sp = list_head(&oo->ro_statelist); sp != NULL;
3157 	    sp = list_next(&oo->ro_statelist, sp)) {
3158 		rfs4_state_close(sp, FALSE, close_of_client, CRED());
3159 		if (invalidate == TRUE)
3160 			rfs4_dbe_invalidate(sp->rs_dbe);
3161 	}
3162 
3163 	rfs4_dbe_invalidate(oo->ro_dbe);
3164 	rfs4_dbe_unlock(oo->ro_dbe);
3165 }
3166 
3167 static uint32_t
3168 state_owner_file_hash(void *key)
3169 {
3170 	rfs4_state_t *sp = key;
3171 
3172 	return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3173 }
3174 
3175 static bool_t
3176 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3177 {
3178 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3179 	rfs4_state_t *arg = key;
3180 
3181 	if (sp->rs_closed == TRUE)
3182 		return (FALSE);
3183 
3184 	return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3185 }
3186 
3187 static void *
3188 state_owner_file_mkkey(rfs4_entry_t u_entry)
3189 {
3190 	return (u_entry);
3191 }
3192 
3193 static uint32_t
3194 state_file_hash(void *key)
3195 {
3196 	return (ADDRHASH(key));
3197 }
3198 
3199 static bool_t
3200 state_file_compare(rfs4_entry_t u_entry, void *key)
3201 {
3202 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3203 	rfs4_file_t *fp = key;
3204 
3205 	if (sp->rs_closed == TRUE)
3206 		return (FALSE);
3207 
3208 	return (fp == sp->rs_finfo);
3209 }
3210 
3211 static void *
3212 state_file_mkkey(rfs4_entry_t u_entry)
3213 {
3214 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3215 
3216 	return (sp->rs_finfo);
3217 }
3218 
3219 rfs4_state_t *
3220 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3221     bool_t *create)
3222 {
3223 	rfs4_state_t *sp;
3224 	rfs4_state_t key;
3225 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3226 
3227 	key.rs_owner = oo;
3228 	key.rs_finfo = fp;
3229 
3230 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3231 	    &key, create, &key, RFS4_DBS_VALID);
3232 
3233 	return (sp);
3234 }
3235 
3236 /* This returns ANY state struct that refers to this file */
3237 static rfs4_state_t *
3238 rfs4_findstate_by_file(rfs4_file_t *fp)
3239 {
3240 	bool_t create = FALSE;
3241 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3242 
3243 	return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3244 	    &create, fp, RFS4_DBS_VALID));
3245 }
3246 
3247 static bool_t
3248 rfs4_state_expiry(rfs4_entry_t u_entry)
3249 {
3250 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3251 
3252 	if (rfs4_dbe_is_invalid(sp->rs_dbe))
3253 		return (TRUE);
3254 
3255 	if (sp->rs_closed == TRUE &&
3256 	    ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3257 	    > rfs4_lease_time))
3258 		return (TRUE);
3259 
3260 	return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3261 	    > rfs4_lease_time));
3262 }
3263 
3264 static bool_t
3265 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3266 {
3267 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3268 	rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3269 	rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3270 
3271 	rfs4_dbe_hold(fp->rf_dbe);
3272 	rfs4_dbe_hold(oo->ro_dbe);
3273 	sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3274 	sp->rs_stateid.bits.type = OPENID;
3275 	sp->rs_owner = oo;
3276 	sp->rs_finfo = fp;
3277 
3278 	list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3279 	    offsetof(rfs4_lo_state_t, rls_node));
3280 
3281 	/* Insert state on per open owner's list */
3282 	rfs4_dbe_lock(oo->ro_dbe);
3283 	list_insert_tail(&oo->ro_statelist, sp);
3284 	rfs4_dbe_unlock(oo->ro_dbe);
3285 
3286 	return (TRUE);
3287 }
3288 
3289 static rfs4_state_t *
3290 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3291 {
3292 	rfs4_state_t *sp;
3293 	bool_t create = FALSE;
3294 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3295 
3296 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3297 	    &create, NULL, find_invalid);
3298 	if (lock_fp == TRUE && sp != NULL)
3299 		rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3300 
3301 	return (sp);
3302 }
3303 
3304 void
3305 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3306     cred_t *cr)
3307 {
3308 	/* Remove the associated lo_state owners */
3309 	if (!lock_held)
3310 		rfs4_dbe_lock(sp->rs_dbe);
3311 
3312 	/*
3313 	 * If refcnt == 0, the dbe is about to be destroyed.
3314 	 * lock state will be released by the reaper thread.
3315 	 */
3316 
3317 	if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3318 		if (sp->rs_closed == FALSE) {
3319 			rfs4_release_share_lock_state(sp, cr, close_of_client);
3320 			sp->rs_closed = TRUE;
3321 		}
3322 	}
3323 
3324 	if (!lock_held)
3325 		rfs4_dbe_unlock(sp->rs_dbe);
3326 }
3327 
3328 /*
3329  * Remove all state associated with the given client.
3330  */
3331 void
3332 rfs4_client_state_remove(rfs4_client_t *cp)
3333 {
3334 	rfs4_openowner_t *oo;
3335 
3336 	rfs4_dbe_lock(cp->rc_dbe);
3337 
3338 	for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3339 	    oo = list_next(&cp->rc_openownerlist, oo)) {
3340 		rfs4_free_opens(oo, TRUE, TRUE);
3341 	}
3342 
3343 	rfs4_dbe_unlock(cp->rc_dbe);
3344 }
3345 
3346 void
3347 rfs4_client_close(rfs4_client_t *cp)
3348 {
3349 	/* Mark client as going away. */
3350 	rfs4_dbe_lock(cp->rc_dbe);
3351 	rfs4_dbe_invalidate(cp->rc_dbe);
3352 	rfs4_dbe_unlock(cp->rc_dbe);
3353 
3354 	rfs4_client_state_remove(cp);
3355 
3356 	/* Release the client */
3357 	rfs4_client_rele(cp);
3358 }
3359 
3360 nfsstat4
3361 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3362 {
3363 	cid *cidp = (cid *) cp;
3364 	nfs4_srv_t *nsrv4;
3365 
3366 	nsrv4 = nfs4_get_srv();
3367 
3368 	/*
3369 	 * If we are booted as a cluster node, check the embedded nodeid.
3370 	 * If it indicates that this clientid was generated on another node,
3371 	 * inform the client accordingly.
3372 	 */
3373 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3374 		return (NFS4ERR_STALE_CLIENTID);
3375 
3376 	/*
3377 	 * If the server start time matches the time provided
3378 	 * by the client (via the clientid) and this is NOT a
3379 	 * setclientid_confirm then return EXPIRED.
3380 	 */
3381 	if (!setclid_confirm &&
3382 	    cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3383 		return (NFS4ERR_EXPIRED);
3384 
3385 	return (NFS4ERR_STALE_CLIENTID);
3386 }
3387 
3388 /*
3389  * This is used when a stateid has not been found amongst the
3390  * current server's state.  Check the stateid to see if it
3391  * was from this server instantiation or not.
3392  */
3393 static nfsstat4
3394 what_stateid_error(stateid_t *id, stateid_type_t type)
3395 {
3396 	nfs4_srv_t *nsrv4;
3397 
3398 	nsrv4 = nfs4_get_srv();
3399 
3400 	/* If we are booted as a cluster node, was stateid locally generated? */
3401 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3402 		return (NFS4ERR_STALE_STATEID);
3403 
3404 	/* If types don't match then no use checking further */
3405 	if (type != id->bits.type)
3406 		return (NFS4ERR_BAD_STATEID);
3407 
3408 	/* From a different server instantiation, return STALE */
3409 	if (id->bits.boottime != nsrv4->rfs4_start_time)
3410 		return (NFS4ERR_STALE_STATEID);
3411 
3412 	/*
3413 	 * From this server but the state is most likely beyond lease
3414 	 * timeout: return NFS4ERR_EXPIRED.  However, there is the
3415 	 * case of a delegation stateid.  For delegations, there is a
3416 	 * case where the state can be removed without the client's
3417 	 * knowledge/consent: revocation.  In the case of delegation
3418 	 * revocation, the delegation state will be removed and will
3419 	 * not be found.  If the client does something like a
3420 	 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3421 	 * that has been revoked, the server should return BAD_STATEID
3422 	 * instead of the more common EXPIRED error.
3423 	 */
3424 	if (id->bits.boottime == nsrv4->rfs4_start_time) {
3425 		if (type == DELEGID)
3426 			return (NFS4ERR_BAD_STATEID);
3427 		else
3428 			return (NFS4ERR_EXPIRED);
3429 	}
3430 
3431 	return (NFS4ERR_BAD_STATEID);
3432 }
3433 
3434 /*
3435  * Used later on to find the various state structs.  When called from
3436  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3437  * taken (it is not needed) and helps on the read/write path with
3438  * respect to performance.
3439  */
3440 static nfsstat4
3441 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3442     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3443 {
3444 	stateid_t *id = (stateid_t *)stateid;
3445 	rfs4_state_t *sp;
3446 
3447 	*spp = NULL;
3448 
3449 	/* If we are booted as a cluster node, was stateid locally generated? */
3450 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3451 		return (NFS4ERR_STALE_STATEID);
3452 
3453 	sp = rfs4_findstate(id, find_invalid, lock_fp);
3454 	if (sp == NULL) {
3455 		return (what_stateid_error(id, OPENID));
3456 	}
3457 
3458 	if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3459 		if (lock_fp == TRUE)
3460 			rfs4_state_rele(sp);
3461 		else
3462 			rfs4_state_rele_nounlock(sp);
3463 		return (NFS4ERR_EXPIRED);
3464 	}
3465 
3466 	*spp = sp;
3467 
3468 	return (NFS4_OK);
3469 }
3470 
3471 nfsstat4
3472 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3473     rfs4_dbsearch_type_t find_invalid)
3474 {
3475 	return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3476 }
3477 
3478 int
3479 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3480 {
3481 	stateid_t *id = (stateid_t *)stateid;
3482 
3483 	if (rfs4_lease_expired(sp->rs_owner->ro_client))
3484 		return (NFS4_CHECK_STATEID_EXPIRED);
3485 
3486 	/* Stateid is some time in the future - that's bad */
3487 	if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3488 		return (NFS4_CHECK_STATEID_BAD);
3489 
3490 	if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3491 		return (NFS4_CHECK_STATEID_REPLAY);
3492 
3493 	/* Stateid is some time in the past - that's old */
3494 	if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3495 		return (NFS4_CHECK_STATEID_OLD);
3496 
3497 	/* Caller needs to know about confirmation before closure */
3498 	if (sp->rs_owner->ro_need_confirm)
3499 		return (NFS4_CHECK_STATEID_UNCONFIRMED);
3500 
3501 	if (sp->rs_closed == TRUE)
3502 		return (NFS4_CHECK_STATEID_CLOSED);
3503 
3504 	return (NFS4_CHECK_STATEID_OKAY);
3505 }
3506 
3507 int
3508 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3509 {
3510 	stateid_t *id = (stateid_t *)stateid;
3511 
3512 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3513 		return (NFS4_CHECK_STATEID_EXPIRED);
3514 
3515 	/* Stateid is some time in the future - that's bad */
3516 	if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3517 		return (NFS4_CHECK_STATEID_BAD);
3518 
3519 	if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3520 		return (NFS4_CHECK_STATEID_REPLAY);
3521 
3522 	/* Stateid is some time in the past - that's old */
3523 	if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3524 		return (NFS4_CHECK_STATEID_OLD);
3525 
3526 	if (lsp->rls_state->rs_closed == TRUE)
3527 		return (NFS4_CHECK_STATEID_CLOSED);
3528 
3529 	return (NFS4_CHECK_STATEID_OKAY);
3530 }
3531 
3532 nfsstat4
3533 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3534 {
3535 	stateid_t *id = (stateid_t *)stateid;
3536 	rfs4_deleg_state_t *dsp;
3537 
3538 	*dspp = NULL;
3539 
3540 	/* If we are booted as a cluster node, was stateid locally generated? */
3541 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3542 		return (NFS4ERR_STALE_STATEID);
3543 
3544 	dsp = rfs4_finddelegstate(id);
3545 	if (dsp == NULL) {
3546 		return (what_stateid_error(id, DELEGID));
3547 	}
3548 
3549 	if (rfs4_lease_expired(dsp->rds_client)) {
3550 		rfs4_deleg_state_rele(dsp);
3551 		return (NFS4ERR_EXPIRED);
3552 	}
3553 
3554 	*dspp = dsp;
3555 
3556 	return (NFS4_OK);
3557 }
3558 
3559 nfsstat4
3560 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3561 {
3562 	stateid_t *id = (stateid_t *)stateid;
3563 	rfs4_lo_state_t *lsp;
3564 
3565 	*lspp = NULL;
3566 
3567 	/* If we are booted as a cluster node, was stateid locally generated? */
3568 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3569 		return (NFS4ERR_STALE_STATEID);
3570 
3571 	lsp = rfs4_findlo_state(id, lock_fp);
3572 	if (lsp == NULL) {
3573 		return (what_stateid_error(id, LOCKID));
3574 	}
3575 
3576 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3577 		rfs4_lo_state_rele(lsp, lock_fp);
3578 		return (NFS4ERR_EXPIRED);
3579 	}
3580 
3581 	*lspp = lsp;
3582 
3583 	return (NFS4_OK);
3584 }
3585 
3586 static nfsstat4
3587 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3588     rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3589 {
3590 	rfs4_state_t *sp = NULL;
3591 	rfs4_deleg_state_t *dsp = NULL;
3592 	rfs4_lo_state_t *lsp = NULL;
3593 	stateid_t *id;
3594 	nfsstat4 status;
3595 
3596 	*spp = NULL; *dspp = NULL; *lspp = NULL;
3597 
3598 	id = (stateid_t *)sid;
3599 	switch (id->bits.type) {
3600 	case OPENID:
3601 		status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3602 		break;
3603 	case DELEGID:
3604 		status = rfs4_get_deleg_state(sid, &dsp);
3605 		break;
3606 	case LOCKID:
3607 		status = rfs4_get_lo_state(sid, &lsp, FALSE);
3608 		if (status == NFS4_OK) {
3609 			sp = lsp->rls_state;
3610 			rfs4_dbe_hold(sp->rs_dbe);
3611 		}
3612 		break;
3613 	default:
3614 		status = NFS4ERR_BAD_STATEID;
3615 	}
3616 
3617 	if (status == NFS4_OK) {
3618 		*spp = sp;
3619 		*dspp = dsp;
3620 		*lspp = lsp;
3621 	}
3622 
3623 	return (status);
3624 }
3625 
3626 /*
3627  * Given the I/O mode (FREAD or FWRITE), this checks whether the
3628  * rfs4_state_t struct has access to do this operation and if so
3629  * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3630  */
3631 nfsstat4
3632 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3633 {
3634 	nfsstat4 stat = NFS4_OK;
3635 	rfs4_file_t *fp;
3636 	bool_t create = FALSE;
3637 
3638 	rfs4_dbe_lock(sp->rs_dbe);
3639 	if (mode == FWRITE) {
3640 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3641 			stat = NFS4ERR_OPENMODE;
3642 		}
3643 	} else if (mode == FREAD) {
3644 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3645 			/*
3646 			 * If we have OPENed the file with DENYing access
3647 			 * to both READ and WRITE then no one else could
3648 			 * have OPENed the file, hence no conflicting READ
3649 			 * deny.  This check is merely an optimization.
3650 			 */
3651 			if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3652 				goto out;
3653 
3654 			/* Check against file struct's DENY mode */
3655 			fp = rfs4_findfile(vp, NULL, &create);
3656 			if (fp != NULL) {
3657 				int deny_read = 0;
3658 				rfs4_dbe_lock(fp->rf_dbe);
3659 				/*
3660 				 * Check if any other open owner has the file
3661 				 * OPENed with deny READ.
3662 				 */
3663 				if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3664 					deny_read = 1;
3665 				ASSERT(fp->rf_deny_read >= deny_read);
3666 				if (fp->rf_deny_read > deny_read)
3667 					stat = NFS4ERR_OPENMODE;
3668 				rfs4_dbe_unlock(fp->rf_dbe);
3669 				rfs4_file_rele(fp);
3670 			}
3671 		}
3672 	} else {
3673 		/* Illegal I/O mode */
3674 		stat = NFS4ERR_INVAL;
3675 	}
3676 out:
3677 	rfs4_dbe_unlock(sp->rs_dbe);
3678 	return (stat);
3679 }
3680 
3681 /*
3682  * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3683  * the file is being truncated, return NFS4_OK if allowed or appropriate
3684  * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3685  * the associated file will be done if the I/O is not consistent with any
3686  * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3687  * as reader or writer as appropriate. rfs4_op_open will acquire the
3688  * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3689  * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3690  * deleg parameter, we will return whether a write delegation is held by
3691  * the client associated with this stateid.
3692  * If the server instance associated with the relevant client is in its
3693  * grace period, return NFS4ERR_GRACE.
3694  */
3695 
3696 nfsstat4
3697 rfs4_check_stateid(int mode, vnode_t *vp,
3698     stateid4 *stateid, bool_t trunc, bool_t *deleg,
3699     bool_t do_access, caller_context_t *ct)
3700 {
3701 	rfs4_file_t *fp;
3702 	bool_t create = FALSE;
3703 	rfs4_state_t *sp;
3704 	rfs4_deleg_state_t *dsp;
3705 	rfs4_lo_state_t *lsp;
3706 	stateid_t *id = (stateid_t *)stateid;
3707 	nfsstat4 stat = NFS4_OK;
3708 
3709 	if (ct != NULL) {
3710 		ct->cc_sysid = 0;
3711 		ct->cc_pid = 0;
3712 		ct->cc_caller_id = nfs4_srv_caller_id;
3713 		ct->cc_flags = CC_DONTBLOCK;
3714 	}
3715 
3716 	if (ISSPECIAL(stateid)) {
3717 		fp = rfs4_findfile(vp, NULL, &create);
3718 		if (fp == NULL)
3719 			return (NFS4_OK);
3720 		if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3721 			rfs4_file_rele(fp);
3722 			return (NFS4_OK);
3723 		}
3724 		if (mode == FWRITE ||
3725 		    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3726 			rfs4_recall_deleg(fp, trunc, NULL);
3727 			rfs4_file_rele(fp);
3728 			return (NFS4ERR_DELAY);
3729 		}
3730 		rfs4_file_rele(fp);
3731 		return (NFS4_OK);
3732 	} else {
3733 		stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3734 		if (stat != NFS4_OK)
3735 			return (stat);
3736 		if (lsp != NULL) {
3737 			/* Is associated server instance in its grace period? */
3738 			if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3739 				rfs4_lo_state_rele(lsp, FALSE);
3740 				if (sp != NULL)
3741 					rfs4_state_rele_nounlock(sp);
3742 				return (NFS4ERR_GRACE);
3743 			}
3744 			if (id->bits.type == LOCKID) {
3745 				/* Seqid in the future? - that's bad */
3746 				if (lsp->rls_lockid.bits.chgseq <
3747 				    id->bits.chgseq) {
3748 					rfs4_lo_state_rele(lsp, FALSE);
3749 					if (sp != NULL)
3750 						rfs4_state_rele_nounlock(sp);
3751 					return (NFS4ERR_BAD_STATEID);
3752 				}
3753 				/* Seqid in the past? - that's old */
3754 				if (lsp->rls_lockid.bits.chgseq >
3755 				    id->bits.chgseq) {
3756 					rfs4_lo_state_rele(lsp, FALSE);
3757 					if (sp != NULL)
3758 						rfs4_state_rele_nounlock(sp);
3759 					return (NFS4ERR_OLD_STATEID);
3760 				}
3761 				/* Ensure specified filehandle matches */
3762 				if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3763 					rfs4_lo_state_rele(lsp, FALSE);
3764 					if (sp != NULL)
3765 						rfs4_state_rele_nounlock(sp);
3766 					return (NFS4ERR_BAD_STATEID);
3767 				}
3768 			}
3769 			if (ct != NULL) {
3770 				ct->cc_sysid =
3771 				    lsp->rls_locker->rl_client->rc_sysidt;
3772 				ct->cc_pid = lsp->rls_locker->rl_pid;
3773 			}
3774 			rfs4_lo_state_rele(lsp, FALSE);
3775 		}
3776 
3777 		/* Stateid provided was an "open" stateid */
3778 		if (sp != NULL) {
3779 			/* Is associated server instance in its grace period? */
3780 			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3781 				rfs4_state_rele_nounlock(sp);
3782 				return (NFS4ERR_GRACE);
3783 			}
3784 			if (id->bits.type == OPENID) {
3785 				/* Seqid in the future? - that's bad */
3786 				if (sp->rs_stateid.bits.chgseq <
3787 				    id->bits.chgseq) {
3788 					rfs4_state_rele_nounlock(sp);
3789 					return (NFS4ERR_BAD_STATEID);
3790 				}
3791 				/* Seqid in the past - that's old */
3792 				if (sp->rs_stateid.bits.chgseq >
3793 				    id->bits.chgseq) {
3794 					rfs4_state_rele_nounlock(sp);
3795 					return (NFS4ERR_OLD_STATEID);
3796 				}
3797 			}
3798 			/* Ensure specified filehandle matches */
3799 			if (sp->rs_finfo->rf_vp != vp) {
3800 				rfs4_state_rele_nounlock(sp);
3801 				return (NFS4ERR_BAD_STATEID);
3802 			}
3803 
3804 			if (sp->rs_owner->ro_need_confirm) {
3805 				rfs4_state_rele_nounlock(sp);
3806 				return (NFS4ERR_BAD_STATEID);
3807 			}
3808 
3809 			if (sp->rs_closed == TRUE) {
3810 				rfs4_state_rele_nounlock(sp);
3811 				return (NFS4ERR_OLD_STATEID);
3812 			}
3813 
3814 			if (do_access)
3815 				stat = rfs4_state_has_access(sp, mode, vp);
3816 			else
3817 				stat = NFS4_OK;
3818 
3819 			/*
3820 			 * Return whether this state has write
3821 			 * delegation if desired
3822 			 */
3823 			if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3824 			    OPEN_DELEGATE_WRITE))
3825 				*deleg = TRUE;
3826 
3827 			/*
3828 			 * We got a valid stateid, so we update the
3829 			 * lease on the client. Ideally we would like
3830 			 * to do this after the calling op succeeds,
3831 			 * but for now this will be good
3832 			 * enough. Callers of this routine are
3833 			 * currently insulated from the state stuff.
3834 			 */
3835 			rfs4_update_lease(sp->rs_owner->ro_client);
3836 
3837 			/*
3838 			 * If a delegation is present on this file and
3839 			 * this is a WRITE, then update the lastwrite
3840 			 * time to indicate that activity is present.
3841 			 */
3842 			if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3843 			    OPEN_DELEGATE_WRITE &&
3844 			    mode == FWRITE) {
3845 				sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3846 				    gethrestime_sec();
3847 			}
3848 
3849 			rfs4_state_rele_nounlock(sp);
3850 
3851 			return (stat);
3852 		}
3853 
3854 		if (dsp != NULL) {
3855 			/* Is associated server instance in its grace period? */
3856 			if (rfs4_clnt_in_grace(dsp->rds_client)) {
3857 				rfs4_deleg_state_rele(dsp);
3858 				return (NFS4ERR_GRACE);
3859 			}
3860 			if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
3861 				rfs4_deleg_state_rele(dsp);
3862 				return (NFS4ERR_BAD_STATEID);
3863 			}
3864 
3865 			/* Ensure specified filehandle matches */
3866 			if (dsp->rds_finfo->rf_vp != vp) {
3867 				rfs4_deleg_state_rele(dsp);
3868 				return (NFS4ERR_BAD_STATEID);
3869 			}
3870 			/*
3871 			 * Return whether this state has write
3872 			 * delegation if desired
3873 			 */
3874 			if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3875 			    OPEN_DELEGATE_WRITE))
3876 				*deleg = TRUE;
3877 
3878 			rfs4_update_lease(dsp->rds_client);
3879 
3880 			/*
3881 			 * If a delegation is present on this file and
3882 			 * this is a WRITE, then update the lastwrite
3883 			 * time to indicate that activity is present.
3884 			 */
3885 			if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3886 			    OPEN_DELEGATE_WRITE && mode == FWRITE) {
3887 				dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3888 				    gethrestime_sec();
3889 			}
3890 
3891 			/*
3892 			 * XXX - what happens if this is a WRITE and the
3893 			 * delegation type of for READ.
3894 			 */
3895 			rfs4_deleg_state_rele(dsp);
3896 
3897 			return (stat);
3898 		}
3899 		/*
3900 		 * If we got this far, something bad happened
3901 		 */
3902 		return (NFS4ERR_BAD_STATEID);
3903 	}
3904 }
3905 
3906 
3907 /*
3908  * This is a special function in that for the file struct provided the
3909  * server wants to remove/close all current state associated with the
3910  * file.  The prime use of this would be with OP_REMOVE to force the
3911  * release of state and particularly of file locks.
3912  *
3913  * There is an assumption that there is no delegations outstanding on
3914  * this file at this point.  The caller should have waited for those
3915  * to be returned or revoked.
3916  */
3917 void
3918 rfs4_close_all_state(rfs4_file_t *fp)
3919 {
3920 	rfs4_state_t *sp;
3921 
3922 	rfs4_dbe_lock(fp->rf_dbe);
3923 
3924 #ifdef DEBUG
3925 	/* only applies when server is handing out delegations */
3926 	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3927 		ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3928 #endif
3929 
3930 	/* No delegations for this file */
3931 	ASSERT(list_is_empty(&fp->rf_delegstatelist));
3932 
3933 	/* Make sure that it can not be found */
3934 	rfs4_dbe_invalidate(fp->rf_dbe);
3935 
3936 	if (fp->rf_vp == NULL) {
3937 		rfs4_dbe_unlock(fp->rf_dbe);
3938 		return;
3939 	}
3940 	rfs4_dbe_unlock(fp->rf_dbe);
3941 
3942 	/*
3943 	 * Hold as writer to prevent other server threads from
3944 	 * processing requests related to the file while all state is
3945 	 * being removed.
3946 	 */
3947 	rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3948 
3949 	/* Remove ALL state from the file */
3950 	while (sp = rfs4_findstate_by_file(fp)) {
3951 		rfs4_state_close(sp, FALSE, FALSE, CRED());
3952 		rfs4_state_rele_nounlock(sp);
3953 	}
3954 
3955 	/*
3956 	 * This is only safe since there are no further references to
3957 	 * the file.
3958 	 */
3959 	rfs4_dbe_lock(fp->rf_dbe);
3960 	if (fp->rf_vp) {
3961 		vnode_t *vp = fp->rf_vp;
3962 
3963 		mutex_enter(&vp->v_vsd_lock);
3964 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
3965 		mutex_exit(&vp->v_vsd_lock);
3966 		VN_RELE(vp);
3967 		fp->rf_vp = NULL;
3968 	}
3969 	rfs4_dbe_unlock(fp->rf_dbe);
3970 
3971 	/* Finally let other references to proceed */
3972 	rw_exit(&fp->rf_file_rwlock);
3973 }
3974 
3975 /*
3976  * This function is used as a target for the rfs4_dbe_walk() call
3977  * below.  The purpose of this function is to see if the
3978  * lockowner_state refers to a file that resides within the exportinfo
3979  * export.  If so, then remove the lock_owner state (file locks and
3980  * share "locks") for this object since the intent is the server is
3981  * unexporting the specified directory.  Be sure to invalidate the
3982  * object after the state has been released
3983  */
3984 static void
3985 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3986 {
3987 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3988 	struct exportinfo *exi = (struct exportinfo *)e;
3989 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
3990 	fhandle_t *efhp;
3991 
3992 	efhp = (fhandle_t *)&exi->exi_fh;
3993 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3994 
3995 	FH_TO_FMT4(efhp, exi_fhp);
3996 
3997 	finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
3998 	    rf_filehandle.nfs_fh4_val;
3999 
4000 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4001 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4002 	    exi_fhp->fh4_xlen) == 0) {
4003 		rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
4004 		rfs4_dbe_invalidate(lsp->rls_dbe);
4005 		rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
4006 	}
4007 }
4008 
4009 /*
4010  * This function is used as a target for the rfs4_dbe_walk() call
4011  * below.  The purpose of this function is to see if the state refers
4012  * to a file that resides within the exportinfo export.  If so, then
4013  * remove the open state for this object since the intent is the
4014  * server is unexporting the specified directory.  The main result for
4015  * this type of entry is to invalidate it such it will not be found in
4016  * the future.
4017  */
4018 static void
4019 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4020 {
4021 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4022 	struct exportinfo *exi = (struct exportinfo *)e;
4023 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4024 	fhandle_t *efhp;
4025 
4026 	efhp = (fhandle_t *)&exi->exi_fh;
4027 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4028 
4029 	FH_TO_FMT4(efhp, exi_fhp);
4030 
4031 	finfo_fhp =
4032 	    (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4033 
4034 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4035 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4036 	    exi_fhp->fh4_xlen) == 0) {
4037 		rfs4_state_close(sp, TRUE, FALSE, CRED());
4038 		rfs4_dbe_invalidate(sp->rs_dbe);
4039 	}
4040 }
4041 
4042 /*
4043  * This function is used as a target for the rfs4_dbe_walk() call
4044  * below.  The purpose of this function is to see if the state refers
4045  * to a file that resides within the exportinfo export.  If so, then
4046  * remove the deleg state for this object since the intent is the
4047  * server is unexporting the specified directory.  The main result for
4048  * this type of entry is to invalidate it such it will not be found in
4049  * the future.
4050  */
4051 static void
4052 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4053 {
4054 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4055 	struct exportinfo *exi = (struct exportinfo *)e;
4056 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4057 	fhandle_t *efhp;
4058 
4059 	efhp = (fhandle_t *)&exi->exi_fh;
4060 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4061 
4062 	FH_TO_FMT4(efhp, exi_fhp);
4063 
4064 	finfo_fhp =
4065 	    (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4066 
4067 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4068 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4069 	    exi_fhp->fh4_xlen) == 0) {
4070 		rfs4_dbe_invalidate(dsp->rds_dbe);
4071 	}
4072 }
4073 
4074 /*
4075  * This function is used as a target for the rfs4_dbe_walk() call
4076  * below.  The purpose of this function is to see if the state refers
4077  * to a file that resides within the exportinfo export.  If so, then
4078  * release vnode hold for this object since the intent is the server
4079  * is unexporting the specified directory.  Invalidation will prevent
4080  * this struct from being found in the future.
4081  */
4082 static void
4083 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4084 {
4085 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4086 	struct exportinfo *exi = (struct exportinfo *)e;
4087 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4088 	fhandle_t *efhp;
4089 
4090 	efhp = (fhandle_t *)&exi->exi_fh;
4091 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4092 
4093 	FH_TO_FMT4(efhp, exi_fhp);
4094 
4095 	finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4096 
4097 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4098 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4099 	    exi_fhp->fh4_xlen) == 0) {
4100 		if (fp->rf_vp) {
4101 			vnode_t *vp = fp->rf_vp;
4102 
4103 			/*
4104 			 * don't leak monitors and remove the reference
4105 			 * put on the vnode when the delegation was granted.
4106 			 */
4107 			if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4108 				(void) fem_uninstall(vp, deleg_rdops,
4109 				    (void *)fp);
4110 				vn_open_downgrade(vp, FREAD);
4111 			} else if (fp->rf_dinfo.rd_dtype ==
4112 			    OPEN_DELEGATE_WRITE) {
4113 				(void) fem_uninstall(vp, deleg_wrops,
4114 				    (void *)fp);
4115 				vn_open_downgrade(vp, FREAD|FWRITE);
4116 			}
4117 			mutex_enter(&vp->v_vsd_lock);
4118 			(void) vsd_set(vp, nfs4_srv_vkey, NULL);
4119 			mutex_exit(&vp->v_vsd_lock);
4120 			VN_RELE(vp);
4121 			fp->rf_vp = NULL;
4122 		}
4123 		rfs4_dbe_invalidate(fp->rf_dbe);
4124 	}
4125 }
4126 
4127 /*
4128  * Given a directory that is being unexported, cleanup/release all
4129  * state in the server that refers to objects residing underneath this
4130  * particular export.  The ordering of the release is important.
4131  * Lock_owner, then state and then file.
4132  *
4133  * NFS zones note: nfs_export.c:unexport() calls this from a
4134  * thread in the global zone for NGZ data structures, so we
4135  * CANNOT use zone_getspecific anywhere in this code path.
4136  */
4137 void
4138 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4139 {
4140 	nfs_globals_t *ng;
4141 	nfs4_srv_t *nsrv4;
4142 
4143 	ng = ne->ne_globals;
4144 	ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4145 	nsrv4 = ng->nfs4_srv;
4146 
4147 	mutex_enter(&nsrv4->state_lock);
4148 
4149 	if (nsrv4->nfs4_server_state == NULL) {
4150 		mutex_exit(&nsrv4->state_lock);
4151 		return;
4152 	}
4153 
4154 	rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4155 	    rfs4_lo_state_walk_callout, exi);
4156 	rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4157 	rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4158 	    rfs4_deleg_state_walk_callout, exi);
4159 	rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4160 
4161 	mutex_exit(&nsrv4->state_lock);
4162 }
4163