xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs_subr.c (revision 7c478bd9)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  *
26*7c478bd9Sstevel@tonic-gate  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
27*7c478bd9Sstevel@tonic-gate  *	All rights reserved.
28*7c478bd9Sstevel@tonic-gate  */
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
31*7c478bd9Sstevel@tonic-gate 
32*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/user.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/buf.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/socket.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/uio.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/swap.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
48*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
49*7c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
50*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
51*7c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
52*7c478bd9Sstevel@tonic-gate #include <sys/session.h>
53*7c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
54*7c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
55*7c478bd9Sstevel@tonic-gate #include <sys/acl.h>
56*7c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
57*7c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
58*7c478bd9Sstevel@tonic-gate #include <sys/flock.h>
59*7c478bd9Sstevel@tonic-gate #include <sys/dirent.h>
60*7c478bd9Sstevel@tonic-gate #include <sys/flock.h>
61*7c478bd9Sstevel@tonic-gate #include <sys/callb.h>
62*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
63*7c478bd9Sstevel@tonic-gate #include <sys/list.h>
64*7c478bd9Sstevel@tonic-gate 
65*7c478bd9Sstevel@tonic-gate #include <rpc/types.h>
66*7c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
67*7c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
68*7c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
69*7c478bd9Sstevel@tonic-gate 
70*7c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
71*7c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
72*7c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
73*7c478bd9Sstevel@tonic-gate #include <nfs/rnode.h>
74*7c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h>
75*7c478bd9Sstevel@tonic-gate 
76*7c478bd9Sstevel@tonic-gate /*
77*7c478bd9Sstevel@tonic-gate  * The hash queues for the access to active and cached rnodes
78*7c478bd9Sstevel@tonic-gate  * are organized as doubly linked lists.  A reader/writer lock
79*7c478bd9Sstevel@tonic-gate  * for each hash bucket is used to control access and to synchronize
80*7c478bd9Sstevel@tonic-gate  * lookups, additions, and deletions from the hash queue.
81*7c478bd9Sstevel@tonic-gate  *
82*7c478bd9Sstevel@tonic-gate  * The rnode freelist is organized as a doubly linked list with
83*7c478bd9Sstevel@tonic-gate  * a head pointer.  Additions and deletions are synchronized via
84*7c478bd9Sstevel@tonic-gate  * a single mutex.
85*7c478bd9Sstevel@tonic-gate  *
86*7c478bd9Sstevel@tonic-gate  * In order to add an rnode to the free list, it must be hashed into
87*7c478bd9Sstevel@tonic-gate  * a hash queue and the exclusive lock to the hash queue be held.
88*7c478bd9Sstevel@tonic-gate  * If an rnode is not hashed into a hash queue, then it is destroyed
89*7c478bd9Sstevel@tonic-gate  * because it represents no valuable information that can be reused
90*7c478bd9Sstevel@tonic-gate  * about the file.  The exclusive lock to the hash queue must be
91*7c478bd9Sstevel@tonic-gate  * held in order to prevent a lookup in the hash queue from finding
92*7c478bd9Sstevel@tonic-gate  * the rnode and using it and assuming that the rnode is not on the
93*7c478bd9Sstevel@tonic-gate  * freelist.  The lookup in the hash queue will have the hash queue
94*7c478bd9Sstevel@tonic-gate  * locked, either exclusive or shared.
95*7c478bd9Sstevel@tonic-gate  *
96*7c478bd9Sstevel@tonic-gate  * The vnode reference count for each rnode is not allowed to drop
97*7c478bd9Sstevel@tonic-gate  * below 1.  This prevents external entities, such as the VM
98*7c478bd9Sstevel@tonic-gate  * subsystem, from acquiring references to vnodes already on the
99*7c478bd9Sstevel@tonic-gate  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * rnode is looked up in the hash queues, then either the rnode
 * is removed from the freelist and that reference is transferred to
103*7c478bd9Sstevel@tonic-gate  * the new reference or the vnode reference count must be incremented
104*7c478bd9Sstevel@tonic-gate  * accordingly.  The mutex for the freelist must be held in order to
105*7c478bd9Sstevel@tonic-gate  * accurately test to see if the rnode is on the freelist or not.
106*7c478bd9Sstevel@tonic-gate  * The hash queue lock might be held shared and it is possible that
107*7c478bd9Sstevel@tonic-gate  * two different threads may race to remove the rnode from the
108*7c478bd9Sstevel@tonic-gate  * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It cannot be
111*7c478bd9Sstevel@tonic-gate  * placed on the freelist due to the requirement that the thread
112*7c478bd9Sstevel@tonic-gate  * putting the rnode on the freelist must hold the exclusive lock
113*7c478bd9Sstevel@tonic-gate  * to the hash queue and the thread doing the lookup in the hash
114*7c478bd9Sstevel@tonic-gate  * queue is holding either a shared or exclusive lock to the hash
115*7c478bd9Sstevel@tonic-gate  * queue.
116*7c478bd9Sstevel@tonic-gate  *
117*7c478bd9Sstevel@tonic-gate  * The lock ordering is:
118*7c478bd9Sstevel@tonic-gate  *
119*7c478bd9Sstevel@tonic-gate  *	hash bucket lock -> vnode lock
120*7c478bd9Sstevel@tonic-gate  *	hash bucket lock -> freelist lock
121*7c478bd9Sstevel@tonic-gate  */
/* rnode hash buckets (see the locking description above) */
static rhashq_t *rtable;

/* rnode freelist head and its protecting mutex (see above) */
static kmutex_t rpfreelist_lock;
static rnode_t *rpfreelist = NULL;
static long rnew = 0;	/* NOTE(review): looks like a count of rnodes; confirm at use sites */
long nrnode = 0;	/* non-static: tunable/observable rnode count — confirm semantics */

static int rtablesize;	/* number of buckets in rtable */
static int rtablemask;	/* presumably rtablesize - 1, used as hash mask — confirm */

static int hashlen = 4;	/* NOTE(review): appears to be a target hash-chain length; confirm */

static struct kmem_cache *rnode_cache;	/* allocator for rnode_t structures */
135*7c478bd9Sstevel@tonic-gate 
/*
 * Mutex to protect the following variables:
 *	nfs_major
 *	nfs_minor
 */
kmutex_t nfs_minor_lock;
int nfs_major;		/* NOTE(review): presumably the NFS pseudo-device major; confirm at init site */
int nfs_minor;		/* NOTE(review): next minor to assign under nfs_minor_lock; confirm */

/* Do we allow preepoch (negative) time values otw? */
bool_t nfs_allow_preepoch_time = FALSE;	/* default: do not allow preepoch */

/*
 * Access cache
 * (hash table; sized and initialized outside this chunk)
 */
static acache_hash_t *acache;
static long nacache;	/* used strictly to size the number of hash queues */

static int acachesize;	/* number of hash buckets in acache */
static int acachemask;	/* presumably acachesize - 1, used as hash mask — confirm */
static struct kmem_cache *acache_cache;	/* allocator for access cache entries */
157*7c478bd9Sstevel@tonic-gate 
/*
 * Client side utilities
 */

/*
 * client side statistics
 *
 * Template of named kstats.  NOTE(review): presumably copied into each
 * zone's nfscl_stat (clget_impl() bumps nfscl->nfscl_stat.clgets etc.);
 * confirm at the kstat setup site.
 */
static const struct clstat clstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },
	{ "cltoomany",	KSTAT_DATA_UINT64 },
#ifdef DEBUG
	/* the following counters are maintained only on DEBUG kernels */
	{ "clalloc",	KSTAT_DATA_UINT64 },
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};
177*7c478bd9Sstevel@tonic-gate 
/*
 * The following are statistics that describe the behavior of the system
 * as a whole and don't correspond to any one particular zone.
 */
#ifdef DEBUG
static struct clstat_debug {
	kstat_named_t	nrnode;			/* number of allocated rnodes */
	kstat_named_t	access;			/* size of access cache */
	kstat_named_t	dirent;			/* size of readdir cache */
	kstat_named_t	dirents;		/* size of readdir buf cache */
	kstat_named_t	reclaim;		/* number of reclaims */
	kstat_named_t	clreclaim;		/* number of cl reclaims */
	kstat_named_t	f_reclaim;		/* number of free reclaims */
	kstat_named_t	a_reclaim;		/* number of active reclaims */
	kstat_named_t	r_reclaim;		/* number of rnode reclaims */
	kstat_named_t	rpath;			/* bytes used to store rpaths */
} clstat_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	/* NOTE(review): kstat name "r_path" vs. field name "rpath" — intentional? */
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif	/* DEBUG */
207*7c478bd9Sstevel@tonic-gate 
/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
 */
static list_t nfs_clnt_list;
static kmutex_t nfs_clnt_list_lock;	/* lock for nfs_clnt_list */
static zone_key_t nfsclnt_zone_key;	/* key for per-zone struct nfs_clnt */

/* allocator for struct chtab client-handle cache entries */
static struct kmem_cache *chtab_cache;

/*
 * Some servers do not properly update the attributes of the
 * directory when changes are made.  To allow interoperability
 * with these broken servers, the nfs_disable_rddir_cache
 * parameter must be set in /etc/system
 */
int nfs_disable_rddir_cache = 0;
225*7c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations of the functions defined in this file.  clget()
 * and clfree() are non-static and form part of the client interface;
 * the rest are file-local.
 */
int		clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **);
void		clfree(CLIENT *, struct chtab *);
static int	acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static int	nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static void	clreclaim(void *);
static int	nfs_feedback(int, int, mntinfo_t *);
static int	rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, enum clnt_stat *, int,
		    failinfo_t *);
static int	aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, int, failinfo_t *);
static void	rinactive(rnode_t *, cred_t *);
static int	rtablehash(nfs_fhandle *);
static vnode_t	*make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
		    struct vnodeops *,
		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
			cred_t *),
		    int (*)(const void *, const void *), int *, cred_t *,
		    char *, char *);
static void	rp_rmfree(rnode_t *);
static void	rp_addhash(rnode_t *);
static void	rp_rmhash_locked(rnode_t *);
static rnode_t	*rfind(rhashq_t *, nfs_fhandle *, struct vfs *);
static void	destroy_rnode(rnode_t *);
static void	rddir_cache_free(rddir_cache *);
static int	nfs_free_data_reclaim(rnode_t *);
static int	nfs_active_data_reclaim(rnode_t *);
static int	nfs_free_reclaim(void);
static int	nfs_active_reclaim(void);
static int	nfs_rnode_reclaim(void);
static void	nfs_reclaim(void *);
static int	failover_safe(failinfo_t *);
static void	failover_newserver(mntinfo_t *mi);
static void	failover_thread(mntinfo_t *mi);
static int	failover_wait(mntinfo_t *);
static int	failover_remap(failinfo_t *);
static int	failover_lookup(char *, vnode_t *,
		    int (*)(vnode_t *, char *, vnode_t **,
			struct pathname *, int, vnode_t *, cred_t *, int),
		    int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
		    vnode_t **);
static void	nfs_free_r_path(rnode_t *);
static void	nfs_set_vroot(vnode_t *);
static char	*nfs_getsrvnames(mntinfo_t *, size_t *);
273*7c478bd9Sstevel@tonic-gate 
/*
 * from rpcsec module (common/rpcsec)
 *
 * sec_clnt_geth() builds an AUTH handle for the given security data and
 * credentials (see its use in clget_impl()); the free routines release
 * the auth handle and the security data respectively.
 */
extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
extern void sec_clnt_freeh(AUTH *);
extern void sec_clnt_freeinfo(struct sec_data *);
280*7c478bd9Sstevel@tonic-gate 
/*
 * EIO or EINTR are not recoverable errors.
 *
 * Both the argument and the full expansion are parenthesized so the
 * macro expands safely inside larger expressions and with non-trivial
 * argument expressions (CERT PRE01-C / PRE02-C).  The result is 0 or 1.
 */
#define	IS_RECOVERABLE_ERROR(error)	\
	(!(((error) == EINTR) || ((error) == EIO)))
285*7c478bd9Sstevel@tonic-gate 
/*
 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 *
 * Finds (or creates) the per-zone chhead cache entry matching the
 * request's RPC program, version, transport device, and protocol
 * family, then either reuses a cached client handle from that entry or
 * allocates a fresh one.  On success, returns 0 with *newcl set to an
 * RPC client handle (auth attached) and *chp set to the chtab entry
 * the caller releases via clfree().  Returns EINVAL on bad arguments,
 * otherwise an error from transport/auth setup (EINTR if sec_clnt_geth()
 * produced no auth handle without reporting an error).
 */
static int
clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
    struct chtab **chp, struct nfs_clnt *nfscl)
{
	struct chhead *ch, *newch;
	struct chhead **plistp;
	struct chtab *cp;
	int error;
	k_sigset_t smask;

	if (newcl == NULL || chp == NULL || ci == NULL)
		return (EINVAL);

	*newcl = NULL;
	*chp = NULL;

	/*
	 * Find an unused handle or create one
	 */
	newch = NULL;
	nfscl->nfscl_stat.clgets.value.ui64++;
top:
	/*
	 * Find the correct entry in the cache to check for free
	 * client handles.  The search is based on the RPC program
	 * number, program version number, dev_t for the transport
	 * device, and the protocol family.
	 */
	mutex_enter(&nfscl->nfscl_chtable_lock);
	plistp = &nfscl->nfscl_chtable;
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_prog == ci->cl_prog &&
		    ch->ch_vers == ci->cl_vers &&
		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
		    (strcmp(ch->ch_protofmly,
			svp->sv_knconf->knc_protofmly) == 0))
			break;
		plistp = &ch->ch_next;	/* remember predecessor link */
	}

	/*
	 * If we didn't find a cache entry for this quadruple, then
	 * create one.  If we don't have one already preallocated,
	 * then drop the cache lock, create one, and then start over.
	 * If we did have a preallocated entry, then just add it to
	 * the front of the list.
	 */
	if (ch == NULL) {
		if (newch == NULL) {
			/*
			 * Allocate (KM_SLEEP) outside the lock, then
			 * retry the search from the top: another thread
			 * may have inserted a matching entry meanwhile.
			 */
			mutex_exit(&nfscl->nfscl_chtable_lock);
			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
			newch->ch_timesused = 0;
			newch->ch_prog = ci->cl_prog;
			newch->ch_vers = ci->cl_vers;
			newch->ch_dev = svp->sv_knconf->knc_rdev;
			newch->ch_protofmly = kmem_alloc(
			    strlen(svp->sv_knconf->knc_protofmly) + 1,
			    KM_SLEEP);
			(void) strcpy(newch->ch_protofmly,
			    svp->sv_knconf->knc_protofmly);
			newch->ch_list = NULL;
			goto top;
		}
		ch = newch;
		newch = NULL;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	/*
	 * We found a cache entry, but if it isn't on the front of the
	 * list, then move it to the front of the list to try to take
	 * advantage of locality of operations.
	 */
	} else if (ch != nfscl->nfscl_chtable) {
		*plistp = ch->ch_next;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	}

	/*
	 * If there was a free client handle cached, then remove it
	 * from the list, init it, and use it.
	 */
	if (ch->ch_list != NULL) {
		cp = ch->ch_list;
		ch->ch_list = cp->ch_list;
		mutex_exit(&nfscl->nfscl_chtable_lock);
		/* the preallocated chhead lost the race; free it */
		if (newch != NULL) {
			kmem_free(newch->ch_protofmly,
			    strlen(newch->ch_protofmly) + 1);
			kmem_free(newch, sizeof (*newch));
		}
		/*
		 * Re-target the cached handle at this server/address and
		 * attach an auth handle for the caller's credentials and
		 * the mount's security data.
		 */
		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
		error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
		    &cp->ch_client->cl_auth);
		if (error || cp->ch_client->cl_auth == NULL) {
			CLNT_DESTROY(cp->ch_client);
			kmem_cache_free(chtab_cache, cp);
			/* no auth handle but no error: report EINTR */
			return ((error != 0) ? error : EINTR);
		}
		ch->ch_timesused++;
		*newcl = cp->ch_client;
		*chp = cp;
		return (0);
	}

	/*
	 * There weren't any free client handles which fit, so allocate
	 * a new one and use that.
	 */
#ifdef DEBUG
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1);
#endif
	mutex_exit(&nfscl->nfscl_chtable_lock);

	/*
	 * NOTE(review): stat updated without nfscl_chtable_lock here
	 * (as with clgets above) — presumably an acceptable race for a
	 * statistics counter.
	 */
	nfscl->nfscl_stat.cltoomany.value.ui64++;
	if (newch != NULL) {
		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
		kmem_free(newch, sizeof (*newch));
	}

	cp = kmem_cache_alloc(chtab_cache, KM_SLEEP);
	cp->ch_head = ch;

	/*
	 * Allow signals to interrupt the transport create when this is
	 * an interruptible (MI_INT) mount.
	 */
	sigintr(&smask, (int)ci->cl_flags & MI_INT);
	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
	sigunintr(&smask);

	if (error != 0) {
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		/*
		 * Warning is unnecessary if error is EINTR.
		 */
		if (error != EINTR) {
			nfs_cmn_err(error, CE_WARN,
			    "clget: couldn't create handle: %m\n");
		}
		return (error);
	}
	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
	/*
	 * Replace the default auth handle created by clnt_tli_kcreate()
	 * with one built from the mount's security data.
	 */
	auth_destroy(cp->ch_client->cl_auth);
	error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
	    &cp->ch_client->cl_auth);
	if (error || cp->ch_client->cl_auth == NULL) {
		CLNT_DESTROY(cp->ch_client);
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		/* no auth handle but no error: report EINTR */
		return ((error != 0) ? error : EINTR);
	}
	ch->ch_timesused++;
	*newcl = cp->ch_client;
	ASSERT(cp->ch_client->cl_nosignal == FALSE);
	*chp = cp;
	return (0);
}
450*7c478bd9Sstevel@tonic-gate 
451*7c478bd9Sstevel@tonic-gate int
452*7c478bd9Sstevel@tonic-gate clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
453*7c478bd9Sstevel@tonic-gate     struct chtab **chp)
454*7c478bd9Sstevel@tonic-gate {
455*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
456*7c478bd9Sstevel@tonic-gate 
457*7c478bd9Sstevel@tonic-gate 	nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone);
458*7c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
459*7c478bd9Sstevel@tonic-gate 
460*7c478bd9Sstevel@tonic-gate 	return (clget_impl(ci, svp, cr, newcl, chp, nfscl));
461*7c478bd9Sstevel@tonic-gate }
462*7c478bd9Sstevel@tonic-gate 
463*7c478bd9Sstevel@tonic-gate static int
464*7c478bd9Sstevel@tonic-gate acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
465*7c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
466*7c478bd9Sstevel@tonic-gate {
467*7c478bd9Sstevel@tonic-gate 	clinfo_t ci;
468*7c478bd9Sstevel@tonic-gate 	int error;
469*7c478bd9Sstevel@tonic-gate 
470*7c478bd9Sstevel@tonic-gate 	/*
471*7c478bd9Sstevel@tonic-gate 	 * Set read buffer size to rsize
472*7c478bd9Sstevel@tonic-gate 	 * and add room for RPC headers.
473*7c478bd9Sstevel@tonic-gate 	 */
474*7c478bd9Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
475*7c478bd9Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
476*7c478bd9Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
477*7c478bd9Sstevel@tonic-gate 
478*7c478bd9Sstevel@tonic-gate 	/*
479*7c478bd9Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
480*7c478bd9Sstevel@tonic-gate 	 * meaning: do not retransmit.
481*7c478bd9Sstevel@tonic-gate 	 */
482*7c478bd9Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
483*7c478bd9Sstevel@tonic-gate 		ci.cl_retrans = 0;
484*7c478bd9Sstevel@tonic-gate 	else
485*7c478bd9Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
486*7c478bd9Sstevel@tonic-gate 
487*7c478bd9Sstevel@tonic-gate 	ci.cl_prog = NFS_ACL_PROGRAM;
488*7c478bd9Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
489*7c478bd9Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate 	/*
492*7c478bd9Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
493*7c478bd9Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
494*7c478bd9Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
495*7c478bd9Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
496*7c478bd9Sstevel@tonic-gate 	 */
497*7c478bd9Sstevel@tonic-gate 	do {
498*7c478bd9Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
499*7c478bd9Sstevel@tonic-gate 
500*7c478bd9Sstevel@tonic-gate 		if (error == 0)
501*7c478bd9Sstevel@tonic-gate 			break;
502*7c478bd9Sstevel@tonic-gate 
503*7c478bd9Sstevel@tonic-gate 		/*
504*7c478bd9Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
505*7c478bd9Sstevel@tonic-gate 		 */
506*7c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
507*7c478bd9Sstevel@tonic-gate 			error = EIO;
508*7c478bd9Sstevel@tonic-gate 			break;
509*7c478bd9Sstevel@tonic-gate 		}
510*7c478bd9Sstevel@tonic-gate 
511*7c478bd9Sstevel@tonic-gate 		/* do not retry for softmount */
512*7c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
513*7c478bd9Sstevel@tonic-gate 			break;
514*7c478bd9Sstevel@tonic-gate 
515*7c478bd9Sstevel@tonic-gate 		/* let the caller deal with the failover case */
516*7c478bd9Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
517*7c478bd9Sstevel@tonic-gate 			break;
518*7c478bd9Sstevel@tonic-gate 
519*7c478bd9Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
520*7c478bd9Sstevel@tonic-gate 
521*7c478bd9Sstevel@tonic-gate 	return (error);
522*7c478bd9Sstevel@tonic-gate }
523*7c478bd9Sstevel@tonic-gate 
524*7c478bd9Sstevel@tonic-gate static int
525*7c478bd9Sstevel@tonic-gate nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
526*7c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
527*7c478bd9Sstevel@tonic-gate {
528*7c478bd9Sstevel@tonic-gate 	clinfo_t ci;
529*7c478bd9Sstevel@tonic-gate 	int error;
530*7c478bd9Sstevel@tonic-gate 
531*7c478bd9Sstevel@tonic-gate 	/*
532*7c478bd9Sstevel@tonic-gate 	 * Set read buffer size to rsize
533*7c478bd9Sstevel@tonic-gate 	 * and add room for RPC headers.
534*7c478bd9Sstevel@tonic-gate 	 */
535*7c478bd9Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
536*7c478bd9Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
537*7c478bd9Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
538*7c478bd9Sstevel@tonic-gate 
539*7c478bd9Sstevel@tonic-gate 	/*
540*7c478bd9Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
541*7c478bd9Sstevel@tonic-gate 	 * meaning: do not retransmit.
542*7c478bd9Sstevel@tonic-gate 	 */
543*7c478bd9Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
544*7c478bd9Sstevel@tonic-gate 		ci.cl_retrans = 0;
545*7c478bd9Sstevel@tonic-gate 	else
546*7c478bd9Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
547*7c478bd9Sstevel@tonic-gate 
548*7c478bd9Sstevel@tonic-gate 	ci.cl_prog = mi->mi_prog;
549*7c478bd9Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
550*7c478bd9Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
551*7c478bd9Sstevel@tonic-gate 
552*7c478bd9Sstevel@tonic-gate 	/*
553*7c478bd9Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
554*7c478bd9Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
555*7c478bd9Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
556*7c478bd9Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
557*7c478bd9Sstevel@tonic-gate 	 */
558*7c478bd9Sstevel@tonic-gate 	do {
559*7c478bd9Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
560*7c478bd9Sstevel@tonic-gate 
561*7c478bd9Sstevel@tonic-gate 		if (error == 0)
562*7c478bd9Sstevel@tonic-gate 			break;
563*7c478bd9Sstevel@tonic-gate 
564*7c478bd9Sstevel@tonic-gate 		/*
565*7c478bd9Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
566*7c478bd9Sstevel@tonic-gate 		 */
567*7c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
568*7c478bd9Sstevel@tonic-gate 			error = EIO;
569*7c478bd9Sstevel@tonic-gate 			break;
570*7c478bd9Sstevel@tonic-gate 		}
571*7c478bd9Sstevel@tonic-gate 
572*7c478bd9Sstevel@tonic-gate 		/* do not retry for softmount */
573*7c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
574*7c478bd9Sstevel@tonic-gate 			break;
575*7c478bd9Sstevel@tonic-gate 
576*7c478bd9Sstevel@tonic-gate 		/* let the caller deal with the failover case */
577*7c478bd9Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
578*7c478bd9Sstevel@tonic-gate 			break;
579*7c478bd9Sstevel@tonic-gate 
580*7c478bd9Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
581*7c478bd9Sstevel@tonic-gate 
582*7c478bd9Sstevel@tonic-gate 	return (error);
583*7c478bd9Sstevel@tonic-gate }
584*7c478bd9Sstevel@tonic-gate 
585*7c478bd9Sstevel@tonic-gate static void
586*7c478bd9Sstevel@tonic-gate clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl)
587*7c478bd9Sstevel@tonic-gate {
588*7c478bd9Sstevel@tonic-gate 	if (cl->cl_auth != NULL) {
589*7c478bd9Sstevel@tonic-gate 		sec_clnt_freeh(cl->cl_auth);
590*7c478bd9Sstevel@tonic-gate 		cl->cl_auth = NULL;
591*7c478bd9Sstevel@tonic-gate 	}
592*7c478bd9Sstevel@tonic-gate 
593*7c478bd9Sstevel@tonic-gate 	/*
594*7c478bd9Sstevel@tonic-gate 	 * Timestamp this cache entry so that we know when it was last
595*7c478bd9Sstevel@tonic-gate 	 * used.
596*7c478bd9Sstevel@tonic-gate 	 */
597*7c478bd9Sstevel@tonic-gate 	cp->ch_freed = gethrestime_sec();
598*7c478bd9Sstevel@tonic-gate 
599*7c478bd9Sstevel@tonic-gate 	/*
600*7c478bd9Sstevel@tonic-gate 	 * Add the free client handle to the front of the list.
601*7c478bd9Sstevel@tonic-gate 	 * This way, the list will be sorted in youngest to oldest
602*7c478bd9Sstevel@tonic-gate 	 * order.
603*7c478bd9Sstevel@tonic-gate 	 */
604*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
605*7c478bd9Sstevel@tonic-gate 	cp->ch_list = cp->ch_head->ch_list;
606*7c478bd9Sstevel@tonic-gate 	cp->ch_head->ch_list = cp;
607*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
608*7c478bd9Sstevel@tonic-gate }
609*7c478bd9Sstevel@tonic-gate 
610*7c478bd9Sstevel@tonic-gate void
611*7c478bd9Sstevel@tonic-gate clfree(CLIENT *cl, struct chtab *cp)
612*7c478bd9Sstevel@tonic-gate {
613*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
614*7c478bd9Sstevel@tonic-gate 
615*7c478bd9Sstevel@tonic-gate 	nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone);
616*7c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
617*7c478bd9Sstevel@tonic-gate 
618*7c478bd9Sstevel@tonic-gate 	clfree_impl(cl, cp, nfscl);
619*7c478bd9Sstevel@tonic-gate }
620*7c478bd9Sstevel@tonic-gate 
#define	CL_HOLDTIME	60	/* seconds to hold idle client handles */
622*7c478bd9Sstevel@tonic-gate 
/*
 * Reclaim free client handles from one zone's cache that have been idle
 * for more than cl_holdtime seconds.  Eligible entries are first unlinked
 * from their per-server free lists under the chtable lock, then destroyed
 * and freed after the lock is dropped (CLNT_DESTROY may be expensive).
 */
static void
clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime)
{
	struct chhead *ch;
	struct chtab *cp;	/* list of objects that can be reclaimed */
	struct chtab *cpe;
	struct chtab *cpl;
	struct chtab **cpp;
#ifdef DEBUG
	int n = 0;		/* count of entries actually reclaimed */
#endif

	/*
	 * Need to reclaim some memory, so step through the cache
	 * looking through the lists for entries which can be freed.
	 */
	cp = NULL;

	mutex_enter(&nfscl->nfscl_chtable_lock);

	/*
	 * Here we step through each non-NULL quadruple and start to
	 * construct the reclaim list pointed to by cp.  Note that
	 * cp will contain all eligible chtab entries.  When this traversal
	 * completes, chtab entries from the last quadruple will be at the
	 * front of cp and entries from previously inspected quadruples have
	 * been appended to the rear of cp.
	 */
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_list == NULL)
			continue;
		/*
		 * Search each list for entries older than
		 * cl_holdtime seconds.  The lists are maintained
		 * in youngest to oldest order so that when the
		 * first entry is found which is old enough, then
		 * all of the rest of the entries on the list will
		 * be old enough as well.
		 */
		cpl = ch->ch_list;
		cpp = &ch->ch_list;
		while (cpl != NULL &&
			cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
			cpp = &cpl->ch_list;
			cpl = cpl->ch_list;
		}
		if (cpl != NULL) {
			/*
			 * cpl now heads the old-enough tail of this list;
			 * detach it from the cache, then append any
			 * previously collected entries (cp) behind it.
			 */
			*cpp = NULL;
			if (cp != NULL) {
				cpe = cpl;
				while (cpe->ch_list != NULL)
					cpe = cpe->ch_list;
				cpe->ch_list = cp;
			}
			cp = cpl;
		}
	}

	mutex_exit(&nfscl->nfscl_chtable_lock);

	/*
	 * If cp is empty, then there is nothing to reclaim here.
	 */
	if (cp == NULL)
		return;

	/*
	 * Step through the list of entries to free, destroying each client
	 * handle and kmem_free'ing the memory for each entry.
	 */
	while (cp != NULL) {
#ifdef DEBUG
		n++;
#endif
		CLNT_DESTROY(cp->ch_client);
		cpl = cp->ch_list;
		kmem_cache_free(chtab_cache, cp);
		cp = cpl;
	}

#ifdef DEBUG
	/*
	 * Update clalloc so that nfsstat shows the current number
	 * of allocated client handles.
	 */
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
#endif
}
711*7c478bd9Sstevel@tonic-gate 
712*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
713*7c478bd9Sstevel@tonic-gate static void
714*7c478bd9Sstevel@tonic-gate clreclaim(void *all)
715*7c478bd9Sstevel@tonic-gate {
716*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
717*7c478bd9Sstevel@tonic-gate 
718*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
719*7c478bd9Sstevel@tonic-gate 	clstat_debug.clreclaim.value.ui64++;
720*7c478bd9Sstevel@tonic-gate #endif
721*7c478bd9Sstevel@tonic-gate 	/*
722*7c478bd9Sstevel@tonic-gate 	 * The system is low on memory; go through and try to reclaim some from
723*7c478bd9Sstevel@tonic-gate 	 * every zone on the system.
724*7c478bd9Sstevel@tonic-gate 	 */
725*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
726*7c478bd9Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
727*7c478bd9Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl))
728*7c478bd9Sstevel@tonic-gate 		clreclaim_zone(nfscl, CL_HOLDTIME);
729*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
730*7c478bd9Sstevel@tonic-gate }
731*7c478bd9Sstevel@tonic-gate 
/*
 * Minimum time-out values indexed by call type.
 * These units are in "eights" of a second to avoid multiplies
 * (a value of 8 is one second; see the ">>3" where rfscall()
 * scales the selected entry by hz).
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};

/*
 * Back off for retransmission timeout, MAXTIMO is in hz of a sec.
 * dobackoff() doubles a timeout; backoff() applies the doubling only
 * while the timeout is still below the MAXTIMO ceiling.
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))

/* Dynamic transfer-size tuning thresholds used by nfs_feedback(). */
#define	MIN_NFS_TSIZE 512	/* minimum "chunk" of NFS IO */
#define	REDUCE_NFS_TIME (hz/2)	/* rtxcur we try to keep under */
#define	INCREASE_NFS_TIME (hz/3*8) /* srtt we try to keep under (scaled*8) */
750*7c478bd9Sstevel@tonic-gate 
751*7c478bd9Sstevel@tonic-gate /*
752*7c478bd9Sstevel@tonic-gate  * Function called when rfscall notices that we have been
753*7c478bd9Sstevel@tonic-gate  * re-transmitting, or when we get a response without retransmissions.
754*7c478bd9Sstevel@tonic-gate  * Return 1 if the transfer size was adjusted down - 0 if no change.
755*7c478bd9Sstevel@tonic-gate  */
756*7c478bd9Sstevel@tonic-gate static int
757*7c478bd9Sstevel@tonic-gate nfs_feedback(int flag, int which, mntinfo_t *mi)
758*7c478bd9Sstevel@tonic-gate {
759*7c478bd9Sstevel@tonic-gate 	int kind;
760*7c478bd9Sstevel@tonic-gate 	int r = 0;
761*7c478bd9Sstevel@tonic-gate 
762*7c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
763*7c478bd9Sstevel@tonic-gate 	if (flag == FEEDBACK_REXMIT1) {
764*7c478bd9Sstevel@tonic-gate 		if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 &&
765*7c478bd9Sstevel@tonic-gate 		    mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME)
766*7c478bd9Sstevel@tonic-gate 			goto done;
767*7c478bd9Sstevel@tonic-gate 		if (mi->mi_curread > MIN_NFS_TSIZE) {
768*7c478bd9Sstevel@tonic-gate 			mi->mi_curread /= 2;
769*7c478bd9Sstevel@tonic-gate 			if (mi->mi_curread < MIN_NFS_TSIZE)
770*7c478bd9Sstevel@tonic-gate 				mi->mi_curread = MIN_NFS_TSIZE;
771*7c478bd9Sstevel@tonic-gate 			r = 1;
772*7c478bd9Sstevel@tonic-gate 		}
773*7c478bd9Sstevel@tonic-gate 
774*7c478bd9Sstevel@tonic-gate 		if (mi->mi_curwrite > MIN_NFS_TSIZE) {
775*7c478bd9Sstevel@tonic-gate 			mi->mi_curwrite /= 2;
776*7c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite < MIN_NFS_TSIZE)
777*7c478bd9Sstevel@tonic-gate 				mi->mi_curwrite = MIN_NFS_TSIZE;
778*7c478bd9Sstevel@tonic-gate 			r = 1;
779*7c478bd9Sstevel@tonic-gate 		}
780*7c478bd9Sstevel@tonic-gate 	} else if (flag == FEEDBACK_OK) {
781*7c478bd9Sstevel@tonic-gate 		kind = mi->mi_timer_type[which];
782*7c478bd9Sstevel@tonic-gate 		if (kind == 0 ||
783*7c478bd9Sstevel@tonic-gate 		    mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME)
784*7c478bd9Sstevel@tonic-gate 			goto done;
785*7c478bd9Sstevel@tonic-gate 		if (kind == 1) {
786*7c478bd9Sstevel@tonic-gate 			if (mi->mi_curread >= mi->mi_tsize)
787*7c478bd9Sstevel@tonic-gate 				goto done;
788*7c478bd9Sstevel@tonic-gate 			mi->mi_curread +=  MIN_NFS_TSIZE;
789*7c478bd9Sstevel@tonic-gate 			if (mi->mi_curread > mi->mi_tsize/2)
790*7c478bd9Sstevel@tonic-gate 				mi->mi_curread = mi->mi_tsize;
791*7c478bd9Sstevel@tonic-gate 		} else if (kind == 2) {
792*7c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite >= mi->mi_stsize)
793*7c478bd9Sstevel@tonic-gate 				goto done;
794*7c478bd9Sstevel@tonic-gate 			mi->mi_curwrite += MIN_NFS_TSIZE;
795*7c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite > mi->mi_stsize/2)
796*7c478bd9Sstevel@tonic-gate 				mi->mi_curwrite = mi->mi_stsize;
797*7c478bd9Sstevel@tonic-gate 		}
798*7c478bd9Sstevel@tonic-gate 	}
799*7c478bd9Sstevel@tonic-gate done:
800*7c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
801*7c478bd9Sstevel@tonic-gate 	return (r);
802*7c478bd9Sstevel@tonic-gate }
803*7c478bd9Sstevel@tonic-gate 
804*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
805*7c478bd9Sstevel@tonic-gate static int rfs2call_hits = 0;
806*7c478bd9Sstevel@tonic-gate static int rfs2call_misses = 0;
807*7c478bd9Sstevel@tonic-gate #endif
808*7c478bd9Sstevel@tonic-gate 
809*7c478bd9Sstevel@tonic-gate int
810*7c478bd9Sstevel@tonic-gate rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
811*7c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
812*7c478bd9Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
813*7c478bd9Sstevel@tonic-gate {
814*7c478bd9Sstevel@tonic-gate 	int rpcerror;
815*7c478bd9Sstevel@tonic-gate 	enum clnt_stat rpc_status;
816*7c478bd9Sstevel@tonic-gate 
817*7c478bd9Sstevel@tonic-gate 	ASSERT(statusp != NULL);
818*7c478bd9Sstevel@tonic-gate 
819*7c478bd9Sstevel@tonic-gate 	rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
820*7c478bd9Sstevel@tonic-gate 	    cr, douprintf, &rpc_status, flags, fi);
821*7c478bd9Sstevel@tonic-gate 	if (!rpcerror) {
822*7c478bd9Sstevel@tonic-gate 		/*
823*7c478bd9Sstevel@tonic-gate 		 * See crnetadjust() for comments.
824*7c478bd9Sstevel@tonic-gate 		 */
825*7c478bd9Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
826*7c478bd9Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
827*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
828*7c478bd9Sstevel@tonic-gate 			rfs2call_hits++;
829*7c478bd9Sstevel@tonic-gate #endif
830*7c478bd9Sstevel@tonic-gate 			rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
831*7c478bd9Sstevel@tonic-gate 			    resp, cr, douprintf, NULL, flags, fi);
832*7c478bd9Sstevel@tonic-gate 			crfree(cr);
833*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
834*7c478bd9Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
835*7c478bd9Sstevel@tonic-gate 				rfs2call_misses++;
836*7c478bd9Sstevel@tonic-gate #endif
837*7c478bd9Sstevel@tonic-gate 		}
838*7c478bd9Sstevel@tonic-gate 	} else if (rpc_status == RPC_PROCUNAVAIL) {
839*7c478bd9Sstevel@tonic-gate 		*statusp = NFSERR_OPNOTSUPP;
840*7c478bd9Sstevel@tonic-gate 		rpcerror = 0;
841*7c478bd9Sstevel@tonic-gate 	}
842*7c478bd9Sstevel@tonic-gate 
843*7c478bd9Sstevel@tonic-gate 	return (rpcerror);
844*7c478bd9Sstevel@tonic-gate }
845*7c478bd9Sstevel@tonic-gate 
/*
 * Delay, in clock ticks, between retries of an NFS3ERR_JUKEBOX request.
 * Parenthesized so the macro expands safely inside larger expressions.
 */
#define	NFS3_JUKEBOX_DELAY	(10 * hz)

static clock_t nfs3_jukebox_delay = 0;

#ifdef DEBUG
static int rfs3call_hits = 0;	/* NFS3ERR_ACCES retried with adjusted cred */
static int rfs3call_misses = 0;	/* retry still returned NFS3ERR_ACCES */
#endif
854*7c478bd9Sstevel@tonic-gate 
/*
 * Issue an NFS Version 3 RPC through rfscall().
 *
 * On RPC success the NFS status is left in *statusp.  Two statuses get
 * special handling:
 *  - NFS3ERR_JUKEBOX: the file is temporarily offline on the server;
 *    tell the user once, delay, and retry until a different result
 *    comes back.  Threads of p0 (system processes) cannot wait, so
 *    they fail immediately with EAGAIN.
 *  - NFS3ERR_ACCES: retry once with a network-adjusted credential
 *    (see crnetadjust()).
 *
 * Returns 0 when *statusp is valid, otherwise an errno from rfscall().
 */
int
rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
    nfsstat3 *statusp, int flags, failinfo_t *fi)
{
	int rpcerror;
	int user_informed;	/* jukebox message printed at most once */

	user_informed = 0;
	do {
		rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
		    cr, douprintf, NULL, flags, fi);
		if (!rpcerror) {
			cred_t *crr;
			if (*statusp == NFS3ERR_JUKEBOX) {
				/* system processes can't block; fail fast */
				if (ttoproc(curthread) == &p0) {
					rpcerror = EAGAIN;
					break;
				}
				if (!user_informed) {
					user_informed = 1;
					uprintf(
		"file temporarily unavailable on the server, retrying...\n");
				}
				delay(nfs3_jukebox_delay);
			}
			/*
			 * See crnetadjust() for comments.
			 */
			else if (*statusp == NFS3ERR_ACCES &&
			    (crr = crnetadjust(cr)) != NULL) {
#ifdef DEBUG
				rfs3call_hits++;
#endif
				rpcerror = rfscall(mi, which, xdrargs, argsp,
				    xdrres, resp, crr, douprintf,
				    NULL, flags, fi);

				crfree(crr);
#ifdef DEBUG
				if (*statusp == NFS3ERR_ACCES)
					rfs3call_misses++;
#endif
			}
		}
	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);

	return (rpcerror);
}
904*7c478bd9Sstevel@tonic-gate 
/* Is this rnode's filehandle in sync with the mount's current server? */
#define	VALID_FH(fi)	\
	(VTOR((fi)->vp)->r_server == VTOMI((fi)->vp)->mi_curr_serv)

/*
 * Poor man's interruptible rw_enter() used around failover remapping:
 * mi_readers counts active callers, and the last one out wakes any
 * thread waiting on mi_failover_cv to switch servers.  Call sites in
 * this file invoke these under mi->mi_lock.
 *
 * Wrapped in do { } while (0) so each macro expands as exactly one
 * statement (safe in an unbraced if/else); parameters parenthesized
 * to keep expansion safe with arbitrary argument expressions.
 */
#define	INC_READERS(mi)		do { \
	(mi)->mi_readers++; \
} while (0)
#define	DEC_READERS(mi)		do { \
	(mi)->mi_readers--; \
	if ((mi)->mi_readers == 0) \
		cv_broadcast(&(mi)->mi_failover_cv); \
} while (0)
914*7c478bd9Sstevel@tonic-gate 
915*7c478bd9Sstevel@tonic-gate static int
916*7c478bd9Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
917*7c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
918*7c478bd9Sstevel@tonic-gate     enum clnt_stat *rpc_status, int flags, failinfo_t *fi)
919*7c478bd9Sstevel@tonic-gate {
920*7c478bd9Sstevel@tonic-gate 	CLIENT *client;
921*7c478bd9Sstevel@tonic-gate 	struct chtab *ch;
922*7c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
923*7c478bd9Sstevel@tonic-gate 	struct rpc_err rpcerr;
924*7c478bd9Sstevel@tonic-gate 	struct timeval wait;
925*7c478bd9Sstevel@tonic-gate 	int timeo;		/* in units of hz */
926*7c478bd9Sstevel@tonic-gate 	int my_rsize, my_wsize;
927*7c478bd9Sstevel@tonic-gate 	bool_t tryagain;
928*7c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
929*7c478bd9Sstevel@tonic-gate 	servinfo_t *svp;
930*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
931*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
932*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
933*7c478bd9Sstevel@tonic-gate 	char *bufp;
934*7c478bd9Sstevel@tonic-gate #endif
935*7c478bd9Sstevel@tonic-gate 
936*7c478bd9Sstevel@tonic-gate 
937*7c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
938*7c478bd9Sstevel@tonic-gate 		"rfscall_start:which %d mi %p", which, mi);
939*7c478bd9Sstevel@tonic-gate 
940*7c478bd9Sstevel@tonic-gate 	nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone);
941*7c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
942*7c478bd9Sstevel@tonic-gate 
943*7c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
944*7c478bd9Sstevel@tonic-gate 	mi->mi_reqs[which].value.ui64++;
945*7c478bd9Sstevel@tonic-gate 
946*7c478bd9Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
947*7c478bd9Sstevel@tonic-gate 
948*7c478bd9Sstevel@tonic-gate 	/*
949*7c478bd9Sstevel@tonic-gate 	 * In case of forced unmount or zone shutdown, return EIO.
950*7c478bd9Sstevel@tonic-gate 	 */
951*7c478bd9Sstevel@tonic-gate 
952*7c478bd9Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
953*7c478bd9Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
954*7c478bd9Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
955*7c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
956*7c478bd9Sstevel@tonic-gate 	}
957*7c478bd9Sstevel@tonic-gate 
958*7c478bd9Sstevel@tonic-gate 	/*
959*7c478bd9Sstevel@tonic-gate 	 * Remember the transfer sizes in case
960*7c478bd9Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
961*7c478bd9Sstevel@tonic-gate 	 */
962*7c478bd9Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
963*7c478bd9Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
964*7c478bd9Sstevel@tonic-gate 
965*7c478bd9Sstevel@tonic-gate 	/*
966*7c478bd9Sstevel@tonic-gate 	 * NFS client failover support
967*7c478bd9Sstevel@tonic-gate 	 *
968*7c478bd9Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
969*7c478bd9Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
970*7c478bd9Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
971*7c478bd9Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
972*7c478bd9Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
973*7c478bd9Sstevel@tonic-gate 	 *
974*7c478bd9Sstevel@tonic-gate 	 * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible
975*7c478bd9Sstevel@tonic-gate 	 * rw_enter(); we're trying to keep the current server from being
976*7c478bd9Sstevel@tonic-gate 	 * changed on us until we're done with the remapping and have a
977*7c478bd9Sstevel@tonic-gate 	 * matching client handle.  We don't want to sending a filehandle
978*7c478bd9Sstevel@tonic-gate 	 * to the wrong host.
979*7c478bd9Sstevel@tonic-gate 	 */
980*7c478bd9Sstevel@tonic-gate failoverretry:
981*7c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
982*7c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
983*7c478bd9Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
984*7c478bd9Sstevel@tonic-gate 			if (failover_wait(mi)) {
985*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
986*7c478bd9Sstevel@tonic-gate 				return (EINTR);
987*7c478bd9Sstevel@tonic-gate 			}
988*7c478bd9Sstevel@tonic-gate 		}
989*7c478bd9Sstevel@tonic-gate 		INC_READERS(mi);
990*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
991*7c478bd9Sstevel@tonic-gate 		if (fi) {
992*7c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
993*7c478bd9Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
994*7c478bd9Sstevel@tonic-gate 				int remaperr;
995*7c478bd9Sstevel@tonic-gate 
996*7c478bd9Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
997*7c478bd9Sstevel@tonic-gate 				remaperr = failover_remap(fi);
998*7c478bd9Sstevel@tonic-gate 				if (remaperr != 0) {
999*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1000*7c478bd9Sstevel@tonic-gate 					if (remaperr != EINTR)
1001*7c478bd9Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
1002*7c478bd9Sstevel@tonic-gate 					    "rfscall couldn't failover: %m");
1003*7c478bd9Sstevel@tonic-gate #endif
1004*7c478bd9Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
1005*7c478bd9Sstevel@tonic-gate 					DEC_READERS(mi);
1006*7c478bd9Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
1007*7c478bd9Sstevel@tonic-gate 					/*
1008*7c478bd9Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
1009*7c478bd9Sstevel@tonic-gate 					 * and the filesystem is hard mounted
1010*7c478bd9Sstevel@tonic-gate 					 * we have to retry the call with a new
1011*7c478bd9Sstevel@tonic-gate 					 * server.
1012*7c478bd9Sstevel@tonic-gate 					 */
1013*7c478bd9Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
1014*7c478bd9Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
1015*7c478bd9Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
1016*7c478bd9Sstevel@tonic-gate 							failover_newserver(mi);
1017*7c478bd9Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
1018*7c478bd9Sstevel@tonic-gate 						goto failoverretry;
1019*7c478bd9Sstevel@tonic-gate 					}
1020*7c478bd9Sstevel@tonic-gate 					rpcerr.re_errno = remaperr;
1021*7c478bd9Sstevel@tonic-gate 					return (remaperr);
1022*7c478bd9Sstevel@tonic-gate 				}
1023*7c478bd9Sstevel@tonic-gate 			}
1024*7c478bd9Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
1025*7c478bd9Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
1026*7c478bd9Sstevel@tonic-gate 		}
1027*7c478bd9Sstevel@tonic-gate 	}
1028*7c478bd9Sstevel@tonic-gate 
1029*7c478bd9Sstevel@tonic-gate 	/*
1030*7c478bd9Sstevel@tonic-gate 	 * clget() calls clnt_tli_kinit() which clears the xid, so we
1031*7c478bd9Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
1032*7c478bd9Sstevel@tonic-gate 	 */
1033*7c478bd9Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
1034*7c478bd9Sstevel@tonic-gate 	rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl);
1035*7c478bd9Sstevel@tonic-gate 
1036*7c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
1037*7c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
1038*7c478bd9Sstevel@tonic-gate 		DEC_READERS(mi);
1039*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
1040*7c478bd9Sstevel@tonic-gate 
1041*7c478bd9Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
1042*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno == ECONNRESET) &&
1043*7c478bd9Sstevel@tonic-gate 				failover_safe(fi)) {
1044*7c478bd9Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
1045*7c478bd9Sstevel@tonic-gate 				failover_newserver(mi);
1046*7c478bd9Sstevel@tonic-gate 			goto failoverretry;
1047*7c478bd9Sstevel@tonic-gate 		}
1048*7c478bd9Sstevel@tonic-gate 	}
1049*7c478bd9Sstevel@tonic-gate 	if (rpcerr.re_errno != 0)
1050*7c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
1051*7c478bd9Sstevel@tonic-gate 
1052*7c478bd9Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
1053*7c478bd9Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
1054*7c478bd9Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
1055*7c478bd9Sstevel@tonic-gate 	} else {
1056*7c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
1057*7c478bd9Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
1058*7c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_timer_type[which]]),
1059*7c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
1060*7c478bd9Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_call_type[which]]*hz)>>3,
1061*7c478bd9Sstevel@tonic-gate 		    (void (*)())NULL, (caddr_t)mi, 0);
1062*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
1063*7c478bd9Sstevel@tonic-gate 	}
1064*7c478bd9Sstevel@tonic-gate 
1065*7c478bd9Sstevel@tonic-gate 	/*
1066*7c478bd9Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
1067*7c478bd9Sstevel@tonic-gate 	 */
1068*7c478bd9Sstevel@tonic-gate 	do {
1069*7c478bd9Sstevel@tonic-gate 		tryagain = FALSE;
1070*7c478bd9Sstevel@tonic-gate 
1071*7c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
1072*7c478bd9Sstevel@tonic-gate 			status = RPC_FAILED;
1073*7c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
1074*7c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
1075*7c478bd9Sstevel@tonic-gate 			break;
1076*7c478bd9Sstevel@tonic-gate 		}
1077*7c478bd9Sstevel@tonic-gate 
1078*7c478bd9Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
1079*7c478bd9Sstevel@tonic-gate 
1080*7c478bd9Sstevel@tonic-gate 		/*
1081*7c478bd9Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1082*7c478bd9Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
1083*7c478bd9Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
1084*7c478bd9Sstevel@tonic-gate 		 */
1085*7c478bd9Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
1086*7c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
1087*7c478bd9Sstevel@tonic-gate 			client->cl_nosignal = TRUE;
1088*7c478bd9Sstevel@tonic-gate 
1089*7c478bd9Sstevel@tonic-gate 		/*
1090*7c478bd9Sstevel@tonic-gate 		 * If there is a current signal, then don't bother
1091*7c478bd9Sstevel@tonic-gate 		 * even trying to send out the request because we
1092*7c478bd9Sstevel@tonic-gate 		 * won't be able to block waiting for the response.
1093*7c478bd9Sstevel@tonic-gate 		 * Simply assume RPC_INTR and get on with it.
1094*7c478bd9Sstevel@tonic-gate 		 */
1095*7c478bd9Sstevel@tonic-gate 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1096*7c478bd9Sstevel@tonic-gate 			status = RPC_INTR;
1097*7c478bd9Sstevel@tonic-gate 		else {
1098*7c478bd9Sstevel@tonic-gate 			status = CLNT_CALL(client, which, xdrargs, argsp,
1099*7c478bd9Sstevel@tonic-gate 			    xdrres, resp, wait);
1100*7c478bd9Sstevel@tonic-gate 		}
1101*7c478bd9Sstevel@tonic-gate 
1102*7c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
1103*7c478bd9Sstevel@tonic-gate 			client->cl_nosignal = FALSE;
1104*7c478bd9Sstevel@tonic-gate 		/*
1105*7c478bd9Sstevel@tonic-gate 		 * restore original signal mask
1106*7c478bd9Sstevel@tonic-gate 		 */
1107*7c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
1108*7c478bd9Sstevel@tonic-gate 
1109*7c478bd9Sstevel@tonic-gate 		switch (status) {
1110*7c478bd9Sstevel@tonic-gate 		case RPC_SUCCESS:
1111*7c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
1112*7c478bd9Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
1113*7c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
1114*7c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize))
1115*7c478bd9Sstevel@tonic-gate 				(void) nfs_feedback(FEEDBACK_OK, which, mi);
1116*7c478bd9Sstevel@tonic-gate 			break;
1117*7c478bd9Sstevel@tonic-gate 
1118*7c478bd9Sstevel@tonic-gate 		case RPC_INTR:
1119*7c478bd9Sstevel@tonic-gate 			/*
1120*7c478bd9Sstevel@tonic-gate 			 * There is no way to recover from this error,
1121*7c478bd9Sstevel@tonic-gate 			 * even if mount option nointr is specified.
1122*7c478bd9Sstevel@tonic-gate 			 * SIGKILL, for example, cannot be blocked.
1123*7c478bd9Sstevel@tonic-gate 			 */
1124*7c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_INTR;
1125*7c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EINTR;
1126*7c478bd9Sstevel@tonic-gate 			break;
1127*7c478bd9Sstevel@tonic-gate 
1128*7c478bd9Sstevel@tonic-gate 		case RPC_UDERROR:
1129*7c478bd9Sstevel@tonic-gate 			/*
1130*7c478bd9Sstevel@tonic-gate 			 * If the NFS server is local (vold) and
1131*7c478bd9Sstevel@tonic-gate 			 * it goes away then we get RPC_UDERROR.
1132*7c478bd9Sstevel@tonic-gate 			 * This is a retryable error, so we would
1133*7c478bd9Sstevel@tonic-gate 			 * loop, so check to see if the specific
1134*7c478bd9Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that
1135*7c478bd9Sstevel@tonic-gate 			 * target did not exist at all.  If so,
1136*7c478bd9Sstevel@tonic-gate 			 * return with RPC_PROGUNAVAIL and
1137*7c478bd9Sstevel@tonic-gate 			 * ECONNRESET to indicate why.
1138*7c478bd9Sstevel@tonic-gate 			 */
1139*7c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
1140*7c478bd9Sstevel@tonic-gate 			if (rpcerr.re_errno == ECONNRESET) {
1141*7c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_PROGUNAVAIL;
1142*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = ECONNRESET;
1143*7c478bd9Sstevel@tonic-gate 				break;
1144*7c478bd9Sstevel@tonic-gate 			}
1145*7c478bd9Sstevel@tonic-gate 			/*FALLTHROUGH*/
1146*7c478bd9Sstevel@tonic-gate 
1147*7c478bd9Sstevel@tonic-gate 		default:		/* probably RPC_TIMEDOUT */
1148*7c478bd9Sstevel@tonic-gate 			if (IS_UNRECOVERABLE_RPC(status))
1149*7c478bd9Sstevel@tonic-gate 				break;
1150*7c478bd9Sstevel@tonic-gate 
1151*7c478bd9Sstevel@tonic-gate 			/*
1152*7c478bd9Sstevel@tonic-gate 			 * increment server not responding count
1153*7c478bd9Sstevel@tonic-gate 			 */
1154*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1155*7c478bd9Sstevel@tonic-gate 			mi->mi_noresponse++;
1156*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
1157*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1158*7c478bd9Sstevel@tonic-gate 			nfscl->nfscl_stat.noresponse.value.ui64++;
1159*7c478bd9Sstevel@tonic-gate #endif
1160*7c478bd9Sstevel@tonic-gate 
1161*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_HARD)) {
1162*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_SEMISOFT) ||
1163*7c478bd9Sstevel@tonic-gate 				    (mi->mi_ss_call_type[which] == 0))
1164*7c478bd9Sstevel@tonic-gate 					break;
1165*7c478bd9Sstevel@tonic-gate 			}
1166*7c478bd9Sstevel@tonic-gate 
1167*7c478bd9Sstevel@tonic-gate 			/*
1168*7c478bd9Sstevel@tonic-gate 			 * The call is in progress (over COTS).
1169*7c478bd9Sstevel@tonic-gate 			 * Try the CLNT_CALL again, but don't
1170*7c478bd9Sstevel@tonic-gate 			 * print a noisy error message.
1171*7c478bd9Sstevel@tonic-gate 			 */
1172*7c478bd9Sstevel@tonic-gate 			if (status == RPC_INPROGRESS) {
1173*7c478bd9Sstevel@tonic-gate 				tryagain = TRUE;
1174*7c478bd9Sstevel@tonic-gate 				break;
1175*7c478bd9Sstevel@tonic-gate 			}
1176*7c478bd9Sstevel@tonic-gate 
1177*7c478bd9Sstevel@tonic-gate 			if (flags & RFSCALL_SOFT)
1178*7c478bd9Sstevel@tonic-gate 				break;
1179*7c478bd9Sstevel@tonic-gate 
1180*7c478bd9Sstevel@tonic-gate 			/*
1181*7c478bd9Sstevel@tonic-gate 			 * On zone shutdown, just move on.
1182*7c478bd9Sstevel@tonic-gate 			 */
1183*7c478bd9Sstevel@tonic-gate 			if (zone_status_get(curproc->p_zone) >=
1184*7c478bd9Sstevel@tonic-gate 			    ZONE_IS_SHUTTING_DOWN) {
1185*7c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_FAILED;
1186*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
1187*7c478bd9Sstevel@tonic-gate 				break;
1188*7c478bd9Sstevel@tonic-gate 			}
1189*7c478bd9Sstevel@tonic-gate 
1190*7c478bd9Sstevel@tonic-gate 			/*
1191*7c478bd9Sstevel@tonic-gate 			 * NFS client failover support
1192*7c478bd9Sstevel@tonic-gate 			 *
1193*7c478bd9Sstevel@tonic-gate 			 * If the current server just failed us, we'll
1194*7c478bd9Sstevel@tonic-gate 			 * start the process of finding a new server.
1195*7c478bd9Sstevel@tonic-gate 			 * After that, we can just retry.
1196*7c478bd9Sstevel@tonic-gate 			 */
1197*7c478bd9Sstevel@tonic-gate 			if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
1198*7c478bd9Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv)
1199*7c478bd9Sstevel@tonic-gate 					failover_newserver(mi);
1200*7c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
1201*7c478bd9Sstevel@tonic-gate 				goto failoverretry;
1202*7c478bd9Sstevel@tonic-gate 			}
1203*7c478bd9Sstevel@tonic-gate 
1204*7c478bd9Sstevel@tonic-gate 			tryagain = TRUE;
1205*7c478bd9Sstevel@tonic-gate 			timeo = backoff(timeo);
1206*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1207*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_PRINTED)) {
1208*7c478bd9Sstevel@tonic-gate 				mi->mi_flags |= MI_PRINTED;
1209*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1210*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1211*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
1212*7c478bd9Sstevel@tonic-gate 			"NFS%d server %s not responding still trying\n",
1213*7c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
1214*7c478bd9Sstevel@tonic-gate #else
1215*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
1216*7c478bd9Sstevel@tonic-gate 			"NFS server %s not responding still trying\n",
1217*7c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
1218*7c478bd9Sstevel@tonic-gate #endif
1219*7c478bd9Sstevel@tonic-gate 			} else
1220*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1221*7c478bd9Sstevel@tonic-gate 			if (*douprintf && curproc->p_sessp->s_vp != NULL) {
1222*7c478bd9Sstevel@tonic-gate 				*douprintf = 0;
1223*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
1224*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1225*7c478bd9Sstevel@tonic-gate 					uprintf(
1226*7c478bd9Sstevel@tonic-gate 			    "NFS%d server %s not responding still trying\n",
1227*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
1228*7c478bd9Sstevel@tonic-gate #else
1229*7c478bd9Sstevel@tonic-gate 					uprintf(
1230*7c478bd9Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
1231*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
1232*7c478bd9Sstevel@tonic-gate #endif
1233*7c478bd9Sstevel@tonic-gate 			}
1234*7c478bd9Sstevel@tonic-gate 
1235*7c478bd9Sstevel@tonic-gate 			/*
1236*7c478bd9Sstevel@tonic-gate 			 * If doing dynamic adjustment of transfer
1237*7c478bd9Sstevel@tonic-gate 			 * size and if it's a read or write call
1238*7c478bd9Sstevel@tonic-gate 			 * and if the transfer size changed while
1239*7c478bd9Sstevel@tonic-gate 			 * retransmitting or if the feedback routine
1240*7c478bd9Sstevel@tonic-gate 			 * changed the transfer size,
1241*7c478bd9Sstevel@tonic-gate 			 * then exit rfscall so that the transfer
1242*7c478bd9Sstevel@tonic-gate 			 * size can be adjusted at the vnops level.
1243*7c478bd9Sstevel@tonic-gate 			 */
1244*7c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
1245*7c478bd9Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
1246*7c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
1247*7c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize ||
1248*7c478bd9Sstevel@tonic-gate 			    nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
1249*7c478bd9Sstevel@tonic-gate 				/*
1250*7c478bd9Sstevel@tonic-gate 				 * On read or write calls, return
1251*7c478bd9Sstevel@tonic-gate 				 * back to the vnode ops level if
1252*7c478bd9Sstevel@tonic-gate 				 * the transfer size changed.
1253*7c478bd9Sstevel@tonic-gate 				 */
1254*7c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
1255*7c478bd9Sstevel@tonic-gate 				return (ENFS_TRYAGAIN);
1256*7c478bd9Sstevel@tonic-gate 			}
1257*7c478bd9Sstevel@tonic-gate 		}
1258*7c478bd9Sstevel@tonic-gate 	} while (tryagain);
1259*7c478bd9Sstevel@tonic-gate 
1260*7c478bd9Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
1261*7c478bd9Sstevel@tonic-gate 		/*
1262*7c478bd9Sstevel@tonic-gate 		 * Let soft mounts use the timed out message.
1263*7c478bd9Sstevel@tonic-gate 		 */
1264*7c478bd9Sstevel@tonic-gate 		if (status == RPC_INPROGRESS)
1265*7c478bd9Sstevel@tonic-gate 			status = RPC_TIMEDOUT;
1266*7c478bd9Sstevel@tonic-gate 		nfscl->nfscl_stat.badcalls.value.ui64++;
1267*7c478bd9Sstevel@tonic-gate 		if (status != RPC_INTR) {
1268*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1269*7c478bd9Sstevel@tonic-gate 			mi->mi_flags |= MI_DOWN;
1270*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
1271*7c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
1272*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1273*7c478bd9Sstevel@tonic-gate 			bufp = clnt_sperror(client, svp->sv_hostname);
1274*7c478bd9Sstevel@tonic-gate 			zprintf(zoneid, "NFS%d %s failed for %s\n",
1275*7c478bd9Sstevel@tonic-gate 			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
1276*7c478bd9Sstevel@tonic-gate 			if (curproc->p_sessp->s_vp != NULL) {
1277*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
1278*7c478bd9Sstevel@tonic-gate 					uprintf("NFS%d %s failed for %s\n",
1279*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers, mi->mi_rfsnames[which],
1280*7c478bd9Sstevel@tonic-gate 					    bufp);
1281*7c478bd9Sstevel@tonic-gate 				}
1282*7c478bd9Sstevel@tonic-gate 			}
1283*7c478bd9Sstevel@tonic-gate 			kmem_free(bufp, MAXPATHLEN);
1284*7c478bd9Sstevel@tonic-gate #else
1285*7c478bd9Sstevel@tonic-gate 			zprintf(zoneid,
1286*7c478bd9Sstevel@tonic-gate 			    "NFS %s failed for server %s: error %d (%s)\n",
1287*7c478bd9Sstevel@tonic-gate 			    mi->mi_rfsnames[which], svp->sv_hostname,
1288*7c478bd9Sstevel@tonic-gate 			    status, clnt_sperrno(status));
1289*7c478bd9Sstevel@tonic-gate 			if (curproc->p_sessp->s_vp != NULL) {
1290*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
1291*7c478bd9Sstevel@tonic-gate 					uprintf(
1292*7c478bd9Sstevel@tonic-gate 				"NFS %s failed for server %s: error %d (%s)\n",
1293*7c478bd9Sstevel@tonic-gate 					    mi->mi_rfsnames[which],
1294*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname, status,
1295*7c478bd9Sstevel@tonic-gate 					    clnt_sperrno(status));
1296*7c478bd9Sstevel@tonic-gate 				}
1297*7c478bd9Sstevel@tonic-gate 			}
1298*7c478bd9Sstevel@tonic-gate #endif
1299*7c478bd9Sstevel@tonic-gate 			/*
1300*7c478bd9Sstevel@tonic-gate 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
1301*7c478bd9Sstevel@tonic-gate 			 * re_errno is set appropriately depending on
1302*7c478bd9Sstevel@tonic-gate 			 * the authentication error
1303*7c478bd9Sstevel@tonic-gate 			 */
1304*7c478bd9Sstevel@tonic-gate 			if (status == RPC_VERSMISMATCH ||
1305*7c478bd9Sstevel@tonic-gate 			    status == RPC_PROGVERSMISMATCH)
1306*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
1307*7c478bd9Sstevel@tonic-gate 		}
1308*7c478bd9Sstevel@tonic-gate 	} else {
1309*7c478bd9Sstevel@tonic-gate 		/*
1310*7c478bd9Sstevel@tonic-gate 		 * Test the value of mi_down and mi_printed without
1311*7c478bd9Sstevel@tonic-gate 		 * holding the mi_lock mutex.  If they are both zero,
1312*7c478bd9Sstevel@tonic-gate 		 * then it is okay to skip the down and printed
1313*7c478bd9Sstevel@tonic-gate 		 * processing.  This saves on a mutex_enter and
1314*7c478bd9Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC.
1315*7c478bd9Sstevel@tonic-gate 		 * This was just complete overhead.
1316*7c478bd9Sstevel@tonic-gate 		 */
1317*7c478bd9Sstevel@tonic-gate 		if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
1318*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1319*7c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI_DOWN;
1320*7c478bd9Sstevel@tonic-gate 			if (mi->mi_flags & MI_PRINTED) {
1321*7c478bd9Sstevel@tonic-gate 				mi->mi_flags &= ~MI_PRINTED;
1322*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1323*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1324*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1325*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS%d server %s ok\n",
1326*7c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
1327*7c478bd9Sstevel@tonic-gate #else
1328*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1329*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS server %s ok\n",
1330*7c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
1331*7c478bd9Sstevel@tonic-gate #endif
1332*7c478bd9Sstevel@tonic-gate 			} else
1333*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1334*7c478bd9Sstevel@tonic-gate 		}
1335*7c478bd9Sstevel@tonic-gate 
1336*7c478bd9Sstevel@tonic-gate 		if (*douprintf == 0) {
1337*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_NOPRINT))
1338*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1339*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1340*7c478bd9Sstevel@tonic-gate 					uprintf("NFS%d server %s ok\n",
1341*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
1342*7c478bd9Sstevel@tonic-gate #else
1343*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1344*7c478bd9Sstevel@tonic-gate 				uprintf("NFS server %s ok\n", svp->sv_hostname);
1345*7c478bd9Sstevel@tonic-gate #endif
1346*7c478bd9Sstevel@tonic-gate 			*douprintf = 1;
1347*7c478bd9Sstevel@tonic-gate 		}
1348*7c478bd9Sstevel@tonic-gate 	}
1349*7c478bd9Sstevel@tonic-gate 
1350*7c478bd9Sstevel@tonic-gate 	clfree_impl(client, ch, nfscl);
1351*7c478bd9Sstevel@tonic-gate 
1352*7c478bd9Sstevel@tonic-gate 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1353*7c478bd9Sstevel@tonic-gate 
1354*7c478bd9Sstevel@tonic-gate 	if (rpc_status != NULL)
1355*7c478bd9Sstevel@tonic-gate 		*rpc_status = rpcerr.re_status;
1356*7c478bd9Sstevel@tonic-gate 
1357*7c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
1358*7c478bd9Sstevel@tonic-gate 	    rpcerr.re_errno);
1359*7c478bd9Sstevel@tonic-gate 
1360*7c478bd9Sstevel@tonic-gate 	return (rpcerr.re_errno);
1361*7c478bd9Sstevel@tonic-gate }
1362*7c478bd9Sstevel@tonic-gate 
1363*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1364*7c478bd9Sstevel@tonic-gate static int acl2call_hits = 0;
1365*7c478bd9Sstevel@tonic-gate static int acl2call_misses = 0;
1366*7c478bd9Sstevel@tonic-gate #endif
1367*7c478bd9Sstevel@tonic-gate 
1368*7c478bd9Sstevel@tonic-gate int
1369*7c478bd9Sstevel@tonic-gate acl2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1370*7c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
1371*7c478bd9Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
1372*7c478bd9Sstevel@tonic-gate {
1373*7c478bd9Sstevel@tonic-gate 	int rpcerror;
1374*7c478bd9Sstevel@tonic-gate 
1375*7c478bd9Sstevel@tonic-gate 	rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
1376*7c478bd9Sstevel@tonic-gate 	    cr, douprintf, flags, fi);
1377*7c478bd9Sstevel@tonic-gate 	if (!rpcerror) {
1378*7c478bd9Sstevel@tonic-gate 		/*
1379*7c478bd9Sstevel@tonic-gate 		 * See comments with crnetadjust().
1380*7c478bd9Sstevel@tonic-gate 		 */
1381*7c478bd9Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
1382*7c478bd9Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
1383*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1384*7c478bd9Sstevel@tonic-gate 			acl2call_hits++;
1385*7c478bd9Sstevel@tonic-gate #endif
1386*7c478bd9Sstevel@tonic-gate 			rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres,
1387*7c478bd9Sstevel@tonic-gate 			    resp, cr, douprintf, flags, fi);
1388*7c478bd9Sstevel@tonic-gate 			crfree(cr);
1389*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1390*7c478bd9Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
1391*7c478bd9Sstevel@tonic-gate 				acl2call_misses++;
1392*7c478bd9Sstevel@tonic-gate #endif
1393*7c478bd9Sstevel@tonic-gate 		}
1394*7c478bd9Sstevel@tonic-gate 	}
1395*7c478bd9Sstevel@tonic-gate 
1396*7c478bd9Sstevel@tonic-gate 	return (rpcerror);
1397*7c478bd9Sstevel@tonic-gate }
1398*7c478bd9Sstevel@tonic-gate 
1399*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1400*7c478bd9Sstevel@tonic-gate static int acl3call_hits = 0;
1401*7c478bd9Sstevel@tonic-gate static int acl3call_misses = 0;
1402*7c478bd9Sstevel@tonic-gate #endif
1403*7c478bd9Sstevel@tonic-gate 
1404*7c478bd9Sstevel@tonic-gate int
1405*7c478bd9Sstevel@tonic-gate acl3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1406*7c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
1407*7c478bd9Sstevel@tonic-gate     nfsstat3 *statusp, int flags, failinfo_t *fi)
1408*7c478bd9Sstevel@tonic-gate {
1409*7c478bd9Sstevel@tonic-gate 	int rpcerror;
1410*7c478bd9Sstevel@tonic-gate 	int user_informed;
1411*7c478bd9Sstevel@tonic-gate 
1412*7c478bd9Sstevel@tonic-gate 	user_informed = 0;
1413*7c478bd9Sstevel@tonic-gate 
1414*7c478bd9Sstevel@tonic-gate 	do {
1415*7c478bd9Sstevel@tonic-gate 		rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
1416*7c478bd9Sstevel@tonic-gate 		    cr, douprintf, flags, fi);
1417*7c478bd9Sstevel@tonic-gate 		if (!rpcerror) {
1418*7c478bd9Sstevel@tonic-gate 			cred_t *crr;
1419*7c478bd9Sstevel@tonic-gate 			if (*statusp == NFS3ERR_JUKEBOX) {
1420*7c478bd9Sstevel@tonic-gate 				if (!user_informed) {
1421*7c478bd9Sstevel@tonic-gate 					user_informed = 1;
1422*7c478bd9Sstevel@tonic-gate 					uprintf(
1423*7c478bd9Sstevel@tonic-gate 		"file temporarily unavailable on the server, retrying...\n");
1424*7c478bd9Sstevel@tonic-gate 				}
1425*7c478bd9Sstevel@tonic-gate 				delay(nfs3_jukebox_delay);
1426*7c478bd9Sstevel@tonic-gate 			}
1427*7c478bd9Sstevel@tonic-gate 			/*
1428*7c478bd9Sstevel@tonic-gate 			 * See crnetadjust() for comments.
1429*7c478bd9Sstevel@tonic-gate 			 */
1430*7c478bd9Sstevel@tonic-gate 			else if (*statusp == NFS3ERR_ACCES &&
1431*7c478bd9Sstevel@tonic-gate 			    (crr = crnetadjust(cr)) != NULL) {
1432*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1433*7c478bd9Sstevel@tonic-gate 				acl3call_hits++;
1434*7c478bd9Sstevel@tonic-gate #endif
1435*7c478bd9Sstevel@tonic-gate 				rpcerror = aclcall(mi, which, xdrargs, argsp,
1436*7c478bd9Sstevel@tonic-gate 				    xdrres, resp, crr, douprintf, flags, fi);
1437*7c478bd9Sstevel@tonic-gate 
1438*7c478bd9Sstevel@tonic-gate 				crfree(crr);
1439*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1440*7c478bd9Sstevel@tonic-gate 				if (*statusp == NFS3ERR_ACCES)
1441*7c478bd9Sstevel@tonic-gate 					acl3call_misses++;
1442*7c478bd9Sstevel@tonic-gate #endif
1443*7c478bd9Sstevel@tonic-gate 			}
1444*7c478bd9Sstevel@tonic-gate 		}
1445*7c478bd9Sstevel@tonic-gate 	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
1446*7c478bd9Sstevel@tonic-gate 
1447*7c478bd9Sstevel@tonic-gate 	return (rpcerror);
1448*7c478bd9Sstevel@tonic-gate }
1449*7c478bd9Sstevel@tonic-gate 
1450*7c478bd9Sstevel@tonic-gate static int
1451*7c478bd9Sstevel@tonic-gate aclcall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1452*7c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
1453*7c478bd9Sstevel@tonic-gate     int flags, failinfo_t *fi)
1454*7c478bd9Sstevel@tonic-gate {
1455*7c478bd9Sstevel@tonic-gate 	CLIENT *client;
1456*7c478bd9Sstevel@tonic-gate 	struct chtab *ch;
1457*7c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
1458*7c478bd9Sstevel@tonic-gate 	struct rpc_err rpcerr;
1459*7c478bd9Sstevel@tonic-gate 	struct timeval wait;
1460*7c478bd9Sstevel@tonic-gate 	int timeo;		/* in units of hz */
1461*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */
1462*7c478bd9Sstevel@tonic-gate 	int my_rsize, my_wsize;
1463*7c478bd9Sstevel@tonic-gate #endif
1464*7c478bd9Sstevel@tonic-gate 	bool_t tryagain;
1465*7c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
1466*7c478bd9Sstevel@tonic-gate 	servinfo_t *svp;
1467*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
1468*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
1469*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1470*7c478bd9Sstevel@tonic-gate 	char *bufp;
1471*7c478bd9Sstevel@tonic-gate #endif
1472*7c478bd9Sstevel@tonic-gate 
1473*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */
1474*7c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
1475*7c478bd9Sstevel@tonic-gate 		"rfscall_start:which %d mi %p", which, mi);
1476*7c478bd9Sstevel@tonic-gate #endif
1477*7c478bd9Sstevel@tonic-gate 
1478*7c478bd9Sstevel@tonic-gate 	nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone);
1479*7c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
1480*7c478bd9Sstevel@tonic-gate 
1481*7c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
1482*7c478bd9Sstevel@tonic-gate 	mi->mi_aclreqs[which].value.ui64++;
1483*7c478bd9Sstevel@tonic-gate 
1484*7c478bd9Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
1485*7c478bd9Sstevel@tonic-gate 
1486*7c478bd9Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
1487*7c478bd9Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
1488*7c478bd9Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
1489*7c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
1490*7c478bd9Sstevel@tonic-gate 	}
1491*7c478bd9Sstevel@tonic-gate 
1492*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */
1493*7c478bd9Sstevel@tonic-gate 	/*
1494*7c478bd9Sstevel@tonic-gate 	 * Remember the transfer sizes in case
1495*7c478bd9Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
1496*7c478bd9Sstevel@tonic-gate 	 */
1497*7c478bd9Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
1498*7c478bd9Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
1499*7c478bd9Sstevel@tonic-gate #endif
1500*7c478bd9Sstevel@tonic-gate 
1501*7c478bd9Sstevel@tonic-gate 	/*
1502*7c478bd9Sstevel@tonic-gate 	 * NFS client failover support
1503*7c478bd9Sstevel@tonic-gate 	 *
1504*7c478bd9Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
1505*7c478bd9Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
1506*7c478bd9Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
1507*7c478bd9Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
1508*7c478bd9Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
1509*7c478bd9Sstevel@tonic-gate 	 *
1510*7c478bd9Sstevel@tonic-gate 	 * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible
1511*7c478bd9Sstevel@tonic-gate 	 * rw_enter(); we're trying to keep the current server from being
1512*7c478bd9Sstevel@tonic-gate 	 * changed on us until we're done with the remapping and have a
1513*7c478bd9Sstevel@tonic-gate 	 * matching client handle.  We don't want to sending a filehandle
1514*7c478bd9Sstevel@tonic-gate 	 * to the wrong host.
1515*7c478bd9Sstevel@tonic-gate 	 */
1516*7c478bd9Sstevel@tonic-gate failoverretry:
1517*7c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
1518*7c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
1519*7c478bd9Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
1520*7c478bd9Sstevel@tonic-gate 			if (failover_wait(mi)) {
1521*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1522*7c478bd9Sstevel@tonic-gate 				return (EINTR);
1523*7c478bd9Sstevel@tonic-gate 			}
1524*7c478bd9Sstevel@tonic-gate 		}
1525*7c478bd9Sstevel@tonic-gate 		INC_READERS(mi);
1526*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
1527*7c478bd9Sstevel@tonic-gate 		if (fi) {
1528*7c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
1529*7c478bd9Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
1530*7c478bd9Sstevel@tonic-gate 				int remaperr;
1531*7c478bd9Sstevel@tonic-gate 
1532*7c478bd9Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
1533*7c478bd9Sstevel@tonic-gate 				remaperr = failover_remap(fi);
1534*7c478bd9Sstevel@tonic-gate 				if (remaperr != 0) {
1535*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1536*7c478bd9Sstevel@tonic-gate 					if (remaperr != EINTR)
1537*7c478bd9Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
1538*7c478bd9Sstevel@tonic-gate 					    "aclcall couldn't failover: %m");
1539*7c478bd9Sstevel@tonic-gate #endif
1540*7c478bd9Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
1541*7c478bd9Sstevel@tonic-gate 					DEC_READERS(mi);
1542*7c478bd9Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
1543*7c478bd9Sstevel@tonic-gate 
1544*7c478bd9Sstevel@tonic-gate 					/*
1545*7c478bd9Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
1546*7c478bd9Sstevel@tonic-gate 					 * and the filesystem is hard mounted
1547*7c478bd9Sstevel@tonic-gate 					 * we have to retry the call with a new
1548*7c478bd9Sstevel@tonic-gate 					 * server.
1549*7c478bd9Sstevel@tonic-gate 					 */
1550*7c478bd9Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
1551*7c478bd9Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
1552*7c478bd9Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
1553*7c478bd9Sstevel@tonic-gate 							failover_newserver(mi);
1554*7c478bd9Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
1555*7c478bd9Sstevel@tonic-gate 						goto failoverretry;
1556*7c478bd9Sstevel@tonic-gate 					}
1557*7c478bd9Sstevel@tonic-gate 					return (remaperr);
1558*7c478bd9Sstevel@tonic-gate 				}
1559*7c478bd9Sstevel@tonic-gate 			}
1560*7c478bd9Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
1561*7c478bd9Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
1562*7c478bd9Sstevel@tonic-gate 		}
1563*7c478bd9Sstevel@tonic-gate 	}
1564*7c478bd9Sstevel@tonic-gate 
1565*7c478bd9Sstevel@tonic-gate 	/*
1566*7c478bd9Sstevel@tonic-gate 	 * acl_clget() calls clnt_tli_kinit() which clears the xid, so we
1567*7c478bd9Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
1568*7c478bd9Sstevel@tonic-gate 	 */
1569*7c478bd9Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
1570*7c478bd9Sstevel@tonic-gate 	rpcerr.re_errno = acl_clget(mi, svp, cr, &client, &ch, nfscl);
1571*7c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
1572*7c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
1573*7c478bd9Sstevel@tonic-gate 		DEC_READERS(mi);
1574*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
1575*7c478bd9Sstevel@tonic-gate 
1576*7c478bd9Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
1577*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno == ECONNRESET) &&
1578*7c478bd9Sstevel@tonic-gate 				failover_safe(fi)) {
1579*7c478bd9Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
1580*7c478bd9Sstevel@tonic-gate 				failover_newserver(mi);
1581*7c478bd9Sstevel@tonic-gate 			goto failoverretry;
1582*7c478bd9Sstevel@tonic-gate 		}
1583*7c478bd9Sstevel@tonic-gate 	}
1584*7c478bd9Sstevel@tonic-gate 	if (rpcerr.re_errno != 0)
1585*7c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
1586*7c478bd9Sstevel@tonic-gate 
1587*7c478bd9Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
1588*7c478bd9Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
1589*7c478bd9Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
1590*7c478bd9Sstevel@tonic-gate 	} else {
1591*7c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
1592*7c478bd9Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
1593*7c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_acl_timer_type[which]]),
1594*7c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
1595*7c478bd9Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_acl_call_type[which]]*hz)>>3,
1596*7c478bd9Sstevel@tonic-gate 		    (void (*)()) 0, (caddr_t)mi, 0);
1597*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
1598*7c478bd9Sstevel@tonic-gate 	}
1599*7c478bd9Sstevel@tonic-gate 
1600*7c478bd9Sstevel@tonic-gate 	/*
1601*7c478bd9Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
1602*7c478bd9Sstevel@tonic-gate 	 */
1603*7c478bd9Sstevel@tonic-gate 	do {
1604*7c478bd9Sstevel@tonic-gate 		tryagain = FALSE;
1605*7c478bd9Sstevel@tonic-gate 
1606*7c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
1607*7c478bd9Sstevel@tonic-gate 			status = RPC_FAILED;
1608*7c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
1609*7c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
1610*7c478bd9Sstevel@tonic-gate 			break;
1611*7c478bd9Sstevel@tonic-gate 		}
1612*7c478bd9Sstevel@tonic-gate 
1613*7c478bd9Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
1614*7c478bd9Sstevel@tonic-gate 
1615*7c478bd9Sstevel@tonic-gate 		/*
1616*7c478bd9Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1617*7c478bd9Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
1618*7c478bd9Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
1619*7c478bd9Sstevel@tonic-gate 		 */
1620*7c478bd9Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
1621*7c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
1622*7c478bd9Sstevel@tonic-gate 			client->cl_nosignal = TRUE;
1623*7c478bd9Sstevel@tonic-gate 
1624*7c478bd9Sstevel@tonic-gate 		/*
1625*7c478bd9Sstevel@tonic-gate 		 * If there is a current signal, then don't bother
1626*7c478bd9Sstevel@tonic-gate 		 * even trying to send out the request because we
1627*7c478bd9Sstevel@tonic-gate 		 * won't be able to block waiting for the response.
1628*7c478bd9Sstevel@tonic-gate 		 * Simply assume RPC_INTR and get on with it.
1629*7c478bd9Sstevel@tonic-gate 		 */
1630*7c478bd9Sstevel@tonic-gate 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1631*7c478bd9Sstevel@tonic-gate 			status = RPC_INTR;
1632*7c478bd9Sstevel@tonic-gate 		else {
1633*7c478bd9Sstevel@tonic-gate 			status = CLNT_CALL(client, which, xdrargs, argsp,
1634*7c478bd9Sstevel@tonic-gate 			    xdrres, resp, wait);
1635*7c478bd9Sstevel@tonic-gate 		}
1636*7c478bd9Sstevel@tonic-gate 
1637*7c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
1638*7c478bd9Sstevel@tonic-gate 			client->cl_nosignal = FALSE;
1639*7c478bd9Sstevel@tonic-gate 		/*
1640*7c478bd9Sstevel@tonic-gate 		 * restore original signal mask
1641*7c478bd9Sstevel@tonic-gate 		 */
1642*7c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
1643*7c478bd9Sstevel@tonic-gate 
1644*7c478bd9Sstevel@tonic-gate 		switch (status) {
1645*7c478bd9Sstevel@tonic-gate 		case RPC_SUCCESS:
1646*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */
1647*7c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
1648*7c478bd9Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
1649*7c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
1650*7c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize))
1651*7c478bd9Sstevel@tonic-gate 				(void) nfs_feedback(FEEDBACK_OK, which, mi);
1652*7c478bd9Sstevel@tonic-gate #endif
1653*7c478bd9Sstevel@tonic-gate 			break;
1654*7c478bd9Sstevel@tonic-gate 
1655*7c478bd9Sstevel@tonic-gate 		/*
1656*7c478bd9Sstevel@tonic-gate 		 * Unfortunately, there are servers in the world which
1657*7c478bd9Sstevel@tonic-gate 		 * are not coded correctly.  They are not prepared to
1658*7c478bd9Sstevel@tonic-gate 		 * handle RPC requests to the NFS port which are not
1659*7c478bd9Sstevel@tonic-gate 		 * NFS requests.  Thus, they may try to process the
1660*7c478bd9Sstevel@tonic-gate 		 * NFS_ACL request as if it were an NFS request.  This
1661*7c478bd9Sstevel@tonic-gate 		 * does not work.  Generally, an error will be generated
1662*7c478bd9Sstevel@tonic-gate 		 * on the client because it will not be able to decode
1663*7c478bd9Sstevel@tonic-gate 		 * the response from the server.  However, it seems
1664*7c478bd9Sstevel@tonic-gate 		 * possible that the server may not be able to decode
1665*7c478bd9Sstevel@tonic-gate 		 * the arguments.  Thus, the criteria for deciding
1666*7c478bd9Sstevel@tonic-gate 		 * whether the server supports NFS_ACL or not is whether
1667*7c478bd9Sstevel@tonic-gate 		 * the following RPC errors are returned from CLNT_CALL.
1668*7c478bd9Sstevel@tonic-gate 		 */
1669*7c478bd9Sstevel@tonic-gate 		case RPC_CANTDECODERES:
1670*7c478bd9Sstevel@tonic-gate 		case RPC_PROGUNAVAIL:
1671*7c478bd9Sstevel@tonic-gate 		case RPC_CANTDECODEARGS:
1672*7c478bd9Sstevel@tonic-gate 		case RPC_PROGVERSMISMATCH:
1673*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1674*7c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~(MI_ACL | MI_EXTATTR);
1675*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
1676*7c478bd9Sstevel@tonic-gate 			break;
1677*7c478bd9Sstevel@tonic-gate 
1678*7c478bd9Sstevel@tonic-gate 		/*
1679*7c478bd9Sstevel@tonic-gate 		 * If the server supports NFS_ACL but not the new ops
1680*7c478bd9Sstevel@tonic-gate 		 * for extended attributes, make sure we don't retry.
1681*7c478bd9Sstevel@tonic-gate 		 */
1682*7c478bd9Sstevel@tonic-gate 		case RPC_PROCUNAVAIL:
1683*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1684*7c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI_EXTATTR;
1685*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
1686*7c478bd9Sstevel@tonic-gate 			break;
1687*7c478bd9Sstevel@tonic-gate 
1688*7c478bd9Sstevel@tonic-gate 		case RPC_INTR:
1689*7c478bd9Sstevel@tonic-gate 			/*
1690*7c478bd9Sstevel@tonic-gate 			 * There is no way to recover from this error,
1691*7c478bd9Sstevel@tonic-gate 			 * even if mount option nointr is specified.
1692*7c478bd9Sstevel@tonic-gate 			 * SIGKILL, for example, cannot be blocked.
1693*7c478bd9Sstevel@tonic-gate 			 */
1694*7c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_INTR;
1695*7c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EINTR;
1696*7c478bd9Sstevel@tonic-gate 			break;
1697*7c478bd9Sstevel@tonic-gate 
1698*7c478bd9Sstevel@tonic-gate 		case RPC_UDERROR:
1699*7c478bd9Sstevel@tonic-gate 			/*
1700*7c478bd9Sstevel@tonic-gate 			 * If the NFS server is local (vold) and
1701*7c478bd9Sstevel@tonic-gate 			 * it goes away then we get RPC_UDERROR.
1702*7c478bd9Sstevel@tonic-gate 			 * This is a retryable error, so we would
1703*7c478bd9Sstevel@tonic-gate 			 * loop, so check to see if the specific
1704*7c478bd9Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that
1705*7c478bd9Sstevel@tonic-gate 			 * target did not exist at all.  If so,
1706*7c478bd9Sstevel@tonic-gate 			 * return with RPC_PROGUNAVAIL and
1707*7c478bd9Sstevel@tonic-gate 			 * ECONNRESET to indicate why.
1708*7c478bd9Sstevel@tonic-gate 			 */
1709*7c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
1710*7c478bd9Sstevel@tonic-gate 			if (rpcerr.re_errno == ECONNRESET) {
1711*7c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_PROGUNAVAIL;
1712*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = ECONNRESET;
1713*7c478bd9Sstevel@tonic-gate 				break;
1714*7c478bd9Sstevel@tonic-gate 			}
1715*7c478bd9Sstevel@tonic-gate 			/*FALLTHROUGH*/
1716*7c478bd9Sstevel@tonic-gate 
1717*7c478bd9Sstevel@tonic-gate 		default:		/* probably RPC_TIMEDOUT */
1718*7c478bd9Sstevel@tonic-gate 			if (IS_UNRECOVERABLE_RPC(status))
1719*7c478bd9Sstevel@tonic-gate 				break;
1720*7c478bd9Sstevel@tonic-gate 
1721*7c478bd9Sstevel@tonic-gate 			/*
1722*7c478bd9Sstevel@tonic-gate 			 * increment server not responding count
1723*7c478bd9Sstevel@tonic-gate 			 */
1724*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1725*7c478bd9Sstevel@tonic-gate 			mi->mi_noresponse++;
1726*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
1727*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1728*7c478bd9Sstevel@tonic-gate 			nfscl->nfscl_stat.noresponse.value.ui64++;
1729*7c478bd9Sstevel@tonic-gate #endif
1730*7c478bd9Sstevel@tonic-gate 
1731*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_HARD)) {
1732*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_SEMISOFT) ||
1733*7c478bd9Sstevel@tonic-gate 				    (mi->mi_acl_ss_call_type[which] == 0))
1734*7c478bd9Sstevel@tonic-gate 					break;
1735*7c478bd9Sstevel@tonic-gate 			}
1736*7c478bd9Sstevel@tonic-gate 
1737*7c478bd9Sstevel@tonic-gate 			/*
1738*7c478bd9Sstevel@tonic-gate 			 * The call is in progress (over COTS).
1739*7c478bd9Sstevel@tonic-gate 			 * Try the CLNT_CALL again, but don't
1740*7c478bd9Sstevel@tonic-gate 			 * print a noisy error message.
1741*7c478bd9Sstevel@tonic-gate 			 */
1742*7c478bd9Sstevel@tonic-gate 			if (status == RPC_INPROGRESS) {
1743*7c478bd9Sstevel@tonic-gate 				tryagain = TRUE;
1744*7c478bd9Sstevel@tonic-gate 				break;
1745*7c478bd9Sstevel@tonic-gate 			}
1746*7c478bd9Sstevel@tonic-gate 
1747*7c478bd9Sstevel@tonic-gate 			if (flags & RFSCALL_SOFT)
1748*7c478bd9Sstevel@tonic-gate 				break;
1749*7c478bd9Sstevel@tonic-gate 
1750*7c478bd9Sstevel@tonic-gate 			/*
1751*7c478bd9Sstevel@tonic-gate 			 * On zone shutdown, just move on.
1752*7c478bd9Sstevel@tonic-gate 			 */
1753*7c478bd9Sstevel@tonic-gate 			if (zone_status_get(curproc->p_zone) >=
1754*7c478bd9Sstevel@tonic-gate 			    ZONE_IS_SHUTTING_DOWN) {
1755*7c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_FAILED;
1756*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
1757*7c478bd9Sstevel@tonic-gate 				break;
1758*7c478bd9Sstevel@tonic-gate 			}
1759*7c478bd9Sstevel@tonic-gate 
1760*7c478bd9Sstevel@tonic-gate 			/*
1761*7c478bd9Sstevel@tonic-gate 			 * NFS client failover support
1762*7c478bd9Sstevel@tonic-gate 			 *
1763*7c478bd9Sstevel@tonic-gate 			 * If the current server just failed us, we'll
1764*7c478bd9Sstevel@tonic-gate 			 * start the process of finding a new server.
1765*7c478bd9Sstevel@tonic-gate 			 * After that, we can just retry.
1766*7c478bd9Sstevel@tonic-gate 			 */
1767*7c478bd9Sstevel@tonic-gate 			if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
1768*7c478bd9Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv)
1769*7c478bd9Sstevel@tonic-gate 					failover_newserver(mi);
1770*7c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
1771*7c478bd9Sstevel@tonic-gate 				goto failoverretry;
1772*7c478bd9Sstevel@tonic-gate 			}
1773*7c478bd9Sstevel@tonic-gate 
1774*7c478bd9Sstevel@tonic-gate 			tryagain = TRUE;
1775*7c478bd9Sstevel@tonic-gate 			timeo = backoff(timeo);
1776*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1777*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_PRINTED)) {
1778*7c478bd9Sstevel@tonic-gate 				mi->mi_flags |= MI_PRINTED;
1779*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1780*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1781*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
1782*7c478bd9Sstevel@tonic-gate 			"NFS_ACL%d server %s not responding still trying\n",
1783*7c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
1784*7c478bd9Sstevel@tonic-gate #else
1785*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
1786*7c478bd9Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
1787*7c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
1788*7c478bd9Sstevel@tonic-gate #endif
1789*7c478bd9Sstevel@tonic-gate 			} else
1790*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1791*7c478bd9Sstevel@tonic-gate 			if (*douprintf && curproc->p_sessp->s_vp != NULL) {
1792*7c478bd9Sstevel@tonic-gate 				*douprintf = 0;
1793*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
1794*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1795*7c478bd9Sstevel@tonic-gate 					uprintf(
1796*7c478bd9Sstevel@tonic-gate 			"NFS_ACL%d server %s not responding still trying\n",
1797*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
1798*7c478bd9Sstevel@tonic-gate #else
1799*7c478bd9Sstevel@tonic-gate 					uprintf(
1800*7c478bd9Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
1801*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
1802*7c478bd9Sstevel@tonic-gate #endif
1803*7c478bd9Sstevel@tonic-gate 			}
1804*7c478bd9Sstevel@tonic-gate 
1805*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */
1806*7c478bd9Sstevel@tonic-gate 			/*
1807*7c478bd9Sstevel@tonic-gate 			 * If doing dynamic adjustment of transfer
1808*7c478bd9Sstevel@tonic-gate 			 * size and if it's a read or write call
1809*7c478bd9Sstevel@tonic-gate 			 * and if the transfer size changed while
1810*7c478bd9Sstevel@tonic-gate 			 * retransmitting or if the feedback routine
1811*7c478bd9Sstevel@tonic-gate 			 * changed the transfer size,
1812*7c478bd9Sstevel@tonic-gate 			 * then exit rfscall so that the transfer
1813*7c478bd9Sstevel@tonic-gate 			 * size can be adjusted at the vnops level.
1814*7c478bd9Sstevel@tonic-gate 			 */
1815*7c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
1816*7c478bd9Sstevel@tonic-gate 			    mi->mi_acl_timer_type[which] != 0 &&
1817*7c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
1818*7c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize ||
1819*7c478bd9Sstevel@tonic-gate 			    nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
1820*7c478bd9Sstevel@tonic-gate 				/*
1821*7c478bd9Sstevel@tonic-gate 				 * On read or write calls, return
1822*7c478bd9Sstevel@tonic-gate 				 * back to the vnode ops level if
1823*7c478bd9Sstevel@tonic-gate 				 * the transfer size changed.
1824*7c478bd9Sstevel@tonic-gate 				 */
1825*7c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
1826*7c478bd9Sstevel@tonic-gate 				return (ENFS_TRYAGAIN);
1827*7c478bd9Sstevel@tonic-gate 			}
1828*7c478bd9Sstevel@tonic-gate #endif
1829*7c478bd9Sstevel@tonic-gate 		}
1830*7c478bd9Sstevel@tonic-gate 	} while (tryagain);
1831*7c478bd9Sstevel@tonic-gate 
1832*7c478bd9Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
1833*7c478bd9Sstevel@tonic-gate 		/*
1834*7c478bd9Sstevel@tonic-gate 		 * Let soft mounts use the timed out message.
1835*7c478bd9Sstevel@tonic-gate 		 */
1836*7c478bd9Sstevel@tonic-gate 		if (status == RPC_INPROGRESS)
1837*7c478bd9Sstevel@tonic-gate 			status = RPC_TIMEDOUT;
1838*7c478bd9Sstevel@tonic-gate 		nfscl->nfscl_stat.badcalls.value.ui64++;
1839*7c478bd9Sstevel@tonic-gate 		if (status == RPC_CANTDECODERES ||
1840*7c478bd9Sstevel@tonic-gate 		    status == RPC_PROGUNAVAIL ||
1841*7c478bd9Sstevel@tonic-gate 		    status == RPC_PROCUNAVAIL ||
1842*7c478bd9Sstevel@tonic-gate 		    status == RPC_CANTDECODEARGS ||
1843*7c478bd9Sstevel@tonic-gate 		    status == RPC_PROGVERSMISMATCH)
1844*7c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
1845*7c478bd9Sstevel@tonic-gate 		else if (status != RPC_INTR) {
1846*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1847*7c478bd9Sstevel@tonic-gate 			mi->mi_flags |= MI_DOWN;
1848*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
1849*7c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
1850*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1851*7c478bd9Sstevel@tonic-gate 			bufp = clnt_sperror(client, svp->sv_hostname);
1852*7c478bd9Sstevel@tonic-gate 			zprintf(zoneid, "NFS_ACL%d %s failed for %s\n",
1853*7c478bd9Sstevel@tonic-gate 			    mi->mi_vers, mi->mi_aclnames[which], bufp);
1854*7c478bd9Sstevel@tonic-gate 			if (curproc->p_sessp->s_vp != NULL) {
1855*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
1856*7c478bd9Sstevel@tonic-gate 					uprintf("NFS_ACL%d %s failed for %s\n",
1857*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers, mi->mi_aclnames[which],
1858*7c478bd9Sstevel@tonic-gate 					    bufp);
1859*7c478bd9Sstevel@tonic-gate 				}
1860*7c478bd9Sstevel@tonic-gate 			}
1861*7c478bd9Sstevel@tonic-gate 			kmem_free(bufp, MAXPATHLEN);
1862*7c478bd9Sstevel@tonic-gate #else
1863*7c478bd9Sstevel@tonic-gate 			zprintf(zoneid,
1864*7c478bd9Sstevel@tonic-gate 			    "NFS %s failed for server %s: error %d (%s)\n",
1865*7c478bd9Sstevel@tonic-gate 			    mi->mi_aclnames[which], svp->sv_hostname,
1866*7c478bd9Sstevel@tonic-gate 			    status, clnt_sperrno(status));
1867*7c478bd9Sstevel@tonic-gate 			if (curproc->p_sessp->s_vp != NULL) {
1868*7c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
1869*7c478bd9Sstevel@tonic-gate 					uprintf(
1870*7c478bd9Sstevel@tonic-gate 				"NFS %s failed for server %s: error %d (%s)\n",
1871*7c478bd9Sstevel@tonic-gate 					    mi->mi_aclnames[which],
1872*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname, status,
1873*7c478bd9Sstevel@tonic-gate 					    clnt_sperrno(status));
1874*7c478bd9Sstevel@tonic-gate 			}
1875*7c478bd9Sstevel@tonic-gate #endif
1876*7c478bd9Sstevel@tonic-gate 			/*
1877*7c478bd9Sstevel@tonic-gate 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
1878*7c478bd9Sstevel@tonic-gate 			 * re_errno is set appropriately depending on
1879*7c478bd9Sstevel@tonic-gate 			 * the authentication error
1880*7c478bd9Sstevel@tonic-gate 			 */
1881*7c478bd9Sstevel@tonic-gate 			if (status == RPC_VERSMISMATCH ||
1882*7c478bd9Sstevel@tonic-gate 			    status == RPC_PROGVERSMISMATCH)
1883*7c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
1884*7c478bd9Sstevel@tonic-gate 		}
1885*7c478bd9Sstevel@tonic-gate 	} else {
1886*7c478bd9Sstevel@tonic-gate 		/*
1887*7c478bd9Sstevel@tonic-gate 		 * Test the value of mi_down and mi_printed without
1888*7c478bd9Sstevel@tonic-gate 		 * holding the mi_lock mutex.  If they are both zero,
1889*7c478bd9Sstevel@tonic-gate 		 * then it is okay to skip the down and printed
1890*7c478bd9Sstevel@tonic-gate 		 * processing.  This saves on a mutex_enter and
1891*7c478bd9Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC.
1892*7c478bd9Sstevel@tonic-gate 		 * This was just complete overhead.
1893*7c478bd9Sstevel@tonic-gate 		 */
1894*7c478bd9Sstevel@tonic-gate 		if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
1895*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
1896*7c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI_DOWN;
1897*7c478bd9Sstevel@tonic-gate 			if (mi->mi_flags & MI_PRINTED) {
1898*7c478bd9Sstevel@tonic-gate 				mi->mi_flags &= ~MI_PRINTED;
1899*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1900*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1901*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS_ACL%d server %s ok\n",
1902*7c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
1903*7c478bd9Sstevel@tonic-gate #else
1904*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS server %s ok\n",
1905*7c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
1906*7c478bd9Sstevel@tonic-gate #endif
1907*7c478bd9Sstevel@tonic-gate 			} else
1908*7c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
1909*7c478bd9Sstevel@tonic-gate 		}
1910*7c478bd9Sstevel@tonic-gate 
1911*7c478bd9Sstevel@tonic-gate 		if (*douprintf == 0) {
1912*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_NOPRINT))
1913*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1914*7c478bd9Sstevel@tonic-gate 				uprintf("NFS_ACL%d server %s ok\n",
1915*7c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
1916*7c478bd9Sstevel@tonic-gate #else
1917*7c478bd9Sstevel@tonic-gate 				uprintf("NFS server %s ok\n", svp->sv_hostname);
1918*7c478bd9Sstevel@tonic-gate #endif
1919*7c478bd9Sstevel@tonic-gate 			*douprintf = 1;
1920*7c478bd9Sstevel@tonic-gate 		}
1921*7c478bd9Sstevel@tonic-gate 	}
1922*7c478bd9Sstevel@tonic-gate 
1923*7c478bd9Sstevel@tonic-gate 	clfree_impl(client, ch, nfscl);
1924*7c478bd9Sstevel@tonic-gate 
1925*7c478bd9Sstevel@tonic-gate 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1926*7c478bd9Sstevel@tonic-gate 
1927*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */
1928*7c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
1929*7c478bd9Sstevel@tonic-gate 	    rpcerr.re_errno);
1930*7c478bd9Sstevel@tonic-gate #endif
1931*7c478bd9Sstevel@tonic-gate 
1932*7c478bd9Sstevel@tonic-gate 	return (rpcerr.re_errno);
1933*7c478bd9Sstevel@tonic-gate }
1934*7c478bd9Sstevel@tonic-gate 
1935*7c478bd9Sstevel@tonic-gate int
1936*7c478bd9Sstevel@tonic-gate vattr_to_sattr(struct vattr *vap, struct nfssattr *sa)
1937*7c478bd9Sstevel@tonic-gate {
1938*7c478bd9Sstevel@tonic-gate 	uint_t mask = vap->va_mask;
1939*7c478bd9Sstevel@tonic-gate 
1940*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MODE))
1941*7c478bd9Sstevel@tonic-gate 		sa->sa_mode = (uint32_t)-1;
1942*7c478bd9Sstevel@tonic-gate 	else
1943*7c478bd9Sstevel@tonic-gate 		sa->sa_mode = vap->va_mode;
1944*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_UID))
1945*7c478bd9Sstevel@tonic-gate 		sa->sa_uid = (uint32_t)-1;
1946*7c478bd9Sstevel@tonic-gate 	else
1947*7c478bd9Sstevel@tonic-gate 		sa->sa_uid = (uint32_t)vap->va_uid;
1948*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_GID))
1949*7c478bd9Sstevel@tonic-gate 		sa->sa_gid = (uint32_t)-1;
1950*7c478bd9Sstevel@tonic-gate 	else
1951*7c478bd9Sstevel@tonic-gate 		sa->sa_gid = (uint32_t)vap->va_gid;
1952*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_SIZE))
1953*7c478bd9Sstevel@tonic-gate 		sa->sa_size = (uint32_t)-1;
1954*7c478bd9Sstevel@tonic-gate 	else
1955*7c478bd9Sstevel@tonic-gate 		sa->sa_size = (uint32_t)vap->va_size;
1956*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_ATIME))
1957*7c478bd9Sstevel@tonic-gate 		sa->sa_atime.tv_sec = sa->sa_atime.tv_usec = (int32_t)-1;
1958*7c478bd9Sstevel@tonic-gate 	else {
1959*7c478bd9Sstevel@tonic-gate 		/* check time validity */
1960*7c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
1961*7c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
1962*7c478bd9Sstevel@tonic-gate 		}
1963*7c478bd9Sstevel@tonic-gate 		sa->sa_atime.tv_sec = vap->va_atime.tv_sec;
1964*7c478bd9Sstevel@tonic-gate 		sa->sa_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
1965*7c478bd9Sstevel@tonic-gate 	}
1966*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MTIME))
1967*7c478bd9Sstevel@tonic-gate 		sa->sa_mtime.tv_sec = sa->sa_mtime.tv_usec = (int32_t)-1;
1968*7c478bd9Sstevel@tonic-gate 	else {
1969*7c478bd9Sstevel@tonic-gate 		/* check time validity */
1970*7c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
1971*7c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
1972*7c478bd9Sstevel@tonic-gate 		}
1973*7c478bd9Sstevel@tonic-gate 		sa->sa_mtime.tv_sec = vap->va_mtime.tv_sec;
1974*7c478bd9Sstevel@tonic-gate 		sa->sa_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
1975*7c478bd9Sstevel@tonic-gate 	}
1976*7c478bd9Sstevel@tonic-gate 	return (0);
1977*7c478bd9Sstevel@tonic-gate }
1978*7c478bd9Sstevel@tonic-gate 
1979*7c478bd9Sstevel@tonic-gate int
1980*7c478bd9Sstevel@tonic-gate vattr_to_sattr3(struct vattr *vap, sattr3 *sa)
1981*7c478bd9Sstevel@tonic-gate {
1982*7c478bd9Sstevel@tonic-gate 	uint_t mask = vap->va_mask;
1983*7c478bd9Sstevel@tonic-gate 
1984*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MODE))
1985*7c478bd9Sstevel@tonic-gate 		sa->mode.set_it = FALSE;
1986*7c478bd9Sstevel@tonic-gate 	else {
1987*7c478bd9Sstevel@tonic-gate 		sa->mode.set_it = TRUE;
1988*7c478bd9Sstevel@tonic-gate 		sa->mode.mode = (mode3)vap->va_mode;
1989*7c478bd9Sstevel@tonic-gate 	}
1990*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_UID))
1991*7c478bd9Sstevel@tonic-gate 		sa->uid.set_it = FALSE;
1992*7c478bd9Sstevel@tonic-gate 	else {
1993*7c478bd9Sstevel@tonic-gate 		sa->uid.set_it = TRUE;
1994*7c478bd9Sstevel@tonic-gate 		sa->uid.uid = (uid3)vap->va_uid;
1995*7c478bd9Sstevel@tonic-gate 	}
1996*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_GID))
1997*7c478bd9Sstevel@tonic-gate 		sa->gid.set_it = FALSE;
1998*7c478bd9Sstevel@tonic-gate 	else {
1999*7c478bd9Sstevel@tonic-gate 		sa->gid.set_it = TRUE;
2000*7c478bd9Sstevel@tonic-gate 		sa->gid.gid = (gid3)vap->va_gid;
2001*7c478bd9Sstevel@tonic-gate 	}
2002*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_SIZE))
2003*7c478bd9Sstevel@tonic-gate 		sa->size.set_it = FALSE;
2004*7c478bd9Sstevel@tonic-gate 	else {
2005*7c478bd9Sstevel@tonic-gate 		sa->size.set_it = TRUE;
2006*7c478bd9Sstevel@tonic-gate 		sa->size.size = (size3)vap->va_size;
2007*7c478bd9Sstevel@tonic-gate 	}
2008*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_ATIME))
2009*7c478bd9Sstevel@tonic-gate 		sa->atime.set_it = DONT_CHANGE;
2010*7c478bd9Sstevel@tonic-gate 	else {
2011*7c478bd9Sstevel@tonic-gate 		/* check time validity */
2012*7c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
2013*7c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
2014*7c478bd9Sstevel@tonic-gate 		}
2015*7c478bd9Sstevel@tonic-gate 		sa->atime.set_it = SET_TO_CLIENT_TIME;
2016*7c478bd9Sstevel@tonic-gate 		sa->atime.atime.seconds = (uint32)vap->va_atime.tv_sec;
2017*7c478bd9Sstevel@tonic-gate 		sa->atime.atime.nseconds = (uint32)vap->va_atime.tv_nsec;
2018*7c478bd9Sstevel@tonic-gate 	}
2019*7c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MTIME))
2020*7c478bd9Sstevel@tonic-gate 		sa->mtime.set_it = DONT_CHANGE;
2021*7c478bd9Sstevel@tonic-gate 	else {
2022*7c478bd9Sstevel@tonic-gate 		/* check time validity */
2023*7c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
2024*7c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
2025*7c478bd9Sstevel@tonic-gate 		}
2026*7c478bd9Sstevel@tonic-gate 		sa->mtime.set_it = SET_TO_CLIENT_TIME;
2027*7c478bd9Sstevel@tonic-gate 		sa->mtime.mtime.seconds = (uint32)vap->va_mtime.tv_sec;
2028*7c478bd9Sstevel@tonic-gate 		sa->mtime.mtime.nseconds = (uint32)vap->va_mtime.tv_nsec;
2029*7c478bd9Sstevel@tonic-gate 	}
2030*7c478bd9Sstevel@tonic-gate 	return (0);
2031*7c478bd9Sstevel@tonic-gate }
2032*7c478bd9Sstevel@tonic-gate 
2033*7c478bd9Sstevel@tonic-gate void
2034*7c478bd9Sstevel@tonic-gate setdiropargs(struct nfsdiropargs *da, char *nm, vnode_t *dvp)
2035*7c478bd9Sstevel@tonic-gate {
2036*7c478bd9Sstevel@tonic-gate 
2037*7c478bd9Sstevel@tonic-gate 	da->da_fhandle = VTOFH(dvp);
2038*7c478bd9Sstevel@tonic-gate 	da->da_name = nm;
2039*7c478bd9Sstevel@tonic-gate 	da->da_flags = 0;
2040*7c478bd9Sstevel@tonic-gate }
2041*7c478bd9Sstevel@tonic-gate 
2042*7c478bd9Sstevel@tonic-gate void
2043*7c478bd9Sstevel@tonic-gate setdiropargs3(diropargs3 *da, char *nm, vnode_t *dvp)
2044*7c478bd9Sstevel@tonic-gate {
2045*7c478bd9Sstevel@tonic-gate 
2046*7c478bd9Sstevel@tonic-gate 	da->dirp = VTOFH3(dvp);
2047*7c478bd9Sstevel@tonic-gate 	da->name = nm;
2048*7c478bd9Sstevel@tonic-gate }
2049*7c478bd9Sstevel@tonic-gate 
2050*7c478bd9Sstevel@tonic-gate int
2051*7c478bd9Sstevel@tonic-gate setdirgid(vnode_t *dvp, gid_t *gidp, cred_t *cr)
2052*7c478bd9Sstevel@tonic-gate {
2053*7c478bd9Sstevel@tonic-gate 	int error;
2054*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
2055*7c478bd9Sstevel@tonic-gate 	struct vattr va;
2056*7c478bd9Sstevel@tonic-gate 
2057*7c478bd9Sstevel@tonic-gate 	va.va_mask = AT_MODE | AT_GID;
2058*7c478bd9Sstevel@tonic-gate 	error = VOP_GETATTR(dvp, &va, 0, cr);
2059*7c478bd9Sstevel@tonic-gate 	if (error)
2060*7c478bd9Sstevel@tonic-gate 		return (error);
2061*7c478bd9Sstevel@tonic-gate 
2062*7c478bd9Sstevel@tonic-gate 	/*
2063*7c478bd9Sstevel@tonic-gate 	 * To determine the expected group-id of the created file:
2064*7c478bd9Sstevel@tonic-gate 	 *  1)	If the filesystem was not mounted with the Old-BSD-compatible
2065*7c478bd9Sstevel@tonic-gate 	 *	GRPID option, and the directory's set-gid bit is clear,
2066*7c478bd9Sstevel@tonic-gate 	 *	then use the process's gid.
2067*7c478bd9Sstevel@tonic-gate 	 *  2)	Otherwise, set the group-id to the gid of the parent directory.
2068*7c478bd9Sstevel@tonic-gate 	 */
2069*7c478bd9Sstevel@tonic-gate 	rp = VTOR(dvp);
2070*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2071*7c478bd9Sstevel@tonic-gate 	if (!(VTOMI(dvp)->mi_flags & MI_GRPID) && !(va.va_mode & VSGID))
2072*7c478bd9Sstevel@tonic-gate 		*gidp = crgetgid(cr);
2073*7c478bd9Sstevel@tonic-gate 	else
2074*7c478bd9Sstevel@tonic-gate 		*gidp = va.va_gid;
2075*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2076*7c478bd9Sstevel@tonic-gate 	return (0);
2077*7c478bd9Sstevel@tonic-gate }
2078*7c478bd9Sstevel@tonic-gate 
2079*7c478bd9Sstevel@tonic-gate int
2080*7c478bd9Sstevel@tonic-gate setdirmode(vnode_t *dvp, mode_t *omp, cred_t *cr)
2081*7c478bd9Sstevel@tonic-gate {
2082*7c478bd9Sstevel@tonic-gate 	int error;
2083*7c478bd9Sstevel@tonic-gate 	struct vattr va;
2084*7c478bd9Sstevel@tonic-gate 
2085*7c478bd9Sstevel@tonic-gate 	va.va_mask = AT_MODE;
2086*7c478bd9Sstevel@tonic-gate 	error = VOP_GETATTR(dvp, &va, 0, cr);
2087*7c478bd9Sstevel@tonic-gate 	if (error)
2088*7c478bd9Sstevel@tonic-gate 		return (error);
2089*7c478bd9Sstevel@tonic-gate 
2090*7c478bd9Sstevel@tonic-gate 	/*
2091*7c478bd9Sstevel@tonic-gate 	 * Modify the expected mode (om) so that the set-gid bit matches
2092*7c478bd9Sstevel@tonic-gate 	 * that of the parent directory (dvp).
2093*7c478bd9Sstevel@tonic-gate 	 */
2094*7c478bd9Sstevel@tonic-gate 	if (va.va_mode & VSGID)
2095*7c478bd9Sstevel@tonic-gate 		*omp |= VSGID;
2096*7c478bd9Sstevel@tonic-gate 	else
2097*7c478bd9Sstevel@tonic-gate 		*omp &= ~VSGID;
2098*7c478bd9Sstevel@tonic-gate 	return (0);
2099*7c478bd9Sstevel@tonic-gate }
2100*7c478bd9Sstevel@tonic-gate 
2101*7c478bd9Sstevel@tonic-gate void
2102*7c478bd9Sstevel@tonic-gate nfs_setswaplike(vnode_t *vp, vattr_t *vap)
2103*7c478bd9Sstevel@tonic-gate {
2104*7c478bd9Sstevel@tonic-gate 
2105*7c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && (vap->va_mode & (VEXEC | VSVTX)) == VSVTX) {
2106*7c478bd9Sstevel@tonic-gate 		if (!(vp->v_flag & VSWAPLIKE)) {
2107*7c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
2108*7c478bd9Sstevel@tonic-gate 			vp->v_flag |= VSWAPLIKE;
2109*7c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
2110*7c478bd9Sstevel@tonic-gate 		}
2111*7c478bd9Sstevel@tonic-gate 	} else {
2112*7c478bd9Sstevel@tonic-gate 		if (vp->v_flag & VSWAPLIKE) {
2113*7c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
2114*7c478bd9Sstevel@tonic-gate 			vp->v_flag &= ~VSWAPLIKE;
2115*7c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
2116*7c478bd9Sstevel@tonic-gate 		}
2117*7c478bd9Sstevel@tonic-gate 	}
2118*7c478bd9Sstevel@tonic-gate }
2119*7c478bd9Sstevel@tonic-gate 
/*
 * Free the resources associated with an rnode.
 *
 * Called when the rnode is being torn down: drains async i/o,
 * flushes/invalidates cached pages, then releases the credential
 * and the symlink/ACL/pathconf/access/readdir caches held by rp.
 * 'cr' is the credential used for the page flush/invalidation.
 */
static void
rinactive(rnode_t *rp, cred_t *cr)
{
	vnode_t *vp;
	cred_t *cred;
	char *contents;
	int size;
	vsecattr_t *vsp;
	int error;
	nfs3_pathconf_info *info;

	/*
	 * Before freeing anything, wait until all asynchronous
	 * activity is done on this rnode.  This will allow all
	 * asynchronous read ahead and write behind i/o's to
	 * finish.
	 */
	mutex_enter(&rp->r_statelock);
	while (rp->r_count > 0)
		cv_wait(&rp->r_cv, &rp->r_statelock);
	mutex_exit(&rp->r_statelock);

	/*
	 * Flush and invalidate all pages associated with the vnode.
	 * Dirty pages are pushed first; an ENOSPC/EDQUOT failure is
	 * latched into r_error (if not already set) so a later caller
	 * can observe that the final write-back failed.
	 */
	vp = RTOV(rp);
	if (vn_has_cached_data(vp)) {
		ASSERT(vp->v_type != VCHR);
		if ((rp->r_flags & RDIRTY) && !rp->r_error) {
			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr);
			if (error && (error == ENOSPC || error == EDQUOT)) {
				mutex_enter(&rp->r_statelock);
				if (!rp->r_error)
					rp->r_error = error;
				mutex_exit(&rp->r_statelock);
			}
		}
		nfs_invalidate_pages(vp, (u_offset_t)0, cr);
	}

	/*
	 * Free any held credentials and caches which may be associated
	 * with this rnode.  The fields are snapshotted into locals and
	 * NULLed out under r_statelock; the actual frees happen after
	 * the mutex is dropped so no blocking calls are made while
	 * holding it.
	 */
	mutex_enter(&rp->r_statelock);
	cred = rp->r_cred;
	rp->r_cred = NULL;
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	/*
	 * Free the held credential.
	 */
	if (cred != NULL)
		crfree(cred);

	/*
	 * Free the access cache entries.
	 */
	(void) nfs_access_purge_rp(rp);

	/*
	 * Free the readdir cache entries.
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(vp);

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));
}
2216*7c478bd9Sstevel@tonic-gate 
2217*7c478bd9Sstevel@tonic-gate /*
2218*7c478bd9Sstevel@tonic-gate  * Return a vnode for the given NFS Version 2 file handle.
2219*7c478bd9Sstevel@tonic-gate  * If no rnode exists for this fhandle, create one and put it
2220*7c478bd9Sstevel@tonic-gate  * into the hash queues.  If the rnode for this fhandle
2221*7c478bd9Sstevel@tonic-gate  * already exists, return it.
2222*7c478bd9Sstevel@tonic-gate  *
2223*7c478bd9Sstevel@tonic-gate  * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
2224*7c478bd9Sstevel@tonic-gate  */
vnode_t *
makenfsnode(fhandle_t *fh, struct nfsfattr *attr, struct vfs *vfsp,
    hrtime_t t, cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;
	nfs_fhandle nfh;
	vattr_t va;

	/*
	 * Repackage the fixed-size NFSv2 filehandle into the generic
	 * nfs_fhandle form used by the rnode hash table.
	 */
	nfh.fh_len = NFS_FHSIZE;
	bcopy(fh, nfh.fh_buf, NFS_FHSIZE);

	index = rtablehash(&nfh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	/*
	 * make_rnode() returns with the hash bucket lock still held
	 * (possibly upgraded to a writer lock); every path below must
	 * release it exactly once.
	 */
	vp = make_rnode(&nfh, &rtable[index], vfsp, nfs_vnodeops,
	    nfs_putapage, nfs_rddir_compar, &newnode, cr, dnm, nm);

	if (attr != NULL) {
		if (!newnode) {
			/*
			 * Existing rnode: drop the bucket lock before
			 * updating the attribute cache, which may block.
			 */
			rw_exit(&rtable[index].r_lock);
			(void) nfs_cache_fattr(vp, attr, &va, t, cr);
		} else {
			/*
			 * Brand new rnode: initialize v_type/v_rdev
			 * directly from the wire attributes, rejecting
			 * out-of-range type values.
			 */
			if (attr->na_type < NFNON || attr->na_type > NFSOC)
				vp->v_type = VBAD;
			else
				vp->v_type = n2v_type(attr);
			/*
			 * A translation here seems to be necessary
			 * because this function can be called
			 * with `attr' that has come from the wire,
			 * and been operated on by vattr_to_nattr().
			 * See nfsrootvp()->VOP_GETATTR()->nfsgetattr()
			 * ->nfs_getattr_otw()->rfscall()->vattr_to_nattr()
			 * ->makenfsnode().
			 */
			if ((attr->na_rdev & 0xffff0000) == 0)
				vp->v_rdev = nfsv2_expdev(attr->na_rdev);
			else
				vp->v_rdev = expldev(n2v_rdev(attr));
			nfs_attrcache(vp, attr, t);
			rw_exit(&rtable[index].r_lock);
		}
	} else {
		/*
		 * No attributes supplied: a brand new rnode has
		 * nothing valid to cache, so mark its cache purged.
		 */
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}
2278*7c478bd9Sstevel@tonic-gate 
2279*7c478bd9Sstevel@tonic-gate /*
2280*7c478bd9Sstevel@tonic-gate  * Return a vnode for the given NFS Version 3 file handle.
2281*7c478bd9Sstevel@tonic-gate  * If no rnode exists for this fhandle, create one and put it
2282*7c478bd9Sstevel@tonic-gate  * into the hash queues.  If the rnode for this fhandle
2283*7c478bd9Sstevel@tonic-gate  * already exists, return it.
2284*7c478bd9Sstevel@tonic-gate  *
2285*7c478bd9Sstevel@tonic-gate  * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
2286*7c478bd9Sstevel@tonic-gate  */
vnode_t *
makenfs3node_va(nfs_fh3 *fh, vattr_t *vap, struct vfs *vfsp, hrtime_t t,
    cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;

	index = rtablehash((nfs_fhandle *)fh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	/*
	 * make_rnode() returns with the hash bucket lock still held
	 * (possibly upgraded to a writer lock); every path below must
	 * release it exactly once.
	 */
	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
	    dnm, nm);

	if (vap == NULL) {
		/*
		 * No attributes supplied: a brand new rnode has
		 * nothing valid to cache, so mark its cache purged.
		 */
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
		return (vp);
	}

	if (!newnode) {
		/*
		 * Existing rnode: drop the bucket lock before updating
		 * the attribute cache, which may block.
		 */
		rw_exit(&rtable[index].r_lock);
		nfs_attr_cache(vp, vap, t, cr);
	} else {
		rnode_t *rp = VTOR(vp);

		vp->v_type = vap->va_type;
		vp->v_rdev = vap->va_rdev;

		/*
		 * Cache the attributes only if no newer update has
		 * raced ahead of us (NOTE(review): r_mtime appears to
		 * record the request time of the last attribute update
		 * — confirm against nfs_attrcache_va()).
		 */
		mutex_enter(&rp->r_statelock);
		if (rp->r_mtime <= t)
			nfs_attrcache_va(vp, vap);
		mutex_exit(&rp->r_statelock);
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}
2328*7c478bd9Sstevel@tonic-gate 
/*
 * Return a vnode for the given NFS Version 3 file handle, initializing
 * a new rnode from wire-format fattr3 attributes when one is created.
 * If the rnode for this fhandle already exists, return it.
 *
 * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
 */
vnode_t *
makenfs3node(nfs_fh3 *fh, fattr3 *attr, struct vfs *vfsp, hrtime_t t,
    cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;
	vattr_t va;

	index = rtablehash((nfs_fhandle *)fh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	/*
	 * make_rnode() returns with the hash bucket lock still held
	 * (possibly upgraded to a writer lock); every path below must
	 * release it exactly once.
	 */
	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
	    dnm, nm);

	if (attr == NULL) {
		/*
		 * No attributes supplied: a brand new rnode has
		 * nothing valid to cache, so mark its cache purged.
		 */
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
		return (vp);
	}

	if (!newnode) {
		/*
		 * Existing rnode: drop the bucket lock before updating
		 * the attribute cache, which may block.
		 */
		rw_exit(&rtable[index].r_lock);
		(void) nfs3_cache_fattr3(vp, attr, &va, t, cr);
	} else {
		/*
		 * Brand new rnode: initialize v_type/v_rdev directly
		 * from the wire attributes, rejecting types outside
		 * the valid NFSv3 range.
		 */
		if (attr->type < NF3REG || attr->type > NF3FIFO)
			vp->v_type = VBAD;
		else
			vp->v_type = nf3_to_vt[attr->type];
		vp->v_rdev = makedevice(attr->rdev.specdata1,
			    attr->rdev.specdata2);
		nfs3_attrcache(vp, attr, t);
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}
2369*7c478bd9Sstevel@tonic-gate 
2370*7c478bd9Sstevel@tonic-gate /*
2371*7c478bd9Sstevel@tonic-gate  * Read this comment before making changes to rtablehash()!
2372*7c478bd9Sstevel@tonic-gate  * This is a hash function in which seemingly obvious and harmless
 * changes can cause escalations costing millions of dollars!
2374*7c478bd9Sstevel@tonic-gate  * Know what you are doing.
2375*7c478bd9Sstevel@tonic-gate  *
2376*7c478bd9Sstevel@tonic-gate  * rtablehash() implements Jenkins' one-at-a-time hash algorithm.  The
2377*7c478bd9Sstevel@tonic-gate  * algorithm is currently detailed here:
2378*7c478bd9Sstevel@tonic-gate  *
2379*7c478bd9Sstevel@tonic-gate  *   http://burtleburtle.net/bob/hash/doobs.html
2380*7c478bd9Sstevel@tonic-gate  *
2381*7c478bd9Sstevel@tonic-gate  * Of course, the above link may not be valid by the time you are reading
2382*7c478bd9Sstevel@tonic-gate  * this, but suffice it to say that the one-at-a-time algorithm works well in
2383*7c478bd9Sstevel@tonic-gate  * almost all cases.  If you are changing the algorithm be sure to verify that
2384*7c478bd9Sstevel@tonic-gate  * the hash algorithm still provides even distribution in all cases and with
2385*7c478bd9Sstevel@tonic-gate  * any server returning filehandles in whatever order (sequential or random).
2386*7c478bd9Sstevel@tonic-gate  */
2387*7c478bd9Sstevel@tonic-gate static int
2388*7c478bd9Sstevel@tonic-gate rtablehash(nfs_fhandle *fh)
2389*7c478bd9Sstevel@tonic-gate {
2390*7c478bd9Sstevel@tonic-gate 	ulong_t hash, len, i;
2391*7c478bd9Sstevel@tonic-gate 	char *key;
2392*7c478bd9Sstevel@tonic-gate 
2393*7c478bd9Sstevel@tonic-gate 	key = fh->fh_buf;
2394*7c478bd9Sstevel@tonic-gate 	len = (ulong_t)fh->fh_len;
2395*7c478bd9Sstevel@tonic-gate 	for (hash = 0, i = 0; i < len; i++) {
2396*7c478bd9Sstevel@tonic-gate 		hash += key[i];
2397*7c478bd9Sstevel@tonic-gate 		hash += (hash << 10);
2398*7c478bd9Sstevel@tonic-gate 		hash ^= (hash >> 6);
2399*7c478bd9Sstevel@tonic-gate 	}
2400*7c478bd9Sstevel@tonic-gate 	hash += (hash << 3);
2401*7c478bd9Sstevel@tonic-gate 	hash ^= (hash >> 11);
2402*7c478bd9Sstevel@tonic-gate 	hash += (hash << 15);
2403*7c478bd9Sstevel@tonic-gate 	return (hash & rtablemask);
2404*7c478bd9Sstevel@tonic-gate }
2405*7c478bd9Sstevel@tonic-gate 
/*
 * Common lookup/create engine for the makenfsnode()/makenfs3node*()
 * entry points.  Looks up `fh' in the hash bucket `rhtp'; if not found,
 * recycles an rnode from the freelist (when over the nrnode limit) or
 * allocates a fresh one, then rechecks for a racing insertion before
 * hashing it in.  *newnode is set to 1 only when a new rnode was
 * inserted.
 *
 * Entered with rhtp->r_lock held as reader; returns with it held again
 * (as writer on the new-node path, as reader otherwise).
 */
static vnode_t *
make_rnode(nfs_fhandle *fh, rhashq_t *rhtp, struct vfs *vfsp,
    struct vnodeops *vops,
    int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
    int (*compar)(const void *, const void *),
    int *newnode, cred_t *cr, char *dnm, char *nm)
{
	rnode_t *rp;
	rnode_t *trp;
	vnode_t *vp;
	mntinfo_t *mi;

	ASSERT(RW_READ_HELD(&rhtp->r_lock));

	mi = VFTOMI(vfsp);
start:
	/* Fast path: the rnode is already hashed in this bucket. */
	if ((rp = rfind(rhtp, fh, vfsp)) != NULL) {
		vp = RTOV(rp);
		nfs_set_vroot(vp);
		*newnode = 0;
		return (vp);
	}
	/*
	 * Not found: drop the bucket lock while we obtain an rnode.
	 * The lookup is redone under the writer lock below to close
	 * the resulting race window.
	 */
	rw_exit(&rhtp->r_lock);

	mutex_enter(&rpfreelist_lock);
	if (rpfreelist != NULL && rnew >= nrnode) {
		/*
		 * At or over the rnode limit: recycle the rnode at the
		 * head of the freelist rather than allocating another.
		 */
		rp = rpfreelist;
		rp_rmfree(rp);
		mutex_exit(&rpfreelist_lock);

		vp = RTOV(rp);

		if (rp->r_flags & RHASHED) {
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else grabbed a reference while
				 * no locks were held; release our hold
				 * and retry the lookup from scratch.
				 */
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				rw_enter(&rhtp->r_lock, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		/* Tear down the recycled rnode's cached state. */
		rinactive(rp, cr);

		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			/*
			 * A reference was acquired during rinactive()
			 * (e.g. by pageout); give up on this rnode and
			 * retry the lookup.
			 */
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			rw_enter(&rhtp->r_lock, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		nfs_rw_destroy(&rp->r_rwlock);
		nfs_rw_destroy(&rp->r_lkserlock);
		mutex_destroy(&rp->r_statelock);
		cv_destroy(&rp->r_cv);
		cv_destroy(&rp->r_commit.c_cv);
		nfs_free_r_path(rp);
		avl_destroy(&rp->r_dir);
		/*
		 * Make sure that if rnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		vnode_t *new_vp;

		mutex_exit(&rpfreelist_lock);

		/* Below the limit (or empty freelist): allocate anew. */
		rp = kmem_cache_alloc(rnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_add_long((ulong_t *)&rnew, 1);
#ifdef DEBUG
		clstat_debug.nrnode.value.ui64++;
#endif
		vp = new_vp;
	}

	/* (Re)initialize the rnode from scratch. */
	bzero(rp, sizeof (*rp));
	rp->r_vnode = vp;
	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
	nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
	rp->r_fh.fh_len = fh->fh_len;
	bcopy(fh->fh_buf, rp->r_fh.fh_buf, fh->fh_len);
	rp->r_server = mi->mi_curr_serv;
	if (FAILOVER_MOUNT(mi)) {
		/*
		 * If replicated servers, stash pathnames
		 */
		if (dnm != NULL && nm != NULL) {
			char *s, *p;
			uint_t len;

			/* r_path = "<dnm>/<nm>", built by hand. */
			len = (uint_t)(strlen(dnm) + strlen(nm) + 2);
			rp->r_path = kmem_alloc(len, KM_SLEEP);
#ifdef DEBUG
			clstat_debug.rpath.value.ui64 += len;
#endif
			s = rp->r_path;
			for (p = dnm; *p; p++)
				*s++ = *p;
			*s++ = '/';
			for (p = nm; *p; p++)
				*s++ = *p;
			*s = '\0';
		} else {
			/* special case for root */
			rp->r_path = kmem_alloc(2, KM_SLEEP);
#ifdef DEBUG
			clstat_debug.rpath.value.ui64 += 2;
#endif
			*rp->r_path = '.';
			*(rp->r_path + 1) = '\0';
		}
	}
	/* Hold the VFS for the life of the rnode (see VFS_RELE above). */
	VFS_HOLD(vfsp);
	rp->r_putapage = putapage;
	rp->r_hashq = rhtp;
	rp->r_flags = RREADDIRPLUS;
	avl_create(&rp->r_dir, compar, sizeof (rddir_cache),
	    offsetof(rddir_cache, tree));
	vn_setops(vp, vops);
	vp->v_data = (caddr_t)rp;
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;
	nfs_set_vroot(vp);

	/*
	 * There is a race condition if someone else
	 * alloc's the rnode while no locks are held, so we
	 * check again and recover if found.
	 */
	rw_enter(&rhtp->r_lock, RW_WRITER);
	if ((trp = rfind(rhtp, fh, vfsp)) != NULL) {
		/*
		 * Lost the race: discard our rnode onto the freelist
		 * and return the winner's, with the bucket lock
		 * re-held as reader.
		 */
		vp = RTOV(trp);
		nfs_set_vroot(vp);
		*newnode = 0;
		rw_exit(&rhtp->r_lock);
		rp_addfree(rp, cr);
		rw_enter(&rhtp->r_lock, RW_READER);
		return (vp);
	}
	/* Won the race: hash it in and return holding the writer lock. */
	rp_addhash(rp);
	*newnode = 1;
	return (vp);
}
2568*7c478bd9Sstevel@tonic-gate 
2569*7c478bd9Sstevel@tonic-gate static void
2570*7c478bd9Sstevel@tonic-gate nfs_set_vroot(vnode_t *vp)
2571*7c478bd9Sstevel@tonic-gate {
2572*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
2573*7c478bd9Sstevel@tonic-gate 	nfs_fhandle *rootfh;
2574*7c478bd9Sstevel@tonic-gate 
2575*7c478bd9Sstevel@tonic-gate 	rp = VTOR(vp);
2576*7c478bd9Sstevel@tonic-gate 	rootfh = &rp->r_server->sv_fhandle;
2577*7c478bd9Sstevel@tonic-gate 	if (rootfh->fh_len == rp->r_fh.fh_len &&
2578*7c478bd9Sstevel@tonic-gate 	    bcmp(rootfh->fh_buf, rp->r_fh.fh_buf, rp->r_fh.fh_len) == 0) {
2579*7c478bd9Sstevel@tonic-gate 		if (!(vp->v_flag & VROOT)) {
2580*7c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
2581*7c478bd9Sstevel@tonic-gate 			vp->v_flag |= VROOT;
2582*7c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
2583*7c478bd9Sstevel@tonic-gate 		}
2584*7c478bd9Sstevel@tonic-gate 	}
2585*7c478bd9Sstevel@tonic-gate }
2586*7c478bd9Sstevel@tonic-gate 
2587*7c478bd9Sstevel@tonic-gate static void
2588*7c478bd9Sstevel@tonic-gate nfs_free_r_path(rnode_t *rp)
2589*7c478bd9Sstevel@tonic-gate {
2590*7c478bd9Sstevel@tonic-gate 	char *path;
2591*7c478bd9Sstevel@tonic-gate 	size_t len;
2592*7c478bd9Sstevel@tonic-gate 
2593*7c478bd9Sstevel@tonic-gate 	path = rp->r_path;
2594*7c478bd9Sstevel@tonic-gate 	if (path) {
2595*7c478bd9Sstevel@tonic-gate 		rp->r_path = NULL;
2596*7c478bd9Sstevel@tonic-gate 		len = strlen(path) + 1;
2597*7c478bd9Sstevel@tonic-gate 		kmem_free(path, len);
2598*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2599*7c478bd9Sstevel@tonic-gate 		clstat_debug.rpath.value.ui64 -= len;
2600*7c478bd9Sstevel@tonic-gate #endif
2601*7c478bd9Sstevel@tonic-gate 	}
2602*7c478bd9Sstevel@tonic-gate }
2603*7c478bd9Sstevel@tonic-gate 
2604*7c478bd9Sstevel@tonic-gate /*
2605*7c478bd9Sstevel@tonic-gate  * Put an rnode on the free list.
2606*7c478bd9Sstevel@tonic-gate  *
2607*7c478bd9Sstevel@tonic-gate  * Rnodes which were allocated above and beyond the normal limit
2608*7c478bd9Sstevel@tonic-gate  * are immediately freed.
2609*7c478bd9Sstevel@tonic-gate  */
void
rp_addfree(rnode_t *rp, cred_t *cr)
{
	vnode_t *vp;
	struct vfs *vfsp;

	vp = RTOV(rp);
	ASSERT(vp->v_count >= 1);
	/* Must not already be on the freelist. */
	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);

	/*
	 * If we have too many rnodes allocated and there are no
	 * references to this rnode, or if the rnode is no longer
	 * accessible because it does not reside in the hash queues,
	 * or if an i/o error occurred while writing to the file,
	 * then just free it instead of putting it on the rnode
	 * freelist.
	 */
	vfsp = vp->v_vfsp;
	if (((rnew > nrnode || !(rp->r_flags & RHASHED) || rp->r_error ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
		if (rp->r_flags & RHASHED) {
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Another reference appeared; just
				 * release ours and let that holder
				 * dispose of the rnode later.
				 */
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				return;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		/* Tear down cached state before destroying the rnode. */
		rinactive(rp, cr);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * rnode is not in the rnode hash queues, so the
		 * only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the rnode was marked
		 * with RDIRTY or for a modified page.  This
		 * reference may have been acquired before our call
		 * to rinactive.  The i/o may have been completed,
		 * thus allowing rinactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the rnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the rnode or placing it on the
		 * rnode freelist.  If there are no other references,
		 * then the rnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		destroy_rnode(rp);
		return;
	}

	/*
	 * Lock the hash queue and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the rnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this rnode to the freelist.
	 */
	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		rw_exit(&rp->r_hashq->r_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * If there is no cached data or metadata for this file, then
	 * put the rnode on the front of the freelist so that it will
	 * be reused before other rnodes which may have cached data or
	 * metadata associated with them.
	 */
	mutex_enter(&rpfreelist_lock);
	if (rpfreelist == NULL) {
		/* First entry: make it a one-element circular list. */
		rp->r_freef = rp;
		rp->r_freeb = rp;
		rpfreelist = rp;
	} else {
		/* Insert at the tail (just behind the list head). */
		rp->r_freef = rpfreelist;
		rp->r_freeb = rpfreelist->r_freeb;
		rpfreelist->r_freeb->r_freef = rp;
		rpfreelist->r_freeb = rp;
		if (!vn_has_cached_data(vp) &&
		    !HAVE_RDDIR_CACHE(rp) &&
		    rp->r_symlink.contents == NULL &&
		    rp->r_secattr == NULL &&
		    rp->r_pathconf == NULL)
			rpfreelist = rp;
	}
	mutex_exit(&rpfreelist_lock);

	rw_exit(&rp->r_hashq->r_lock);
}
2724*7c478bd9Sstevel@tonic-gate 
2725*7c478bd9Sstevel@tonic-gate /*
2726*7c478bd9Sstevel@tonic-gate  * Remove an rnode from the free list.
2727*7c478bd9Sstevel@tonic-gate  *
2728*7c478bd9Sstevel@tonic-gate  * The caller must be holding rpfreelist_lock and the rnode
2729*7c478bd9Sstevel@tonic-gate  * must be on the freelist.
2730*7c478bd9Sstevel@tonic-gate  */
2731*7c478bd9Sstevel@tonic-gate static void
2732*7c478bd9Sstevel@tonic-gate rp_rmfree(rnode_t *rp)
2733*7c478bd9Sstevel@tonic-gate {
2734*7c478bd9Sstevel@tonic-gate 
2735*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&rpfreelist_lock));
2736*7c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
2737*7c478bd9Sstevel@tonic-gate 
2738*7c478bd9Sstevel@tonic-gate 	if (rp == rpfreelist) {
2739*7c478bd9Sstevel@tonic-gate 		rpfreelist = rp->r_freef;
2740*7c478bd9Sstevel@tonic-gate 		if (rp == rpfreelist)
2741*7c478bd9Sstevel@tonic-gate 			rpfreelist = NULL;
2742*7c478bd9Sstevel@tonic-gate 	}
2743*7c478bd9Sstevel@tonic-gate 
2744*7c478bd9Sstevel@tonic-gate 	rp->r_freeb->r_freef = rp->r_freef;
2745*7c478bd9Sstevel@tonic-gate 	rp->r_freef->r_freeb = rp->r_freeb;
2746*7c478bd9Sstevel@tonic-gate 
2747*7c478bd9Sstevel@tonic-gate 	rp->r_freef = rp->r_freeb = NULL;
2748*7c478bd9Sstevel@tonic-gate }
2749*7c478bd9Sstevel@tonic-gate 
2750*7c478bd9Sstevel@tonic-gate /*
2751*7c478bd9Sstevel@tonic-gate  * Put a rnode in the hash table.
2752*7c478bd9Sstevel@tonic-gate  *
2753*7c478bd9Sstevel@tonic-gate  * The caller must be holding the exclusive hash queue lock.
2754*7c478bd9Sstevel@tonic-gate  */
2755*7c478bd9Sstevel@tonic-gate static void
2756*7c478bd9Sstevel@tonic-gate rp_addhash(rnode_t *rp)
2757*7c478bd9Sstevel@tonic-gate {
2758*7c478bd9Sstevel@tonic-gate 
2759*7c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
2760*7c478bd9Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & RHASHED));
2761*7c478bd9Sstevel@tonic-gate 
2762*7c478bd9Sstevel@tonic-gate 	rp->r_hashf = rp->r_hashq->r_hashf;
2763*7c478bd9Sstevel@tonic-gate 	rp->r_hashq->r_hashf = rp;
2764*7c478bd9Sstevel@tonic-gate 	rp->r_hashb = (rnode_t *)rp->r_hashq;
2765*7c478bd9Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp;
2766*7c478bd9Sstevel@tonic-gate 
2767*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2768*7c478bd9Sstevel@tonic-gate 	rp->r_flags |= RHASHED;
2769*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2770*7c478bd9Sstevel@tonic-gate }
2771*7c478bd9Sstevel@tonic-gate 
2772*7c478bd9Sstevel@tonic-gate /*
2773*7c478bd9Sstevel@tonic-gate  * Remove a rnode from the hash table.
2774*7c478bd9Sstevel@tonic-gate  *
2775*7c478bd9Sstevel@tonic-gate  * The caller must be holding the hash queue lock.
2776*7c478bd9Sstevel@tonic-gate  */
2777*7c478bd9Sstevel@tonic-gate static void
2778*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rnode_t *rp)
2779*7c478bd9Sstevel@tonic-gate {
2780*7c478bd9Sstevel@tonic-gate 
2781*7c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
2782*7c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_flags & RHASHED);
2783*7c478bd9Sstevel@tonic-gate 
2784*7c478bd9Sstevel@tonic-gate 	rp->r_hashb->r_hashf = rp->r_hashf;
2785*7c478bd9Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp->r_hashb;
2786*7c478bd9Sstevel@tonic-gate 
2787*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2788*7c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~RHASHED;
2789*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2790*7c478bd9Sstevel@tonic-gate }
2791*7c478bd9Sstevel@tonic-gate 
2792*7c478bd9Sstevel@tonic-gate /*
2793*7c478bd9Sstevel@tonic-gate  * Remove a rnode from the hash table.
2794*7c478bd9Sstevel@tonic-gate  *
2795*7c478bd9Sstevel@tonic-gate  * The caller must not be holding the hash queue lock.
2796*7c478bd9Sstevel@tonic-gate  */
2797*7c478bd9Sstevel@tonic-gate void
2798*7c478bd9Sstevel@tonic-gate rp_rmhash(rnode_t *rp)
2799*7c478bd9Sstevel@tonic-gate {
2800*7c478bd9Sstevel@tonic-gate 
2801*7c478bd9Sstevel@tonic-gate 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
2802*7c478bd9Sstevel@tonic-gate 	rp_rmhash_locked(rp);
2803*7c478bd9Sstevel@tonic-gate 	rw_exit(&rp->r_hashq->r_lock);
2804*7c478bd9Sstevel@tonic-gate }
2805*7c478bd9Sstevel@tonic-gate 
2806*7c478bd9Sstevel@tonic-gate /*
2807*7c478bd9Sstevel@tonic-gate  * Lookup a rnode by fhandle.
2808*7c478bd9Sstevel@tonic-gate  *
2809*7c478bd9Sstevel@tonic-gate  * The caller must be holding the hash queue lock, either shared or exclusive.
2810*7c478bd9Sstevel@tonic-gate  */
2811*7c478bd9Sstevel@tonic-gate static rnode_t *
2812*7c478bd9Sstevel@tonic-gate rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
2813*7c478bd9Sstevel@tonic-gate {
2814*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
2815*7c478bd9Sstevel@tonic-gate 	vnode_t *vp;
2816*7c478bd9Sstevel@tonic-gate 
2817*7c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
2818*7c478bd9Sstevel@tonic-gate 
2819*7c478bd9Sstevel@tonic-gate 	for (rp = rhtp->r_hashf; rp != (rnode_t *)rhtp; rp = rp->r_hashf) {
2820*7c478bd9Sstevel@tonic-gate 		vp = RTOV(rp);
2821*7c478bd9Sstevel@tonic-gate 		if (vp->v_vfsp == vfsp &&
2822*7c478bd9Sstevel@tonic-gate 		    rp->r_fh.fh_len == fh->fh_len &&
2823*7c478bd9Sstevel@tonic-gate 		    bcmp(rp->r_fh.fh_buf, fh->fh_buf, fh->fh_len) == 0) {
2824*7c478bd9Sstevel@tonic-gate 			/*
2825*7c478bd9Sstevel@tonic-gate 			 * remove rnode from free list, if necessary.
2826*7c478bd9Sstevel@tonic-gate 			 */
2827*7c478bd9Sstevel@tonic-gate 			if (rp->r_freef != NULL) {
2828*7c478bd9Sstevel@tonic-gate 				mutex_enter(&rpfreelist_lock);
2829*7c478bd9Sstevel@tonic-gate 				/*
2830*7c478bd9Sstevel@tonic-gate 				 * If the rnode is on the freelist,
2831*7c478bd9Sstevel@tonic-gate 				 * then remove it and use that reference
2832*7c478bd9Sstevel@tonic-gate 				 * as the new reference.  Otherwise,
2833*7c478bd9Sstevel@tonic-gate 				 * need to increment the reference count.
2834*7c478bd9Sstevel@tonic-gate 				 */
2835*7c478bd9Sstevel@tonic-gate 				if (rp->r_freef != NULL) {
2836*7c478bd9Sstevel@tonic-gate 					rp_rmfree(rp);
2837*7c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
2838*7c478bd9Sstevel@tonic-gate 				} else {
2839*7c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
2840*7c478bd9Sstevel@tonic-gate 					VN_HOLD(vp);
2841*7c478bd9Sstevel@tonic-gate 				}
2842*7c478bd9Sstevel@tonic-gate 			} else
2843*7c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
2844*7c478bd9Sstevel@tonic-gate 			return (rp);
2845*7c478bd9Sstevel@tonic-gate 		}
2846*7c478bd9Sstevel@tonic-gate 	}
2847*7c478bd9Sstevel@tonic-gate 	return (NULL);
2848*7c478bd9Sstevel@tonic-gate }
2849*7c478bd9Sstevel@tonic-gate 
2850*7c478bd9Sstevel@tonic-gate /*
2851*7c478bd9Sstevel@tonic-gate  * Return 1 if there is a active vnode belonging to this vfs in the
2852*7c478bd9Sstevel@tonic-gate  * rtable cache.
2853*7c478bd9Sstevel@tonic-gate  *
2854*7c478bd9Sstevel@tonic-gate  * Several of these checks are done without holding the usual
2855*7c478bd9Sstevel@tonic-gate  * locks.  This is safe because destroy_rtable(), rp_addfree(),
2856*7c478bd9Sstevel@tonic-gate  * etc. will redo the necessary checks before actually destroying
2857*7c478bd9Sstevel@tonic-gate  * any rnodes.
2858*7c478bd9Sstevel@tonic-gate  */
2859*7c478bd9Sstevel@tonic-gate int
2860*7c478bd9Sstevel@tonic-gate check_rtable(struct vfs *vfsp)
2861*7c478bd9Sstevel@tonic-gate {
2862*7c478bd9Sstevel@tonic-gate 	int index;
2863*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
2864*7c478bd9Sstevel@tonic-gate 	vnode_t *vp;
2865*7c478bd9Sstevel@tonic-gate 
2866*7c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
2867*7c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
2868*7c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
2869*7c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
2870*7c478bd9Sstevel@tonic-gate 		    rp = rp->r_hashf) {
2871*7c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
2872*7c478bd9Sstevel@tonic-gate 			if (vp->v_vfsp == vfsp) {
2873*7c478bd9Sstevel@tonic-gate 				if (rp->r_freef == NULL ||
2874*7c478bd9Sstevel@tonic-gate 				    (vn_has_cached_data(vp) &&
2875*7c478bd9Sstevel@tonic-gate 				    (rp->r_flags & RDIRTY)) ||
2876*7c478bd9Sstevel@tonic-gate 				    rp->r_count > 0) {
2877*7c478bd9Sstevel@tonic-gate 					rw_exit(&rtable[index].r_lock);
2878*7c478bd9Sstevel@tonic-gate 					return (1);
2879*7c478bd9Sstevel@tonic-gate 				}
2880*7c478bd9Sstevel@tonic-gate 			}
2881*7c478bd9Sstevel@tonic-gate 		}
2882*7c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
2883*7c478bd9Sstevel@tonic-gate 	}
2884*7c478bd9Sstevel@tonic-gate 	return (0);
2885*7c478bd9Sstevel@tonic-gate }
2886*7c478bd9Sstevel@tonic-gate 
2887*7c478bd9Sstevel@tonic-gate /*
2888*7c478bd9Sstevel@tonic-gate  * Destroy inactive vnodes from the hash queues which belong to this
2889*7c478bd9Sstevel@tonic-gate  * vfs.  It is essential that we destroy all inactive vnodes during a
2890*7c478bd9Sstevel@tonic-gate  * forced unmount as well as during a normal unmount.
2891*7c478bd9Sstevel@tonic-gate  */
2892*7c478bd9Sstevel@tonic-gate void
2893*7c478bd9Sstevel@tonic-gate destroy_rtable(struct vfs *vfsp, cred_t *cr)
2894*7c478bd9Sstevel@tonic-gate {
2895*7c478bd9Sstevel@tonic-gate 	int index;
2896*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
2897*7c478bd9Sstevel@tonic-gate 	rnode_t *rlist;
2898*7c478bd9Sstevel@tonic-gate 	rnode_t *r_hashf;
2899*7c478bd9Sstevel@tonic-gate 	vnode_t *vp;
2900*7c478bd9Sstevel@tonic-gate 
2901*7c478bd9Sstevel@tonic-gate 	rlist = NULL;
2902*7c478bd9Sstevel@tonic-gate 
2903*7c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
2904*7c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_WRITER);
2905*7c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
2906*7c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
2907*7c478bd9Sstevel@tonic-gate 		    rp = r_hashf) {
2908*7c478bd9Sstevel@tonic-gate 			/* save the hash pointer before destroying */
2909*7c478bd9Sstevel@tonic-gate 			r_hashf = rp->r_hashf;
2910*7c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
2911*7c478bd9Sstevel@tonic-gate 			if (vp->v_vfsp == vfsp) {
2912*7c478bd9Sstevel@tonic-gate 				mutex_enter(&rpfreelist_lock);
2913*7c478bd9Sstevel@tonic-gate 				if (rp->r_freef != NULL) {
2914*7c478bd9Sstevel@tonic-gate 					rp_rmfree(rp);
2915*7c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
2916*7c478bd9Sstevel@tonic-gate 					rp_rmhash_locked(rp);
2917*7c478bd9Sstevel@tonic-gate 					rp->r_hashf = rlist;
2918*7c478bd9Sstevel@tonic-gate 					rlist = rp;
2919*7c478bd9Sstevel@tonic-gate 				} else
2920*7c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
2921*7c478bd9Sstevel@tonic-gate 			}
2922*7c478bd9Sstevel@tonic-gate 		}
2923*7c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
2924*7c478bd9Sstevel@tonic-gate 	}
2925*7c478bd9Sstevel@tonic-gate 
2926*7c478bd9Sstevel@tonic-gate 	for (rp = rlist; rp != NULL; rp = rlist) {
2927*7c478bd9Sstevel@tonic-gate 		rlist = rp->r_hashf;
2928*7c478bd9Sstevel@tonic-gate 		/*
2929*7c478bd9Sstevel@tonic-gate 		 * This call to rp_addfree will end up destroying the
2930*7c478bd9Sstevel@tonic-gate 		 * rnode, but in a safe way with the appropriate set
2931*7c478bd9Sstevel@tonic-gate 		 * of checks done.
2932*7c478bd9Sstevel@tonic-gate 		 */
2933*7c478bd9Sstevel@tonic-gate 		rp_addfree(rp, cr);
2934*7c478bd9Sstevel@tonic-gate 	}
2935*7c478bd9Sstevel@tonic-gate 
2936*7c478bd9Sstevel@tonic-gate }
2937*7c478bd9Sstevel@tonic-gate 
2938*7c478bd9Sstevel@tonic-gate /*
2939*7c478bd9Sstevel@tonic-gate  * This routine destroys all the resources associated with the rnode
2940*7c478bd9Sstevel@tonic-gate  * and then the rnode itself.
2941*7c478bd9Sstevel@tonic-gate  */
2942*7c478bd9Sstevel@tonic-gate static void
2943*7c478bd9Sstevel@tonic-gate destroy_rnode(rnode_t *rp)
2944*7c478bd9Sstevel@tonic-gate {
2945*7c478bd9Sstevel@tonic-gate 	vnode_t *vp;
2946*7c478bd9Sstevel@tonic-gate 	vfs_t *vfsp;
2947*7c478bd9Sstevel@tonic-gate 
2948*7c478bd9Sstevel@tonic-gate 	vp = RTOV(rp);
2949*7c478bd9Sstevel@tonic-gate 	vfsp = vp->v_vfsp;
2950*7c478bd9Sstevel@tonic-gate 
2951*7c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_count == 1);
2952*7c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_count == 0);
2953*7c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_lmpl == NULL);
2954*7c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_mapcnt == 0);
2955*7c478bd9Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & RHASHED));
2956*7c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
2957*7c478bd9Sstevel@tonic-gate 	atomic_add_long((ulong_t *)&rnew, -1);
2958*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2959*7c478bd9Sstevel@tonic-gate 	clstat_debug.nrnode.value.ui64--;
2960*7c478bd9Sstevel@tonic-gate #endif
2961*7c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_rwlock);
2962*7c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_lkserlock);
2963*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&rp->r_statelock);
2964*7c478bd9Sstevel@tonic-gate 	cv_destroy(&rp->r_cv);
2965*7c478bd9Sstevel@tonic-gate 	cv_destroy(&rp->r_commit.c_cv);
2966*7c478bd9Sstevel@tonic-gate 	if (rp->r_flags & RDELMAPLIST)
2967*7c478bd9Sstevel@tonic-gate 		list_destroy(&rp->r_indelmap);
2968*7c478bd9Sstevel@tonic-gate 	nfs_free_r_path(rp);
2969*7c478bd9Sstevel@tonic-gate 	avl_destroy(&rp->r_dir);
2970*7c478bd9Sstevel@tonic-gate 	vn_invalid(vp);
2971*7c478bd9Sstevel@tonic-gate 	vn_free(vp);
2972*7c478bd9Sstevel@tonic-gate 	kmem_cache_free(rnode_cache, rp);
2973*7c478bd9Sstevel@tonic-gate 	VFS_RELE(vfsp);
2974*7c478bd9Sstevel@tonic-gate }
2975*7c478bd9Sstevel@tonic-gate 
2976*7c478bd9Sstevel@tonic-gate /*
2977*7c478bd9Sstevel@tonic-gate  * Flush all vnodes in this (or every) vfs.
2978*7c478bd9Sstevel@tonic-gate  * Used by nfs_sync and by nfs_unmount.
2979*7c478bd9Sstevel@tonic-gate  */
2980*7c478bd9Sstevel@tonic-gate void
2981*7c478bd9Sstevel@tonic-gate rflush(struct vfs *vfsp, cred_t *cr)
2982*7c478bd9Sstevel@tonic-gate {
2983*7c478bd9Sstevel@tonic-gate 	int index;
2984*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
2985*7c478bd9Sstevel@tonic-gate 	vnode_t *vp, **vplist;
2986*7c478bd9Sstevel@tonic-gate 	long num, cnt;
2987*7c478bd9Sstevel@tonic-gate 
2988*7c478bd9Sstevel@tonic-gate 	/*
2989*7c478bd9Sstevel@tonic-gate 	 * Check to see whether there is anything to do.
2990*7c478bd9Sstevel@tonic-gate 	 */
2991*7c478bd9Sstevel@tonic-gate 	num = rnew;
2992*7c478bd9Sstevel@tonic-gate 	if (num == 0)
2993*7c478bd9Sstevel@tonic-gate 		return;
2994*7c478bd9Sstevel@tonic-gate 
2995*7c478bd9Sstevel@tonic-gate 	/*
2996*7c478bd9Sstevel@tonic-gate 	 * Allocate a slot for all currently active rnodes on the
2997*7c478bd9Sstevel@tonic-gate 	 * supposition that they all may need flushing.
2998*7c478bd9Sstevel@tonic-gate 	 */
2999*7c478bd9Sstevel@tonic-gate 	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
3000*7c478bd9Sstevel@tonic-gate 	cnt = 0;
3001*7c478bd9Sstevel@tonic-gate 
3002*7c478bd9Sstevel@tonic-gate 	/*
3003*7c478bd9Sstevel@tonic-gate 	 * Walk the hash queues looking for rnodes with page
3004*7c478bd9Sstevel@tonic-gate 	 * lists associated with them.  Make a list of these
3005*7c478bd9Sstevel@tonic-gate 	 * files.
3006*7c478bd9Sstevel@tonic-gate 	 */
3007*7c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
3008*7c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
3009*7c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
3010*7c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
3011*7c478bd9Sstevel@tonic-gate 		    rp = rp->r_hashf) {
3012*7c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
3013*7c478bd9Sstevel@tonic-gate 			/*
3014*7c478bd9Sstevel@tonic-gate 			 * Don't bother sync'ing a vp if it
3015*7c478bd9Sstevel@tonic-gate 			 * is part of virtual swap device or
3016*7c478bd9Sstevel@tonic-gate 			 * if VFS is read-only
3017*7c478bd9Sstevel@tonic-gate 			 */
3018*7c478bd9Sstevel@tonic-gate 			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
3019*7c478bd9Sstevel@tonic-gate 				continue;
3020*7c478bd9Sstevel@tonic-gate 			/*
3021*7c478bd9Sstevel@tonic-gate 			 * If flushing all mounted file systems or
3022*7c478bd9Sstevel@tonic-gate 			 * the vnode belongs to this vfs, has pages
3023*7c478bd9Sstevel@tonic-gate 			 * and is marked as either dirty or mmap'd,
3024*7c478bd9Sstevel@tonic-gate 			 * hold and add this vnode to the list of
3025*7c478bd9Sstevel@tonic-gate 			 * vnodes to flush.
3026*7c478bd9Sstevel@tonic-gate 			 */
3027*7c478bd9Sstevel@tonic-gate 			if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
3028*7c478bd9Sstevel@tonic-gate 			    vn_has_cached_data(vp) &&
3029*7c478bd9Sstevel@tonic-gate 			    ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
3030*7c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
3031*7c478bd9Sstevel@tonic-gate 				vplist[cnt++] = vp;
3032*7c478bd9Sstevel@tonic-gate 				if (cnt == num) {
3033*7c478bd9Sstevel@tonic-gate 					rw_exit(&rtable[index].r_lock);
3034*7c478bd9Sstevel@tonic-gate 					goto toomany;
3035*7c478bd9Sstevel@tonic-gate 				}
3036*7c478bd9Sstevel@tonic-gate 			}
3037*7c478bd9Sstevel@tonic-gate 		}
3038*7c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
3039*7c478bd9Sstevel@tonic-gate 	}
3040*7c478bd9Sstevel@tonic-gate toomany:
3041*7c478bd9Sstevel@tonic-gate 
3042*7c478bd9Sstevel@tonic-gate 	/*
3043*7c478bd9Sstevel@tonic-gate 	 * Flush and release all of the files on the list.
3044*7c478bd9Sstevel@tonic-gate 	 */
3045*7c478bd9Sstevel@tonic-gate 	while (cnt-- > 0) {
3046*7c478bd9Sstevel@tonic-gate 		vp = vplist[cnt];
3047*7c478bd9Sstevel@tonic-gate 		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr);
3048*7c478bd9Sstevel@tonic-gate 		VN_RELE(vp);
3049*7c478bd9Sstevel@tonic-gate 	}
3050*7c478bd9Sstevel@tonic-gate 
3051*7c478bd9Sstevel@tonic-gate 	/*
3052*7c478bd9Sstevel@tonic-gate 	 * Free the space allocated to hold the list.
3053*7c478bd9Sstevel@tonic-gate 	 */
3054*7c478bd9Sstevel@tonic-gate 	kmem_free(vplist, num * sizeof (*vplist));
3055*7c478bd9Sstevel@tonic-gate }
3056*7c478bd9Sstevel@tonic-gate 
3057*7c478bd9Sstevel@tonic-gate /*
3058*7c478bd9Sstevel@tonic-gate  * This probably needs to be larger than or equal to
3059*7c478bd9Sstevel@tonic-gate  * log2(sizeof (struct rnode)) due to the way that rnodes are
3060*7c478bd9Sstevel@tonic-gate  * allocated.
3061*7c478bd9Sstevel@tonic-gate  */
3062*7c478bd9Sstevel@tonic-gate #define	ACACHE_SHIFT_BITS	9
3063*7c478bd9Sstevel@tonic-gate 
3064*7c478bd9Sstevel@tonic-gate static int
3065*7c478bd9Sstevel@tonic-gate acachehash(rnode_t *rp, cred_t *cr)
3066*7c478bd9Sstevel@tonic-gate {
3067*7c478bd9Sstevel@tonic-gate 
3068*7c478bd9Sstevel@tonic-gate 	return ((((intptr_t)rp >> ACACHE_SHIFT_BITS) + crgetuid(cr)) &
3069*7c478bd9Sstevel@tonic-gate 	    acachemask);
3070*7c478bd9Sstevel@tonic-gate }
3071*7c478bd9Sstevel@tonic-gate 
3072*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3073*7c478bd9Sstevel@tonic-gate static long nfs_access_cache_hits = 0;
3074*7c478bd9Sstevel@tonic-gate static long nfs_access_cache_misses = 0;
3075*7c478bd9Sstevel@tonic-gate #endif
3076*7c478bd9Sstevel@tonic-gate 
3077*7c478bd9Sstevel@tonic-gate nfs_access_type_t
3078*7c478bd9Sstevel@tonic-gate nfs_access_check(rnode_t *rp, uint32_t acc, cred_t *cr)
3079*7c478bd9Sstevel@tonic-gate {
3080*7c478bd9Sstevel@tonic-gate 	vnode_t *vp;
3081*7c478bd9Sstevel@tonic-gate 	acache_t *ap;
3082*7c478bd9Sstevel@tonic-gate 	acache_hash_t *hp;
3083*7c478bd9Sstevel@tonic-gate 	nfs_access_type_t all;
3084*7c478bd9Sstevel@tonic-gate 
3085*7c478bd9Sstevel@tonic-gate 	vp = RTOV(rp);
3086*7c478bd9Sstevel@tonic-gate 	if (!ATTRCACHE_VALID(vp) || nfs_waitfor_purge_complete(vp))
3087*7c478bd9Sstevel@tonic-gate 		return (NFS_ACCESS_UNKNOWN);
3088*7c478bd9Sstevel@tonic-gate 
3089*7c478bd9Sstevel@tonic-gate 	if (rp->r_acache != NULL) {
3090*7c478bd9Sstevel@tonic-gate 		hp = &acache[acachehash(rp, cr)];
3091*7c478bd9Sstevel@tonic-gate 		rw_enter(&hp->lock, RW_READER);
3092*7c478bd9Sstevel@tonic-gate 		ap = hp->next;
3093*7c478bd9Sstevel@tonic-gate 		while (ap != (acache_t *)hp) {
3094*7c478bd9Sstevel@tonic-gate 			if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
3095*7c478bd9Sstevel@tonic-gate 				if ((ap->known & acc) == acc) {
3096*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3097*7c478bd9Sstevel@tonic-gate 					nfs_access_cache_hits++;
3098*7c478bd9Sstevel@tonic-gate #endif
3099*7c478bd9Sstevel@tonic-gate 					if ((ap->allowed & acc) == acc)
3100*7c478bd9Sstevel@tonic-gate 						all = NFS_ACCESS_ALLOWED;
3101*7c478bd9Sstevel@tonic-gate 					else
3102*7c478bd9Sstevel@tonic-gate 						all = NFS_ACCESS_DENIED;
3103*7c478bd9Sstevel@tonic-gate 				} else {
3104*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3105*7c478bd9Sstevel@tonic-gate 					nfs_access_cache_misses++;
3106*7c478bd9Sstevel@tonic-gate #endif
3107*7c478bd9Sstevel@tonic-gate 					all = NFS_ACCESS_UNKNOWN;
3108*7c478bd9Sstevel@tonic-gate 				}
3109*7c478bd9Sstevel@tonic-gate 				rw_exit(&hp->lock);
3110*7c478bd9Sstevel@tonic-gate 				return (all);
3111*7c478bd9Sstevel@tonic-gate 			}
3112*7c478bd9Sstevel@tonic-gate 			ap = ap->next;
3113*7c478bd9Sstevel@tonic-gate 		}
3114*7c478bd9Sstevel@tonic-gate 		rw_exit(&hp->lock);
3115*7c478bd9Sstevel@tonic-gate 	}
3116*7c478bd9Sstevel@tonic-gate 
3117*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3118*7c478bd9Sstevel@tonic-gate 	nfs_access_cache_misses++;
3119*7c478bd9Sstevel@tonic-gate #endif
3120*7c478bd9Sstevel@tonic-gate 	return (NFS_ACCESS_UNKNOWN);
3121*7c478bd9Sstevel@tonic-gate }
3122*7c478bd9Sstevel@tonic-gate 
3123*7c478bd9Sstevel@tonic-gate void
3124*7c478bd9Sstevel@tonic-gate nfs_access_cache(rnode_t *rp, uint32_t acc, uint32_t resacc, cred_t *cr)
3125*7c478bd9Sstevel@tonic-gate {
3126*7c478bd9Sstevel@tonic-gate 	acache_t *ap;
3127*7c478bd9Sstevel@tonic-gate 	acache_t *nap;
3128*7c478bd9Sstevel@tonic-gate 	acache_hash_t *hp;
3129*7c478bd9Sstevel@tonic-gate 
3130*7c478bd9Sstevel@tonic-gate 	hp = &acache[acachehash(rp, cr)];
3131*7c478bd9Sstevel@tonic-gate 
3132*7c478bd9Sstevel@tonic-gate 	/*
3133*7c478bd9Sstevel@tonic-gate 	 * Allocate now assuming that mostly an allocation will be
3134*7c478bd9Sstevel@tonic-gate 	 * required.  This allows the allocation to happen without
3135*7c478bd9Sstevel@tonic-gate 	 * holding the hash bucket locked.
3136*7c478bd9Sstevel@tonic-gate 	 */
3137*7c478bd9Sstevel@tonic-gate 	nap = kmem_cache_alloc(acache_cache, KM_NOSLEEP);
3138*7c478bd9Sstevel@tonic-gate 	if (nap != NULL) {
3139*7c478bd9Sstevel@tonic-gate 		nap->known = acc;
3140*7c478bd9Sstevel@tonic-gate 		nap->allowed = resacc;
3141*7c478bd9Sstevel@tonic-gate 		nap->rnode = rp;
3142*7c478bd9Sstevel@tonic-gate 		crhold(cr);
3143*7c478bd9Sstevel@tonic-gate 		nap->cred = cr;
3144*7c478bd9Sstevel@tonic-gate 		nap->hashq = hp;
3145*7c478bd9Sstevel@tonic-gate 	}
3146*7c478bd9Sstevel@tonic-gate 
3147*7c478bd9Sstevel@tonic-gate 	rw_enter(&hp->lock, RW_WRITER);
3148*7c478bd9Sstevel@tonic-gate 
3149*7c478bd9Sstevel@tonic-gate 	if (rp->r_acache != NULL) {
3150*7c478bd9Sstevel@tonic-gate 		ap = hp->next;
3151*7c478bd9Sstevel@tonic-gate 		while (ap != (acache_t *)hp) {
3152*7c478bd9Sstevel@tonic-gate 			if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
3153*7c478bd9Sstevel@tonic-gate 				ap->known |= acc;
3154*7c478bd9Sstevel@tonic-gate 				ap->allowed &= ~acc;
3155*7c478bd9Sstevel@tonic-gate 				ap->allowed |= resacc;
3156*7c478bd9Sstevel@tonic-gate 				rw_exit(&hp->lock);
3157*7c478bd9Sstevel@tonic-gate 				if (nap != NULL) {
3158*7c478bd9Sstevel@tonic-gate 					crfree(nap->cred);
3159*7c478bd9Sstevel@tonic-gate 					kmem_cache_free(acache_cache, nap);
3160*7c478bd9Sstevel@tonic-gate 				}
3161*7c478bd9Sstevel@tonic-gate 				return;
3162*7c478bd9Sstevel@tonic-gate 			}
3163*7c478bd9Sstevel@tonic-gate 			ap = ap->next;
3164*7c478bd9Sstevel@tonic-gate 		}
3165*7c478bd9Sstevel@tonic-gate 	}
3166*7c478bd9Sstevel@tonic-gate 
3167*7c478bd9Sstevel@tonic-gate 	if (nap != NULL) {
3168*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3169*7c478bd9Sstevel@tonic-gate 		clstat_debug.access.value.ui64++;
3170*7c478bd9Sstevel@tonic-gate #endif
3171*7c478bd9Sstevel@tonic-gate 		nap->next = hp->next;
3172*7c478bd9Sstevel@tonic-gate 		hp->next = nap;
3173*7c478bd9Sstevel@tonic-gate 		nap->next->prev = nap;
3174*7c478bd9Sstevel@tonic-gate 		nap->prev = (acache_t *)hp;
3175*7c478bd9Sstevel@tonic-gate 
3176*7c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
3177*7c478bd9Sstevel@tonic-gate 		nap->list = rp->r_acache;
3178*7c478bd9Sstevel@tonic-gate 		rp->r_acache = nap;
3179*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
3180*7c478bd9Sstevel@tonic-gate 	}
3181*7c478bd9Sstevel@tonic-gate 
3182*7c478bd9Sstevel@tonic-gate 	rw_exit(&hp->lock);
3183*7c478bd9Sstevel@tonic-gate }
3184*7c478bd9Sstevel@tonic-gate 
3185*7c478bd9Sstevel@tonic-gate int
3186*7c478bd9Sstevel@tonic-gate nfs_access_purge_rp(rnode_t *rp)
3187*7c478bd9Sstevel@tonic-gate {
3188*7c478bd9Sstevel@tonic-gate 	acache_t *ap;
3189*7c478bd9Sstevel@tonic-gate 	acache_t *tmpap;
3190*7c478bd9Sstevel@tonic-gate 	acache_t *rplist;
3191*7c478bd9Sstevel@tonic-gate 
3192*7c478bd9Sstevel@tonic-gate 	/*
3193*7c478bd9Sstevel@tonic-gate 	 * If there aren't any cached entries, then there is nothing
3194*7c478bd9Sstevel@tonic-gate 	 * to free.
3195*7c478bd9Sstevel@tonic-gate 	 */
3196*7c478bd9Sstevel@tonic-gate 	if (rp->r_acache == NULL)
3197*7c478bd9Sstevel@tonic-gate 		return (0);
3198*7c478bd9Sstevel@tonic-gate 
3199*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
3200*7c478bd9Sstevel@tonic-gate 	rplist = rp->r_acache;
3201*7c478bd9Sstevel@tonic-gate 	rp->r_acache = NULL;
3202*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
3203*7c478bd9Sstevel@tonic-gate 
3204*7c478bd9Sstevel@tonic-gate 	/*
3205*7c478bd9Sstevel@tonic-gate 	 * Loop through each entry in the list pointed to in the
3206*7c478bd9Sstevel@tonic-gate 	 * rnode.  Remove each of these entries from the hash
3207*7c478bd9Sstevel@tonic-gate 	 * queue that it is on and remove it from the list in
3208*7c478bd9Sstevel@tonic-gate 	 * the rnode.
3209*7c478bd9Sstevel@tonic-gate 	 */
3210*7c478bd9Sstevel@tonic-gate 	for (ap = rplist; ap != NULL; ap = tmpap) {
3211*7c478bd9Sstevel@tonic-gate 		rw_enter(&ap->hashq->lock, RW_WRITER);
3212*7c478bd9Sstevel@tonic-gate 		ap->prev->next = ap->next;
3213*7c478bd9Sstevel@tonic-gate 		ap->next->prev = ap->prev;
3214*7c478bd9Sstevel@tonic-gate 		rw_exit(&ap->hashq->lock);
3215*7c478bd9Sstevel@tonic-gate 
3216*7c478bd9Sstevel@tonic-gate 		tmpap = ap->list;
3217*7c478bd9Sstevel@tonic-gate 		crfree(ap->cred);
3218*7c478bd9Sstevel@tonic-gate 		kmem_cache_free(acache_cache, ap);
3219*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3220*7c478bd9Sstevel@tonic-gate 		clstat_debug.access.value.ui64--;
3221*7c478bd9Sstevel@tonic-gate #endif
3222*7c478bd9Sstevel@tonic-gate 	}
3223*7c478bd9Sstevel@tonic-gate 
3224*7c478bd9Sstevel@tonic-gate 	return (1);
3225*7c478bd9Sstevel@tonic-gate }
3226*7c478bd9Sstevel@tonic-gate 
3227*7c478bd9Sstevel@tonic-gate static const char prefix[] = ".nfs";
3228*7c478bd9Sstevel@tonic-gate 
3229*7c478bd9Sstevel@tonic-gate static kmutex_t newnum_lock;
3230*7c478bd9Sstevel@tonic-gate 
3231*7c478bd9Sstevel@tonic-gate int
3232*7c478bd9Sstevel@tonic-gate newnum(void)
3233*7c478bd9Sstevel@tonic-gate {
3234*7c478bd9Sstevel@tonic-gate 	static uint_t newnum = 0;
3235*7c478bd9Sstevel@tonic-gate 	uint_t id;
3236*7c478bd9Sstevel@tonic-gate 
3237*7c478bd9Sstevel@tonic-gate 	mutex_enter(&newnum_lock);
3238*7c478bd9Sstevel@tonic-gate 	if (newnum == 0)
3239*7c478bd9Sstevel@tonic-gate 		newnum = gethrestime_sec() & 0xffff;
3240*7c478bd9Sstevel@tonic-gate 	id = newnum++;
3241*7c478bd9Sstevel@tonic-gate 	mutex_exit(&newnum_lock);
3242*7c478bd9Sstevel@tonic-gate 	return (id);
3243*7c478bd9Sstevel@tonic-gate }
3244*7c478bd9Sstevel@tonic-gate 
3245*7c478bd9Sstevel@tonic-gate char *
3246*7c478bd9Sstevel@tonic-gate newname(void)
3247*7c478bd9Sstevel@tonic-gate {
3248*7c478bd9Sstevel@tonic-gate 	char *news;
3249*7c478bd9Sstevel@tonic-gate 	char *s;
3250*7c478bd9Sstevel@tonic-gate 	const char *p;
3251*7c478bd9Sstevel@tonic-gate 	uint_t id;
3252*7c478bd9Sstevel@tonic-gate 
3253*7c478bd9Sstevel@tonic-gate 	id = newnum();
3254*7c478bd9Sstevel@tonic-gate 	news = kmem_alloc(MAXNAMELEN, KM_SLEEP);
3255*7c478bd9Sstevel@tonic-gate 	s = news;
3256*7c478bd9Sstevel@tonic-gate 	p = prefix;
3257*7c478bd9Sstevel@tonic-gate 	while (*p != '\0')
3258*7c478bd9Sstevel@tonic-gate 		*s++ = *p++;
3259*7c478bd9Sstevel@tonic-gate 	while (id != 0) {
3260*7c478bd9Sstevel@tonic-gate 		*s++ = "0123456789ABCDEF"[id & 0x0f];
3261*7c478bd9Sstevel@tonic-gate 		id >>= 4;
3262*7c478bd9Sstevel@tonic-gate 	}
3263*7c478bd9Sstevel@tonic-gate 	*s = '\0';
3264*7c478bd9Sstevel@tonic-gate 	return (news);
3265*7c478bd9Sstevel@tonic-gate }
3266*7c478bd9Sstevel@tonic-gate 
3267*7c478bd9Sstevel@tonic-gate int
3268*7c478bd9Sstevel@tonic-gate nfs_atoi(char *cp)
3269*7c478bd9Sstevel@tonic-gate {
3270*7c478bd9Sstevel@tonic-gate 	int n;
3271*7c478bd9Sstevel@tonic-gate 
3272*7c478bd9Sstevel@tonic-gate 	n = 0;
3273*7c478bd9Sstevel@tonic-gate 	while (*cp != '\0') {
3274*7c478bd9Sstevel@tonic-gate 		n = n * 10 + (*cp - '0');
3275*7c478bd9Sstevel@tonic-gate 		cp++;
3276*7c478bd9Sstevel@tonic-gate 	}
3277*7c478bd9Sstevel@tonic-gate 
3278*7c478bd9Sstevel@tonic-gate 	return (n);
3279*7c478bd9Sstevel@tonic-gate }
3280*7c478bd9Sstevel@tonic-gate 
3281*7c478bd9Sstevel@tonic-gate /*
3282*7c478bd9Sstevel@tonic-gate  * Snapshot callback for nfs:0:nfs_client as registered with the kstat
3283*7c478bd9Sstevel@tonic-gate  * framework.
3284*7c478bd9Sstevel@tonic-gate  */
3285*7c478bd9Sstevel@tonic-gate static int
3286*7c478bd9Sstevel@tonic-gate cl_snapshot(kstat_t *ksp, void *buf, int rw)
3287*7c478bd9Sstevel@tonic-gate {
3288*7c478bd9Sstevel@tonic-gate 	ksp->ks_snaptime = gethrtime();
3289*7c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE) {
3290*7c478bd9Sstevel@tonic-gate 		bcopy(buf, ksp->ks_private, sizeof (clstat_tmpl));
3291*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3292*7c478bd9Sstevel@tonic-gate 		/*
3293*7c478bd9Sstevel@tonic-gate 		 * Currently only the global zone can write to kstats, but we
3294*7c478bd9Sstevel@tonic-gate 		 * add the check just for paranoia.
3295*7c478bd9Sstevel@tonic-gate 		 */
3296*7c478bd9Sstevel@tonic-gate 		if (INGLOBALZONE(curproc))
3297*7c478bd9Sstevel@tonic-gate 			bcopy((char *)buf + sizeof (clstat_tmpl), &clstat_debug,
3298*7c478bd9Sstevel@tonic-gate 			    sizeof (clstat_debug));
3299*7c478bd9Sstevel@tonic-gate #endif
3300*7c478bd9Sstevel@tonic-gate 	} else {
3301*7c478bd9Sstevel@tonic-gate 		bcopy(ksp->ks_private, buf, sizeof (clstat_tmpl));
3302*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3303*7c478bd9Sstevel@tonic-gate 		/*
3304*7c478bd9Sstevel@tonic-gate 		 * If we're displaying the "global" debug kstat values, we
3305*7c478bd9Sstevel@tonic-gate 		 * display them as-is to all zones since in fact they apply to
3306*7c478bd9Sstevel@tonic-gate 		 * the system as a whole.
3307*7c478bd9Sstevel@tonic-gate 		 */
3308*7c478bd9Sstevel@tonic-gate 		bcopy(&clstat_debug, (char *)buf + sizeof (clstat_tmpl),
3309*7c478bd9Sstevel@tonic-gate 		    sizeof (clstat_debug));
3310*7c478bd9Sstevel@tonic-gate #endif
3311*7c478bd9Sstevel@tonic-gate 	}
3312*7c478bd9Sstevel@tonic-gate 	return (0);
3313*7c478bd9Sstevel@tonic-gate }
3314*7c478bd9Sstevel@tonic-gate 
3315*7c478bd9Sstevel@tonic-gate static void *
3316*7c478bd9Sstevel@tonic-gate clinit_zone(zoneid_t zoneid)
3317*7c478bd9Sstevel@tonic-gate {
3318*7c478bd9Sstevel@tonic-gate 	kstat_t *nfs_client_kstat;
3319*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
3320*7c478bd9Sstevel@tonic-gate 	uint_t ndata;
3321*7c478bd9Sstevel@tonic-gate 
3322*7c478bd9Sstevel@tonic-gate 	nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
3323*7c478bd9Sstevel@tonic-gate 	mutex_init(&nfscl->nfscl_chtable_lock, NULL, MUTEX_DEFAULT, NULL);
3324*7c478bd9Sstevel@tonic-gate 	nfscl->nfscl_chtable = NULL;
3325*7c478bd9Sstevel@tonic-gate 	nfscl->nfscl_zoneid = zoneid;
3326*7c478bd9Sstevel@tonic-gate 
3327*7c478bd9Sstevel@tonic-gate 	bcopy(&clstat_tmpl, &nfscl->nfscl_stat, sizeof (clstat_tmpl));
3328*7c478bd9Sstevel@tonic-gate 	ndata = sizeof (clstat_tmpl) / sizeof (kstat_named_t);
3329*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3330*7c478bd9Sstevel@tonic-gate 	ndata += sizeof (clstat_debug) / sizeof (kstat_named_t);
3331*7c478bd9Sstevel@tonic-gate #endif
3332*7c478bd9Sstevel@tonic-gate 	if ((nfs_client_kstat = kstat_create_zone("nfs", 0, "nfs_client",
3333*7c478bd9Sstevel@tonic-gate 	    "misc", KSTAT_TYPE_NAMED, ndata,
3334*7c478bd9Sstevel@tonic-gate 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
3335*7c478bd9Sstevel@tonic-gate 		nfs_client_kstat->ks_private = &nfscl->nfscl_stat;
3336*7c478bd9Sstevel@tonic-gate 		nfs_client_kstat->ks_snapshot = cl_snapshot;
3337*7c478bd9Sstevel@tonic-gate 		kstat_install(nfs_client_kstat);
3338*7c478bd9Sstevel@tonic-gate 	}
3339*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
3340*7c478bd9Sstevel@tonic-gate 	list_insert_head(&nfs_clnt_list, nfscl);
3341*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
3342*7c478bd9Sstevel@tonic-gate 	return (nfscl);
3343*7c478bd9Sstevel@tonic-gate }
3344*7c478bd9Sstevel@tonic-gate 
3345*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3346*7c478bd9Sstevel@tonic-gate static void
3347*7c478bd9Sstevel@tonic-gate clfini_zone(zoneid_t zoneid, void *arg)
3348*7c478bd9Sstevel@tonic-gate {
3349*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl = arg;
3350*7c478bd9Sstevel@tonic-gate 	chhead_t *chp, *next;
3351*7c478bd9Sstevel@tonic-gate 
3352*7c478bd9Sstevel@tonic-gate 	if (nfscl == NULL)
3353*7c478bd9Sstevel@tonic-gate 		return;
3354*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
3355*7c478bd9Sstevel@tonic-gate 	list_remove(&nfs_clnt_list, nfscl);
3356*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
3357*7c478bd9Sstevel@tonic-gate 	clreclaim_zone(nfscl, 0);
3358*7c478bd9Sstevel@tonic-gate 	for (chp = nfscl->nfscl_chtable; chp != NULL; chp = next) {
3359*7c478bd9Sstevel@tonic-gate 		ASSERT(chp->ch_list == NULL);
3360*7c478bd9Sstevel@tonic-gate 		kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
3361*7c478bd9Sstevel@tonic-gate 		next = chp->ch_next;
3362*7c478bd9Sstevel@tonic-gate 		kmem_free(chp, sizeof (*chp));
3363*7c478bd9Sstevel@tonic-gate 	}
3364*7c478bd9Sstevel@tonic-gate 	kstat_delete_byname_zone("nfs", 0, "nfs_client", zoneid);
3365*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&nfscl->nfscl_chtable_lock);
3366*7c478bd9Sstevel@tonic-gate 	kmem_free(nfscl, sizeof (*nfscl));
3367*7c478bd9Sstevel@tonic-gate }
3368*7c478bd9Sstevel@tonic-gate 
3369*7c478bd9Sstevel@tonic-gate /*
3370*7c478bd9Sstevel@tonic-gate  * Called by endpnt_destructor to make sure the client handles are
3371*7c478bd9Sstevel@tonic-gate  * cleaned up before the RPC endpoints.  This becomes a no-op if
3372*7c478bd9Sstevel@tonic-gate  * clfini_zone (above) is called first.  This function is needed
3373*7c478bd9Sstevel@tonic-gate  * (rather than relying on clfini_zone to clean up) because the ZSD
3374*7c478bd9Sstevel@tonic-gate  * callbacks have no ordering mechanism, so we have no way to ensure
3375*7c478bd9Sstevel@tonic-gate  * that clfini_zone is called before endpnt_destructor.
3376*7c478bd9Sstevel@tonic-gate  */
3377*7c478bd9Sstevel@tonic-gate void
3378*7c478bd9Sstevel@tonic-gate clcleanup_zone(zoneid_t zoneid)
3379*7c478bd9Sstevel@tonic-gate {
3380*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
3381*7c478bd9Sstevel@tonic-gate 
3382*7c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
3383*7c478bd9Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
3384*7c478bd9Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) {
3385*7c478bd9Sstevel@tonic-gate 		if (nfscl->nfscl_zoneid == zoneid) {
3386*7c478bd9Sstevel@tonic-gate 			clreclaim_zone(nfscl, 0);
3387*7c478bd9Sstevel@tonic-gate 			break;
3388*7c478bd9Sstevel@tonic-gate 		}
3389*7c478bd9Sstevel@tonic-gate 	}
3390*7c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
3391*7c478bd9Sstevel@tonic-gate }
3392*7c478bd9Sstevel@tonic-gate 
/*
 * One-time initialization of the NFS client subsystem: sizes and builds
 * the rnode hash table, the access cache, the client handle cache, the
 * per-zone client list and its ZSD key, the global locks, and the device
 * major number used for NFS mounts.  Always returns 0; all allocations
 * here use KM_SLEEP and so cannot fail.
 */
int
nfs_subrinit(void)
{
	int i;
	ulong_t nrnode_max;

	/*
	 * Allocate and initialize the rnode hash queues.  If nrnode was
	 * not tuned (e.g. via /etc/system) default it to ncsize, and in
	 * any case clamp it so that the rnodes consume no more than a
	 * quarter (>> 2) of available kernel memory.
	 */
	if (nrnode <= 0)
		nrnode = ncsize;
	nrnode_max = (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode));
	if (nrnode > nrnode_max || (nrnode == 0 && ncsize == 0)) {
		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
		    "setting nrnode to max value of %ld", nrnode_max);
		nrnode = nrnode_max;
	}

	/* Table size is a power of two so rtablemask (size - 1) is a mask. */
	rtablesize = 1 << highbit(nrnode / hashlen);
	rtablemask = rtablesize - 1;
	rtable = kmem_alloc(rtablesize * sizeof (*rtable), KM_SLEEP);
	for (i = 0; i < rtablesize; i++) {
		/* Each bucket begins as an empty circular list. */
		rtable[i].r_hashf = (rnode_t *)(&rtable[i]);
		rtable[i].r_hashb = (rnode_t *)(&rtable[i]);
		rw_init(&rtable[i].r_lock, NULL, RW_DEFAULT, NULL);
	}
	/* nfs_reclaim is invoked when the system is short on memory. */
	rnode_cache = kmem_cache_create("rnode_cache", sizeof (rnode_t),
	    0, NULL, NULL, nfs_reclaim, NULL, NULL, 0);

	/*
	 * Allocate and initialize the access cache
	 */

	/*
	 * Initial guess is one access cache entry per rnode unless
	 * nacache is set to a non-zero value and then it is used to
	 * indicate a guess at the number of access cache entries.
	 */
	if (nacache > 0)
		acachesize = 1 << highbit(nacache / hashlen);
	else
		acachesize = rtablesize;
	acachemask = acachesize - 1;
	acache = kmem_alloc(acachesize * sizeof (*acache), KM_SLEEP);
	for (i = 0; i < acachesize; i++) {
		/* Empty circular list per bucket, as with rtable above. */
		acache[i].next = (acache_t *)&acache[i];
		acache[i].prev = (acache_t *)&acache[i];
		rw_init(&acache[i].lock, NULL, RW_DEFAULT, NULL);
	}
	acache_cache = kmem_cache_create("nfs_access_cache",
	    sizeof (acache_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	/*
	 * Allocate and initialize the client handle cache
	 */
	chtab_cache = kmem_cache_create("client_handle_cache",
		sizeof (struct chtab), 0, NULL, NULL, clreclaim, NULL,
		NULL, 0);
	/*
	 * Initialize the list of per-zone client handles (and associated data).
	 * This needs to be done before we call zone_key_create().
	 */
	list_create(&nfs_clnt_list, sizeof (struct nfs_clnt),
	    offsetof(struct nfs_clnt, nfscl_node));
	/*
	 * Initialize the zone_key for per-zone client handle lists.
	 */
	zone_key_create(&nfsclnt_zone_key, clinit_zone, NULL, clfini_zone);
	/*
	 * Initialize the various mutexes and reader/writer locks
	 */
	mutex_init(&rpfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&newnum_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&nfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Assign unique major number for all nfs mounts
	 */
	if ((nfs_major = getudev()) == -1) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs: init: can't get unique device number");
		nfs_major = 0;
	}
	nfs_minor = 0;

	/* Fall back to the compiled-in jukebox delay unless it was tuned. */
	if (nfs3_jukebox_delay == 0)
		nfs3_jukebox_delay = NFS3_JUKEBOX_DELAY;

	return (0);
}
3482*7c478bd9Sstevel@tonic-gate 
3483*7c478bd9Sstevel@tonic-gate void
3484*7c478bd9Sstevel@tonic-gate nfs_subrfini(void)
3485*7c478bd9Sstevel@tonic-gate {
3486*7c478bd9Sstevel@tonic-gate 	int i;
3487*7c478bd9Sstevel@tonic-gate 
3488*7c478bd9Sstevel@tonic-gate 	/*
3489*7c478bd9Sstevel@tonic-gate 	 * Deallocate the rnode hash queues
3490*7c478bd9Sstevel@tonic-gate 	 */
3491*7c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(rnode_cache);
3492*7c478bd9Sstevel@tonic-gate 
3493*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < rtablesize; i++)
3494*7c478bd9Sstevel@tonic-gate 		rw_destroy(&rtable[i].r_lock);
3495*7c478bd9Sstevel@tonic-gate 	kmem_free(rtable, rtablesize * sizeof (*rtable));
3496*7c478bd9Sstevel@tonic-gate 
3497*7c478bd9Sstevel@tonic-gate 	/*
3498*7c478bd9Sstevel@tonic-gate 	 * Deallocated the access cache
3499*7c478bd9Sstevel@tonic-gate 	 */
3500*7c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(acache_cache);
3501*7c478bd9Sstevel@tonic-gate 
3502*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < acachesize; i++)
3503*7c478bd9Sstevel@tonic-gate 		rw_destroy(&acache[i].lock);
3504*7c478bd9Sstevel@tonic-gate 	kmem_free(acache, acachesize * sizeof (*acache));
3505*7c478bd9Sstevel@tonic-gate 
3506*7c478bd9Sstevel@tonic-gate 	/*
3507*7c478bd9Sstevel@tonic-gate 	 * Deallocate the client handle cache
3508*7c478bd9Sstevel@tonic-gate 	 */
3509*7c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(chtab_cache);
3510*7c478bd9Sstevel@tonic-gate 
3511*7c478bd9Sstevel@tonic-gate 	/*
3512*7c478bd9Sstevel@tonic-gate 	 * Destroy the various mutexes and reader/writer locks
3513*7c478bd9Sstevel@tonic-gate 	 */
3514*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpfreelist_lock);
3515*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&newnum_lock);
3516*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&nfs_minor_lock);
3517*7c478bd9Sstevel@tonic-gate 	(void) zone_key_delete(nfsclnt_zone_key);
3518*7c478bd9Sstevel@tonic-gate }
3519*7c478bd9Sstevel@tonic-gate 
3520*7c478bd9Sstevel@tonic-gate enum nfsstat
3521*7c478bd9Sstevel@tonic-gate puterrno(int error)
3522*7c478bd9Sstevel@tonic-gate {
3523*7c478bd9Sstevel@tonic-gate 
3524*7c478bd9Sstevel@tonic-gate 	switch (error) {
3525*7c478bd9Sstevel@tonic-gate 	case EOPNOTSUPP:
3526*7c478bd9Sstevel@tonic-gate 		return (NFSERR_OPNOTSUPP);
3527*7c478bd9Sstevel@tonic-gate 	case ENAMETOOLONG:
3528*7c478bd9Sstevel@tonic-gate 		return (NFSERR_NAMETOOLONG);
3529*7c478bd9Sstevel@tonic-gate 	case ENOTEMPTY:
3530*7c478bd9Sstevel@tonic-gate 		return (NFSERR_NOTEMPTY);
3531*7c478bd9Sstevel@tonic-gate 	case EDQUOT:
3532*7c478bd9Sstevel@tonic-gate 		return (NFSERR_DQUOT);
3533*7c478bd9Sstevel@tonic-gate 	case ESTALE:
3534*7c478bd9Sstevel@tonic-gate 		return (NFSERR_STALE);
3535*7c478bd9Sstevel@tonic-gate 	case EREMOTE:
3536*7c478bd9Sstevel@tonic-gate 		return (NFSERR_REMOTE);
3537*7c478bd9Sstevel@tonic-gate 	case ENOSYS:
3538*7c478bd9Sstevel@tonic-gate 		return (NFSERR_OPNOTSUPP);
3539*7c478bd9Sstevel@tonic-gate 	case EOVERFLOW:
3540*7c478bd9Sstevel@tonic-gate 		return (NFSERR_INVAL);
3541*7c478bd9Sstevel@tonic-gate 	default:
3542*7c478bd9Sstevel@tonic-gate 		return ((enum nfsstat)error);
3543*7c478bd9Sstevel@tonic-gate 	}
3544*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
3545*7c478bd9Sstevel@tonic-gate }
3546*7c478bd9Sstevel@tonic-gate 
3547*7c478bd9Sstevel@tonic-gate int
3548*7c478bd9Sstevel@tonic-gate geterrno(enum nfsstat status)
3549*7c478bd9Sstevel@tonic-gate {
3550*7c478bd9Sstevel@tonic-gate 
3551*7c478bd9Sstevel@tonic-gate 	switch (status) {
3552*7c478bd9Sstevel@tonic-gate 	case NFSERR_OPNOTSUPP:
3553*7c478bd9Sstevel@tonic-gate 		return (EOPNOTSUPP);
3554*7c478bd9Sstevel@tonic-gate 	case NFSERR_NAMETOOLONG:
3555*7c478bd9Sstevel@tonic-gate 		return (ENAMETOOLONG);
3556*7c478bd9Sstevel@tonic-gate 	case NFSERR_NOTEMPTY:
3557*7c478bd9Sstevel@tonic-gate 		return (ENOTEMPTY);
3558*7c478bd9Sstevel@tonic-gate 	case NFSERR_DQUOT:
3559*7c478bd9Sstevel@tonic-gate 		return (EDQUOT);
3560*7c478bd9Sstevel@tonic-gate 	case NFSERR_STALE:
3561*7c478bd9Sstevel@tonic-gate 		return (ESTALE);
3562*7c478bd9Sstevel@tonic-gate 	case NFSERR_REMOTE:
3563*7c478bd9Sstevel@tonic-gate 		return (EREMOTE);
3564*7c478bd9Sstevel@tonic-gate 	case NFSERR_WFLUSH:
3565*7c478bd9Sstevel@tonic-gate 		return (EIO);
3566*7c478bd9Sstevel@tonic-gate 	default:
3567*7c478bd9Sstevel@tonic-gate 		return ((int)status);
3568*7c478bd9Sstevel@tonic-gate 	}
3569*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
3570*7c478bd9Sstevel@tonic-gate }
3571*7c478bd9Sstevel@tonic-gate 
/*
 * Translate a local errno value into the equivalent NFS v3 status code.
 * DEBUG kernels map every expected errno explicitly and log anything
 * unexpected before passing it through.  Non-DEBUG kernels map only a
 * subset and pass everything else through numerically — presumably the
 * remaining errno values coincide with their nfsstat3 counterparts
 * (TODO confirm against the nfsstat3 definitions).
 */
enum nfsstat3
puterrno3(int error)
{

#ifdef DEBUG
	switch (error) {
	case 0:
		return (NFS3_OK);
	case EPERM:
		return (NFS3ERR_PERM);
	case ENOENT:
		return (NFS3ERR_NOENT);
	case EIO:
		return (NFS3ERR_IO);
	case ENXIO:
		return (NFS3ERR_NXIO);
	case EACCES:
		return (NFS3ERR_ACCES);
	case EEXIST:
		return (NFS3ERR_EXIST);
	case EXDEV:
		return (NFS3ERR_XDEV);
	case ENODEV:
		return (NFS3ERR_NODEV);
	case ENOTDIR:
		return (NFS3ERR_NOTDIR);
	case EISDIR:
		return (NFS3ERR_ISDIR);
	case EINVAL:
		return (NFS3ERR_INVAL);
	case EFBIG:
		return (NFS3ERR_FBIG);
	case ENOSPC:
		return (NFS3ERR_NOSPC);
	case EROFS:
		return (NFS3ERR_ROFS);
	case EMLINK:
		return (NFS3ERR_MLINK);
	case ENAMETOOLONG:
		return (NFS3ERR_NAMETOOLONG);
	case ENOTEMPTY:
		return (NFS3ERR_NOTEMPTY);
	case EDQUOT:
		return (NFS3ERR_DQUOT);
	case ESTALE:
		return (NFS3ERR_STALE);
	case EREMOTE:
		return (NFS3ERR_REMOTE);
	case EOPNOTSUPP:
		return (NFS3ERR_NOTSUPP);
	case EOVERFLOW:
		return (NFS3ERR_INVAL);
	default:
		/* Unexpected errno: log it so the mapping can be extended. */
		zcmn_err(getzoneid(), CE_WARN,
		    "puterrno3: got error %d", error);
		return ((enum nfsstat3)error);
	}
#else
	switch (error) {
	case ENAMETOOLONG:
		return (NFS3ERR_NAMETOOLONG);
	case ENOTEMPTY:
		return (NFS3ERR_NOTEMPTY);
	case EDQUOT:
		return (NFS3ERR_DQUOT);
	case ESTALE:
		return (NFS3ERR_STALE);
	case EOPNOTSUPP:
		return (NFS3ERR_NOTSUPP);
	case EREMOTE:
		return (NFS3ERR_REMOTE);
	case EOVERFLOW:
		return (NFS3ERR_INVAL);
	default:
		return ((enum nfsstat3)error);
	}
#endif
}
3650*7c478bd9Sstevel@tonic-gate 
/*
 * Translate an NFS v3 status code into a local errno.  Several protocol
 * statuses have no direct errno equivalent and collapse to the nearest
 * fit (e.g. BADHANDLE -> ESTALE, SERVERFAULT -> EIO, JUKEBOX -> ENXIO).
 * DEBUG kernels map every status explicitly and log anything unexpected;
 * non-DEBUG kernels map only the statuses whose numbers need changing
 * and pass the rest through numerically.
 */
int
geterrno3(enum nfsstat3 status)
{

#ifdef DEBUG
	switch (status) {
	case NFS3_OK:
		return (0);
	case NFS3ERR_PERM:
		return (EPERM);
	case NFS3ERR_NOENT:
		return (ENOENT);
	case NFS3ERR_IO:
		return (EIO);
	case NFS3ERR_NXIO:
		return (ENXIO);
	case NFS3ERR_ACCES:
		return (EACCES);
	case NFS3ERR_EXIST:
		return (EEXIST);
	case NFS3ERR_XDEV:
		return (EXDEV);
	case NFS3ERR_NODEV:
		return (ENODEV);
	case NFS3ERR_NOTDIR:
		return (ENOTDIR);
	case NFS3ERR_ISDIR:
		return (EISDIR);
	case NFS3ERR_INVAL:
		return (EINVAL);
	case NFS3ERR_FBIG:
		return (EFBIG);
	case NFS3ERR_NOSPC:
		return (ENOSPC);
	case NFS3ERR_ROFS:
		return (EROFS);
	case NFS3ERR_MLINK:
		return (EMLINK);
	case NFS3ERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFS3ERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFS3ERR_DQUOT:
		return (EDQUOT);
	case NFS3ERR_STALE:
		return (ESTALE);
	case NFS3ERR_REMOTE:
		return (EREMOTE);
	case NFS3ERR_BADHANDLE:
		return (ESTALE);
	case NFS3ERR_NOT_SYNC:
		return (EINVAL);
	case NFS3ERR_BAD_COOKIE:
		return (ENOENT);
	case NFS3ERR_NOTSUPP:
		return (EOPNOTSUPP);
	case NFS3ERR_TOOSMALL:
		return (EINVAL);
	case NFS3ERR_SERVERFAULT:
		return (EIO);
	case NFS3ERR_BADTYPE:
		return (EINVAL);
	case NFS3ERR_JUKEBOX:
		return (ENXIO);
	default:
		/* Unexpected status: log it so the mapping can be extended. */
		zcmn_err(getzoneid(), CE_WARN,
		    "geterrno3: got status %d", status);
		return ((int)status);
	}
#else
	switch (status) {
	case NFS3ERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFS3ERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFS3ERR_DQUOT:
		return (EDQUOT);
	case NFS3ERR_STALE:
	case NFS3ERR_BADHANDLE:
		return (ESTALE);
	case NFS3ERR_NOTSUPP:
		return (EOPNOTSUPP);
	case NFS3ERR_REMOTE:
		return (EREMOTE);
	case NFS3ERR_NOT_SYNC:
	case NFS3ERR_TOOSMALL:
	case NFS3ERR_BADTYPE:
		return (EINVAL);
	case NFS3ERR_BAD_COOKIE:
		return (ENOENT);
	case NFS3ERR_SERVERFAULT:
		return (EIO);
	case NFS3ERR_JUKEBOX:
		return (ENXIO);
	default:
		return ((int)status);
	}
#endif
}
3750*7c478bd9Sstevel@tonic-gate 
3751*7c478bd9Sstevel@tonic-gate rddir_cache *
3752*7c478bd9Sstevel@tonic-gate rddir_cache_alloc(int flags)
3753*7c478bd9Sstevel@tonic-gate {
3754*7c478bd9Sstevel@tonic-gate 	rddir_cache *rc;
3755*7c478bd9Sstevel@tonic-gate 
3756*7c478bd9Sstevel@tonic-gate 	rc = kmem_alloc(sizeof (*rc), flags);
3757*7c478bd9Sstevel@tonic-gate 	if (rc != NULL) {
3758*7c478bd9Sstevel@tonic-gate 		rc->entries = NULL;
3759*7c478bd9Sstevel@tonic-gate 		rc->flags = RDDIR;
3760*7c478bd9Sstevel@tonic-gate 		cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
3761*7c478bd9Sstevel@tonic-gate 		mutex_init(&rc->lock, NULL, MUTEX_DEFAULT, NULL);
3762*7c478bd9Sstevel@tonic-gate 		rc->count = 1;
3763*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3764*7c478bd9Sstevel@tonic-gate 		atomic_add_64(&clstat_debug.dirent.value.ui64, 1);
3765*7c478bd9Sstevel@tonic-gate #endif
3766*7c478bd9Sstevel@tonic-gate 	}
3767*7c478bd9Sstevel@tonic-gate 	return (rc);
3768*7c478bd9Sstevel@tonic-gate }
3769*7c478bd9Sstevel@tonic-gate 
3770*7c478bd9Sstevel@tonic-gate static void
3771*7c478bd9Sstevel@tonic-gate rddir_cache_free(rddir_cache *rc)
3772*7c478bd9Sstevel@tonic-gate {
3773*7c478bd9Sstevel@tonic-gate 
3774*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3775*7c478bd9Sstevel@tonic-gate 	atomic_add_64(&clstat_debug.dirent.value.ui64, -1);
3776*7c478bd9Sstevel@tonic-gate #endif
3777*7c478bd9Sstevel@tonic-gate 	if (rc->entries != NULL) {
3778*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3779*7c478bd9Sstevel@tonic-gate 		rddir_cache_buf_free(rc->entries, rc->buflen);
3780*7c478bd9Sstevel@tonic-gate #else
3781*7c478bd9Sstevel@tonic-gate 		kmem_free(rc->entries, rc->buflen);
3782*7c478bd9Sstevel@tonic-gate #endif
3783*7c478bd9Sstevel@tonic-gate 	}
3784*7c478bd9Sstevel@tonic-gate 	cv_destroy(&rc->cv);
3785*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&rc->lock);
3786*7c478bd9Sstevel@tonic-gate 	kmem_free(rc, sizeof (*rc));
3787*7c478bd9Sstevel@tonic-gate }
3788*7c478bd9Sstevel@tonic-gate 
/*
 * Take an additional reference on a readdir cache entry.
 */
void
rddir_cache_hold(rddir_cache *rc)
{

	mutex_enter(&rc->lock);
	rc->count++;
	mutex_exit(&rc->lock);
}
3797*7c478bd9Sstevel@tonic-gate 
3798*7c478bd9Sstevel@tonic-gate void
3799*7c478bd9Sstevel@tonic-gate rddir_cache_rele(rddir_cache *rc)
3800*7c478bd9Sstevel@tonic-gate {
3801*7c478bd9Sstevel@tonic-gate 
3802*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rc->lock);
3803*7c478bd9Sstevel@tonic-gate 	ASSERT(rc->count > 0);
3804*7c478bd9Sstevel@tonic-gate 	if (--rc->count == 0) {
3805*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rc->lock);
3806*7c478bd9Sstevel@tonic-gate 		rddir_cache_free(rc);
3807*7c478bd9Sstevel@tonic-gate 	} else
3808*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rc->lock);
3809*7c478bd9Sstevel@tonic-gate }
3810*7c478bd9Sstevel@tonic-gate 
3811*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3812*7c478bd9Sstevel@tonic-gate char *
3813*7c478bd9Sstevel@tonic-gate rddir_cache_buf_alloc(size_t size, int flags)
3814*7c478bd9Sstevel@tonic-gate {
3815*7c478bd9Sstevel@tonic-gate 	char *rc;
3816*7c478bd9Sstevel@tonic-gate 
3817*7c478bd9Sstevel@tonic-gate 	rc = kmem_alloc(size, flags);
3818*7c478bd9Sstevel@tonic-gate 	if (rc != NULL)
3819*7c478bd9Sstevel@tonic-gate 		atomic_add_64(&clstat_debug.dirents.value.ui64, size);
3820*7c478bd9Sstevel@tonic-gate 	return (rc);
3821*7c478bd9Sstevel@tonic-gate }
3822*7c478bd9Sstevel@tonic-gate 
/*
 * DEBUG-only counterpart to rddir_cache_buf_alloc(): subtract the buffer
 * size from the debug kstat and free the buffer.
 */
void
rddir_cache_buf_free(void *addr, size_t size)
{

	atomic_add_64(&clstat_debug.dirents.value.ui64, -(int64_t)size);
	kmem_free(addr, size);
}
3830*7c478bd9Sstevel@tonic-gate #endif
3831*7c478bd9Sstevel@tonic-gate 
/*
 * Strip the auxiliary caches from an rnode on the free list: the held
 * credential, access cache entries, readdir cache, cached symlink
 * contents, cached ACL, and cached pathconf data.  Returns non-zero if
 * anything was freed.  Blocks on r_statelock (contrast with
 * nfs_active_data_reclaim(), which uses mutex_tryenter()).
 */
static int
nfs_free_data_reclaim(rnode_t *rp)
{
	char *contents;
	int size;
	vsecattr_t *vsp;
	nfs3_pathconf_info *info;
	int freed;
	cred_t *cred;

	/*
	 * Free any held credentials and caches which
	 * may be associated with this rnode.  Snapshot the pointers and
	 * clear them under r_statelock, then free outside the lock.
	 */
	mutex_enter(&rp->r_statelock);
	cred = rp->r_cred;
	rp->r_cred = NULL;
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	if (cred != NULL)
		crfree(cred);

	/*
	 * Free the access cache entries.
	 */
	freed = nfs_access_purge_rp(rp);

	/* Nothing else to release; report whether the purge freed anything. */
	if (!HAVE_RDDIR_CACHE(rp) &&
	    contents == NULL &&
	    vsp == NULL &&
	    info == NULL)
		return (freed);

	/*
	 * Free the readdir cache entries
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(RTOV(rp));

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));

	/* At least one of the caches above was present and freed. */
	return (1);
}
3900*7c478bd9Sstevel@tonic-gate 
/*
 * Strip the auxiliary caches from an active (hashed) rnode: access cache
 * entries, readdir cache, cached symlink contents, cached ACL, and
 * cached pathconf data.  Returns non-zero if anything was freed, 0 if
 * the rnode was busy.  Uses mutex_tryenter() so the reclaim path never
 * blocks on a busy rnode.  Unlike nfs_free_data_reclaim(), r_cred is
 * left intact — presumably because the rnode is still in active use.
 */
static int
nfs_active_data_reclaim(rnode_t *rp)
{
	char *contents;
	int size;
	vsecattr_t *vsp;
	nfs3_pathconf_info *info;
	int freed;

	/*
	 * Free any held credentials and caches which
	 * may be associated with this rnode.  Skip the rnode entirely
	 * rather than wait if its state lock is held.
	 */
	if (!mutex_tryenter(&rp->r_statelock))
		return (0);
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	/*
	 * Free the access cache entries.
	 */
	freed = nfs_access_purge_rp(rp);

	/* Nothing else to release; report whether the purge freed anything. */
	if (!HAVE_RDDIR_CACHE(rp) &&
	    contents == NULL &&
	    vsp == NULL &&
	    info == NULL)
		return (freed);

	/*
	 * Free the readdir cache entries
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(RTOV(rp));

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));

	/* At least one of the caches above was present and freed. */
	return (1);
}
3964*7c478bd9Sstevel@tonic-gate 
3965*7c478bd9Sstevel@tonic-gate static int
3966*7c478bd9Sstevel@tonic-gate nfs_free_reclaim(void)
3967*7c478bd9Sstevel@tonic-gate {
3968*7c478bd9Sstevel@tonic-gate 	int freed;
3969*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
3970*7c478bd9Sstevel@tonic-gate 
3971*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3972*7c478bd9Sstevel@tonic-gate 	clstat_debug.f_reclaim.value.ui64++;
3973*7c478bd9Sstevel@tonic-gate #endif
3974*7c478bd9Sstevel@tonic-gate 	freed = 0;
3975*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
3976*7c478bd9Sstevel@tonic-gate 	rp = rpfreelist;
3977*7c478bd9Sstevel@tonic-gate 	if (rp != NULL) {
3978*7c478bd9Sstevel@tonic-gate 		do {
3979*7c478bd9Sstevel@tonic-gate 			if (nfs_free_data_reclaim(rp))
3980*7c478bd9Sstevel@tonic-gate 				freed = 1;
3981*7c478bd9Sstevel@tonic-gate 		} while ((rp = rp->r_freef) != rpfreelist);
3982*7c478bd9Sstevel@tonic-gate 	}
3983*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rpfreelist_lock);
3984*7c478bd9Sstevel@tonic-gate 	return (freed);
3985*7c478bd9Sstevel@tonic-gate }
3986*7c478bd9Sstevel@tonic-gate 
3987*7c478bd9Sstevel@tonic-gate static int
3988*7c478bd9Sstevel@tonic-gate nfs_active_reclaim(void)
3989*7c478bd9Sstevel@tonic-gate {
3990*7c478bd9Sstevel@tonic-gate 	int freed;
3991*7c478bd9Sstevel@tonic-gate 	int index;
3992*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
3993*7c478bd9Sstevel@tonic-gate 
3994*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3995*7c478bd9Sstevel@tonic-gate 	clstat_debug.a_reclaim.value.ui64++;
3996*7c478bd9Sstevel@tonic-gate #endif
3997*7c478bd9Sstevel@tonic-gate 	freed = 0;
3998*7c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
3999*7c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
4000*7c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
4001*7c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
4002*7c478bd9Sstevel@tonic-gate 		    rp = rp->r_hashf) {
4003*7c478bd9Sstevel@tonic-gate 			if (nfs_active_data_reclaim(rp))
4004*7c478bd9Sstevel@tonic-gate 				freed = 1;
4005*7c478bd9Sstevel@tonic-gate 		}
4006*7c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
4007*7c478bd9Sstevel@tonic-gate 	}
4008*7c478bd9Sstevel@tonic-gate 	return (freed);
4009*7c478bd9Sstevel@tonic-gate }
4010*7c478bd9Sstevel@tonic-gate 
4011*7c478bd9Sstevel@tonic-gate static int
4012*7c478bd9Sstevel@tonic-gate nfs_rnode_reclaim(void)
4013*7c478bd9Sstevel@tonic-gate {
4014*7c478bd9Sstevel@tonic-gate 	int freed;
4015*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
4016*7c478bd9Sstevel@tonic-gate 	vnode_t *vp;
4017*7c478bd9Sstevel@tonic-gate 
4018*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4019*7c478bd9Sstevel@tonic-gate 	clstat_debug.r_reclaim.value.ui64++;
4020*7c478bd9Sstevel@tonic-gate #endif
4021*7c478bd9Sstevel@tonic-gate 	freed = 0;
4022*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
4023*7c478bd9Sstevel@tonic-gate 	while ((rp = rpfreelist) != NULL) {
4024*7c478bd9Sstevel@tonic-gate 		rp_rmfree(rp);
4025*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rpfreelist_lock);
4026*7c478bd9Sstevel@tonic-gate 		if (rp->r_flags & RHASHED) {
4027*7c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
4028*7c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
4029*7c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
4030*7c478bd9Sstevel@tonic-gate 			if (vp->v_count > 1) {
4031*7c478bd9Sstevel@tonic-gate 				vp->v_count--;
4032*7c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
4033*7c478bd9Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
4034*7c478bd9Sstevel@tonic-gate 				mutex_enter(&rpfreelist_lock);
4035*7c478bd9Sstevel@tonic-gate 				continue;
4036*7c478bd9Sstevel@tonic-gate 			}
4037*7c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
4038*7c478bd9Sstevel@tonic-gate 			rp_rmhash_locked(rp);
4039*7c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
4040*7c478bd9Sstevel@tonic-gate 		}
4041*7c478bd9Sstevel@tonic-gate 		/*
4042*7c478bd9Sstevel@tonic-gate 		 * This call to rp_addfree will end up destroying the
4043*7c478bd9Sstevel@tonic-gate 		 * rnode, but in a safe way with the appropriate set
4044*7c478bd9Sstevel@tonic-gate 		 * of checks done.
4045*7c478bd9Sstevel@tonic-gate 		 */
4046*7c478bd9Sstevel@tonic-gate 		rp_addfree(rp, CRED());
4047*7c478bd9Sstevel@tonic-gate 		mutex_enter(&rpfreelist_lock);
4048*7c478bd9Sstevel@tonic-gate 	}
4049*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rpfreelist_lock);
4050*7c478bd9Sstevel@tonic-gate 	return (freed);
4051*7c478bd9Sstevel@tonic-gate }
4052*7c478bd9Sstevel@tonic-gate 
4053*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4054*7c478bd9Sstevel@tonic-gate static void
4055*7c478bd9Sstevel@tonic-gate nfs_reclaim(void *cdrarg)
4056*7c478bd9Sstevel@tonic-gate {
4057*7c478bd9Sstevel@tonic-gate 
4058*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4059*7c478bd9Sstevel@tonic-gate 	clstat_debug.reclaim.value.ui64++;
4060*7c478bd9Sstevel@tonic-gate #endif
4061*7c478bd9Sstevel@tonic-gate 	if (nfs_free_reclaim())
4062*7c478bd9Sstevel@tonic-gate 		return;
4063*7c478bd9Sstevel@tonic-gate 
4064*7c478bd9Sstevel@tonic-gate 	if (nfs_active_reclaim())
4065*7c478bd9Sstevel@tonic-gate 		return;
4066*7c478bd9Sstevel@tonic-gate 
4067*7c478bd9Sstevel@tonic-gate 	(void) nfs_rnode_reclaim();
4068*7c478bd9Sstevel@tonic-gate }
4069*7c478bd9Sstevel@tonic-gate 
4070*7c478bd9Sstevel@tonic-gate /*
4071*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4072*7c478bd9Sstevel@tonic-gate  *
4073*7c478bd9Sstevel@tonic-gate  * Routines to copy filehandles
4074*7c478bd9Sstevel@tonic-gate  */
4075*7c478bd9Sstevel@tonic-gate void
4076*7c478bd9Sstevel@tonic-gate nfscopyfh(caddr_t fhp, vnode_t *vp)
4077*7c478bd9Sstevel@tonic-gate {
4078*7c478bd9Sstevel@tonic-gate 	fhandle_t *dest = (fhandle_t *)fhp;
4079*7c478bd9Sstevel@tonic-gate 
4080*7c478bd9Sstevel@tonic-gate 	if (dest != NULL)
4081*7c478bd9Sstevel@tonic-gate 		*dest = *VTOFH(vp);
4082*7c478bd9Sstevel@tonic-gate }
4083*7c478bd9Sstevel@tonic-gate 
4084*7c478bd9Sstevel@tonic-gate void
4085*7c478bd9Sstevel@tonic-gate nfs3copyfh(caddr_t fhp, vnode_t *vp)
4086*7c478bd9Sstevel@tonic-gate {
4087*7c478bd9Sstevel@tonic-gate 	nfs_fh3 *dest = (nfs_fh3 *)fhp;
4088*7c478bd9Sstevel@tonic-gate 
4089*7c478bd9Sstevel@tonic-gate 	if (dest != NULL)
4090*7c478bd9Sstevel@tonic-gate 		*dest = *VTOFH3(vp);
4091*7c478bd9Sstevel@tonic-gate }
4092*7c478bd9Sstevel@tonic-gate 
4093*7c478bd9Sstevel@tonic-gate /*
4094*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4095*7c478bd9Sstevel@tonic-gate  *
4096*7c478bd9Sstevel@tonic-gate  * failover_safe() will test various conditions to ensure that
4097*7c478bd9Sstevel@tonic-gate  * failover is permitted for this vnode.  It will be denied
4098*7c478bd9Sstevel@tonic-gate  * if:
4099*7c478bd9Sstevel@tonic-gate  *	1) the operation in progress does not support failover (NULL fi)
4100*7c478bd9Sstevel@tonic-gate  *	2) there are no available replicas (NULL mi_servers->sv_next)
4101*7c478bd9Sstevel@tonic-gate  *	3) any locks are outstanding on this file
4102*7c478bd9Sstevel@tonic-gate  */
4103*7c478bd9Sstevel@tonic-gate static int
4104*7c478bd9Sstevel@tonic-gate failover_safe(failinfo_t *fi)
4105*7c478bd9Sstevel@tonic-gate {
4106*7c478bd9Sstevel@tonic-gate 
4107*7c478bd9Sstevel@tonic-gate 	/*
4108*7c478bd9Sstevel@tonic-gate 	 * Does this op permit failover?
4109*7c478bd9Sstevel@tonic-gate 	 */
4110*7c478bd9Sstevel@tonic-gate 	if (fi == NULL || fi->vp == NULL)
4111*7c478bd9Sstevel@tonic-gate 		return (0);
4112*7c478bd9Sstevel@tonic-gate 
4113*7c478bd9Sstevel@tonic-gate 	/*
4114*7c478bd9Sstevel@tonic-gate 	 * Are there any alternates to failover to?
4115*7c478bd9Sstevel@tonic-gate 	 */
4116*7c478bd9Sstevel@tonic-gate 	if (VTOMI(fi->vp)->mi_servers->sv_next == NULL)
4117*7c478bd9Sstevel@tonic-gate 		return (0);
4118*7c478bd9Sstevel@tonic-gate 
4119*7c478bd9Sstevel@tonic-gate 	/*
4120*7c478bd9Sstevel@tonic-gate 	 * Disable check; we've forced local locking
4121*7c478bd9Sstevel@tonic-gate 	 *
4122*7c478bd9Sstevel@tonic-gate 	 * if (flk_has_remote_locks(fi->vp))
4123*7c478bd9Sstevel@tonic-gate 	 *	return (0);
4124*7c478bd9Sstevel@tonic-gate 	 */
4125*7c478bd9Sstevel@tonic-gate 
4126*7c478bd9Sstevel@tonic-gate 	/*
4127*7c478bd9Sstevel@tonic-gate 	 * If we have no partial path, we can't do anything
4128*7c478bd9Sstevel@tonic-gate 	 */
4129*7c478bd9Sstevel@tonic-gate 	if (VTOR(fi->vp)->r_path == NULL)
4130*7c478bd9Sstevel@tonic-gate 		return (0);
4131*7c478bd9Sstevel@tonic-gate 
4132*7c478bd9Sstevel@tonic-gate 	return (1);
4133*7c478bd9Sstevel@tonic-gate }
4134*7c478bd9Sstevel@tonic-gate 
4135*7c478bd9Sstevel@tonic-gate #include <sys/thread.h>
4136*7c478bd9Sstevel@tonic-gate 
4137*7c478bd9Sstevel@tonic-gate /*
4138*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4139*7c478bd9Sstevel@tonic-gate  *
4140*7c478bd9Sstevel@tonic-gate  * failover_newserver() will start a search for a new server,
4141*7c478bd9Sstevel@tonic-gate  * preferably by starting an async thread to do the work.  If
4142*7c478bd9Sstevel@tonic-gate  * someone is already doing this (recognizable by MI_BINDINPROG
4143*7c478bd9Sstevel@tonic-gate  * being set), it will simply return and the calling thread
4144*7c478bd9Sstevel@tonic-gate  * will queue on the mi_failover_cv condition variable.
4145*7c478bd9Sstevel@tonic-gate  */
4146*7c478bd9Sstevel@tonic-gate static void
4147*7c478bd9Sstevel@tonic-gate failover_newserver(mntinfo_t *mi)
4148*7c478bd9Sstevel@tonic-gate {
4149*7c478bd9Sstevel@tonic-gate 	/*
4150*7c478bd9Sstevel@tonic-gate 	 * Check if someone else is doing this already
4151*7c478bd9Sstevel@tonic-gate 	 */
4152*7c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
4153*7c478bd9Sstevel@tonic-gate 	if (mi->mi_flags & MI_BINDINPROG) {
4154*7c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
4155*7c478bd9Sstevel@tonic-gate 		return;
4156*7c478bd9Sstevel@tonic-gate 	}
4157*7c478bd9Sstevel@tonic-gate 	mi->mi_flags |= MI_BINDINPROG;
4158*7c478bd9Sstevel@tonic-gate 
4159*7c478bd9Sstevel@tonic-gate 	/*
4160*7c478bd9Sstevel@tonic-gate 	 * Need to hold the vfs struct so that it can't be released
4161*7c478bd9Sstevel@tonic-gate 	 * while the failover thread is selecting a new server.
4162*7c478bd9Sstevel@tonic-gate 	 */
4163*7c478bd9Sstevel@tonic-gate 	VFS_HOLD(mi->mi_vfsp);
4164*7c478bd9Sstevel@tonic-gate 
4165*7c478bd9Sstevel@tonic-gate 	/*
4166*7c478bd9Sstevel@tonic-gate 	 * Start a thread to do the real searching.
4167*7c478bd9Sstevel@tonic-gate 	 */
4168*7c478bd9Sstevel@tonic-gate 	(void) zthread_create(NULL, 0, failover_thread, mi, 0, minclsyspri);
4169*7c478bd9Sstevel@tonic-gate 
4170*7c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
4171*7c478bd9Sstevel@tonic-gate }
4172*7c478bd9Sstevel@tonic-gate 
4173*7c478bd9Sstevel@tonic-gate /*
4174*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4175*7c478bd9Sstevel@tonic-gate  *
4176*7c478bd9Sstevel@tonic-gate  * failover_thread() will find a new server to replace the one
4177*7c478bd9Sstevel@tonic-gate  * currently in use, wake up other threads waiting on this mount
4178*7c478bd9Sstevel@tonic-gate  * point, and die.  It will start at the head of the server list
4179*7c478bd9Sstevel@tonic-gate  * and poll servers until it finds one with an NFS server which is
4180*7c478bd9Sstevel@tonic-gate  * registered and responds to a NULL procedure ping.
4181*7c478bd9Sstevel@tonic-gate  *
4182*7c478bd9Sstevel@tonic-gate  * XXX failover_thread is unsafe within the scope of the
4183*7c478bd9Sstevel@tonic-gate  * present model defined for cpr to suspend the system.
4184*7c478bd9Sstevel@tonic-gate  * Specifically, over-the-wire calls made by the thread
4185*7c478bd9Sstevel@tonic-gate  * are unsafe. The thread needs to be reevaluated in case of
4186*7c478bd9Sstevel@tonic-gate  * future updates to the cpr suspend model.
4187*7c478bd9Sstevel@tonic-gate  */
4188*7c478bd9Sstevel@tonic-gate static void
4189*7c478bd9Sstevel@tonic-gate failover_thread(mntinfo_t *mi)
4190*7c478bd9Sstevel@tonic-gate {
4191*7c478bd9Sstevel@tonic-gate 	servinfo_t *svp = NULL;
4192*7c478bd9Sstevel@tonic-gate 	CLIENT *cl;
4193*7c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
4194*7c478bd9Sstevel@tonic-gate 	struct timeval tv;
4195*7c478bd9Sstevel@tonic-gate 	int error;
4196*7c478bd9Sstevel@tonic-gate 	int oncethru = 0;
4197*7c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
4198*7c478bd9Sstevel@tonic-gate 	rnode_t *rp;
4199*7c478bd9Sstevel@tonic-gate 	int index;
4200*7c478bd9Sstevel@tonic-gate 	char *srvnames;
4201*7c478bd9Sstevel@tonic-gate 	size_t srvnames_len;
4202*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl = NULL;
4203*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
4204*7c478bd9Sstevel@tonic-gate 
4205*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4206*7c478bd9Sstevel@tonic-gate 	/*
4207*7c478bd9Sstevel@tonic-gate 	 * This is currently only needed to access counters which exist on
4208*7c478bd9Sstevel@tonic-gate 	 * DEBUG kernels, hence we don't want to pay the penalty of the lookup
4209*7c478bd9Sstevel@tonic-gate 	 * on non-DEBUG kernels.
4210*7c478bd9Sstevel@tonic-gate 	 */
4211*7c478bd9Sstevel@tonic-gate 	nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone);
4212*7c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
4213*7c478bd9Sstevel@tonic-gate #endif
4214*7c478bd9Sstevel@tonic-gate 
4215*7c478bd9Sstevel@tonic-gate 	/*
4216*7c478bd9Sstevel@tonic-gate 	 * Its safe to piggyback on the mi_lock since failover_newserver()
4217*7c478bd9Sstevel@tonic-gate 	 * code guarantees that there will be only one failover thread
4218*7c478bd9Sstevel@tonic-gate 	 * per mountinfo at any instance.
4219*7c478bd9Sstevel@tonic-gate 	 */
4220*7c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mi->mi_lock, callb_generic_cpr,
4221*7c478bd9Sstevel@tonic-gate 	    "failover_thread");
4222*7c478bd9Sstevel@tonic-gate 
4223*7c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
4224*7c478bd9Sstevel@tonic-gate 	while (mi->mi_readers) {
4225*7c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
4226*7c478bd9Sstevel@tonic-gate 		cv_wait(&mi->mi_failover_cv, &mi->mi_lock);
4227*7c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
4228*7c478bd9Sstevel@tonic-gate 	}
4229*7c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
4230*7c478bd9Sstevel@tonic-gate 
4231*7c478bd9Sstevel@tonic-gate 	tv.tv_sec = 2;
4232*7c478bd9Sstevel@tonic-gate 	tv.tv_usec = 0;
4233*7c478bd9Sstevel@tonic-gate 
4234*7c478bd9Sstevel@tonic-gate 	/*
4235*7c478bd9Sstevel@tonic-gate 	 * Ping the null NFS procedure of every server in
4236*7c478bd9Sstevel@tonic-gate 	 * the list until one responds.  We always start
4237*7c478bd9Sstevel@tonic-gate 	 * at the head of the list and always skip the one
4238*7c478bd9Sstevel@tonic-gate 	 * that is current, since it's caused us a problem.
4239*7c478bd9Sstevel@tonic-gate 	 */
4240*7c478bd9Sstevel@tonic-gate 	while (svp == NULL) {
4241*7c478bd9Sstevel@tonic-gate 		for (svp = mi->mi_servers; svp; svp = svp->sv_next) {
4242*7c478bd9Sstevel@tonic-gate 			if (!oncethru && svp == mi->mi_curr_serv)
4243*7c478bd9Sstevel@tonic-gate 				continue;
4244*7c478bd9Sstevel@tonic-gate 
4245*7c478bd9Sstevel@tonic-gate 			/*
4246*7c478bd9Sstevel@tonic-gate 			 * If the file system was forcibly umounted
4247*7c478bd9Sstevel@tonic-gate 			 * while trying to do a failover, then just
4248*7c478bd9Sstevel@tonic-gate 			 * give up on the failover.  It won't matter
4249*7c478bd9Sstevel@tonic-gate 			 * what the server is.
4250*7c478bd9Sstevel@tonic-gate 			 */
4251*7c478bd9Sstevel@tonic-gate 			if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
4252*7c478bd9Sstevel@tonic-gate 				svp = NULL;
4253*7c478bd9Sstevel@tonic-gate 				goto done;
4254*7c478bd9Sstevel@tonic-gate 			}
4255*7c478bd9Sstevel@tonic-gate 
4256*7c478bd9Sstevel@tonic-gate 			error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr,
4257*7c478bd9Sstevel@tonic-gate 			    NFS_PROGRAM, NFS_VERSION, 0, 1, CRED(), &cl);
4258*7c478bd9Sstevel@tonic-gate 			if (error)
4259*7c478bd9Sstevel@tonic-gate 				continue;
4260*7c478bd9Sstevel@tonic-gate 
4261*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_INT))
4262*7c478bd9Sstevel@tonic-gate 				cl->cl_nosignal = TRUE;
4263*7c478bd9Sstevel@tonic-gate 			status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL,
4264*7c478bd9Sstevel@tonic-gate 			    xdr_void, NULL, tv);
4265*7c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_INT))
4266*7c478bd9Sstevel@tonic-gate 				cl->cl_nosignal = FALSE;
4267*7c478bd9Sstevel@tonic-gate 			AUTH_DESTROY(cl->cl_auth);
4268*7c478bd9Sstevel@tonic-gate 			CLNT_DESTROY(cl);
4269*7c478bd9Sstevel@tonic-gate 			if (status == RPC_SUCCESS) {
4270*7c478bd9Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv) {
4271*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4272*7c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
4273*7c478bd9Sstevel@tonic-gate 			"NFS%d: failing over: selecting original server %s",
4274*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
4275*7c478bd9Sstevel@tonic-gate #else
4276*7c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
4277*7c478bd9Sstevel@tonic-gate 			"NFS: failing over: selecting original server %s",
4278*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
4279*7c478bd9Sstevel@tonic-gate #endif
4280*7c478bd9Sstevel@tonic-gate 				} else {
4281*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4282*7c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
4283*7c478bd9Sstevel@tonic-gate 				    "NFS%d: failing over from %s to %s",
4284*7c478bd9Sstevel@tonic-gate 					    mi->mi_vers,
4285*7c478bd9Sstevel@tonic-gate 					    mi->mi_curr_serv->sv_hostname,
4286*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
4287*7c478bd9Sstevel@tonic-gate #else
4288*7c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
4289*7c478bd9Sstevel@tonic-gate 				    "NFS: failing over from %s to %s",
4290*7c478bd9Sstevel@tonic-gate 					    mi->mi_curr_serv->sv_hostname,
4291*7c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
4292*7c478bd9Sstevel@tonic-gate #endif
4293*7c478bd9Sstevel@tonic-gate 				}
4294*7c478bd9Sstevel@tonic-gate 				break;
4295*7c478bd9Sstevel@tonic-gate 			}
4296*7c478bd9Sstevel@tonic-gate 		}
4297*7c478bd9Sstevel@tonic-gate 
4298*7c478bd9Sstevel@tonic-gate 		if (svp == NULL) {
4299*7c478bd9Sstevel@tonic-gate 			if (!oncethru) {
4300*7c478bd9Sstevel@tonic-gate 				srvnames = nfs_getsrvnames(mi, &srvnames_len);
4301*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4302*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
4303*7c478bd9Sstevel@tonic-gate 				    "NFS%d servers %s not responding "
4304*7c478bd9Sstevel@tonic-gate 				    "still trying\n", mi->mi_vers, srvnames);
4305*7c478bd9Sstevel@tonic-gate #else
4306*7c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS servers %s not responding "
4307*7c478bd9Sstevel@tonic-gate 				    "still trying\n", srvnames);
4308*7c478bd9Sstevel@tonic-gate #endif
4309*7c478bd9Sstevel@tonic-gate 				oncethru = 1;
4310*7c478bd9Sstevel@tonic-gate 			}
4311*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
4312*7c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
4313*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
4314*7c478bd9Sstevel@tonic-gate 			delay(hz);
4315*7c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
4316*7c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
4317*7c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
4318*7c478bd9Sstevel@tonic-gate 		}
4319*7c478bd9Sstevel@tonic-gate 	}
4320*7c478bd9Sstevel@tonic-gate 
4321*7c478bd9Sstevel@tonic-gate 	if (oncethru) {
4322*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4323*7c478bd9Sstevel@tonic-gate 		zprintf(zoneid, "NFS%d servers %s ok\n", mi->mi_vers, srvnames);
4324*7c478bd9Sstevel@tonic-gate #else
4325*7c478bd9Sstevel@tonic-gate 		zprintf(zoneid, "NFS servers %s ok\n", srvnames);
4326*7c478bd9Sstevel@tonic-gate #endif
4327*7c478bd9Sstevel@tonic-gate 	}
4328*7c478bd9Sstevel@tonic-gate 
4329*7c478bd9Sstevel@tonic-gate 	if (svp != mi->mi_curr_serv) {
4330*7c478bd9Sstevel@tonic-gate 		(void) dnlc_purge_vfsp(mi->mi_vfsp, 0);
4331*7c478bd9Sstevel@tonic-gate 		index = rtablehash(&mi->mi_curr_serv->sv_fhandle);
4332*7c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_WRITER);
4333*7c478bd9Sstevel@tonic-gate 		rp = rfind(&rtable[index], &mi->mi_curr_serv->sv_fhandle,
4334*7c478bd9Sstevel@tonic-gate 		    mi->mi_vfsp);
4335*7c478bd9Sstevel@tonic-gate 		if (rp != NULL) {
4336*7c478bd9Sstevel@tonic-gate 			if (rp->r_flags & RHASHED)
4337*7c478bd9Sstevel@tonic-gate 				rp_rmhash_locked(rp);
4338*7c478bd9Sstevel@tonic-gate 			rw_exit(&rtable[index].r_lock);
4339*7c478bd9Sstevel@tonic-gate 			rp->r_server = svp;
4340*7c478bd9Sstevel@tonic-gate 			rp->r_fh = svp->sv_fhandle;
4341*7c478bd9Sstevel@tonic-gate 			(void) nfs_free_data_reclaim(rp);
4342*7c478bd9Sstevel@tonic-gate 			index = rtablehash(&rp->r_fh);
4343*7c478bd9Sstevel@tonic-gate 			rp->r_hashq = &rtable[index];
4344*7c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
4345*7c478bd9Sstevel@tonic-gate 			vn_exists(RTOV(rp));
4346*7c478bd9Sstevel@tonic-gate 			rp_addhash(rp);
4347*7c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
4348*7c478bd9Sstevel@tonic-gate 			VN_RELE(RTOV(rp));
4349*7c478bd9Sstevel@tonic-gate 		} else
4350*7c478bd9Sstevel@tonic-gate 			rw_exit(&rtable[index].r_lock);
4351*7c478bd9Sstevel@tonic-gate 	}
4352*7c478bd9Sstevel@tonic-gate 
4353*7c478bd9Sstevel@tonic-gate done:
4354*7c478bd9Sstevel@tonic-gate 	if (oncethru)
4355*7c478bd9Sstevel@tonic-gate 		kmem_free(srvnames, srvnames_len);
4356*7c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
4357*7c478bd9Sstevel@tonic-gate 	mi->mi_flags &= ~MI_BINDINPROG;
4358*7c478bd9Sstevel@tonic-gate 	if (svp != NULL) {
4359*7c478bd9Sstevel@tonic-gate 		mi->mi_curr_serv = svp;
4360*7c478bd9Sstevel@tonic-gate 		mi->mi_failover++;
4361*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4362*7c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.failover.value.ui64++;
4363*7c478bd9Sstevel@tonic-gate #endif
4364*7c478bd9Sstevel@tonic-gate 	}
4365*7c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_failover_cv);
4366*7c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
4367*7c478bd9Sstevel@tonic-gate 	VFS_RELE(mi->mi_vfsp);
4368*7c478bd9Sstevel@tonic-gate 	zthread_exit();
4369*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
4370*7c478bd9Sstevel@tonic-gate }
4371*7c478bd9Sstevel@tonic-gate 
4372*7c478bd9Sstevel@tonic-gate /*
4373*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4374*7c478bd9Sstevel@tonic-gate  *
4375*7c478bd9Sstevel@tonic-gate  * failover_wait() will put the thread to sleep until MI_BINDINPROG
4376*7c478bd9Sstevel@tonic-gate  * is cleared, meaning that failover is complete.  Called with
4377*7c478bd9Sstevel@tonic-gate  * mi_lock mutex held.
4378*7c478bd9Sstevel@tonic-gate  */
4379*7c478bd9Sstevel@tonic-gate static int
4380*7c478bd9Sstevel@tonic-gate failover_wait(mntinfo_t *mi)
4381*7c478bd9Sstevel@tonic-gate {
4382*7c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
4383*7c478bd9Sstevel@tonic-gate 
4384*7c478bd9Sstevel@tonic-gate 	/*
4385*7c478bd9Sstevel@tonic-gate 	 * If someone else is hunting for a living server,
4386*7c478bd9Sstevel@tonic-gate 	 * sleep until it's done.  After our sleep, we may
4387*7c478bd9Sstevel@tonic-gate 	 * be bound to the right server and get off cheaply.
4388*7c478bd9Sstevel@tonic-gate 	 */
4389*7c478bd9Sstevel@tonic-gate 	while (mi->mi_flags & MI_BINDINPROG) {
4390*7c478bd9Sstevel@tonic-gate 		/*
4391*7c478bd9Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
4392*7c478bd9Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
4393*7c478bd9Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
4394*7c478bd9Sstevel@tonic-gate 		 */
4395*7c478bd9Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
4396*7c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock)) {
4397*7c478bd9Sstevel@tonic-gate 			/*
4398*7c478bd9Sstevel@tonic-gate 			 * restore original signal mask
4399*7c478bd9Sstevel@tonic-gate 			 */
4400*7c478bd9Sstevel@tonic-gate 			sigunintr(&smask);
4401*7c478bd9Sstevel@tonic-gate 			return (EINTR);
4402*7c478bd9Sstevel@tonic-gate 		}
4403*7c478bd9Sstevel@tonic-gate 		/*
4404*7c478bd9Sstevel@tonic-gate 		 * restore original signal mask
4405*7c478bd9Sstevel@tonic-gate 		 */
4406*7c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
4407*7c478bd9Sstevel@tonic-gate 	}
4408*7c478bd9Sstevel@tonic-gate 	return (0);
4409*7c478bd9Sstevel@tonic-gate }
4410*7c478bd9Sstevel@tonic-gate 
4411*7c478bd9Sstevel@tonic-gate /*
4412*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4413*7c478bd9Sstevel@tonic-gate  *
4414*7c478bd9Sstevel@tonic-gate  * failover_remap() will do a partial pathname lookup and find the
4415*7c478bd9Sstevel@tonic-gate  * desired vnode on the current server.  The interim vnode will be
4416*7c478bd9Sstevel@tonic-gate  * discarded after we pilfer the new filehandle.
4417*7c478bd9Sstevel@tonic-gate  *
4418*7c478bd9Sstevel@tonic-gate  * Side effects:
4419*7c478bd9Sstevel@tonic-gate  * - This routine will also update the filehandle in the args structure
4420*7c478bd9Sstevel@tonic-gate  *    pointed to by the fi->fhp pointer if it is non-NULL.
4421*7c478bd9Sstevel@tonic-gate  */
4422*7c478bd9Sstevel@tonic-gate 
4423*7c478bd9Sstevel@tonic-gate static int
4424*7c478bd9Sstevel@tonic-gate failover_remap(failinfo_t *fi)
4425*7c478bd9Sstevel@tonic-gate {
4426*7c478bd9Sstevel@tonic-gate 	vnode_t *vp, *nvp, *rootvp;
4427*7c478bd9Sstevel@tonic-gate 	rnode_t *rp, *nrp;
4428*7c478bd9Sstevel@tonic-gate 	mntinfo_t *mi;
4429*7c478bd9Sstevel@tonic-gate 	int error;
4430*7c478bd9Sstevel@tonic-gate 	int index;
4431*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4432*7c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
4433*7c478bd9Sstevel@tonic-gate 
4434*7c478bd9Sstevel@tonic-gate 	nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone);
4435*7c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
4436*7c478bd9Sstevel@tonic-gate #endif
4437*7c478bd9Sstevel@tonic-gate 	/*
4438*7c478bd9Sstevel@tonic-gate 	 * Sanity check
4439*7c478bd9Sstevel@tonic-gate 	 */
4440*7c478bd9Sstevel@tonic-gate 	if (fi == NULL || fi->vp == NULL || fi->lookupproc == NULL)
4441*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
4442*7c478bd9Sstevel@tonic-gate 	vp = fi->vp;
4443*7c478bd9Sstevel@tonic-gate 	rp = VTOR(vp);
4444*7c478bd9Sstevel@tonic-gate 	mi = VTOMI(vp);
4445*7c478bd9Sstevel@tonic-gate 
4446*7c478bd9Sstevel@tonic-gate 	if (!(vp->v_flag & VROOT)) {
4447*7c478bd9Sstevel@tonic-gate 		/*
4448*7c478bd9Sstevel@tonic-gate 		 * Given the root fh, use the path stored in
4449*7c478bd9Sstevel@tonic-gate 		 * the rnode to find the fh for the new server.
4450*7c478bd9Sstevel@tonic-gate 		 */
4451*7c478bd9Sstevel@tonic-gate 		error = VFS_ROOT(mi->mi_vfsp, &rootvp);
4452*7c478bd9Sstevel@tonic-gate 		if (error)
4453*7c478bd9Sstevel@tonic-gate 			return (error);
4454*7c478bd9Sstevel@tonic-gate 
4455*7c478bd9Sstevel@tonic-gate 		error = failover_lookup(rp->r_path, rootvp,
4456*7c478bd9Sstevel@tonic-gate 		    fi->lookupproc, fi->xattrdirproc, &nvp);
4457*7c478bd9Sstevel@tonic-gate 
4458*7c478bd9Sstevel@tonic-gate 		VN_RELE(rootvp);
4459*7c478bd9Sstevel@tonic-gate 
4460*7c478bd9Sstevel@tonic-gate 		if (error)
4461*7c478bd9Sstevel@tonic-gate 			return (error);
4462*7c478bd9Sstevel@tonic-gate 
4463*7c478bd9Sstevel@tonic-gate 		/*
4464*7c478bd9Sstevel@tonic-gate 		 * If we found the same rnode, we're done now
4465*7c478bd9Sstevel@tonic-gate 		 */
4466*7c478bd9Sstevel@tonic-gate 		if (nvp == vp) {
4467*7c478bd9Sstevel@tonic-gate 			/*
4468*7c478bd9Sstevel@tonic-gate 			 * Failed and the new server may physically be same
4469*7c478bd9Sstevel@tonic-gate 			 * OR may share a same disk subsystem. In this case
4470*7c478bd9Sstevel@tonic-gate 			 * file handle for a particular file path is not going
4471*7c478bd9Sstevel@tonic-gate 			 * to change, given the same filehandle lookup will
4472*7c478bd9Sstevel@tonic-gate 			 * always locate the same rnode as the existing one.
4473*7c478bd9Sstevel@tonic-gate 			 * All we might need to do is to update the r_server
4474*7c478bd9Sstevel@tonic-gate 			 * with the current servinfo.
4475*7c478bd9Sstevel@tonic-gate 			 */
4476*7c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi)) {
4477*7c478bd9Sstevel@tonic-gate 				rp->r_server = mi->mi_curr_serv;
4478*7c478bd9Sstevel@tonic-gate 			}
4479*7c478bd9Sstevel@tonic-gate 			VN_RELE(nvp);
4480*7c478bd9Sstevel@tonic-gate 			return (0);
4481*7c478bd9Sstevel@tonic-gate 		}
4482*7c478bd9Sstevel@tonic-gate 
4483*7c478bd9Sstevel@tonic-gate 		/*
4484*7c478bd9Sstevel@tonic-gate 		 * Try to make it so that no one else will find this
4485*7c478bd9Sstevel@tonic-gate 		 * vnode because it is just a temporary to hold the
4486*7c478bd9Sstevel@tonic-gate 		 * new file handle until that file handle can be
4487*7c478bd9Sstevel@tonic-gate 		 * copied to the original vnode/rnode.
4488*7c478bd9Sstevel@tonic-gate 		 */
4489*7c478bd9Sstevel@tonic-gate 		nrp = VTOR(nvp);
4490*7c478bd9Sstevel@tonic-gate 		if (nrp->r_flags & RHASHED)
4491*7c478bd9Sstevel@tonic-gate 			rp_rmhash(nrp);
4492*7c478bd9Sstevel@tonic-gate 
4493*7c478bd9Sstevel@tonic-gate 		/*
4494*7c478bd9Sstevel@tonic-gate 		 * As a heuristic check on the validity of the new
4495*7c478bd9Sstevel@tonic-gate 		 * file, check that the size and type match against
4496*7c478bd9Sstevel@tonic-gate 		 * that we remember from the old version.
4497*7c478bd9Sstevel@tonic-gate 		 */
4498*7c478bd9Sstevel@tonic-gate 		if (rp->r_size != nrp->r_size || vp->v_type != nvp->v_type) {
4499*7c478bd9Sstevel@tonic-gate 			zcmn_err(mi->mi_zone->zone_id, CE_WARN,
4500*7c478bd9Sstevel@tonic-gate 			    "NFS replicas %s and %s: file %s not same.",
4501*7c478bd9Sstevel@tonic-gate 			    rp->r_server->sv_hostname,
4502*7c478bd9Sstevel@tonic-gate 			    nrp->r_server->sv_hostname, rp->r_path);
4503*7c478bd9Sstevel@tonic-gate 			VN_RELE(nvp);
4504*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
4505*7c478bd9Sstevel@tonic-gate 		}
4506*7c478bd9Sstevel@tonic-gate 
4507*7c478bd9Sstevel@tonic-gate 		/*
4508*7c478bd9Sstevel@tonic-gate 		 * snarf the filehandle from the new rnode
4509*7c478bd9Sstevel@tonic-gate 		 * then release it, again while updating the
4510*7c478bd9Sstevel@tonic-gate 		 * hash queues for the rnode.
4511*7c478bd9Sstevel@tonic-gate 		 */
4512*7c478bd9Sstevel@tonic-gate 		if (rp->r_flags & RHASHED)
4513*7c478bd9Sstevel@tonic-gate 			rp_rmhash(rp);
4514*7c478bd9Sstevel@tonic-gate 		rp->r_server = mi->mi_curr_serv;
4515*7c478bd9Sstevel@tonic-gate 		rp->r_fh = nrp->r_fh;
4516*7c478bd9Sstevel@tonic-gate 		index = rtablehash(&rp->r_fh);
4517*7c478bd9Sstevel@tonic-gate 		rp->r_hashq = &rtable[index];
4518*7c478bd9Sstevel@tonic-gate 		/*
4519*7c478bd9Sstevel@tonic-gate 		 * Copy the attributes from the new rnode to the old
4520*7c478bd9Sstevel@tonic-gate 		 * rnode.  This will help to reduce unnecessary page
4521*7c478bd9Sstevel@tonic-gate 		 * cache flushes.
4522*7c478bd9Sstevel@tonic-gate 		 */
4523*7c478bd9Sstevel@tonic-gate 		rp->r_attr = nrp->r_attr;
4524*7c478bd9Sstevel@tonic-gate 		rp->r_attrtime = nrp->r_attrtime;
4525*7c478bd9Sstevel@tonic-gate 		rp->r_mtime = nrp->r_mtime;
4526*7c478bd9Sstevel@tonic-gate 		(void) nfs_free_data_reclaim(rp);
4527*7c478bd9Sstevel@tonic-gate 		nfs_setswaplike(vp, &rp->r_attr);
4528*7c478bd9Sstevel@tonic-gate 		rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
4529*7c478bd9Sstevel@tonic-gate 		rp_addhash(rp);
4530*7c478bd9Sstevel@tonic-gate 		rw_exit(&rp->r_hashq->r_lock);
4531*7c478bd9Sstevel@tonic-gate 		VN_RELE(nvp);
4532*7c478bd9Sstevel@tonic-gate 	}
4533*7c478bd9Sstevel@tonic-gate 
4534*7c478bd9Sstevel@tonic-gate 	/*
4535*7c478bd9Sstevel@tonic-gate 	 * Update successful failover remap count
4536*7c478bd9Sstevel@tonic-gate 	 */
4537*7c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
4538*7c478bd9Sstevel@tonic-gate 	mi->mi_remap++;
4539*7c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
4540*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4541*7c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.remap.value.ui64++;
4542*7c478bd9Sstevel@tonic-gate #endif
4543*7c478bd9Sstevel@tonic-gate 
4544*7c478bd9Sstevel@tonic-gate 	/*
4545*7c478bd9Sstevel@tonic-gate 	 * If we have a copied filehandle to update, do it now.
4546*7c478bd9Sstevel@tonic-gate 	 */
4547*7c478bd9Sstevel@tonic-gate 	if (fi->fhp != NULL && fi->copyproc != NULL)
4548*7c478bd9Sstevel@tonic-gate 		(*fi->copyproc)(fi->fhp, vp);
4549*7c478bd9Sstevel@tonic-gate 
4550*7c478bd9Sstevel@tonic-gate 	return (0);
4551*7c478bd9Sstevel@tonic-gate }
4552*7c478bd9Sstevel@tonic-gate 
4553*7c478bd9Sstevel@tonic-gate /*
4554*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4555*7c478bd9Sstevel@tonic-gate  *
4556*7c478bd9Sstevel@tonic-gate  * We want a simple pathname lookup routine to parse the pieces
4557*7c478bd9Sstevel@tonic-gate  * of path in rp->r_path.  We know that the path was a created
4558*7c478bd9Sstevel@tonic-gate  * as rnodes were made, so we know we have only to deal with
4559*7c478bd9Sstevel@tonic-gate  * paths that look like:
4560*7c478bd9Sstevel@tonic-gate  *	dir1/dir2/dir3/file
4561*7c478bd9Sstevel@tonic-gate  * Any evidence of anything like .., symlinks, and ENOTDIR
4562*7c478bd9Sstevel@tonic-gate  * are hard errors, because they mean something in this filesystem
4563*7c478bd9Sstevel@tonic-gate  * is different from the one we came from, or has changed under
4564*7c478bd9Sstevel@tonic-gate  * us in some way.  If this is true, we want the failure.
4565*7c478bd9Sstevel@tonic-gate  *
4566*7c478bd9Sstevel@tonic-gate  * Extended attributes: if the filesystem is mounted with extended
4567*7c478bd9Sstevel@tonic-gate  * attributes enabled (-o xattr), the attribute directory will be
4568*7c478bd9Sstevel@tonic-gate  * represented in the r_path as the magic name XATTR_RPATH. So if
4569*7c478bd9Sstevel@tonic-gate  * we see that name in the pathname, is must be because this node
4570*7c478bd9Sstevel@tonic-gate  * is an extended attribute.  Therefore, look it up that way.
4571*7c478bd9Sstevel@tonic-gate  */
4572*7c478bd9Sstevel@tonic-gate static int
4573*7c478bd9Sstevel@tonic-gate failover_lookup(char *path, vnode_t *root,
4574*7c478bd9Sstevel@tonic-gate     int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *, int,
4575*7c478bd9Sstevel@tonic-gate 	vnode_t *, cred_t *, int),
4576*7c478bd9Sstevel@tonic-gate     int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
4577*7c478bd9Sstevel@tonic-gate     vnode_t **new)
4578*7c478bd9Sstevel@tonic-gate {
4579*7c478bd9Sstevel@tonic-gate 	vnode_t *dvp, *nvp;
4580*7c478bd9Sstevel@tonic-gate 	int error = EINVAL;
4581*7c478bd9Sstevel@tonic-gate 	char *s, *p, *tmppath;
4582*7c478bd9Sstevel@tonic-gate 	size_t len;
4583*7c478bd9Sstevel@tonic-gate 	mntinfo_t *mi;
4584*7c478bd9Sstevel@tonic-gate 	bool_t xattr;
4585*7c478bd9Sstevel@tonic-gate 
4586*7c478bd9Sstevel@tonic-gate 	/* Make local copy of path */
4587*7c478bd9Sstevel@tonic-gate 	len = strlen(path) + 1;
4588*7c478bd9Sstevel@tonic-gate 	tmppath = kmem_alloc(len, KM_SLEEP);
4589*7c478bd9Sstevel@tonic-gate 	(void) strcpy(tmppath, path);
4590*7c478bd9Sstevel@tonic-gate 	s = tmppath;
4591*7c478bd9Sstevel@tonic-gate 
4592*7c478bd9Sstevel@tonic-gate 	dvp = root;
4593*7c478bd9Sstevel@tonic-gate 	VN_HOLD(dvp);
4594*7c478bd9Sstevel@tonic-gate 	mi = VTOMI(root);
4595*7c478bd9Sstevel@tonic-gate 	xattr = mi->mi_flags & MI_EXTATTR;
4596*7c478bd9Sstevel@tonic-gate 
4597*7c478bd9Sstevel@tonic-gate 	do {
4598*7c478bd9Sstevel@tonic-gate 		p = strchr(s, '/');
4599*7c478bd9Sstevel@tonic-gate 		if (p != NULL)
4600*7c478bd9Sstevel@tonic-gate 			*p = '\0';
4601*7c478bd9Sstevel@tonic-gate 		if (xattr && strcmp(s, XATTR_RPATH) == 0) {
4602*7c478bd9Sstevel@tonic-gate 			error = (*xattrdirproc)(dvp, &nvp, FALSE, CRED(),
4603*7c478bd9Sstevel@tonic-gate 			    RFSCALL_SOFT);
4604*7c478bd9Sstevel@tonic-gate 		} else {
4605*7c478bd9Sstevel@tonic-gate 			error = (*lookupproc)(dvp, s, &nvp, NULL, 0, NULL,
4606*7c478bd9Sstevel@tonic-gate 			    CRED(), RFSCALL_SOFT);
4607*7c478bd9Sstevel@tonic-gate 		}
4608*7c478bd9Sstevel@tonic-gate 		if (p != NULL)
4609*7c478bd9Sstevel@tonic-gate 			*p++ = '/';
4610*7c478bd9Sstevel@tonic-gate 		if (error) {
4611*7c478bd9Sstevel@tonic-gate 			VN_RELE(dvp);
4612*7c478bd9Sstevel@tonic-gate 			kmem_free(tmppath, len);
4613*7c478bd9Sstevel@tonic-gate 			return (error);
4614*7c478bd9Sstevel@tonic-gate 		}
4615*7c478bd9Sstevel@tonic-gate 		s = p;
4616*7c478bd9Sstevel@tonic-gate 		VN_RELE(dvp);
4617*7c478bd9Sstevel@tonic-gate 		dvp = nvp;
4618*7c478bd9Sstevel@tonic-gate 	} while (p != NULL);
4619*7c478bd9Sstevel@tonic-gate 
4620*7c478bd9Sstevel@tonic-gate 	if (nvp != NULL && new != NULL)
4621*7c478bd9Sstevel@tonic-gate 		*new = nvp;
4622*7c478bd9Sstevel@tonic-gate 	kmem_free(tmppath, len);
4623*7c478bd9Sstevel@tonic-gate 	return (0);
4624*7c478bd9Sstevel@tonic-gate }
4625*7c478bd9Sstevel@tonic-gate 
4626*7c478bd9Sstevel@tonic-gate /*
4627*7c478bd9Sstevel@tonic-gate  * NFS client failover support
4628*7c478bd9Sstevel@tonic-gate  *
4629*7c478bd9Sstevel@tonic-gate  * sv_free() frees the malloc'd portion of a "servinfo_t".
4630*7c478bd9Sstevel@tonic-gate  */
4631*7c478bd9Sstevel@tonic-gate void
4632*7c478bd9Sstevel@tonic-gate sv_free(servinfo_t *svp)
4633*7c478bd9Sstevel@tonic-gate {
4634*7c478bd9Sstevel@tonic-gate 	servinfo_t *next;
4635*7c478bd9Sstevel@tonic-gate 	struct knetconfig *knconf;
4636*7c478bd9Sstevel@tonic-gate 
4637*7c478bd9Sstevel@tonic-gate 	while (svp != NULL) {
4638*7c478bd9Sstevel@tonic-gate 		next = svp->sv_next;
4639*7c478bd9Sstevel@tonic-gate 		if (svp->sv_secdata)
4640*7c478bd9Sstevel@tonic-gate 			sec_clnt_freeinfo(svp->sv_secdata);
4641*7c478bd9Sstevel@tonic-gate 		if (svp->sv_hostname && svp->sv_hostnamelen > 0)
4642*7c478bd9Sstevel@tonic-gate 			kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
4643*7c478bd9Sstevel@tonic-gate 		knconf = svp->sv_knconf;
4644*7c478bd9Sstevel@tonic-gate 		if (knconf != NULL) {
4645*7c478bd9Sstevel@tonic-gate 			if (knconf->knc_protofmly != NULL)
4646*7c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
4647*7c478bd9Sstevel@tonic-gate 			if (knconf->knc_proto != NULL)
4648*7c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
4649*7c478bd9Sstevel@tonic-gate 			kmem_free(knconf, sizeof (*knconf));
4650*7c478bd9Sstevel@tonic-gate 		}
4651*7c478bd9Sstevel@tonic-gate 		knconf = svp->sv_origknconf;
4652*7c478bd9Sstevel@tonic-gate 		if (knconf != NULL) {
4653*7c478bd9Sstevel@tonic-gate 			if (knconf->knc_protofmly != NULL)
4654*7c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
4655*7c478bd9Sstevel@tonic-gate 			if (knconf->knc_proto != NULL)
4656*7c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
4657*7c478bd9Sstevel@tonic-gate 			kmem_free(knconf, sizeof (*knconf));
4658*7c478bd9Sstevel@tonic-gate 		}
4659*7c478bd9Sstevel@tonic-gate 		if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
4660*7c478bd9Sstevel@tonic-gate 			kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
4661*7c478bd9Sstevel@tonic-gate 		mutex_destroy(&svp->sv_lock);
4662*7c478bd9Sstevel@tonic-gate 		kmem_free(svp, sizeof (*svp));
4663*7c478bd9Sstevel@tonic-gate 		svp = next;
4664*7c478bd9Sstevel@tonic-gate 	}
4665*7c478bd9Sstevel@tonic-gate }
4666*7c478bd9Sstevel@tonic-gate 
4667*7c478bd9Sstevel@tonic-gate /*
4668*7c478bd9Sstevel@tonic-gate  * Only can return non-zero if intr != 0.
4669*7c478bd9Sstevel@tonic-gate  */
4670*7c478bd9Sstevel@tonic-gate int
4671*7c478bd9Sstevel@tonic-gate nfs_rw_enter_sig(nfs_rwlock_t *l, krw_t rw, int intr)
4672*7c478bd9Sstevel@tonic-gate {
4673*7c478bd9Sstevel@tonic-gate 
4674*7c478bd9Sstevel@tonic-gate 	mutex_enter(&l->lock);
4675*7c478bd9Sstevel@tonic-gate 
4676*7c478bd9Sstevel@tonic-gate 	/*
4677*7c478bd9Sstevel@tonic-gate 	 * If this is a nested enter, then allow it.  There
4678*7c478bd9Sstevel@tonic-gate 	 * must be as many exits as enters through.
4679*7c478bd9Sstevel@tonic-gate 	 */
4680*7c478bd9Sstevel@tonic-gate 	if (l->owner == curthread) {
4681*7c478bd9Sstevel@tonic-gate 		/* lock is held for writing by current thread */
4682*7c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_READER || rw == RW_WRITER);
4683*7c478bd9Sstevel@tonic-gate 		l->count--;
4684*7c478bd9Sstevel@tonic-gate 	} else if (rw == RW_READER) {
4685*7c478bd9Sstevel@tonic-gate 		/*
4686*7c478bd9Sstevel@tonic-gate 		 * While there is a writer active or writers waiting,
4687*7c478bd9Sstevel@tonic-gate 		 * then wait for them to finish up and move on.  Then,
4688*7c478bd9Sstevel@tonic-gate 		 * increment the count to indicate that a reader is
4689*7c478bd9Sstevel@tonic-gate 		 * active.
4690*7c478bd9Sstevel@tonic-gate 		 */
4691*7c478bd9Sstevel@tonic-gate 		while (l->count < 0 || l->waiters > 0) {
4692*7c478bd9Sstevel@tonic-gate 			if (intr) {
4693*7c478bd9Sstevel@tonic-gate 				klwp_t *lwp = ttolwp(curthread);
4694*7c478bd9Sstevel@tonic-gate 
4695*7c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
4696*7c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop++;
4697*7c478bd9Sstevel@tonic-gate 				if (!cv_wait_sig(&l->cv, &l->lock)) {
4698*7c478bd9Sstevel@tonic-gate 					if (lwp != NULL)
4699*7c478bd9Sstevel@tonic-gate 						lwp->lwp_nostop--;
4700*7c478bd9Sstevel@tonic-gate 					mutex_exit(&l->lock);
4701*7c478bd9Sstevel@tonic-gate 					return (EINTR);
4702*7c478bd9Sstevel@tonic-gate 				}
4703*7c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
4704*7c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop--;
4705*7c478bd9Sstevel@tonic-gate 			} else
4706*7c478bd9Sstevel@tonic-gate 				cv_wait(&l->cv, &l->lock);
4707*7c478bd9Sstevel@tonic-gate 		}
4708*7c478bd9Sstevel@tonic-gate 		ASSERT(l->count < INT_MAX);
4709*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
4710*7c478bd9Sstevel@tonic-gate 		if ((l->count % 10000) == 9999)
4711*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "nfs_rw_enter_sig: count %d on"
4712*7c478bd9Sstevel@tonic-gate 				"rwlock @ %p\n", l->count, (void *)&l);
4713*7c478bd9Sstevel@tonic-gate #endif
4714*7c478bd9Sstevel@tonic-gate 		l->count++;
4715*7c478bd9Sstevel@tonic-gate 	} else {
4716*7c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_WRITER);
4717*7c478bd9Sstevel@tonic-gate 		/*
4718*7c478bd9Sstevel@tonic-gate 		 * While there are readers active or a writer
4719*7c478bd9Sstevel@tonic-gate 		 * active, then wait for all of the readers
4720*7c478bd9Sstevel@tonic-gate 		 * to finish or for the writer to finish.
4721*7c478bd9Sstevel@tonic-gate 		 * Then, set the owner field to curthread and
4722*7c478bd9Sstevel@tonic-gate 		 * decrement count to indicate that a writer
4723*7c478bd9Sstevel@tonic-gate 		 * is active.
4724*7c478bd9Sstevel@tonic-gate 		 */
4725*7c478bd9Sstevel@tonic-gate 		while (l->count > 0 || l->owner != NULL) {
4726*7c478bd9Sstevel@tonic-gate 			l->waiters++;
4727*7c478bd9Sstevel@tonic-gate 			if (intr) {
4728*7c478bd9Sstevel@tonic-gate 				klwp_t *lwp = ttolwp(curthread);
4729*7c478bd9Sstevel@tonic-gate 
4730*7c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
4731*7c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop++;
4732*7c478bd9Sstevel@tonic-gate 				if (!cv_wait_sig(&l->cv, &l->lock)) {
4733*7c478bd9Sstevel@tonic-gate 					if (lwp != NULL)
4734*7c478bd9Sstevel@tonic-gate 						lwp->lwp_nostop--;
4735*7c478bd9Sstevel@tonic-gate 					l->waiters--;
4736*7c478bd9Sstevel@tonic-gate 					cv_broadcast(&l->cv);
4737*7c478bd9Sstevel@tonic-gate 					mutex_exit(&l->lock);
4738*7c478bd9Sstevel@tonic-gate 					return (EINTR);
4739*7c478bd9Sstevel@tonic-gate 				}
4740*7c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
4741*7c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop--;
4742*7c478bd9Sstevel@tonic-gate 			} else
4743*7c478bd9Sstevel@tonic-gate 				cv_wait(&l->cv, &l->lock);
4744*7c478bd9Sstevel@tonic-gate 			l->waiters--;
4745*7c478bd9Sstevel@tonic-gate 		}
4746*7c478bd9Sstevel@tonic-gate 		l->owner = curthread;
4747*7c478bd9Sstevel@tonic-gate 		l->count--;
4748*7c478bd9Sstevel@tonic-gate 	}
4749*7c478bd9Sstevel@tonic-gate 
4750*7c478bd9Sstevel@tonic-gate 	mutex_exit(&l->lock);
4751*7c478bd9Sstevel@tonic-gate 
4752*7c478bd9Sstevel@tonic-gate 	return (0);
4753*7c478bd9Sstevel@tonic-gate }
4754*7c478bd9Sstevel@tonic-gate 
4755*7c478bd9Sstevel@tonic-gate /*
4756*7c478bd9Sstevel@tonic-gate  * If the lock is available, obtain it and return non-zero.  If there is
4757*7c478bd9Sstevel@tonic-gate  * already a conflicting lock, return 0 immediately.
4758*7c478bd9Sstevel@tonic-gate  */
4759*7c478bd9Sstevel@tonic-gate 
4760*7c478bd9Sstevel@tonic-gate int
4761*7c478bd9Sstevel@tonic-gate nfs_rw_tryenter(nfs_rwlock_t *l, krw_t rw)
4762*7c478bd9Sstevel@tonic-gate {
4763*7c478bd9Sstevel@tonic-gate 	mutex_enter(&l->lock);
4764*7c478bd9Sstevel@tonic-gate 
4765*7c478bd9Sstevel@tonic-gate 	/*
4766*7c478bd9Sstevel@tonic-gate 	 * If this is a nested enter, then allow it.  There
4767*7c478bd9Sstevel@tonic-gate 	 * must be as many exits as enters through.
4768*7c478bd9Sstevel@tonic-gate 	 */
4769*7c478bd9Sstevel@tonic-gate 	if (l->owner == curthread) {
4770*7c478bd9Sstevel@tonic-gate 		/* lock is held for writing by current thread */
4771*7c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_READER || rw == RW_WRITER);
4772*7c478bd9Sstevel@tonic-gate 		l->count--;
4773*7c478bd9Sstevel@tonic-gate 	} else if (rw == RW_READER) {
4774*7c478bd9Sstevel@tonic-gate 		/*
4775*7c478bd9Sstevel@tonic-gate 		 * If there is a writer active or writers waiting, deny the
4776*7c478bd9Sstevel@tonic-gate 		 * lock.  Otherwise, bump the count of readers.
4777*7c478bd9Sstevel@tonic-gate 		 */
4778*7c478bd9Sstevel@tonic-gate 		if (l->count < 0 || l->waiters > 0) {
4779*7c478bd9Sstevel@tonic-gate 			mutex_exit(&l->lock);
4780*7c478bd9Sstevel@tonic-gate 			return (0);
4781*7c478bd9Sstevel@tonic-gate 		}
4782*7c478bd9Sstevel@tonic-gate 		l->count++;
4783*7c478bd9Sstevel@tonic-gate 	} else {
4784*7c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_WRITER);
4785*7c478bd9Sstevel@tonic-gate 		/*
4786*7c478bd9Sstevel@tonic-gate 		 * If there are readers active or a writer active, deny the
4787*7c478bd9Sstevel@tonic-gate 		 * lock.  Otherwise, set the owner field to curthread and
4788*7c478bd9Sstevel@tonic-gate 		 * decrement count to indicate that a writer is active.
4789*7c478bd9Sstevel@tonic-gate 		 */
4790*7c478bd9Sstevel@tonic-gate 		if (l->count > 0 || l->owner != NULL) {
4791*7c478bd9Sstevel@tonic-gate 			mutex_exit(&l->lock);
4792*7c478bd9Sstevel@tonic-gate 			return (0);
4793*7c478bd9Sstevel@tonic-gate 		}
4794*7c478bd9Sstevel@tonic-gate 		l->owner = curthread;
4795*7c478bd9Sstevel@tonic-gate 		l->count--;
4796*7c478bd9Sstevel@tonic-gate 	}
4797*7c478bd9Sstevel@tonic-gate 
4798*7c478bd9Sstevel@tonic-gate 	mutex_exit(&l->lock);
4799*7c478bd9Sstevel@tonic-gate 
4800*7c478bd9Sstevel@tonic-gate 	return (1);
4801*7c478bd9Sstevel@tonic-gate }
4802*7c478bd9Sstevel@tonic-gate 
4803*7c478bd9Sstevel@tonic-gate void
4804*7c478bd9Sstevel@tonic-gate nfs_rw_exit(nfs_rwlock_t *l)
4805*7c478bd9Sstevel@tonic-gate {
4806*7c478bd9Sstevel@tonic-gate 
4807*7c478bd9Sstevel@tonic-gate 	mutex_enter(&l->lock);
4808*7c478bd9Sstevel@tonic-gate 	/*
4809*7c478bd9Sstevel@tonic-gate 	 * If this is releasing a writer lock, then increment count to
4810*7c478bd9Sstevel@tonic-gate 	 * indicate that there is one less writer active.  If this was
4811*7c478bd9Sstevel@tonic-gate 	 * the last of possibly nested writer locks, then clear the owner
4812*7c478bd9Sstevel@tonic-gate 	 * field as well to indicate that there is no writer active
4813*7c478bd9Sstevel@tonic-gate 	 * and wakeup any possible waiting writers or readers.
4814*7c478bd9Sstevel@tonic-gate 	 *
4815*7c478bd9Sstevel@tonic-gate 	 * If releasing a reader lock, then just decrement count to
4816*7c478bd9Sstevel@tonic-gate 	 * indicate that there is one less reader active.  If this was
4817*7c478bd9Sstevel@tonic-gate 	 * the last active reader and there are writer(s) waiting,
4818*7c478bd9Sstevel@tonic-gate 	 * then wake up the first.
4819*7c478bd9Sstevel@tonic-gate 	 */
4820*7c478bd9Sstevel@tonic-gate 	if (l->owner != NULL) {
4821*7c478bd9Sstevel@tonic-gate 		ASSERT(l->owner == curthread);
4822*7c478bd9Sstevel@tonic-gate 		l->count++;
4823*7c478bd9Sstevel@tonic-gate 		if (l->count == 0) {
4824*7c478bd9Sstevel@tonic-gate 			l->owner = NULL;
4825*7c478bd9Sstevel@tonic-gate 			cv_broadcast(&l->cv);
4826*7c478bd9Sstevel@tonic-gate 		}
4827*7c478bd9Sstevel@tonic-gate 	} else {
4828*7c478bd9Sstevel@tonic-gate 		ASSERT(l->count > 0);
4829*7c478bd9Sstevel@tonic-gate 		l->count--;
4830*7c478bd9Sstevel@tonic-gate 		if (l->count == 0 && l->waiters > 0)
4831*7c478bd9Sstevel@tonic-gate 			cv_broadcast(&l->cv);
4832*7c478bd9Sstevel@tonic-gate 	}
4833*7c478bd9Sstevel@tonic-gate 	mutex_exit(&l->lock);
4834*7c478bd9Sstevel@tonic-gate }
4835*7c478bd9Sstevel@tonic-gate 
4836*7c478bd9Sstevel@tonic-gate int
4837*7c478bd9Sstevel@tonic-gate nfs_rw_lock_held(nfs_rwlock_t *l, krw_t rw)
4838*7c478bd9Sstevel@tonic-gate {
4839*7c478bd9Sstevel@tonic-gate 
4840*7c478bd9Sstevel@tonic-gate 	if (rw == RW_READER)
4841*7c478bd9Sstevel@tonic-gate 		return (l->count > 0);
4842*7c478bd9Sstevel@tonic-gate 	ASSERT(rw == RW_WRITER);
4843*7c478bd9Sstevel@tonic-gate 	return (l->count < 0);
4844*7c478bd9Sstevel@tonic-gate }
4845*7c478bd9Sstevel@tonic-gate 
4846*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
4847*7c478bd9Sstevel@tonic-gate void
4848*7c478bd9Sstevel@tonic-gate nfs_rw_init(nfs_rwlock_t *l, char *name, krw_type_t type, void *arg)
4849*7c478bd9Sstevel@tonic-gate {
4850*7c478bd9Sstevel@tonic-gate 
4851*7c478bd9Sstevel@tonic-gate 	l->count = 0;
4852*7c478bd9Sstevel@tonic-gate 	l->waiters = 0;
4853*7c478bd9Sstevel@tonic-gate 	l->owner = NULL;
4854*7c478bd9Sstevel@tonic-gate 	mutex_init(&l->lock, NULL, MUTEX_DEFAULT, NULL);
4855*7c478bd9Sstevel@tonic-gate 	cv_init(&l->cv, NULL, CV_DEFAULT, NULL);
4856*7c478bd9Sstevel@tonic-gate }
4857*7c478bd9Sstevel@tonic-gate 
4858*7c478bd9Sstevel@tonic-gate void
4859*7c478bd9Sstevel@tonic-gate nfs_rw_destroy(nfs_rwlock_t *l)
4860*7c478bd9Sstevel@tonic-gate {
4861*7c478bd9Sstevel@tonic-gate 
4862*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&l->lock);
4863*7c478bd9Sstevel@tonic-gate 	cv_destroy(&l->cv);
4864*7c478bd9Sstevel@tonic-gate }
4865*7c478bd9Sstevel@tonic-gate 
4866*7c478bd9Sstevel@tonic-gate int
4867*7c478bd9Sstevel@tonic-gate nfs3_rddir_compar(const void *x, const void *y)
4868*7c478bd9Sstevel@tonic-gate {
4869*7c478bd9Sstevel@tonic-gate 	rddir_cache *a = (rddir_cache *)x;
4870*7c478bd9Sstevel@tonic-gate 	rddir_cache *b = (rddir_cache *)y;
4871*7c478bd9Sstevel@tonic-gate 
4872*7c478bd9Sstevel@tonic-gate 	if (a->nfs3_cookie == b->nfs3_cookie) {
4873*7c478bd9Sstevel@tonic-gate 		if (a->buflen == b->buflen)
4874*7c478bd9Sstevel@tonic-gate 			return (0);
4875*7c478bd9Sstevel@tonic-gate 		if (a->buflen < b->buflen)
4876*7c478bd9Sstevel@tonic-gate 			return (-1);
4877*7c478bd9Sstevel@tonic-gate 		return (1);
4878*7c478bd9Sstevel@tonic-gate 	}
4879*7c478bd9Sstevel@tonic-gate 
4880*7c478bd9Sstevel@tonic-gate 	if (a->nfs3_cookie < b->nfs3_cookie)
4881*7c478bd9Sstevel@tonic-gate 		return (-1);
4882*7c478bd9Sstevel@tonic-gate 
4883*7c478bd9Sstevel@tonic-gate 	return (1);
4884*7c478bd9Sstevel@tonic-gate }
4885*7c478bd9Sstevel@tonic-gate 
4886*7c478bd9Sstevel@tonic-gate int
4887*7c478bd9Sstevel@tonic-gate nfs_rddir_compar(const void *x, const void *y)
4888*7c478bd9Sstevel@tonic-gate {
4889*7c478bd9Sstevel@tonic-gate 	rddir_cache *a = (rddir_cache *)x;
4890*7c478bd9Sstevel@tonic-gate 	rddir_cache *b = (rddir_cache *)y;
4891*7c478bd9Sstevel@tonic-gate 
4892*7c478bd9Sstevel@tonic-gate 	if (a->nfs_cookie == b->nfs_cookie) {
4893*7c478bd9Sstevel@tonic-gate 		if (a->buflen == b->buflen)
4894*7c478bd9Sstevel@tonic-gate 			return (0);
4895*7c478bd9Sstevel@tonic-gate 		if (a->buflen < b->buflen)
4896*7c478bd9Sstevel@tonic-gate 			return (-1);
4897*7c478bd9Sstevel@tonic-gate 		return (1);
4898*7c478bd9Sstevel@tonic-gate 	}
4899*7c478bd9Sstevel@tonic-gate 
4900*7c478bd9Sstevel@tonic-gate 	if (a->nfs_cookie < b->nfs_cookie)
4901*7c478bd9Sstevel@tonic-gate 		return (-1);
4902*7c478bd9Sstevel@tonic-gate 
4903*7c478bd9Sstevel@tonic-gate 	return (1);
4904*7c478bd9Sstevel@tonic-gate }
4905*7c478bd9Sstevel@tonic-gate 
4906*7c478bd9Sstevel@tonic-gate static char *
4907*7c478bd9Sstevel@tonic-gate nfs_getsrvnames(mntinfo_t *mi, size_t *len)
4908*7c478bd9Sstevel@tonic-gate {
4909*7c478bd9Sstevel@tonic-gate 	servinfo_t *s;
4910*7c478bd9Sstevel@tonic-gate 	char *srvnames;
4911*7c478bd9Sstevel@tonic-gate 	char *namep;
4912*7c478bd9Sstevel@tonic-gate 	size_t length;
4913*7c478bd9Sstevel@tonic-gate 
4914*7c478bd9Sstevel@tonic-gate 	/*
4915*7c478bd9Sstevel@tonic-gate 	 * Calculate the length of the string required to hold all
4916*7c478bd9Sstevel@tonic-gate 	 * of the server names plus either a comma or a null
4917*7c478bd9Sstevel@tonic-gate 	 * character following each individual one.
4918*7c478bd9Sstevel@tonic-gate 	 */
4919*7c478bd9Sstevel@tonic-gate 	length = 0;
4920*7c478bd9Sstevel@tonic-gate 	for (s = mi->mi_servers; s != NULL; s = s->sv_next)
4921*7c478bd9Sstevel@tonic-gate 		length += s->sv_hostnamelen;
4922*7c478bd9Sstevel@tonic-gate 
4923*7c478bd9Sstevel@tonic-gate 	srvnames = kmem_alloc(length, KM_SLEEP);
4924*7c478bd9Sstevel@tonic-gate 
4925*7c478bd9Sstevel@tonic-gate 	namep = srvnames;
4926*7c478bd9Sstevel@tonic-gate 	for (s = mi->mi_servers; s != NULL; s = s->sv_next) {
4927*7c478bd9Sstevel@tonic-gate 		(void) strcpy(namep, s->sv_hostname);
4928*7c478bd9Sstevel@tonic-gate 		namep += s->sv_hostnamelen - 1;
4929*7c478bd9Sstevel@tonic-gate 		*namep++ = ',';
4930*7c478bd9Sstevel@tonic-gate 	}
4931*7c478bd9Sstevel@tonic-gate 	*--namep = '\0';
4932*7c478bd9Sstevel@tonic-gate 
4933*7c478bd9Sstevel@tonic-gate 	*len = length;
4934*7c478bd9Sstevel@tonic-gate 
4935*7c478bd9Sstevel@tonic-gate 	return (srvnames);
4936*7c478bd9Sstevel@tonic-gate }
4937