1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate * 26*7c478bd9Sstevel@tonic-gate * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 27*7c478bd9Sstevel@tonic-gate * All rights reserved. 
28*7c478bd9Sstevel@tonic-gate */ 29*7c478bd9Sstevel@tonic-gate 30*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 31*7c478bd9Sstevel@tonic-gate 32*7c478bd9Sstevel@tonic-gate #include <sys/param.h> 33*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 34*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> 35*7c478bd9Sstevel@tonic-gate #include <sys/cred.h> 36*7c478bd9Sstevel@tonic-gate #include <sys/proc.h> 37*7c478bd9Sstevel@tonic-gate #include <sys/user.h> 38*7c478bd9Sstevel@tonic-gate #include <sys/time.h> 39*7c478bd9Sstevel@tonic-gate #include <sys/buf.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 41*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 42*7c478bd9Sstevel@tonic-gate #include <sys/socket.h> 43*7c478bd9Sstevel@tonic-gate #include <sys/uio.h> 44*7c478bd9Sstevel@tonic-gate #include <sys/tiuser.h> 45*7c478bd9Sstevel@tonic-gate #include <sys/swap.h> 46*7c478bd9Sstevel@tonic-gate #include <sys/errno.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 50*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 51*7c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 52*7c478bd9Sstevel@tonic-gate #include <sys/session.h> 53*7c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 54*7c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 55*7c478bd9Sstevel@tonic-gate #include <sys/acl.h> 56*7c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 57*7c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 58*7c478bd9Sstevel@tonic-gate #include <sys/flock.h> 59*7c478bd9Sstevel@tonic-gate #include <sys/dirent.h> 60*7c478bd9Sstevel@tonic-gate #include <sys/flock.h> 61*7c478bd9Sstevel@tonic-gate #include <sys/callb.h> 62*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 63*7c478bd9Sstevel@tonic-gate #include <sys/list.h> 64*7c478bd9Sstevel@tonic-gate 65*7c478bd9Sstevel@tonic-gate #include <rpc/types.h> 66*7c478bd9Sstevel@tonic-gate #include <rpc/xdr.h> 
67*7c478bd9Sstevel@tonic-gate #include <rpc/auth.h> 68*7c478bd9Sstevel@tonic-gate #include <rpc/clnt.h> 69*7c478bd9Sstevel@tonic-gate 70*7c478bd9Sstevel@tonic-gate #include <nfs/nfs.h> 71*7c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h> 72*7c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h> 73*7c478bd9Sstevel@tonic-gate #include <nfs/rnode.h> 74*7c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h> 75*7c478bd9Sstevel@tonic-gate 76*7c478bd9Sstevel@tonic-gate /* 77*7c478bd9Sstevel@tonic-gate * The hash queues for the access to active and cached rnodes 78*7c478bd9Sstevel@tonic-gate * are organized as doubly linked lists. A reader/writer lock 79*7c478bd9Sstevel@tonic-gate * for each hash bucket is used to control access and to synchronize 80*7c478bd9Sstevel@tonic-gate * lookups, additions, and deletions from the hash queue. 81*7c478bd9Sstevel@tonic-gate * 82*7c478bd9Sstevel@tonic-gate * The rnode freelist is organized as a doubly linked list with 83*7c478bd9Sstevel@tonic-gate * a head pointer. Additions and deletions are synchronized via 84*7c478bd9Sstevel@tonic-gate * a single mutex. 85*7c478bd9Sstevel@tonic-gate * 86*7c478bd9Sstevel@tonic-gate * In order to add an rnode to the free list, it must be hashed into 87*7c478bd9Sstevel@tonic-gate * a hash queue and the exclusive lock to the hash queue be held. 88*7c478bd9Sstevel@tonic-gate * If an rnode is not hashed into a hash queue, then it is destroyed 89*7c478bd9Sstevel@tonic-gate * because it represents no valuable information that can be reused 90*7c478bd9Sstevel@tonic-gate * about the file. The exclusive lock to the hash queue must be 91*7c478bd9Sstevel@tonic-gate * held in order to prevent a lookup in the hash queue from finding 92*7c478bd9Sstevel@tonic-gate * the rnode and using it and assuming that the rnode is not on the 93*7c478bd9Sstevel@tonic-gate * freelist. The lookup in the hash queue will have the hash queue 94*7c478bd9Sstevel@tonic-gate * locked, either exclusive or shared. 
 *
 * The vnode reference count for each rnode is not allowed to drop
 * below 1. This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released. This means that when an
 * rnode is looked up in the hash queues, then either the rnode
 * is removed from the freelist and that reference is transferred to
 * the new reference or the vnode reference count must be incremented
 * accordingly. The mutex for the freelist must be held in order to
 * accurately test to see if the rnode is on the freelist or not.
 * The hash queue lock might be held shared and it is possible that
 * two different threads may race to remove the rnode from the
 * freelist. This race can be resolved by holding the mutex for the
 * freelist. Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist. It can not be
 * placed on the freelist due to the requirement that the thread
 * putting the rnode on the freelist must hold the exclusive lock
 * to the hash queue and the thread doing the lookup in the hash
 * queue is holding either a shared or exclusive lock to the hash
 * queue.
116*7c478bd9Sstevel@tonic-gate * 117*7c478bd9Sstevel@tonic-gate * The lock ordering is: 118*7c478bd9Sstevel@tonic-gate * 119*7c478bd9Sstevel@tonic-gate * hash bucket lock -> vnode lock 120*7c478bd9Sstevel@tonic-gate * hash bucket lock -> freelist lock 121*7c478bd9Sstevel@tonic-gate */ 122*7c478bd9Sstevel@tonic-gate static rhashq_t *rtable; 123*7c478bd9Sstevel@tonic-gate 124*7c478bd9Sstevel@tonic-gate static kmutex_t rpfreelist_lock; 125*7c478bd9Sstevel@tonic-gate static rnode_t *rpfreelist = NULL; 126*7c478bd9Sstevel@tonic-gate static long rnew = 0; 127*7c478bd9Sstevel@tonic-gate long nrnode = 0; 128*7c478bd9Sstevel@tonic-gate 129*7c478bd9Sstevel@tonic-gate static int rtablesize; 130*7c478bd9Sstevel@tonic-gate static int rtablemask; 131*7c478bd9Sstevel@tonic-gate 132*7c478bd9Sstevel@tonic-gate static int hashlen = 4; 133*7c478bd9Sstevel@tonic-gate 134*7c478bd9Sstevel@tonic-gate static struct kmem_cache *rnode_cache; 135*7c478bd9Sstevel@tonic-gate 136*7c478bd9Sstevel@tonic-gate /* 137*7c478bd9Sstevel@tonic-gate * Mutex to protect the following variables: 138*7c478bd9Sstevel@tonic-gate * nfs_major 139*7c478bd9Sstevel@tonic-gate * nfs_minor 140*7c478bd9Sstevel@tonic-gate */ 141*7c478bd9Sstevel@tonic-gate kmutex_t nfs_minor_lock; 142*7c478bd9Sstevel@tonic-gate int nfs_major; 143*7c478bd9Sstevel@tonic-gate int nfs_minor; 144*7c478bd9Sstevel@tonic-gate 145*7c478bd9Sstevel@tonic-gate /* Do we allow preepoch (negative) time values otw? 
*/ 146*7c478bd9Sstevel@tonic-gate bool_t nfs_allow_preepoch_time = FALSE; /* default: do not allow preepoch */ 147*7c478bd9Sstevel@tonic-gate 148*7c478bd9Sstevel@tonic-gate /* 149*7c478bd9Sstevel@tonic-gate * Access cache 150*7c478bd9Sstevel@tonic-gate */ 151*7c478bd9Sstevel@tonic-gate static acache_hash_t *acache; 152*7c478bd9Sstevel@tonic-gate static long nacache; /* used strictly to size the number of hash queues */ 153*7c478bd9Sstevel@tonic-gate 154*7c478bd9Sstevel@tonic-gate static int acachesize; 155*7c478bd9Sstevel@tonic-gate static int acachemask; 156*7c478bd9Sstevel@tonic-gate static struct kmem_cache *acache_cache; 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate /* 159*7c478bd9Sstevel@tonic-gate * Client side utilities 160*7c478bd9Sstevel@tonic-gate */ 161*7c478bd9Sstevel@tonic-gate 162*7c478bd9Sstevel@tonic-gate /* 163*7c478bd9Sstevel@tonic-gate * client side statistics 164*7c478bd9Sstevel@tonic-gate */ 165*7c478bd9Sstevel@tonic-gate static const struct clstat clstat_tmpl = { 166*7c478bd9Sstevel@tonic-gate { "calls", KSTAT_DATA_UINT64 }, 167*7c478bd9Sstevel@tonic-gate { "badcalls", KSTAT_DATA_UINT64 }, 168*7c478bd9Sstevel@tonic-gate { "clgets", KSTAT_DATA_UINT64 }, 169*7c478bd9Sstevel@tonic-gate { "cltoomany", KSTAT_DATA_UINT64 }, 170*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 171*7c478bd9Sstevel@tonic-gate { "clalloc", KSTAT_DATA_UINT64 }, 172*7c478bd9Sstevel@tonic-gate { "noresponse", KSTAT_DATA_UINT64 }, 173*7c478bd9Sstevel@tonic-gate { "failover", KSTAT_DATA_UINT64 }, 174*7c478bd9Sstevel@tonic-gate { "remap", KSTAT_DATA_UINT64 }, 175*7c478bd9Sstevel@tonic-gate #endif 176*7c478bd9Sstevel@tonic-gate }; 177*7c478bd9Sstevel@tonic-gate 178*7c478bd9Sstevel@tonic-gate /* 179*7c478bd9Sstevel@tonic-gate * The following are statistics that describe behavior of the system as a whole 180*7c478bd9Sstevel@tonic-gate * and doesn't correspond to any one particular zone. 
181*7c478bd9Sstevel@tonic-gate */ 182*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 183*7c478bd9Sstevel@tonic-gate static struct clstat_debug { 184*7c478bd9Sstevel@tonic-gate kstat_named_t nrnode; /* number of allocated rnodes */ 185*7c478bd9Sstevel@tonic-gate kstat_named_t access; /* size of access cache */ 186*7c478bd9Sstevel@tonic-gate kstat_named_t dirent; /* size of readdir cache */ 187*7c478bd9Sstevel@tonic-gate kstat_named_t dirents; /* size of readdir buf cache */ 188*7c478bd9Sstevel@tonic-gate kstat_named_t reclaim; /* number of reclaims */ 189*7c478bd9Sstevel@tonic-gate kstat_named_t clreclaim; /* number of cl reclaims */ 190*7c478bd9Sstevel@tonic-gate kstat_named_t f_reclaim; /* number of free reclaims */ 191*7c478bd9Sstevel@tonic-gate kstat_named_t a_reclaim; /* number of active reclaims */ 192*7c478bd9Sstevel@tonic-gate kstat_named_t r_reclaim; /* number of rnode reclaims */ 193*7c478bd9Sstevel@tonic-gate kstat_named_t rpath; /* bytes used to store rpaths */ 194*7c478bd9Sstevel@tonic-gate } clstat_debug = { 195*7c478bd9Sstevel@tonic-gate { "nrnode", KSTAT_DATA_UINT64 }, 196*7c478bd9Sstevel@tonic-gate { "access", KSTAT_DATA_UINT64 }, 197*7c478bd9Sstevel@tonic-gate { "dirent", KSTAT_DATA_UINT64 }, 198*7c478bd9Sstevel@tonic-gate { "dirents", KSTAT_DATA_UINT64 }, 199*7c478bd9Sstevel@tonic-gate { "reclaim", KSTAT_DATA_UINT64 }, 200*7c478bd9Sstevel@tonic-gate { "clreclaim", KSTAT_DATA_UINT64 }, 201*7c478bd9Sstevel@tonic-gate { "f_reclaim", KSTAT_DATA_UINT64 }, 202*7c478bd9Sstevel@tonic-gate { "a_reclaim", KSTAT_DATA_UINT64 }, 203*7c478bd9Sstevel@tonic-gate { "r_reclaim", KSTAT_DATA_UINT64 }, 204*7c478bd9Sstevel@tonic-gate { "r_path", KSTAT_DATA_UINT64 }, 205*7c478bd9Sstevel@tonic-gate }; 206*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 207*7c478bd9Sstevel@tonic-gate 208*7c478bd9Sstevel@tonic-gate /* 209*7c478bd9Sstevel@tonic-gate * We keep a global list of per-zone client data, so we can clean up all zones 210*7c478bd9Sstevel@tonic-gate * if we get low on memory. 
211*7c478bd9Sstevel@tonic-gate */ 212*7c478bd9Sstevel@tonic-gate static list_t nfs_clnt_list; 213*7c478bd9Sstevel@tonic-gate static kmutex_t nfs_clnt_list_lock; 214*7c478bd9Sstevel@tonic-gate static zone_key_t nfsclnt_zone_key; 215*7c478bd9Sstevel@tonic-gate 216*7c478bd9Sstevel@tonic-gate static struct kmem_cache *chtab_cache; 217*7c478bd9Sstevel@tonic-gate 218*7c478bd9Sstevel@tonic-gate /* 219*7c478bd9Sstevel@tonic-gate * Some servers do not properly update the attributes of the 220*7c478bd9Sstevel@tonic-gate * directory when changes are made. To allow interoperability 221*7c478bd9Sstevel@tonic-gate * with these broken servers, the nfs_disable_rddir_cache 222*7c478bd9Sstevel@tonic-gate * parameter must be set in /etc/system 223*7c478bd9Sstevel@tonic-gate */ 224*7c478bd9Sstevel@tonic-gate int nfs_disable_rddir_cache = 0; 225*7c478bd9Sstevel@tonic-gate 226*7c478bd9Sstevel@tonic-gate int clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **, 227*7c478bd9Sstevel@tonic-gate struct chtab **); 228*7c478bd9Sstevel@tonic-gate void clfree(CLIENT *, struct chtab *); 229*7c478bd9Sstevel@tonic-gate static int acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **, 230*7c478bd9Sstevel@tonic-gate struct chtab **, struct nfs_clnt *); 231*7c478bd9Sstevel@tonic-gate static int nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **, 232*7c478bd9Sstevel@tonic-gate struct chtab **, struct nfs_clnt *); 233*7c478bd9Sstevel@tonic-gate static void clreclaim(void *); 234*7c478bd9Sstevel@tonic-gate static int nfs_feedback(int, int, mntinfo_t *); 235*7c478bd9Sstevel@tonic-gate static int rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t, 236*7c478bd9Sstevel@tonic-gate caddr_t, cred_t *, int *, enum clnt_stat *, int, 237*7c478bd9Sstevel@tonic-gate failinfo_t *); 238*7c478bd9Sstevel@tonic-gate static int aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t, 239*7c478bd9Sstevel@tonic-gate caddr_t, cred_t *, int *, int, failinfo_t *); 240*7c478bd9Sstevel@tonic-gate static 
void rinactive(rnode_t *, cred_t *); 241*7c478bd9Sstevel@tonic-gate static int rtablehash(nfs_fhandle *); 242*7c478bd9Sstevel@tonic-gate static vnode_t *make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *, 243*7c478bd9Sstevel@tonic-gate struct vnodeops *, 244*7c478bd9Sstevel@tonic-gate int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int, 245*7c478bd9Sstevel@tonic-gate cred_t *), 246*7c478bd9Sstevel@tonic-gate int (*)(const void *, const void *), int *, cred_t *, 247*7c478bd9Sstevel@tonic-gate char *, char *); 248*7c478bd9Sstevel@tonic-gate static void rp_rmfree(rnode_t *); 249*7c478bd9Sstevel@tonic-gate static void rp_addhash(rnode_t *); 250*7c478bd9Sstevel@tonic-gate static void rp_rmhash_locked(rnode_t *); 251*7c478bd9Sstevel@tonic-gate static rnode_t *rfind(rhashq_t *, nfs_fhandle *, struct vfs *); 252*7c478bd9Sstevel@tonic-gate static void destroy_rnode(rnode_t *); 253*7c478bd9Sstevel@tonic-gate static void rddir_cache_free(rddir_cache *); 254*7c478bd9Sstevel@tonic-gate static int nfs_free_data_reclaim(rnode_t *); 255*7c478bd9Sstevel@tonic-gate static int nfs_active_data_reclaim(rnode_t *); 256*7c478bd9Sstevel@tonic-gate static int nfs_free_reclaim(void); 257*7c478bd9Sstevel@tonic-gate static int nfs_active_reclaim(void); 258*7c478bd9Sstevel@tonic-gate static int nfs_rnode_reclaim(void); 259*7c478bd9Sstevel@tonic-gate static void nfs_reclaim(void *); 260*7c478bd9Sstevel@tonic-gate static int failover_safe(failinfo_t *); 261*7c478bd9Sstevel@tonic-gate static void failover_newserver(mntinfo_t *mi); 262*7c478bd9Sstevel@tonic-gate static void failover_thread(mntinfo_t *mi); 263*7c478bd9Sstevel@tonic-gate static int failover_wait(mntinfo_t *); 264*7c478bd9Sstevel@tonic-gate static int failover_remap(failinfo_t *); 265*7c478bd9Sstevel@tonic-gate static int failover_lookup(char *, vnode_t *, 266*7c478bd9Sstevel@tonic-gate int (*)(vnode_t *, char *, vnode_t **, 267*7c478bd9Sstevel@tonic-gate struct pathname *, int, vnode_t *, cred_t *, int), 
268*7c478bd9Sstevel@tonic-gate int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int), 269*7c478bd9Sstevel@tonic-gate vnode_t **); 270*7c478bd9Sstevel@tonic-gate static void nfs_free_r_path(rnode_t *); 271*7c478bd9Sstevel@tonic-gate static void nfs_set_vroot(vnode_t *); 272*7c478bd9Sstevel@tonic-gate static char *nfs_getsrvnames(mntinfo_t *, size_t *); 273*7c478bd9Sstevel@tonic-gate 274*7c478bd9Sstevel@tonic-gate /* 275*7c478bd9Sstevel@tonic-gate * from rpcsec module (common/rpcsec) 276*7c478bd9Sstevel@tonic-gate */ 277*7c478bd9Sstevel@tonic-gate extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **); 278*7c478bd9Sstevel@tonic-gate extern void sec_clnt_freeh(AUTH *); 279*7c478bd9Sstevel@tonic-gate extern void sec_clnt_freeinfo(struct sec_data *); 280*7c478bd9Sstevel@tonic-gate 281*7c478bd9Sstevel@tonic-gate /* 282*7c478bd9Sstevel@tonic-gate * EIO or EINTR are not recoverable errors. 283*7c478bd9Sstevel@tonic-gate */ 284*7c478bd9Sstevel@tonic-gate #define IS_RECOVERABLE_ERROR(error) !((error == EINTR) || (error == EIO)) 285*7c478bd9Sstevel@tonic-gate 286*7c478bd9Sstevel@tonic-gate /* 287*7c478bd9Sstevel@tonic-gate * Common handle get program for NFS, NFS ACL, and NFS AUTH client. 
288*7c478bd9Sstevel@tonic-gate */ 289*7c478bd9Sstevel@tonic-gate static int 290*7c478bd9Sstevel@tonic-gate clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl, 291*7c478bd9Sstevel@tonic-gate struct chtab **chp, struct nfs_clnt *nfscl) 292*7c478bd9Sstevel@tonic-gate { 293*7c478bd9Sstevel@tonic-gate struct chhead *ch, *newch; 294*7c478bd9Sstevel@tonic-gate struct chhead **plistp; 295*7c478bd9Sstevel@tonic-gate struct chtab *cp; 296*7c478bd9Sstevel@tonic-gate int error; 297*7c478bd9Sstevel@tonic-gate k_sigset_t smask; 298*7c478bd9Sstevel@tonic-gate 299*7c478bd9Sstevel@tonic-gate if (newcl == NULL || chp == NULL || ci == NULL) 300*7c478bd9Sstevel@tonic-gate return (EINVAL); 301*7c478bd9Sstevel@tonic-gate 302*7c478bd9Sstevel@tonic-gate *newcl = NULL; 303*7c478bd9Sstevel@tonic-gate *chp = NULL; 304*7c478bd9Sstevel@tonic-gate 305*7c478bd9Sstevel@tonic-gate /* 306*7c478bd9Sstevel@tonic-gate * Find an unused handle or create one 307*7c478bd9Sstevel@tonic-gate */ 308*7c478bd9Sstevel@tonic-gate newch = NULL; 309*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.clgets.value.ui64++; 310*7c478bd9Sstevel@tonic-gate top: 311*7c478bd9Sstevel@tonic-gate /* 312*7c478bd9Sstevel@tonic-gate * Find the correct entry in the cache to check for free 313*7c478bd9Sstevel@tonic-gate * client handles. The search is based on the RPC program 314*7c478bd9Sstevel@tonic-gate * number, program version number, dev_t for the transport 315*7c478bd9Sstevel@tonic-gate * device, and the protocol family. 
316*7c478bd9Sstevel@tonic-gate */ 317*7c478bd9Sstevel@tonic-gate mutex_enter(&nfscl->nfscl_chtable_lock); 318*7c478bd9Sstevel@tonic-gate plistp = &nfscl->nfscl_chtable; 319*7c478bd9Sstevel@tonic-gate for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) { 320*7c478bd9Sstevel@tonic-gate if (ch->ch_prog == ci->cl_prog && 321*7c478bd9Sstevel@tonic-gate ch->ch_vers == ci->cl_vers && 322*7c478bd9Sstevel@tonic-gate ch->ch_dev == svp->sv_knconf->knc_rdev && 323*7c478bd9Sstevel@tonic-gate (strcmp(ch->ch_protofmly, 324*7c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_protofmly) == 0)) 325*7c478bd9Sstevel@tonic-gate break; 326*7c478bd9Sstevel@tonic-gate plistp = &ch->ch_next; 327*7c478bd9Sstevel@tonic-gate } 328*7c478bd9Sstevel@tonic-gate 329*7c478bd9Sstevel@tonic-gate /* 330*7c478bd9Sstevel@tonic-gate * If we didn't find a cache entry for this quadruple, then 331*7c478bd9Sstevel@tonic-gate * create one. If we don't have one already preallocated, 332*7c478bd9Sstevel@tonic-gate * then drop the cache lock, create one, and then start over. 333*7c478bd9Sstevel@tonic-gate * If we did have a preallocated entry, then just add it to 334*7c478bd9Sstevel@tonic-gate * the front of the list. 
335*7c478bd9Sstevel@tonic-gate */ 336*7c478bd9Sstevel@tonic-gate if (ch == NULL) { 337*7c478bd9Sstevel@tonic-gate if (newch == NULL) { 338*7c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 339*7c478bd9Sstevel@tonic-gate newch = kmem_alloc(sizeof (*newch), KM_SLEEP); 340*7c478bd9Sstevel@tonic-gate newch->ch_timesused = 0; 341*7c478bd9Sstevel@tonic-gate newch->ch_prog = ci->cl_prog; 342*7c478bd9Sstevel@tonic-gate newch->ch_vers = ci->cl_vers; 343*7c478bd9Sstevel@tonic-gate newch->ch_dev = svp->sv_knconf->knc_rdev; 344*7c478bd9Sstevel@tonic-gate newch->ch_protofmly = kmem_alloc( 345*7c478bd9Sstevel@tonic-gate strlen(svp->sv_knconf->knc_protofmly) + 1, 346*7c478bd9Sstevel@tonic-gate KM_SLEEP); 347*7c478bd9Sstevel@tonic-gate (void) strcpy(newch->ch_protofmly, 348*7c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_protofmly); 349*7c478bd9Sstevel@tonic-gate newch->ch_list = NULL; 350*7c478bd9Sstevel@tonic-gate goto top; 351*7c478bd9Sstevel@tonic-gate } 352*7c478bd9Sstevel@tonic-gate ch = newch; 353*7c478bd9Sstevel@tonic-gate newch = NULL; 354*7c478bd9Sstevel@tonic-gate ch->ch_next = nfscl->nfscl_chtable; 355*7c478bd9Sstevel@tonic-gate nfscl->nfscl_chtable = ch; 356*7c478bd9Sstevel@tonic-gate /* 357*7c478bd9Sstevel@tonic-gate * We found a cache entry, but if it isn't on the front of the 358*7c478bd9Sstevel@tonic-gate * list, then move it to the front of the list to try to take 359*7c478bd9Sstevel@tonic-gate * advantage of locality of operations. 
360*7c478bd9Sstevel@tonic-gate */ 361*7c478bd9Sstevel@tonic-gate } else if (ch != nfscl->nfscl_chtable) { 362*7c478bd9Sstevel@tonic-gate *plistp = ch->ch_next; 363*7c478bd9Sstevel@tonic-gate ch->ch_next = nfscl->nfscl_chtable; 364*7c478bd9Sstevel@tonic-gate nfscl->nfscl_chtable = ch; 365*7c478bd9Sstevel@tonic-gate } 366*7c478bd9Sstevel@tonic-gate 367*7c478bd9Sstevel@tonic-gate /* 368*7c478bd9Sstevel@tonic-gate * If there was a free client handle cached, then remove it 369*7c478bd9Sstevel@tonic-gate * from the list, init it, and use it. 370*7c478bd9Sstevel@tonic-gate */ 371*7c478bd9Sstevel@tonic-gate if (ch->ch_list != NULL) { 372*7c478bd9Sstevel@tonic-gate cp = ch->ch_list; 373*7c478bd9Sstevel@tonic-gate ch->ch_list = cp->ch_list; 374*7c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 375*7c478bd9Sstevel@tonic-gate if (newch != NULL) { 376*7c478bd9Sstevel@tonic-gate kmem_free(newch->ch_protofmly, 377*7c478bd9Sstevel@tonic-gate strlen(newch->ch_protofmly) + 1); 378*7c478bd9Sstevel@tonic-gate kmem_free(newch, sizeof (*newch)); 379*7c478bd9Sstevel@tonic-gate } 380*7c478bd9Sstevel@tonic-gate (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf, 381*7c478bd9Sstevel@tonic-gate &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr); 382*7c478bd9Sstevel@tonic-gate error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr, 383*7c478bd9Sstevel@tonic-gate &cp->ch_client->cl_auth); 384*7c478bd9Sstevel@tonic-gate if (error || cp->ch_client->cl_auth == NULL) { 385*7c478bd9Sstevel@tonic-gate CLNT_DESTROY(cp->ch_client); 386*7c478bd9Sstevel@tonic-gate kmem_cache_free(chtab_cache, cp); 387*7c478bd9Sstevel@tonic-gate return ((error != 0) ? 
error : EINTR); 388*7c478bd9Sstevel@tonic-gate } 389*7c478bd9Sstevel@tonic-gate ch->ch_timesused++; 390*7c478bd9Sstevel@tonic-gate *newcl = cp->ch_client; 391*7c478bd9Sstevel@tonic-gate *chp = cp; 392*7c478bd9Sstevel@tonic-gate return (0); 393*7c478bd9Sstevel@tonic-gate } 394*7c478bd9Sstevel@tonic-gate 395*7c478bd9Sstevel@tonic-gate /* 396*7c478bd9Sstevel@tonic-gate * There weren't any free client handles which fit, so allocate 397*7c478bd9Sstevel@tonic-gate * a new one and use that. 398*7c478bd9Sstevel@tonic-gate */ 399*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 400*7c478bd9Sstevel@tonic-gate atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1); 401*7c478bd9Sstevel@tonic-gate #endif 402*7c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 403*7c478bd9Sstevel@tonic-gate 404*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.cltoomany.value.ui64++; 405*7c478bd9Sstevel@tonic-gate if (newch != NULL) { 406*7c478bd9Sstevel@tonic-gate kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1); 407*7c478bd9Sstevel@tonic-gate kmem_free(newch, sizeof (*newch)); 408*7c478bd9Sstevel@tonic-gate } 409*7c478bd9Sstevel@tonic-gate 410*7c478bd9Sstevel@tonic-gate cp = kmem_cache_alloc(chtab_cache, KM_SLEEP); 411*7c478bd9Sstevel@tonic-gate cp->ch_head = ch; 412*7c478bd9Sstevel@tonic-gate 413*7c478bd9Sstevel@tonic-gate sigintr(&smask, (int)ci->cl_flags & MI_INT); 414*7c478bd9Sstevel@tonic-gate error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog, 415*7c478bd9Sstevel@tonic-gate ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client); 416*7c478bd9Sstevel@tonic-gate sigunintr(&smask); 417*7c478bd9Sstevel@tonic-gate 418*7c478bd9Sstevel@tonic-gate if (error != 0) { 419*7c478bd9Sstevel@tonic-gate kmem_cache_free(chtab_cache, cp); 420*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 421*7c478bd9Sstevel@tonic-gate atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1); 422*7c478bd9Sstevel@tonic-gate #endif 423*7c478bd9Sstevel@tonic-gate /* 
424*7c478bd9Sstevel@tonic-gate * Warning is unnecessary if error is EINTR. 425*7c478bd9Sstevel@tonic-gate */ 426*7c478bd9Sstevel@tonic-gate if (error != EINTR) { 427*7c478bd9Sstevel@tonic-gate nfs_cmn_err(error, CE_WARN, 428*7c478bd9Sstevel@tonic-gate "clget: couldn't create handle: %m\n"); 429*7c478bd9Sstevel@tonic-gate } 430*7c478bd9Sstevel@tonic-gate return (error); 431*7c478bd9Sstevel@tonic-gate } 432*7c478bd9Sstevel@tonic-gate (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL); 433*7c478bd9Sstevel@tonic-gate auth_destroy(cp->ch_client->cl_auth); 434*7c478bd9Sstevel@tonic-gate error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr, 435*7c478bd9Sstevel@tonic-gate &cp->ch_client->cl_auth); 436*7c478bd9Sstevel@tonic-gate if (error || cp->ch_client->cl_auth == NULL) { 437*7c478bd9Sstevel@tonic-gate CLNT_DESTROY(cp->ch_client); 438*7c478bd9Sstevel@tonic-gate kmem_cache_free(chtab_cache, cp); 439*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 440*7c478bd9Sstevel@tonic-gate atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1); 441*7c478bd9Sstevel@tonic-gate #endif 442*7c478bd9Sstevel@tonic-gate return ((error != 0) ? 
error : EINTR); 443*7c478bd9Sstevel@tonic-gate } 444*7c478bd9Sstevel@tonic-gate ch->ch_timesused++; 445*7c478bd9Sstevel@tonic-gate *newcl = cp->ch_client; 446*7c478bd9Sstevel@tonic-gate ASSERT(cp->ch_client->cl_nosignal == FALSE); 447*7c478bd9Sstevel@tonic-gate *chp = cp; 448*7c478bd9Sstevel@tonic-gate return (0); 449*7c478bd9Sstevel@tonic-gate } 450*7c478bd9Sstevel@tonic-gate 451*7c478bd9Sstevel@tonic-gate int 452*7c478bd9Sstevel@tonic-gate clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl, 453*7c478bd9Sstevel@tonic-gate struct chtab **chp) 454*7c478bd9Sstevel@tonic-gate { 455*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 456*7c478bd9Sstevel@tonic-gate 457*7c478bd9Sstevel@tonic-gate nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone); 458*7c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 459*7c478bd9Sstevel@tonic-gate 460*7c478bd9Sstevel@tonic-gate return (clget_impl(ci, svp, cr, newcl, chp, nfscl)); 461*7c478bd9Sstevel@tonic-gate } 462*7c478bd9Sstevel@tonic-gate 463*7c478bd9Sstevel@tonic-gate static int 464*7c478bd9Sstevel@tonic-gate acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl, 465*7c478bd9Sstevel@tonic-gate struct chtab **chp, struct nfs_clnt *nfscl) 466*7c478bd9Sstevel@tonic-gate { 467*7c478bd9Sstevel@tonic-gate clinfo_t ci; 468*7c478bd9Sstevel@tonic-gate int error; 469*7c478bd9Sstevel@tonic-gate 470*7c478bd9Sstevel@tonic-gate /* 471*7c478bd9Sstevel@tonic-gate * Set read buffer size to rsize 472*7c478bd9Sstevel@tonic-gate * and add room for RPC headers. 473*7c478bd9Sstevel@tonic-gate */ 474*7c478bd9Sstevel@tonic-gate ci.cl_readsize = mi->mi_tsize; 475*7c478bd9Sstevel@tonic-gate if (ci.cl_readsize != 0) 476*7c478bd9Sstevel@tonic-gate ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA); 477*7c478bd9Sstevel@tonic-gate 478*7c478bd9Sstevel@tonic-gate /* 479*7c478bd9Sstevel@tonic-gate * If soft mount and server is down just try once. 480*7c478bd9Sstevel@tonic-gate * meaning: do not retransmit. 
481*7c478bd9Sstevel@tonic-gate */ 482*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN)) 483*7c478bd9Sstevel@tonic-gate ci.cl_retrans = 0; 484*7c478bd9Sstevel@tonic-gate else 485*7c478bd9Sstevel@tonic-gate ci.cl_retrans = mi->mi_retrans; 486*7c478bd9Sstevel@tonic-gate 487*7c478bd9Sstevel@tonic-gate ci.cl_prog = NFS_ACL_PROGRAM; 488*7c478bd9Sstevel@tonic-gate ci.cl_vers = mi->mi_vers; 489*7c478bd9Sstevel@tonic-gate ci.cl_flags = mi->mi_flags; 490*7c478bd9Sstevel@tonic-gate 491*7c478bd9Sstevel@tonic-gate /* 492*7c478bd9Sstevel@tonic-gate * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS 493*7c478bd9Sstevel@tonic-gate * security flavor, the client tries to establish a security context 494*7c478bd9Sstevel@tonic-gate * by contacting the server. If the connection is timed out or reset, 495*7c478bd9Sstevel@tonic-gate * e.g. server reboot, we will try again. 496*7c478bd9Sstevel@tonic-gate */ 497*7c478bd9Sstevel@tonic-gate do { 498*7c478bd9Sstevel@tonic-gate error = clget_impl(&ci, svp, cr, newcl, chp, nfscl); 499*7c478bd9Sstevel@tonic-gate 500*7c478bd9Sstevel@tonic-gate if (error == 0) 501*7c478bd9Sstevel@tonic-gate break; 502*7c478bd9Sstevel@tonic-gate 503*7c478bd9Sstevel@tonic-gate /* 504*7c478bd9Sstevel@tonic-gate * For forced unmount or zone shutdown, bail out, no retry. 
505*7c478bd9Sstevel@tonic-gate */ 506*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 507*7c478bd9Sstevel@tonic-gate error = EIO; 508*7c478bd9Sstevel@tonic-gate break; 509*7c478bd9Sstevel@tonic-gate } 510*7c478bd9Sstevel@tonic-gate 511*7c478bd9Sstevel@tonic-gate /* do not retry for softmount */ 512*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) 513*7c478bd9Sstevel@tonic-gate break; 514*7c478bd9Sstevel@tonic-gate 515*7c478bd9Sstevel@tonic-gate /* let the caller deal with the failover case */ 516*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) 517*7c478bd9Sstevel@tonic-gate break; 518*7c478bd9Sstevel@tonic-gate 519*7c478bd9Sstevel@tonic-gate } while (error == ETIMEDOUT || error == ECONNRESET); 520*7c478bd9Sstevel@tonic-gate 521*7c478bd9Sstevel@tonic-gate return (error); 522*7c478bd9Sstevel@tonic-gate } 523*7c478bd9Sstevel@tonic-gate 524*7c478bd9Sstevel@tonic-gate static int 525*7c478bd9Sstevel@tonic-gate nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl, 526*7c478bd9Sstevel@tonic-gate struct chtab **chp, struct nfs_clnt *nfscl) 527*7c478bd9Sstevel@tonic-gate { 528*7c478bd9Sstevel@tonic-gate clinfo_t ci; 529*7c478bd9Sstevel@tonic-gate int error; 530*7c478bd9Sstevel@tonic-gate 531*7c478bd9Sstevel@tonic-gate /* 532*7c478bd9Sstevel@tonic-gate * Set read buffer size to rsize 533*7c478bd9Sstevel@tonic-gate * and add room for RPC headers. 534*7c478bd9Sstevel@tonic-gate */ 535*7c478bd9Sstevel@tonic-gate ci.cl_readsize = mi->mi_tsize; 536*7c478bd9Sstevel@tonic-gate if (ci.cl_readsize != 0) 537*7c478bd9Sstevel@tonic-gate ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA); 538*7c478bd9Sstevel@tonic-gate 539*7c478bd9Sstevel@tonic-gate /* 540*7c478bd9Sstevel@tonic-gate * If soft mount and server is down just try once. 541*7c478bd9Sstevel@tonic-gate * meaning: do not retransmit. 
542*7c478bd9Sstevel@tonic-gate */ 543*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN)) 544*7c478bd9Sstevel@tonic-gate ci.cl_retrans = 0; 545*7c478bd9Sstevel@tonic-gate else 546*7c478bd9Sstevel@tonic-gate ci.cl_retrans = mi->mi_retrans; 547*7c478bd9Sstevel@tonic-gate 548*7c478bd9Sstevel@tonic-gate ci.cl_prog = mi->mi_prog; 549*7c478bd9Sstevel@tonic-gate ci.cl_vers = mi->mi_vers; 550*7c478bd9Sstevel@tonic-gate ci.cl_flags = mi->mi_flags; 551*7c478bd9Sstevel@tonic-gate 552*7c478bd9Sstevel@tonic-gate /* 553*7c478bd9Sstevel@tonic-gate * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS 554*7c478bd9Sstevel@tonic-gate * security flavor, the client tries to establish a security context 555*7c478bd9Sstevel@tonic-gate * by contacting the server. If the connection is timed out or reset, 556*7c478bd9Sstevel@tonic-gate * e.g. server reboot, we will try again. 557*7c478bd9Sstevel@tonic-gate */ 558*7c478bd9Sstevel@tonic-gate do { 559*7c478bd9Sstevel@tonic-gate error = clget_impl(&ci, svp, cr, newcl, chp, nfscl); 560*7c478bd9Sstevel@tonic-gate 561*7c478bd9Sstevel@tonic-gate if (error == 0) 562*7c478bd9Sstevel@tonic-gate break; 563*7c478bd9Sstevel@tonic-gate 564*7c478bd9Sstevel@tonic-gate /* 565*7c478bd9Sstevel@tonic-gate * For forced unmount or zone shutdown, bail out, no retry. 
566*7c478bd9Sstevel@tonic-gate */ 567*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 568*7c478bd9Sstevel@tonic-gate error = EIO; 569*7c478bd9Sstevel@tonic-gate break; 570*7c478bd9Sstevel@tonic-gate } 571*7c478bd9Sstevel@tonic-gate 572*7c478bd9Sstevel@tonic-gate /* do not retry for softmount */ 573*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) 574*7c478bd9Sstevel@tonic-gate break; 575*7c478bd9Sstevel@tonic-gate 576*7c478bd9Sstevel@tonic-gate /* let the caller deal with the failover case */ 577*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) 578*7c478bd9Sstevel@tonic-gate break; 579*7c478bd9Sstevel@tonic-gate 580*7c478bd9Sstevel@tonic-gate } while (error == ETIMEDOUT || error == ECONNRESET); 581*7c478bd9Sstevel@tonic-gate 582*7c478bd9Sstevel@tonic-gate return (error); 583*7c478bd9Sstevel@tonic-gate } 584*7c478bd9Sstevel@tonic-gate 585*7c478bd9Sstevel@tonic-gate static void 586*7c478bd9Sstevel@tonic-gate clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl) 587*7c478bd9Sstevel@tonic-gate { 588*7c478bd9Sstevel@tonic-gate if (cl->cl_auth != NULL) { 589*7c478bd9Sstevel@tonic-gate sec_clnt_freeh(cl->cl_auth); 590*7c478bd9Sstevel@tonic-gate cl->cl_auth = NULL; 591*7c478bd9Sstevel@tonic-gate } 592*7c478bd9Sstevel@tonic-gate 593*7c478bd9Sstevel@tonic-gate /* 594*7c478bd9Sstevel@tonic-gate * Timestamp this cache entry so that we know when it was last 595*7c478bd9Sstevel@tonic-gate * used. 596*7c478bd9Sstevel@tonic-gate */ 597*7c478bd9Sstevel@tonic-gate cp->ch_freed = gethrestime_sec(); 598*7c478bd9Sstevel@tonic-gate 599*7c478bd9Sstevel@tonic-gate /* 600*7c478bd9Sstevel@tonic-gate * Add the free client handle to the front of the list. 601*7c478bd9Sstevel@tonic-gate * This way, the list will be sorted in youngest to oldest 602*7c478bd9Sstevel@tonic-gate * order. 
603*7c478bd9Sstevel@tonic-gate */ 604*7c478bd9Sstevel@tonic-gate mutex_enter(&nfscl->nfscl_chtable_lock); 605*7c478bd9Sstevel@tonic-gate cp->ch_list = cp->ch_head->ch_list; 606*7c478bd9Sstevel@tonic-gate cp->ch_head->ch_list = cp; 607*7c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 608*7c478bd9Sstevel@tonic-gate } 609*7c478bd9Sstevel@tonic-gate 610*7c478bd9Sstevel@tonic-gate void 611*7c478bd9Sstevel@tonic-gate clfree(CLIENT *cl, struct chtab *cp) 612*7c478bd9Sstevel@tonic-gate { 613*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 614*7c478bd9Sstevel@tonic-gate 615*7c478bd9Sstevel@tonic-gate nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone); 616*7c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 617*7c478bd9Sstevel@tonic-gate 618*7c478bd9Sstevel@tonic-gate clfree_impl(cl, cp, nfscl); 619*7c478bd9Sstevel@tonic-gate } 620*7c478bd9Sstevel@tonic-gate 621*7c478bd9Sstevel@tonic-gate #define CL_HOLDTIME 60 /* time to hold client handles */ 622*7c478bd9Sstevel@tonic-gate 623*7c478bd9Sstevel@tonic-gate static void 624*7c478bd9Sstevel@tonic-gate clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime) 625*7c478bd9Sstevel@tonic-gate { 626*7c478bd9Sstevel@tonic-gate struct chhead *ch; 627*7c478bd9Sstevel@tonic-gate struct chtab *cp; /* list of objects that can be reclaimed */ 628*7c478bd9Sstevel@tonic-gate struct chtab *cpe; 629*7c478bd9Sstevel@tonic-gate struct chtab *cpl; 630*7c478bd9Sstevel@tonic-gate struct chtab **cpp; 631*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 632*7c478bd9Sstevel@tonic-gate int n = 0; 633*7c478bd9Sstevel@tonic-gate #endif 634*7c478bd9Sstevel@tonic-gate 635*7c478bd9Sstevel@tonic-gate /* 636*7c478bd9Sstevel@tonic-gate * Need to reclaim some memory, so step through the cache 637*7c478bd9Sstevel@tonic-gate * looking through the lists for entries which can be freed. 
638*7c478bd9Sstevel@tonic-gate */ 639*7c478bd9Sstevel@tonic-gate cp = NULL; 640*7c478bd9Sstevel@tonic-gate 641*7c478bd9Sstevel@tonic-gate mutex_enter(&nfscl->nfscl_chtable_lock); 642*7c478bd9Sstevel@tonic-gate 643*7c478bd9Sstevel@tonic-gate /* 644*7c478bd9Sstevel@tonic-gate * Here we step through each non-NULL quadruple and start to 645*7c478bd9Sstevel@tonic-gate * construct the reclaim list pointed to by cp. Note that 646*7c478bd9Sstevel@tonic-gate * cp will contain all eligible chtab entries. When this traversal 647*7c478bd9Sstevel@tonic-gate * completes, chtab entries from the last quadruple will be at the 648*7c478bd9Sstevel@tonic-gate * front of cp and entries from previously inspected quadruples have 649*7c478bd9Sstevel@tonic-gate * been appended to the rear of cp. 650*7c478bd9Sstevel@tonic-gate */ 651*7c478bd9Sstevel@tonic-gate for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) { 652*7c478bd9Sstevel@tonic-gate if (ch->ch_list == NULL) 653*7c478bd9Sstevel@tonic-gate continue; 654*7c478bd9Sstevel@tonic-gate /* 655*7c478bd9Sstevel@tonic-gate * Search each list for entries older then 656*7c478bd9Sstevel@tonic-gate * cl_holdtime seconds. The lists are maintained 657*7c478bd9Sstevel@tonic-gate * in youngest to oldest order so that when the 658*7c478bd9Sstevel@tonic-gate * first entry is found which is old enough, then 659*7c478bd9Sstevel@tonic-gate * all of the rest of the entries on the list will 660*7c478bd9Sstevel@tonic-gate * be old enough as well. 
661*7c478bd9Sstevel@tonic-gate */ 662*7c478bd9Sstevel@tonic-gate cpl = ch->ch_list; 663*7c478bd9Sstevel@tonic-gate cpp = &ch->ch_list; 664*7c478bd9Sstevel@tonic-gate while (cpl != NULL && 665*7c478bd9Sstevel@tonic-gate cpl->ch_freed + cl_holdtime > gethrestime_sec()) { 666*7c478bd9Sstevel@tonic-gate cpp = &cpl->ch_list; 667*7c478bd9Sstevel@tonic-gate cpl = cpl->ch_list; 668*7c478bd9Sstevel@tonic-gate } 669*7c478bd9Sstevel@tonic-gate if (cpl != NULL) { 670*7c478bd9Sstevel@tonic-gate *cpp = NULL; 671*7c478bd9Sstevel@tonic-gate if (cp != NULL) { 672*7c478bd9Sstevel@tonic-gate cpe = cpl; 673*7c478bd9Sstevel@tonic-gate while (cpe->ch_list != NULL) 674*7c478bd9Sstevel@tonic-gate cpe = cpe->ch_list; 675*7c478bd9Sstevel@tonic-gate cpe->ch_list = cp; 676*7c478bd9Sstevel@tonic-gate } 677*7c478bd9Sstevel@tonic-gate cp = cpl; 678*7c478bd9Sstevel@tonic-gate } 679*7c478bd9Sstevel@tonic-gate } 680*7c478bd9Sstevel@tonic-gate 681*7c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 682*7c478bd9Sstevel@tonic-gate 683*7c478bd9Sstevel@tonic-gate /* 684*7c478bd9Sstevel@tonic-gate * If cp is empty, then there is nothing to reclaim here. 685*7c478bd9Sstevel@tonic-gate */ 686*7c478bd9Sstevel@tonic-gate if (cp == NULL) 687*7c478bd9Sstevel@tonic-gate return; 688*7c478bd9Sstevel@tonic-gate 689*7c478bd9Sstevel@tonic-gate /* 690*7c478bd9Sstevel@tonic-gate * Step through the list of entries to free, destroying each client 691*7c478bd9Sstevel@tonic-gate * handle and kmem_free'ing the memory for each entry. 
692*7c478bd9Sstevel@tonic-gate */ 693*7c478bd9Sstevel@tonic-gate while (cp != NULL) { 694*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 695*7c478bd9Sstevel@tonic-gate n++; 696*7c478bd9Sstevel@tonic-gate #endif 697*7c478bd9Sstevel@tonic-gate CLNT_DESTROY(cp->ch_client); 698*7c478bd9Sstevel@tonic-gate cpl = cp->ch_list; 699*7c478bd9Sstevel@tonic-gate kmem_cache_free(chtab_cache, cp); 700*7c478bd9Sstevel@tonic-gate cp = cpl; 701*7c478bd9Sstevel@tonic-gate } 702*7c478bd9Sstevel@tonic-gate 703*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 704*7c478bd9Sstevel@tonic-gate /* 705*7c478bd9Sstevel@tonic-gate * Update clalloc so that nfsstat shows the current number 706*7c478bd9Sstevel@tonic-gate * of allocated client handles. 707*7c478bd9Sstevel@tonic-gate */ 708*7c478bd9Sstevel@tonic-gate atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n); 709*7c478bd9Sstevel@tonic-gate #endif 710*7c478bd9Sstevel@tonic-gate } 711*7c478bd9Sstevel@tonic-gate 712*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 713*7c478bd9Sstevel@tonic-gate static void 714*7c478bd9Sstevel@tonic-gate clreclaim(void *all) 715*7c478bd9Sstevel@tonic-gate { 716*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 717*7c478bd9Sstevel@tonic-gate 718*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 719*7c478bd9Sstevel@tonic-gate clstat_debug.clreclaim.value.ui64++; 720*7c478bd9Sstevel@tonic-gate #endif 721*7c478bd9Sstevel@tonic-gate /* 722*7c478bd9Sstevel@tonic-gate * The system is low on memory; go through and try to reclaim some from 723*7c478bd9Sstevel@tonic-gate * every zone on the system. 
724*7c478bd9Sstevel@tonic-gate */ 725*7c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 726*7c478bd9Sstevel@tonic-gate nfscl = list_head(&nfs_clnt_list); 727*7c478bd9Sstevel@tonic-gate for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) 728*7c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, CL_HOLDTIME); 729*7c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 730*7c478bd9Sstevel@tonic-gate } 731*7c478bd9Sstevel@tonic-gate 732*7c478bd9Sstevel@tonic-gate /* 733*7c478bd9Sstevel@tonic-gate * Minimum time-out values indexed by call type 734*7c478bd9Sstevel@tonic-gate * These units are in "eights" of a second to avoid multiplies 735*7c478bd9Sstevel@tonic-gate */ 736*7c478bd9Sstevel@tonic-gate static unsigned int minimum_timeo[] = { 737*7c478bd9Sstevel@tonic-gate 6, 7, 10 738*7c478bd9Sstevel@tonic-gate }; 739*7c478bd9Sstevel@tonic-gate 740*7c478bd9Sstevel@tonic-gate /* 741*7c478bd9Sstevel@tonic-gate * Back off for retransmission timeout, MAXTIMO is in hz of a sec 742*7c478bd9Sstevel@tonic-gate */ 743*7c478bd9Sstevel@tonic-gate #define MAXTIMO (20*hz) 744*7c478bd9Sstevel@tonic-gate #define backoff(tim) (((tim) < MAXTIMO) ? dobackoff(tim) : (tim)) 745*7c478bd9Sstevel@tonic-gate #define dobackoff(tim) ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1)) 746*7c478bd9Sstevel@tonic-gate 747*7c478bd9Sstevel@tonic-gate #define MIN_NFS_TSIZE 512 /* minimum "chunk" of NFS IO */ 748*7c478bd9Sstevel@tonic-gate #define REDUCE_NFS_TIME (hz/2) /* rtxcur we try to keep under */ 749*7c478bd9Sstevel@tonic-gate #define INCREASE_NFS_TIME (hz/3*8) /* srtt we try to keep under (scaled*8) */ 750*7c478bd9Sstevel@tonic-gate 751*7c478bd9Sstevel@tonic-gate /* 752*7c478bd9Sstevel@tonic-gate * Function called when rfscall notices that we have been 753*7c478bd9Sstevel@tonic-gate * re-transmitting, or when we get a response without retransmissions. 754*7c478bd9Sstevel@tonic-gate * Return 1 if the transfer size was adjusted down - 0 if no change. 
755*7c478bd9Sstevel@tonic-gate */ 756*7c478bd9Sstevel@tonic-gate static int 757*7c478bd9Sstevel@tonic-gate nfs_feedback(int flag, int which, mntinfo_t *mi) 758*7c478bd9Sstevel@tonic-gate { 759*7c478bd9Sstevel@tonic-gate int kind; 760*7c478bd9Sstevel@tonic-gate int r = 0; 761*7c478bd9Sstevel@tonic-gate 762*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 763*7c478bd9Sstevel@tonic-gate if (flag == FEEDBACK_REXMIT1) { 764*7c478bd9Sstevel@tonic-gate if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 && 765*7c478bd9Sstevel@tonic-gate mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME) 766*7c478bd9Sstevel@tonic-gate goto done; 767*7c478bd9Sstevel@tonic-gate if (mi->mi_curread > MIN_NFS_TSIZE) { 768*7c478bd9Sstevel@tonic-gate mi->mi_curread /= 2; 769*7c478bd9Sstevel@tonic-gate if (mi->mi_curread < MIN_NFS_TSIZE) 770*7c478bd9Sstevel@tonic-gate mi->mi_curread = MIN_NFS_TSIZE; 771*7c478bd9Sstevel@tonic-gate r = 1; 772*7c478bd9Sstevel@tonic-gate } 773*7c478bd9Sstevel@tonic-gate 774*7c478bd9Sstevel@tonic-gate if (mi->mi_curwrite > MIN_NFS_TSIZE) { 775*7c478bd9Sstevel@tonic-gate mi->mi_curwrite /= 2; 776*7c478bd9Sstevel@tonic-gate if (mi->mi_curwrite < MIN_NFS_TSIZE) 777*7c478bd9Sstevel@tonic-gate mi->mi_curwrite = MIN_NFS_TSIZE; 778*7c478bd9Sstevel@tonic-gate r = 1; 779*7c478bd9Sstevel@tonic-gate } 780*7c478bd9Sstevel@tonic-gate } else if (flag == FEEDBACK_OK) { 781*7c478bd9Sstevel@tonic-gate kind = mi->mi_timer_type[which]; 782*7c478bd9Sstevel@tonic-gate if (kind == 0 || 783*7c478bd9Sstevel@tonic-gate mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME) 784*7c478bd9Sstevel@tonic-gate goto done; 785*7c478bd9Sstevel@tonic-gate if (kind == 1) { 786*7c478bd9Sstevel@tonic-gate if (mi->mi_curread >= mi->mi_tsize) 787*7c478bd9Sstevel@tonic-gate goto done; 788*7c478bd9Sstevel@tonic-gate mi->mi_curread += MIN_NFS_TSIZE; 789*7c478bd9Sstevel@tonic-gate if (mi->mi_curread > mi->mi_tsize/2) 790*7c478bd9Sstevel@tonic-gate mi->mi_curread = mi->mi_tsize; 791*7c478bd9Sstevel@tonic-gate } 
else if (kind == 2) { 792*7c478bd9Sstevel@tonic-gate if (mi->mi_curwrite >= mi->mi_stsize) 793*7c478bd9Sstevel@tonic-gate goto done; 794*7c478bd9Sstevel@tonic-gate mi->mi_curwrite += MIN_NFS_TSIZE; 795*7c478bd9Sstevel@tonic-gate if (mi->mi_curwrite > mi->mi_stsize/2) 796*7c478bd9Sstevel@tonic-gate mi->mi_curwrite = mi->mi_stsize; 797*7c478bd9Sstevel@tonic-gate } 798*7c478bd9Sstevel@tonic-gate } 799*7c478bd9Sstevel@tonic-gate done: 800*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 801*7c478bd9Sstevel@tonic-gate return (r); 802*7c478bd9Sstevel@tonic-gate } 803*7c478bd9Sstevel@tonic-gate 804*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 805*7c478bd9Sstevel@tonic-gate static int rfs2call_hits = 0; 806*7c478bd9Sstevel@tonic-gate static int rfs2call_misses = 0; 807*7c478bd9Sstevel@tonic-gate #endif 808*7c478bd9Sstevel@tonic-gate 809*7c478bd9Sstevel@tonic-gate int 810*7c478bd9Sstevel@tonic-gate rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 811*7c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf, 812*7c478bd9Sstevel@tonic-gate enum nfsstat *statusp, int flags, failinfo_t *fi) 813*7c478bd9Sstevel@tonic-gate { 814*7c478bd9Sstevel@tonic-gate int rpcerror; 815*7c478bd9Sstevel@tonic-gate enum clnt_stat rpc_status; 816*7c478bd9Sstevel@tonic-gate 817*7c478bd9Sstevel@tonic-gate ASSERT(statusp != NULL); 818*7c478bd9Sstevel@tonic-gate 819*7c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp, 820*7c478bd9Sstevel@tonic-gate cr, douprintf, &rpc_status, flags, fi); 821*7c478bd9Sstevel@tonic-gate if (!rpcerror) { 822*7c478bd9Sstevel@tonic-gate /* 823*7c478bd9Sstevel@tonic-gate * See crnetadjust() for comments. 
824*7c478bd9Sstevel@tonic-gate */ 825*7c478bd9Sstevel@tonic-gate if (*statusp == NFSERR_ACCES && 826*7c478bd9Sstevel@tonic-gate (cr = crnetadjust(cr)) != NULL) { 827*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 828*7c478bd9Sstevel@tonic-gate rfs2call_hits++; 829*7c478bd9Sstevel@tonic-gate #endif 830*7c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, 831*7c478bd9Sstevel@tonic-gate resp, cr, douprintf, NULL, flags, fi); 832*7c478bd9Sstevel@tonic-gate crfree(cr); 833*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 834*7c478bd9Sstevel@tonic-gate if (*statusp == NFSERR_ACCES) 835*7c478bd9Sstevel@tonic-gate rfs2call_misses++; 836*7c478bd9Sstevel@tonic-gate #endif 837*7c478bd9Sstevel@tonic-gate } 838*7c478bd9Sstevel@tonic-gate } else if (rpc_status == RPC_PROCUNAVAIL) { 839*7c478bd9Sstevel@tonic-gate *statusp = NFSERR_OPNOTSUPP; 840*7c478bd9Sstevel@tonic-gate rpcerror = 0; 841*7c478bd9Sstevel@tonic-gate } 842*7c478bd9Sstevel@tonic-gate 843*7c478bd9Sstevel@tonic-gate return (rpcerror); 844*7c478bd9Sstevel@tonic-gate } 845*7c478bd9Sstevel@tonic-gate 846*7c478bd9Sstevel@tonic-gate #define NFS3_JUKEBOX_DELAY 10 * hz 847*7c478bd9Sstevel@tonic-gate 848*7c478bd9Sstevel@tonic-gate static clock_t nfs3_jukebox_delay = 0; 849*7c478bd9Sstevel@tonic-gate 850*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 851*7c478bd9Sstevel@tonic-gate static int rfs3call_hits = 0; 852*7c478bd9Sstevel@tonic-gate static int rfs3call_misses = 0; 853*7c478bd9Sstevel@tonic-gate #endif 854*7c478bd9Sstevel@tonic-gate 855*7c478bd9Sstevel@tonic-gate int 856*7c478bd9Sstevel@tonic-gate rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 857*7c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf, 858*7c478bd9Sstevel@tonic-gate nfsstat3 *statusp, int flags, failinfo_t *fi) 859*7c478bd9Sstevel@tonic-gate { 860*7c478bd9Sstevel@tonic-gate int rpcerror; 861*7c478bd9Sstevel@tonic-gate int user_informed; 862*7c478bd9Sstevel@tonic-gate 
863*7c478bd9Sstevel@tonic-gate user_informed = 0; 864*7c478bd9Sstevel@tonic-gate do { 865*7c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp, 866*7c478bd9Sstevel@tonic-gate cr, douprintf, NULL, flags, fi); 867*7c478bd9Sstevel@tonic-gate if (!rpcerror) { 868*7c478bd9Sstevel@tonic-gate cred_t *crr; 869*7c478bd9Sstevel@tonic-gate if (*statusp == NFS3ERR_JUKEBOX) { 870*7c478bd9Sstevel@tonic-gate if (ttoproc(curthread) == &p0) { 871*7c478bd9Sstevel@tonic-gate rpcerror = EAGAIN; 872*7c478bd9Sstevel@tonic-gate break; 873*7c478bd9Sstevel@tonic-gate } 874*7c478bd9Sstevel@tonic-gate if (!user_informed) { 875*7c478bd9Sstevel@tonic-gate user_informed = 1; 876*7c478bd9Sstevel@tonic-gate uprintf( 877*7c478bd9Sstevel@tonic-gate "file temporarily unavailable on the server, retrying...\n"); 878*7c478bd9Sstevel@tonic-gate } 879*7c478bd9Sstevel@tonic-gate delay(nfs3_jukebox_delay); 880*7c478bd9Sstevel@tonic-gate } 881*7c478bd9Sstevel@tonic-gate /* 882*7c478bd9Sstevel@tonic-gate * See crnetadjust() for comments. 
883*7c478bd9Sstevel@tonic-gate */ 884*7c478bd9Sstevel@tonic-gate else if (*statusp == NFS3ERR_ACCES && 885*7c478bd9Sstevel@tonic-gate (crr = crnetadjust(cr)) != NULL) { 886*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 887*7c478bd9Sstevel@tonic-gate rfs3call_hits++; 888*7c478bd9Sstevel@tonic-gate #endif 889*7c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, 890*7c478bd9Sstevel@tonic-gate xdrres, resp, crr, douprintf, 891*7c478bd9Sstevel@tonic-gate NULL, flags, fi); 892*7c478bd9Sstevel@tonic-gate 893*7c478bd9Sstevel@tonic-gate crfree(crr); 894*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 895*7c478bd9Sstevel@tonic-gate if (*statusp == NFS3ERR_ACCES) 896*7c478bd9Sstevel@tonic-gate rfs3call_misses++; 897*7c478bd9Sstevel@tonic-gate #endif 898*7c478bd9Sstevel@tonic-gate } 899*7c478bd9Sstevel@tonic-gate } 900*7c478bd9Sstevel@tonic-gate } while (!rpcerror && *statusp == NFS3ERR_JUKEBOX); 901*7c478bd9Sstevel@tonic-gate 902*7c478bd9Sstevel@tonic-gate return (rpcerror); 903*7c478bd9Sstevel@tonic-gate } 904*7c478bd9Sstevel@tonic-gate 905*7c478bd9Sstevel@tonic-gate #define VALID_FH(fi) (VTOR(fi->vp)->r_server == VTOMI(fi->vp)->mi_curr_serv) 906*7c478bd9Sstevel@tonic-gate #define INC_READERS(mi) { \ 907*7c478bd9Sstevel@tonic-gate mi->mi_readers++; \ 908*7c478bd9Sstevel@tonic-gate } 909*7c478bd9Sstevel@tonic-gate #define DEC_READERS(mi) { \ 910*7c478bd9Sstevel@tonic-gate mi->mi_readers--; \ 911*7c478bd9Sstevel@tonic-gate if (mi->mi_readers == 0) \ 912*7c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_failover_cv); \ 913*7c478bd9Sstevel@tonic-gate } 914*7c478bd9Sstevel@tonic-gate 915*7c478bd9Sstevel@tonic-gate static int 916*7c478bd9Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 917*7c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf, 918*7c478bd9Sstevel@tonic-gate enum clnt_stat *rpc_status, int flags, failinfo_t *fi) 919*7c478bd9Sstevel@tonic-gate { 920*7c478bd9Sstevel@tonic-gate CLIENT 
*client; 921*7c478bd9Sstevel@tonic-gate struct chtab *ch; 922*7c478bd9Sstevel@tonic-gate enum clnt_stat status; 923*7c478bd9Sstevel@tonic-gate struct rpc_err rpcerr; 924*7c478bd9Sstevel@tonic-gate struct timeval wait; 925*7c478bd9Sstevel@tonic-gate int timeo; /* in units of hz */ 926*7c478bd9Sstevel@tonic-gate int my_rsize, my_wsize; 927*7c478bd9Sstevel@tonic-gate bool_t tryagain; 928*7c478bd9Sstevel@tonic-gate k_sigset_t smask; 929*7c478bd9Sstevel@tonic-gate servinfo_t *svp; 930*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 931*7c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 932*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 933*7c478bd9Sstevel@tonic-gate char *bufp; 934*7c478bd9Sstevel@tonic-gate #endif 935*7c478bd9Sstevel@tonic-gate 936*7c478bd9Sstevel@tonic-gate 937*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_NFS, TR_RFSCALL_START, 938*7c478bd9Sstevel@tonic-gate "rfscall_start:which %d mi %p", which, mi); 939*7c478bd9Sstevel@tonic-gate 940*7c478bd9Sstevel@tonic-gate nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone); 941*7c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 942*7c478bd9Sstevel@tonic-gate 943*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.calls.value.ui64++; 944*7c478bd9Sstevel@tonic-gate mi->mi_reqs[which].value.ui64++; 945*7c478bd9Sstevel@tonic-gate 946*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 947*7c478bd9Sstevel@tonic-gate 948*7c478bd9Sstevel@tonic-gate /* 949*7c478bd9Sstevel@tonic-gate * In case of forced unmount or zone shutdown, return EIO. 
950*7c478bd9Sstevel@tonic-gate */ 951*7c478bd9Sstevel@tonic-gate 952*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 953*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 954*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 955*7c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 956*7c478bd9Sstevel@tonic-gate } 957*7c478bd9Sstevel@tonic-gate 958*7c478bd9Sstevel@tonic-gate /* 959*7c478bd9Sstevel@tonic-gate * Remember the transfer sizes in case 960*7c478bd9Sstevel@tonic-gate * nfs_feedback changes them underneath us. 961*7c478bd9Sstevel@tonic-gate */ 962*7c478bd9Sstevel@tonic-gate my_rsize = mi->mi_curread; 963*7c478bd9Sstevel@tonic-gate my_wsize = mi->mi_curwrite; 964*7c478bd9Sstevel@tonic-gate 965*7c478bd9Sstevel@tonic-gate /* 966*7c478bd9Sstevel@tonic-gate * NFS client failover support 967*7c478bd9Sstevel@tonic-gate * 968*7c478bd9Sstevel@tonic-gate * If this rnode is not in sync with the current server (VALID_FH), 969*7c478bd9Sstevel@tonic-gate * we'd like to do a remap to get in sync. We can be interrupted 970*7c478bd9Sstevel@tonic-gate * in failover_remap(), and if so we'll bail. Otherwise, we'll 971*7c478bd9Sstevel@tonic-gate * use the best info we have to try the RPC. Part of that is 972*7c478bd9Sstevel@tonic-gate * unconditionally updating the filehandle copy kept for V3. 973*7c478bd9Sstevel@tonic-gate * 974*7c478bd9Sstevel@tonic-gate * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible 975*7c478bd9Sstevel@tonic-gate * rw_enter(); we're trying to keep the current server from being 976*7c478bd9Sstevel@tonic-gate * changed on us until we're done with the remapping and have a 977*7c478bd9Sstevel@tonic-gate * matching client handle. We don't want to sending a filehandle 978*7c478bd9Sstevel@tonic-gate * to the wrong host. 
979*7c478bd9Sstevel@tonic-gate */ 980*7c478bd9Sstevel@tonic-gate failoverretry: 981*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 982*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 983*7c478bd9Sstevel@tonic-gate if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) { 984*7c478bd9Sstevel@tonic-gate if (failover_wait(mi)) { 985*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 986*7c478bd9Sstevel@tonic-gate return (EINTR); 987*7c478bd9Sstevel@tonic-gate } 988*7c478bd9Sstevel@tonic-gate } 989*7c478bd9Sstevel@tonic-gate INC_READERS(mi); 990*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 991*7c478bd9Sstevel@tonic-gate if (fi) { 992*7c478bd9Sstevel@tonic-gate if (!VALID_FH(fi) && 993*7c478bd9Sstevel@tonic-gate !(flags & RFSCALL_SOFT) && failover_safe(fi)) { 994*7c478bd9Sstevel@tonic-gate int remaperr; 995*7c478bd9Sstevel@tonic-gate 996*7c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 997*7c478bd9Sstevel@tonic-gate remaperr = failover_remap(fi); 998*7c478bd9Sstevel@tonic-gate if (remaperr != 0) { 999*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1000*7c478bd9Sstevel@tonic-gate if (remaperr != EINTR) 1001*7c478bd9Sstevel@tonic-gate nfs_cmn_err(remaperr, CE_WARN, 1002*7c478bd9Sstevel@tonic-gate "rfscall couldn't failover: %m"); 1003*7c478bd9Sstevel@tonic-gate #endif 1004*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1005*7c478bd9Sstevel@tonic-gate DEC_READERS(mi); 1006*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1007*7c478bd9Sstevel@tonic-gate /* 1008*7c478bd9Sstevel@tonic-gate * If failover_remap returns ETIMEDOUT 1009*7c478bd9Sstevel@tonic-gate * and the filesystem is hard mounted 1010*7c478bd9Sstevel@tonic-gate * we have to retry the call with a new 1011*7c478bd9Sstevel@tonic-gate * server. 
1012*7c478bd9Sstevel@tonic-gate */ 1013*7c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_HARD) && 1014*7c478bd9Sstevel@tonic-gate IS_RECOVERABLE_ERROR(remaperr)) { 1015*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 1016*7c478bd9Sstevel@tonic-gate failover_newserver(mi); 1017*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 1018*7c478bd9Sstevel@tonic-gate goto failoverretry; 1019*7c478bd9Sstevel@tonic-gate } 1020*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = remaperr; 1021*7c478bd9Sstevel@tonic-gate return (remaperr); 1022*7c478bd9Sstevel@tonic-gate } 1023*7c478bd9Sstevel@tonic-gate } 1024*7c478bd9Sstevel@tonic-gate if (fi->fhp && fi->copyproc) 1025*7c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, fi->vp); 1026*7c478bd9Sstevel@tonic-gate } 1027*7c478bd9Sstevel@tonic-gate } 1028*7c478bd9Sstevel@tonic-gate 1029*7c478bd9Sstevel@tonic-gate /* 1030*7c478bd9Sstevel@tonic-gate * clget() calls clnt_tli_kinit() which clears the xid, so we 1031*7c478bd9Sstevel@tonic-gate * are guaranteed to reprocess the retry as a new request. 
1032*7c478bd9Sstevel@tonic-gate */ 1033*7c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 1034*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl); 1035*7c478bd9Sstevel@tonic-gate 1036*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 1037*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1038*7c478bd9Sstevel@tonic-gate DEC_READERS(mi); 1039*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1040*7c478bd9Sstevel@tonic-gate 1041*7c478bd9Sstevel@tonic-gate if ((rpcerr.re_errno == ETIMEDOUT || 1042*7c478bd9Sstevel@tonic-gate rpcerr.re_errno == ECONNRESET) && 1043*7c478bd9Sstevel@tonic-gate failover_safe(fi)) { 1044*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 1045*7c478bd9Sstevel@tonic-gate failover_newserver(mi); 1046*7c478bd9Sstevel@tonic-gate goto failoverretry; 1047*7c478bd9Sstevel@tonic-gate } 1048*7c478bd9Sstevel@tonic-gate } 1049*7c478bd9Sstevel@tonic-gate if (rpcerr.re_errno != 0) 1050*7c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 1051*7c478bd9Sstevel@tonic-gate 1052*7c478bd9Sstevel@tonic-gate if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1053*7c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_semantics == NC_TPI_COTS) { 1054*7c478bd9Sstevel@tonic-gate timeo = (mi->mi_timeo * hz) / 10; 1055*7c478bd9Sstevel@tonic-gate } else { 1056*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1057*7c478bd9Sstevel@tonic-gate timeo = CLNT_SETTIMERS(client, 1058*7c478bd9Sstevel@tonic-gate &(mi->mi_timers[mi->mi_timer_type[which]]), 1059*7c478bd9Sstevel@tonic-gate &(mi->mi_timers[NFS_CALLTYPES]), 1060*7c478bd9Sstevel@tonic-gate (minimum_timeo[mi->mi_call_type[which]]*hz)>>3, 1061*7c478bd9Sstevel@tonic-gate (void (*)())NULL, (caddr_t)mi, 0); 1062*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1063*7c478bd9Sstevel@tonic-gate } 1064*7c478bd9Sstevel@tonic-gate 1065*7c478bd9Sstevel@tonic-gate /* 1066*7c478bd9Sstevel@tonic-gate * If hard mounted fs, retry call forever unless hard error occurs. 
1067*7c478bd9Sstevel@tonic-gate */ 1068*7c478bd9Sstevel@tonic-gate do { 1069*7c478bd9Sstevel@tonic-gate tryagain = FALSE; 1070*7c478bd9Sstevel@tonic-gate 1071*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 1072*7c478bd9Sstevel@tonic-gate status = RPC_FAILED; 1073*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 1074*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1075*7c478bd9Sstevel@tonic-gate break; 1076*7c478bd9Sstevel@tonic-gate } 1077*7c478bd9Sstevel@tonic-gate 1078*7c478bd9Sstevel@tonic-gate TICK_TO_TIMEVAL(timeo, &wait); 1079*7c478bd9Sstevel@tonic-gate 1080*7c478bd9Sstevel@tonic-gate /* 1081*7c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 1082*7c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks). 1083*7c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified. 1084*7c478bd9Sstevel@tonic-gate */ 1085*7c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT); 1086*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 1087*7c478bd9Sstevel@tonic-gate client->cl_nosignal = TRUE; 1088*7c478bd9Sstevel@tonic-gate 1089*7c478bd9Sstevel@tonic-gate /* 1090*7c478bd9Sstevel@tonic-gate * If there is a current signal, then don't bother 1091*7c478bd9Sstevel@tonic-gate * even trying to send out the request because we 1092*7c478bd9Sstevel@tonic-gate * won't be able to block waiting for the response. 1093*7c478bd9Sstevel@tonic-gate * Simply assume RPC_INTR and get on with it. 
1094*7c478bd9Sstevel@tonic-gate */ 1095*7c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 1096*7c478bd9Sstevel@tonic-gate status = RPC_INTR; 1097*7c478bd9Sstevel@tonic-gate else { 1098*7c478bd9Sstevel@tonic-gate status = CLNT_CALL(client, which, xdrargs, argsp, 1099*7c478bd9Sstevel@tonic-gate xdrres, resp, wait); 1100*7c478bd9Sstevel@tonic-gate } 1101*7c478bd9Sstevel@tonic-gate 1102*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 1103*7c478bd9Sstevel@tonic-gate client->cl_nosignal = FALSE; 1104*7c478bd9Sstevel@tonic-gate /* 1105*7c478bd9Sstevel@tonic-gate * restore original signal mask 1106*7c478bd9Sstevel@tonic-gate */ 1107*7c478bd9Sstevel@tonic-gate sigunintr(&smask); 1108*7c478bd9Sstevel@tonic-gate 1109*7c478bd9Sstevel@tonic-gate switch (status) { 1110*7c478bd9Sstevel@tonic-gate case RPC_SUCCESS: 1111*7c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 1112*7c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 && 1113*7c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 1114*7c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize)) 1115*7c478bd9Sstevel@tonic-gate (void) nfs_feedback(FEEDBACK_OK, which, mi); 1116*7c478bd9Sstevel@tonic-gate break; 1117*7c478bd9Sstevel@tonic-gate 1118*7c478bd9Sstevel@tonic-gate case RPC_INTR: 1119*7c478bd9Sstevel@tonic-gate /* 1120*7c478bd9Sstevel@tonic-gate * There is no way to recover from this error, 1121*7c478bd9Sstevel@tonic-gate * even if mount option nointr is specified. 1122*7c478bd9Sstevel@tonic-gate * SIGKILL, for example, cannot be blocked. 
1123*7c478bd9Sstevel@tonic-gate */ 1124*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_INTR; 1125*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EINTR; 1126*7c478bd9Sstevel@tonic-gate break; 1127*7c478bd9Sstevel@tonic-gate 1128*7c478bd9Sstevel@tonic-gate case RPC_UDERROR: 1129*7c478bd9Sstevel@tonic-gate /* 1130*7c478bd9Sstevel@tonic-gate * If the NFS server is local (vold) and 1131*7c478bd9Sstevel@tonic-gate * it goes away then we get RPC_UDERROR. 1132*7c478bd9Sstevel@tonic-gate * This is a retryable error, so we would 1133*7c478bd9Sstevel@tonic-gate * loop, so check to see if the specific 1134*7c478bd9Sstevel@tonic-gate * error was ECONNRESET, indicating that 1135*7c478bd9Sstevel@tonic-gate * target did not exist at all. If so, 1136*7c478bd9Sstevel@tonic-gate * return with RPC_PROGUNAVAIL and 1137*7c478bd9Sstevel@tonic-gate * ECONNRESET to indicate why. 1138*7c478bd9Sstevel@tonic-gate */ 1139*7c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 1140*7c478bd9Sstevel@tonic-gate if (rpcerr.re_errno == ECONNRESET) { 1141*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_PROGUNAVAIL; 1142*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = ECONNRESET; 1143*7c478bd9Sstevel@tonic-gate break; 1144*7c478bd9Sstevel@tonic-gate } 1145*7c478bd9Sstevel@tonic-gate /*FALLTHROUGH*/ 1146*7c478bd9Sstevel@tonic-gate 1147*7c478bd9Sstevel@tonic-gate default: /* probably RPC_TIMEDOUT */ 1148*7c478bd9Sstevel@tonic-gate if (IS_UNRECOVERABLE_RPC(status)) 1149*7c478bd9Sstevel@tonic-gate break; 1150*7c478bd9Sstevel@tonic-gate 1151*7c478bd9Sstevel@tonic-gate /* 1152*7c478bd9Sstevel@tonic-gate * increment server not responding count 1153*7c478bd9Sstevel@tonic-gate */ 1154*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1155*7c478bd9Sstevel@tonic-gate mi->mi_noresponse++; 1156*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1157*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1158*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.noresponse.value.ui64++; 1159*7c478bd9Sstevel@tonic-gate #endif 
1160*7c478bd9Sstevel@tonic-gate 1161*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) { 1162*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SEMISOFT) || 1163*7c478bd9Sstevel@tonic-gate (mi->mi_ss_call_type[which] == 0)) 1164*7c478bd9Sstevel@tonic-gate break; 1165*7c478bd9Sstevel@tonic-gate } 1166*7c478bd9Sstevel@tonic-gate 1167*7c478bd9Sstevel@tonic-gate /* 1168*7c478bd9Sstevel@tonic-gate * The call is in progress (over COTS). 1169*7c478bd9Sstevel@tonic-gate * Try the CLNT_CALL again, but don't 1170*7c478bd9Sstevel@tonic-gate * print a noisy error message. 1171*7c478bd9Sstevel@tonic-gate */ 1172*7c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) { 1173*7c478bd9Sstevel@tonic-gate tryagain = TRUE; 1174*7c478bd9Sstevel@tonic-gate break; 1175*7c478bd9Sstevel@tonic-gate } 1176*7c478bd9Sstevel@tonic-gate 1177*7c478bd9Sstevel@tonic-gate if (flags & RFSCALL_SOFT) 1178*7c478bd9Sstevel@tonic-gate break; 1179*7c478bd9Sstevel@tonic-gate 1180*7c478bd9Sstevel@tonic-gate /* 1181*7c478bd9Sstevel@tonic-gate * On zone shutdown, just move on. 1182*7c478bd9Sstevel@tonic-gate */ 1183*7c478bd9Sstevel@tonic-gate if (zone_status_get(curproc->p_zone) >= 1184*7c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN) { 1185*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 1186*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1187*7c478bd9Sstevel@tonic-gate break; 1188*7c478bd9Sstevel@tonic-gate } 1189*7c478bd9Sstevel@tonic-gate 1190*7c478bd9Sstevel@tonic-gate /* 1191*7c478bd9Sstevel@tonic-gate * NFS client failover support 1192*7c478bd9Sstevel@tonic-gate * 1193*7c478bd9Sstevel@tonic-gate * If the current server just failed us, we'll 1194*7c478bd9Sstevel@tonic-gate * start the process of finding a new server. 1195*7c478bd9Sstevel@tonic-gate * After that, we can just retry. 
1196*7c478bd9Sstevel@tonic-gate */ 1197*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi) && failover_safe(fi)) { 1198*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 1199*7c478bd9Sstevel@tonic-gate failover_newserver(mi); 1200*7c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 1201*7c478bd9Sstevel@tonic-gate goto failoverretry; 1202*7c478bd9Sstevel@tonic-gate } 1203*7c478bd9Sstevel@tonic-gate 1204*7c478bd9Sstevel@tonic-gate tryagain = TRUE; 1205*7c478bd9Sstevel@tonic-gate timeo = backoff(timeo); 1206*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1207*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_PRINTED)) { 1208*7c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_PRINTED; 1209*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1210*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1211*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 1212*7c478bd9Sstevel@tonic-gate "NFS%d server %s not responding still trying\n", 1213*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1214*7c478bd9Sstevel@tonic-gate #else 1215*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 1216*7c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n", 1217*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 1218*7c478bd9Sstevel@tonic-gate #endif 1219*7c478bd9Sstevel@tonic-gate } else 1220*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1221*7c478bd9Sstevel@tonic-gate if (*douprintf && curproc->p_sessp->s_vp != NULL) { 1222*7c478bd9Sstevel@tonic-gate *douprintf = 0; 1223*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 1224*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1225*7c478bd9Sstevel@tonic-gate uprintf( 1226*7c478bd9Sstevel@tonic-gate "NFS%d server %s not responding still trying\n", 1227*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1228*7c478bd9Sstevel@tonic-gate #else 1229*7c478bd9Sstevel@tonic-gate uprintf( 1230*7c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n", 1231*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 
1232*7c478bd9Sstevel@tonic-gate #endif 1233*7c478bd9Sstevel@tonic-gate } 1234*7c478bd9Sstevel@tonic-gate 1235*7c478bd9Sstevel@tonic-gate /* 1236*7c478bd9Sstevel@tonic-gate * If doing dynamic adjustment of transfer 1237*7c478bd9Sstevel@tonic-gate * size and if it's a read or write call 1238*7c478bd9Sstevel@tonic-gate * and if the transfer size changed while 1239*7c478bd9Sstevel@tonic-gate * retransmitting or if the feedback routine 1240*7c478bd9Sstevel@tonic-gate * changed the transfer size, 1241*7c478bd9Sstevel@tonic-gate * then exit rfscall so that the transfer 1242*7c478bd9Sstevel@tonic-gate * size can be adjusted at the vnops level. 1243*7c478bd9Sstevel@tonic-gate */ 1244*7c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 1245*7c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 && 1246*7c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 1247*7c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize || 1248*7c478bd9Sstevel@tonic-gate nfs_feedback(FEEDBACK_REXMIT1, which, mi))) { 1249*7c478bd9Sstevel@tonic-gate /* 1250*7c478bd9Sstevel@tonic-gate * On read or write calls, return 1251*7c478bd9Sstevel@tonic-gate * back to the vnode ops level if 1252*7c478bd9Sstevel@tonic-gate * the transfer size changed. 1253*7c478bd9Sstevel@tonic-gate */ 1254*7c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 1255*7c478bd9Sstevel@tonic-gate return (ENFS_TRYAGAIN); 1256*7c478bd9Sstevel@tonic-gate } 1257*7c478bd9Sstevel@tonic-gate } 1258*7c478bd9Sstevel@tonic-gate } while (tryagain); 1259*7c478bd9Sstevel@tonic-gate 1260*7c478bd9Sstevel@tonic-gate if (status != RPC_SUCCESS) { 1261*7c478bd9Sstevel@tonic-gate /* 1262*7c478bd9Sstevel@tonic-gate * Let soft mounts use the timed out message. 
1263*7c478bd9Sstevel@tonic-gate */ 1264*7c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) 1265*7c478bd9Sstevel@tonic-gate status = RPC_TIMEDOUT; 1266*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.badcalls.value.ui64++; 1267*7c478bd9Sstevel@tonic-gate if (status != RPC_INTR) { 1268*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1269*7c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_DOWN; 1270*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1271*7c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 1272*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1273*7c478bd9Sstevel@tonic-gate bufp = clnt_sperror(client, svp->sv_hostname); 1274*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d %s failed for %s\n", 1275*7c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_rfsnames[which], bufp); 1276*7c478bd9Sstevel@tonic-gate if (curproc->p_sessp->s_vp != NULL) { 1277*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) { 1278*7c478bd9Sstevel@tonic-gate uprintf("NFS%d %s failed for %s\n", 1279*7c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_rfsnames[which], 1280*7c478bd9Sstevel@tonic-gate bufp); 1281*7c478bd9Sstevel@tonic-gate } 1282*7c478bd9Sstevel@tonic-gate } 1283*7c478bd9Sstevel@tonic-gate kmem_free(bufp, MAXPATHLEN); 1284*7c478bd9Sstevel@tonic-gate #else 1285*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 1286*7c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 1287*7c478bd9Sstevel@tonic-gate mi->mi_rfsnames[which], svp->sv_hostname, 1288*7c478bd9Sstevel@tonic-gate status, clnt_sperrno(status)); 1289*7c478bd9Sstevel@tonic-gate if (curproc->p_sessp->s_vp != NULL) { 1290*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) { 1291*7c478bd9Sstevel@tonic-gate uprintf( 1292*7c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 1293*7c478bd9Sstevel@tonic-gate mi->mi_rfsnames[which], 1294*7c478bd9Sstevel@tonic-gate svp->sv_hostname, status, 1295*7c478bd9Sstevel@tonic-gate clnt_sperrno(status)); 
1296*7c478bd9Sstevel@tonic-gate } 1297*7c478bd9Sstevel@tonic-gate } 1298*7c478bd9Sstevel@tonic-gate #endif 1299*7c478bd9Sstevel@tonic-gate /* 1300*7c478bd9Sstevel@tonic-gate * when CLNT_CALL() fails with RPC_AUTHERROR, 1301*7c478bd9Sstevel@tonic-gate * re_errno is set appropriately depending on 1302*7c478bd9Sstevel@tonic-gate * the authentication error 1303*7c478bd9Sstevel@tonic-gate */ 1304*7c478bd9Sstevel@tonic-gate if (status == RPC_VERSMISMATCH || 1305*7c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH) 1306*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1307*7c478bd9Sstevel@tonic-gate } 1308*7c478bd9Sstevel@tonic-gate } else { 1309*7c478bd9Sstevel@tonic-gate /* 1310*7c478bd9Sstevel@tonic-gate * Test the value of mi_down and mi_printed without 1311*7c478bd9Sstevel@tonic-gate * holding the mi_lock mutex. If they are both zero, 1312*7c478bd9Sstevel@tonic-gate * then it is okay to skip the down and printed 1313*7c478bd9Sstevel@tonic-gate * processing. This saves on a mutex_enter and 1314*7c478bd9Sstevel@tonic-gate * mutex_exit pair for a normal, successful RPC. 1315*7c478bd9Sstevel@tonic-gate * This was just complete overhead. 
1316*7c478bd9Sstevel@tonic-gate */ 1317*7c478bd9Sstevel@tonic-gate if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) { 1318*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1319*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_DOWN; 1320*7c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_PRINTED) { 1321*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_PRINTED; 1322*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1323*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1324*7c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1325*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d server %s ok\n", 1326*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1327*7c478bd9Sstevel@tonic-gate #else 1328*7c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1329*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS server %s ok\n", 1330*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 1331*7c478bd9Sstevel@tonic-gate #endif 1332*7c478bd9Sstevel@tonic-gate } else 1333*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1334*7c478bd9Sstevel@tonic-gate } 1335*7c478bd9Sstevel@tonic-gate 1336*7c478bd9Sstevel@tonic-gate if (*douprintf == 0) { 1337*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 1338*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1339*7c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1340*7c478bd9Sstevel@tonic-gate uprintf("NFS%d server %s ok\n", 1341*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1342*7c478bd9Sstevel@tonic-gate #else 1343*7c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 1344*7c478bd9Sstevel@tonic-gate uprintf("NFS server %s ok\n", svp->sv_hostname); 1345*7c478bd9Sstevel@tonic-gate #endif 1346*7c478bd9Sstevel@tonic-gate *douprintf = 1; 1347*7c478bd9Sstevel@tonic-gate } 1348*7c478bd9Sstevel@tonic-gate } 1349*7c478bd9Sstevel@tonic-gate 1350*7c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 1351*7c478bd9Sstevel@tonic-gate 1352*7c478bd9Sstevel@tonic-gate 
ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0); 1353*7c478bd9Sstevel@tonic-gate 1354*7c478bd9Sstevel@tonic-gate if (rpc_status != NULL) 1355*7c478bd9Sstevel@tonic-gate *rpc_status = rpcerr.re_status; 1356*7c478bd9Sstevel@tonic-gate 1357*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d", 1358*7c478bd9Sstevel@tonic-gate rpcerr.re_errno); 1359*7c478bd9Sstevel@tonic-gate 1360*7c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 1361*7c478bd9Sstevel@tonic-gate }

#ifdef DEBUG
/*
 * DEBUG-only statistics for acl2call(): "hits" counts the retries issued
 * with a credential handed back by crnetadjust(); "misses" counts the
 * retries after which *statusp still read NFSERR_ACCES.
 */
static int acl2call_hits = 0;
static int acl2call_misses = 0;
#endif

/*
 * Front end to aclcall() for callers whose reply status is an
 * enum nfsstat.
 *
 * The request is passed to aclcall() unchanged.  When that call returns 0
 * and *statusp is NFSERR_ACCES, crnetadjust() is asked for a replacement
 * credential (see the comments with crnetadjust()).  If it returns one,
 * the request is issued a second time with that credential, which is then
 * released with crfree().  If it returns NULL, no retry is made.
 *
 * Returns the value of the last aclcall() made.
 */
int
acl2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
    enum nfsstat *statusp, int flags, failinfo_t *fi)
{
	cred_t *adjcr;
	int error;

	error = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
	    cr, douprintf, flags, fi);

	/*
	 * Only a call that went through and came back with
	 * NFSERR_ACCES is a candidate for the retry below.
	 */
	if (error != 0 || *statusp != NFSERR_ACCES)
		return (error);

	/*
	 * See comments with crnetadjust().
	 */
	adjcr = crnetadjust(cr);
	if (adjcr == NULL)
		return (error);

#ifdef DEBUG
	acl2call_hits++;
#endif
	error = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
	    adjcr, douprintf, flags, fi);
	crfree(adjcr);
#ifdef DEBUG
	if (*statusp == NFSERR_ACCES)
		acl2call_misses++;
#endif

	return (error);
}

#ifdef DEBUG
/*
 * DEBUG-only statistics for acl3call(), kept the same way as the
 * acl2call() counters: "hits" counts retries made with a credential from
 * crnetadjust(); "misses" counts the retries after which *statusp still
 * read NFS3ERR_ACCES.
 */
static int acl3call_hits = 0;
static int acl3call_misses = 0;
#endif

/*
 * Front end to aclcall() for callers whose reply status is an nfsstat3.
 *
 * The request is reissued for as long as aclcall() returns 0 and *statusp
 * reads NFS3ERR_JUKEBOX at the bottom of the loop.  When the primary call
 * comes back with NFS3ERR_JUKEBOX, the user is told once per invocation
 * (via uprintf()) and the thread waits delay(nfs3_jukebox_delay) before
 * the next attempt.
 *
 * When the primary call comes back with NFS3ERR_ACCES, crnetadjust() is
 * asked for a replacement credential (see crnetadjust() for comments).
 * If it returns one, the request is issued again with that credential,
 * which is then released with crfree().  A NFS3ERR_JUKEBOX status left by
 * that second call also sends control back to the top of the loop, without
 * the message or the delay.
 *
 * Returns the value of the last aclcall() made.
 */
int
acl3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
    nfsstat3 *statusp, int flags, failinfo_t *fi)
{
	cred_t *adjcr;
	int told_user = 0;
	int error;

	do {
		error = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
		    cr, douprintf, flags, fi);
		if (error != 0)
			break;

		if (*statusp == NFS3ERR_JUKEBOX) {
			/* Announce the wait only on the first occurrence. */
			if (told_user == 0) {
				told_user = 1;
				uprintf(
		"file temporarily unavailable on the server, retrying...\n");
			}
			delay(nfs3_jukebox_delay);
		} else if (*statusp == NFS3ERR_ACCES) {
			/*
			 * See crnetadjust() for comments.
			 */
			adjcr = crnetadjust(cr);
			if (adjcr != NULL) {
#ifdef DEBUG
				acl3call_hits++;
#endif
				error = aclcall(mi, which, xdrargs, argsp,
				    xdrres, resp, adjcr, douprintf, flags, fi);

				crfree(adjcr);
#ifdef DEBUG
				if (*statusp == NFS3ERR_ACCES)
					acl3call_misses++;
#endif
			}
		}
	} while (error == 0 && *statusp == NFS3ERR_JUKEBOX);

	return (error);
}

1449*7c478bd9Sstevel@tonic-gate 1450*7c478bd9Sstevel@tonic-gate static int 1451*7c478bd9Sstevel@tonic-gate aclcall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 1452*7c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf, 1453*7c478bd9Sstevel@tonic-gate int flags, failinfo_t *fi) 1454*7c478bd9Sstevel@tonic-gate { 1455*7c478bd9Sstevel@tonic-gate CLIENT *client; 1456*7c478bd9Sstevel@tonic-gate struct chtab *ch; 
1457*7c478bd9Sstevel@tonic-gate enum clnt_stat status; 1458*7c478bd9Sstevel@tonic-gate struct rpc_err rpcerr; 1459*7c478bd9Sstevel@tonic-gate struct timeval wait; 1460*7c478bd9Sstevel@tonic-gate int timeo; /* in units of hz */ 1461*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 1462*7c478bd9Sstevel@tonic-gate int my_rsize, my_wsize; 1463*7c478bd9Sstevel@tonic-gate #endif 1464*7c478bd9Sstevel@tonic-gate bool_t tryagain; 1465*7c478bd9Sstevel@tonic-gate k_sigset_t smask; 1466*7c478bd9Sstevel@tonic-gate servinfo_t *svp; 1467*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 1468*7c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 1469*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1470*7c478bd9Sstevel@tonic-gate char *bufp; 1471*7c478bd9Sstevel@tonic-gate #endif 1472*7c478bd9Sstevel@tonic-gate 1473*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 1474*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_NFS, TR_RFSCALL_START, 1475*7c478bd9Sstevel@tonic-gate "rfscall_start:which %d mi %p", which, mi); 1476*7c478bd9Sstevel@tonic-gate #endif 1477*7c478bd9Sstevel@tonic-gate 1478*7c478bd9Sstevel@tonic-gate nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone); 1479*7c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 1480*7c478bd9Sstevel@tonic-gate 1481*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.calls.value.ui64++; 1482*7c478bd9Sstevel@tonic-gate mi->mi_aclreqs[which].value.ui64++; 1483*7c478bd9Sstevel@tonic-gate 1484*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 1485*7c478bd9Sstevel@tonic-gate 1486*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 1487*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 1488*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1489*7c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 1490*7c478bd9Sstevel@tonic-gate } 1491*7c478bd9Sstevel@tonic-gate 1492*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 1493*7c478bd9Sstevel@tonic-gate /* 1494*7c478bd9Sstevel@tonic-gate * Remember the transfer sizes in case 
1495*7c478bd9Sstevel@tonic-gate * nfs_feedback changes them underneath us. 1496*7c478bd9Sstevel@tonic-gate */ 1497*7c478bd9Sstevel@tonic-gate my_rsize = mi->mi_curread; 1498*7c478bd9Sstevel@tonic-gate my_wsize = mi->mi_curwrite; 1499*7c478bd9Sstevel@tonic-gate #endif 1500*7c478bd9Sstevel@tonic-gate 1501*7c478bd9Sstevel@tonic-gate /* 1502*7c478bd9Sstevel@tonic-gate * NFS client failover support 1503*7c478bd9Sstevel@tonic-gate * 1504*7c478bd9Sstevel@tonic-gate * If this rnode is not in sync with the current server (VALID_FH), 1505*7c478bd9Sstevel@tonic-gate * we'd like to do a remap to get in sync. We can be interrupted 1506*7c478bd9Sstevel@tonic-gate * in failover_remap(), and if so we'll bail. Otherwise, we'll 1507*7c478bd9Sstevel@tonic-gate * use the best info we have to try the RPC. Part of that is 1508*7c478bd9Sstevel@tonic-gate * unconditionally updating the filehandle copy kept for V3. 1509*7c478bd9Sstevel@tonic-gate * 1510*7c478bd9Sstevel@tonic-gate * Locking: INC_READERS/DEC_READERS is a poor man's interruptible 1511*7c478bd9Sstevel@tonic-gate * rw_enter(); we're trying to keep the current server from being 1512*7c478bd9Sstevel@tonic-gate * changed on us until we're done with the remapping and have a 1513*7c478bd9Sstevel@tonic-gate * matching client handle. We don't want to send a filehandle 1514*7c478bd9Sstevel@tonic-gate * to the wrong host. 
1515*7c478bd9Sstevel@tonic-gate */ 1516*7c478bd9Sstevel@tonic-gate failoverretry: 1517*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 1518*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1519*7c478bd9Sstevel@tonic-gate if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) { 1520*7c478bd9Sstevel@tonic-gate if (failover_wait(mi)) { 1521*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1522*7c478bd9Sstevel@tonic-gate return (EINTR); 1523*7c478bd9Sstevel@tonic-gate } 1524*7c478bd9Sstevel@tonic-gate } 1525*7c478bd9Sstevel@tonic-gate INC_READERS(mi); 1526*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1527*7c478bd9Sstevel@tonic-gate if (fi) { 1528*7c478bd9Sstevel@tonic-gate if (!VALID_FH(fi) && 1529*7c478bd9Sstevel@tonic-gate !(flags & RFSCALL_SOFT) && failover_safe(fi)) { 1530*7c478bd9Sstevel@tonic-gate int remaperr; 1531*7c478bd9Sstevel@tonic-gate 1532*7c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 1533*7c478bd9Sstevel@tonic-gate remaperr = failover_remap(fi); 1534*7c478bd9Sstevel@tonic-gate if (remaperr != 0) { 1535*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1536*7c478bd9Sstevel@tonic-gate if (remaperr != EINTR) 1537*7c478bd9Sstevel@tonic-gate nfs_cmn_err(remaperr, CE_WARN, 1538*7c478bd9Sstevel@tonic-gate "aclcall couldn't failover: %m"); 1539*7c478bd9Sstevel@tonic-gate #endif 1540*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1541*7c478bd9Sstevel@tonic-gate DEC_READERS(mi); 1542*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1543*7c478bd9Sstevel@tonic-gate 1544*7c478bd9Sstevel@tonic-gate /* 1545*7c478bd9Sstevel@tonic-gate * If failover_remap returns ETIMEDOUT 1546*7c478bd9Sstevel@tonic-gate * and the filesystem is hard mounted 1547*7c478bd9Sstevel@tonic-gate * we have to retry the call with a new 1548*7c478bd9Sstevel@tonic-gate * server. 
1549*7c478bd9Sstevel@tonic-gate */ 1550*7c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_HARD) && 1551*7c478bd9Sstevel@tonic-gate IS_RECOVERABLE_ERROR(remaperr)) { 1552*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 1553*7c478bd9Sstevel@tonic-gate failover_newserver(mi); 1554*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 1555*7c478bd9Sstevel@tonic-gate goto failoverretry; 1556*7c478bd9Sstevel@tonic-gate } 1557*7c478bd9Sstevel@tonic-gate return (remaperr); 1558*7c478bd9Sstevel@tonic-gate } 1559*7c478bd9Sstevel@tonic-gate } 1560*7c478bd9Sstevel@tonic-gate if (fi->fhp && fi->copyproc) 1561*7c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, fi->vp); 1562*7c478bd9Sstevel@tonic-gate } 1563*7c478bd9Sstevel@tonic-gate } 1564*7c478bd9Sstevel@tonic-gate 1565*7c478bd9Sstevel@tonic-gate /* 1566*7c478bd9Sstevel@tonic-gate * acl_clget() calls clnt_tli_kinit() which clears the xid, so we 1567*7c478bd9Sstevel@tonic-gate * are guaranteed to reprocess the retry as a new request. 1568*7c478bd9Sstevel@tonic-gate */ 1569*7c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 1570*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = acl_clget(mi, svp, cr, &client, &ch, nfscl); 1571*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 1572*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1573*7c478bd9Sstevel@tonic-gate DEC_READERS(mi); 1574*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1575*7c478bd9Sstevel@tonic-gate 1576*7c478bd9Sstevel@tonic-gate if ((rpcerr.re_errno == ETIMEDOUT || 1577*7c478bd9Sstevel@tonic-gate rpcerr.re_errno == ECONNRESET) && 1578*7c478bd9Sstevel@tonic-gate failover_safe(fi)) { 1579*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 1580*7c478bd9Sstevel@tonic-gate failover_newserver(mi); 1581*7c478bd9Sstevel@tonic-gate goto failoverretry; 1582*7c478bd9Sstevel@tonic-gate } 1583*7c478bd9Sstevel@tonic-gate } 1584*7c478bd9Sstevel@tonic-gate if (rpcerr.re_errno != 0) 1585*7c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 
1586*7c478bd9Sstevel@tonic-gate 1587*7c478bd9Sstevel@tonic-gate if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1588*7c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_semantics == NC_TPI_COTS) { 1589*7c478bd9Sstevel@tonic-gate timeo = (mi->mi_timeo * hz) / 10; 1590*7c478bd9Sstevel@tonic-gate } else { 1591*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1592*7c478bd9Sstevel@tonic-gate timeo = CLNT_SETTIMERS(client, 1593*7c478bd9Sstevel@tonic-gate &(mi->mi_timers[mi->mi_acl_timer_type[which]]), 1594*7c478bd9Sstevel@tonic-gate &(mi->mi_timers[NFS_CALLTYPES]), 1595*7c478bd9Sstevel@tonic-gate (minimum_timeo[mi->mi_acl_call_type[which]]*hz)>>3, 1596*7c478bd9Sstevel@tonic-gate (void (*)()) 0, (caddr_t)mi, 0); 1597*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1598*7c478bd9Sstevel@tonic-gate } 1599*7c478bd9Sstevel@tonic-gate 1600*7c478bd9Sstevel@tonic-gate /* 1601*7c478bd9Sstevel@tonic-gate * If hard mounted fs, retry call forever unless hard error occurs. 1602*7c478bd9Sstevel@tonic-gate */ 1603*7c478bd9Sstevel@tonic-gate do { 1604*7c478bd9Sstevel@tonic-gate tryagain = FALSE; 1605*7c478bd9Sstevel@tonic-gate 1606*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 1607*7c478bd9Sstevel@tonic-gate status = RPC_FAILED; 1608*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 1609*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1610*7c478bd9Sstevel@tonic-gate break; 1611*7c478bd9Sstevel@tonic-gate } 1612*7c478bd9Sstevel@tonic-gate 1613*7c478bd9Sstevel@tonic-gate TICK_TO_TIMEVAL(timeo, &wait); 1614*7c478bd9Sstevel@tonic-gate 1615*7c478bd9Sstevel@tonic-gate /* 1616*7c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 1617*7c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks). 1618*7c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified. 
1619*7c478bd9Sstevel@tonic-gate */ 1620*7c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT); 1621*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 1622*7c478bd9Sstevel@tonic-gate client->cl_nosignal = TRUE; 1623*7c478bd9Sstevel@tonic-gate 1624*7c478bd9Sstevel@tonic-gate /* 1625*7c478bd9Sstevel@tonic-gate * If there is a current signal, then don't bother 1626*7c478bd9Sstevel@tonic-gate * even trying to send out the request because we 1627*7c478bd9Sstevel@tonic-gate * won't be able to block waiting for the response. 1628*7c478bd9Sstevel@tonic-gate * Simply assume RPC_INTR and get on with it. 1629*7c478bd9Sstevel@tonic-gate */ 1630*7c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 1631*7c478bd9Sstevel@tonic-gate status = RPC_INTR; 1632*7c478bd9Sstevel@tonic-gate else { 1633*7c478bd9Sstevel@tonic-gate status = CLNT_CALL(client, which, xdrargs, argsp, 1634*7c478bd9Sstevel@tonic-gate xdrres, resp, wait); 1635*7c478bd9Sstevel@tonic-gate } 1636*7c478bd9Sstevel@tonic-gate 1637*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 1638*7c478bd9Sstevel@tonic-gate client->cl_nosignal = FALSE; 1639*7c478bd9Sstevel@tonic-gate /* 1640*7c478bd9Sstevel@tonic-gate * restore original signal mask 1641*7c478bd9Sstevel@tonic-gate */ 1642*7c478bd9Sstevel@tonic-gate sigunintr(&smask); 1643*7c478bd9Sstevel@tonic-gate 1644*7c478bd9Sstevel@tonic-gate switch (status) { 1645*7c478bd9Sstevel@tonic-gate case RPC_SUCCESS: 1646*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 1647*7c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 1648*7c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 && 1649*7c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 1650*7c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize)) 1651*7c478bd9Sstevel@tonic-gate (void) nfs_feedback(FEEDBACK_OK, which, mi); 1652*7c478bd9Sstevel@tonic-gate #endif 1653*7c478bd9Sstevel@tonic-gate break; 1654*7c478bd9Sstevel@tonic-gate 
1655*7c478bd9Sstevel@tonic-gate /* 1656*7c478bd9Sstevel@tonic-gate * Unfortunately, there are servers in the world which 1657*7c478bd9Sstevel@tonic-gate * are not coded correctly. They are not prepared to 1658*7c478bd9Sstevel@tonic-gate * handle RPC requests to the NFS port which are not 1659*7c478bd9Sstevel@tonic-gate * NFS requests. Thus, they may try to process the 1660*7c478bd9Sstevel@tonic-gate * NFS_ACL request as if it were an NFS request. This 1661*7c478bd9Sstevel@tonic-gate * does not work. Generally, an error will be generated 1662*7c478bd9Sstevel@tonic-gate * on the client because it will not be able to decode 1663*7c478bd9Sstevel@tonic-gate * the response from the server. However, it seems 1664*7c478bd9Sstevel@tonic-gate * possible that the server may not be able to decode 1665*7c478bd9Sstevel@tonic-gate * the arguments. Thus, the criteria for deciding 1666*7c478bd9Sstevel@tonic-gate * whether the server supports NFS_ACL or not is whether 1667*7c478bd9Sstevel@tonic-gate * the following RPC errors are returned from CLNT_CALL. 1668*7c478bd9Sstevel@tonic-gate */ 1669*7c478bd9Sstevel@tonic-gate case RPC_CANTDECODERES: 1670*7c478bd9Sstevel@tonic-gate case RPC_PROGUNAVAIL: 1671*7c478bd9Sstevel@tonic-gate case RPC_CANTDECODEARGS: 1672*7c478bd9Sstevel@tonic-gate case RPC_PROGVERSMISMATCH: 1673*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1674*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~(MI_ACL | MI_EXTATTR); 1675*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1676*7c478bd9Sstevel@tonic-gate break; 1677*7c478bd9Sstevel@tonic-gate 1678*7c478bd9Sstevel@tonic-gate /* 1679*7c478bd9Sstevel@tonic-gate * If the server supports NFS_ACL but not the new ops 1680*7c478bd9Sstevel@tonic-gate * for extended attributes, make sure we don't retry. 
1681*7c478bd9Sstevel@tonic-gate */ 1682*7c478bd9Sstevel@tonic-gate case RPC_PROCUNAVAIL: 1683*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1684*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_EXTATTR; 1685*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1686*7c478bd9Sstevel@tonic-gate break; 1687*7c478bd9Sstevel@tonic-gate 1688*7c478bd9Sstevel@tonic-gate case RPC_INTR: 1689*7c478bd9Sstevel@tonic-gate /* 1690*7c478bd9Sstevel@tonic-gate * There is no way to recover from this error, 1691*7c478bd9Sstevel@tonic-gate * even if mount option nointr is specified. 1692*7c478bd9Sstevel@tonic-gate * SIGKILL, for example, cannot be blocked. 1693*7c478bd9Sstevel@tonic-gate */ 1694*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_INTR; 1695*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EINTR; 1696*7c478bd9Sstevel@tonic-gate break; 1697*7c478bd9Sstevel@tonic-gate 1698*7c478bd9Sstevel@tonic-gate case RPC_UDERROR: 1699*7c478bd9Sstevel@tonic-gate /* 1700*7c478bd9Sstevel@tonic-gate * If the NFS server is local (vold) and 1701*7c478bd9Sstevel@tonic-gate * it goes away then we get RPC_UDERROR. 1702*7c478bd9Sstevel@tonic-gate * This is a retryable error, so we would 1703*7c478bd9Sstevel@tonic-gate * loop, so check to see if the specific 1704*7c478bd9Sstevel@tonic-gate * error was ECONNRESET, indicating that 1705*7c478bd9Sstevel@tonic-gate * target did not exist at all. If so, 1706*7c478bd9Sstevel@tonic-gate * return with RPC_PROGUNAVAIL and 1707*7c478bd9Sstevel@tonic-gate * ECONNRESET to indicate why. 
1708*7c478bd9Sstevel@tonic-gate */ 1709*7c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 1710*7c478bd9Sstevel@tonic-gate if (rpcerr.re_errno == ECONNRESET) { 1711*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_PROGUNAVAIL; 1712*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = ECONNRESET; 1713*7c478bd9Sstevel@tonic-gate break; 1714*7c478bd9Sstevel@tonic-gate } 1715*7c478bd9Sstevel@tonic-gate /*FALLTHROUGH*/ 1716*7c478bd9Sstevel@tonic-gate 1717*7c478bd9Sstevel@tonic-gate default: /* probably RPC_TIMEDOUT */ 1718*7c478bd9Sstevel@tonic-gate if (IS_UNRECOVERABLE_RPC(status)) 1719*7c478bd9Sstevel@tonic-gate break; 1720*7c478bd9Sstevel@tonic-gate 1721*7c478bd9Sstevel@tonic-gate /* 1722*7c478bd9Sstevel@tonic-gate * increment server not responding count 1723*7c478bd9Sstevel@tonic-gate */ 1724*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1725*7c478bd9Sstevel@tonic-gate mi->mi_noresponse++; 1726*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1727*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1728*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.noresponse.value.ui64++; 1729*7c478bd9Sstevel@tonic-gate #endif 1730*7c478bd9Sstevel@tonic-gate 1731*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) { 1732*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SEMISOFT) || 1733*7c478bd9Sstevel@tonic-gate (mi->mi_acl_ss_call_type[which] == 0)) 1734*7c478bd9Sstevel@tonic-gate break; 1735*7c478bd9Sstevel@tonic-gate } 1736*7c478bd9Sstevel@tonic-gate 1737*7c478bd9Sstevel@tonic-gate /* 1738*7c478bd9Sstevel@tonic-gate * The call is in progress (over COTS). 1739*7c478bd9Sstevel@tonic-gate * Try the CLNT_CALL again, but don't 1740*7c478bd9Sstevel@tonic-gate * print a noisy error message. 
1741*7c478bd9Sstevel@tonic-gate */ 1742*7c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) { 1743*7c478bd9Sstevel@tonic-gate tryagain = TRUE; 1744*7c478bd9Sstevel@tonic-gate break; 1745*7c478bd9Sstevel@tonic-gate } 1746*7c478bd9Sstevel@tonic-gate 1747*7c478bd9Sstevel@tonic-gate if (flags & RFSCALL_SOFT) 1748*7c478bd9Sstevel@tonic-gate break; 1749*7c478bd9Sstevel@tonic-gate 1750*7c478bd9Sstevel@tonic-gate /* 1751*7c478bd9Sstevel@tonic-gate * On zone shutdown, just move on. 1752*7c478bd9Sstevel@tonic-gate */ 1753*7c478bd9Sstevel@tonic-gate if (zone_status_get(curproc->p_zone) >= 1754*7c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN) { 1755*7c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 1756*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1757*7c478bd9Sstevel@tonic-gate break; 1758*7c478bd9Sstevel@tonic-gate } 1759*7c478bd9Sstevel@tonic-gate 1760*7c478bd9Sstevel@tonic-gate /* 1761*7c478bd9Sstevel@tonic-gate * NFS client failover support 1762*7c478bd9Sstevel@tonic-gate * 1763*7c478bd9Sstevel@tonic-gate * If the current server just failed us, we'll 1764*7c478bd9Sstevel@tonic-gate * start the process of finding a new server. 1765*7c478bd9Sstevel@tonic-gate * After that, we can just retry. 
1766*7c478bd9Sstevel@tonic-gate */ 1767*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi) && failover_safe(fi)) { 1768*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 1769*7c478bd9Sstevel@tonic-gate failover_newserver(mi); 1770*7c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 1771*7c478bd9Sstevel@tonic-gate goto failoverretry; 1772*7c478bd9Sstevel@tonic-gate } 1773*7c478bd9Sstevel@tonic-gate 1774*7c478bd9Sstevel@tonic-gate tryagain = TRUE; 1775*7c478bd9Sstevel@tonic-gate timeo = backoff(timeo); 1776*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1777*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_PRINTED)) { 1778*7c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_PRINTED; 1779*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1780*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1781*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 1782*7c478bd9Sstevel@tonic-gate "NFS_ACL%d server %s not responding still trying\n", 1783*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1784*7c478bd9Sstevel@tonic-gate #else 1785*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 1786*7c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n", 1787*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 1788*7c478bd9Sstevel@tonic-gate #endif 1789*7c478bd9Sstevel@tonic-gate } else 1790*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1791*7c478bd9Sstevel@tonic-gate if (*douprintf && curproc->p_sessp->s_vp != NULL) { 1792*7c478bd9Sstevel@tonic-gate *douprintf = 0; 1793*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 1794*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1795*7c478bd9Sstevel@tonic-gate uprintf( 1796*7c478bd9Sstevel@tonic-gate "NFS_ACL%d server %s not responding still trying\n", 1797*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1798*7c478bd9Sstevel@tonic-gate #else 1799*7c478bd9Sstevel@tonic-gate uprintf( 1800*7c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n", 1801*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 
1802*7c478bd9Sstevel@tonic-gate #endif 1803*7c478bd9Sstevel@tonic-gate } 1804*7c478bd9Sstevel@tonic-gate 1805*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 1806*7c478bd9Sstevel@tonic-gate /* 1807*7c478bd9Sstevel@tonic-gate * If doing dynamic adjustment of transfer 1808*7c478bd9Sstevel@tonic-gate * size and if it's a read or write call 1809*7c478bd9Sstevel@tonic-gate * and if the transfer size changed while 1810*7c478bd9Sstevel@tonic-gate * retransmitting or if the feedback routine 1811*7c478bd9Sstevel@tonic-gate * changed the transfer size, 1812*7c478bd9Sstevel@tonic-gate * then exit rfscall so that the transfer 1813*7c478bd9Sstevel@tonic-gate * size can be adjusted at the vnops level. 1814*7c478bd9Sstevel@tonic-gate */ 1815*7c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 1816*7c478bd9Sstevel@tonic-gate mi->mi_acl_timer_type[which] != 0 && 1817*7c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 1818*7c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize || 1819*7c478bd9Sstevel@tonic-gate nfs_feedback(FEEDBACK_REXMIT1, which, mi))) { 1820*7c478bd9Sstevel@tonic-gate /* 1821*7c478bd9Sstevel@tonic-gate * On read or write calls, return 1822*7c478bd9Sstevel@tonic-gate * back to the vnode ops level if 1823*7c478bd9Sstevel@tonic-gate * the transfer size changed. 1824*7c478bd9Sstevel@tonic-gate */ 1825*7c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 1826*7c478bd9Sstevel@tonic-gate return (ENFS_TRYAGAIN); 1827*7c478bd9Sstevel@tonic-gate } 1828*7c478bd9Sstevel@tonic-gate #endif 1829*7c478bd9Sstevel@tonic-gate } 1830*7c478bd9Sstevel@tonic-gate } while (tryagain); 1831*7c478bd9Sstevel@tonic-gate 1832*7c478bd9Sstevel@tonic-gate if (status != RPC_SUCCESS) { 1833*7c478bd9Sstevel@tonic-gate /* 1834*7c478bd9Sstevel@tonic-gate * Let soft mounts use the timed out message. 
1835*7c478bd9Sstevel@tonic-gate */ 1836*7c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) 1837*7c478bd9Sstevel@tonic-gate status = RPC_TIMEDOUT; 1838*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.badcalls.value.ui64++; 1839*7c478bd9Sstevel@tonic-gate if (status == RPC_CANTDECODERES || 1840*7c478bd9Sstevel@tonic-gate status == RPC_PROGUNAVAIL || 1841*7c478bd9Sstevel@tonic-gate status == RPC_PROCUNAVAIL || 1842*7c478bd9Sstevel@tonic-gate status == RPC_CANTDECODEARGS || 1843*7c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH) 1844*7c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 1845*7c478bd9Sstevel@tonic-gate else if (status != RPC_INTR) { 1846*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1847*7c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_DOWN; 1848*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1849*7c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 1850*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1851*7c478bd9Sstevel@tonic-gate bufp = clnt_sperror(client, svp->sv_hostname); 1852*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS_ACL%d %s failed for %s\n", 1853*7c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_aclnames[which], bufp); 1854*7c478bd9Sstevel@tonic-gate if (curproc->p_sessp->s_vp != NULL) { 1855*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) { 1856*7c478bd9Sstevel@tonic-gate uprintf("NFS_ACL%d %s failed for %s\n", 1857*7c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_aclnames[which], 1858*7c478bd9Sstevel@tonic-gate bufp); 1859*7c478bd9Sstevel@tonic-gate } 1860*7c478bd9Sstevel@tonic-gate } 1861*7c478bd9Sstevel@tonic-gate kmem_free(bufp, MAXPATHLEN); 1862*7c478bd9Sstevel@tonic-gate #else 1863*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 1864*7c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 1865*7c478bd9Sstevel@tonic-gate mi->mi_aclnames[which], svp->sv_hostname, 1866*7c478bd9Sstevel@tonic-gate status, clnt_sperrno(status)); 1867*7c478bd9Sstevel@tonic-gate if (curproc->p_sessp->s_vp != 
NULL) { 1868*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 1869*7c478bd9Sstevel@tonic-gate uprintf( 1870*7c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 1871*7c478bd9Sstevel@tonic-gate mi->mi_aclnames[which], 1872*7c478bd9Sstevel@tonic-gate svp->sv_hostname, status, 1873*7c478bd9Sstevel@tonic-gate clnt_sperrno(status)); 1874*7c478bd9Sstevel@tonic-gate } 1875*7c478bd9Sstevel@tonic-gate #endif 1876*7c478bd9Sstevel@tonic-gate /* 1877*7c478bd9Sstevel@tonic-gate * when CLNT_CALL() fails with RPC_AUTHERROR, 1878*7c478bd9Sstevel@tonic-gate * re_errno is set appropriately depending on 1879*7c478bd9Sstevel@tonic-gate * the authentication error 1880*7c478bd9Sstevel@tonic-gate */ 1881*7c478bd9Sstevel@tonic-gate if (status == RPC_VERSMISMATCH || 1882*7c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH) 1883*7c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 1884*7c478bd9Sstevel@tonic-gate } 1885*7c478bd9Sstevel@tonic-gate } else { 1886*7c478bd9Sstevel@tonic-gate /* 1887*7c478bd9Sstevel@tonic-gate * Test the value of mi_down and mi_printed without 1888*7c478bd9Sstevel@tonic-gate * holding the mi_lock mutex. If they are both zero, 1889*7c478bd9Sstevel@tonic-gate * then it is okay to skip the down and printed 1890*7c478bd9Sstevel@tonic-gate * processing. This saves on a mutex_enter and 1891*7c478bd9Sstevel@tonic-gate * mutex_exit pair for a normal, successful RPC. 1892*7c478bd9Sstevel@tonic-gate * This was just complete overhead. 
1893*7c478bd9Sstevel@tonic-gate */ 1894*7c478bd9Sstevel@tonic-gate if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) { 1895*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1896*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_DOWN; 1897*7c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_PRINTED) { 1898*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_PRINTED; 1899*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1900*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1901*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS_ACL%d server %s ok\n", 1902*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1903*7c478bd9Sstevel@tonic-gate #else 1904*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS server %s ok\n", 1905*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 1906*7c478bd9Sstevel@tonic-gate #endif 1907*7c478bd9Sstevel@tonic-gate } else 1908*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 1909*7c478bd9Sstevel@tonic-gate } 1910*7c478bd9Sstevel@tonic-gate 1911*7c478bd9Sstevel@tonic-gate if (*douprintf == 0) { 1912*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 1913*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1914*7c478bd9Sstevel@tonic-gate uprintf("NFS_ACL%d server %s ok\n", 1915*7c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 1916*7c478bd9Sstevel@tonic-gate #else 1917*7c478bd9Sstevel@tonic-gate uprintf("NFS server %s ok\n", svp->sv_hostname); 1918*7c478bd9Sstevel@tonic-gate #endif 1919*7c478bd9Sstevel@tonic-gate *douprintf = 1; 1920*7c478bd9Sstevel@tonic-gate } 1921*7c478bd9Sstevel@tonic-gate } 1922*7c478bd9Sstevel@tonic-gate 1923*7c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 1924*7c478bd9Sstevel@tonic-gate 1925*7c478bd9Sstevel@tonic-gate ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0); 1926*7c478bd9Sstevel@tonic-gate 1927*7c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 1928*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d", 1929*7c478bd9Sstevel@tonic-gate rpcerr.re_errno); 
1930*7c478bd9Sstevel@tonic-gate #endif 1931*7c478bd9Sstevel@tonic-gate 1932*7c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 1933*7c478bd9Sstevel@tonic-gate } 1934*7c478bd9Sstevel@tonic-gate 1935*7c478bd9Sstevel@tonic-gate int 1936*7c478bd9Sstevel@tonic-gate vattr_to_sattr(struct vattr *vap, struct nfssattr *sa) 1937*7c478bd9Sstevel@tonic-gate { 1938*7c478bd9Sstevel@tonic-gate uint_t mask = vap->va_mask; 1939*7c478bd9Sstevel@tonic-gate 1940*7c478bd9Sstevel@tonic-gate if (!(mask & AT_MODE)) 1941*7c478bd9Sstevel@tonic-gate sa->sa_mode = (uint32_t)-1; 1942*7c478bd9Sstevel@tonic-gate else 1943*7c478bd9Sstevel@tonic-gate sa->sa_mode = vap->va_mode; 1944*7c478bd9Sstevel@tonic-gate if (!(mask & AT_UID)) 1945*7c478bd9Sstevel@tonic-gate sa->sa_uid = (uint32_t)-1; 1946*7c478bd9Sstevel@tonic-gate else 1947*7c478bd9Sstevel@tonic-gate sa->sa_uid = (uint32_t)vap->va_uid; 1948*7c478bd9Sstevel@tonic-gate if (!(mask & AT_GID)) 1949*7c478bd9Sstevel@tonic-gate sa->sa_gid = (uint32_t)-1; 1950*7c478bd9Sstevel@tonic-gate else 1951*7c478bd9Sstevel@tonic-gate sa->sa_gid = (uint32_t)vap->va_gid; 1952*7c478bd9Sstevel@tonic-gate if (!(mask & AT_SIZE)) 1953*7c478bd9Sstevel@tonic-gate sa->sa_size = (uint32_t)-1; 1954*7c478bd9Sstevel@tonic-gate else 1955*7c478bd9Sstevel@tonic-gate sa->sa_size = (uint32_t)vap->va_size; 1956*7c478bd9Sstevel@tonic-gate if (!(mask & AT_ATIME)) 1957*7c478bd9Sstevel@tonic-gate sa->sa_atime.tv_sec = sa->sa_atime.tv_usec = (int32_t)-1; 1958*7c478bd9Sstevel@tonic-gate else { 1959*7c478bd9Sstevel@tonic-gate /* check time validity */ 1960*7c478bd9Sstevel@tonic-gate if (! 
NFS_TIME_T_OK(vap->va_atime.tv_sec)) { 1961*7c478bd9Sstevel@tonic-gate return (EOVERFLOW); 1962*7c478bd9Sstevel@tonic-gate } 1963*7c478bd9Sstevel@tonic-gate sa->sa_atime.tv_sec = vap->va_atime.tv_sec; 1964*7c478bd9Sstevel@tonic-gate sa->sa_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 1965*7c478bd9Sstevel@tonic-gate } 1966*7c478bd9Sstevel@tonic-gate if (!(mask & AT_MTIME)) 1967*7c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_sec = sa->sa_mtime.tv_usec = (int32_t)-1; 1968*7c478bd9Sstevel@tonic-gate else { 1969*7c478bd9Sstevel@tonic-gate /* check time validity */ 1970*7c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) { 1971*7c478bd9Sstevel@tonic-gate return (EOVERFLOW); 1972*7c478bd9Sstevel@tonic-gate } 1973*7c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_sec = vap->va_mtime.tv_sec; 1974*7c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 1975*7c478bd9Sstevel@tonic-gate } 1976*7c478bd9Sstevel@tonic-gate return (0); 1977*7c478bd9Sstevel@tonic-gate } 1978*7c478bd9Sstevel@tonic-gate 1979*7c478bd9Sstevel@tonic-gate int 1980*7c478bd9Sstevel@tonic-gate vattr_to_sattr3(struct vattr *vap, sattr3 *sa) 1981*7c478bd9Sstevel@tonic-gate { 1982*7c478bd9Sstevel@tonic-gate uint_t mask = vap->va_mask; 1983*7c478bd9Sstevel@tonic-gate 1984*7c478bd9Sstevel@tonic-gate if (!(mask & AT_MODE)) 1985*7c478bd9Sstevel@tonic-gate sa->mode.set_it = FALSE; 1986*7c478bd9Sstevel@tonic-gate else { 1987*7c478bd9Sstevel@tonic-gate sa->mode.set_it = TRUE; 1988*7c478bd9Sstevel@tonic-gate sa->mode.mode = (mode3)vap->va_mode; 1989*7c478bd9Sstevel@tonic-gate } 1990*7c478bd9Sstevel@tonic-gate if (!(mask & AT_UID)) 1991*7c478bd9Sstevel@tonic-gate sa->uid.set_it = FALSE; 1992*7c478bd9Sstevel@tonic-gate else { 1993*7c478bd9Sstevel@tonic-gate sa->uid.set_it = TRUE; 1994*7c478bd9Sstevel@tonic-gate sa->uid.uid = (uid3)vap->va_uid; 1995*7c478bd9Sstevel@tonic-gate } 1996*7c478bd9Sstevel@tonic-gate if (!(mask & AT_GID)) 1997*7c478bd9Sstevel@tonic-gate sa->gid.set_it = FALSE; 
1998*7c478bd9Sstevel@tonic-gate else { 1999*7c478bd9Sstevel@tonic-gate sa->gid.set_it = TRUE; 2000*7c478bd9Sstevel@tonic-gate sa->gid.gid = (gid3)vap->va_gid; 2001*7c478bd9Sstevel@tonic-gate } 2002*7c478bd9Sstevel@tonic-gate if (!(mask & AT_SIZE)) 2003*7c478bd9Sstevel@tonic-gate sa->size.set_it = FALSE; 2004*7c478bd9Sstevel@tonic-gate else { 2005*7c478bd9Sstevel@tonic-gate sa->size.set_it = TRUE; 2006*7c478bd9Sstevel@tonic-gate sa->size.size = (size3)vap->va_size; 2007*7c478bd9Sstevel@tonic-gate } 2008*7c478bd9Sstevel@tonic-gate if (!(mask & AT_ATIME)) 2009*7c478bd9Sstevel@tonic-gate sa->atime.set_it = DONT_CHANGE; 2010*7c478bd9Sstevel@tonic-gate else { 2011*7c478bd9Sstevel@tonic-gate /* check time validity */ 2012*7c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) { 2013*7c478bd9Sstevel@tonic-gate return (EOVERFLOW); 2014*7c478bd9Sstevel@tonic-gate } 2015*7c478bd9Sstevel@tonic-gate sa->atime.set_it = SET_TO_CLIENT_TIME; 2016*7c478bd9Sstevel@tonic-gate sa->atime.atime.seconds = (uint32)vap->va_atime.tv_sec; 2017*7c478bd9Sstevel@tonic-gate sa->atime.atime.nseconds = (uint32)vap->va_atime.tv_nsec; 2018*7c478bd9Sstevel@tonic-gate } 2019*7c478bd9Sstevel@tonic-gate if (!(mask & AT_MTIME)) 2020*7c478bd9Sstevel@tonic-gate sa->mtime.set_it = DONT_CHANGE; 2021*7c478bd9Sstevel@tonic-gate else { 2022*7c478bd9Sstevel@tonic-gate /* check time validity */ 2023*7c478bd9Sstevel@tonic-gate if (! 
NFS_TIME_T_OK(vap->va_mtime.tv_sec)) { 2024*7c478bd9Sstevel@tonic-gate return (EOVERFLOW); 2025*7c478bd9Sstevel@tonic-gate } 2026*7c478bd9Sstevel@tonic-gate sa->mtime.set_it = SET_TO_CLIENT_TIME; 2027*7c478bd9Sstevel@tonic-gate sa->mtime.mtime.seconds = (uint32)vap->va_mtime.tv_sec; 2028*7c478bd9Sstevel@tonic-gate sa->mtime.mtime.nseconds = (uint32)vap->va_mtime.tv_nsec; 2029*7c478bd9Sstevel@tonic-gate } 2030*7c478bd9Sstevel@tonic-gate return (0); 2031*7c478bd9Sstevel@tonic-gate } 2032*7c478bd9Sstevel@tonic-gate 2033*7c478bd9Sstevel@tonic-gate void 2034*7c478bd9Sstevel@tonic-gate setdiropargs(struct nfsdiropargs *da, char *nm, vnode_t *dvp) 2035*7c478bd9Sstevel@tonic-gate { 2036*7c478bd9Sstevel@tonic-gate 2037*7c478bd9Sstevel@tonic-gate da->da_fhandle = VTOFH(dvp); 2038*7c478bd9Sstevel@tonic-gate da->da_name = nm; 2039*7c478bd9Sstevel@tonic-gate da->da_flags = 0; 2040*7c478bd9Sstevel@tonic-gate } 2041*7c478bd9Sstevel@tonic-gate 2042*7c478bd9Sstevel@tonic-gate void 2043*7c478bd9Sstevel@tonic-gate setdiropargs3(diropargs3 *da, char *nm, vnode_t *dvp) 2044*7c478bd9Sstevel@tonic-gate { 2045*7c478bd9Sstevel@tonic-gate 2046*7c478bd9Sstevel@tonic-gate da->dirp = VTOFH3(dvp); 2047*7c478bd9Sstevel@tonic-gate da->name = nm; 2048*7c478bd9Sstevel@tonic-gate } 2049*7c478bd9Sstevel@tonic-gate 2050*7c478bd9Sstevel@tonic-gate int 2051*7c478bd9Sstevel@tonic-gate setdirgid(vnode_t *dvp, gid_t *gidp, cred_t *cr) 2052*7c478bd9Sstevel@tonic-gate { 2053*7c478bd9Sstevel@tonic-gate int error; 2054*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2055*7c478bd9Sstevel@tonic-gate struct vattr va; 2056*7c478bd9Sstevel@tonic-gate 2057*7c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE | AT_GID; 2058*7c478bd9Sstevel@tonic-gate error = VOP_GETATTR(dvp, &va, 0, cr); 2059*7c478bd9Sstevel@tonic-gate if (error) 2060*7c478bd9Sstevel@tonic-gate return (error); 2061*7c478bd9Sstevel@tonic-gate 2062*7c478bd9Sstevel@tonic-gate /* 2063*7c478bd9Sstevel@tonic-gate * To determine the expected group-id of the created 
file: 2064*7c478bd9Sstevel@tonic-gate * 1) If the filesystem was not mounted with the Old-BSD-compatible 2065*7c478bd9Sstevel@tonic-gate * GRPID option, and the directory's set-gid bit is clear, 2066*7c478bd9Sstevel@tonic-gate * then use the process's gid. 2067*7c478bd9Sstevel@tonic-gate * 2) Otherwise, set the group-id to the gid of the parent directory. 2068*7c478bd9Sstevel@tonic-gate */ 2069*7c478bd9Sstevel@tonic-gate rp = VTOR(dvp); 2070*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2071*7c478bd9Sstevel@tonic-gate if (!(VTOMI(dvp)->mi_flags & MI_GRPID) && !(va.va_mode & VSGID)) 2072*7c478bd9Sstevel@tonic-gate *gidp = crgetgid(cr); 2073*7c478bd9Sstevel@tonic-gate else 2074*7c478bd9Sstevel@tonic-gate *gidp = va.va_gid; 2075*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2076*7c478bd9Sstevel@tonic-gate return (0); 2077*7c478bd9Sstevel@tonic-gate } 2078*7c478bd9Sstevel@tonic-gate 2079*7c478bd9Sstevel@tonic-gate int 2080*7c478bd9Sstevel@tonic-gate setdirmode(vnode_t *dvp, mode_t *omp, cred_t *cr) 2081*7c478bd9Sstevel@tonic-gate { 2082*7c478bd9Sstevel@tonic-gate int error; 2083*7c478bd9Sstevel@tonic-gate struct vattr va; 2084*7c478bd9Sstevel@tonic-gate 2085*7c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE; 2086*7c478bd9Sstevel@tonic-gate error = VOP_GETATTR(dvp, &va, 0, cr); 2087*7c478bd9Sstevel@tonic-gate if (error) 2088*7c478bd9Sstevel@tonic-gate return (error); 2089*7c478bd9Sstevel@tonic-gate 2090*7c478bd9Sstevel@tonic-gate /* 2091*7c478bd9Sstevel@tonic-gate * Modify the expected mode (om) so that the set-gid bit matches 2092*7c478bd9Sstevel@tonic-gate * that of the parent directory (dvp). 
2093*7c478bd9Sstevel@tonic-gate */ 2094*7c478bd9Sstevel@tonic-gate if (va.va_mode & VSGID) 2095*7c478bd9Sstevel@tonic-gate *omp |= VSGID; 2096*7c478bd9Sstevel@tonic-gate else 2097*7c478bd9Sstevel@tonic-gate *omp &= ~VSGID; 2098*7c478bd9Sstevel@tonic-gate return (0); 2099*7c478bd9Sstevel@tonic-gate } 2100*7c478bd9Sstevel@tonic-gate 2101*7c478bd9Sstevel@tonic-gate void 2102*7c478bd9Sstevel@tonic-gate nfs_setswaplike(vnode_t *vp, vattr_t *vap) 2103*7c478bd9Sstevel@tonic-gate { 2104*7c478bd9Sstevel@tonic-gate 2105*7c478bd9Sstevel@tonic-gate if (vp->v_type == VREG && (vap->va_mode & (VEXEC | VSVTX)) == VSVTX) { 2106*7c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VSWAPLIKE)) { 2107*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2108*7c478bd9Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE; 2109*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2110*7c478bd9Sstevel@tonic-gate } 2111*7c478bd9Sstevel@tonic-gate } else { 2112*7c478bd9Sstevel@tonic-gate if (vp->v_flag & VSWAPLIKE) { 2113*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2114*7c478bd9Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE; 2115*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2116*7c478bd9Sstevel@tonic-gate } 2117*7c478bd9Sstevel@tonic-gate } 2118*7c478bd9Sstevel@tonic-gate } 2119*7c478bd9Sstevel@tonic-gate 2120*7c478bd9Sstevel@tonic-gate /* 2121*7c478bd9Sstevel@tonic-gate * Free the resources associated with an rnode. 
2122*7c478bd9Sstevel@tonic-gate */ 2123*7c478bd9Sstevel@tonic-gate static void 2124*7c478bd9Sstevel@tonic-gate rinactive(rnode_t *rp, cred_t *cr) 2125*7c478bd9Sstevel@tonic-gate { 2126*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2127*7c478bd9Sstevel@tonic-gate cred_t *cred; 2128*7c478bd9Sstevel@tonic-gate char *contents; 2129*7c478bd9Sstevel@tonic-gate int size; 2130*7c478bd9Sstevel@tonic-gate vsecattr_t *vsp; 2131*7c478bd9Sstevel@tonic-gate int error; 2132*7c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info; 2133*7c478bd9Sstevel@tonic-gate 2134*7c478bd9Sstevel@tonic-gate /* 2135*7c478bd9Sstevel@tonic-gate * Before freeing anything, wait until all asynchronous 2136*7c478bd9Sstevel@tonic-gate * activity is done on this rnode. This will allow all 2137*7c478bd9Sstevel@tonic-gate * asynchronous read ahead and write behind i/o's to 2138*7c478bd9Sstevel@tonic-gate * finish. 2139*7c478bd9Sstevel@tonic-gate */ 2140*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2141*7c478bd9Sstevel@tonic-gate while (rp->r_count > 0) 2142*7c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 2143*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2144*7c478bd9Sstevel@tonic-gate 2145*7c478bd9Sstevel@tonic-gate /* 2146*7c478bd9Sstevel@tonic-gate * Flush and invalidate all pages associated with the vnode. 
2147*7c478bd9Sstevel@tonic-gate */ 2148*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2149*7c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp)) { 2150*7c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR); 2151*7c478bd9Sstevel@tonic-gate if ((rp->r_flags & RDIRTY) && !rp->r_error) { 2152*7c478bd9Sstevel@tonic-gate error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr); 2153*7c478bd9Sstevel@tonic-gate if (error && (error == ENOSPC || error == EDQUOT)) { 2154*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2155*7c478bd9Sstevel@tonic-gate if (!rp->r_error) 2156*7c478bd9Sstevel@tonic-gate rp->r_error = error; 2157*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2158*7c478bd9Sstevel@tonic-gate } 2159*7c478bd9Sstevel@tonic-gate } 2160*7c478bd9Sstevel@tonic-gate nfs_invalidate_pages(vp, (u_offset_t)0, cr); 2161*7c478bd9Sstevel@tonic-gate } 2162*7c478bd9Sstevel@tonic-gate 2163*7c478bd9Sstevel@tonic-gate /* 2164*7c478bd9Sstevel@tonic-gate * Free any held credentials and caches which may be associated 2165*7c478bd9Sstevel@tonic-gate * with this rnode. 2166*7c478bd9Sstevel@tonic-gate */ 2167*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2168*7c478bd9Sstevel@tonic-gate cred = rp->r_cred; 2169*7c478bd9Sstevel@tonic-gate rp->r_cred = NULL; 2170*7c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents; 2171*7c478bd9Sstevel@tonic-gate size = rp->r_symlink.size; 2172*7c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL; 2173*7c478bd9Sstevel@tonic-gate vsp = rp->r_secattr; 2174*7c478bd9Sstevel@tonic-gate rp->r_secattr = NULL; 2175*7c478bd9Sstevel@tonic-gate info = rp->r_pathconf; 2176*7c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL; 2177*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2178*7c478bd9Sstevel@tonic-gate 2179*7c478bd9Sstevel@tonic-gate /* 2180*7c478bd9Sstevel@tonic-gate * Free the held credential. 
2181*7c478bd9Sstevel@tonic-gate */ 2182*7c478bd9Sstevel@tonic-gate if (cred != NULL) 2183*7c478bd9Sstevel@tonic-gate crfree(cred); 2184*7c478bd9Sstevel@tonic-gate 2185*7c478bd9Sstevel@tonic-gate /* 2186*7c478bd9Sstevel@tonic-gate * Free the access cache entries. 2187*7c478bd9Sstevel@tonic-gate */ 2188*7c478bd9Sstevel@tonic-gate (void) nfs_access_purge_rp(rp); 2189*7c478bd9Sstevel@tonic-gate 2190*7c478bd9Sstevel@tonic-gate /* 2191*7c478bd9Sstevel@tonic-gate * Free the readdir cache entries. 2192*7c478bd9Sstevel@tonic-gate */ 2193*7c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp)) 2194*7c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(vp); 2195*7c478bd9Sstevel@tonic-gate 2196*7c478bd9Sstevel@tonic-gate /* 2197*7c478bd9Sstevel@tonic-gate * Free the symbolic link cache. 2198*7c478bd9Sstevel@tonic-gate */ 2199*7c478bd9Sstevel@tonic-gate if (contents != NULL) { 2200*7c478bd9Sstevel@tonic-gate 2201*7c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size); 2202*7c478bd9Sstevel@tonic-gate } 2203*7c478bd9Sstevel@tonic-gate 2204*7c478bd9Sstevel@tonic-gate /* 2205*7c478bd9Sstevel@tonic-gate * Free any cached ACL. 2206*7c478bd9Sstevel@tonic-gate */ 2207*7c478bd9Sstevel@tonic-gate if (vsp != NULL) 2208*7c478bd9Sstevel@tonic-gate nfs_acl_free(vsp); 2209*7c478bd9Sstevel@tonic-gate 2210*7c478bd9Sstevel@tonic-gate /* 2211*7c478bd9Sstevel@tonic-gate * Free any cached pathconf information. 2212*7c478bd9Sstevel@tonic-gate */ 2213*7c478bd9Sstevel@tonic-gate if (info != NULL) 2214*7c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info)); 2215*7c478bd9Sstevel@tonic-gate } 2216*7c478bd9Sstevel@tonic-gate 2217*7c478bd9Sstevel@tonic-gate /* 2218*7c478bd9Sstevel@tonic-gate * Return a vnode for the given NFS Version 2 file handle. 2219*7c478bd9Sstevel@tonic-gate * If no rnode exists for this fhandle, create one and put it 2220*7c478bd9Sstevel@tonic-gate * into the hash queues. If the rnode for this fhandle 2221*7c478bd9Sstevel@tonic-gate * already exists, return it. 
2222*7c478bd9Sstevel@tonic-gate * 2223*7c478bd9Sstevel@tonic-gate * Note: make_rnode() may upgrade the hash bucket lock to exclusive. 2224*7c478bd9Sstevel@tonic-gate */ 2225*7c478bd9Sstevel@tonic-gate vnode_t * 2226*7c478bd9Sstevel@tonic-gate makenfsnode(fhandle_t *fh, struct nfsfattr *attr, struct vfs *vfsp, 2227*7c478bd9Sstevel@tonic-gate hrtime_t t, cred_t *cr, char *dnm, char *nm) 2228*7c478bd9Sstevel@tonic-gate { 2229*7c478bd9Sstevel@tonic-gate int newnode; 2230*7c478bd9Sstevel@tonic-gate int index; 2231*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2232*7c478bd9Sstevel@tonic-gate nfs_fhandle nfh; 2233*7c478bd9Sstevel@tonic-gate vattr_t va; 2234*7c478bd9Sstevel@tonic-gate 2235*7c478bd9Sstevel@tonic-gate nfh.fh_len = NFS_FHSIZE; 2236*7c478bd9Sstevel@tonic-gate bcopy(fh, nfh.fh_buf, NFS_FHSIZE); 2237*7c478bd9Sstevel@tonic-gate 2238*7c478bd9Sstevel@tonic-gate index = rtablehash(&nfh); 2239*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 2240*7c478bd9Sstevel@tonic-gate 2241*7c478bd9Sstevel@tonic-gate vp = make_rnode(&nfh, &rtable[index], vfsp, nfs_vnodeops, 2242*7c478bd9Sstevel@tonic-gate nfs_putapage, nfs_rddir_compar, &newnode, cr, dnm, nm); 2243*7c478bd9Sstevel@tonic-gate 2244*7c478bd9Sstevel@tonic-gate if (attr != NULL) { 2245*7c478bd9Sstevel@tonic-gate if (!newnode) { 2246*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2247*7c478bd9Sstevel@tonic-gate (void) nfs_cache_fattr(vp, attr, &va, t, cr); 2248*7c478bd9Sstevel@tonic-gate } else { 2249*7c478bd9Sstevel@tonic-gate if (attr->na_type < NFNON || attr->na_type > NFSOC) 2250*7c478bd9Sstevel@tonic-gate vp->v_type = VBAD; 2251*7c478bd9Sstevel@tonic-gate else 2252*7c478bd9Sstevel@tonic-gate vp->v_type = n2v_type(attr); 2253*7c478bd9Sstevel@tonic-gate /* 2254*7c478bd9Sstevel@tonic-gate * A translation here seems to be necessary 2255*7c478bd9Sstevel@tonic-gate * because this function can be called 2256*7c478bd9Sstevel@tonic-gate * with `attr' that has come from the wire, 
2257*7c478bd9Sstevel@tonic-gate * and been operated on by vattr_to_nattr(). 2258*7c478bd9Sstevel@tonic-gate * See nfsrootvp()->VOP_GETTATTR()->nfsgetattr() 2259*7c478bd9Sstevel@tonic-gate * ->nfs_getattr_otw()->rfscall()->vattr_to_nattr() 2260*7c478bd9Sstevel@tonic-gate * ->makenfsnode(). 2261*7c478bd9Sstevel@tonic-gate */ 2262*7c478bd9Sstevel@tonic-gate if ((attr->na_rdev & 0xffff0000) == 0) 2263*7c478bd9Sstevel@tonic-gate vp->v_rdev = nfsv2_expdev(attr->na_rdev); 2264*7c478bd9Sstevel@tonic-gate else 2265*7c478bd9Sstevel@tonic-gate vp->v_rdev = expldev(n2v_rdev(attr)); 2266*7c478bd9Sstevel@tonic-gate nfs_attrcache(vp, attr, t); 2267*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2268*7c478bd9Sstevel@tonic-gate } 2269*7c478bd9Sstevel@tonic-gate } else { 2270*7c478bd9Sstevel@tonic-gate if (newnode) { 2271*7c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 2272*7c478bd9Sstevel@tonic-gate } 2273*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2274*7c478bd9Sstevel@tonic-gate } 2275*7c478bd9Sstevel@tonic-gate 2276*7c478bd9Sstevel@tonic-gate return (vp); 2277*7c478bd9Sstevel@tonic-gate } 2278*7c478bd9Sstevel@tonic-gate 2279*7c478bd9Sstevel@tonic-gate /* 2280*7c478bd9Sstevel@tonic-gate * Return a vnode for the given NFS Version 3 file handle. 2281*7c478bd9Sstevel@tonic-gate * If no rnode exists for this fhandle, create one and put it 2282*7c478bd9Sstevel@tonic-gate * into the hash queues. If the rnode for this fhandle 2283*7c478bd9Sstevel@tonic-gate * already exists, return it. 2284*7c478bd9Sstevel@tonic-gate * 2285*7c478bd9Sstevel@tonic-gate * Note: make_rnode() may upgrade the hash bucket lock to exclusive. 
2286*7c478bd9Sstevel@tonic-gate */ 2287*7c478bd9Sstevel@tonic-gate vnode_t * 2288*7c478bd9Sstevel@tonic-gate makenfs3node_va(nfs_fh3 *fh, vattr_t *vap, struct vfs *vfsp, hrtime_t t, 2289*7c478bd9Sstevel@tonic-gate cred_t *cr, char *dnm, char *nm) 2290*7c478bd9Sstevel@tonic-gate { 2291*7c478bd9Sstevel@tonic-gate int newnode; 2292*7c478bd9Sstevel@tonic-gate int index; 2293*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2294*7c478bd9Sstevel@tonic-gate 2295*7c478bd9Sstevel@tonic-gate index = rtablehash((nfs_fhandle *)fh); 2296*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 2297*7c478bd9Sstevel@tonic-gate 2298*7c478bd9Sstevel@tonic-gate vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp, 2299*7c478bd9Sstevel@tonic-gate nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr, 2300*7c478bd9Sstevel@tonic-gate dnm, nm); 2301*7c478bd9Sstevel@tonic-gate 2302*7c478bd9Sstevel@tonic-gate if (vap == NULL) { 2303*7c478bd9Sstevel@tonic-gate if (newnode) { 2304*7c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 2305*7c478bd9Sstevel@tonic-gate } 2306*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2307*7c478bd9Sstevel@tonic-gate return (vp); 2308*7c478bd9Sstevel@tonic-gate } 2309*7c478bd9Sstevel@tonic-gate 2310*7c478bd9Sstevel@tonic-gate if (!newnode) { 2311*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2312*7c478bd9Sstevel@tonic-gate nfs_attr_cache(vp, vap, t, cr); 2313*7c478bd9Sstevel@tonic-gate } else { 2314*7c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp); 2315*7c478bd9Sstevel@tonic-gate 2316*7c478bd9Sstevel@tonic-gate vp->v_type = vap->va_type; 2317*7c478bd9Sstevel@tonic-gate vp->v_rdev = vap->va_rdev; 2318*7c478bd9Sstevel@tonic-gate 2319*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2320*7c478bd9Sstevel@tonic-gate if (rp->r_mtime <= t) 2321*7c478bd9Sstevel@tonic-gate nfs_attrcache_va(vp, vap); 2322*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2323*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 
2324*7c478bd9Sstevel@tonic-gate } 2325*7c478bd9Sstevel@tonic-gate 2326*7c478bd9Sstevel@tonic-gate return (vp); 2327*7c478bd9Sstevel@tonic-gate } 2328*7c478bd9Sstevel@tonic-gate 2329*7c478bd9Sstevel@tonic-gate vnode_t * 2330*7c478bd9Sstevel@tonic-gate makenfs3node(nfs_fh3 *fh, fattr3 *attr, struct vfs *vfsp, hrtime_t t, 2331*7c478bd9Sstevel@tonic-gate cred_t *cr, char *dnm, char *nm) 2332*7c478bd9Sstevel@tonic-gate { 2333*7c478bd9Sstevel@tonic-gate int newnode; 2334*7c478bd9Sstevel@tonic-gate int index; 2335*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2336*7c478bd9Sstevel@tonic-gate vattr_t va; 2337*7c478bd9Sstevel@tonic-gate 2338*7c478bd9Sstevel@tonic-gate index = rtablehash((nfs_fhandle *)fh); 2339*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 2340*7c478bd9Sstevel@tonic-gate 2341*7c478bd9Sstevel@tonic-gate vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp, 2342*7c478bd9Sstevel@tonic-gate nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr, 2343*7c478bd9Sstevel@tonic-gate dnm, nm); 2344*7c478bd9Sstevel@tonic-gate 2345*7c478bd9Sstevel@tonic-gate if (attr == NULL) { 2346*7c478bd9Sstevel@tonic-gate if (newnode) { 2347*7c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 2348*7c478bd9Sstevel@tonic-gate } 2349*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2350*7c478bd9Sstevel@tonic-gate return (vp); 2351*7c478bd9Sstevel@tonic-gate } 2352*7c478bd9Sstevel@tonic-gate 2353*7c478bd9Sstevel@tonic-gate if (!newnode) { 2354*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2355*7c478bd9Sstevel@tonic-gate (void) nfs3_cache_fattr3(vp, attr, &va, t, cr); 2356*7c478bd9Sstevel@tonic-gate } else { 2357*7c478bd9Sstevel@tonic-gate if (attr->type < NF3REG || attr->type > NF3FIFO) 2358*7c478bd9Sstevel@tonic-gate vp->v_type = VBAD; 2359*7c478bd9Sstevel@tonic-gate else 2360*7c478bd9Sstevel@tonic-gate vp->v_type = nf3_to_vt[attr->type]; 2361*7c478bd9Sstevel@tonic-gate vp->v_rdev = makedevice(attr->rdev.specdata1, 
2362*7c478bd9Sstevel@tonic-gate attr->rdev.specdata2); 2363*7c478bd9Sstevel@tonic-gate nfs3_attrcache(vp, attr, t); 2364*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2365*7c478bd9Sstevel@tonic-gate } 2366*7c478bd9Sstevel@tonic-gate 2367*7c478bd9Sstevel@tonic-gate return (vp); 2368*7c478bd9Sstevel@tonic-gate } 2369*7c478bd9Sstevel@tonic-gate 2370*7c478bd9Sstevel@tonic-gate /* 2371*7c478bd9Sstevel@tonic-gate * Read this comment before making changes to rtablehash()! 2372*7c478bd9Sstevel@tonic-gate * This is a hash function in which seemingly obvious and harmless 2373*7c478bd9Sstevel@tonic-gate * changes can cause escalations costing million dollars! 2374*7c478bd9Sstevel@tonic-gate * Know what you are doing. 2375*7c478bd9Sstevel@tonic-gate * 2376*7c478bd9Sstevel@tonic-gate * rtablehash() implements Jenkins' one-at-a-time hash algorithm. The 2377*7c478bd9Sstevel@tonic-gate * algorithm is currently detailed here: 2378*7c478bd9Sstevel@tonic-gate * 2379*7c478bd9Sstevel@tonic-gate * http://burtleburtle.net/bob/hash/doobs.html 2380*7c478bd9Sstevel@tonic-gate * 2381*7c478bd9Sstevel@tonic-gate * Of course, the above link may not be valid by the time you are reading 2382*7c478bd9Sstevel@tonic-gate * this, but suffice it to say that the one-at-a-time algorithm works well in 2383*7c478bd9Sstevel@tonic-gate * almost all cases. If you are changing the algorithm be sure to verify that 2384*7c478bd9Sstevel@tonic-gate * the hash algorithm still provides even distribution in all cases and with 2385*7c478bd9Sstevel@tonic-gate * any server returning filehandles in whatever order (sequential or random). 
2386*7c478bd9Sstevel@tonic-gate */ 2387*7c478bd9Sstevel@tonic-gate static int 2388*7c478bd9Sstevel@tonic-gate rtablehash(nfs_fhandle *fh) 2389*7c478bd9Sstevel@tonic-gate { 2390*7c478bd9Sstevel@tonic-gate ulong_t hash, len, i; 2391*7c478bd9Sstevel@tonic-gate char *key; 2392*7c478bd9Sstevel@tonic-gate 2393*7c478bd9Sstevel@tonic-gate key = fh->fh_buf; 2394*7c478bd9Sstevel@tonic-gate len = (ulong_t)fh->fh_len; 2395*7c478bd9Sstevel@tonic-gate for (hash = 0, i = 0; i < len; i++) { 2396*7c478bd9Sstevel@tonic-gate hash += key[i]; 2397*7c478bd9Sstevel@tonic-gate hash += (hash << 10); 2398*7c478bd9Sstevel@tonic-gate hash ^= (hash >> 6); 2399*7c478bd9Sstevel@tonic-gate } 2400*7c478bd9Sstevel@tonic-gate hash += (hash << 3); 2401*7c478bd9Sstevel@tonic-gate hash ^= (hash >> 11); 2402*7c478bd9Sstevel@tonic-gate hash += (hash << 15); 2403*7c478bd9Sstevel@tonic-gate return (hash & rtablemask); 2404*7c478bd9Sstevel@tonic-gate } 2405*7c478bd9Sstevel@tonic-gate 2406*7c478bd9Sstevel@tonic-gate static vnode_t * 2407*7c478bd9Sstevel@tonic-gate make_rnode(nfs_fhandle *fh, rhashq_t *rhtp, struct vfs *vfsp, 2408*7c478bd9Sstevel@tonic-gate struct vnodeops *vops, 2409*7c478bd9Sstevel@tonic-gate int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *), 2410*7c478bd9Sstevel@tonic-gate int (*compar)(const void *, const void *), 2411*7c478bd9Sstevel@tonic-gate int *newnode, cred_t *cr, char *dnm, char *nm) 2412*7c478bd9Sstevel@tonic-gate { 2413*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2414*7c478bd9Sstevel@tonic-gate rnode_t *trp; 2415*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2416*7c478bd9Sstevel@tonic-gate mntinfo_t *mi; 2417*7c478bd9Sstevel@tonic-gate 2418*7c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&rhtp->r_lock)); 2419*7c478bd9Sstevel@tonic-gate 2420*7c478bd9Sstevel@tonic-gate mi = VFTOMI(vfsp); 2421*7c478bd9Sstevel@tonic-gate start: 2422*7c478bd9Sstevel@tonic-gate if ((rp = rfind(rhtp, fh, vfsp)) != NULL) { 2423*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 
2424*7c478bd9Sstevel@tonic-gate nfs_set_vroot(vp); 2425*7c478bd9Sstevel@tonic-gate *newnode = 0; 2426*7c478bd9Sstevel@tonic-gate return (vp); 2427*7c478bd9Sstevel@tonic-gate } 2428*7c478bd9Sstevel@tonic-gate rw_exit(&rhtp->r_lock); 2429*7c478bd9Sstevel@tonic-gate 2430*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 2431*7c478bd9Sstevel@tonic-gate if (rpfreelist != NULL && rnew >= nrnode) { 2432*7c478bd9Sstevel@tonic-gate rp = rpfreelist; 2433*7c478bd9Sstevel@tonic-gate rp_rmfree(rp); 2434*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 2435*7c478bd9Sstevel@tonic-gate 2436*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2437*7c478bd9Sstevel@tonic-gate 2438*7c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) { 2439*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 2440*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2441*7c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 2442*7c478bd9Sstevel@tonic-gate vp->v_count--; 2443*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2444*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2445*7c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_READER); 2446*7c478bd9Sstevel@tonic-gate goto start; 2447*7c478bd9Sstevel@tonic-gate } 2448*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2449*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 2450*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2451*7c478bd9Sstevel@tonic-gate } 2452*7c478bd9Sstevel@tonic-gate 2453*7c478bd9Sstevel@tonic-gate rinactive(rp, cr); 2454*7c478bd9Sstevel@tonic-gate 2455*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2456*7c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 2457*7c478bd9Sstevel@tonic-gate vp->v_count--; 2458*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2459*7c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_READER); 2460*7c478bd9Sstevel@tonic-gate goto start; 2461*7c478bd9Sstevel@tonic-gate } 2462*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 
2463*7c478bd9Sstevel@tonic-gate vn_invalid(vp); 2464*7c478bd9Sstevel@tonic-gate /* 2465*7c478bd9Sstevel@tonic-gate * destroy old locks before bzero'ing and 2466*7c478bd9Sstevel@tonic-gate * recreating the locks below. 2467*7c478bd9Sstevel@tonic-gate */ 2468*7c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_rwlock); 2469*7c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_lkserlock); 2470*7c478bd9Sstevel@tonic-gate mutex_destroy(&rp->r_statelock); 2471*7c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_cv); 2472*7c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_commit.c_cv); 2473*7c478bd9Sstevel@tonic-gate nfs_free_r_path(rp); 2474*7c478bd9Sstevel@tonic-gate avl_destroy(&rp->r_dir); 2475*7c478bd9Sstevel@tonic-gate /* 2476*7c478bd9Sstevel@tonic-gate * Make sure that if rnode is recycled then 2477*7c478bd9Sstevel@tonic-gate * VFS count is decremented properly before 2478*7c478bd9Sstevel@tonic-gate * reuse. 2479*7c478bd9Sstevel@tonic-gate */ 2480*7c478bd9Sstevel@tonic-gate VFS_RELE(vp->v_vfsp); 2481*7c478bd9Sstevel@tonic-gate vn_reinit(vp); 2482*7c478bd9Sstevel@tonic-gate } else { 2483*7c478bd9Sstevel@tonic-gate vnode_t *new_vp; 2484*7c478bd9Sstevel@tonic-gate 2485*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 2486*7c478bd9Sstevel@tonic-gate 2487*7c478bd9Sstevel@tonic-gate rp = kmem_cache_alloc(rnode_cache, KM_SLEEP); 2488*7c478bd9Sstevel@tonic-gate new_vp = vn_alloc(KM_SLEEP); 2489*7c478bd9Sstevel@tonic-gate 2490*7c478bd9Sstevel@tonic-gate atomic_add_long((ulong_t *)&rnew, 1); 2491*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2492*7c478bd9Sstevel@tonic-gate clstat_debug.nrnode.value.ui64++; 2493*7c478bd9Sstevel@tonic-gate #endif 2494*7c478bd9Sstevel@tonic-gate vp = new_vp; 2495*7c478bd9Sstevel@tonic-gate } 2496*7c478bd9Sstevel@tonic-gate 2497*7c478bd9Sstevel@tonic-gate bzero(rp, sizeof (*rp)); 2498*7c478bd9Sstevel@tonic-gate rp->r_vnode = vp; 2499*7c478bd9Sstevel@tonic-gate nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL); 2500*7c478bd9Sstevel@tonic-gate 
nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL); 2501*7c478bd9Sstevel@tonic-gate mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL); 2502*7c478bd9Sstevel@tonic-gate cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL); 2503*7c478bd9Sstevel@tonic-gate cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL); 2504*7c478bd9Sstevel@tonic-gate rp->r_fh.fh_len = fh->fh_len; 2505*7c478bd9Sstevel@tonic-gate bcopy(fh->fh_buf, rp->r_fh.fh_buf, fh->fh_len); 2506*7c478bd9Sstevel@tonic-gate rp->r_server = mi->mi_curr_serv; 2507*7c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 2508*7c478bd9Sstevel@tonic-gate /* 2509*7c478bd9Sstevel@tonic-gate * If replicated servers, stash pathnames 2510*7c478bd9Sstevel@tonic-gate */ 2511*7c478bd9Sstevel@tonic-gate if (dnm != NULL && nm != NULL) { 2512*7c478bd9Sstevel@tonic-gate char *s, *p; 2513*7c478bd9Sstevel@tonic-gate uint_t len; 2514*7c478bd9Sstevel@tonic-gate 2515*7c478bd9Sstevel@tonic-gate len = (uint_t)(strlen(dnm) + strlen(nm) + 2); 2516*7c478bd9Sstevel@tonic-gate rp->r_path = kmem_alloc(len, KM_SLEEP); 2517*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2518*7c478bd9Sstevel@tonic-gate clstat_debug.rpath.value.ui64 += len; 2519*7c478bd9Sstevel@tonic-gate #endif 2520*7c478bd9Sstevel@tonic-gate s = rp->r_path; 2521*7c478bd9Sstevel@tonic-gate for (p = dnm; *p; p++) 2522*7c478bd9Sstevel@tonic-gate *s++ = *p; 2523*7c478bd9Sstevel@tonic-gate *s++ = '/'; 2524*7c478bd9Sstevel@tonic-gate for (p = nm; *p; p++) 2525*7c478bd9Sstevel@tonic-gate *s++ = *p; 2526*7c478bd9Sstevel@tonic-gate *s = '\0'; 2527*7c478bd9Sstevel@tonic-gate } else { 2528*7c478bd9Sstevel@tonic-gate /* special case for root */ 2529*7c478bd9Sstevel@tonic-gate rp->r_path = kmem_alloc(2, KM_SLEEP); 2530*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2531*7c478bd9Sstevel@tonic-gate clstat_debug.rpath.value.ui64 += 2; 2532*7c478bd9Sstevel@tonic-gate #endif 2533*7c478bd9Sstevel@tonic-gate *rp->r_path = '.'; 2534*7c478bd9Sstevel@tonic-gate *(rp->r_path + 1) = '\0'; 2535*7c478bd9Sstevel@tonic-gate } 
2536*7c478bd9Sstevel@tonic-gate } 2537*7c478bd9Sstevel@tonic-gate VFS_HOLD(vfsp); 2538*7c478bd9Sstevel@tonic-gate rp->r_putapage = putapage; 2539*7c478bd9Sstevel@tonic-gate rp->r_hashq = rhtp; 2540*7c478bd9Sstevel@tonic-gate rp->r_flags = RREADDIRPLUS; 2541*7c478bd9Sstevel@tonic-gate avl_create(&rp->r_dir, compar, sizeof (rddir_cache), 2542*7c478bd9Sstevel@tonic-gate offsetof(rddir_cache, tree)); 2543*7c478bd9Sstevel@tonic-gate vn_setops(vp, vops); 2544*7c478bd9Sstevel@tonic-gate vp->v_data = (caddr_t)rp; 2545*7c478bd9Sstevel@tonic-gate vp->v_vfsp = vfsp; 2546*7c478bd9Sstevel@tonic-gate vp->v_type = VNON; 2547*7c478bd9Sstevel@tonic-gate nfs_set_vroot(vp); 2548*7c478bd9Sstevel@tonic-gate 2549*7c478bd9Sstevel@tonic-gate /* 2550*7c478bd9Sstevel@tonic-gate * There is a race condition if someone else 2551*7c478bd9Sstevel@tonic-gate * alloc's the rnode while no locks are held, so we 2552*7c478bd9Sstevel@tonic-gate * check again and recover if found. 2553*7c478bd9Sstevel@tonic-gate */ 2554*7c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_WRITER); 2555*7c478bd9Sstevel@tonic-gate if ((trp = rfind(rhtp, fh, vfsp)) != NULL) { 2556*7c478bd9Sstevel@tonic-gate vp = RTOV(trp); 2557*7c478bd9Sstevel@tonic-gate nfs_set_vroot(vp); 2558*7c478bd9Sstevel@tonic-gate *newnode = 0; 2559*7c478bd9Sstevel@tonic-gate rw_exit(&rhtp->r_lock); 2560*7c478bd9Sstevel@tonic-gate rp_addfree(rp, cr); 2561*7c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_READER); 2562*7c478bd9Sstevel@tonic-gate return (vp); 2563*7c478bd9Sstevel@tonic-gate } 2564*7c478bd9Sstevel@tonic-gate rp_addhash(rp); 2565*7c478bd9Sstevel@tonic-gate *newnode = 1; 2566*7c478bd9Sstevel@tonic-gate return (vp); 2567*7c478bd9Sstevel@tonic-gate } 2568*7c478bd9Sstevel@tonic-gate 2569*7c478bd9Sstevel@tonic-gate static void 2570*7c478bd9Sstevel@tonic-gate nfs_set_vroot(vnode_t *vp) 2571*7c478bd9Sstevel@tonic-gate { 2572*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2573*7c478bd9Sstevel@tonic-gate nfs_fhandle *rootfh; 
2574*7c478bd9Sstevel@tonic-gate 2575*7c478bd9Sstevel@tonic-gate rp = VTOR(vp); 2576*7c478bd9Sstevel@tonic-gate rootfh = &rp->r_server->sv_fhandle; 2577*7c478bd9Sstevel@tonic-gate if (rootfh->fh_len == rp->r_fh.fh_len && 2578*7c478bd9Sstevel@tonic-gate bcmp(rootfh->fh_buf, rp->r_fh.fh_buf, rp->r_fh.fh_len) == 0) { 2579*7c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VROOT)) { 2580*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2581*7c478bd9Sstevel@tonic-gate vp->v_flag |= VROOT; 2582*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2583*7c478bd9Sstevel@tonic-gate } 2584*7c478bd9Sstevel@tonic-gate } 2585*7c478bd9Sstevel@tonic-gate } 2586*7c478bd9Sstevel@tonic-gate 2587*7c478bd9Sstevel@tonic-gate static void 2588*7c478bd9Sstevel@tonic-gate nfs_free_r_path(rnode_t *rp) 2589*7c478bd9Sstevel@tonic-gate { 2590*7c478bd9Sstevel@tonic-gate char *path; 2591*7c478bd9Sstevel@tonic-gate size_t len; 2592*7c478bd9Sstevel@tonic-gate 2593*7c478bd9Sstevel@tonic-gate path = rp->r_path; 2594*7c478bd9Sstevel@tonic-gate if (path) { 2595*7c478bd9Sstevel@tonic-gate rp->r_path = NULL; 2596*7c478bd9Sstevel@tonic-gate len = strlen(path) + 1; 2597*7c478bd9Sstevel@tonic-gate kmem_free(path, len); 2598*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2599*7c478bd9Sstevel@tonic-gate clstat_debug.rpath.value.ui64 -= len; 2600*7c478bd9Sstevel@tonic-gate #endif 2601*7c478bd9Sstevel@tonic-gate } 2602*7c478bd9Sstevel@tonic-gate } 2603*7c478bd9Sstevel@tonic-gate 2604*7c478bd9Sstevel@tonic-gate /* 2605*7c478bd9Sstevel@tonic-gate * Put an rnode on the free list. 2606*7c478bd9Sstevel@tonic-gate * 2607*7c478bd9Sstevel@tonic-gate * Rnodes which were allocated above and beyond the normal limit 2608*7c478bd9Sstevel@tonic-gate * are immediately freed. 
2609*7c478bd9Sstevel@tonic-gate */ 2610*7c478bd9Sstevel@tonic-gate void 2611*7c478bd9Sstevel@tonic-gate rp_addfree(rnode_t *rp, cred_t *cr) 2612*7c478bd9Sstevel@tonic-gate { 2613*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2614*7c478bd9Sstevel@tonic-gate struct vfs *vfsp; 2615*7c478bd9Sstevel@tonic-gate 2616*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2617*7c478bd9Sstevel@tonic-gate ASSERT(vp->v_count >= 1); 2618*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL); 2619*7c478bd9Sstevel@tonic-gate 2620*7c478bd9Sstevel@tonic-gate /* 2621*7c478bd9Sstevel@tonic-gate * If we have too many rnodes allocated and there are no 2622*7c478bd9Sstevel@tonic-gate * references to this rnode, or if the rnode is no longer 2623*7c478bd9Sstevel@tonic-gate * accessible by it does not reside in the hash queues, 2624*7c478bd9Sstevel@tonic-gate * or if an i/o error occurred while writing to the file, 2625*7c478bd9Sstevel@tonic-gate * then just free it instead of putting it on the rnode 2626*7c478bd9Sstevel@tonic-gate * freelist. 
2627*7c478bd9Sstevel@tonic-gate */ 2628*7c478bd9Sstevel@tonic-gate vfsp = vp->v_vfsp; 2629*7c478bd9Sstevel@tonic-gate if (((rnew > nrnode || !(rp->r_flags & RHASHED) || rp->r_error || 2630*7c478bd9Sstevel@tonic-gate (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) { 2631*7c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) { 2632*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 2633*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2634*7c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 2635*7c478bd9Sstevel@tonic-gate vp->v_count--; 2636*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2637*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2638*7c478bd9Sstevel@tonic-gate return; 2639*7c478bd9Sstevel@tonic-gate } 2640*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2641*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 2642*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2643*7c478bd9Sstevel@tonic-gate } 2644*7c478bd9Sstevel@tonic-gate 2645*7c478bd9Sstevel@tonic-gate rinactive(rp, cr); 2646*7c478bd9Sstevel@tonic-gate 2647*7c478bd9Sstevel@tonic-gate /* 2648*7c478bd9Sstevel@tonic-gate * Recheck the vnode reference count. We need to 2649*7c478bd9Sstevel@tonic-gate * make sure that another reference has not been 2650*7c478bd9Sstevel@tonic-gate * acquired while we were not holding v_lock. The 2651*7c478bd9Sstevel@tonic-gate * rnode is not in the rnode hash queues, so the 2652*7c478bd9Sstevel@tonic-gate * only way for a reference to have been acquired 2653*7c478bd9Sstevel@tonic-gate * is for a VOP_PUTPAGE because the rnode was marked 2654*7c478bd9Sstevel@tonic-gate * with RDIRTY or for a modified page. This 2655*7c478bd9Sstevel@tonic-gate * reference may have been acquired before our call 2656*7c478bd9Sstevel@tonic-gate * to rinactive. 
The i/o may have been completed, 2657*7c478bd9Sstevel@tonic-gate * thus allowing rinactive to complete, but the 2658*7c478bd9Sstevel@tonic-gate * reference to the vnode may not have been released 2659*7c478bd9Sstevel@tonic-gate * yet. In any case, the rnode can not be destroyed 2660*7c478bd9Sstevel@tonic-gate * until the other references to this vnode have been 2661*7c478bd9Sstevel@tonic-gate * released. The other references will take care of 2662*7c478bd9Sstevel@tonic-gate * either destroying the rnode or placing it on the 2663*7c478bd9Sstevel@tonic-gate * rnode freelist. If there are no other references, 2664*7c478bd9Sstevel@tonic-gate * then the rnode may be safely destroyed. 2665*7c478bd9Sstevel@tonic-gate */ 2666*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2667*7c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 2668*7c478bd9Sstevel@tonic-gate vp->v_count--; 2669*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2670*7c478bd9Sstevel@tonic-gate return; 2671*7c478bd9Sstevel@tonic-gate } 2672*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2673*7c478bd9Sstevel@tonic-gate 2674*7c478bd9Sstevel@tonic-gate destroy_rnode(rp); 2675*7c478bd9Sstevel@tonic-gate return; 2676*7c478bd9Sstevel@tonic-gate } 2677*7c478bd9Sstevel@tonic-gate 2678*7c478bd9Sstevel@tonic-gate /* 2679*7c478bd9Sstevel@tonic-gate * Lock the hash queue and then recheck the reference count 2680*7c478bd9Sstevel@tonic-gate * to ensure that no other threads have acquired a reference 2681*7c478bd9Sstevel@tonic-gate * to indicate that the rnode should not be placed on the 2682*7c478bd9Sstevel@tonic-gate * freelist. If another reference has been acquired, then 2683*7c478bd9Sstevel@tonic-gate * just release this one and let the other thread complete 2684*7c478bd9Sstevel@tonic-gate * the processing of adding this rnode to the freelist. 
2685*7c478bd9Sstevel@tonic-gate */ 2686*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 2687*7c478bd9Sstevel@tonic-gate 2688*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2689*7c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 2690*7c478bd9Sstevel@tonic-gate vp->v_count--; 2691*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2692*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2693*7c478bd9Sstevel@tonic-gate return; 2694*7c478bd9Sstevel@tonic-gate } 2695*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2696*7c478bd9Sstevel@tonic-gate 2697*7c478bd9Sstevel@tonic-gate /* 2698*7c478bd9Sstevel@tonic-gate * If there is no cached data or metadata for this file, then 2699*7c478bd9Sstevel@tonic-gate * put the rnode on the front of the freelist so that it will 2700*7c478bd9Sstevel@tonic-gate * be reused before other rnodes which may have cached data or 2701*7c478bd9Sstevel@tonic-gate * metadata associated with them. 2702*7c478bd9Sstevel@tonic-gate */ 2703*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 2704*7c478bd9Sstevel@tonic-gate if (rpfreelist == NULL) { 2705*7c478bd9Sstevel@tonic-gate rp->r_freef = rp; 2706*7c478bd9Sstevel@tonic-gate rp->r_freeb = rp; 2707*7c478bd9Sstevel@tonic-gate rpfreelist = rp; 2708*7c478bd9Sstevel@tonic-gate } else { 2709*7c478bd9Sstevel@tonic-gate rp->r_freef = rpfreelist; 2710*7c478bd9Sstevel@tonic-gate rp->r_freeb = rpfreelist->r_freeb; 2711*7c478bd9Sstevel@tonic-gate rpfreelist->r_freeb->r_freef = rp; 2712*7c478bd9Sstevel@tonic-gate rpfreelist->r_freeb = rp; 2713*7c478bd9Sstevel@tonic-gate if (!vn_has_cached_data(vp) && 2714*7c478bd9Sstevel@tonic-gate !HAVE_RDDIR_CACHE(rp) && 2715*7c478bd9Sstevel@tonic-gate rp->r_symlink.contents == NULL && 2716*7c478bd9Sstevel@tonic-gate rp->r_secattr == NULL && 2717*7c478bd9Sstevel@tonic-gate rp->r_pathconf == NULL) 2718*7c478bd9Sstevel@tonic-gate rpfreelist = rp; 2719*7c478bd9Sstevel@tonic-gate } 2720*7c478bd9Sstevel@tonic-gate 
mutex_exit(&rpfreelist_lock); 2721*7c478bd9Sstevel@tonic-gate 2722*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2723*7c478bd9Sstevel@tonic-gate } 2724*7c478bd9Sstevel@tonic-gate 2725*7c478bd9Sstevel@tonic-gate /* 2726*7c478bd9Sstevel@tonic-gate * Remove an rnode from the free list. 2727*7c478bd9Sstevel@tonic-gate * 2728*7c478bd9Sstevel@tonic-gate * The caller must be holding rpfreelist_lock and the rnode 2729*7c478bd9Sstevel@tonic-gate * must be on the freelist. 2730*7c478bd9Sstevel@tonic-gate */ 2731*7c478bd9Sstevel@tonic-gate static void 2732*7c478bd9Sstevel@tonic-gate rp_rmfree(rnode_t *rp) 2733*7c478bd9Sstevel@tonic-gate { 2734*7c478bd9Sstevel@tonic-gate 2735*7c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&rpfreelist_lock)); 2736*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL); 2737*7c478bd9Sstevel@tonic-gate 2738*7c478bd9Sstevel@tonic-gate if (rp == rpfreelist) { 2739*7c478bd9Sstevel@tonic-gate rpfreelist = rp->r_freef; 2740*7c478bd9Sstevel@tonic-gate if (rp == rpfreelist) 2741*7c478bd9Sstevel@tonic-gate rpfreelist = NULL; 2742*7c478bd9Sstevel@tonic-gate } 2743*7c478bd9Sstevel@tonic-gate 2744*7c478bd9Sstevel@tonic-gate rp->r_freeb->r_freef = rp->r_freef; 2745*7c478bd9Sstevel@tonic-gate rp->r_freef->r_freeb = rp->r_freeb; 2746*7c478bd9Sstevel@tonic-gate 2747*7c478bd9Sstevel@tonic-gate rp->r_freef = rp->r_freeb = NULL; 2748*7c478bd9Sstevel@tonic-gate } 2749*7c478bd9Sstevel@tonic-gate 2750*7c478bd9Sstevel@tonic-gate /* 2751*7c478bd9Sstevel@tonic-gate * Put a rnode in the hash table. 2752*7c478bd9Sstevel@tonic-gate * 2753*7c478bd9Sstevel@tonic-gate * The caller must be holding the exclusive hash queue lock. 
2754*7c478bd9Sstevel@tonic-gate */ 2755*7c478bd9Sstevel@tonic-gate static void 2756*7c478bd9Sstevel@tonic-gate rp_addhash(rnode_t *rp) 2757*7c478bd9Sstevel@tonic-gate { 2758*7c478bd9Sstevel@tonic-gate 2759*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock)); 2760*7c478bd9Sstevel@tonic-gate ASSERT(!(rp->r_flags & RHASHED)); 2761*7c478bd9Sstevel@tonic-gate 2762*7c478bd9Sstevel@tonic-gate rp->r_hashf = rp->r_hashq->r_hashf; 2763*7c478bd9Sstevel@tonic-gate rp->r_hashq->r_hashf = rp; 2764*7c478bd9Sstevel@tonic-gate rp->r_hashb = (rnode_t *)rp->r_hashq; 2765*7c478bd9Sstevel@tonic-gate rp->r_hashf->r_hashb = rp; 2766*7c478bd9Sstevel@tonic-gate 2767*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2768*7c478bd9Sstevel@tonic-gate rp->r_flags |= RHASHED; 2769*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2770*7c478bd9Sstevel@tonic-gate } 2771*7c478bd9Sstevel@tonic-gate 2772*7c478bd9Sstevel@tonic-gate /* 2773*7c478bd9Sstevel@tonic-gate * Remove a rnode from the hash table. 2774*7c478bd9Sstevel@tonic-gate * 2775*7c478bd9Sstevel@tonic-gate * The caller must be holding the hash queue lock. 
2776*7c478bd9Sstevel@tonic-gate */ 2777*7c478bd9Sstevel@tonic-gate static void 2778*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rnode_t *rp) 2779*7c478bd9Sstevel@tonic-gate { 2780*7c478bd9Sstevel@tonic-gate 2781*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock)); 2782*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_flags & RHASHED); 2783*7c478bd9Sstevel@tonic-gate 2784*7c478bd9Sstevel@tonic-gate rp->r_hashb->r_hashf = rp->r_hashf; 2785*7c478bd9Sstevel@tonic-gate rp->r_hashf->r_hashb = rp->r_hashb; 2786*7c478bd9Sstevel@tonic-gate 2787*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2788*7c478bd9Sstevel@tonic-gate rp->r_flags &= ~RHASHED; 2789*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2790*7c478bd9Sstevel@tonic-gate } 2791*7c478bd9Sstevel@tonic-gate 2792*7c478bd9Sstevel@tonic-gate /* 2793*7c478bd9Sstevel@tonic-gate * Remove a rnode from the hash table. 2794*7c478bd9Sstevel@tonic-gate * 2795*7c478bd9Sstevel@tonic-gate * The caller must not be holding the hash queue lock. 2796*7c478bd9Sstevel@tonic-gate */ 2797*7c478bd9Sstevel@tonic-gate void 2798*7c478bd9Sstevel@tonic-gate rp_rmhash(rnode_t *rp) 2799*7c478bd9Sstevel@tonic-gate { 2800*7c478bd9Sstevel@tonic-gate 2801*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 2802*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 2803*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 2804*7c478bd9Sstevel@tonic-gate } 2805*7c478bd9Sstevel@tonic-gate 2806*7c478bd9Sstevel@tonic-gate /* 2807*7c478bd9Sstevel@tonic-gate * Lookup a rnode by fhandle. 2808*7c478bd9Sstevel@tonic-gate * 2809*7c478bd9Sstevel@tonic-gate * The caller must be holding the hash queue lock, either shared or exclusive. 
2810*7c478bd9Sstevel@tonic-gate */ 2811*7c478bd9Sstevel@tonic-gate static rnode_t * 2812*7c478bd9Sstevel@tonic-gate rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp) 2813*7c478bd9Sstevel@tonic-gate { 2814*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2815*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2816*7c478bd9Sstevel@tonic-gate 2817*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&rhtp->r_lock)); 2818*7c478bd9Sstevel@tonic-gate 2819*7c478bd9Sstevel@tonic-gate for (rp = rhtp->r_hashf; rp != (rnode_t *)rhtp; rp = rp->r_hashf) { 2820*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2821*7c478bd9Sstevel@tonic-gate if (vp->v_vfsp == vfsp && 2822*7c478bd9Sstevel@tonic-gate rp->r_fh.fh_len == fh->fh_len && 2823*7c478bd9Sstevel@tonic-gate bcmp(rp->r_fh.fh_buf, fh->fh_buf, fh->fh_len) == 0) { 2824*7c478bd9Sstevel@tonic-gate /* 2825*7c478bd9Sstevel@tonic-gate * remove rnode from free list, if necessary. 2826*7c478bd9Sstevel@tonic-gate */ 2827*7c478bd9Sstevel@tonic-gate if (rp->r_freef != NULL) { 2828*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 2829*7c478bd9Sstevel@tonic-gate /* 2830*7c478bd9Sstevel@tonic-gate * If the rnode is on the freelist, 2831*7c478bd9Sstevel@tonic-gate * then remove it and use that reference 2832*7c478bd9Sstevel@tonic-gate * as the new reference. Otherwise, 2833*7c478bd9Sstevel@tonic-gate * need to increment the reference count. 
2834*7c478bd9Sstevel@tonic-gate */ 2835*7c478bd9Sstevel@tonic-gate if (rp->r_freef != NULL) { 2836*7c478bd9Sstevel@tonic-gate rp_rmfree(rp); 2837*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 2838*7c478bd9Sstevel@tonic-gate } else { 2839*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 2840*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 2841*7c478bd9Sstevel@tonic-gate } 2842*7c478bd9Sstevel@tonic-gate } else 2843*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 2844*7c478bd9Sstevel@tonic-gate return (rp); 2845*7c478bd9Sstevel@tonic-gate } 2846*7c478bd9Sstevel@tonic-gate } 2847*7c478bd9Sstevel@tonic-gate return (NULL); 2848*7c478bd9Sstevel@tonic-gate } 2849*7c478bd9Sstevel@tonic-gate 2850*7c478bd9Sstevel@tonic-gate /* 2851*7c478bd9Sstevel@tonic-gate * Return 1 if there is a active vnode belonging to this vfs in the 2852*7c478bd9Sstevel@tonic-gate * rtable cache. 2853*7c478bd9Sstevel@tonic-gate * 2854*7c478bd9Sstevel@tonic-gate * Several of these checks are done without holding the usual 2855*7c478bd9Sstevel@tonic-gate * locks. This is safe because destroy_rtable(), rp_addfree(), 2856*7c478bd9Sstevel@tonic-gate * etc. will redo the necessary checks before actually destroying 2857*7c478bd9Sstevel@tonic-gate * any rnodes. 
2858*7c478bd9Sstevel@tonic-gate */ 2859*7c478bd9Sstevel@tonic-gate int 2860*7c478bd9Sstevel@tonic-gate check_rtable(struct vfs *vfsp) 2861*7c478bd9Sstevel@tonic-gate { 2862*7c478bd9Sstevel@tonic-gate int index; 2863*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2864*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2865*7c478bd9Sstevel@tonic-gate 2866*7c478bd9Sstevel@tonic-gate for (index = 0; index < rtablesize; index++) { 2867*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 2868*7c478bd9Sstevel@tonic-gate for (rp = rtable[index].r_hashf; 2869*7c478bd9Sstevel@tonic-gate rp != (rnode_t *)(&rtable[index]); 2870*7c478bd9Sstevel@tonic-gate rp = rp->r_hashf) { 2871*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2872*7c478bd9Sstevel@tonic-gate if (vp->v_vfsp == vfsp) { 2873*7c478bd9Sstevel@tonic-gate if (rp->r_freef == NULL || 2874*7c478bd9Sstevel@tonic-gate (vn_has_cached_data(vp) && 2875*7c478bd9Sstevel@tonic-gate (rp->r_flags & RDIRTY)) || 2876*7c478bd9Sstevel@tonic-gate rp->r_count > 0) { 2877*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2878*7c478bd9Sstevel@tonic-gate return (1); 2879*7c478bd9Sstevel@tonic-gate } 2880*7c478bd9Sstevel@tonic-gate } 2881*7c478bd9Sstevel@tonic-gate } 2882*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2883*7c478bd9Sstevel@tonic-gate } 2884*7c478bd9Sstevel@tonic-gate return (0); 2885*7c478bd9Sstevel@tonic-gate } 2886*7c478bd9Sstevel@tonic-gate 2887*7c478bd9Sstevel@tonic-gate /* 2888*7c478bd9Sstevel@tonic-gate * Destroy inactive vnodes from the hash queues which belong to this 2889*7c478bd9Sstevel@tonic-gate * vfs. It is essential that we destroy all inactive vnodes during a 2890*7c478bd9Sstevel@tonic-gate * forced unmount as well as during a normal unmount. 
2891*7c478bd9Sstevel@tonic-gate */ 2892*7c478bd9Sstevel@tonic-gate void 2893*7c478bd9Sstevel@tonic-gate destroy_rtable(struct vfs *vfsp, cred_t *cr) 2894*7c478bd9Sstevel@tonic-gate { 2895*7c478bd9Sstevel@tonic-gate int index; 2896*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2897*7c478bd9Sstevel@tonic-gate rnode_t *rlist; 2898*7c478bd9Sstevel@tonic-gate rnode_t *r_hashf; 2899*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2900*7c478bd9Sstevel@tonic-gate 2901*7c478bd9Sstevel@tonic-gate rlist = NULL; 2902*7c478bd9Sstevel@tonic-gate 2903*7c478bd9Sstevel@tonic-gate for (index = 0; index < rtablesize; index++) { 2904*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_WRITER); 2905*7c478bd9Sstevel@tonic-gate for (rp = rtable[index].r_hashf; 2906*7c478bd9Sstevel@tonic-gate rp != (rnode_t *)(&rtable[index]); 2907*7c478bd9Sstevel@tonic-gate rp = r_hashf) { 2908*7c478bd9Sstevel@tonic-gate /* save the hash pointer before destroying */ 2909*7c478bd9Sstevel@tonic-gate r_hashf = rp->r_hashf; 2910*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2911*7c478bd9Sstevel@tonic-gate if (vp->v_vfsp == vfsp) { 2912*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 2913*7c478bd9Sstevel@tonic-gate if (rp->r_freef != NULL) { 2914*7c478bd9Sstevel@tonic-gate rp_rmfree(rp); 2915*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 2916*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 2917*7c478bd9Sstevel@tonic-gate rp->r_hashf = rlist; 2918*7c478bd9Sstevel@tonic-gate rlist = rp; 2919*7c478bd9Sstevel@tonic-gate } else 2920*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 2921*7c478bd9Sstevel@tonic-gate } 2922*7c478bd9Sstevel@tonic-gate } 2923*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 2924*7c478bd9Sstevel@tonic-gate } 2925*7c478bd9Sstevel@tonic-gate 2926*7c478bd9Sstevel@tonic-gate for (rp = rlist; rp != NULL; rp = rlist) { 2927*7c478bd9Sstevel@tonic-gate rlist = rp->r_hashf; 2928*7c478bd9Sstevel@tonic-gate /* 2929*7c478bd9Sstevel@tonic-gate * This call to rp_addfree 
will end up destroying the 2930*7c478bd9Sstevel@tonic-gate * rnode, but in a safe way with the appropriate set 2931*7c478bd9Sstevel@tonic-gate * of checks done. 2932*7c478bd9Sstevel@tonic-gate */ 2933*7c478bd9Sstevel@tonic-gate rp_addfree(rp, cr); 2934*7c478bd9Sstevel@tonic-gate } 2935*7c478bd9Sstevel@tonic-gate 2936*7c478bd9Sstevel@tonic-gate } 2937*7c478bd9Sstevel@tonic-gate 2938*7c478bd9Sstevel@tonic-gate /* 2939*7c478bd9Sstevel@tonic-gate * This routine destroys all the resources associated with the rnode 2940*7c478bd9Sstevel@tonic-gate * and then the rnode itself. 2941*7c478bd9Sstevel@tonic-gate */ 2942*7c478bd9Sstevel@tonic-gate static void 2943*7c478bd9Sstevel@tonic-gate destroy_rnode(rnode_t *rp) 2944*7c478bd9Sstevel@tonic-gate { 2945*7c478bd9Sstevel@tonic-gate vnode_t *vp; 2946*7c478bd9Sstevel@tonic-gate vfs_t *vfsp; 2947*7c478bd9Sstevel@tonic-gate 2948*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 2949*7c478bd9Sstevel@tonic-gate vfsp = vp->v_vfsp; 2950*7c478bd9Sstevel@tonic-gate 2951*7c478bd9Sstevel@tonic-gate ASSERT(vp->v_count == 1); 2952*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_count == 0); 2953*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_lmpl == NULL); 2954*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_mapcnt == 0); 2955*7c478bd9Sstevel@tonic-gate ASSERT(!(rp->r_flags & RHASHED)); 2956*7c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL); 2957*7c478bd9Sstevel@tonic-gate atomic_add_long((ulong_t *)&rnew, -1); 2958*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2959*7c478bd9Sstevel@tonic-gate clstat_debug.nrnode.value.ui64--; 2960*7c478bd9Sstevel@tonic-gate #endif 2961*7c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_rwlock); 2962*7c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_lkserlock); 2963*7c478bd9Sstevel@tonic-gate mutex_destroy(&rp->r_statelock); 2964*7c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_cv); 2965*7c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_commit.c_cv); 2966*7c478bd9Sstevel@tonic-gate if (rp->r_flags & RDELMAPLIST) 
2967*7c478bd9Sstevel@tonic-gate list_destroy(&rp->r_indelmap); 2968*7c478bd9Sstevel@tonic-gate nfs_free_r_path(rp); 2969*7c478bd9Sstevel@tonic-gate avl_destroy(&rp->r_dir); 2970*7c478bd9Sstevel@tonic-gate vn_invalid(vp); 2971*7c478bd9Sstevel@tonic-gate vn_free(vp); 2972*7c478bd9Sstevel@tonic-gate kmem_cache_free(rnode_cache, rp); 2973*7c478bd9Sstevel@tonic-gate VFS_RELE(vfsp); 2974*7c478bd9Sstevel@tonic-gate } 2975*7c478bd9Sstevel@tonic-gate 2976*7c478bd9Sstevel@tonic-gate /* 2977*7c478bd9Sstevel@tonic-gate * Flush all vnodes in this (or every) vfs. 2978*7c478bd9Sstevel@tonic-gate * Used by nfs_sync and by nfs_unmount. 2979*7c478bd9Sstevel@tonic-gate */ 2980*7c478bd9Sstevel@tonic-gate void 2981*7c478bd9Sstevel@tonic-gate rflush(struct vfs *vfsp, cred_t *cr) 2982*7c478bd9Sstevel@tonic-gate { 2983*7c478bd9Sstevel@tonic-gate int index; 2984*7c478bd9Sstevel@tonic-gate rnode_t *rp; 2985*7c478bd9Sstevel@tonic-gate vnode_t *vp, **vplist; 2986*7c478bd9Sstevel@tonic-gate long num, cnt; 2987*7c478bd9Sstevel@tonic-gate 2988*7c478bd9Sstevel@tonic-gate /* 2989*7c478bd9Sstevel@tonic-gate * Check to see whether there is anything to do. 2990*7c478bd9Sstevel@tonic-gate */ 2991*7c478bd9Sstevel@tonic-gate num = rnew; 2992*7c478bd9Sstevel@tonic-gate if (num == 0) 2993*7c478bd9Sstevel@tonic-gate return; 2994*7c478bd9Sstevel@tonic-gate 2995*7c478bd9Sstevel@tonic-gate /* 2996*7c478bd9Sstevel@tonic-gate * Allocate a slot for all currently active rnodes on the 2997*7c478bd9Sstevel@tonic-gate * supposition that they all may need flushing. 2998*7c478bd9Sstevel@tonic-gate */ 2999*7c478bd9Sstevel@tonic-gate vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP); 3000*7c478bd9Sstevel@tonic-gate cnt = 0; 3001*7c478bd9Sstevel@tonic-gate 3002*7c478bd9Sstevel@tonic-gate /* 3003*7c478bd9Sstevel@tonic-gate * Walk the hash queues looking for rnodes with page 3004*7c478bd9Sstevel@tonic-gate * lists associated with them. Make a list of these 3005*7c478bd9Sstevel@tonic-gate * files. 
3006*7c478bd9Sstevel@tonic-gate */ 3007*7c478bd9Sstevel@tonic-gate for (index = 0; index < rtablesize; index++) { 3008*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 3009*7c478bd9Sstevel@tonic-gate for (rp = rtable[index].r_hashf; 3010*7c478bd9Sstevel@tonic-gate rp != (rnode_t *)(&rtable[index]); 3011*7c478bd9Sstevel@tonic-gate rp = rp->r_hashf) { 3012*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 3013*7c478bd9Sstevel@tonic-gate /* 3014*7c478bd9Sstevel@tonic-gate * Don't bother sync'ing a vp if it 3015*7c478bd9Sstevel@tonic-gate * is part of virtual swap device or 3016*7c478bd9Sstevel@tonic-gate * if VFS is read-only 3017*7c478bd9Sstevel@tonic-gate */ 3018*7c478bd9Sstevel@tonic-gate if (IS_SWAPVP(vp) || vn_is_readonly(vp)) 3019*7c478bd9Sstevel@tonic-gate continue; 3020*7c478bd9Sstevel@tonic-gate /* 3021*7c478bd9Sstevel@tonic-gate * If flushing all mounted file systems or 3022*7c478bd9Sstevel@tonic-gate * the vnode belongs to this vfs, has pages 3023*7c478bd9Sstevel@tonic-gate * and is marked as either dirty or mmap'd, 3024*7c478bd9Sstevel@tonic-gate * hold and add this vnode to the list of 3025*7c478bd9Sstevel@tonic-gate * vnodes to flush. 
3026*7c478bd9Sstevel@tonic-gate */ 3027*7c478bd9Sstevel@tonic-gate if ((vfsp == NULL || vp->v_vfsp == vfsp) && 3028*7c478bd9Sstevel@tonic-gate vn_has_cached_data(vp) && 3029*7c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) { 3030*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 3031*7c478bd9Sstevel@tonic-gate vplist[cnt++] = vp; 3032*7c478bd9Sstevel@tonic-gate if (cnt == num) { 3033*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 3034*7c478bd9Sstevel@tonic-gate goto toomany; 3035*7c478bd9Sstevel@tonic-gate } 3036*7c478bd9Sstevel@tonic-gate } 3037*7c478bd9Sstevel@tonic-gate } 3038*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 3039*7c478bd9Sstevel@tonic-gate } 3040*7c478bd9Sstevel@tonic-gate toomany: 3041*7c478bd9Sstevel@tonic-gate 3042*7c478bd9Sstevel@tonic-gate /* 3043*7c478bd9Sstevel@tonic-gate * Flush and release all of the files on the list. 3044*7c478bd9Sstevel@tonic-gate */ 3045*7c478bd9Sstevel@tonic-gate while (cnt-- > 0) { 3046*7c478bd9Sstevel@tonic-gate vp = vplist[cnt]; 3047*7c478bd9Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr); 3048*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 3049*7c478bd9Sstevel@tonic-gate } 3050*7c478bd9Sstevel@tonic-gate 3051*7c478bd9Sstevel@tonic-gate /* 3052*7c478bd9Sstevel@tonic-gate * Free the space allocated to hold the list. 3053*7c478bd9Sstevel@tonic-gate */ 3054*7c478bd9Sstevel@tonic-gate kmem_free(vplist, num * sizeof (*vplist)); 3055*7c478bd9Sstevel@tonic-gate } 3056*7c478bd9Sstevel@tonic-gate 3057*7c478bd9Sstevel@tonic-gate /* 3058*7c478bd9Sstevel@tonic-gate * This probably needs to be larger than or equal to 3059*7c478bd9Sstevel@tonic-gate * log2(sizeof (struct rnode)) due to the way that rnodes are 3060*7c478bd9Sstevel@tonic-gate * allocated. 
3061*7c478bd9Sstevel@tonic-gate */ 3062*7c478bd9Sstevel@tonic-gate #define ACACHE_SHIFT_BITS 9 3063*7c478bd9Sstevel@tonic-gate 3064*7c478bd9Sstevel@tonic-gate static int 3065*7c478bd9Sstevel@tonic-gate acachehash(rnode_t *rp, cred_t *cr) 3066*7c478bd9Sstevel@tonic-gate { 3067*7c478bd9Sstevel@tonic-gate 3068*7c478bd9Sstevel@tonic-gate return ((((intptr_t)rp >> ACACHE_SHIFT_BITS) + crgetuid(cr)) & 3069*7c478bd9Sstevel@tonic-gate acachemask); 3070*7c478bd9Sstevel@tonic-gate } 3071*7c478bd9Sstevel@tonic-gate 3072*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3073*7c478bd9Sstevel@tonic-gate static long nfs_access_cache_hits = 0; 3074*7c478bd9Sstevel@tonic-gate static long nfs_access_cache_misses = 0; 3075*7c478bd9Sstevel@tonic-gate #endif 3076*7c478bd9Sstevel@tonic-gate 3077*7c478bd9Sstevel@tonic-gate nfs_access_type_t 3078*7c478bd9Sstevel@tonic-gate nfs_access_check(rnode_t *rp, uint32_t acc, cred_t *cr) 3079*7c478bd9Sstevel@tonic-gate { 3080*7c478bd9Sstevel@tonic-gate vnode_t *vp; 3081*7c478bd9Sstevel@tonic-gate acache_t *ap; 3082*7c478bd9Sstevel@tonic-gate acache_hash_t *hp; 3083*7c478bd9Sstevel@tonic-gate nfs_access_type_t all; 3084*7c478bd9Sstevel@tonic-gate 3085*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 3086*7c478bd9Sstevel@tonic-gate if (!ATTRCACHE_VALID(vp) || nfs_waitfor_purge_complete(vp)) 3087*7c478bd9Sstevel@tonic-gate return (NFS_ACCESS_UNKNOWN); 3088*7c478bd9Sstevel@tonic-gate 3089*7c478bd9Sstevel@tonic-gate if (rp->r_acache != NULL) { 3090*7c478bd9Sstevel@tonic-gate hp = &acache[acachehash(rp, cr)]; 3091*7c478bd9Sstevel@tonic-gate rw_enter(&hp->lock, RW_READER); 3092*7c478bd9Sstevel@tonic-gate ap = hp->next; 3093*7c478bd9Sstevel@tonic-gate while (ap != (acache_t *)hp) { 3094*7c478bd9Sstevel@tonic-gate if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) { 3095*7c478bd9Sstevel@tonic-gate if ((ap->known & acc) == acc) { 3096*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3097*7c478bd9Sstevel@tonic-gate nfs_access_cache_hits++; 3098*7c478bd9Sstevel@tonic-gate #endif 
3099*7c478bd9Sstevel@tonic-gate if ((ap->allowed & acc) == acc) 3100*7c478bd9Sstevel@tonic-gate all = NFS_ACCESS_ALLOWED; 3101*7c478bd9Sstevel@tonic-gate else 3102*7c478bd9Sstevel@tonic-gate all = NFS_ACCESS_DENIED; 3103*7c478bd9Sstevel@tonic-gate } else { 3104*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3105*7c478bd9Sstevel@tonic-gate nfs_access_cache_misses++; 3106*7c478bd9Sstevel@tonic-gate #endif 3107*7c478bd9Sstevel@tonic-gate all = NFS_ACCESS_UNKNOWN; 3108*7c478bd9Sstevel@tonic-gate } 3109*7c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 3110*7c478bd9Sstevel@tonic-gate return (all); 3111*7c478bd9Sstevel@tonic-gate } 3112*7c478bd9Sstevel@tonic-gate ap = ap->next; 3113*7c478bd9Sstevel@tonic-gate } 3114*7c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 3115*7c478bd9Sstevel@tonic-gate } 3116*7c478bd9Sstevel@tonic-gate 3117*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3118*7c478bd9Sstevel@tonic-gate nfs_access_cache_misses++; 3119*7c478bd9Sstevel@tonic-gate #endif 3120*7c478bd9Sstevel@tonic-gate return (NFS_ACCESS_UNKNOWN); 3121*7c478bd9Sstevel@tonic-gate } 3122*7c478bd9Sstevel@tonic-gate 3123*7c478bd9Sstevel@tonic-gate void 3124*7c478bd9Sstevel@tonic-gate nfs_access_cache(rnode_t *rp, uint32_t acc, uint32_t resacc, cred_t *cr) 3125*7c478bd9Sstevel@tonic-gate { 3126*7c478bd9Sstevel@tonic-gate acache_t *ap; 3127*7c478bd9Sstevel@tonic-gate acache_t *nap; 3128*7c478bd9Sstevel@tonic-gate acache_hash_t *hp; 3129*7c478bd9Sstevel@tonic-gate 3130*7c478bd9Sstevel@tonic-gate hp = &acache[acachehash(rp, cr)]; 3131*7c478bd9Sstevel@tonic-gate 3132*7c478bd9Sstevel@tonic-gate /* 3133*7c478bd9Sstevel@tonic-gate * Allocate now assuming that mostly an allocation will be 3134*7c478bd9Sstevel@tonic-gate * required. This allows the allocation to happen without 3135*7c478bd9Sstevel@tonic-gate * holding the hash bucket locked. 
3136*7c478bd9Sstevel@tonic-gate */ 3137*7c478bd9Sstevel@tonic-gate nap = kmem_cache_alloc(acache_cache, KM_NOSLEEP); 3138*7c478bd9Sstevel@tonic-gate if (nap != NULL) { 3139*7c478bd9Sstevel@tonic-gate nap->known = acc; 3140*7c478bd9Sstevel@tonic-gate nap->allowed = resacc; 3141*7c478bd9Sstevel@tonic-gate nap->rnode = rp; 3142*7c478bd9Sstevel@tonic-gate crhold(cr); 3143*7c478bd9Sstevel@tonic-gate nap->cred = cr; 3144*7c478bd9Sstevel@tonic-gate nap->hashq = hp; 3145*7c478bd9Sstevel@tonic-gate } 3146*7c478bd9Sstevel@tonic-gate 3147*7c478bd9Sstevel@tonic-gate rw_enter(&hp->lock, RW_WRITER); 3148*7c478bd9Sstevel@tonic-gate 3149*7c478bd9Sstevel@tonic-gate if (rp->r_acache != NULL) { 3150*7c478bd9Sstevel@tonic-gate ap = hp->next; 3151*7c478bd9Sstevel@tonic-gate while (ap != (acache_t *)hp) { 3152*7c478bd9Sstevel@tonic-gate if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) { 3153*7c478bd9Sstevel@tonic-gate ap->known |= acc; 3154*7c478bd9Sstevel@tonic-gate ap->allowed &= ~acc; 3155*7c478bd9Sstevel@tonic-gate ap->allowed |= resacc; 3156*7c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 3157*7c478bd9Sstevel@tonic-gate if (nap != NULL) { 3158*7c478bd9Sstevel@tonic-gate crfree(nap->cred); 3159*7c478bd9Sstevel@tonic-gate kmem_cache_free(acache_cache, nap); 3160*7c478bd9Sstevel@tonic-gate } 3161*7c478bd9Sstevel@tonic-gate return; 3162*7c478bd9Sstevel@tonic-gate } 3163*7c478bd9Sstevel@tonic-gate ap = ap->next; 3164*7c478bd9Sstevel@tonic-gate } 3165*7c478bd9Sstevel@tonic-gate } 3166*7c478bd9Sstevel@tonic-gate 3167*7c478bd9Sstevel@tonic-gate if (nap != NULL) { 3168*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3169*7c478bd9Sstevel@tonic-gate clstat_debug.access.value.ui64++; 3170*7c478bd9Sstevel@tonic-gate #endif 3171*7c478bd9Sstevel@tonic-gate nap->next = hp->next; 3172*7c478bd9Sstevel@tonic-gate hp->next = nap; 3173*7c478bd9Sstevel@tonic-gate nap->next->prev = nap; 3174*7c478bd9Sstevel@tonic-gate nap->prev = (acache_t *)hp; 3175*7c478bd9Sstevel@tonic-gate 3176*7c478bd9Sstevel@tonic-gate 
mutex_enter(&rp->r_statelock); 3177*7c478bd9Sstevel@tonic-gate nap->list = rp->r_acache; 3178*7c478bd9Sstevel@tonic-gate rp->r_acache = nap; 3179*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3180*7c478bd9Sstevel@tonic-gate } 3181*7c478bd9Sstevel@tonic-gate 3182*7c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 3183*7c478bd9Sstevel@tonic-gate } 3184*7c478bd9Sstevel@tonic-gate 3185*7c478bd9Sstevel@tonic-gate int 3186*7c478bd9Sstevel@tonic-gate nfs_access_purge_rp(rnode_t *rp) 3187*7c478bd9Sstevel@tonic-gate { 3188*7c478bd9Sstevel@tonic-gate acache_t *ap; 3189*7c478bd9Sstevel@tonic-gate acache_t *tmpap; 3190*7c478bd9Sstevel@tonic-gate acache_t *rplist; 3191*7c478bd9Sstevel@tonic-gate 3192*7c478bd9Sstevel@tonic-gate /* 3193*7c478bd9Sstevel@tonic-gate * If there aren't any cached entries, then there is nothing 3194*7c478bd9Sstevel@tonic-gate * to free. 3195*7c478bd9Sstevel@tonic-gate */ 3196*7c478bd9Sstevel@tonic-gate if (rp->r_acache == NULL) 3197*7c478bd9Sstevel@tonic-gate return (0); 3198*7c478bd9Sstevel@tonic-gate 3199*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3200*7c478bd9Sstevel@tonic-gate rplist = rp->r_acache; 3201*7c478bd9Sstevel@tonic-gate rp->r_acache = NULL; 3202*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3203*7c478bd9Sstevel@tonic-gate 3204*7c478bd9Sstevel@tonic-gate /* 3205*7c478bd9Sstevel@tonic-gate * Loop through each entry in the list pointed to in the 3206*7c478bd9Sstevel@tonic-gate * rnode. Remove each of these entries from the hash 3207*7c478bd9Sstevel@tonic-gate * queue that it is on and remove it from the list in 3208*7c478bd9Sstevel@tonic-gate * the rnode. 
3209*7c478bd9Sstevel@tonic-gate */ 3210*7c478bd9Sstevel@tonic-gate for (ap = rplist; ap != NULL; ap = tmpap) { 3211*7c478bd9Sstevel@tonic-gate rw_enter(&ap->hashq->lock, RW_WRITER); 3212*7c478bd9Sstevel@tonic-gate ap->prev->next = ap->next; 3213*7c478bd9Sstevel@tonic-gate ap->next->prev = ap->prev; 3214*7c478bd9Sstevel@tonic-gate rw_exit(&ap->hashq->lock); 3215*7c478bd9Sstevel@tonic-gate 3216*7c478bd9Sstevel@tonic-gate tmpap = ap->list; 3217*7c478bd9Sstevel@tonic-gate crfree(ap->cred); 3218*7c478bd9Sstevel@tonic-gate kmem_cache_free(acache_cache, ap); 3219*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3220*7c478bd9Sstevel@tonic-gate clstat_debug.access.value.ui64--; 3221*7c478bd9Sstevel@tonic-gate #endif 3222*7c478bd9Sstevel@tonic-gate } 3223*7c478bd9Sstevel@tonic-gate 3224*7c478bd9Sstevel@tonic-gate return (1); 3225*7c478bd9Sstevel@tonic-gate } 3226*7c478bd9Sstevel@tonic-gate 3227*7c478bd9Sstevel@tonic-gate static const char prefix[] = ".nfs"; 3228*7c478bd9Sstevel@tonic-gate 3229*7c478bd9Sstevel@tonic-gate static kmutex_t newnum_lock; 3230*7c478bd9Sstevel@tonic-gate 3231*7c478bd9Sstevel@tonic-gate int 3232*7c478bd9Sstevel@tonic-gate newnum(void) 3233*7c478bd9Sstevel@tonic-gate { 3234*7c478bd9Sstevel@tonic-gate static uint_t newnum = 0; 3235*7c478bd9Sstevel@tonic-gate uint_t id; 3236*7c478bd9Sstevel@tonic-gate 3237*7c478bd9Sstevel@tonic-gate mutex_enter(&newnum_lock); 3238*7c478bd9Sstevel@tonic-gate if (newnum == 0) 3239*7c478bd9Sstevel@tonic-gate newnum = gethrestime_sec() & 0xffff; 3240*7c478bd9Sstevel@tonic-gate id = newnum++; 3241*7c478bd9Sstevel@tonic-gate mutex_exit(&newnum_lock); 3242*7c478bd9Sstevel@tonic-gate return (id); 3243*7c478bd9Sstevel@tonic-gate } 3244*7c478bd9Sstevel@tonic-gate 3245*7c478bd9Sstevel@tonic-gate char * 3246*7c478bd9Sstevel@tonic-gate newname(void) 3247*7c478bd9Sstevel@tonic-gate { 3248*7c478bd9Sstevel@tonic-gate char *news; 3249*7c478bd9Sstevel@tonic-gate char *s; 3250*7c478bd9Sstevel@tonic-gate const char *p; 
3251*7c478bd9Sstevel@tonic-gate uint_t id; 3252*7c478bd9Sstevel@tonic-gate 3253*7c478bd9Sstevel@tonic-gate id = newnum(); 3254*7c478bd9Sstevel@tonic-gate news = kmem_alloc(MAXNAMELEN, KM_SLEEP); 3255*7c478bd9Sstevel@tonic-gate s = news; 3256*7c478bd9Sstevel@tonic-gate p = prefix; 3257*7c478bd9Sstevel@tonic-gate while (*p != '\0') 3258*7c478bd9Sstevel@tonic-gate *s++ = *p++; 3259*7c478bd9Sstevel@tonic-gate while (id != 0) { 3260*7c478bd9Sstevel@tonic-gate *s++ = "0123456789ABCDEF"[id & 0x0f]; 3261*7c478bd9Sstevel@tonic-gate id >>= 4; 3262*7c478bd9Sstevel@tonic-gate } 3263*7c478bd9Sstevel@tonic-gate *s = '\0'; 3264*7c478bd9Sstevel@tonic-gate return (news); 3265*7c478bd9Sstevel@tonic-gate } 3266*7c478bd9Sstevel@tonic-gate 3267*7c478bd9Sstevel@tonic-gate int 3268*7c478bd9Sstevel@tonic-gate nfs_atoi(char *cp) 3269*7c478bd9Sstevel@tonic-gate { 3270*7c478bd9Sstevel@tonic-gate int n; 3271*7c478bd9Sstevel@tonic-gate 3272*7c478bd9Sstevel@tonic-gate n = 0; 3273*7c478bd9Sstevel@tonic-gate while (*cp != '\0') { 3274*7c478bd9Sstevel@tonic-gate n = n * 10 + (*cp - '0'); 3275*7c478bd9Sstevel@tonic-gate cp++; 3276*7c478bd9Sstevel@tonic-gate } 3277*7c478bd9Sstevel@tonic-gate 3278*7c478bd9Sstevel@tonic-gate return (n); 3279*7c478bd9Sstevel@tonic-gate } 3280*7c478bd9Sstevel@tonic-gate 3281*7c478bd9Sstevel@tonic-gate /* 3282*7c478bd9Sstevel@tonic-gate * Snapshot callback for nfs:0:nfs_client as registered with the kstat 3283*7c478bd9Sstevel@tonic-gate * framework. 
3284*7c478bd9Sstevel@tonic-gate */ 3285*7c478bd9Sstevel@tonic-gate static int 3286*7c478bd9Sstevel@tonic-gate cl_snapshot(kstat_t *ksp, void *buf, int rw) 3287*7c478bd9Sstevel@tonic-gate { 3288*7c478bd9Sstevel@tonic-gate ksp->ks_snaptime = gethrtime(); 3289*7c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) { 3290*7c478bd9Sstevel@tonic-gate bcopy(buf, ksp->ks_private, sizeof (clstat_tmpl)); 3291*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3292*7c478bd9Sstevel@tonic-gate /* 3293*7c478bd9Sstevel@tonic-gate * Currently only the global zone can write to kstats, but we 3294*7c478bd9Sstevel@tonic-gate * add the check just for paranoia. 3295*7c478bd9Sstevel@tonic-gate */ 3296*7c478bd9Sstevel@tonic-gate if (INGLOBALZONE(curproc)) 3297*7c478bd9Sstevel@tonic-gate bcopy((char *)buf + sizeof (clstat_tmpl), &clstat_debug, 3298*7c478bd9Sstevel@tonic-gate sizeof (clstat_debug)); 3299*7c478bd9Sstevel@tonic-gate #endif 3300*7c478bd9Sstevel@tonic-gate } else { 3301*7c478bd9Sstevel@tonic-gate bcopy(ksp->ks_private, buf, sizeof (clstat_tmpl)); 3302*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3303*7c478bd9Sstevel@tonic-gate /* 3304*7c478bd9Sstevel@tonic-gate * If we're displaying the "global" debug kstat values, we 3305*7c478bd9Sstevel@tonic-gate * display them as-is to all zones since in fact they apply to 3306*7c478bd9Sstevel@tonic-gate * the system as a whole. 
3307*7c478bd9Sstevel@tonic-gate */ 3308*7c478bd9Sstevel@tonic-gate bcopy(&clstat_debug, (char *)buf + sizeof (clstat_tmpl), 3309*7c478bd9Sstevel@tonic-gate sizeof (clstat_debug)); 3310*7c478bd9Sstevel@tonic-gate #endif 3311*7c478bd9Sstevel@tonic-gate } 3312*7c478bd9Sstevel@tonic-gate return (0); 3313*7c478bd9Sstevel@tonic-gate } 3314*7c478bd9Sstevel@tonic-gate 3315*7c478bd9Sstevel@tonic-gate static void * 3316*7c478bd9Sstevel@tonic-gate clinit_zone(zoneid_t zoneid) 3317*7c478bd9Sstevel@tonic-gate { 3318*7c478bd9Sstevel@tonic-gate kstat_t *nfs_client_kstat; 3319*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 3320*7c478bd9Sstevel@tonic-gate uint_t ndata; 3321*7c478bd9Sstevel@tonic-gate 3322*7c478bd9Sstevel@tonic-gate nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP); 3323*7c478bd9Sstevel@tonic-gate mutex_init(&nfscl->nfscl_chtable_lock, NULL, MUTEX_DEFAULT, NULL); 3324*7c478bd9Sstevel@tonic-gate nfscl->nfscl_chtable = NULL; 3325*7c478bd9Sstevel@tonic-gate nfscl->nfscl_zoneid = zoneid; 3326*7c478bd9Sstevel@tonic-gate 3327*7c478bd9Sstevel@tonic-gate bcopy(&clstat_tmpl, &nfscl->nfscl_stat, sizeof (clstat_tmpl)); 3328*7c478bd9Sstevel@tonic-gate ndata = sizeof (clstat_tmpl) / sizeof (kstat_named_t); 3329*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3330*7c478bd9Sstevel@tonic-gate ndata += sizeof (clstat_debug) / sizeof (kstat_named_t); 3331*7c478bd9Sstevel@tonic-gate #endif 3332*7c478bd9Sstevel@tonic-gate if ((nfs_client_kstat = kstat_create_zone("nfs", 0, "nfs_client", 3333*7c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, ndata, 3334*7c478bd9Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) { 3335*7c478bd9Sstevel@tonic-gate nfs_client_kstat->ks_private = &nfscl->nfscl_stat; 3336*7c478bd9Sstevel@tonic-gate nfs_client_kstat->ks_snapshot = cl_snapshot; 3337*7c478bd9Sstevel@tonic-gate kstat_install(nfs_client_kstat); 3338*7c478bd9Sstevel@tonic-gate } 3339*7c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 3340*7c478bd9Sstevel@tonic-gate 
list_insert_head(&nfs_clnt_list, nfscl); 3341*7c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 3342*7c478bd9Sstevel@tonic-gate return (nfscl); 3343*7c478bd9Sstevel@tonic-gate } 3344*7c478bd9Sstevel@tonic-gate 3345*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3346*7c478bd9Sstevel@tonic-gate static void 3347*7c478bd9Sstevel@tonic-gate clfini_zone(zoneid_t zoneid, void *arg) 3348*7c478bd9Sstevel@tonic-gate { 3349*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl = arg; 3350*7c478bd9Sstevel@tonic-gate chhead_t *chp, *next; 3351*7c478bd9Sstevel@tonic-gate 3352*7c478bd9Sstevel@tonic-gate if (nfscl == NULL) 3353*7c478bd9Sstevel@tonic-gate return; 3354*7c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 3355*7c478bd9Sstevel@tonic-gate list_remove(&nfs_clnt_list, nfscl); 3356*7c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 3357*7c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, 0); 3358*7c478bd9Sstevel@tonic-gate for (chp = nfscl->nfscl_chtable; chp != NULL; chp = next) { 3359*7c478bd9Sstevel@tonic-gate ASSERT(chp->ch_list == NULL); 3360*7c478bd9Sstevel@tonic-gate kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1); 3361*7c478bd9Sstevel@tonic-gate next = chp->ch_next; 3362*7c478bd9Sstevel@tonic-gate kmem_free(chp, sizeof (*chp)); 3363*7c478bd9Sstevel@tonic-gate } 3364*7c478bd9Sstevel@tonic-gate kstat_delete_byname_zone("nfs", 0, "nfs_client", zoneid); 3365*7c478bd9Sstevel@tonic-gate mutex_destroy(&nfscl->nfscl_chtable_lock); 3366*7c478bd9Sstevel@tonic-gate kmem_free(nfscl, sizeof (*nfscl)); 3367*7c478bd9Sstevel@tonic-gate } 3368*7c478bd9Sstevel@tonic-gate 3369*7c478bd9Sstevel@tonic-gate /* 3370*7c478bd9Sstevel@tonic-gate * Called by endpnt_destructor to make sure the client handles are 3371*7c478bd9Sstevel@tonic-gate * cleaned up before the RPC endpoints. This becomes a no-op if 3372*7c478bd9Sstevel@tonic-gate * clfini_zone (above) is called first. 
This function is needed 3373*7c478bd9Sstevel@tonic-gate * (rather than relying on clfini_zone to clean up) because the ZSD 3374*7c478bd9Sstevel@tonic-gate * callbacks have no ordering mechanism, so we have no way to ensure 3375*7c478bd9Sstevel@tonic-gate * that clfini_zone is called before endpnt_destructor. 3376*7c478bd9Sstevel@tonic-gate */ 3377*7c478bd9Sstevel@tonic-gate void 3378*7c478bd9Sstevel@tonic-gate clcleanup_zone(zoneid_t zoneid) 3379*7c478bd9Sstevel@tonic-gate { 3380*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 3381*7c478bd9Sstevel@tonic-gate 3382*7c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 3383*7c478bd9Sstevel@tonic-gate nfscl = list_head(&nfs_clnt_list); 3384*7c478bd9Sstevel@tonic-gate for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) { 3385*7c478bd9Sstevel@tonic-gate if (nfscl->nfscl_zoneid == zoneid) { 3386*7c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, 0); 3387*7c478bd9Sstevel@tonic-gate break; 3388*7c478bd9Sstevel@tonic-gate } 3389*7c478bd9Sstevel@tonic-gate } 3390*7c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 3391*7c478bd9Sstevel@tonic-gate } 3392*7c478bd9Sstevel@tonic-gate 3393*7c478bd9Sstevel@tonic-gate int 3394*7c478bd9Sstevel@tonic-gate nfs_subrinit(void) 3395*7c478bd9Sstevel@tonic-gate { 3396*7c478bd9Sstevel@tonic-gate int i; 3397*7c478bd9Sstevel@tonic-gate ulong_t nrnode_max; 3398*7c478bd9Sstevel@tonic-gate 3399*7c478bd9Sstevel@tonic-gate /* 3400*7c478bd9Sstevel@tonic-gate * Allocate and initialize the rnode hash queues 3401*7c478bd9Sstevel@tonic-gate */ 3402*7c478bd9Sstevel@tonic-gate if (nrnode <= 0) 3403*7c478bd9Sstevel@tonic-gate nrnode = ncsize; 3404*7c478bd9Sstevel@tonic-gate nrnode_max = (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode)); 3405*7c478bd9Sstevel@tonic-gate if (nrnode > nrnode_max || (nrnode == 0 && ncsize == 0)) { 3406*7c478bd9Sstevel@tonic-gate zcmn_err(GLOBAL_ZONEID, CE_NOTE, 3407*7c478bd9Sstevel@tonic-gate "setting nrnode to max value of %ld", nrnode_max); 
3408*7c478bd9Sstevel@tonic-gate nrnode = nrnode_max; 3409*7c478bd9Sstevel@tonic-gate } 3410*7c478bd9Sstevel@tonic-gate 3411*7c478bd9Sstevel@tonic-gate rtablesize = 1 << highbit(nrnode / hashlen); 3412*7c478bd9Sstevel@tonic-gate rtablemask = rtablesize - 1; 3413*7c478bd9Sstevel@tonic-gate rtable = kmem_alloc(rtablesize * sizeof (*rtable), KM_SLEEP); 3414*7c478bd9Sstevel@tonic-gate for (i = 0; i < rtablesize; i++) { 3415*7c478bd9Sstevel@tonic-gate rtable[i].r_hashf = (rnode_t *)(&rtable[i]); 3416*7c478bd9Sstevel@tonic-gate rtable[i].r_hashb = (rnode_t *)(&rtable[i]); 3417*7c478bd9Sstevel@tonic-gate rw_init(&rtable[i].r_lock, NULL, RW_DEFAULT, NULL); 3418*7c478bd9Sstevel@tonic-gate } 3419*7c478bd9Sstevel@tonic-gate rnode_cache = kmem_cache_create("rnode_cache", sizeof (rnode_t), 3420*7c478bd9Sstevel@tonic-gate 0, NULL, NULL, nfs_reclaim, NULL, NULL, 0); 3421*7c478bd9Sstevel@tonic-gate 3422*7c478bd9Sstevel@tonic-gate /* 3423*7c478bd9Sstevel@tonic-gate * Allocate and initialize the access cache 3424*7c478bd9Sstevel@tonic-gate */ 3425*7c478bd9Sstevel@tonic-gate 3426*7c478bd9Sstevel@tonic-gate /* 3427*7c478bd9Sstevel@tonic-gate * Initial guess is one access cache entry per rnode unless 3428*7c478bd9Sstevel@tonic-gate * nacache is set to a non-zero value and then it is used to 3429*7c478bd9Sstevel@tonic-gate * indicate a guess at the number of access cache entries. 
3430*7c478bd9Sstevel@tonic-gate */ 3431*7c478bd9Sstevel@tonic-gate if (nacache > 0) 3432*7c478bd9Sstevel@tonic-gate acachesize = 1 << highbit(nacache / hashlen); 3433*7c478bd9Sstevel@tonic-gate else 3434*7c478bd9Sstevel@tonic-gate acachesize = rtablesize; 3435*7c478bd9Sstevel@tonic-gate acachemask = acachesize - 1; 3436*7c478bd9Sstevel@tonic-gate acache = kmem_alloc(acachesize * sizeof (*acache), KM_SLEEP); 3437*7c478bd9Sstevel@tonic-gate for (i = 0; i < acachesize; i++) { 3438*7c478bd9Sstevel@tonic-gate acache[i].next = (acache_t *)&acache[i]; 3439*7c478bd9Sstevel@tonic-gate acache[i].prev = (acache_t *)&acache[i]; 3440*7c478bd9Sstevel@tonic-gate rw_init(&acache[i].lock, NULL, RW_DEFAULT, NULL); 3441*7c478bd9Sstevel@tonic-gate } 3442*7c478bd9Sstevel@tonic-gate acache_cache = kmem_cache_create("nfs_access_cache", 3443*7c478bd9Sstevel@tonic-gate sizeof (acache_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 3444*7c478bd9Sstevel@tonic-gate /* 3445*7c478bd9Sstevel@tonic-gate * Allocate and initialize the client handle cache 3446*7c478bd9Sstevel@tonic-gate */ 3447*7c478bd9Sstevel@tonic-gate chtab_cache = kmem_cache_create("client_handle_cache", 3448*7c478bd9Sstevel@tonic-gate sizeof (struct chtab), 0, NULL, NULL, clreclaim, NULL, 3449*7c478bd9Sstevel@tonic-gate NULL, 0); 3450*7c478bd9Sstevel@tonic-gate /* 3451*7c478bd9Sstevel@tonic-gate * Initialize the list of per-zone client handles (and associated data). 3452*7c478bd9Sstevel@tonic-gate * This needs to be done before we call zone_key_create(). 3453*7c478bd9Sstevel@tonic-gate */ 3454*7c478bd9Sstevel@tonic-gate list_create(&nfs_clnt_list, sizeof (struct nfs_clnt), 3455*7c478bd9Sstevel@tonic-gate offsetof(struct nfs_clnt, nfscl_node)); 3456*7c478bd9Sstevel@tonic-gate /* 3457*7c478bd9Sstevel@tonic-gate * Initialize the zone_key for per-zone client handle lists. 
3458*7c478bd9Sstevel@tonic-gate */ 3459*7c478bd9Sstevel@tonic-gate zone_key_create(&nfsclnt_zone_key, clinit_zone, NULL, clfini_zone); 3460*7c478bd9Sstevel@tonic-gate /* 3461*7c478bd9Sstevel@tonic-gate * Initialize the various mutexes and reader/writer locks 3462*7c478bd9Sstevel@tonic-gate */ 3463*7c478bd9Sstevel@tonic-gate mutex_init(&rpfreelist_lock, NULL, MUTEX_DEFAULT, NULL); 3464*7c478bd9Sstevel@tonic-gate mutex_init(&newnum_lock, NULL, MUTEX_DEFAULT, NULL); 3465*7c478bd9Sstevel@tonic-gate mutex_init(&nfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); 3466*7c478bd9Sstevel@tonic-gate 3467*7c478bd9Sstevel@tonic-gate /* 3468*7c478bd9Sstevel@tonic-gate * Assign unique major number for all nfs mounts 3469*7c478bd9Sstevel@tonic-gate */ 3470*7c478bd9Sstevel@tonic-gate if ((nfs_major = getudev()) == -1) { 3471*7c478bd9Sstevel@tonic-gate zcmn_err(GLOBAL_ZONEID, CE_WARN, 3472*7c478bd9Sstevel@tonic-gate "nfs: init: can't get unique device number"); 3473*7c478bd9Sstevel@tonic-gate nfs_major = 0; 3474*7c478bd9Sstevel@tonic-gate } 3475*7c478bd9Sstevel@tonic-gate nfs_minor = 0; 3476*7c478bd9Sstevel@tonic-gate 3477*7c478bd9Sstevel@tonic-gate if (nfs3_jukebox_delay == 0) 3478*7c478bd9Sstevel@tonic-gate nfs3_jukebox_delay = NFS3_JUKEBOX_DELAY; 3479*7c478bd9Sstevel@tonic-gate 3480*7c478bd9Sstevel@tonic-gate return (0); 3481*7c478bd9Sstevel@tonic-gate } 3482*7c478bd9Sstevel@tonic-gate 3483*7c478bd9Sstevel@tonic-gate void 3484*7c478bd9Sstevel@tonic-gate nfs_subrfini(void) 3485*7c478bd9Sstevel@tonic-gate { 3486*7c478bd9Sstevel@tonic-gate int i; 3487*7c478bd9Sstevel@tonic-gate 3488*7c478bd9Sstevel@tonic-gate /* 3489*7c478bd9Sstevel@tonic-gate * Deallocate the rnode hash queues 3490*7c478bd9Sstevel@tonic-gate */ 3491*7c478bd9Sstevel@tonic-gate kmem_cache_destroy(rnode_cache); 3492*7c478bd9Sstevel@tonic-gate 3493*7c478bd9Sstevel@tonic-gate for (i = 0; i < rtablesize; i++) 3494*7c478bd9Sstevel@tonic-gate rw_destroy(&rtable[i].r_lock); 3495*7c478bd9Sstevel@tonic-gate kmem_free(rtable, 
rtablesize * sizeof (*rtable)); 3496*7c478bd9Sstevel@tonic-gate 3497*7c478bd9Sstevel@tonic-gate /* 3498*7c478bd9Sstevel@tonic-gate * Deallocated the access cache 3499*7c478bd9Sstevel@tonic-gate */ 3500*7c478bd9Sstevel@tonic-gate kmem_cache_destroy(acache_cache); 3501*7c478bd9Sstevel@tonic-gate 3502*7c478bd9Sstevel@tonic-gate for (i = 0; i < acachesize; i++) 3503*7c478bd9Sstevel@tonic-gate rw_destroy(&acache[i].lock); 3504*7c478bd9Sstevel@tonic-gate kmem_free(acache, acachesize * sizeof (*acache)); 3505*7c478bd9Sstevel@tonic-gate 3506*7c478bd9Sstevel@tonic-gate /* 3507*7c478bd9Sstevel@tonic-gate * Deallocate the client handle cache 3508*7c478bd9Sstevel@tonic-gate */ 3509*7c478bd9Sstevel@tonic-gate kmem_cache_destroy(chtab_cache); 3510*7c478bd9Sstevel@tonic-gate 3511*7c478bd9Sstevel@tonic-gate /* 3512*7c478bd9Sstevel@tonic-gate * Destroy the various mutexes and reader/writer locks 3513*7c478bd9Sstevel@tonic-gate */ 3514*7c478bd9Sstevel@tonic-gate mutex_destroy(&rpfreelist_lock); 3515*7c478bd9Sstevel@tonic-gate mutex_destroy(&newnum_lock); 3516*7c478bd9Sstevel@tonic-gate mutex_destroy(&nfs_minor_lock); 3517*7c478bd9Sstevel@tonic-gate (void) zone_key_delete(nfsclnt_zone_key); 3518*7c478bd9Sstevel@tonic-gate } 3519*7c478bd9Sstevel@tonic-gate 3520*7c478bd9Sstevel@tonic-gate enum nfsstat 3521*7c478bd9Sstevel@tonic-gate puterrno(int error) 3522*7c478bd9Sstevel@tonic-gate { 3523*7c478bd9Sstevel@tonic-gate 3524*7c478bd9Sstevel@tonic-gate switch (error) { 3525*7c478bd9Sstevel@tonic-gate case EOPNOTSUPP: 3526*7c478bd9Sstevel@tonic-gate return (NFSERR_OPNOTSUPP); 3527*7c478bd9Sstevel@tonic-gate case ENAMETOOLONG: 3528*7c478bd9Sstevel@tonic-gate return (NFSERR_NAMETOOLONG); 3529*7c478bd9Sstevel@tonic-gate case ENOTEMPTY: 3530*7c478bd9Sstevel@tonic-gate return (NFSERR_NOTEMPTY); 3531*7c478bd9Sstevel@tonic-gate case EDQUOT: 3532*7c478bd9Sstevel@tonic-gate return (NFSERR_DQUOT); 3533*7c478bd9Sstevel@tonic-gate case ESTALE: 3534*7c478bd9Sstevel@tonic-gate return (NFSERR_STALE); 
3535*7c478bd9Sstevel@tonic-gate case EREMOTE: 3536*7c478bd9Sstevel@tonic-gate return (NFSERR_REMOTE); 3537*7c478bd9Sstevel@tonic-gate case ENOSYS: 3538*7c478bd9Sstevel@tonic-gate return (NFSERR_OPNOTSUPP); 3539*7c478bd9Sstevel@tonic-gate case EOVERFLOW: 3540*7c478bd9Sstevel@tonic-gate return (NFSERR_INVAL); 3541*7c478bd9Sstevel@tonic-gate default: 3542*7c478bd9Sstevel@tonic-gate return ((enum nfsstat)error); 3543*7c478bd9Sstevel@tonic-gate } 3544*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 3545*7c478bd9Sstevel@tonic-gate } 3546*7c478bd9Sstevel@tonic-gate 3547*7c478bd9Sstevel@tonic-gate int 3548*7c478bd9Sstevel@tonic-gate geterrno(enum nfsstat status) 3549*7c478bd9Sstevel@tonic-gate { 3550*7c478bd9Sstevel@tonic-gate 3551*7c478bd9Sstevel@tonic-gate switch (status) { 3552*7c478bd9Sstevel@tonic-gate case NFSERR_OPNOTSUPP: 3553*7c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 3554*7c478bd9Sstevel@tonic-gate case NFSERR_NAMETOOLONG: 3555*7c478bd9Sstevel@tonic-gate return (ENAMETOOLONG); 3556*7c478bd9Sstevel@tonic-gate case NFSERR_NOTEMPTY: 3557*7c478bd9Sstevel@tonic-gate return (ENOTEMPTY); 3558*7c478bd9Sstevel@tonic-gate case NFSERR_DQUOT: 3559*7c478bd9Sstevel@tonic-gate return (EDQUOT); 3560*7c478bd9Sstevel@tonic-gate case NFSERR_STALE: 3561*7c478bd9Sstevel@tonic-gate return (ESTALE); 3562*7c478bd9Sstevel@tonic-gate case NFSERR_REMOTE: 3563*7c478bd9Sstevel@tonic-gate return (EREMOTE); 3564*7c478bd9Sstevel@tonic-gate case NFSERR_WFLUSH: 3565*7c478bd9Sstevel@tonic-gate return (EIO); 3566*7c478bd9Sstevel@tonic-gate default: 3567*7c478bd9Sstevel@tonic-gate return ((int)status); 3568*7c478bd9Sstevel@tonic-gate } 3569*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 3570*7c478bd9Sstevel@tonic-gate } 3571*7c478bd9Sstevel@tonic-gate 3572*7c478bd9Sstevel@tonic-gate enum nfsstat3 3573*7c478bd9Sstevel@tonic-gate puterrno3(int error) 3574*7c478bd9Sstevel@tonic-gate { 3575*7c478bd9Sstevel@tonic-gate 3576*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3577*7c478bd9Sstevel@tonic-gate switch 
(error) { 3578*7c478bd9Sstevel@tonic-gate case 0: 3579*7c478bd9Sstevel@tonic-gate return (NFS3_OK); 3580*7c478bd9Sstevel@tonic-gate case EPERM: 3581*7c478bd9Sstevel@tonic-gate return (NFS3ERR_PERM); 3582*7c478bd9Sstevel@tonic-gate case ENOENT: 3583*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOENT); 3584*7c478bd9Sstevel@tonic-gate case EIO: 3585*7c478bd9Sstevel@tonic-gate return (NFS3ERR_IO); 3586*7c478bd9Sstevel@tonic-gate case ENXIO: 3587*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NXIO); 3588*7c478bd9Sstevel@tonic-gate case EACCES: 3589*7c478bd9Sstevel@tonic-gate return (NFS3ERR_ACCES); 3590*7c478bd9Sstevel@tonic-gate case EEXIST: 3591*7c478bd9Sstevel@tonic-gate return (NFS3ERR_EXIST); 3592*7c478bd9Sstevel@tonic-gate case EXDEV: 3593*7c478bd9Sstevel@tonic-gate return (NFS3ERR_XDEV); 3594*7c478bd9Sstevel@tonic-gate case ENODEV: 3595*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NODEV); 3596*7c478bd9Sstevel@tonic-gate case ENOTDIR: 3597*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTDIR); 3598*7c478bd9Sstevel@tonic-gate case EISDIR: 3599*7c478bd9Sstevel@tonic-gate return (NFS3ERR_ISDIR); 3600*7c478bd9Sstevel@tonic-gate case EINVAL: 3601*7c478bd9Sstevel@tonic-gate return (NFS3ERR_INVAL); 3602*7c478bd9Sstevel@tonic-gate case EFBIG: 3603*7c478bd9Sstevel@tonic-gate return (NFS3ERR_FBIG); 3604*7c478bd9Sstevel@tonic-gate case ENOSPC: 3605*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOSPC); 3606*7c478bd9Sstevel@tonic-gate case EROFS: 3607*7c478bd9Sstevel@tonic-gate return (NFS3ERR_ROFS); 3608*7c478bd9Sstevel@tonic-gate case EMLINK: 3609*7c478bd9Sstevel@tonic-gate return (NFS3ERR_MLINK); 3610*7c478bd9Sstevel@tonic-gate case ENAMETOOLONG: 3611*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NAMETOOLONG); 3612*7c478bd9Sstevel@tonic-gate case ENOTEMPTY: 3613*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTEMPTY); 3614*7c478bd9Sstevel@tonic-gate case EDQUOT: 3615*7c478bd9Sstevel@tonic-gate return (NFS3ERR_DQUOT); 3616*7c478bd9Sstevel@tonic-gate case ESTALE: 3617*7c478bd9Sstevel@tonic-gate 
return (NFS3ERR_STALE); 3618*7c478bd9Sstevel@tonic-gate case EREMOTE: 3619*7c478bd9Sstevel@tonic-gate return (NFS3ERR_REMOTE); 3620*7c478bd9Sstevel@tonic-gate case EOPNOTSUPP: 3621*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTSUPP); 3622*7c478bd9Sstevel@tonic-gate case EOVERFLOW: 3623*7c478bd9Sstevel@tonic-gate return (NFS3ERR_INVAL); 3624*7c478bd9Sstevel@tonic-gate default: 3625*7c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN, 3626*7c478bd9Sstevel@tonic-gate "puterrno3: got error %d", error); 3627*7c478bd9Sstevel@tonic-gate return ((enum nfsstat3)error); 3628*7c478bd9Sstevel@tonic-gate } 3629*7c478bd9Sstevel@tonic-gate #else 3630*7c478bd9Sstevel@tonic-gate switch (error) { 3631*7c478bd9Sstevel@tonic-gate case ENAMETOOLONG: 3632*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NAMETOOLONG); 3633*7c478bd9Sstevel@tonic-gate case ENOTEMPTY: 3634*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTEMPTY); 3635*7c478bd9Sstevel@tonic-gate case EDQUOT: 3636*7c478bd9Sstevel@tonic-gate return (NFS3ERR_DQUOT); 3637*7c478bd9Sstevel@tonic-gate case ESTALE: 3638*7c478bd9Sstevel@tonic-gate return (NFS3ERR_STALE); 3639*7c478bd9Sstevel@tonic-gate case EOPNOTSUPP: 3640*7c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTSUPP); 3641*7c478bd9Sstevel@tonic-gate case EREMOTE: 3642*7c478bd9Sstevel@tonic-gate return (NFS3ERR_REMOTE); 3643*7c478bd9Sstevel@tonic-gate case EOVERFLOW: 3644*7c478bd9Sstevel@tonic-gate return (NFS3ERR_INVAL); 3645*7c478bd9Sstevel@tonic-gate default: 3646*7c478bd9Sstevel@tonic-gate return ((enum nfsstat3)error); 3647*7c478bd9Sstevel@tonic-gate } 3648*7c478bd9Sstevel@tonic-gate #endif 3649*7c478bd9Sstevel@tonic-gate } 3650*7c478bd9Sstevel@tonic-gate 3651*7c478bd9Sstevel@tonic-gate int 3652*7c478bd9Sstevel@tonic-gate geterrno3(enum nfsstat3 status) 3653*7c478bd9Sstevel@tonic-gate { 3654*7c478bd9Sstevel@tonic-gate 3655*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3656*7c478bd9Sstevel@tonic-gate switch (status) { 3657*7c478bd9Sstevel@tonic-gate case NFS3_OK: 
3658*7c478bd9Sstevel@tonic-gate return (0); 3659*7c478bd9Sstevel@tonic-gate case NFS3ERR_PERM: 3660*7c478bd9Sstevel@tonic-gate return (EPERM); 3661*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOENT: 3662*7c478bd9Sstevel@tonic-gate return (ENOENT); 3663*7c478bd9Sstevel@tonic-gate case NFS3ERR_IO: 3664*7c478bd9Sstevel@tonic-gate return (EIO); 3665*7c478bd9Sstevel@tonic-gate case NFS3ERR_NXIO: 3666*7c478bd9Sstevel@tonic-gate return (ENXIO); 3667*7c478bd9Sstevel@tonic-gate case NFS3ERR_ACCES: 3668*7c478bd9Sstevel@tonic-gate return (EACCES); 3669*7c478bd9Sstevel@tonic-gate case NFS3ERR_EXIST: 3670*7c478bd9Sstevel@tonic-gate return (EEXIST); 3671*7c478bd9Sstevel@tonic-gate case NFS3ERR_XDEV: 3672*7c478bd9Sstevel@tonic-gate return (EXDEV); 3673*7c478bd9Sstevel@tonic-gate case NFS3ERR_NODEV: 3674*7c478bd9Sstevel@tonic-gate return (ENODEV); 3675*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOTDIR: 3676*7c478bd9Sstevel@tonic-gate return (ENOTDIR); 3677*7c478bd9Sstevel@tonic-gate case NFS3ERR_ISDIR: 3678*7c478bd9Sstevel@tonic-gate return (EISDIR); 3679*7c478bd9Sstevel@tonic-gate case NFS3ERR_INVAL: 3680*7c478bd9Sstevel@tonic-gate return (EINVAL); 3681*7c478bd9Sstevel@tonic-gate case NFS3ERR_FBIG: 3682*7c478bd9Sstevel@tonic-gate return (EFBIG); 3683*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOSPC: 3684*7c478bd9Sstevel@tonic-gate return (ENOSPC); 3685*7c478bd9Sstevel@tonic-gate case NFS3ERR_ROFS: 3686*7c478bd9Sstevel@tonic-gate return (EROFS); 3687*7c478bd9Sstevel@tonic-gate case NFS3ERR_MLINK: 3688*7c478bd9Sstevel@tonic-gate return (EMLINK); 3689*7c478bd9Sstevel@tonic-gate case NFS3ERR_NAMETOOLONG: 3690*7c478bd9Sstevel@tonic-gate return (ENAMETOOLONG); 3691*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOTEMPTY: 3692*7c478bd9Sstevel@tonic-gate return (ENOTEMPTY); 3693*7c478bd9Sstevel@tonic-gate case NFS3ERR_DQUOT: 3694*7c478bd9Sstevel@tonic-gate return (EDQUOT); 3695*7c478bd9Sstevel@tonic-gate case NFS3ERR_STALE: 3696*7c478bd9Sstevel@tonic-gate return (ESTALE); 3697*7c478bd9Sstevel@tonic-gate 
case NFS3ERR_REMOTE: 3698*7c478bd9Sstevel@tonic-gate return (EREMOTE); 3699*7c478bd9Sstevel@tonic-gate case NFS3ERR_BADHANDLE: 3700*7c478bd9Sstevel@tonic-gate return (ESTALE); 3701*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOT_SYNC: 3702*7c478bd9Sstevel@tonic-gate return (EINVAL); 3703*7c478bd9Sstevel@tonic-gate case NFS3ERR_BAD_COOKIE: 3704*7c478bd9Sstevel@tonic-gate return (ENOENT); 3705*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOTSUPP: 3706*7c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 3707*7c478bd9Sstevel@tonic-gate case NFS3ERR_TOOSMALL: 3708*7c478bd9Sstevel@tonic-gate return (EINVAL); 3709*7c478bd9Sstevel@tonic-gate case NFS3ERR_SERVERFAULT: 3710*7c478bd9Sstevel@tonic-gate return (EIO); 3711*7c478bd9Sstevel@tonic-gate case NFS3ERR_BADTYPE: 3712*7c478bd9Sstevel@tonic-gate return (EINVAL); 3713*7c478bd9Sstevel@tonic-gate case NFS3ERR_JUKEBOX: 3714*7c478bd9Sstevel@tonic-gate return (ENXIO); 3715*7c478bd9Sstevel@tonic-gate default: 3716*7c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN, 3717*7c478bd9Sstevel@tonic-gate "geterrno3: got status %d", status); 3718*7c478bd9Sstevel@tonic-gate return ((int)status); 3719*7c478bd9Sstevel@tonic-gate } 3720*7c478bd9Sstevel@tonic-gate #else 3721*7c478bd9Sstevel@tonic-gate switch (status) { 3722*7c478bd9Sstevel@tonic-gate case NFS3ERR_NAMETOOLONG: 3723*7c478bd9Sstevel@tonic-gate return (ENAMETOOLONG); 3724*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOTEMPTY: 3725*7c478bd9Sstevel@tonic-gate return (ENOTEMPTY); 3726*7c478bd9Sstevel@tonic-gate case NFS3ERR_DQUOT: 3727*7c478bd9Sstevel@tonic-gate return (EDQUOT); 3728*7c478bd9Sstevel@tonic-gate case NFS3ERR_STALE: 3729*7c478bd9Sstevel@tonic-gate case NFS3ERR_BADHANDLE: 3730*7c478bd9Sstevel@tonic-gate return (ESTALE); 3731*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOTSUPP: 3732*7c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 3733*7c478bd9Sstevel@tonic-gate case NFS3ERR_REMOTE: 3734*7c478bd9Sstevel@tonic-gate return (EREMOTE); 3735*7c478bd9Sstevel@tonic-gate case NFS3ERR_NOT_SYNC: 
3736*7c478bd9Sstevel@tonic-gate case NFS3ERR_TOOSMALL: 3737*7c478bd9Sstevel@tonic-gate case NFS3ERR_BADTYPE: 3738*7c478bd9Sstevel@tonic-gate return (EINVAL); 3739*7c478bd9Sstevel@tonic-gate case NFS3ERR_BAD_COOKIE: 3740*7c478bd9Sstevel@tonic-gate return (ENOENT); 3741*7c478bd9Sstevel@tonic-gate case NFS3ERR_SERVERFAULT: 3742*7c478bd9Sstevel@tonic-gate return (EIO); 3743*7c478bd9Sstevel@tonic-gate case NFS3ERR_JUKEBOX: 3744*7c478bd9Sstevel@tonic-gate return (ENXIO); 3745*7c478bd9Sstevel@tonic-gate default: 3746*7c478bd9Sstevel@tonic-gate return ((int)status); 3747*7c478bd9Sstevel@tonic-gate } 3748*7c478bd9Sstevel@tonic-gate #endif 3749*7c478bd9Sstevel@tonic-gate } 3750*7c478bd9Sstevel@tonic-gate 3751*7c478bd9Sstevel@tonic-gate rddir_cache * 3752*7c478bd9Sstevel@tonic-gate rddir_cache_alloc(int flags) 3753*7c478bd9Sstevel@tonic-gate { 3754*7c478bd9Sstevel@tonic-gate rddir_cache *rc; 3755*7c478bd9Sstevel@tonic-gate 3756*7c478bd9Sstevel@tonic-gate rc = kmem_alloc(sizeof (*rc), flags); 3757*7c478bd9Sstevel@tonic-gate if (rc != NULL) { 3758*7c478bd9Sstevel@tonic-gate rc->entries = NULL; 3759*7c478bd9Sstevel@tonic-gate rc->flags = RDDIR; 3760*7c478bd9Sstevel@tonic-gate cv_init(&rc->cv, NULL, CV_DEFAULT, NULL); 3761*7c478bd9Sstevel@tonic-gate mutex_init(&rc->lock, NULL, MUTEX_DEFAULT, NULL); 3762*7c478bd9Sstevel@tonic-gate rc->count = 1; 3763*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3764*7c478bd9Sstevel@tonic-gate atomic_add_64(&clstat_debug.dirent.value.ui64, 1); 3765*7c478bd9Sstevel@tonic-gate #endif 3766*7c478bd9Sstevel@tonic-gate } 3767*7c478bd9Sstevel@tonic-gate return (rc); 3768*7c478bd9Sstevel@tonic-gate } 3769*7c478bd9Sstevel@tonic-gate 3770*7c478bd9Sstevel@tonic-gate static void 3771*7c478bd9Sstevel@tonic-gate rddir_cache_free(rddir_cache *rc) 3772*7c478bd9Sstevel@tonic-gate { 3773*7c478bd9Sstevel@tonic-gate 3774*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3775*7c478bd9Sstevel@tonic-gate atomic_add_64(&clstat_debug.dirent.value.ui64, -1); 
3776*7c478bd9Sstevel@tonic-gate #endif 3777*7c478bd9Sstevel@tonic-gate if (rc->entries != NULL) { 3778*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3779*7c478bd9Sstevel@tonic-gate rddir_cache_buf_free(rc->entries, rc->buflen); 3780*7c478bd9Sstevel@tonic-gate #else 3781*7c478bd9Sstevel@tonic-gate kmem_free(rc->entries, rc->buflen); 3782*7c478bd9Sstevel@tonic-gate #endif 3783*7c478bd9Sstevel@tonic-gate } 3784*7c478bd9Sstevel@tonic-gate cv_destroy(&rc->cv); 3785*7c478bd9Sstevel@tonic-gate mutex_destroy(&rc->lock); 3786*7c478bd9Sstevel@tonic-gate kmem_free(rc, sizeof (*rc)); 3787*7c478bd9Sstevel@tonic-gate } 3788*7c478bd9Sstevel@tonic-gate 3789*7c478bd9Sstevel@tonic-gate void 3790*7c478bd9Sstevel@tonic-gate rddir_cache_hold(rddir_cache *rc) 3791*7c478bd9Sstevel@tonic-gate { 3792*7c478bd9Sstevel@tonic-gate 3793*7c478bd9Sstevel@tonic-gate mutex_enter(&rc->lock); 3794*7c478bd9Sstevel@tonic-gate rc->count++; 3795*7c478bd9Sstevel@tonic-gate mutex_exit(&rc->lock); 3796*7c478bd9Sstevel@tonic-gate } 3797*7c478bd9Sstevel@tonic-gate 3798*7c478bd9Sstevel@tonic-gate void 3799*7c478bd9Sstevel@tonic-gate rddir_cache_rele(rddir_cache *rc) 3800*7c478bd9Sstevel@tonic-gate { 3801*7c478bd9Sstevel@tonic-gate 3802*7c478bd9Sstevel@tonic-gate mutex_enter(&rc->lock); 3803*7c478bd9Sstevel@tonic-gate ASSERT(rc->count > 0); 3804*7c478bd9Sstevel@tonic-gate if (--rc->count == 0) { 3805*7c478bd9Sstevel@tonic-gate mutex_exit(&rc->lock); 3806*7c478bd9Sstevel@tonic-gate rddir_cache_free(rc); 3807*7c478bd9Sstevel@tonic-gate } else 3808*7c478bd9Sstevel@tonic-gate mutex_exit(&rc->lock); 3809*7c478bd9Sstevel@tonic-gate } 3810*7c478bd9Sstevel@tonic-gate 3811*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3812*7c478bd9Sstevel@tonic-gate char * 3813*7c478bd9Sstevel@tonic-gate rddir_cache_buf_alloc(size_t size, int flags) 3814*7c478bd9Sstevel@tonic-gate { 3815*7c478bd9Sstevel@tonic-gate char *rc; 3816*7c478bd9Sstevel@tonic-gate 3817*7c478bd9Sstevel@tonic-gate rc = kmem_alloc(size, flags); 3818*7c478bd9Sstevel@tonic-gate 
if (rc != NULL) 3819*7c478bd9Sstevel@tonic-gate atomic_add_64(&clstat_debug.dirents.value.ui64, size); 3820*7c478bd9Sstevel@tonic-gate return (rc); 3821*7c478bd9Sstevel@tonic-gate } 3822*7c478bd9Sstevel@tonic-gate 3823*7c478bd9Sstevel@tonic-gate void 3824*7c478bd9Sstevel@tonic-gate rddir_cache_buf_free(void *addr, size_t size) 3825*7c478bd9Sstevel@tonic-gate { 3826*7c478bd9Sstevel@tonic-gate 3827*7c478bd9Sstevel@tonic-gate atomic_add_64(&clstat_debug.dirents.value.ui64, -(int64_t)size); 3828*7c478bd9Sstevel@tonic-gate kmem_free(addr, size); 3829*7c478bd9Sstevel@tonic-gate } 3830*7c478bd9Sstevel@tonic-gate #endif 3831*7c478bd9Sstevel@tonic-gate 3832*7c478bd9Sstevel@tonic-gate static int 3833*7c478bd9Sstevel@tonic-gate nfs_free_data_reclaim(rnode_t *rp) 3834*7c478bd9Sstevel@tonic-gate { 3835*7c478bd9Sstevel@tonic-gate char *contents; 3836*7c478bd9Sstevel@tonic-gate int size; 3837*7c478bd9Sstevel@tonic-gate vsecattr_t *vsp; 3838*7c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info; 3839*7c478bd9Sstevel@tonic-gate int freed; 3840*7c478bd9Sstevel@tonic-gate cred_t *cred; 3841*7c478bd9Sstevel@tonic-gate 3842*7c478bd9Sstevel@tonic-gate /* 3843*7c478bd9Sstevel@tonic-gate * Free any held credentials and caches which 3844*7c478bd9Sstevel@tonic-gate * may be associated with this rnode. 
3845*7c478bd9Sstevel@tonic-gate */ 3846*7c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3847*7c478bd9Sstevel@tonic-gate cred = rp->r_cred; 3848*7c478bd9Sstevel@tonic-gate rp->r_cred = NULL; 3849*7c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents; 3850*7c478bd9Sstevel@tonic-gate size = rp->r_symlink.size; 3851*7c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL; 3852*7c478bd9Sstevel@tonic-gate vsp = rp->r_secattr; 3853*7c478bd9Sstevel@tonic-gate rp->r_secattr = NULL; 3854*7c478bd9Sstevel@tonic-gate info = rp->r_pathconf; 3855*7c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL; 3856*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3857*7c478bd9Sstevel@tonic-gate 3858*7c478bd9Sstevel@tonic-gate if (cred != NULL) 3859*7c478bd9Sstevel@tonic-gate crfree(cred); 3860*7c478bd9Sstevel@tonic-gate 3861*7c478bd9Sstevel@tonic-gate /* 3862*7c478bd9Sstevel@tonic-gate * Free the access cache entries. 3863*7c478bd9Sstevel@tonic-gate */ 3864*7c478bd9Sstevel@tonic-gate freed = nfs_access_purge_rp(rp); 3865*7c478bd9Sstevel@tonic-gate 3866*7c478bd9Sstevel@tonic-gate if (!HAVE_RDDIR_CACHE(rp) && 3867*7c478bd9Sstevel@tonic-gate contents == NULL && 3868*7c478bd9Sstevel@tonic-gate vsp == NULL && 3869*7c478bd9Sstevel@tonic-gate info == NULL) 3870*7c478bd9Sstevel@tonic-gate return (freed); 3871*7c478bd9Sstevel@tonic-gate 3872*7c478bd9Sstevel@tonic-gate /* 3873*7c478bd9Sstevel@tonic-gate * Free the readdir cache entries 3874*7c478bd9Sstevel@tonic-gate */ 3875*7c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp)) 3876*7c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(RTOV(rp)); 3877*7c478bd9Sstevel@tonic-gate 3878*7c478bd9Sstevel@tonic-gate /* 3879*7c478bd9Sstevel@tonic-gate * Free the symbolic link cache. 
3880*7c478bd9Sstevel@tonic-gate */ 3881*7c478bd9Sstevel@tonic-gate if (contents != NULL) { 3882*7c478bd9Sstevel@tonic-gate 3883*7c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size); 3884*7c478bd9Sstevel@tonic-gate } 3885*7c478bd9Sstevel@tonic-gate 3886*7c478bd9Sstevel@tonic-gate /* 3887*7c478bd9Sstevel@tonic-gate * Free any cached ACL. 3888*7c478bd9Sstevel@tonic-gate */ 3889*7c478bd9Sstevel@tonic-gate if (vsp != NULL) 3890*7c478bd9Sstevel@tonic-gate nfs_acl_free(vsp); 3891*7c478bd9Sstevel@tonic-gate 3892*7c478bd9Sstevel@tonic-gate /* 3893*7c478bd9Sstevel@tonic-gate * Free any cached pathconf information. 3894*7c478bd9Sstevel@tonic-gate */ 3895*7c478bd9Sstevel@tonic-gate if (info != NULL) 3896*7c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info)); 3897*7c478bd9Sstevel@tonic-gate 3898*7c478bd9Sstevel@tonic-gate return (1); 3899*7c478bd9Sstevel@tonic-gate } 3900*7c478bd9Sstevel@tonic-gate 3901*7c478bd9Sstevel@tonic-gate static int 3902*7c478bd9Sstevel@tonic-gate nfs_active_data_reclaim(rnode_t *rp) 3903*7c478bd9Sstevel@tonic-gate { 3904*7c478bd9Sstevel@tonic-gate char *contents; 3905*7c478bd9Sstevel@tonic-gate int size; 3906*7c478bd9Sstevel@tonic-gate vsecattr_t *vsp; 3907*7c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info; 3908*7c478bd9Sstevel@tonic-gate int freed; 3909*7c478bd9Sstevel@tonic-gate 3910*7c478bd9Sstevel@tonic-gate /* 3911*7c478bd9Sstevel@tonic-gate * Free any held credentials and caches which 3912*7c478bd9Sstevel@tonic-gate * may be associated with this rnode. 
3913*7c478bd9Sstevel@tonic-gate */ 3914*7c478bd9Sstevel@tonic-gate if (!mutex_tryenter(&rp->r_statelock)) 3915*7c478bd9Sstevel@tonic-gate return (0); 3916*7c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents; 3917*7c478bd9Sstevel@tonic-gate size = rp->r_symlink.size; 3918*7c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL; 3919*7c478bd9Sstevel@tonic-gate vsp = rp->r_secattr; 3920*7c478bd9Sstevel@tonic-gate rp->r_secattr = NULL; 3921*7c478bd9Sstevel@tonic-gate info = rp->r_pathconf; 3922*7c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL; 3923*7c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3924*7c478bd9Sstevel@tonic-gate 3925*7c478bd9Sstevel@tonic-gate /* 3926*7c478bd9Sstevel@tonic-gate * Free the access cache entries. 3927*7c478bd9Sstevel@tonic-gate */ 3928*7c478bd9Sstevel@tonic-gate freed = nfs_access_purge_rp(rp); 3929*7c478bd9Sstevel@tonic-gate 3930*7c478bd9Sstevel@tonic-gate if (!HAVE_RDDIR_CACHE(rp) && 3931*7c478bd9Sstevel@tonic-gate contents == NULL && 3932*7c478bd9Sstevel@tonic-gate vsp == NULL && 3933*7c478bd9Sstevel@tonic-gate info == NULL) 3934*7c478bd9Sstevel@tonic-gate return (freed); 3935*7c478bd9Sstevel@tonic-gate 3936*7c478bd9Sstevel@tonic-gate /* 3937*7c478bd9Sstevel@tonic-gate * Free the readdir cache entries 3938*7c478bd9Sstevel@tonic-gate */ 3939*7c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp)) 3940*7c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(RTOV(rp)); 3941*7c478bd9Sstevel@tonic-gate 3942*7c478bd9Sstevel@tonic-gate /* 3943*7c478bd9Sstevel@tonic-gate * Free the symbolic link cache. 3944*7c478bd9Sstevel@tonic-gate */ 3945*7c478bd9Sstevel@tonic-gate if (contents != NULL) { 3946*7c478bd9Sstevel@tonic-gate 3947*7c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size); 3948*7c478bd9Sstevel@tonic-gate } 3949*7c478bd9Sstevel@tonic-gate 3950*7c478bd9Sstevel@tonic-gate /* 3951*7c478bd9Sstevel@tonic-gate * Free any cached ACL. 
3952*7c478bd9Sstevel@tonic-gate */ 3953*7c478bd9Sstevel@tonic-gate if (vsp != NULL) 3954*7c478bd9Sstevel@tonic-gate nfs_acl_free(vsp); 3955*7c478bd9Sstevel@tonic-gate 3956*7c478bd9Sstevel@tonic-gate /* 3957*7c478bd9Sstevel@tonic-gate * Free any cached pathconf information. 3958*7c478bd9Sstevel@tonic-gate */ 3959*7c478bd9Sstevel@tonic-gate if (info != NULL) 3960*7c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info)); 3961*7c478bd9Sstevel@tonic-gate 3962*7c478bd9Sstevel@tonic-gate return (1); 3963*7c478bd9Sstevel@tonic-gate } 3964*7c478bd9Sstevel@tonic-gate 3965*7c478bd9Sstevel@tonic-gate static int 3966*7c478bd9Sstevel@tonic-gate nfs_free_reclaim(void) 3967*7c478bd9Sstevel@tonic-gate { 3968*7c478bd9Sstevel@tonic-gate int freed; 3969*7c478bd9Sstevel@tonic-gate rnode_t *rp; 3970*7c478bd9Sstevel@tonic-gate 3971*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3972*7c478bd9Sstevel@tonic-gate clstat_debug.f_reclaim.value.ui64++; 3973*7c478bd9Sstevel@tonic-gate #endif 3974*7c478bd9Sstevel@tonic-gate freed = 0; 3975*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 3976*7c478bd9Sstevel@tonic-gate rp = rpfreelist; 3977*7c478bd9Sstevel@tonic-gate if (rp != NULL) { 3978*7c478bd9Sstevel@tonic-gate do { 3979*7c478bd9Sstevel@tonic-gate if (nfs_free_data_reclaim(rp)) 3980*7c478bd9Sstevel@tonic-gate freed = 1; 3981*7c478bd9Sstevel@tonic-gate } while ((rp = rp->r_freef) != rpfreelist); 3982*7c478bd9Sstevel@tonic-gate } 3983*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 3984*7c478bd9Sstevel@tonic-gate return (freed); 3985*7c478bd9Sstevel@tonic-gate } 3986*7c478bd9Sstevel@tonic-gate 3987*7c478bd9Sstevel@tonic-gate static int 3988*7c478bd9Sstevel@tonic-gate nfs_active_reclaim(void) 3989*7c478bd9Sstevel@tonic-gate { 3990*7c478bd9Sstevel@tonic-gate int freed; 3991*7c478bd9Sstevel@tonic-gate int index; 3992*7c478bd9Sstevel@tonic-gate rnode_t *rp; 3993*7c478bd9Sstevel@tonic-gate 3994*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 3995*7c478bd9Sstevel@tonic-gate 
clstat_debug.a_reclaim.value.ui64++; 3996*7c478bd9Sstevel@tonic-gate #endif 3997*7c478bd9Sstevel@tonic-gate freed = 0; 3998*7c478bd9Sstevel@tonic-gate for (index = 0; index < rtablesize; index++) { 3999*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 4000*7c478bd9Sstevel@tonic-gate for (rp = rtable[index].r_hashf; 4001*7c478bd9Sstevel@tonic-gate rp != (rnode_t *)(&rtable[index]); 4002*7c478bd9Sstevel@tonic-gate rp = rp->r_hashf) { 4003*7c478bd9Sstevel@tonic-gate if (nfs_active_data_reclaim(rp)) 4004*7c478bd9Sstevel@tonic-gate freed = 1; 4005*7c478bd9Sstevel@tonic-gate } 4006*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 4007*7c478bd9Sstevel@tonic-gate } 4008*7c478bd9Sstevel@tonic-gate return (freed); 4009*7c478bd9Sstevel@tonic-gate } 4010*7c478bd9Sstevel@tonic-gate 4011*7c478bd9Sstevel@tonic-gate static int 4012*7c478bd9Sstevel@tonic-gate nfs_rnode_reclaim(void) 4013*7c478bd9Sstevel@tonic-gate { 4014*7c478bd9Sstevel@tonic-gate int freed; 4015*7c478bd9Sstevel@tonic-gate rnode_t *rp; 4016*7c478bd9Sstevel@tonic-gate vnode_t *vp; 4017*7c478bd9Sstevel@tonic-gate 4018*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4019*7c478bd9Sstevel@tonic-gate clstat_debug.r_reclaim.value.ui64++; 4020*7c478bd9Sstevel@tonic-gate #endif 4021*7c478bd9Sstevel@tonic-gate freed = 0; 4022*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 4023*7c478bd9Sstevel@tonic-gate while ((rp = rpfreelist) != NULL) { 4024*7c478bd9Sstevel@tonic-gate rp_rmfree(rp); 4025*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 4026*7c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) { 4027*7c478bd9Sstevel@tonic-gate vp = RTOV(rp); 4028*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 4029*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 4030*7c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 4031*7c478bd9Sstevel@tonic-gate vp->v_count--; 4032*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 4033*7c478bd9Sstevel@tonic-gate 
rw_exit(&rp->r_hashq->r_lock); 4034*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 4035*7c478bd9Sstevel@tonic-gate continue; 4036*7c478bd9Sstevel@tonic-gate } 4037*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 4038*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 4039*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 4040*7c478bd9Sstevel@tonic-gate } 4041*7c478bd9Sstevel@tonic-gate /* 4042*7c478bd9Sstevel@tonic-gate * This call to rp_addfree will end up destroying the 4043*7c478bd9Sstevel@tonic-gate * rnode, but in a safe way with the appropriate set 4044*7c478bd9Sstevel@tonic-gate * of checks done. 4045*7c478bd9Sstevel@tonic-gate */ 4046*7c478bd9Sstevel@tonic-gate rp_addfree(rp, CRED()); 4047*7c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock); 4048*7c478bd9Sstevel@tonic-gate } 4049*7c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock); 4050*7c478bd9Sstevel@tonic-gate return (freed); 4051*7c478bd9Sstevel@tonic-gate } 4052*7c478bd9Sstevel@tonic-gate 4053*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4054*7c478bd9Sstevel@tonic-gate static void 4055*7c478bd9Sstevel@tonic-gate nfs_reclaim(void *cdrarg) 4056*7c478bd9Sstevel@tonic-gate { 4057*7c478bd9Sstevel@tonic-gate 4058*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4059*7c478bd9Sstevel@tonic-gate clstat_debug.reclaim.value.ui64++; 4060*7c478bd9Sstevel@tonic-gate #endif 4061*7c478bd9Sstevel@tonic-gate if (nfs_free_reclaim()) 4062*7c478bd9Sstevel@tonic-gate return; 4063*7c478bd9Sstevel@tonic-gate 4064*7c478bd9Sstevel@tonic-gate if (nfs_active_reclaim()) 4065*7c478bd9Sstevel@tonic-gate return; 4066*7c478bd9Sstevel@tonic-gate 4067*7c478bd9Sstevel@tonic-gate (void) nfs_rnode_reclaim(); 4068*7c478bd9Sstevel@tonic-gate } 4069*7c478bd9Sstevel@tonic-gate 4070*7c478bd9Sstevel@tonic-gate /* 4071*7c478bd9Sstevel@tonic-gate * NFS client failover support 4072*7c478bd9Sstevel@tonic-gate * 4073*7c478bd9Sstevel@tonic-gate * Routines to copy filehandles 4074*7c478bd9Sstevel@tonic-gate */ 
4075*7c478bd9Sstevel@tonic-gate void 4076*7c478bd9Sstevel@tonic-gate nfscopyfh(caddr_t fhp, vnode_t *vp) 4077*7c478bd9Sstevel@tonic-gate { 4078*7c478bd9Sstevel@tonic-gate fhandle_t *dest = (fhandle_t *)fhp; 4079*7c478bd9Sstevel@tonic-gate 4080*7c478bd9Sstevel@tonic-gate if (dest != NULL) 4081*7c478bd9Sstevel@tonic-gate *dest = *VTOFH(vp); 4082*7c478bd9Sstevel@tonic-gate } 4083*7c478bd9Sstevel@tonic-gate 4084*7c478bd9Sstevel@tonic-gate void 4085*7c478bd9Sstevel@tonic-gate nfs3copyfh(caddr_t fhp, vnode_t *vp) 4086*7c478bd9Sstevel@tonic-gate { 4087*7c478bd9Sstevel@tonic-gate nfs_fh3 *dest = (nfs_fh3 *)fhp; 4088*7c478bd9Sstevel@tonic-gate 4089*7c478bd9Sstevel@tonic-gate if (dest != NULL) 4090*7c478bd9Sstevel@tonic-gate *dest = *VTOFH3(vp); 4091*7c478bd9Sstevel@tonic-gate } 4092*7c478bd9Sstevel@tonic-gate 4093*7c478bd9Sstevel@tonic-gate /* 4094*7c478bd9Sstevel@tonic-gate * NFS client failover support 4095*7c478bd9Sstevel@tonic-gate * 4096*7c478bd9Sstevel@tonic-gate * failover_safe() will test various conditions to ensure that 4097*7c478bd9Sstevel@tonic-gate * failover is permitted for this vnode. It will be denied 4098*7c478bd9Sstevel@tonic-gate * if: 4099*7c478bd9Sstevel@tonic-gate * 1) the operation in progress does not support failover (NULL fi) 4100*7c478bd9Sstevel@tonic-gate * 2) there are no available replicas (NULL mi_servers->sv_next) 4101*7c478bd9Sstevel@tonic-gate * 3) any locks are outstanding on this file 4102*7c478bd9Sstevel@tonic-gate */ 4103*7c478bd9Sstevel@tonic-gate static int 4104*7c478bd9Sstevel@tonic-gate failover_safe(failinfo_t *fi) 4105*7c478bd9Sstevel@tonic-gate { 4106*7c478bd9Sstevel@tonic-gate 4107*7c478bd9Sstevel@tonic-gate /* 4108*7c478bd9Sstevel@tonic-gate * Does this op permit failover? 
4109*7c478bd9Sstevel@tonic-gate */ 4110*7c478bd9Sstevel@tonic-gate if (fi == NULL || fi->vp == NULL) 4111*7c478bd9Sstevel@tonic-gate return (0); 4112*7c478bd9Sstevel@tonic-gate 4113*7c478bd9Sstevel@tonic-gate /* 4114*7c478bd9Sstevel@tonic-gate * Are there any alternates to failover to? 4115*7c478bd9Sstevel@tonic-gate */ 4116*7c478bd9Sstevel@tonic-gate if (VTOMI(fi->vp)->mi_servers->sv_next == NULL) 4117*7c478bd9Sstevel@tonic-gate return (0); 4118*7c478bd9Sstevel@tonic-gate 4119*7c478bd9Sstevel@tonic-gate /* 4120*7c478bd9Sstevel@tonic-gate * Disable check; we've forced local locking 4121*7c478bd9Sstevel@tonic-gate * 4122*7c478bd9Sstevel@tonic-gate * if (flk_has_remote_locks(fi->vp)) 4123*7c478bd9Sstevel@tonic-gate * return (0); 4124*7c478bd9Sstevel@tonic-gate */ 4125*7c478bd9Sstevel@tonic-gate 4126*7c478bd9Sstevel@tonic-gate /* 4127*7c478bd9Sstevel@tonic-gate * If we have no partial path, we can't do anything 4128*7c478bd9Sstevel@tonic-gate */ 4129*7c478bd9Sstevel@tonic-gate if (VTOR(fi->vp)->r_path == NULL) 4130*7c478bd9Sstevel@tonic-gate return (0); 4131*7c478bd9Sstevel@tonic-gate 4132*7c478bd9Sstevel@tonic-gate return (1); 4133*7c478bd9Sstevel@tonic-gate } 4134*7c478bd9Sstevel@tonic-gate 4135*7c478bd9Sstevel@tonic-gate #include <sys/thread.h> 4136*7c478bd9Sstevel@tonic-gate 4137*7c478bd9Sstevel@tonic-gate /* 4138*7c478bd9Sstevel@tonic-gate * NFS client failover support 4139*7c478bd9Sstevel@tonic-gate * 4140*7c478bd9Sstevel@tonic-gate * failover_newserver() will start a search for a new server, 4141*7c478bd9Sstevel@tonic-gate * preferably by starting an async thread to do the work. If 4142*7c478bd9Sstevel@tonic-gate * someone is already doing this (recognizable by MI_BINDINPROG 4143*7c478bd9Sstevel@tonic-gate * being set), it will simply return and the calling thread 4144*7c478bd9Sstevel@tonic-gate * will queue on the mi_failover_cv condition variable. 
4145*7c478bd9Sstevel@tonic-gate */ 4146*7c478bd9Sstevel@tonic-gate static void 4147*7c478bd9Sstevel@tonic-gate failover_newserver(mntinfo_t *mi) 4148*7c478bd9Sstevel@tonic-gate { 4149*7c478bd9Sstevel@tonic-gate /* 4150*7c478bd9Sstevel@tonic-gate * Check if someone else is doing this already 4151*7c478bd9Sstevel@tonic-gate */ 4152*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4153*7c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_BINDINPROG) { 4154*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4155*7c478bd9Sstevel@tonic-gate return; 4156*7c478bd9Sstevel@tonic-gate } 4157*7c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_BINDINPROG; 4158*7c478bd9Sstevel@tonic-gate 4159*7c478bd9Sstevel@tonic-gate /* 4160*7c478bd9Sstevel@tonic-gate * Need to hold the vfs struct so that it can't be released 4161*7c478bd9Sstevel@tonic-gate * while the failover thread is selecting a new server. 4162*7c478bd9Sstevel@tonic-gate */ 4163*7c478bd9Sstevel@tonic-gate VFS_HOLD(mi->mi_vfsp); 4164*7c478bd9Sstevel@tonic-gate 4165*7c478bd9Sstevel@tonic-gate /* 4166*7c478bd9Sstevel@tonic-gate * Start a thread to do the real searching. 4167*7c478bd9Sstevel@tonic-gate */ 4168*7c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0, failover_thread, mi, 0, minclsyspri); 4169*7c478bd9Sstevel@tonic-gate 4170*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4171*7c478bd9Sstevel@tonic-gate } 4172*7c478bd9Sstevel@tonic-gate 4173*7c478bd9Sstevel@tonic-gate /* 4174*7c478bd9Sstevel@tonic-gate * NFS client failover support 4175*7c478bd9Sstevel@tonic-gate * 4176*7c478bd9Sstevel@tonic-gate * failover_thread() will find a new server to replace the one 4177*7c478bd9Sstevel@tonic-gate * currently in use, wake up other threads waiting on this mount 4178*7c478bd9Sstevel@tonic-gate * point, and die. 
It will start at the head of the server list 4179*7c478bd9Sstevel@tonic-gate * and poll servers until it finds one with an NFS server which is 4180*7c478bd9Sstevel@tonic-gate * registered and responds to a NULL procedure ping. 4181*7c478bd9Sstevel@tonic-gate * 4182*7c478bd9Sstevel@tonic-gate * XXX failover_thread is unsafe within the scope of the 4183*7c478bd9Sstevel@tonic-gate * present model defined for cpr to suspend the system. 4184*7c478bd9Sstevel@tonic-gate * Specifically, over-the-wire calls made by the thread 4185*7c478bd9Sstevel@tonic-gate * are unsafe. The thread needs to be reevaluated in case of 4186*7c478bd9Sstevel@tonic-gate * future updates to the cpr suspend model. 4187*7c478bd9Sstevel@tonic-gate */ 4188*7c478bd9Sstevel@tonic-gate static void 4189*7c478bd9Sstevel@tonic-gate failover_thread(mntinfo_t *mi) 4190*7c478bd9Sstevel@tonic-gate { 4191*7c478bd9Sstevel@tonic-gate servinfo_t *svp = NULL; 4192*7c478bd9Sstevel@tonic-gate CLIENT *cl; 4193*7c478bd9Sstevel@tonic-gate enum clnt_stat status; 4194*7c478bd9Sstevel@tonic-gate struct timeval tv; 4195*7c478bd9Sstevel@tonic-gate int error; 4196*7c478bd9Sstevel@tonic-gate int oncethru = 0; 4197*7c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 4198*7c478bd9Sstevel@tonic-gate rnode_t *rp; 4199*7c478bd9Sstevel@tonic-gate int index; 4200*7c478bd9Sstevel@tonic-gate char *srvnames; 4201*7c478bd9Sstevel@tonic-gate size_t srvnames_len; 4202*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl = NULL; 4203*7c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 4204*7c478bd9Sstevel@tonic-gate 4205*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4206*7c478bd9Sstevel@tonic-gate /* 4207*7c478bd9Sstevel@tonic-gate * This is currently only needed to access counters which exist on 4208*7c478bd9Sstevel@tonic-gate * DEBUG kernels, hence we don't want to pay the penalty of the lookup 4209*7c478bd9Sstevel@tonic-gate * on non-DEBUG kernels. 
4210*7c478bd9Sstevel@tonic-gate */ 4211*7c478bd9Sstevel@tonic-gate nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone); 4212*7c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 4213*7c478bd9Sstevel@tonic-gate #endif 4214*7c478bd9Sstevel@tonic-gate 4215*7c478bd9Sstevel@tonic-gate /* 4216*7c478bd9Sstevel@tonic-gate * Its safe to piggyback on the mi_lock since failover_newserver() 4217*7c478bd9Sstevel@tonic-gate * code guarantees that there will be only one failover thread 4218*7c478bd9Sstevel@tonic-gate * per mountinfo at any instance. 4219*7c478bd9Sstevel@tonic-gate */ 4220*7c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_lock, callb_generic_cpr, 4221*7c478bd9Sstevel@tonic-gate "failover_thread"); 4222*7c478bd9Sstevel@tonic-gate 4223*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4224*7c478bd9Sstevel@tonic-gate while (mi->mi_readers) { 4225*7c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 4226*7c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_failover_cv, &mi->mi_lock); 4227*7c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock); 4228*7c478bd9Sstevel@tonic-gate } 4229*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4230*7c478bd9Sstevel@tonic-gate 4231*7c478bd9Sstevel@tonic-gate tv.tv_sec = 2; 4232*7c478bd9Sstevel@tonic-gate tv.tv_usec = 0; 4233*7c478bd9Sstevel@tonic-gate 4234*7c478bd9Sstevel@tonic-gate /* 4235*7c478bd9Sstevel@tonic-gate * Ping the null NFS procedure of every server in 4236*7c478bd9Sstevel@tonic-gate * the list until one responds. We always start 4237*7c478bd9Sstevel@tonic-gate * at the head of the list and always skip the one 4238*7c478bd9Sstevel@tonic-gate * that is current, since it's caused us a problem. 
4239*7c478bd9Sstevel@tonic-gate */ 4240*7c478bd9Sstevel@tonic-gate while (svp == NULL) { 4241*7c478bd9Sstevel@tonic-gate for (svp = mi->mi_servers; svp; svp = svp->sv_next) { 4242*7c478bd9Sstevel@tonic-gate if (!oncethru && svp == mi->mi_curr_serv) 4243*7c478bd9Sstevel@tonic-gate continue; 4244*7c478bd9Sstevel@tonic-gate 4245*7c478bd9Sstevel@tonic-gate /* 4246*7c478bd9Sstevel@tonic-gate * If the file system was forcibly umounted 4247*7c478bd9Sstevel@tonic-gate * while trying to do a failover, then just 4248*7c478bd9Sstevel@tonic-gate * give up on the failover. It won't matter 4249*7c478bd9Sstevel@tonic-gate * what the server is. 4250*7c478bd9Sstevel@tonic-gate */ 4251*7c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 4252*7c478bd9Sstevel@tonic-gate svp = NULL; 4253*7c478bd9Sstevel@tonic-gate goto done; 4254*7c478bd9Sstevel@tonic-gate } 4255*7c478bd9Sstevel@tonic-gate 4256*7c478bd9Sstevel@tonic-gate error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, 4257*7c478bd9Sstevel@tonic-gate NFS_PROGRAM, NFS_VERSION, 0, 1, CRED(), &cl); 4258*7c478bd9Sstevel@tonic-gate if (error) 4259*7c478bd9Sstevel@tonic-gate continue; 4260*7c478bd9Sstevel@tonic-gate 4261*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 4262*7c478bd9Sstevel@tonic-gate cl->cl_nosignal = TRUE; 4263*7c478bd9Sstevel@tonic-gate status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, 4264*7c478bd9Sstevel@tonic-gate xdr_void, NULL, tv); 4265*7c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 4266*7c478bd9Sstevel@tonic-gate cl->cl_nosignal = FALSE; 4267*7c478bd9Sstevel@tonic-gate AUTH_DESTROY(cl->cl_auth); 4268*7c478bd9Sstevel@tonic-gate CLNT_DESTROY(cl); 4269*7c478bd9Sstevel@tonic-gate if (status == RPC_SUCCESS) { 4270*7c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) { 4271*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4272*7c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 4273*7c478bd9Sstevel@tonic-gate "NFS%d: failing over: selecting original server %s", 4274*7c478bd9Sstevel@tonic-gate 
mi->mi_vers, svp->sv_hostname); 4275*7c478bd9Sstevel@tonic-gate #else 4276*7c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 4277*7c478bd9Sstevel@tonic-gate "NFS: failing over: selecting original server %s", 4278*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 4279*7c478bd9Sstevel@tonic-gate #endif 4280*7c478bd9Sstevel@tonic-gate } else { 4281*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4282*7c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 4283*7c478bd9Sstevel@tonic-gate "NFS%d: failing over from %s to %s", 4284*7c478bd9Sstevel@tonic-gate mi->mi_vers, 4285*7c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_hostname, 4286*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 4287*7c478bd9Sstevel@tonic-gate #else 4288*7c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 4289*7c478bd9Sstevel@tonic-gate "NFS: failing over from %s to %s", 4290*7c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_hostname, 4291*7c478bd9Sstevel@tonic-gate svp->sv_hostname); 4292*7c478bd9Sstevel@tonic-gate #endif 4293*7c478bd9Sstevel@tonic-gate } 4294*7c478bd9Sstevel@tonic-gate break; 4295*7c478bd9Sstevel@tonic-gate } 4296*7c478bd9Sstevel@tonic-gate } 4297*7c478bd9Sstevel@tonic-gate 4298*7c478bd9Sstevel@tonic-gate if (svp == NULL) { 4299*7c478bd9Sstevel@tonic-gate if (!oncethru) { 4300*7c478bd9Sstevel@tonic-gate srvnames = nfs_getsrvnames(mi, &srvnames_len); 4301*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4302*7c478bd9Sstevel@tonic-gate zprintf(zoneid, 4303*7c478bd9Sstevel@tonic-gate "NFS%d servers %s not responding " 4304*7c478bd9Sstevel@tonic-gate "still trying\n", mi->mi_vers, srvnames); 4305*7c478bd9Sstevel@tonic-gate #else 4306*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS servers %s not responding " 4307*7c478bd9Sstevel@tonic-gate "still trying\n", srvnames); 4308*7c478bd9Sstevel@tonic-gate #endif 4309*7c478bd9Sstevel@tonic-gate oncethru = 1; 4310*7c478bd9Sstevel@tonic-gate } 4311*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4312*7c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 
4313*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4314*7c478bd9Sstevel@tonic-gate delay(hz); 4315*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4316*7c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock); 4317*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4318*7c478bd9Sstevel@tonic-gate } 4319*7c478bd9Sstevel@tonic-gate } 4320*7c478bd9Sstevel@tonic-gate 4321*7c478bd9Sstevel@tonic-gate if (oncethru) { 4322*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4323*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d servers %s ok\n", mi->mi_vers, srvnames); 4324*7c478bd9Sstevel@tonic-gate #else 4325*7c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS servers %s ok\n", srvnames); 4326*7c478bd9Sstevel@tonic-gate #endif 4327*7c478bd9Sstevel@tonic-gate } 4328*7c478bd9Sstevel@tonic-gate 4329*7c478bd9Sstevel@tonic-gate if (svp != mi->mi_curr_serv) { 4330*7c478bd9Sstevel@tonic-gate (void) dnlc_purge_vfsp(mi->mi_vfsp, 0); 4331*7c478bd9Sstevel@tonic-gate index = rtablehash(&mi->mi_curr_serv->sv_fhandle); 4332*7c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_WRITER); 4333*7c478bd9Sstevel@tonic-gate rp = rfind(&rtable[index], &mi->mi_curr_serv->sv_fhandle, 4334*7c478bd9Sstevel@tonic-gate mi->mi_vfsp); 4335*7c478bd9Sstevel@tonic-gate if (rp != NULL) { 4336*7c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) 4337*7c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 4338*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 4339*7c478bd9Sstevel@tonic-gate rp->r_server = svp; 4340*7c478bd9Sstevel@tonic-gate rp->r_fh = svp->sv_fhandle; 4341*7c478bd9Sstevel@tonic-gate (void) nfs_free_data_reclaim(rp); 4342*7c478bd9Sstevel@tonic-gate index = rtablehash(&rp->r_fh); 4343*7c478bd9Sstevel@tonic-gate rp->r_hashq = &rtable[index]; 4344*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 4345*7c478bd9Sstevel@tonic-gate vn_exists(RTOV(rp)); 4346*7c478bd9Sstevel@tonic-gate rp_addhash(rp); 4347*7c478bd9Sstevel@tonic-gate 
rw_exit(&rp->r_hashq->r_lock); 4348*7c478bd9Sstevel@tonic-gate VN_RELE(RTOV(rp)); 4349*7c478bd9Sstevel@tonic-gate } else 4350*7c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 4351*7c478bd9Sstevel@tonic-gate } 4352*7c478bd9Sstevel@tonic-gate 4353*7c478bd9Sstevel@tonic-gate done: 4354*7c478bd9Sstevel@tonic-gate if (oncethru) 4355*7c478bd9Sstevel@tonic-gate kmem_free(srvnames, srvnames_len); 4356*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4357*7c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_BINDINPROG; 4358*7c478bd9Sstevel@tonic-gate if (svp != NULL) { 4359*7c478bd9Sstevel@tonic-gate mi->mi_curr_serv = svp; 4360*7c478bd9Sstevel@tonic-gate mi->mi_failover++; 4361*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4362*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.failover.value.ui64++; 4363*7c478bd9Sstevel@tonic-gate #endif 4364*7c478bd9Sstevel@tonic-gate } 4365*7c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_failover_cv); 4366*7c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 4367*7c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 4368*7c478bd9Sstevel@tonic-gate zthread_exit(); 4369*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 4370*7c478bd9Sstevel@tonic-gate } 4371*7c478bd9Sstevel@tonic-gate 4372*7c478bd9Sstevel@tonic-gate /* 4373*7c478bd9Sstevel@tonic-gate * NFS client failover support 4374*7c478bd9Sstevel@tonic-gate * 4375*7c478bd9Sstevel@tonic-gate * failover_wait() will put the thread to sleep until MI_BINDINPROG 4376*7c478bd9Sstevel@tonic-gate * is cleared, meaning that failover is complete. Called with 4377*7c478bd9Sstevel@tonic-gate * mi_lock mutex held. 
4378*7c478bd9Sstevel@tonic-gate */ 4379*7c478bd9Sstevel@tonic-gate static int 4380*7c478bd9Sstevel@tonic-gate failover_wait(mntinfo_t *mi) 4381*7c478bd9Sstevel@tonic-gate { 4382*7c478bd9Sstevel@tonic-gate k_sigset_t smask; 4383*7c478bd9Sstevel@tonic-gate 4384*7c478bd9Sstevel@tonic-gate /* 4385*7c478bd9Sstevel@tonic-gate * If someone else is hunting for a living server, 4386*7c478bd9Sstevel@tonic-gate * sleep until it's done. After our sleep, we may 4387*7c478bd9Sstevel@tonic-gate * be bound to the right server and get off cheaply. 4388*7c478bd9Sstevel@tonic-gate */ 4389*7c478bd9Sstevel@tonic-gate while (mi->mi_flags & MI_BINDINPROG) { 4390*7c478bd9Sstevel@tonic-gate /* 4391*7c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 4392*7c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks). 4393*7c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified. 4394*7c478bd9Sstevel@tonic-gate */ 4395*7c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT); 4396*7c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock)) { 4397*7c478bd9Sstevel@tonic-gate /* 4398*7c478bd9Sstevel@tonic-gate * restore original signal mask 4399*7c478bd9Sstevel@tonic-gate */ 4400*7c478bd9Sstevel@tonic-gate sigunintr(&smask); 4401*7c478bd9Sstevel@tonic-gate return (EINTR); 4402*7c478bd9Sstevel@tonic-gate } 4403*7c478bd9Sstevel@tonic-gate /* 4404*7c478bd9Sstevel@tonic-gate * restore original signal mask 4405*7c478bd9Sstevel@tonic-gate */ 4406*7c478bd9Sstevel@tonic-gate sigunintr(&smask); 4407*7c478bd9Sstevel@tonic-gate } 4408*7c478bd9Sstevel@tonic-gate return (0); 4409*7c478bd9Sstevel@tonic-gate } 4410*7c478bd9Sstevel@tonic-gate 4411*7c478bd9Sstevel@tonic-gate /* 4412*7c478bd9Sstevel@tonic-gate * NFS client failover support 4413*7c478bd9Sstevel@tonic-gate * 4414*7c478bd9Sstevel@tonic-gate * failover_remap() will do a partial pathname lookup and find the 4415*7c478bd9Sstevel@tonic-gate * desired 
vnode on the current server. The interim vnode will be 4416*7c478bd9Sstevel@tonic-gate * discarded after we pilfer the new filehandle. 4417*7c478bd9Sstevel@tonic-gate * 4418*7c478bd9Sstevel@tonic-gate * Side effects: 4419*7c478bd9Sstevel@tonic-gate * - This routine will also update the filehandle in the args structure 4420*7c478bd9Sstevel@tonic-gate * pointed to by the fi->fhp pointer if it is non-NULL. 4421*7c478bd9Sstevel@tonic-gate */ 4422*7c478bd9Sstevel@tonic-gate 4423*7c478bd9Sstevel@tonic-gate static int 4424*7c478bd9Sstevel@tonic-gate failover_remap(failinfo_t *fi) 4425*7c478bd9Sstevel@tonic-gate { 4426*7c478bd9Sstevel@tonic-gate vnode_t *vp, *nvp, *rootvp; 4427*7c478bd9Sstevel@tonic-gate rnode_t *rp, *nrp; 4428*7c478bd9Sstevel@tonic-gate mntinfo_t *mi; 4429*7c478bd9Sstevel@tonic-gate int error; 4430*7c478bd9Sstevel@tonic-gate int index; 4431*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4432*7c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 4433*7c478bd9Sstevel@tonic-gate 4434*7c478bd9Sstevel@tonic-gate nfscl = zone_getspecific(nfsclnt_zone_key, curproc->p_zone); 4435*7c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 4436*7c478bd9Sstevel@tonic-gate #endif 4437*7c478bd9Sstevel@tonic-gate /* 4438*7c478bd9Sstevel@tonic-gate * Sanity check 4439*7c478bd9Sstevel@tonic-gate */ 4440*7c478bd9Sstevel@tonic-gate if (fi == NULL || fi->vp == NULL || fi->lookupproc == NULL) 4441*7c478bd9Sstevel@tonic-gate return (EINVAL); 4442*7c478bd9Sstevel@tonic-gate vp = fi->vp; 4443*7c478bd9Sstevel@tonic-gate rp = VTOR(vp); 4444*7c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 4445*7c478bd9Sstevel@tonic-gate 4446*7c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VROOT)) { 4447*7c478bd9Sstevel@tonic-gate /* 4448*7c478bd9Sstevel@tonic-gate * Given the root fh, use the path stored in 4449*7c478bd9Sstevel@tonic-gate * the rnode to find the fh for the new server. 
4450*7c478bd9Sstevel@tonic-gate */ 4451*7c478bd9Sstevel@tonic-gate error = VFS_ROOT(mi->mi_vfsp, &rootvp); 4452*7c478bd9Sstevel@tonic-gate if (error) 4453*7c478bd9Sstevel@tonic-gate return (error); 4454*7c478bd9Sstevel@tonic-gate 4455*7c478bd9Sstevel@tonic-gate error = failover_lookup(rp->r_path, rootvp, 4456*7c478bd9Sstevel@tonic-gate fi->lookupproc, fi->xattrdirproc, &nvp); 4457*7c478bd9Sstevel@tonic-gate 4458*7c478bd9Sstevel@tonic-gate VN_RELE(rootvp); 4459*7c478bd9Sstevel@tonic-gate 4460*7c478bd9Sstevel@tonic-gate if (error) 4461*7c478bd9Sstevel@tonic-gate return (error); 4462*7c478bd9Sstevel@tonic-gate 4463*7c478bd9Sstevel@tonic-gate /* 4464*7c478bd9Sstevel@tonic-gate * If we found the same rnode, we're done now 4465*7c478bd9Sstevel@tonic-gate */ 4466*7c478bd9Sstevel@tonic-gate if (nvp == vp) { 4467*7c478bd9Sstevel@tonic-gate /* 4468*7c478bd9Sstevel@tonic-gate * Failed and the new server may physically be same 4469*7c478bd9Sstevel@tonic-gate * OR may share a same disk subsystem. In this case 4470*7c478bd9Sstevel@tonic-gate * file handle for a particular file path is not going 4471*7c478bd9Sstevel@tonic-gate * to change, given the same filehandle lookup will 4472*7c478bd9Sstevel@tonic-gate * always locate the same rnode as the existing one. 4473*7c478bd9Sstevel@tonic-gate * All we might need to do is to update the r_server 4474*7c478bd9Sstevel@tonic-gate * with the current servinfo. 
4475*7c478bd9Sstevel@tonic-gate */ 4476*7c478bd9Sstevel@tonic-gate if (!VALID_FH(fi)) { 4477*7c478bd9Sstevel@tonic-gate rp->r_server = mi->mi_curr_serv; 4478*7c478bd9Sstevel@tonic-gate } 4479*7c478bd9Sstevel@tonic-gate VN_RELE(nvp); 4480*7c478bd9Sstevel@tonic-gate return (0); 4481*7c478bd9Sstevel@tonic-gate } 4482*7c478bd9Sstevel@tonic-gate 4483*7c478bd9Sstevel@tonic-gate /* 4484*7c478bd9Sstevel@tonic-gate * Try to make it so that no one else will find this 4485*7c478bd9Sstevel@tonic-gate * vnode because it is just a temporary to hold the 4486*7c478bd9Sstevel@tonic-gate * new file handle until that file handle can be 4487*7c478bd9Sstevel@tonic-gate * copied to the original vnode/rnode. 4488*7c478bd9Sstevel@tonic-gate */ 4489*7c478bd9Sstevel@tonic-gate nrp = VTOR(nvp); 4490*7c478bd9Sstevel@tonic-gate if (nrp->r_flags & RHASHED) 4491*7c478bd9Sstevel@tonic-gate rp_rmhash(nrp); 4492*7c478bd9Sstevel@tonic-gate 4493*7c478bd9Sstevel@tonic-gate /* 4494*7c478bd9Sstevel@tonic-gate * As a heuristic check on the validity of the new 4495*7c478bd9Sstevel@tonic-gate * file, check that the size and type match against 4496*7c478bd9Sstevel@tonic-gate * that we remember from the old version. 
4497*7c478bd9Sstevel@tonic-gate */ 4498*7c478bd9Sstevel@tonic-gate if (rp->r_size != nrp->r_size || vp->v_type != nvp->v_type) { 4499*7c478bd9Sstevel@tonic-gate zcmn_err(mi->mi_zone->zone_id, CE_WARN, 4500*7c478bd9Sstevel@tonic-gate "NFS replicas %s and %s: file %s not same.", 4501*7c478bd9Sstevel@tonic-gate rp->r_server->sv_hostname, 4502*7c478bd9Sstevel@tonic-gate nrp->r_server->sv_hostname, rp->r_path); 4503*7c478bd9Sstevel@tonic-gate VN_RELE(nvp); 4504*7c478bd9Sstevel@tonic-gate return (EINVAL); 4505*7c478bd9Sstevel@tonic-gate } 4506*7c478bd9Sstevel@tonic-gate 4507*7c478bd9Sstevel@tonic-gate /* 4508*7c478bd9Sstevel@tonic-gate * snarf the filehandle from the new rnode 4509*7c478bd9Sstevel@tonic-gate * then release it, again while updating the 4510*7c478bd9Sstevel@tonic-gate * hash queues for the rnode. 4511*7c478bd9Sstevel@tonic-gate */ 4512*7c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) 4513*7c478bd9Sstevel@tonic-gate rp_rmhash(rp); 4514*7c478bd9Sstevel@tonic-gate rp->r_server = mi->mi_curr_serv; 4515*7c478bd9Sstevel@tonic-gate rp->r_fh = nrp->r_fh; 4516*7c478bd9Sstevel@tonic-gate index = rtablehash(&rp->r_fh); 4517*7c478bd9Sstevel@tonic-gate rp->r_hashq = &rtable[index]; 4518*7c478bd9Sstevel@tonic-gate /* 4519*7c478bd9Sstevel@tonic-gate * Copy the attributes from the new rnode to the old 4520*7c478bd9Sstevel@tonic-gate * rnode. This will help to reduce unnecessary page 4521*7c478bd9Sstevel@tonic-gate * cache flushes. 
4522*7c478bd9Sstevel@tonic-gate */ 4523*7c478bd9Sstevel@tonic-gate rp->r_attr = nrp->r_attr; 4524*7c478bd9Sstevel@tonic-gate rp->r_attrtime = nrp->r_attrtime; 4525*7c478bd9Sstevel@tonic-gate rp->r_mtime = nrp->r_mtime; 4526*7c478bd9Sstevel@tonic-gate (void) nfs_free_data_reclaim(rp); 4527*7c478bd9Sstevel@tonic-gate nfs_setswaplike(vp, &rp->r_attr); 4528*7c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 4529*7c478bd9Sstevel@tonic-gate rp_addhash(rp); 4530*7c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 4531*7c478bd9Sstevel@tonic-gate VN_RELE(nvp); 4532*7c478bd9Sstevel@tonic-gate } 4533*7c478bd9Sstevel@tonic-gate 4534*7c478bd9Sstevel@tonic-gate /* 4535*7c478bd9Sstevel@tonic-gate * Update successful failover remap count 4536*7c478bd9Sstevel@tonic-gate */ 4537*7c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4538*7c478bd9Sstevel@tonic-gate mi->mi_remap++; 4539*7c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4540*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4541*7c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.remap.value.ui64++; 4542*7c478bd9Sstevel@tonic-gate #endif 4543*7c478bd9Sstevel@tonic-gate 4544*7c478bd9Sstevel@tonic-gate /* 4545*7c478bd9Sstevel@tonic-gate * If we have a copied filehandle to update, do it now. 4546*7c478bd9Sstevel@tonic-gate */ 4547*7c478bd9Sstevel@tonic-gate if (fi->fhp != NULL && fi->copyproc != NULL) 4548*7c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, vp); 4549*7c478bd9Sstevel@tonic-gate 4550*7c478bd9Sstevel@tonic-gate return (0); 4551*7c478bd9Sstevel@tonic-gate } 4552*7c478bd9Sstevel@tonic-gate 4553*7c478bd9Sstevel@tonic-gate /* 4554*7c478bd9Sstevel@tonic-gate * NFS client failover support 4555*7c478bd9Sstevel@tonic-gate * 4556*7c478bd9Sstevel@tonic-gate * We want a simple pathname lookup routine to parse the pieces 4557*7c478bd9Sstevel@tonic-gate * of path in rp->r_path. 
We know that the path was a created 4558*7c478bd9Sstevel@tonic-gate * as rnodes were made, so we know we have only to deal with 4559*7c478bd9Sstevel@tonic-gate * paths that look like: 4560*7c478bd9Sstevel@tonic-gate * dir1/dir2/dir3/file 4561*7c478bd9Sstevel@tonic-gate * Any evidence of anything like .., symlinks, and ENOTDIR 4562*7c478bd9Sstevel@tonic-gate * are hard errors, because they mean something in this filesystem 4563*7c478bd9Sstevel@tonic-gate * is different from the one we came from, or has changed under 4564*7c478bd9Sstevel@tonic-gate * us in some way. If this is true, we want the failure. 4565*7c478bd9Sstevel@tonic-gate * 4566*7c478bd9Sstevel@tonic-gate * Extended attributes: if the filesystem is mounted with extended 4567*7c478bd9Sstevel@tonic-gate * attributes enabled (-o xattr), the attribute directory will be 4568*7c478bd9Sstevel@tonic-gate * represented in the r_path as the magic name XATTR_RPATH. So if 4569*7c478bd9Sstevel@tonic-gate * we see that name in the pathname, is must be because this node 4570*7c478bd9Sstevel@tonic-gate * is an extended attribute. Therefore, look it up that way. 
4571*7c478bd9Sstevel@tonic-gate */ 4572*7c478bd9Sstevel@tonic-gate static int 4573*7c478bd9Sstevel@tonic-gate failover_lookup(char *path, vnode_t *root, 4574*7c478bd9Sstevel@tonic-gate int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *, int, 4575*7c478bd9Sstevel@tonic-gate vnode_t *, cred_t *, int), 4576*7c478bd9Sstevel@tonic-gate int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int), 4577*7c478bd9Sstevel@tonic-gate vnode_t **new) 4578*7c478bd9Sstevel@tonic-gate { 4579*7c478bd9Sstevel@tonic-gate vnode_t *dvp, *nvp; 4580*7c478bd9Sstevel@tonic-gate int error = EINVAL; 4581*7c478bd9Sstevel@tonic-gate char *s, *p, *tmppath; 4582*7c478bd9Sstevel@tonic-gate size_t len; 4583*7c478bd9Sstevel@tonic-gate mntinfo_t *mi; 4584*7c478bd9Sstevel@tonic-gate bool_t xattr; 4585*7c478bd9Sstevel@tonic-gate 4586*7c478bd9Sstevel@tonic-gate /* Make local copy of path */ 4587*7c478bd9Sstevel@tonic-gate len = strlen(path) + 1; 4588*7c478bd9Sstevel@tonic-gate tmppath = kmem_alloc(len, KM_SLEEP); 4589*7c478bd9Sstevel@tonic-gate (void) strcpy(tmppath, path); 4590*7c478bd9Sstevel@tonic-gate s = tmppath; 4591*7c478bd9Sstevel@tonic-gate 4592*7c478bd9Sstevel@tonic-gate dvp = root; 4593*7c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 4594*7c478bd9Sstevel@tonic-gate mi = VTOMI(root); 4595*7c478bd9Sstevel@tonic-gate xattr = mi->mi_flags & MI_EXTATTR; 4596*7c478bd9Sstevel@tonic-gate 4597*7c478bd9Sstevel@tonic-gate do { 4598*7c478bd9Sstevel@tonic-gate p = strchr(s, '/'); 4599*7c478bd9Sstevel@tonic-gate if (p != NULL) 4600*7c478bd9Sstevel@tonic-gate *p = '\0'; 4601*7c478bd9Sstevel@tonic-gate if (xattr && strcmp(s, XATTR_RPATH) == 0) { 4602*7c478bd9Sstevel@tonic-gate error = (*xattrdirproc)(dvp, &nvp, FALSE, CRED(), 4603*7c478bd9Sstevel@tonic-gate RFSCALL_SOFT); 4604*7c478bd9Sstevel@tonic-gate } else { 4605*7c478bd9Sstevel@tonic-gate error = (*lookupproc)(dvp, s, &nvp, NULL, 0, NULL, 4606*7c478bd9Sstevel@tonic-gate CRED(), RFSCALL_SOFT); 4607*7c478bd9Sstevel@tonic-gate } 
4608*7c478bd9Sstevel@tonic-gate if (p != NULL) 4609*7c478bd9Sstevel@tonic-gate *p++ = '/'; 4610*7c478bd9Sstevel@tonic-gate if (error) { 4611*7c478bd9Sstevel@tonic-gate VN_RELE(dvp); 4612*7c478bd9Sstevel@tonic-gate kmem_free(tmppath, len); 4613*7c478bd9Sstevel@tonic-gate return (error); 4614*7c478bd9Sstevel@tonic-gate } 4615*7c478bd9Sstevel@tonic-gate s = p; 4616*7c478bd9Sstevel@tonic-gate VN_RELE(dvp); 4617*7c478bd9Sstevel@tonic-gate dvp = nvp; 4618*7c478bd9Sstevel@tonic-gate } while (p != NULL); 4619*7c478bd9Sstevel@tonic-gate 4620*7c478bd9Sstevel@tonic-gate if (nvp != NULL && new != NULL) 4621*7c478bd9Sstevel@tonic-gate *new = nvp; 4622*7c478bd9Sstevel@tonic-gate kmem_free(tmppath, len); 4623*7c478bd9Sstevel@tonic-gate return (0); 4624*7c478bd9Sstevel@tonic-gate } 4625*7c478bd9Sstevel@tonic-gate 4626*7c478bd9Sstevel@tonic-gate /* 4627*7c478bd9Sstevel@tonic-gate * NFS client failover support 4628*7c478bd9Sstevel@tonic-gate * 4629*7c478bd9Sstevel@tonic-gate * sv_free() frees the malloc'd portion of a "servinfo_t". 
4630*7c478bd9Sstevel@tonic-gate */ 4631*7c478bd9Sstevel@tonic-gate void 4632*7c478bd9Sstevel@tonic-gate sv_free(servinfo_t *svp) 4633*7c478bd9Sstevel@tonic-gate { 4634*7c478bd9Sstevel@tonic-gate servinfo_t *next; 4635*7c478bd9Sstevel@tonic-gate struct knetconfig *knconf; 4636*7c478bd9Sstevel@tonic-gate 4637*7c478bd9Sstevel@tonic-gate while (svp != NULL) { 4638*7c478bd9Sstevel@tonic-gate next = svp->sv_next; 4639*7c478bd9Sstevel@tonic-gate if (svp->sv_secdata) 4640*7c478bd9Sstevel@tonic-gate sec_clnt_freeinfo(svp->sv_secdata); 4641*7c478bd9Sstevel@tonic-gate if (svp->sv_hostname && svp->sv_hostnamelen > 0) 4642*7c478bd9Sstevel@tonic-gate kmem_free(svp->sv_hostname, svp->sv_hostnamelen); 4643*7c478bd9Sstevel@tonic-gate knconf = svp->sv_knconf; 4644*7c478bd9Sstevel@tonic-gate if (knconf != NULL) { 4645*7c478bd9Sstevel@tonic-gate if (knconf->knc_protofmly != NULL) 4646*7c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 4647*7c478bd9Sstevel@tonic-gate if (knconf->knc_proto != NULL) 4648*7c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_proto, KNC_STRSIZE); 4649*7c478bd9Sstevel@tonic-gate kmem_free(knconf, sizeof (*knconf)); 4650*7c478bd9Sstevel@tonic-gate } 4651*7c478bd9Sstevel@tonic-gate knconf = svp->sv_origknconf; 4652*7c478bd9Sstevel@tonic-gate if (knconf != NULL) { 4653*7c478bd9Sstevel@tonic-gate if (knconf->knc_protofmly != NULL) 4654*7c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_protofmly, KNC_STRSIZE); 4655*7c478bd9Sstevel@tonic-gate if (knconf->knc_proto != NULL) 4656*7c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_proto, KNC_STRSIZE); 4657*7c478bd9Sstevel@tonic-gate kmem_free(knconf, sizeof (*knconf)); 4658*7c478bd9Sstevel@tonic-gate } 4659*7c478bd9Sstevel@tonic-gate if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0) 4660*7c478bd9Sstevel@tonic-gate kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen); 4661*7c478bd9Sstevel@tonic-gate mutex_destroy(&svp->sv_lock); 4662*7c478bd9Sstevel@tonic-gate kmem_free(svp, sizeof (*svp)); 
4663*7c478bd9Sstevel@tonic-gate svp = next; 4664*7c478bd9Sstevel@tonic-gate } 4665*7c478bd9Sstevel@tonic-gate } 4666*7c478bd9Sstevel@tonic-gate 4667*7c478bd9Sstevel@tonic-gate /* 4668*7c478bd9Sstevel@tonic-gate * Only can return non-zero if intr != 0. 4669*7c478bd9Sstevel@tonic-gate */ 4670*7c478bd9Sstevel@tonic-gate int 4671*7c478bd9Sstevel@tonic-gate nfs_rw_enter_sig(nfs_rwlock_t *l, krw_t rw, int intr) 4672*7c478bd9Sstevel@tonic-gate { 4673*7c478bd9Sstevel@tonic-gate 4674*7c478bd9Sstevel@tonic-gate mutex_enter(&l->lock); 4675*7c478bd9Sstevel@tonic-gate 4676*7c478bd9Sstevel@tonic-gate /* 4677*7c478bd9Sstevel@tonic-gate * If this is a nested enter, then allow it. There 4678*7c478bd9Sstevel@tonic-gate * must be as many exits as enters through. 4679*7c478bd9Sstevel@tonic-gate */ 4680*7c478bd9Sstevel@tonic-gate if (l->owner == curthread) { 4681*7c478bd9Sstevel@tonic-gate /* lock is held for writing by current thread */ 4682*7c478bd9Sstevel@tonic-gate ASSERT(rw == RW_READER || rw == RW_WRITER); 4683*7c478bd9Sstevel@tonic-gate l->count--; 4684*7c478bd9Sstevel@tonic-gate } else if (rw == RW_READER) { 4685*7c478bd9Sstevel@tonic-gate /* 4686*7c478bd9Sstevel@tonic-gate * While there is a writer active or writers waiting, 4687*7c478bd9Sstevel@tonic-gate * then wait for them to finish up and move on. Then, 4688*7c478bd9Sstevel@tonic-gate * increment the count to indicate that a reader is 4689*7c478bd9Sstevel@tonic-gate * active. 
4690*7c478bd9Sstevel@tonic-gate */ 4691*7c478bd9Sstevel@tonic-gate while (l->count < 0 || l->waiters > 0) { 4692*7c478bd9Sstevel@tonic-gate if (intr) { 4693*7c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 4694*7c478bd9Sstevel@tonic-gate 4695*7c478bd9Sstevel@tonic-gate if (lwp != NULL) 4696*7c478bd9Sstevel@tonic-gate lwp->lwp_nostop++; 4697*7c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&l->cv, &l->lock)) { 4698*7c478bd9Sstevel@tonic-gate if (lwp != NULL) 4699*7c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 4700*7c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 4701*7c478bd9Sstevel@tonic-gate return (EINTR); 4702*7c478bd9Sstevel@tonic-gate } 4703*7c478bd9Sstevel@tonic-gate if (lwp != NULL) 4704*7c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 4705*7c478bd9Sstevel@tonic-gate } else 4706*7c478bd9Sstevel@tonic-gate cv_wait(&l->cv, &l->lock); 4707*7c478bd9Sstevel@tonic-gate } 4708*7c478bd9Sstevel@tonic-gate ASSERT(l->count < INT_MAX); 4709*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 4710*7c478bd9Sstevel@tonic-gate if ((l->count % 10000) == 9999) 4711*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "nfs_rw_enter_sig: count %d on" 4712*7c478bd9Sstevel@tonic-gate "rwlock @ %p\n", l->count, (void *)&l); 4713*7c478bd9Sstevel@tonic-gate #endif 4714*7c478bd9Sstevel@tonic-gate l->count++; 4715*7c478bd9Sstevel@tonic-gate } else { 4716*7c478bd9Sstevel@tonic-gate ASSERT(rw == RW_WRITER); 4717*7c478bd9Sstevel@tonic-gate /* 4718*7c478bd9Sstevel@tonic-gate * While there are readers active or a writer 4719*7c478bd9Sstevel@tonic-gate * active, then wait for all of the readers 4720*7c478bd9Sstevel@tonic-gate * to finish or for the writer to finish. 4721*7c478bd9Sstevel@tonic-gate * Then, set the owner field to curthread and 4722*7c478bd9Sstevel@tonic-gate * decrement count to indicate that a writer 4723*7c478bd9Sstevel@tonic-gate * is active. 
4724*7c478bd9Sstevel@tonic-gate */ 4725*7c478bd9Sstevel@tonic-gate while (l->count > 0 || l->owner != NULL) { 4726*7c478bd9Sstevel@tonic-gate l->waiters++; 4727*7c478bd9Sstevel@tonic-gate if (intr) { 4728*7c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 4729*7c478bd9Sstevel@tonic-gate 4730*7c478bd9Sstevel@tonic-gate if (lwp != NULL) 4731*7c478bd9Sstevel@tonic-gate lwp->lwp_nostop++; 4732*7c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&l->cv, &l->lock)) { 4733*7c478bd9Sstevel@tonic-gate if (lwp != NULL) 4734*7c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 4735*7c478bd9Sstevel@tonic-gate l->waiters--; 4736*7c478bd9Sstevel@tonic-gate cv_broadcast(&l->cv); 4737*7c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 4738*7c478bd9Sstevel@tonic-gate return (EINTR); 4739*7c478bd9Sstevel@tonic-gate } 4740*7c478bd9Sstevel@tonic-gate if (lwp != NULL) 4741*7c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 4742*7c478bd9Sstevel@tonic-gate } else 4743*7c478bd9Sstevel@tonic-gate cv_wait(&l->cv, &l->lock); 4744*7c478bd9Sstevel@tonic-gate l->waiters--; 4745*7c478bd9Sstevel@tonic-gate } 4746*7c478bd9Sstevel@tonic-gate l->owner = curthread; 4747*7c478bd9Sstevel@tonic-gate l->count--; 4748*7c478bd9Sstevel@tonic-gate } 4749*7c478bd9Sstevel@tonic-gate 4750*7c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 4751*7c478bd9Sstevel@tonic-gate 4752*7c478bd9Sstevel@tonic-gate return (0); 4753*7c478bd9Sstevel@tonic-gate } 4754*7c478bd9Sstevel@tonic-gate 4755*7c478bd9Sstevel@tonic-gate /* 4756*7c478bd9Sstevel@tonic-gate * If the lock is available, obtain it and return non-zero. If there is 4757*7c478bd9Sstevel@tonic-gate * already a conflicting lock, return 0 immediately. 
4758*7c478bd9Sstevel@tonic-gate */ 4759*7c478bd9Sstevel@tonic-gate 4760*7c478bd9Sstevel@tonic-gate int 4761*7c478bd9Sstevel@tonic-gate nfs_rw_tryenter(nfs_rwlock_t *l, krw_t rw) 4762*7c478bd9Sstevel@tonic-gate { 4763*7c478bd9Sstevel@tonic-gate mutex_enter(&l->lock); 4764*7c478bd9Sstevel@tonic-gate 4765*7c478bd9Sstevel@tonic-gate /* 4766*7c478bd9Sstevel@tonic-gate * If this is a nested enter, then allow it. There 4767*7c478bd9Sstevel@tonic-gate * must be as many exits as enters through. 4768*7c478bd9Sstevel@tonic-gate */ 4769*7c478bd9Sstevel@tonic-gate if (l->owner == curthread) { 4770*7c478bd9Sstevel@tonic-gate /* lock is held for writing by current thread */ 4771*7c478bd9Sstevel@tonic-gate ASSERT(rw == RW_READER || rw == RW_WRITER); 4772*7c478bd9Sstevel@tonic-gate l->count--; 4773*7c478bd9Sstevel@tonic-gate } else if (rw == RW_READER) { 4774*7c478bd9Sstevel@tonic-gate /* 4775*7c478bd9Sstevel@tonic-gate * If there is a writer active or writers waiting, deny the 4776*7c478bd9Sstevel@tonic-gate * lock. Otherwise, bump the count of readers. 4777*7c478bd9Sstevel@tonic-gate */ 4778*7c478bd9Sstevel@tonic-gate if (l->count < 0 || l->waiters > 0) { 4779*7c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 4780*7c478bd9Sstevel@tonic-gate return (0); 4781*7c478bd9Sstevel@tonic-gate } 4782*7c478bd9Sstevel@tonic-gate l->count++; 4783*7c478bd9Sstevel@tonic-gate } else { 4784*7c478bd9Sstevel@tonic-gate ASSERT(rw == RW_WRITER); 4785*7c478bd9Sstevel@tonic-gate /* 4786*7c478bd9Sstevel@tonic-gate * If there are readers active or a writer active, deny the 4787*7c478bd9Sstevel@tonic-gate * lock. Otherwise, set the owner field to curthread and 4788*7c478bd9Sstevel@tonic-gate * decrement count to indicate that a writer is active. 
4789*7c478bd9Sstevel@tonic-gate */ 4790*7c478bd9Sstevel@tonic-gate if (l->count > 0 || l->owner != NULL) { 4791*7c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 4792*7c478bd9Sstevel@tonic-gate return (0); 4793*7c478bd9Sstevel@tonic-gate } 4794*7c478bd9Sstevel@tonic-gate l->owner = curthread; 4795*7c478bd9Sstevel@tonic-gate l->count--; 4796*7c478bd9Sstevel@tonic-gate } 4797*7c478bd9Sstevel@tonic-gate 4798*7c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 4799*7c478bd9Sstevel@tonic-gate 4800*7c478bd9Sstevel@tonic-gate return (1); 4801*7c478bd9Sstevel@tonic-gate }

/*
 * Drop one hold on an nfs_rwlock_t.
 *
 * The lock encodes its state in two fields, both protected by l->lock:
 * a non-NULL owner means the lock is write-held and count is negative
 * (one unit per nested writer hold); a NULL owner with a positive count
 * means that many reader holds are outstanding.
 *
 * Writer release: move count one step towards zero.  When it reaches
 * zero the outermost writer hold is gone, so the owner is cleared and
 * every thread blocked on the condition variable is woken.
 *
 * Reader release: decrement count.  When the last reader leaves and
 * threads are recorded as waiting, every thread blocked on the
 * condition variable is woken.
 */
void
nfs_rw_exit(nfs_rwlock_t *l)
{
	mutex_enter(&l->lock);

	if (l->owner == NULL) {
		/* Reader hold. */
		ASSERT(l->count > 0);
		l->count--;
		if (l->waiters > 0 && l->count == 0)
			cv_broadcast(&l->cv);
		mutex_exit(&l->lock);
		return;
	}

	/* Writer hold; only the owning thread may release it. */
	ASSERT(l->owner == curthread);
	l->count++;
	if (l->count == 0) {
		l->owner = NULL;
		cv_broadcast(&l->cv);
	}
	mutex_exit(&l->lock);
}

/*
 * Report whether the lock is currently held in the given mode.
 *
 * For RW_READER the result is non-zero when count is positive; for
 * RW_WRITER it is non-zero when count is negative.  The count is read
 * without taking l->lock, and the identity of the holder is not
 * checked.
 */
int
nfs_rw_lock_held(nfs_rwlock_t *l, krw_t rw)
{
	if (rw != RW_READER) {
		ASSERT(rw == RW_WRITER);
		return (l->count < 0);
	}

	return (l->count > 0);
}

/*
 * Prepare an nfs_rwlock_t for use: initialize its mutex and condition
 * variable and mark it as unheld with no waiters.  The name, type and
 * arg parameters are accepted but not used.
 */
/* ARGSUSED */
void
nfs_rw_init(nfs_rwlock_t *l, char *name, krw_type_t type, void *arg)
{
	mutex_init(&l->lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&l->cv, NULL, CV_DEFAULT, NULL);

	l->owner = NULL;
	l->waiters = 0;
	l->count = 0;
}

/*
 * Tear down the synchronization objects embedded in an nfs_rwlock_t.
 * The lock structure itself is not freed here.
 */
void
nfs_rw_destroy(nfs_rwlock_t *l)
{
	cv_destroy(&l->cv);
	mutex_destroy(&l->lock);
}

/*
 * Three-way comparison of two rddir_cache entries, ordering first by
 * nfs3_cookie and then by buflen.  Returns -1, 0 or 1.
 */
int
nfs3_rddir_compar(const void *x, const void *y)
{
	const rddir_cache *lhs = x;
	const rddir_cache *rhs = y;

	/* Primary key: the cookie. */
	if (lhs->nfs3_cookie < rhs->nfs3_cookie)
		return (-1);
	if (lhs->nfs3_cookie != rhs->nfs3_cookie)
		return (1);

	/* Cookies match; fall back to the buffer length. */
	if (lhs->buflen == rhs->buflen)
		return (0);

	return (lhs->buflen < rhs->buflen ? -1 : 1);
}

/*
 * Three-way comparison of two rddir_cache entries, ordering first by
 * nfs_cookie and then by buflen.  Returns -1, 0 or 1.
 */
int
nfs_rddir_compar(const void *x, const void *y)
{
	const rddir_cache *lhs = x;
	const rddir_cache *rhs = y;

	/* Primary key: the cookie. */
	if (lhs->nfs_cookie < rhs->nfs_cookie)
		return (-1);
	if (lhs->nfs_cookie != rhs->nfs_cookie)
		return (1);

	/* Cookies match; fall back to the buffer length. */
	if (lhs->buflen == rhs->buflen)
		return (0);

	return (lhs->buflen < rhs->buflen ? -1 : 1);
}

/*
 * Build a single comma-separated string naming every server on the
 * mi->mi_servers list.
 *
 * The buffer is obtained with kmem_alloc(KM_SLEEP) and its size is
 * stored through len; the caller presumably releases it with
 * kmem_free() using that size (confirm against the callers).
 *
 * NOTE(review): the arithmetic below treats sv_hostnamelen as the
 * length of sv_hostname including its terminating NUL, so each name
 * occupies exactly sv_hostnamelen bytes once its terminator has been
 * replaced by a separator.  It also assumes the server list is not
 * empty; with an empty list the final store would land before the
 * start of the buffer.
 */
static char *
nfs_getsrvnames(mntinfo_t *mi, size_t *len)
{
	servinfo_t *svp;
	char *buf;
	char *cursor;
	size_t total = 0;

	/* One slot per name: the characters plus a ',' or the final NUL. */
	for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next)
		total += svp->sv_hostnamelen;

	buf = kmem_alloc(total, KM_SLEEP);

	cursor = buf;
	svp = mi->mi_servers;
	while (svp != NULL) {
		(void) strcpy(cursor, svp->sv_hostname);
		/* Step onto the copied terminator and overwrite it. */
		cursor += svp->sv_hostnamelen - 1;
		*cursor++ = ',';
		svp = svp->sv_next;
	}

	/* The last separator becomes the string terminator. */
	cursor[-1] = '\0';

	*len = total;

	return (buf);
}