19ec7b004SRick Macklem /*- 29ec7b004SRick Macklem * Copyright (c) 1989, 1993 39ec7b004SRick Macklem * The Regents of the University of California. All rights reserved. 49ec7b004SRick Macklem * 59ec7b004SRick Macklem * This code is derived from software contributed to Berkeley by 69ec7b004SRick Macklem * Rick Macklem at The University of Guelph. 79ec7b004SRick Macklem * 89ec7b004SRick Macklem * Redistribution and use in source and binary forms, with or without 99ec7b004SRick Macklem * modification, are permitted provided that the following conditions 109ec7b004SRick Macklem * are met: 119ec7b004SRick Macklem * 1. Redistributions of source code must retain the above copyright 129ec7b004SRick Macklem * notice, this list of conditions and the following disclaimer. 139ec7b004SRick Macklem * 2. Redistributions in binary form must reproduce the above copyright 149ec7b004SRick Macklem * notice, this list of conditions and the following disclaimer in the 159ec7b004SRick Macklem * documentation and/or other materials provided with the distribution. 169ec7b004SRick Macklem * 4. Neither the name of the University nor the names of its contributors 179ec7b004SRick Macklem * may be used to endorse or promote products derived from this software 189ec7b004SRick Macklem * without specific prior written permission. 199ec7b004SRick Macklem * 209ec7b004SRick Macklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 219ec7b004SRick Macklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 229ec7b004SRick Macklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 239ec7b004SRick Macklem * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 249ec7b004SRick Macklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 259ec7b004SRick Macklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 269ec7b004SRick Macklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 279ec7b004SRick Macklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 289ec7b004SRick Macklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 299ec7b004SRick Macklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 309ec7b004SRick Macklem * SUCH DAMAGE. 319ec7b004SRick Macklem * 329ec7b004SRick Macklem */ 339ec7b004SRick Macklem 349ec7b004SRick Macklem #include <sys/cdefs.h> 359ec7b004SRick Macklem __FBSDID("$FreeBSD$"); 369ec7b004SRick Macklem 379ec7b004SRick Macklem /* 389ec7b004SRick Macklem * Here is the basic algorithm: 399ec7b004SRick Macklem * First, some design criteria I used: 409ec7b004SRick Macklem * - I think a false hit is more serious than a false miss 419ec7b004SRick Macklem * - A false hit for an RPC that has Op(s) that order via seqid# must be 429ec7b004SRick Macklem * avoided at all cost 439ec7b004SRick Macklem * - A valid hit will probably happen a long time after the original reply 449ec7b004SRick Macklem * and the TCP socket that the original request was received on will no 459ec7b004SRick Macklem * longer be active 469ec7b004SRick Macklem * (The long time delay implies to me that LRU is not appropriate.) 479ec7b004SRick Macklem * - The mechanism will satisfy the requirements of ordering Ops with seqid#s 489ec7b004SRick Macklem * in them as well as minimizing the risk of redoing retried non-idempotent 499ec7b004SRick Macklem * Ops. 509ec7b004SRick Macklem * Because it is biased towards avoiding false hits, multiple entries with 519ec7b004SRick Macklem * the same xid are to be expected, especially for the case of the entry 529ec7b004SRick Macklem * in the cache being related to a seqid# sequenced Op. 539ec7b004SRick Macklem * 549ec7b004SRick Macklem * The basic algorithm I'm about to code up: 559ec7b004SRick Macklem * - Null RPCs bypass the cache and are just done 569ec7b004SRick Macklem * For TCP 579ec7b004SRick Macklem * - key on <xid, NFS version> (as noted above, there can be several 589ec7b004SRick Macklem * entries with the same key) 599ec7b004SRick Macklem * When a request arrives: 609ec7b004SRick Macklem * For all that match key 619ec7b004SRick Macklem * - if RPC# != OR request_size != 629ec7b004SRick Macklem * - not a match with this one 639ec7b004SRick Macklem * - if NFSv4 and received on same TCP socket OR 649ec7b004SRick Macklem * received on a TCP connection created before the 659ec7b004SRick Macklem * entry was cached 669ec7b004SRick Macklem * - not a match with this one 679ec7b004SRick Macklem * (V2,3 clients might retry on same TCP socket) 689ec7b004SRick Macklem * - calculate checksum on first N bytes of NFS XDR 699ec7b004SRick Macklem * - if checksum != 709ec7b004SRick Macklem * - not a match for this one 719ec7b004SRick Macklem * If any of the remaining ones that match has a 729ec7b004SRick Macklem * seqid_refcnt > 0 739ec7b004SRick Macklem * - not a match (go do RPC, using new cache entry) 749ec7b004SRick Macklem * If one match left 759ec7b004SRick Macklem * - a hit (reply from cache) 769ec7b004SRick Macklem * else 779ec7b004SRick Macklem * - miss (go do RPC, using new cache entry) 789ec7b004SRick Macklem * 799ec7b004SRick Macklem * During processing of NFSv4 request: 809ec7b004SRick Macklem * - set a flag when a non-idempotent Op is processed 819ec7b004SRick Macklem * - when an Op that uses a seqid# (Open,...) is processed 829ec7b004SRick Macklem * - if same seqid# as referenced entry in cache 839ec7b004SRick Macklem * - free new cache entry 849ec7b004SRick Macklem * - reply from referenced cache entry 859ec7b004SRick Macklem * else if next seqid# in order 869ec7b004SRick Macklem * - free referenced cache entry 879ec7b004SRick Macklem * - increment seqid_refcnt on new cache entry 889ec7b004SRick Macklem * - set pointer from Openowner/Lockowner to 899ec7b004SRick Macklem * new cache entry (aka reference it) 909ec7b004SRick Macklem * else if first seqid# in sequence 919ec7b004SRick Macklem * - increment seqid_refcnt on new cache entry 929ec7b004SRick Macklem * - set pointer from Openowner/Lockowner to 939ec7b004SRick Macklem * new cache entry (aka reference it) 949ec7b004SRick Macklem * 959ec7b004SRick Macklem * At end of RPC processing: 969ec7b004SRick Macklem * - if seqid_refcnt > 0 OR flagged non-idempotent on new 979ec7b004SRick Macklem * cache entry 989ec7b004SRick Macklem * - save reply in cache entry 999ec7b004SRick Macklem * - calculate checksum on first N bytes of NFS XDR 1009ec7b004SRick Macklem * request 1019ec7b004SRick Macklem * - note op and length of XDR request (in bytes) 1029ec7b004SRick Macklem * - timestamp it 1039ec7b004SRick Macklem * else 1049ec7b004SRick Macklem * - free new cache entry 1059ec7b004SRick Macklem * - Send reply (noting info for socket activity check, below) 1069ec7b004SRick Macklem * 1079ec7b004SRick Macklem * For cache entries saved above: 1089ec7b004SRick Macklem * - if saved since seqid_refcnt was > 0 1099ec7b004SRick Macklem * - free when seqid_refcnt decrements to 0 1109ec7b004SRick Macklem * (when next one in sequence is processed above, or 1119ec7b004SRick Macklem * when Openowner/Lockowner is discarded) 1129ec7b004SRick Macklem * else { non-idempotent Op(s) } 1139ec7b004SRick Macklem * - free when 1149ec7b004SRick Macklem * - some further activity observed on same 1159ec7b004SRick Macklem * socket 1169ec7b004SRick Macklem * (I'm not yet sure how I'm going to do 1179ec7b004SRick Macklem * this. Maybe look at the TCP connection 1189ec7b004SRick Macklem * to see if the send_tcp_sequence# is well 1199ec7b004SRick Macklem * past sent reply OR K additional RPCs 1209ec7b004SRick Macklem * replied on same socket OR?) 1219ec7b004SRick Macklem * OR 1229ec7b004SRick Macklem * - when very old (hours, days, weeks?) 1239ec7b004SRick Macklem * 1249ec7b004SRick Macklem * For UDP (v2, 3 only), pretty much the old way: 1259ec7b004SRick Macklem * - key on <xid, NFS version, RPC#, Client host ip#> 1269ec7b004SRick Macklem * (at most one entry for each key) 1279ec7b004SRick Macklem * 1289ec7b004SRick Macklem * When a Request arrives: 1299ec7b004SRick Macklem * - if a match with entry via key 1309ec7b004SRick Macklem * - if RPC marked In_progress 1319ec7b004SRick Macklem * - discard request (don't send reply) 1329ec7b004SRick Macklem * else 1339ec7b004SRick Macklem * - reply from cache 1349ec7b004SRick Macklem * - timestamp cache entry 1359ec7b004SRick Macklem * else 1369ec7b004SRick Macklem * - add entry to cache, marked In_progress 1379ec7b004SRick Macklem * - do RPC 1389ec7b004SRick Macklem * - when RPC done 1399ec7b004SRick Macklem * - if RPC# non-idempotent 1409ec7b004SRick Macklem * - mark entry Done (not In_progress) 1419ec7b004SRick Macklem * - save reply 1429ec7b004SRick Macklem * - timestamp cache entry 1439ec7b004SRick Macklem * else 1449ec7b004SRick Macklem * - free cache entry 1459ec7b004SRick Macklem * - send reply 1469ec7b004SRick Macklem * 1479ec7b004SRick Macklem * Later, entries with saved replies are free'd a short time (few minutes) 1489ec7b004SRick Macklem * after reply sent (timestamp). 1499ec7b004SRick Macklem * Reference: Chet Juszczak, "Improving the Performance and Correctness 1509ec7b004SRick Macklem * of an NFS Server", in Proc. Winter 1989 USENIX Conference, 1519ec7b004SRick Macklem * pages 53-63. San Diego, February 1989. 1529ec7b004SRick Macklem * for the UDP case. 1539ec7b004SRick Macklem * nfsrc_floodlevel is set to the allowable upper limit for saved replies 1549ec7b004SRick Macklem * for TCP. For V3, a reply won't be saved when the flood level is 1559ec7b004SRick Macklem * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in 1569ec7b004SRick Macklem * that case. This level should be set high enough that this almost 1579ec7b004SRick Macklem * never happens. 1589ec7b004SRick Macklem */ 1599ec7b004SRick Macklem #ifndef APPLEKEXT 1609ec7b004SRick Macklem #include <fs/nfs/nfsport.h> 1619ec7b004SRick Macklem 1629ec7b004SRick Macklem extern struct nfsstats newnfsstats; 16393c5875bSRick Macklem extern struct mtx nfsrc_udpmtx; 16493c5875bSRick Macklem extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 1659ec7b004SRick Macklem int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; 1669ec7b004SRick Macklem #endif /* !APPLEKEXT */ 1679ec7b004SRick Macklem 16893c5875bSRick Macklem SYSCTL_DECL(_vfs_nfsd); 16993c5875bSRick Macklem 17093c5875bSRick Macklem static u_int nfsrc_tcphighwater = 0; 17193c5875bSRick Macklem static int 17293c5875bSRick Macklem sysctl_tcphighwater(SYSCTL_HANDLER_ARGS) 17393c5875bSRick Macklem { 17493c5875bSRick Macklem int error, newhighwater; 17593c5875bSRick Macklem 17693c5875bSRick Macklem newhighwater = nfsrc_tcphighwater; 17793c5875bSRick Macklem error = sysctl_handle_int(oidp, &newhighwater, 0, req); 17893c5875bSRick Macklem if (error != 0 || req->newptr == NULL) 17993c5875bSRick Macklem return (error); 18093c5875bSRick Macklem if (newhighwater < 0) 18193c5875bSRick Macklem return (EINVAL); 18293c5875bSRick Macklem if (newhighwater >= nfsrc_floodlevel) 18393c5875bSRick Macklem nfsrc_floodlevel = newhighwater + newhighwater / 5; 18493c5875bSRick Macklem nfsrc_tcphighwater = newhighwater; 18593c5875bSRick Macklem return (0); 18693c5875bSRick Macklem } 18793c5875bSRick Macklem SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0, 18893c5875bSRick Macklem sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU", 18993c5875bSRick Macklem "High water mark for TCP cache entries"); 19093c5875bSRick Macklem 19193c5875bSRick Macklem static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER; 19293c5875bSRick Macklem SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW, 19393c5875bSRick Macklem &nfsrc_udphighwater, 0, 19493c5875bSRick Macklem "High water mark for UDP cache entries"); 19593c5875bSRick Macklem static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT; 19693c5875bSRick Macklem SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW, 19793c5875bSRick Macklem &nfsrc_tcptimeout, 0, 19893c5875bSRick Macklem "Timeout for TCP entries in the DRC"); 19993c5875bSRick Macklem static u_int nfsrc_tcpnonidempotent = 1; 20093c5875bSRick Macklem SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW, 20193c5875bSRick Macklem &nfsrc_tcpnonidempotent, 0, 20293c5875bSRick Macklem "Enable the DRC for NFS over TCP"); 20393c5875bSRick Macklem 20493c5875bSRick Macklem static int nfsrc_udpcachesize = 0; 2059ec7b004SRick Macklem static TAILQ_HEAD(, nfsrvcache) nfsrvudplru; 20693c5875bSRick Macklem static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; 20793c5875bSRick Macklem 2089ec7b004SRick Macklem /* 2099ec7b004SRick Macklem * and the reverse mapping from generic to Version 2 procedure numbers 2109ec7b004SRick Macklem */ 2119ec7b004SRick Macklem static int newnfsv2_procid[NFS_V3NPROCS] = { 2129ec7b004SRick Macklem NFSV2PROC_NULL, 2139ec7b004SRick Macklem NFSV2PROC_GETATTR, 2149ec7b004SRick Macklem NFSV2PROC_SETATTR, 2159ec7b004SRick Macklem NFSV2PROC_LOOKUP, 2169ec7b004SRick Macklem NFSV2PROC_NOOP, 2179ec7b004SRick Macklem NFSV2PROC_READLINK, 2189ec7b004SRick Macklem NFSV2PROC_READ, 2199ec7b004SRick Macklem NFSV2PROC_WRITE, 2209ec7b004SRick Macklem NFSV2PROC_CREATE, 2219ec7b004SRick Macklem NFSV2PROC_MKDIR, 2229ec7b004SRick Macklem NFSV2PROC_SYMLINK, 2239ec7b004SRick Macklem NFSV2PROC_CREATE, 2249ec7b004SRick Macklem NFSV2PROC_REMOVE, 2259ec7b004SRick Macklem NFSV2PROC_RMDIR, 2269ec7b004SRick Macklem NFSV2PROC_RENAME, 2279ec7b004SRick Macklem NFSV2PROC_LINK, 2289ec7b004SRick Macklem NFSV2PROC_READDIR, 2299ec7b004SRick Macklem NFSV2PROC_NOOP, 2309ec7b004SRick Macklem NFSV2PROC_STATFS, 2319ec7b004SRick Macklem NFSV2PROC_NOOP, 2329ec7b004SRick Macklem NFSV2PROC_NOOP, 2339ec7b004SRick Macklem NFSV2PROC_NOOP, 2349ec7b004SRick Macklem }; 2359ec7b004SRick Macklem 23693c5875bSRick Macklem #define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE) 2379ec7b004SRick Macklem #define NFSRCUDPHASH(xid) \ 23893c5875bSRick Macklem (&nfsrvudphashtbl[nfsrc_hash(xid)]) 2399ec7b004SRick Macklem #define NFSRCHASH(xid) \ 24093c5875bSRick Macklem (&nfsrchash_table[nfsrc_hash(xid)].tbl) 2419ec7b004SRick Macklem #define TRUE 1 2429ec7b004SRick Macklem #define FALSE 0 2439ec7b004SRick Macklem #define NFSRVCACHE_CHECKLEN 100 2449ec7b004SRick Macklem 2459ec7b004SRick Macklem /* True iff the rpc reply is an nfs status ONLY! */ 2469ec7b004SRick Macklem static int nfsv2_repstat[NFS_V3NPROCS] = { 2479ec7b004SRick Macklem FALSE, 2489ec7b004SRick Macklem FALSE, 2499ec7b004SRick Macklem FALSE, 2509ec7b004SRick Macklem FALSE, 2519ec7b004SRick Macklem FALSE, 2529ec7b004SRick Macklem FALSE, 2539ec7b004SRick Macklem FALSE, 2549ec7b004SRick Macklem FALSE, 2559ec7b004SRick Macklem FALSE, 2569ec7b004SRick Macklem FALSE, 2579ec7b004SRick Macklem TRUE, 2589ec7b004SRick Macklem TRUE, 2599ec7b004SRick Macklem TRUE, 2609ec7b004SRick Macklem TRUE, 2619ec7b004SRick Macklem FALSE, 2629ec7b004SRick Macklem TRUE, 2639ec7b004SRick Macklem FALSE, 2649ec7b004SRick Macklem FALSE, 2659ec7b004SRick Macklem FALSE, 2669ec7b004SRick Macklem FALSE, 2679ec7b004SRick Macklem FALSE, 2689ec7b004SRick Macklem FALSE, 2699ec7b004SRick Macklem }; 2709ec7b004SRick Macklem 2719ec7b004SRick Macklem /* 2729ec7b004SRick Macklem * Will NFS want to work over IPv6 someday? 2739ec7b004SRick Macklem */ 2749ec7b004SRick Macklem #define NETFAMILY(rp) \ 2759ec7b004SRick Macklem (((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET) 2769ec7b004SRick Macklem 2779ec7b004SRick Macklem /* local functions */ 2789ec7b004SRick Macklem static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); 2799ec7b004SRick Macklem static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); 2809ec7b004SRick Macklem static void nfsrc_lock(struct nfsrvcache *rp); 2819ec7b004SRick Macklem static void nfsrc_unlock(struct nfsrvcache *rp); 2829ec7b004SRick Macklem static void nfsrc_wanted(struct nfsrvcache *rp); 2839ec7b004SRick Macklem static void nfsrc_freecache(struct nfsrvcache *rp); 2849ec7b004SRick Macklem static void nfsrc_trimcache(u_int64_t, struct socket *); 2859ec7b004SRick Macklem static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t, 2869ec7b004SRick Macklem struct socket *); 2879ec7b004SRick Macklem static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum); 2889ec7b004SRick Macklem static void nfsrc_marksametcpconn(u_int64_t); 2899ec7b004SRick Macklem 2909ec7b004SRick Macklem /* 29193c5875bSRick Macklem * Return the correct mutex for this cache entry. 29293c5875bSRick Macklem */ 29393c5875bSRick Macklem static __inline struct mtx * 29493c5875bSRick Macklem nfsrc_cachemutex(struct nfsrvcache *rp) 29593c5875bSRick Macklem { 29693c5875bSRick Macklem 29793c5875bSRick Macklem if ((rp->rc_flag & RC_UDP) != 0) 29893c5875bSRick Macklem return (&nfsrc_udpmtx); 29993c5875bSRick Macklem return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx); 30093c5875bSRick Macklem } 30193c5875bSRick Macklem 30293c5875bSRick Macklem /* 3039ec7b004SRick Macklem * Initialize the server request cache list 3049ec7b004SRick Macklem */ 3059ec7b004SRick Macklem APPLESTATIC void 3069ec7b004SRick Macklem nfsrvd_initcache(void) 3079ec7b004SRick Macklem { 3089ec7b004SRick Macklem int i; 3099ec7b004SRick Macklem static int inited = 0; 3109ec7b004SRick Macklem 3119ec7b004SRick Macklem if (inited) 3129ec7b004SRick Macklem return; 3139ec7b004SRick Macklem inited = 1; 3149ec7b004SRick Macklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 3159ec7b004SRick Macklem LIST_INIT(&nfsrvudphashtbl[i]); 31693c5875bSRick Macklem LIST_INIT(&nfsrchash_table[i].tbl); 3179ec7b004SRick Macklem } 3189ec7b004SRick Macklem TAILQ_INIT(&nfsrvudplru); 3199ec7b004SRick Macklem nfsrc_tcpsavedreplies = 0; 3209ec7b004SRick Macklem nfsrc_udpcachesize = 0; 3219ec7b004SRick Macklem newnfsstats.srvcache_tcppeak = 0; 3229ec7b004SRick Macklem newnfsstats.srvcache_size = 0; 3239ec7b004SRick Macklem } 3249ec7b004SRick Macklem 3259ec7b004SRick Macklem /* 3269ec7b004SRick Macklem * Get a cache entry for this request. Basically just malloc a new one 3279ec7b004SRick Macklem * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest. 3289ec7b004SRick Macklem * Call nfsrc_trimcache() to clean up the cache before returning. 3299ec7b004SRick Macklem */ 3309ec7b004SRick Macklem APPLESTATIC int 3319ec7b004SRick Macklem nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so) 3329ec7b004SRick Macklem { 3339ec7b004SRick Macklem struct nfsrvcache *newrp; 3349ec7b004SRick Macklem int ret; 3359ec7b004SRick Macklem 3369ec7b004SRick Macklem if (nd->nd_procnum == NFSPROC_NULL) 3379ec7b004SRick Macklem panic("nfsd cache null"); 3389ec7b004SRick Macklem MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache), 3399ec7b004SRick Macklem M_NFSRVCACHE, M_WAITOK); 3409ec7b004SRick Macklem NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache)); 3419ec7b004SRick Macklem if (nd->nd_flag & ND_NFSV4) 3429ec7b004SRick Macklem newrp->rc_flag = RC_NFSV4; 3439ec7b004SRick Macklem else if (nd->nd_flag & ND_NFSV3) 3449ec7b004SRick Macklem newrp->rc_flag = RC_NFSV3; 3459ec7b004SRick Macklem else 3469ec7b004SRick Macklem newrp->rc_flag = RC_NFSV2; 3479ec7b004SRick Macklem newrp->rc_xid = nd->nd_retxid; 3489ec7b004SRick Macklem newrp->rc_proc = nd->nd_procnum; 3499ec7b004SRick Macklem newrp->rc_sockref = nd->nd_sockref; 3509ec7b004SRick Macklem newrp->rc_cachetime = nd->nd_tcpconntime; 3519ec7b004SRick Macklem if (nd->nd_flag & ND_SAMETCPCONN) 3529ec7b004SRick Macklem newrp->rc_flag |= RC_SAMETCPCONN; 3539ec7b004SRick Macklem if (nd->nd_nam2 != NULL) { 3549ec7b004SRick Macklem newrp->rc_flag |= RC_UDP; 3559ec7b004SRick Macklem ret = nfsrc_getudp(nd, newrp); 3569ec7b004SRick Macklem } else { 3579ec7b004SRick Macklem ret = nfsrc_gettcp(nd, newrp); 3589ec7b004SRick Macklem } 3599ec7b004SRick Macklem nfsrc_trimcache(nd->nd_sockref, so); 360a9285ae5SZack Kirsch NFSEXITCODE2(0, nd); 3619ec7b004SRick Macklem return (ret); 3629ec7b004SRick Macklem } 3639ec7b004SRick Macklem 3649ec7b004SRick Macklem /* 3659ec7b004SRick Macklem * For UDP (v2, v3): 3669ec7b004SRick Macklem * - key on <xid, NFS version, RPC#, Client host ip#> 3679ec7b004SRick Macklem * (at most one entry for each key) 3689ec7b004SRick Macklem */ 3699ec7b004SRick Macklem static int 3709ec7b004SRick Macklem nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) 3719ec7b004SRick Macklem { 3729ec7b004SRick Macklem struct nfsrvcache *rp; 3739ec7b004SRick Macklem struct sockaddr_in *saddr; 3749ec7b004SRick Macklem struct sockaddr_in6 *saddr6; 3759ec7b004SRick Macklem struct nfsrvhashhead *hp; 3769ec7b004SRick Macklem int ret = 0; 37793c5875bSRick Macklem struct mtx *mutex; 3789ec7b004SRick Macklem 37993c5875bSRick Macklem mutex = nfsrc_cachemutex(newrp); 3809ec7b004SRick Macklem hp = NFSRCUDPHASH(newrp->rc_xid); 3819ec7b004SRick Macklem loop: 38293c5875bSRick Macklem mtx_lock(mutex); 3839ec7b004SRick Macklem LIST_FOREACH(rp, hp, rc_hash) { 3849ec7b004SRick Macklem if (newrp->rc_xid == rp->rc_xid && 3859ec7b004SRick Macklem newrp->rc_proc == rp->rc_proc && 3869ec7b004SRick Macklem (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && 3879ec7b004SRick Macklem nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { 3889ec7b004SRick Macklem if ((rp->rc_flag & RC_LOCKED) != 0) { 3899ec7b004SRick Macklem rp->rc_flag |= RC_WANTED; 39093c5875bSRick Macklem (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, 39193c5875bSRick Macklem "nfsrc", 10 * hz); 3929ec7b004SRick Macklem goto loop; 3939ec7b004SRick Macklem } 3949ec7b004SRick Macklem if (rp->rc_flag == 0) 3959ec7b004SRick Macklem panic("nfs udp cache0"); 3969ec7b004SRick Macklem rp->rc_flag |= RC_LOCKED; 3979ec7b004SRick Macklem TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 3989ec7b004SRick Macklem TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); 3999ec7b004SRick Macklem if (rp->rc_flag & RC_INPROG) { 4009ec7b004SRick Macklem newnfsstats.srvcache_inproghits++; 40193c5875bSRick Macklem mtx_unlock(mutex); 4029ec7b004SRick Macklem ret = RC_DROPIT; 4039ec7b004SRick Macklem } else if (rp->rc_flag & RC_REPSTATUS) { 4049ec7b004SRick Macklem /* 4059ec7b004SRick Macklem * V2 only. 4069ec7b004SRick Macklem */ 4079ec7b004SRick Macklem newnfsstats.srvcache_nonidemdonehits++; 40893c5875bSRick Macklem mtx_unlock(mutex); 4099ec7b004SRick Macklem nfsrvd_rephead(nd); 4109ec7b004SRick Macklem *(nd->nd_errp) = rp->rc_status; 4119ec7b004SRick Macklem ret = RC_REPLY; 4129ec7b004SRick Macklem rp->rc_timestamp = NFSD_MONOSEC + 4139ec7b004SRick Macklem NFSRVCACHE_UDPTIMEOUT; 4149ec7b004SRick Macklem } else if (rp->rc_flag & RC_REPMBUF) { 4159ec7b004SRick Macklem newnfsstats.srvcache_nonidemdonehits++; 41693c5875bSRick Macklem mtx_unlock(mutex); 4179ec7b004SRick Macklem nd->nd_mreq = m_copym(rp->rc_reply, 0, 418eb1b1807SGleb Smirnoff M_COPYALL, M_WAITOK); 4199ec7b004SRick Macklem ret = RC_REPLY; 4209ec7b004SRick Macklem rp->rc_timestamp = NFSD_MONOSEC + 4219ec7b004SRick Macklem NFSRVCACHE_UDPTIMEOUT; 4229ec7b004SRick Macklem } else { 4239ec7b004SRick Macklem panic("nfs udp cache1"); 4249ec7b004SRick Macklem } 4259ec7b004SRick Macklem nfsrc_unlock(rp); 4269ec7b004SRick Macklem free((caddr_t)newrp, M_NFSRVCACHE); 427a9285ae5SZack Kirsch goto out; 4289ec7b004SRick Macklem } 4299ec7b004SRick Macklem } 4309ec7b004SRick Macklem newnfsstats.srvcache_misses++; 43193c5875bSRick Macklem atomic_add_int(&newnfsstats.srvcache_size, 1); 4329ec7b004SRick Macklem nfsrc_udpcachesize++; 4339ec7b004SRick Macklem 4349ec7b004SRick Macklem newrp->rc_flag |= RC_INPROG; 4359ec7b004SRick Macklem saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 4369ec7b004SRick Macklem if (saddr->sin_family == AF_INET) 4379ec7b004SRick Macklem newrp->rc_inet = saddr->sin_addr.s_addr; 4389ec7b004SRick Macklem else if (saddr->sin_family == AF_INET6) { 4399ec7b004SRick Macklem saddr6 = (struct sockaddr_in6 *)saddr; 4409ec7b004SRick Macklem NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6, 4419ec7b004SRick Macklem sizeof (struct in6_addr)); 442d5ad6625SRick Macklem newrp->rc_flag |= RC_INETIPV6; 4439ec7b004SRick Macklem } 4449ec7b004SRick Macklem LIST_INSERT_HEAD(hp, newrp, rc_hash); 4459ec7b004SRick Macklem TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); 44693c5875bSRick Macklem mtx_unlock(mutex); 4479ec7b004SRick Macklem nd->nd_rp = newrp; 448a9285ae5SZack Kirsch ret = RC_DOIT; 449a9285ae5SZack Kirsch 450a9285ae5SZack Kirsch out: 451a9285ae5SZack Kirsch NFSEXITCODE2(0, nd); 452a9285ae5SZack Kirsch return (ret); 4539ec7b004SRick Macklem } 4549ec7b004SRick Macklem 4559ec7b004SRick Macklem /* 4569ec7b004SRick Macklem * Update a request cache entry after the rpc has been done 4579ec7b004SRick Macklem */ 4589ec7b004SRick Macklem APPLESTATIC struct nfsrvcache * 4599ec7b004SRick Macklem nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so) 4609ec7b004SRick Macklem { 4619ec7b004SRick Macklem struct nfsrvcache *rp; 4629ec7b004SRick Macklem struct nfsrvcache *retrp = NULL; 4634e22c98aSRick Macklem mbuf_t m; 46493c5875bSRick Macklem struct mtx *mutex; 4659ec7b004SRick Macklem 4669ec7b004SRick Macklem rp = nd->nd_rp; 4679ec7b004SRick Macklem if (!rp) 4689ec7b004SRick Macklem panic("nfsrvd_updatecache null rp"); 4699ec7b004SRick Macklem nd->nd_rp = NULL; 47093c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 47193c5875bSRick Macklem mtx_lock(mutex); 4729ec7b004SRick Macklem nfsrc_lock(rp); 4739ec7b004SRick Macklem if (!(rp->rc_flag & RC_INPROG)) 4749ec7b004SRick Macklem panic("nfsrvd_updatecache not inprog"); 4759ec7b004SRick Macklem rp->rc_flag &= ~RC_INPROG; 4769ec7b004SRick Macklem if (rp->rc_flag & RC_UDP) { 4779ec7b004SRick Macklem TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 4789ec7b004SRick Macklem TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); 4799ec7b004SRick Macklem } 4809ec7b004SRick Macklem 4819ec7b004SRick Macklem /* 4829ec7b004SRick Macklem * Reply from cache is a special case returned by nfsrv_checkseqid(). 4839ec7b004SRick Macklem */ 4849ec7b004SRick Macklem if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { 4859ec7b004SRick Macklem newnfsstats.srvcache_nonidemdonehits++; 48693c5875bSRick Macklem mtx_unlock(mutex); 4879ec7b004SRick Macklem nd->nd_repstat = 0; 4889ec7b004SRick Macklem if (nd->nd_mreq) 4899ec7b004SRick Macklem mbuf_freem(nd->nd_mreq); 4909ec7b004SRick Macklem if (!(rp->rc_flag & RC_REPMBUF)) 4919ec7b004SRick Macklem panic("reply from cache"); 4929ec7b004SRick Macklem nd->nd_mreq = m_copym(rp->rc_reply, 0, 493eb1b1807SGleb Smirnoff M_COPYALL, M_WAITOK); 49493c5875bSRick Macklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 4959ec7b004SRick Macklem nfsrc_unlock(rp); 496a9285ae5SZack Kirsch goto out; 4979ec7b004SRick Macklem } 4989ec7b004SRick Macklem 4999ec7b004SRick Macklem /* 5009ec7b004SRick Macklem * If rc_refcnt > 0, save it 5019ec7b004SRick Macklem * For UDP, save it if ND_SAVEREPLY is set 5029ec7b004SRick Macklem * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set 5039ec7b004SRick Macklem */ 5049ec7b004SRick Macklem if (nd->nd_repstat != NFSERR_DONTREPLY && 5059ec7b004SRick Macklem (rp->rc_refcnt > 0 || 5069ec7b004SRick Macklem ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) || 5079ec7b004SRick Macklem ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) && 5089ec7b004SRick Macklem nfsrc_tcpsavedreplies <= nfsrc_floodlevel && 5099ec7b004SRick Macklem nfsrc_tcpnonidempotent))) { 5109ec7b004SRick Macklem if (rp->rc_refcnt > 0) { 5119ec7b004SRick Macklem if (!(rp->rc_flag & RC_NFSV4)) 5129ec7b004SRick Macklem panic("update_cache refcnt"); 5139ec7b004SRick Macklem rp->rc_flag |= RC_REFCNT; 5149ec7b004SRick Macklem } 5159ec7b004SRick Macklem if ((nd->nd_flag & ND_NFSV2) && 5169ec7b004SRick Macklem nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) { 5179ec7b004SRick Macklem rp->rc_status = nd->nd_repstat; 5189ec7b004SRick Macklem rp->rc_flag |= RC_REPSTATUS; 51993c5875bSRick Macklem mtx_unlock(mutex); 5209ec7b004SRick Macklem } else { 5219ec7b004SRick Macklem if (!(rp->rc_flag & RC_UDP)) { 52293c5875bSRick Macklem atomic_add_int(&nfsrc_tcpsavedreplies, 1); 5239ec7b004SRick Macklem if (nfsrc_tcpsavedreplies > 5249ec7b004SRick Macklem newnfsstats.srvcache_tcppeak) 5259ec7b004SRick Macklem newnfsstats.srvcache_tcppeak = 5269ec7b004SRick Macklem nfsrc_tcpsavedreplies; 5279ec7b004SRick Macklem } 52893c5875bSRick Macklem mtx_unlock(mutex); 529eb1b1807SGleb Smirnoff m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); 53093c5875bSRick Macklem mtx_lock(mutex); 5314e22c98aSRick Macklem rp->rc_reply = m; 5329ec7b004SRick Macklem rp->rc_flag |= RC_REPMBUF; 53393c5875bSRick Macklem mtx_unlock(mutex); 5349ec7b004SRick Macklem } 5359ec7b004SRick Macklem if (rp->rc_flag & RC_UDP) { 5369ec7b004SRick Macklem rp->rc_timestamp = NFSD_MONOSEC + 5379ec7b004SRick Macklem NFSRVCACHE_UDPTIMEOUT; 5389ec7b004SRick Macklem nfsrc_unlock(rp); 5399ec7b004SRick Macklem } else { 54093c5875bSRick Macklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 5419ec7b004SRick Macklem if (rp->rc_refcnt > 0) 5429ec7b004SRick Macklem nfsrc_unlock(rp); 5439ec7b004SRick Macklem else 5449ec7b004SRick Macklem retrp = rp; 5459ec7b004SRick Macklem } 5469ec7b004SRick Macklem } else { 5479ec7b004SRick Macklem nfsrc_freecache(rp); 54893c5875bSRick Macklem mtx_unlock(mutex); 5499ec7b004SRick Macklem } 550a9285ae5SZack Kirsch 551a9285ae5SZack Kirsch out: 5529ec7b004SRick Macklem nfsrc_trimcache(nd->nd_sockref, so); 553a9285ae5SZack Kirsch NFSEXITCODE2(0, nd); 5549ec7b004SRick Macklem return (retrp); 5559ec7b004SRick Macklem } 5569ec7b004SRick Macklem 5579ec7b004SRick Macklem /* 5589ec7b004SRick Macklem * Invalidate and, if possible, free an in prog cache entry. 5599ec7b004SRick Macklem * Must not sleep. 5609ec7b004SRick Macklem */ 5619ec7b004SRick Macklem APPLESTATIC void 5629ec7b004SRick Macklem nfsrvd_delcache(struct nfsrvcache *rp) 5639ec7b004SRick Macklem { 56493c5875bSRick Macklem struct mtx *mutex; 5659ec7b004SRick Macklem 56693c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 5679ec7b004SRick Macklem if (!(rp->rc_flag & RC_INPROG)) 5689ec7b004SRick Macklem panic("nfsrvd_delcache not in prog"); 56993c5875bSRick Macklem mtx_lock(mutex); 5709ec7b004SRick Macklem rp->rc_flag &= ~RC_INPROG; 5719ec7b004SRick Macklem if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED)) 5729ec7b004SRick Macklem nfsrc_freecache(rp); 57393c5875bSRick Macklem mtx_unlock(mutex); 5749ec7b004SRick Macklem } 5759ec7b004SRick Macklem 5769ec7b004SRick Macklem /* 5779ec7b004SRick Macklem * Called after nfsrvd_updatecache() once the reply is sent, to update 5789ec7b004SRick Macklem * the entry for nfsrc_activesocket() and unlock it. The argument is 5799ec7b004SRick Macklem * the pointer returned by nfsrvd_updatecache(). 5809ec7b004SRick Macklem */ 5819ec7b004SRick Macklem APPLESTATIC void 5829ec7b004SRick Macklem nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err) 5839ec7b004SRick Macklem { 5844e22c98aSRick Macklem tcp_seq tmp_seq; 58593c5875bSRick Macklem struct mtx *mutex; 5869ec7b004SRick Macklem 58793c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 5889ec7b004SRick Macklem if (!(rp->rc_flag & RC_LOCKED)) 5899ec7b004SRick Macklem panic("nfsrvd_sentcache not locked"); 5909ec7b004SRick Macklem if (!err) { 591d9cf8753SXin LI if ((so->so_proto->pr_domain->dom_family != AF_INET && 592d9cf8753SXin LI so->so_proto->pr_domain->dom_family != AF_INET6) || 5939ec7b004SRick Macklem so->so_proto->pr_protocol != IPPROTO_TCP) 5949ec7b004SRick Macklem panic("nfs sent cache"); 5954e22c98aSRick Macklem if (nfsrv_getsockseqnum(so, &tmp_seq)) { 59693c5875bSRick Macklem mtx_lock(mutex); 5974e22c98aSRick Macklem rp->rc_tcpseq = tmp_seq; 5989ec7b004SRick Macklem rp->rc_flag |= RC_TCPSEQ; 59993c5875bSRick Macklem mtx_unlock(mutex); 6004e22c98aSRick Macklem } 6019ec7b004SRick Macklem } 6029ec7b004SRick Macklem nfsrc_unlock(rp); 6039ec7b004SRick Macklem } 6049ec7b004SRick Macklem 6059ec7b004SRick Macklem /* 6069ec7b004SRick Macklem * Get a cache entry for TCP 6079ec7b004SRick Macklem * - key on <xid, nfs version> 6089ec7b004SRick Macklem * (allow multiple entries for a given key) 6099ec7b004SRick Macklem */ 6109ec7b004SRick Macklem static int 6119ec7b004SRick Macklem nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) 6129ec7b004SRick Macklem { 6139ec7b004SRick Macklem struct nfsrvcache *rp, *nextrp; 6149ec7b004SRick Macklem int i; 6159ec7b004SRick Macklem struct nfsrvcache *hitrp; 6169ec7b004SRick Macklem struct nfsrvhashhead *hp, nfsrc_templist; 6179ec7b004SRick Macklem int hit, ret = 0; 61893c5875bSRick Macklem struct mtx *mutex; 6199ec7b004SRick Macklem 62093c5875bSRick Macklem mutex = nfsrc_cachemutex(newrp); 6219ec7b004SRick Macklem hp = NFSRCHASH(newrp->rc_xid); 6229ec7b004SRick Macklem newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum); 6239ec7b004SRick Macklem tryagain: 62493c5875bSRick Macklem mtx_lock(mutex); 6259ec7b004SRick Macklem hit = 1; 6269ec7b004SRick Macklem LIST_INIT(&nfsrc_templist); 6279ec7b004SRick Macklem /* 6289ec7b004SRick Macklem * Get all the matches and put them on the temp list. 6299ec7b004SRick Macklem */ 6309ec7b004SRick Macklem rp = LIST_FIRST(hp); 6319ec7b004SRick Macklem while (rp != LIST_END(hp)) { 6329ec7b004SRick Macklem nextrp = LIST_NEXT(rp, rc_hash); 6339ec7b004SRick Macklem if (newrp->rc_xid == rp->rc_xid && 6349ec7b004SRick Macklem (!(rp->rc_flag & RC_INPROG) || 6359ec7b004SRick Macklem ((newrp->rc_flag & RC_SAMETCPCONN) && 6369ec7b004SRick Macklem newrp->rc_sockref == rp->rc_sockref)) && 6379ec7b004SRick Macklem (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && 6389ec7b004SRick Macklem newrp->rc_proc == rp->rc_proc && 6399ec7b004SRick Macklem ((newrp->rc_flag & RC_NFSV4) && 6409ec7b004SRick Macklem newrp->rc_sockref != rp->rc_sockref && 6419ec7b004SRick Macklem newrp->rc_cachetime >= rp->rc_cachetime) 6429ec7b004SRick Macklem && newrp->rc_reqlen == rp->rc_reqlen && 6439ec7b004SRick Macklem newrp->rc_cksum == rp->rc_cksum) { 6449ec7b004SRick Macklem LIST_REMOVE(rp, rc_hash); 6459ec7b004SRick Macklem LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash); 6469ec7b004SRick Macklem } 6479ec7b004SRick Macklem rp = nextrp; 6489ec7b004SRick Macklem } 6499ec7b004SRick Macklem 6509ec7b004SRick Macklem /* 6519ec7b004SRick Macklem * Now, use nfsrc_templist to decide if there is a match. 6529ec7b004SRick Macklem */ 6539ec7b004SRick Macklem i = 0; 6549ec7b004SRick Macklem LIST_FOREACH(rp, &nfsrc_templist, rc_hash) { 6559ec7b004SRick Macklem i++; 6569ec7b004SRick Macklem if (rp->rc_refcnt > 0) { 6579ec7b004SRick Macklem hit = 0; 6589ec7b004SRick Macklem break; 6599ec7b004SRick Macklem } 6609ec7b004SRick Macklem } 6619ec7b004SRick Macklem /* 6629ec7b004SRick Macklem * Can be a hit only if one entry left. 6639ec7b004SRick Macklem * Note possible hit entry and put nfsrc_templist back on hash 6649ec7b004SRick Macklem * list. 6659ec7b004SRick Macklem */ 6669ec7b004SRick Macklem if (i != 1) 6679ec7b004SRick Macklem hit = 0; 6689ec7b004SRick Macklem hitrp = rp = LIST_FIRST(&nfsrc_templist); 6699ec7b004SRick Macklem while (rp != LIST_END(&nfsrc_templist)) { 6709ec7b004SRick Macklem nextrp = LIST_NEXT(rp, rc_hash); 6719ec7b004SRick Macklem LIST_REMOVE(rp, rc_hash); 6729ec7b004SRick Macklem LIST_INSERT_HEAD(hp, rp, rc_hash); 6739ec7b004SRick Macklem rp = nextrp; 6749ec7b004SRick Macklem } 6759ec7b004SRick Macklem if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist)) 6769ec7b004SRick Macklem panic("nfs gettcp cache templist"); 6779ec7b004SRick Macklem 6789ec7b004SRick Macklem if (hit) { 6799ec7b004SRick Macklem rp = hitrp; 6809ec7b004SRick Macklem if ((rp->rc_flag & RC_LOCKED) != 0) { 6819ec7b004SRick Macklem rp->rc_flag |= RC_WANTED; 68293c5875bSRick Macklem (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, 68393c5875bSRick Macklem "nfsrc", 10 * hz); 6849ec7b004SRick Macklem goto tryagain; 6859ec7b004SRick Macklem } 6869ec7b004SRick Macklem if (rp->rc_flag == 0) 6879ec7b004SRick Macklem panic("nfs tcp cache0"); 6889ec7b004SRick Macklem rp->rc_flag |= RC_LOCKED; 6899ec7b004SRick Macklem if (rp->rc_flag & RC_INPROG) { 6909ec7b004SRick Macklem newnfsstats.srvcache_inproghits++; 69193c5875bSRick Macklem mtx_unlock(mutex); 6929ec7b004SRick Macklem if (newrp->rc_sockref == rp->rc_sockref) 6939ec7b004SRick Macklem nfsrc_marksametcpconn(rp->rc_sockref); 6949ec7b004SRick Macklem ret = RC_DROPIT; 6959ec7b004SRick Macklem } else if (rp->rc_flag & RC_REPSTATUS) { 6969ec7b004SRick Macklem /* 6979ec7b004SRick Macklem * V2 only. 6989ec7b004SRick Macklem */ 6999ec7b004SRick Macklem newnfsstats.srvcache_nonidemdonehits++; 70093c5875bSRick Macklem mtx_unlock(mutex); 7019ec7b004SRick Macklem if (newrp->rc_sockref == rp->rc_sockref) 7029ec7b004SRick Macklem nfsrc_marksametcpconn(rp->rc_sockref); 7039ec7b004SRick Macklem ret = RC_REPLY; 7049ec7b004SRick Macklem nfsrvd_rephead(nd); 7059ec7b004SRick Macklem *(nd->nd_errp) = rp->rc_status; 70693c5875bSRick Macklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 7079ec7b004SRick Macklem } else if (rp->rc_flag & RC_REPMBUF) { 7089ec7b004SRick Macklem newnfsstats.srvcache_nonidemdonehits++; 70993c5875bSRick Macklem mtx_unlock(mutex); 7109ec7b004SRick Macklem if (newrp->rc_sockref == rp->rc_sockref) 7119ec7b004SRick Macklem nfsrc_marksametcpconn(rp->rc_sockref); 7129ec7b004SRick Macklem ret = RC_REPLY; 7139ec7b004SRick Macklem nd->nd_mreq = m_copym(rp->rc_reply, 0, 714eb1b1807SGleb Smirnoff M_COPYALL, M_WAITOK); 71593c5875bSRick Macklem rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; 7169ec7b004SRick Macklem } else { 7179ec7b004SRick Macklem panic("nfs tcp cache1"); 7189ec7b004SRick Macklem } 7199ec7b004SRick Macklem nfsrc_unlock(rp); 7209ec7b004SRick Macklem free((caddr_t)newrp, M_NFSRVCACHE); 721a9285ae5SZack Kirsch goto out; 7229ec7b004SRick Macklem } 7239ec7b004SRick Macklem newnfsstats.srvcache_misses++; 72493c5875bSRick Macklem atomic_add_int(&newnfsstats.srvcache_size, 1); 7259ec7b004SRick Macklem 7269ec7b004SRick Macklem /* 7279ec7b004SRick Macklem * For TCP, multiple entries for a key are allowed, so don't 7289ec7b004SRick Macklem * chain it into the hash table until done. 7299ec7b004SRick Macklem */ 7309ec7b004SRick Macklem newrp->rc_cachetime = NFSD_MONOSEC; 7319ec7b004SRick Macklem newrp->rc_flag |= RC_INPROG; 7329ec7b004SRick Macklem LIST_INSERT_HEAD(hp, newrp, rc_hash); 73393c5875bSRick Macklem mtx_unlock(mutex); 7349ec7b004SRick Macklem nd->nd_rp = newrp; 735a9285ae5SZack Kirsch ret = RC_DOIT; 736a9285ae5SZack Kirsch 737a9285ae5SZack Kirsch out: 738a9285ae5SZack Kirsch NFSEXITCODE2(0, nd); 739a9285ae5SZack Kirsch return (ret); 7409ec7b004SRick Macklem } 7419ec7b004SRick Macklem 7429ec7b004SRick Macklem /* 7439ec7b004SRick Macklem * Lock a cache entry. 7449ec7b004SRick Macklem */ 7459ec7b004SRick Macklem static void 7469ec7b004SRick Macklem nfsrc_lock(struct nfsrvcache *rp) 7479ec7b004SRick Macklem { 74893c5875bSRick Macklem struct mtx *mutex; 74993c5875bSRick Macklem 75093c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 75193c5875bSRick Macklem mtx_assert(mutex, MA_OWNED); 7529ec7b004SRick Macklem while ((rp->rc_flag & RC_LOCKED) != 0) { 7539ec7b004SRick Macklem rp->rc_flag |= RC_WANTED; 75493c5875bSRick Macklem (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0); 7559ec7b004SRick Macklem } 7569ec7b004SRick Macklem rp->rc_flag |= RC_LOCKED; 7579ec7b004SRick Macklem } 7589ec7b004SRick Macklem 7599ec7b004SRick Macklem /* 7609ec7b004SRick Macklem * Unlock a cache entry. 7619ec7b004SRick Macklem */ 7629ec7b004SRick Macklem static void 7639ec7b004SRick Macklem nfsrc_unlock(struct nfsrvcache *rp) 7649ec7b004SRick Macklem { 76593c5875bSRick Macklem struct mtx *mutex; 7664e22c98aSRick Macklem 76793c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 76893c5875bSRick Macklem mtx_lock(mutex); 7699ec7b004SRick Macklem rp->rc_flag &= ~RC_LOCKED; 7709ec7b004SRick Macklem nfsrc_wanted(rp); 77193c5875bSRick Macklem mtx_unlock(mutex); 7729ec7b004SRick Macklem } 7739ec7b004SRick Macklem 7749ec7b004SRick Macklem /* 7759ec7b004SRick Macklem * Wakeup anyone wanting entry. 7769ec7b004SRick Macklem */ 7779ec7b004SRick Macklem static void 7789ec7b004SRick Macklem nfsrc_wanted(struct nfsrvcache *rp) 7799ec7b004SRick Macklem { 7809ec7b004SRick Macklem if (rp->rc_flag & RC_WANTED) { 7819ec7b004SRick Macklem rp->rc_flag &= ~RC_WANTED; 7829ec7b004SRick Macklem wakeup((caddr_t)rp); 7839ec7b004SRick Macklem } 7849ec7b004SRick Macklem } 7859ec7b004SRick Macklem 7869ec7b004SRick Macklem /* 7879ec7b004SRick Macklem * Free up the entry. 7889ec7b004SRick Macklem * Must not sleep. 7899ec7b004SRick Macklem */ 7909ec7b004SRick Macklem static void 7919ec7b004SRick Macklem nfsrc_freecache(struct nfsrvcache *rp) 7929ec7b004SRick Macklem { 7939ec7b004SRick Macklem 7949ec7b004SRick Macklem LIST_REMOVE(rp, rc_hash); 7959ec7b004SRick Macklem if (rp->rc_flag & RC_UDP) { 7969ec7b004SRick Macklem TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); 7979ec7b004SRick Macklem nfsrc_udpcachesize--; 7989ec7b004SRick Macklem } 7999ec7b004SRick Macklem nfsrc_wanted(rp); 8009ec7b004SRick Macklem if (rp->rc_flag & RC_REPMBUF) { 8019ec7b004SRick Macklem mbuf_freem(rp->rc_reply); 8029ec7b004SRick Macklem if (!(rp->rc_flag & RC_UDP)) 80393c5875bSRick Macklem atomic_add_int(&nfsrc_tcpsavedreplies, -1); 8049ec7b004SRick Macklem } 8059ec7b004SRick Macklem FREE((caddr_t)rp, M_NFSRVCACHE); 80693c5875bSRick Macklem atomic_add_int(&newnfsstats.srvcache_size, -1); 8079ec7b004SRick Macklem } 8089ec7b004SRick Macklem 8099ec7b004SRick Macklem /* 81052776c50SZack Kirsch * Clean out the cache. Called when nfsserver module is unloaded. 8119ec7b004SRick Macklem */ 8129ec7b004SRick Macklem APPLESTATIC void 8139ec7b004SRick Macklem nfsrvd_cleancache(void) 8149ec7b004SRick Macklem { 8159ec7b004SRick Macklem struct nfsrvcache *rp, *nextrp; 8169ec7b004SRick Macklem int i; 8179ec7b004SRick Macklem 8189ec7b004SRick Macklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 81993c5875bSRick Macklem mtx_lock(&nfsrchash_table[i].mtx); 82093c5875bSRick Macklem LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) 8219ec7b004SRick Macklem nfsrc_freecache(rp); 82293c5875bSRick Macklem mtx_unlock(&nfsrchash_table[i].mtx); 8239ec7b004SRick Macklem } 82493c5875bSRick Macklem mtx_lock(&nfsrc_udpmtx); 8259ec7b004SRick Macklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 8269ec7b004SRick Macklem LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) { 8279ec7b004SRick Macklem nfsrc_freecache(rp); 8289ec7b004SRick Macklem } 8299ec7b004SRick Macklem } 8309ec7b004SRick Macklem newnfsstats.srvcache_size = 0; 83193c5875bSRick Macklem mtx_unlock(&nfsrc_udpmtx); 8329ec7b004SRick Macklem nfsrc_tcpsavedreplies = 0; 8339ec7b004SRick Macklem } 8349ec7b004SRick Macklem 8351555cf04SAlexander Motin #define HISTSIZE 16 8369ec7b004SRick Macklem /* 8379ec7b004SRick Macklem * The basic rule is to get rid of entries that are expired. 8389ec7b004SRick Macklem */ 8399ec7b004SRick Macklem static void 8409ec7b004SRick Macklem nfsrc_trimcache(u_int64_t sockref, struct socket *so) 8419ec7b004SRick Macklem { 8429ec7b004SRick Macklem struct nfsrvcache *rp, *nextrp; 8431555cf04SAlexander Motin int i, j, k, tto, time_histo[HISTSIZE]; 84493c5875bSRick Macklem time_t thisstamp; 84593c5875bSRick Macklem static time_t udp_lasttrim = 0, tcp_lasttrim = 0; 84693c5875bSRick Macklem static int onethread = 0; 8479ec7b004SRick Macklem 84893c5875bSRick Macklem if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0) 84993c5875bSRick Macklem return; 85093c5875bSRick Macklem if (NFSD_MONOSEC != udp_lasttrim || 85193c5875bSRick Macklem nfsrc_udpcachesize >= (nfsrc_udphighwater + 85293c5875bSRick Macklem nfsrc_udphighwater / 2)) { 85393c5875bSRick Macklem mtx_lock(&nfsrc_udpmtx); 85493c5875bSRick Macklem udp_lasttrim = NFSD_MONOSEC; 8559ec7b004SRick Macklem TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { 8569ec7b004SRick Macklem if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) 8579ec7b004SRick Macklem && rp->rc_refcnt == 0 8589ec7b004SRick Macklem && ((rp->rc_flag & RC_REFCNT) || 85993c5875bSRick Macklem udp_lasttrim > rp->rc_timestamp || 8609ec7b004SRick Macklem nfsrc_udpcachesize > nfsrc_udphighwater)) 8619ec7b004SRick Macklem nfsrc_freecache(rp); 8629ec7b004SRick Macklem } 86393c5875bSRick Macklem mtx_unlock(&nfsrc_udpmtx); 86493c5875bSRick Macklem } 86593c5875bSRick Macklem if (NFSD_MONOSEC != tcp_lasttrim || 86693c5875bSRick Macklem nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) { 8671555cf04SAlexander Motin for (i = 0; i < HISTSIZE; i++) 86893c5875bSRick Macklem time_histo[i] = 0; 8691555cf04SAlexander Motin tto = nfsrc_tcptimeout; 8709ec7b004SRick Macklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 87193c5875bSRick Macklem mtx_lock(&nfsrchash_table[i].mtx); 87293c5875bSRick Macklem if (i == 0) 87393c5875bSRick Macklem tcp_lasttrim = NFSD_MONOSEC; 87493c5875bSRick Macklem LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, 87593c5875bSRick Macklem nextrp) { 87693c5875bSRick Macklem if (!(rp->rc_flag & 87793c5875bSRick Macklem (RC_INPROG|RC_LOCKED|RC_WANTED)) 87893c5875bSRick Macklem && rp->rc_refcnt == 0) { 8791555cf04SAlexander Motin if ((rp->rc_flag & RC_REFCNT) || 8801555cf04SAlexander Motin tcp_lasttrim > rp->rc_timestamp || 8811555cf04SAlexander Motin nfsrc_activesocket(rp, sockref, so)) { 8821555cf04SAlexander Motin nfsrc_freecache(rp); 8831555cf04SAlexander Motin continue; 8841555cf04SAlexander Motin } 8851555cf04SAlexander Motin 8861555cf04SAlexander Motin if (nfsrc_tcphighwater == 0) 8871555cf04SAlexander Motin continue; 88893c5875bSRick Macklem /* 88993c5875bSRick Macklem * The timestamps range from roughly the 89093c5875bSRick Macklem * present (tcp_lasttrim) to the present 89193c5875bSRick Macklem * + nfsrc_tcptimeout. Generate a simple 89293c5875bSRick Macklem * histogram of where the timeouts fall. 89393c5875bSRick Macklem */ 89493c5875bSRick Macklem j = rp->rc_timestamp - tcp_lasttrim; 8951555cf04SAlexander Motin if (j >= tto) 8961555cf04SAlexander Motin j = HISTSIZE - 1; 8971555cf04SAlexander Motin else if (j < 0) 89893c5875bSRick Macklem j = 0; 8991555cf04SAlexander Motin else 9001555cf04SAlexander Motin j = j * HISTSIZE / tto; 90193c5875bSRick Macklem time_histo[j]++; 9029ec7b004SRick Macklem } 9039ec7b004SRick Macklem } 90493c5875bSRick Macklem mtx_unlock(&nfsrchash_table[i].mtx); 90593c5875bSRick Macklem } 90693c5875bSRick Macklem j = nfsrc_tcphighwater / 5; /* 20% of it */ 90793c5875bSRick Macklem if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) { 90893c5875bSRick Macklem /* 90993c5875bSRick Macklem * Trim some more with a smaller timeout of as little 91093c5875bSRick Macklem * as 20% of nfsrc_tcptimeout to try and get below 91193c5875bSRick Macklem * 80% of the nfsrc_tcphighwater. 91293c5875bSRick Macklem */ 91393c5875bSRick Macklem k = 0; 9141555cf04SAlexander Motin for (i = 0; i < (HISTSIZE - 2); i++) { 91593c5875bSRick Macklem k += time_histo[i]; 91693c5875bSRick Macklem if (k > j) 91793c5875bSRick Macklem break; 91893c5875bSRick Macklem } 9191555cf04SAlexander Motin k = tto * (i + 1) / HISTSIZE; 92093c5875bSRick Macklem if (k < 1) 92193c5875bSRick Macklem k = 1; 92293c5875bSRick Macklem thisstamp = tcp_lasttrim + k; 92393c5875bSRick Macklem for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 92493c5875bSRick Macklem mtx_lock(&nfsrchash_table[i].mtx); 92593c5875bSRick Macklem LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, 92693c5875bSRick Macklem rc_hash, nextrp) { 92793c5875bSRick Macklem if (!(rp->rc_flag & 92893c5875bSRick Macklem (RC_INPROG|RC_LOCKED|RC_WANTED)) 92993c5875bSRick Macklem && rp->rc_refcnt == 0 93093c5875bSRick Macklem && ((rp->rc_flag & RC_REFCNT) || 93193c5875bSRick Macklem thisstamp > rp->rc_timestamp || 93293c5875bSRick Macklem nfsrc_activesocket(rp, sockref, 93393c5875bSRick Macklem so))) 93493c5875bSRick Macklem nfsrc_freecache(rp); 93593c5875bSRick Macklem } 93693c5875bSRick Macklem mtx_unlock(&nfsrchash_table[i].mtx); 93793c5875bSRick Macklem } 93893c5875bSRick Macklem } 93993c5875bSRick Macklem } 94093c5875bSRick Macklem atomic_store_rel_int(&onethread, 0); 9419ec7b004SRick Macklem } 9429ec7b004SRick Macklem 9439ec7b004SRick Macklem /* 9449ec7b004SRick Macklem * Add a seqid# reference to the cache entry. 9459ec7b004SRick Macklem */ 9469ec7b004SRick Macklem APPLESTATIC void 9479ec7b004SRick Macklem nfsrvd_refcache(struct nfsrvcache *rp) 9489ec7b004SRick Macklem { 94993c5875bSRick Macklem struct mtx *mutex; 9509ec7b004SRick Macklem 95193c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 95293c5875bSRick Macklem mtx_lock(mutex); 9539ec7b004SRick Macklem if (rp->rc_refcnt < 0) 9549ec7b004SRick Macklem panic("nfs cache refcnt"); 9559ec7b004SRick Macklem rp->rc_refcnt++; 95693c5875bSRick Macklem mtx_unlock(mutex); 9579ec7b004SRick Macklem } 9589ec7b004SRick Macklem 9599ec7b004SRick Macklem /* 9609ec7b004SRick Macklem * Dereference a seqid# cache entry. 9619ec7b004SRick Macklem */ 9629ec7b004SRick Macklem APPLESTATIC void 9639ec7b004SRick Macklem nfsrvd_derefcache(struct nfsrvcache *rp) 9649ec7b004SRick Macklem { 96593c5875bSRick Macklem struct mtx *mutex; 9669ec7b004SRick Macklem 96793c5875bSRick Macklem mutex = nfsrc_cachemutex(rp); 96893c5875bSRick Macklem mtx_lock(mutex); 9699ec7b004SRick Macklem if (rp->rc_refcnt <= 0) 9709ec7b004SRick Macklem panic("nfs cache derefcnt"); 9719ec7b004SRick Macklem rp->rc_refcnt--; 9729ec7b004SRick Macklem if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG))) 9739ec7b004SRick Macklem nfsrc_freecache(rp); 97493c5875bSRick Macklem mtx_unlock(mutex); 9759ec7b004SRick Macklem } 9769ec7b004SRick Macklem 9779ec7b004SRick Macklem /* 9789ec7b004SRick Macklem * Check to see if the socket is active. 9799ec7b004SRick Macklem * Return 1 if the reply has been received/acknowledged by the client, 9809ec7b004SRick Macklem * 0 otherwise. 9819ec7b004SRick Macklem * XXX - Uses tcp internals. 9829ec7b004SRick Macklem */ 9839ec7b004SRick Macklem static int 9849ec7b004SRick Macklem nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref, 9859ec7b004SRick Macklem struct socket *cur_so) 9869ec7b004SRick Macklem { 9879ec7b004SRick Macklem int ret = 0; 9889ec7b004SRick Macklem 9899ec7b004SRick Macklem if (!(rp->rc_flag & RC_TCPSEQ)) 9909ec7b004SRick Macklem return (ret); 9919ec7b004SRick Macklem /* 9929ec7b004SRick Macklem * If the sockref is the same, it is the same TCP connection. 9939ec7b004SRick Macklem */ 9949ec7b004SRick Macklem if (cur_sockref == rp->rc_sockref) 9959ec7b004SRick Macklem ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq); 9969ec7b004SRick Macklem return (ret); 9979ec7b004SRick Macklem } 9989ec7b004SRick Macklem 9999ec7b004SRick Macklem /* 10009ec7b004SRick Macklem * Calculate the length of the mbuf list and a checksum on the first up to 10019ec7b004SRick Macklem * NFSRVCACHE_CHECKLEN bytes. 10029ec7b004SRick Macklem */ 10039ec7b004SRick Macklem static int 10049ec7b004SRick Macklem nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum) 10059ec7b004SRick Macklem { 10069ec7b004SRick Macklem int len = 0, cklen; 10079ec7b004SRick Macklem mbuf_t m; 10089ec7b004SRick Macklem 10099ec7b004SRick Macklem m = m1; 10109ec7b004SRick Macklem while (m) { 10119ec7b004SRick Macklem len += mbuf_len(m); 10129ec7b004SRick Macklem m = mbuf_next(m); 10139ec7b004SRick Macklem } 10149ec7b004SRick Macklem cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len; 10159ec7b004SRick Macklem *cksum = in_cksum(m1, cklen); 10169ec7b004SRick Macklem return (len); 10179ec7b004SRick Macklem } 10189ec7b004SRick Macklem 10199ec7b004SRick Macklem /* 10209ec7b004SRick Macklem * Mark a TCP connection that is seeing retries. Should never happen for 10219ec7b004SRick Macklem * NFSv4. 10229ec7b004SRick Macklem */ 10239ec7b004SRick Macklem static void 10249ec7b004SRick Macklem nfsrc_marksametcpconn(u_int64_t sockref) 10259ec7b004SRick Macklem { 10269ec7b004SRick Macklem } 10279ec7b004SRick Macklem 1028