xref: /freebsd/sys/fs/nfsserver/nfs_nfsdcache.c (revision 9ec7b004)
19ec7b004SRick Macklem /*-
29ec7b004SRick Macklem  * Copyright (c) 1989, 1993
39ec7b004SRick Macklem  *	The Regents of the University of California.  All rights reserved.
49ec7b004SRick Macklem  *
59ec7b004SRick Macklem  * This code is derived from software contributed to Berkeley by
69ec7b004SRick Macklem  * Rick Macklem at The University of Guelph.
79ec7b004SRick Macklem  *
89ec7b004SRick Macklem  * Redistribution and use in source and binary forms, with or without
99ec7b004SRick Macklem  * modification, are permitted provided that the following conditions
109ec7b004SRick Macklem  * are met:
119ec7b004SRick Macklem  * 1. Redistributions of source code must retain the above copyright
129ec7b004SRick Macklem  *    notice, this list of conditions and the following disclaimer.
139ec7b004SRick Macklem  * 2. Redistributions in binary form must reproduce the above copyright
149ec7b004SRick Macklem  *    notice, this list of conditions and the following disclaimer in the
159ec7b004SRick Macklem  *    documentation and/or other materials provided with the distribution.
169ec7b004SRick Macklem  * 4. Neither the name of the University nor the names of its contributors
179ec7b004SRick Macklem  *    may be used to endorse or promote products derived from this software
189ec7b004SRick Macklem  *    without specific prior written permission.
199ec7b004SRick Macklem  *
209ec7b004SRick Macklem  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
219ec7b004SRick Macklem  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
229ec7b004SRick Macklem  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
239ec7b004SRick Macklem  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
249ec7b004SRick Macklem  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
259ec7b004SRick Macklem  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
269ec7b004SRick Macklem  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
279ec7b004SRick Macklem  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
289ec7b004SRick Macklem  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
299ec7b004SRick Macklem  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
309ec7b004SRick Macklem  * SUCH DAMAGE.
319ec7b004SRick Macklem  *
329ec7b004SRick Macklem  */
339ec7b004SRick Macklem 
349ec7b004SRick Macklem #include <sys/cdefs.h>
359ec7b004SRick Macklem __FBSDID("$FreeBSD$");
369ec7b004SRick Macklem 
379ec7b004SRick Macklem /*
389ec7b004SRick Macklem  * Here is the basic algorithm:
399ec7b004SRick Macklem  * First, some design criteria I used:
409ec7b004SRick Macklem  * - I think a false hit is more serious than a false miss
419ec7b004SRick Macklem  * - A false hit for an RPC that has Op(s) that order via seqid# must be
429ec7b004SRick Macklem  *   avoided at all cost
439ec7b004SRick Macklem  * - A valid hit will probably happen a long time after the original reply
449ec7b004SRick Macklem  *   and the TCP socket that the original request was received on will no
459ec7b004SRick Macklem  *   longer be active
469ec7b004SRick Macklem  *   (The long time delay implies to me that LRU is not appropriate.)
479ec7b004SRick Macklem  * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
489ec7b004SRick Macklem  *   in them as well as minimizing the risk of redoing retried non-idempotent
499ec7b004SRick Macklem  *   Ops.
509ec7b004SRick Macklem  * Because it is biased towards avoiding false hits, multiple entries with
519ec7b004SRick Macklem  * the same xid are to be expected, especially for the case of the entry
529ec7b004SRick Macklem  * in the cache being related to a seqid# sequenced Op.
539ec7b004SRick Macklem  *
549ec7b004SRick Macklem  * The basic algorithm I'm about to code up:
559ec7b004SRick Macklem  * - Null RPCs bypass the cache and are just done
569ec7b004SRick Macklem  * For TCP
579ec7b004SRick Macklem  * 	- key on <xid, NFS version> (as noted above, there can be several
589ec7b004SRick Macklem  * 				     entries with the same key)
599ec7b004SRick Macklem  * 	When a request arrives:
609ec7b004SRick Macklem  * 		For all that match key
619ec7b004SRick Macklem  * 		- if RPC# != OR request_size !=
629ec7b004SRick Macklem  * 			- not a match with this one
639ec7b004SRick Macklem  * 		- if NFSv4 and received on same TCP socket OR
649ec7b004SRick Macklem  *			received on a TCP connection created before the
659ec7b004SRick Macklem  *			entry was cached
669ec7b004SRick Macklem  * 			- not a match with this one
679ec7b004SRick Macklem  * 			(V2,3 clients might retry on same TCP socket)
689ec7b004SRick Macklem  * 		- calculate checksum on first N bytes of NFS XDR
699ec7b004SRick Macklem  * 		- if checksum !=
709ec7b004SRick Macklem  * 			- not a match for this one
719ec7b004SRick Macklem  * 		If any of the remaining ones that match has a
729ec7b004SRick Macklem  * 			seqid_refcnt > 0
739ec7b004SRick Macklem  * 			- not a match (go do RPC, using new cache entry)
749ec7b004SRick Macklem  * 		If one match left
759ec7b004SRick Macklem  * 			- a hit (reply from cache)
769ec7b004SRick Macklem  * 		else
779ec7b004SRick Macklem  * 			- miss (go do RPC, using new cache entry)
789ec7b004SRick Macklem  *
799ec7b004SRick Macklem  * 	During processing of NFSv4 request:
809ec7b004SRick Macklem  * 		- set a flag when a non-idempotent Op is processed
819ec7b004SRick Macklem  * 		- when an Op that uses a seqid# (Open,...) is processed
829ec7b004SRick Macklem  * 			- if same seqid# as referenced entry in cache
839ec7b004SRick Macklem  * 				- free new cache entry
849ec7b004SRick Macklem  * 				- reply from referenced cache entry
859ec7b004SRick Macklem  * 			  else if next seqid# in order
869ec7b004SRick Macklem  * 				- free referenced cache entry
879ec7b004SRick Macklem  * 				- increment seqid_refcnt on new cache entry
889ec7b004SRick Macklem  * 				- set pointer from Openowner/Lockowner to
899ec7b004SRick Macklem  * 					new cache entry (aka reference it)
909ec7b004SRick Macklem  * 			  else if first seqid# in sequence
919ec7b004SRick Macklem  * 				- increment seqid_refcnt on new cache entry
929ec7b004SRick Macklem  * 				- set pointer from Openowner/Lockowner to
939ec7b004SRick Macklem  * 					new cache entry (aka reference it)
949ec7b004SRick Macklem  *
959ec7b004SRick Macklem  * 	At end of RPC processing:
969ec7b004SRick Macklem  * 		- if seqid_refcnt > 0 OR flagged non-idempotent on new
979ec7b004SRick Macklem  * 			cache entry
989ec7b004SRick Macklem  * 			- save reply in cache entry
999ec7b004SRick Macklem  * 			- calculate checksum on first N bytes of NFS XDR
1009ec7b004SRick Macklem  * 				request
1019ec7b004SRick Macklem  * 			- note op and length of XDR request (in bytes)
1029ec7b004SRick Macklem  * 			- timestamp it
1039ec7b004SRick Macklem  * 		  else
1049ec7b004SRick Macklem  * 			- free new cache entry
1059ec7b004SRick Macklem  * 		- Send reply (noting info for socket activity check, below)
1069ec7b004SRick Macklem  *
1079ec7b004SRick Macklem  * 	For cache entries saved above:
1089ec7b004SRick Macklem  * 		- if saved since seqid_refcnt was > 0
1099ec7b004SRick Macklem  * 			- free when seqid_refcnt decrements to 0
1109ec7b004SRick Macklem  * 			  (when next one in sequence is processed above, or
1119ec7b004SRick Macklem  * 			   when Openowner/Lockowner is discarded)
1129ec7b004SRick Macklem  * 		  else { non-idempotent Op(s) }
1139ec7b004SRick Macklem  * 			- free when
1149ec7b004SRick Macklem  * 				- some further activity observed on same
1159ec7b004SRick Macklem  * 					socket
1169ec7b004SRick Macklem  * 				  (I'm not yet sure how I'm going to do
1179ec7b004SRick Macklem  * 				   this. Maybe look at the TCP connection
1189ec7b004SRick Macklem  * 				   to see if the send_tcp_sequence# is well
1199ec7b004SRick Macklem  * 				   past sent reply OR K additional RPCs
1209ec7b004SRick Macklem  * 				   replied on same socket OR?)
1219ec7b004SRick Macklem  * 			  OR
1229ec7b004SRick Macklem  * 				- when very old (hours, days, weeks?)
1239ec7b004SRick Macklem  *
1249ec7b004SRick Macklem  * For UDP (v2, 3 only), pretty much the old way:
1259ec7b004SRick Macklem  * - key on <xid, NFS version, RPC#, Client host ip#>
1269ec7b004SRick Macklem  *   (at most one entry for each key)
1279ec7b004SRick Macklem  *
1289ec7b004SRick Macklem  * When a Request arrives:
1299ec7b004SRick Macklem  * - if a match with entry via key
1309ec7b004SRick Macklem  * 	- if RPC marked In_progress
1319ec7b004SRick Macklem  * 		- discard request (don't send reply)
1329ec7b004SRick Macklem  * 	  else
1339ec7b004SRick Macklem  * 		- reply from cache
1349ec7b004SRick Macklem  * 		- timestamp cache entry
1359ec7b004SRick Macklem  *   else
1369ec7b004SRick Macklem  * 	- add entry to cache, marked In_progress
1379ec7b004SRick Macklem  * 	- do RPC
1389ec7b004SRick Macklem  * 	- when RPC done
1399ec7b004SRick Macklem  * 		- if RPC# non-idempotent
1409ec7b004SRick Macklem  * 			- mark entry Done (not In_progress)
1419ec7b004SRick Macklem  * 			- save reply
1429ec7b004SRick Macklem  * 			- timestamp cache entry
1439ec7b004SRick Macklem  * 		  else
1449ec7b004SRick Macklem  * 			- free cache entry
1459ec7b004SRick Macklem  * 		- send reply
1469ec7b004SRick Macklem  *
1479ec7b004SRick Macklem  * Later, entries with saved replies are free'd a short time (few minutes)
1489ec7b004SRick Macklem  * after reply sent (timestamp).
1499ec7b004SRick Macklem  * Reference: Chet Juszczak, "Improving the Performance and Correctness
1509ec7b004SRick Macklem  *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
1519ec7b004SRick Macklem  *		pages 53-63. San Diego, February 1989.
1529ec7b004SRick Macklem  *	 for the UDP case.
1539ec7b004SRick Macklem  * nfsrc_floodlevel is set to the allowable upper limit for saved replies
1549ec7b004SRick Macklem  *	for TCP. For V3, a reply won't be saved when the flood level is
1559ec7b004SRick Macklem  *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
1569ec7b004SRick Macklem  *	that case. This level should be set high enough that this almost
1579ec7b004SRick Macklem  *	never happens.
1589ec7b004SRick Macklem  */
1599ec7b004SRick Macklem #ifndef APPLEKEXT
1609ec7b004SRick Macklem #include <fs/nfs/nfsport.h>
1619ec7b004SRick Macklem 
/* Server statistics; the srvcache_* counters are updated by the cache code in this file. */
extern struct nfsstats newnfsstats;
/*
 * NOTE(review): presumably expands to the definition of the mutex taken by
 * NFSLOCKCACHE()/NFSUNLOCKCACHE() -- confirm against nfsport.h.
 */
NFSCACHEMUTEX;
/*
 * nfsrc_floodlevel:      allowable upper limit on replies saved for TCP
 *                        (compared against in nfsrvd_updatecache()).
 * nfsrc_tcpsavedreplies: incremented in nfsrvd_updatecache() each time a
 *                        reply mbuf chain is saved for a TCP entry.
 */
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
1659ec7b004SRick Macklem #endif	/* !APPLEKEXT */
1669ec7b004SRick Macklem 
/*
 * When non-zero, nfsrvd_updatecache() may save replies flagged ND_SAVEREPLY
 * for requests that arrived over TCP.
 */
static int nfsrc_tcpnonidempotent = 1;
/*
 * nfsrc_udpcachesize is incremented for every entry nfsrc_getudp() inserts.
 * NOTE(review): nfsrc_udphighwater is presumably the size threshold used
 * when trimming the UDP cache -- its use is not visible here; confirm.
 */
static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
/*
 * Queue of all UDP entries.  Entries are moved to the tail whenever they
 * are hit or updated, so the head is the least recently used one.
 */
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
/*
 * Hash chains keyed on xid: nfsrvhashtbl (via NFSRCHASH) holds TCP entries,
 * nfsrvudphashtbl (via NFSRCUDPHASH) holds UDP entries.
 */
static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
    nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
/*
 * Reverse mapping from generic procedure numbers to Version 2 procedure
 * numbers.  The table is indexed by nd_procnum (see nfsrvd_updatecache())
 * and yields the NFSV2PROC_* value used to index nfsv2_repstat[].
 * NOTE(review): entries set to NFSV2PROC_NOOP are presumably generic
 * procedures that have no Version 2 counterpart -- confirm against the
 * protocol header.
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};
1999ec7b004SRick Macklem 
/*
 * Select the hash chain for a transaction id.  The xid is folded with its
 * own value shifted right by 24 bits (the high-order byte of a 32-bit xid)
 * and reduced modulo the table size.  NFSRCUDPHASH picks a chain in the UDP
 * table, NFSRCHASH one in the TCP table.  Both evaluate (xid) twice, so the
 * argument must be free of side effects.
 */
#define	NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
#define	NFSRCHASH(xid) \
	(&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
/* Boolean values used by the nfsv2_repstat[] table. */
#define	TRUE	1
#define	FALSE	0
/*
 * NOTE(review): presumably the "first N bytes of NFS XDR" covered by the
 * request checksum described in the algorithm comment at the top of the
 * file -- the user of this constant is not visible here; confirm.
 */
#define	NFSRVCACHE_CHECKLEN	100
2079ec7b004SRick Macklem 
/*
 * True iff the rpc reply is an nfs status ONLY!
 * Indexed by the Version 2 procedure number obtained from
 * newnfsv2_procid[].  For a TRUE entry nfsrvd_updatecache() records just
 * the status word (rc_status, RC_REPSTATUS) instead of copying the reply
 * mbuf chain.
 */
static int nfsv2_repstat[NFS_V3NPROCS] = {
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	TRUE,
	TRUE,
	TRUE,
	TRUE,
	FALSE,
	TRUE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
};
2339ec7b004SRick Macklem 
/*
 * Address family of the client address stored in a cache entry:
 * AF_INET6 when the entry carries RC_INETIPV6, otherwise AF_INET.
 * (Original note: Will NFS want to work over IPv6 someday?)
 */
#define	NETFAMILY(rp) \
		(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)
2399ec7b004SRick Macklem 
2409ec7b004SRick Macklem /* local functions */
2419ec7b004SRick Macklem static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
2429ec7b004SRick Macklem static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
2439ec7b004SRick Macklem static void nfsrc_lock(struct nfsrvcache *rp);
2449ec7b004SRick Macklem static void nfsrc_unlock(struct nfsrvcache *rp);
2459ec7b004SRick Macklem static void nfsrc_wanted(struct nfsrvcache *rp);
2469ec7b004SRick Macklem static void nfsrc_freecache(struct nfsrvcache *rp);
2479ec7b004SRick Macklem static void nfsrc_trimcache(u_int64_t, struct socket *);
2489ec7b004SRick Macklem static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
2499ec7b004SRick Macklem     struct socket *);
2509ec7b004SRick Macklem static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
2519ec7b004SRick Macklem static void nfsrc_marksametcpconn(u_int64_t);
2529ec7b004SRick Macklem 
2539ec7b004SRick Macklem /*
2549ec7b004SRick Macklem  * Initialize the server request cache list
2559ec7b004SRick Macklem  */
2569ec7b004SRick Macklem APPLESTATIC void
2579ec7b004SRick Macklem nfsrvd_initcache(void)
2589ec7b004SRick Macklem {
2599ec7b004SRick Macklem 	int i;
2609ec7b004SRick Macklem 	static int inited = 0;
2619ec7b004SRick Macklem 
2629ec7b004SRick Macklem 	if (inited)
2639ec7b004SRick Macklem 		return;
2649ec7b004SRick Macklem 	inited = 1;
2659ec7b004SRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
2669ec7b004SRick Macklem 		LIST_INIT(&nfsrvudphashtbl[i]);
2679ec7b004SRick Macklem 		LIST_INIT(&nfsrvhashtbl[i]);
2689ec7b004SRick Macklem 	}
2699ec7b004SRick Macklem 	TAILQ_INIT(&nfsrvudplru);
2709ec7b004SRick Macklem 	nfsrc_tcpsavedreplies = 0;
2719ec7b004SRick Macklem 	nfsrc_udpcachesize = 0;
2729ec7b004SRick Macklem 	newnfsstats.srvcache_tcppeak = 0;
2739ec7b004SRick Macklem 	newnfsstats.srvcache_size = 0;
2749ec7b004SRick Macklem }
2759ec7b004SRick Macklem 
2769ec7b004SRick Macklem /*
2779ec7b004SRick Macklem  * Get a cache entry for this request. Basically just malloc a new one
2789ec7b004SRick Macklem  * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
2799ec7b004SRick Macklem  * Call nfsrc_trimcache() to clean up the cache before returning.
2809ec7b004SRick Macklem  */
2819ec7b004SRick Macklem APPLESTATIC int
2829ec7b004SRick Macklem nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
2839ec7b004SRick Macklem {
2849ec7b004SRick Macklem 	struct nfsrvcache *newrp;
2859ec7b004SRick Macklem 	int ret;
2869ec7b004SRick Macklem 
2879ec7b004SRick Macklem 	if (nd->nd_procnum == NFSPROC_NULL)
2889ec7b004SRick Macklem 		panic("nfsd cache null");
2899ec7b004SRick Macklem 	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
2909ec7b004SRick Macklem 	    M_NFSRVCACHE, M_WAITOK);
2919ec7b004SRick Macklem 	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
2929ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV4)
2939ec7b004SRick Macklem 		newrp->rc_flag = RC_NFSV4;
2949ec7b004SRick Macklem 	else if (nd->nd_flag & ND_NFSV3)
2959ec7b004SRick Macklem 		newrp->rc_flag = RC_NFSV3;
2969ec7b004SRick Macklem 	else
2979ec7b004SRick Macklem 		newrp->rc_flag = RC_NFSV2;
2989ec7b004SRick Macklem 	newrp->rc_xid = nd->nd_retxid;
2999ec7b004SRick Macklem 	newrp->rc_proc = nd->nd_procnum;
3009ec7b004SRick Macklem 	newrp->rc_sockref = nd->nd_sockref;
3019ec7b004SRick Macklem 	newrp->rc_cachetime = nd->nd_tcpconntime;
3029ec7b004SRick Macklem 	if (nd->nd_flag & ND_SAMETCPCONN)
3039ec7b004SRick Macklem 		newrp->rc_flag |= RC_SAMETCPCONN;
3049ec7b004SRick Macklem 	if (nd->nd_nam2 != NULL) {
3059ec7b004SRick Macklem 		newrp->rc_flag |= RC_UDP;
3069ec7b004SRick Macklem 		ret = nfsrc_getudp(nd, newrp);
3079ec7b004SRick Macklem 	} else {
3089ec7b004SRick Macklem 		ret = nfsrc_gettcp(nd, newrp);
3099ec7b004SRick Macklem 	}
3109ec7b004SRick Macklem 	nfsrc_trimcache(nd->nd_sockref, so);
3119ec7b004SRick Macklem 	return (ret);
3129ec7b004SRick Macklem }
3139ec7b004SRick Macklem 
3149ec7b004SRick Macklem /*
3159ec7b004SRick Macklem  * For UDP (v2, v3):
3169ec7b004SRick Macklem  * - key on <xid, NFS version, RPC#, Client host ip#>
3179ec7b004SRick Macklem  *   (at most one entry for each key)
3189ec7b004SRick Macklem  */
3199ec7b004SRick Macklem static int
3209ec7b004SRick Macklem nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
3219ec7b004SRick Macklem {
3229ec7b004SRick Macklem 	struct nfsrvcache *rp;
3239ec7b004SRick Macklem 	struct sockaddr_in *saddr;
3249ec7b004SRick Macklem 	struct sockaddr_in6 *saddr6;
3259ec7b004SRick Macklem 	struct nfsrvhashhead *hp;
3269ec7b004SRick Macklem 	int ret = 0;
3279ec7b004SRick Macklem 
3289ec7b004SRick Macklem 	hp = NFSRCUDPHASH(newrp->rc_xid);
3299ec7b004SRick Macklem loop:
3309ec7b004SRick Macklem 	NFSLOCKCACHE();
3319ec7b004SRick Macklem 	LIST_FOREACH(rp, hp, rc_hash) {
3329ec7b004SRick Macklem 	    if (newrp->rc_xid == rp->rc_xid &&
3339ec7b004SRick Macklem 		newrp->rc_proc == rp->rc_proc &&
3349ec7b004SRick Macklem 		(newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
3359ec7b004SRick Macklem 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
3369ec7b004SRick Macklem 			if ((rp->rc_flag & RC_LOCKED) != 0) {
3379ec7b004SRick Macklem 				rp->rc_flag |= RC_WANTED;
3389ec7b004SRick Macklem 				NFSUNLOCKCACHE();
3399ec7b004SRick Macklem 				(void) tsleep((caddr_t)rp, PZERO - 1,
3409ec7b004SRick Macklem 				    "nfsrc", 10 * hz);
3419ec7b004SRick Macklem 				goto loop;
3429ec7b004SRick Macklem 			}
3439ec7b004SRick Macklem 			if (rp->rc_flag == 0)
3449ec7b004SRick Macklem 				panic("nfs udp cache0");
3459ec7b004SRick Macklem 			rp->rc_flag |= RC_LOCKED;
3469ec7b004SRick Macklem 			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
3479ec7b004SRick Macklem 			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
3489ec7b004SRick Macklem 			if (rp->rc_flag & RC_INPROG) {
3499ec7b004SRick Macklem 				newnfsstats.srvcache_inproghits++;
3509ec7b004SRick Macklem 				NFSUNLOCKCACHE();
3519ec7b004SRick Macklem 				ret = RC_DROPIT;
3529ec7b004SRick Macklem 			} else if (rp->rc_flag & RC_REPSTATUS) {
3539ec7b004SRick Macklem 				/*
3549ec7b004SRick Macklem 				 * V2 only.
3559ec7b004SRick Macklem 				 */
3569ec7b004SRick Macklem 				newnfsstats.srvcache_nonidemdonehits++;
3579ec7b004SRick Macklem 				NFSUNLOCKCACHE();
3589ec7b004SRick Macklem 				nfsrvd_rephead(nd);
3599ec7b004SRick Macklem 				*(nd->nd_errp) = rp->rc_status;
3609ec7b004SRick Macklem 				ret = RC_REPLY;
3619ec7b004SRick Macklem 				rp->rc_timestamp = NFSD_MONOSEC +
3629ec7b004SRick Macklem 					NFSRVCACHE_UDPTIMEOUT;
3639ec7b004SRick Macklem 			} else if (rp->rc_flag & RC_REPMBUF) {
3649ec7b004SRick Macklem 				newnfsstats.srvcache_nonidemdonehits++;
3659ec7b004SRick Macklem 				NFSUNLOCKCACHE();
3669ec7b004SRick Macklem 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
3679ec7b004SRick Macklem 					M_COPYALL, M_WAIT);
3689ec7b004SRick Macklem 				ret = RC_REPLY;
3699ec7b004SRick Macklem 				rp->rc_timestamp = NFSD_MONOSEC +
3709ec7b004SRick Macklem 					NFSRVCACHE_UDPTIMEOUT;
3719ec7b004SRick Macklem 			} else {
3729ec7b004SRick Macklem 				panic("nfs udp cache1");
3739ec7b004SRick Macklem 			}
3749ec7b004SRick Macklem 			nfsrc_unlock(rp);
3759ec7b004SRick Macklem 			free((caddr_t)newrp, M_NFSRVCACHE);
3769ec7b004SRick Macklem 			return (ret);
3779ec7b004SRick Macklem 		}
3789ec7b004SRick Macklem 	}
3799ec7b004SRick Macklem 	newnfsstats.srvcache_misses++;
3809ec7b004SRick Macklem 	newnfsstats.srvcache_size++;
3819ec7b004SRick Macklem 	nfsrc_udpcachesize++;
3829ec7b004SRick Macklem 
3839ec7b004SRick Macklem 	newrp->rc_flag |= RC_INPROG;
3849ec7b004SRick Macklem 	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3859ec7b004SRick Macklem 	if (saddr->sin_family == AF_INET)
3869ec7b004SRick Macklem 		newrp->rc_inet = saddr->sin_addr.s_addr;
3879ec7b004SRick Macklem 	else if (saddr->sin_family == AF_INET6) {
3889ec7b004SRick Macklem 		saddr6 = (struct sockaddr_in6 *)saddr;
3899ec7b004SRick Macklem 		NFSBCOPY((caddr_t)&saddr6->sin6_addr,(caddr_t)&newrp->rc_inet6,
3909ec7b004SRick Macklem 			sizeof (struct in6_addr));
3919ec7b004SRick Macklem 		rp->rc_flag |= RC_INETIPV6;
3929ec7b004SRick Macklem 	}
3939ec7b004SRick Macklem 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
3949ec7b004SRick Macklem 	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
3959ec7b004SRick Macklem 	NFSUNLOCKCACHE();
3969ec7b004SRick Macklem 	nd->nd_rp = newrp;
3979ec7b004SRick Macklem 	return (RC_DOIT);
3989ec7b004SRick Macklem }
3999ec7b004SRick Macklem 
4009ec7b004SRick Macklem /*
4019ec7b004SRick Macklem  * Update a request cache entry after the rpc has been done
4029ec7b004SRick Macklem  */
4039ec7b004SRick Macklem APPLESTATIC struct nfsrvcache *
4049ec7b004SRick Macklem nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
4059ec7b004SRick Macklem {
4069ec7b004SRick Macklem 	struct nfsrvcache *rp;
4079ec7b004SRick Macklem 	struct nfsrvcache *retrp = NULL;
4089ec7b004SRick Macklem 
4099ec7b004SRick Macklem 	rp = nd->nd_rp;
4109ec7b004SRick Macklem 	if (!rp)
4119ec7b004SRick Macklem 		panic("nfsrvd_updatecache null rp");
4129ec7b004SRick Macklem 	nd->nd_rp = NULL;
4139ec7b004SRick Macklem 	NFSLOCKCACHE();
4149ec7b004SRick Macklem 	nfsrc_lock(rp);
4159ec7b004SRick Macklem 	if (!(rp->rc_flag & RC_INPROG))
4169ec7b004SRick Macklem 		panic("nfsrvd_updatecache not inprog");
4179ec7b004SRick Macklem 	rp->rc_flag &= ~RC_INPROG;
4189ec7b004SRick Macklem 	if (rp->rc_flag & RC_UDP) {
4199ec7b004SRick Macklem 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
4209ec7b004SRick Macklem 		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
4219ec7b004SRick Macklem 	}
4229ec7b004SRick Macklem 
4239ec7b004SRick Macklem 	/*
4249ec7b004SRick Macklem 	 * Reply from cache is a special case returned by nfsrv_checkseqid().
4259ec7b004SRick Macklem 	 */
4269ec7b004SRick Macklem 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
4279ec7b004SRick Macklem 		newnfsstats.srvcache_nonidemdonehits++;
4289ec7b004SRick Macklem 		NFSUNLOCKCACHE();
4299ec7b004SRick Macklem 		nd->nd_repstat = 0;
4309ec7b004SRick Macklem 		if (nd->nd_mreq)
4319ec7b004SRick Macklem 			mbuf_freem(nd->nd_mreq);
4329ec7b004SRick Macklem 		if (!(rp->rc_flag & RC_REPMBUF))
4339ec7b004SRick Macklem 			panic("reply from cache");
4349ec7b004SRick Macklem 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
4359ec7b004SRick Macklem 		    M_COPYALL, M_WAIT);
4369ec7b004SRick Macklem 		rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT;
4379ec7b004SRick Macklem 		nfsrc_unlock(rp);
4389ec7b004SRick Macklem 		nfsrc_trimcache(nd->nd_sockref, so);
4399ec7b004SRick Macklem 		return (retrp);
4409ec7b004SRick Macklem 	}
4419ec7b004SRick Macklem 
4429ec7b004SRick Macklem 	/*
4439ec7b004SRick Macklem 	 * If rc_refcnt > 0, save it
4449ec7b004SRick Macklem 	 * For UDP, save it if ND_SAVEREPLY is set
4459ec7b004SRick Macklem 	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
4469ec7b004SRick Macklem 	 */
4479ec7b004SRick Macklem 	if (nd->nd_repstat != NFSERR_DONTREPLY &&
4489ec7b004SRick Macklem 	    (rp->rc_refcnt > 0 ||
4499ec7b004SRick Macklem 	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
4509ec7b004SRick Macklem 	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
4519ec7b004SRick Macklem 	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
4529ec7b004SRick Macklem 	      nfsrc_tcpnonidempotent))) {
4539ec7b004SRick Macklem 		if (rp->rc_refcnt > 0) {
4549ec7b004SRick Macklem 			if (!(rp->rc_flag & RC_NFSV4))
4559ec7b004SRick Macklem 				panic("update_cache refcnt");
4569ec7b004SRick Macklem 			rp->rc_flag |= RC_REFCNT;
4579ec7b004SRick Macklem 		}
4589ec7b004SRick Macklem 		if ((nd->nd_flag & ND_NFSV2) &&
4599ec7b004SRick Macklem 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
4609ec7b004SRick Macklem 			NFSUNLOCKCACHE();
4619ec7b004SRick Macklem 			rp->rc_status = nd->nd_repstat;
4629ec7b004SRick Macklem 			rp->rc_flag |= RC_REPSTATUS;
4639ec7b004SRick Macklem 		} else {
4649ec7b004SRick Macklem 			if (!(rp->rc_flag & RC_UDP)) {
4659ec7b004SRick Macklem 			    nfsrc_tcpsavedreplies++;
4669ec7b004SRick Macklem 			    if (nfsrc_tcpsavedreplies >
4679ec7b004SRick Macklem 				newnfsstats.srvcache_tcppeak)
4689ec7b004SRick Macklem 				newnfsstats.srvcache_tcppeak =
4699ec7b004SRick Macklem 				    nfsrc_tcpsavedreplies;
4709ec7b004SRick Macklem 			}
4719ec7b004SRick Macklem 			NFSUNLOCKCACHE();
4729ec7b004SRick Macklem 			rp->rc_reply = m_copym(nd->nd_mreq, 0, M_COPYALL,
4739ec7b004SRick Macklem 			    M_WAIT);
4749ec7b004SRick Macklem 			rp->rc_flag |= RC_REPMBUF;
4759ec7b004SRick Macklem 		}
4769ec7b004SRick Macklem 		if (rp->rc_flag & RC_UDP) {
4779ec7b004SRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC +
4789ec7b004SRick Macklem 			    NFSRVCACHE_UDPTIMEOUT;
4799ec7b004SRick Macklem 			nfsrc_unlock(rp);
4809ec7b004SRick Macklem 		} else {
4819ec7b004SRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC +
4829ec7b004SRick Macklem 			    NFSRVCACHE_TCPTIMEOUT;
4839ec7b004SRick Macklem 			if (rp->rc_refcnt > 0)
4849ec7b004SRick Macklem 				nfsrc_unlock(rp);
4859ec7b004SRick Macklem 			else
4869ec7b004SRick Macklem 				retrp = rp;
4879ec7b004SRick Macklem 		}
4889ec7b004SRick Macklem 	} else {
4899ec7b004SRick Macklem 		nfsrc_freecache(rp);
4909ec7b004SRick Macklem 		NFSUNLOCKCACHE();
4919ec7b004SRick Macklem 	}
4929ec7b004SRick Macklem 	nfsrc_trimcache(nd->nd_sockref, so);
4939ec7b004SRick Macklem 	return (retrp);
4949ec7b004SRick Macklem }
4959ec7b004SRick Macklem 
4969ec7b004SRick Macklem /*
4979ec7b004SRick Macklem  * Invalidate and, if possible, free an in prog cache entry.
4989ec7b004SRick Macklem  * Must not sleep.
4999ec7b004SRick Macklem  */
5009ec7b004SRick Macklem APPLESTATIC void
5019ec7b004SRick Macklem nfsrvd_delcache(struct nfsrvcache *rp)
5029ec7b004SRick Macklem {
5039ec7b004SRick Macklem 
5049ec7b004SRick Macklem 	if (!(rp->rc_flag & RC_INPROG))
5059ec7b004SRick Macklem 		panic("nfsrvd_delcache not in prog");
5069ec7b004SRick Macklem 	NFSLOCKCACHE();
5079ec7b004SRick Macklem 	rp->rc_flag &= ~RC_INPROG;
5089ec7b004SRick Macklem 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
5099ec7b004SRick Macklem 		nfsrc_freecache(rp);
5109ec7b004SRick Macklem 	NFSUNLOCKCACHE();
5119ec7b004SRick Macklem }
5129ec7b004SRick Macklem 
5139ec7b004SRick Macklem /*
5149ec7b004SRick Macklem  * Called after nfsrvd_updatecache() once the reply is sent, to update
5159ec7b004SRick Macklem  * the entry for nfsrc_activesocket() and unlock it. The argument is
5169ec7b004SRick Macklem  * the pointer returned by nfsrvd_updatecache().
5179ec7b004SRick Macklem  */
5189ec7b004SRick Macklem APPLESTATIC void
5199ec7b004SRick Macklem nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
5209ec7b004SRick Macklem {
5219ec7b004SRick Macklem 
5229ec7b004SRick Macklem 	if (!(rp->rc_flag & RC_LOCKED))
5239ec7b004SRick Macklem 		panic("nfsrvd_sentcache not locked");
5249ec7b004SRick Macklem 	if (!err) {
5259ec7b004SRick Macklem 		if (so->so_proto->pr_domain->dom_family != AF_INET ||
5269ec7b004SRick Macklem 		    so->so_proto->pr_protocol != IPPROTO_TCP)
5279ec7b004SRick Macklem 			panic("nfs sent cache");
5289ec7b004SRick Macklem 		if (nfsrv_getsockseqnum(so, &rp->rc_tcpseq))
5299ec7b004SRick Macklem 			rp->rc_flag |= RC_TCPSEQ;
5309ec7b004SRick Macklem 	}
5319ec7b004SRick Macklem 	nfsrc_unlock(rp);
5329ec7b004SRick Macklem }
5339ec7b004SRick Macklem 
5349ec7b004SRick Macklem /*
5359ec7b004SRick Macklem  * Get a cache entry for TCP
5369ec7b004SRick Macklem  * - key on <xid, nfs version>
5379ec7b004SRick Macklem  *   (allow multiple entries for a given key)
5389ec7b004SRick Macklem  */
5399ec7b004SRick Macklem static int
5409ec7b004SRick Macklem nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
5419ec7b004SRick Macklem {
5429ec7b004SRick Macklem 	struct nfsrvcache *rp, *nextrp;
5439ec7b004SRick Macklem 	int i;
5449ec7b004SRick Macklem 	struct nfsrvcache *hitrp;
5459ec7b004SRick Macklem 	struct nfsrvhashhead *hp, nfsrc_templist;
5469ec7b004SRick Macklem 	int hit, ret = 0;
5479ec7b004SRick Macklem 
5489ec7b004SRick Macklem 	hp = NFSRCHASH(newrp->rc_xid);
5499ec7b004SRick Macklem 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
5509ec7b004SRick Macklem tryagain:
5519ec7b004SRick Macklem 	NFSLOCKCACHE();
5529ec7b004SRick Macklem 	hit = 1;
5539ec7b004SRick Macklem 	LIST_INIT(&nfsrc_templist);
5549ec7b004SRick Macklem 	/*
5559ec7b004SRick Macklem 	 * Get all the matches and put them on the temp list.
5569ec7b004SRick Macklem 	 */
5579ec7b004SRick Macklem 	rp = LIST_FIRST(hp);
5589ec7b004SRick Macklem 	while (rp != LIST_END(hp)) {
5599ec7b004SRick Macklem 		nextrp = LIST_NEXT(rp, rc_hash);
5609ec7b004SRick Macklem 		if (newrp->rc_xid == rp->rc_xid &&
5619ec7b004SRick Macklem 		    (!(rp->rc_flag & RC_INPROG) ||
5629ec7b004SRick Macklem 		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
5639ec7b004SRick Macklem 		      newrp->rc_sockref == rp->rc_sockref)) &&
5649ec7b004SRick Macklem 		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
5659ec7b004SRick Macklem 		    newrp->rc_proc == rp->rc_proc &&
5669ec7b004SRick Macklem 		    ((newrp->rc_flag & RC_NFSV4) &&
5679ec7b004SRick Macklem 		     newrp->rc_sockref != rp->rc_sockref &&
5689ec7b004SRick Macklem 		     newrp->rc_cachetime >= rp->rc_cachetime)
5699ec7b004SRick Macklem 		    && newrp->rc_reqlen == rp->rc_reqlen &&
5709ec7b004SRick Macklem 		    newrp->rc_cksum == rp->rc_cksum) {
5719ec7b004SRick Macklem 			LIST_REMOVE(rp, rc_hash);
5729ec7b004SRick Macklem 			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
5739ec7b004SRick Macklem 		}
5749ec7b004SRick Macklem 		rp = nextrp;
5759ec7b004SRick Macklem 	}
5769ec7b004SRick Macklem 
5779ec7b004SRick Macklem 	/*
5789ec7b004SRick Macklem 	 * Now, use nfsrc_templist to decide if there is a match.
5799ec7b004SRick Macklem 	 */
5809ec7b004SRick Macklem 	i = 0;
5819ec7b004SRick Macklem 	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
5829ec7b004SRick Macklem 		i++;
5839ec7b004SRick Macklem 		if (rp->rc_refcnt > 0) {
5849ec7b004SRick Macklem 			hit = 0;
5859ec7b004SRick Macklem 			break;
5869ec7b004SRick Macklem 		}
5879ec7b004SRick Macklem 	}
5889ec7b004SRick Macklem 	/*
5899ec7b004SRick Macklem 	 * Can be a hit only if one entry left.
5909ec7b004SRick Macklem 	 * Note possible hit entry and put nfsrc_templist back on hash
5919ec7b004SRick Macklem 	 * list.
5929ec7b004SRick Macklem 	 */
5939ec7b004SRick Macklem 	if (i != 1)
5949ec7b004SRick Macklem 		hit = 0;
5959ec7b004SRick Macklem 	hitrp = rp = LIST_FIRST(&nfsrc_templist);
5969ec7b004SRick Macklem 	while (rp != LIST_END(&nfsrc_templist)) {
5979ec7b004SRick Macklem 		nextrp = LIST_NEXT(rp, rc_hash);
5989ec7b004SRick Macklem 		LIST_REMOVE(rp, rc_hash);
5999ec7b004SRick Macklem 		LIST_INSERT_HEAD(hp, rp, rc_hash);
6009ec7b004SRick Macklem 		rp = nextrp;
6019ec7b004SRick Macklem 	}
6029ec7b004SRick Macklem 	if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist))
6039ec7b004SRick Macklem 		panic("nfs gettcp cache templist");
6049ec7b004SRick Macklem 
6059ec7b004SRick Macklem 	if (hit) {
6069ec7b004SRick Macklem 		rp = hitrp;
6079ec7b004SRick Macklem 		if ((rp->rc_flag & RC_LOCKED) != 0) {
6089ec7b004SRick Macklem 			rp->rc_flag |= RC_WANTED;
6099ec7b004SRick Macklem 			NFSUNLOCKCACHE();
6109ec7b004SRick Macklem 			(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 10 * hz);
6119ec7b004SRick Macklem 			goto tryagain;
6129ec7b004SRick Macklem 		}
6139ec7b004SRick Macklem 		if (rp->rc_flag == 0)
6149ec7b004SRick Macklem 			panic("nfs tcp cache0");
6159ec7b004SRick Macklem 		rp->rc_flag |= RC_LOCKED;
6169ec7b004SRick Macklem 		if (rp->rc_flag & RC_INPROG) {
6179ec7b004SRick Macklem 			newnfsstats.srvcache_inproghits++;
6189ec7b004SRick Macklem 			NFSUNLOCKCACHE();
6199ec7b004SRick Macklem 			if (newrp->rc_sockref == rp->rc_sockref)
6209ec7b004SRick Macklem 				nfsrc_marksametcpconn(rp->rc_sockref);
6219ec7b004SRick Macklem 			ret = RC_DROPIT;
6229ec7b004SRick Macklem 		} else if (rp->rc_flag & RC_REPSTATUS) {
6239ec7b004SRick Macklem 			/*
6249ec7b004SRick Macklem 			 * V2 only.
6259ec7b004SRick Macklem 			 */
6269ec7b004SRick Macklem 			newnfsstats.srvcache_nonidemdonehits++;
6279ec7b004SRick Macklem 			NFSUNLOCKCACHE();
6289ec7b004SRick Macklem 			if (newrp->rc_sockref == rp->rc_sockref)
6299ec7b004SRick Macklem 				nfsrc_marksametcpconn(rp->rc_sockref);
6309ec7b004SRick Macklem 			ret = RC_REPLY;
6319ec7b004SRick Macklem 			nfsrvd_rephead(nd);
6329ec7b004SRick Macklem 			*(nd->nd_errp) = rp->rc_status;
6339ec7b004SRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC +
6349ec7b004SRick Macklem 				NFSRVCACHE_TCPTIMEOUT;
6359ec7b004SRick Macklem 		} else if (rp->rc_flag & RC_REPMBUF) {
6369ec7b004SRick Macklem 			newnfsstats.srvcache_nonidemdonehits++;
6379ec7b004SRick Macklem 			NFSUNLOCKCACHE();
6389ec7b004SRick Macklem 			if (newrp->rc_sockref == rp->rc_sockref)
6399ec7b004SRick Macklem 				nfsrc_marksametcpconn(rp->rc_sockref);
6409ec7b004SRick Macklem 			ret = RC_REPLY;
6419ec7b004SRick Macklem 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
6429ec7b004SRick Macklem 				M_COPYALL, M_WAIT);
6439ec7b004SRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC +
6449ec7b004SRick Macklem 				NFSRVCACHE_TCPTIMEOUT;
6459ec7b004SRick Macklem 		} else {
6469ec7b004SRick Macklem 			panic("nfs tcp cache1");
6479ec7b004SRick Macklem 		}
6489ec7b004SRick Macklem 		nfsrc_unlock(rp);
6499ec7b004SRick Macklem 		free((caddr_t)newrp, M_NFSRVCACHE);
6509ec7b004SRick Macklem 		return (ret);
6519ec7b004SRick Macklem 	}
6529ec7b004SRick Macklem 	newnfsstats.srvcache_misses++;
6539ec7b004SRick Macklem 	newnfsstats.srvcache_size++;
6549ec7b004SRick Macklem 
6559ec7b004SRick Macklem 	/*
6569ec7b004SRick Macklem 	 * For TCP, multiple entries for a key are allowed, so don't
6579ec7b004SRick Macklem 	 * chain it into the hash table until done.
6589ec7b004SRick Macklem 	 */
6599ec7b004SRick Macklem 	newrp->rc_cachetime = NFSD_MONOSEC;
6609ec7b004SRick Macklem 	newrp->rc_flag |= RC_INPROG;
6619ec7b004SRick Macklem 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
6629ec7b004SRick Macklem 	NFSUNLOCKCACHE();
6639ec7b004SRick Macklem 	nd->nd_rp = newrp;
6649ec7b004SRick Macklem 	return (RC_DOIT);
6659ec7b004SRick Macklem }
6669ec7b004SRick Macklem 
6679ec7b004SRick Macklem /*
6689ec7b004SRick Macklem  * Lock a cache entry.
6699ec7b004SRick Macklem  * Also puts a mutex lock on the cache list.
6709ec7b004SRick Macklem  */
6719ec7b004SRick Macklem static void
6729ec7b004SRick Macklem nfsrc_lock(struct nfsrvcache *rp)
6739ec7b004SRick Macklem {
6749ec7b004SRick Macklem 	NFSCACHELOCKREQUIRED();
6759ec7b004SRick Macklem 	while ((rp->rc_flag & RC_LOCKED) != 0) {
6769ec7b004SRick Macklem 		rp->rc_flag |= RC_WANTED;
6779ec7b004SRick Macklem 		(void) nfsmsleep((caddr_t)rp, NFSCACHEMUTEXPTR, PZERO - 1,
6789ec7b004SRick Macklem 		    "nfsrc", 0);
6799ec7b004SRick Macklem 	}
6809ec7b004SRick Macklem 	rp->rc_flag |= RC_LOCKED;
6819ec7b004SRick Macklem }
6829ec7b004SRick Macklem 
6839ec7b004SRick Macklem /*
6849ec7b004SRick Macklem  * Unlock a cache entry.
6859ec7b004SRick Macklem  */
6869ec7b004SRick Macklem static void
6879ec7b004SRick Macklem nfsrc_unlock(struct nfsrvcache *rp)
6889ec7b004SRick Macklem {
6899ec7b004SRick Macklem 	rp->rc_flag &= ~RC_LOCKED;
6909ec7b004SRick Macklem 	nfsrc_wanted(rp);
6919ec7b004SRick Macklem }
6929ec7b004SRick Macklem 
6939ec7b004SRick Macklem /*
6949ec7b004SRick Macklem  * Wakeup anyone wanting entry.
6959ec7b004SRick Macklem  */
6969ec7b004SRick Macklem static void
6979ec7b004SRick Macklem nfsrc_wanted(struct nfsrvcache *rp)
6989ec7b004SRick Macklem {
6999ec7b004SRick Macklem 	if (rp->rc_flag & RC_WANTED) {
7009ec7b004SRick Macklem 		rp->rc_flag &= ~RC_WANTED;
7019ec7b004SRick Macklem 		wakeup((caddr_t)rp);
7029ec7b004SRick Macklem 	}
7039ec7b004SRick Macklem }
7049ec7b004SRick Macklem 
7059ec7b004SRick Macklem /*
7069ec7b004SRick Macklem  * Free up the entry.
7079ec7b004SRick Macklem  * Must not sleep.
7089ec7b004SRick Macklem  */
7099ec7b004SRick Macklem static void
7109ec7b004SRick Macklem nfsrc_freecache(struct nfsrvcache *rp)
7119ec7b004SRick Macklem {
7129ec7b004SRick Macklem 
7139ec7b004SRick Macklem 	NFSCACHELOCKREQUIRED();
7149ec7b004SRick Macklem 	LIST_REMOVE(rp, rc_hash);
7159ec7b004SRick Macklem 	if (rp->rc_flag & RC_UDP) {
7169ec7b004SRick Macklem 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
7179ec7b004SRick Macklem 		nfsrc_udpcachesize--;
7189ec7b004SRick Macklem 	}
7199ec7b004SRick Macklem 	nfsrc_wanted(rp);
7209ec7b004SRick Macklem 	if (rp->rc_flag & RC_REPMBUF) {
7219ec7b004SRick Macklem 		mbuf_freem(rp->rc_reply);
7229ec7b004SRick Macklem 		if (!(rp->rc_flag & RC_UDP))
7239ec7b004SRick Macklem 			nfsrc_tcpsavedreplies--;
7249ec7b004SRick Macklem 	}
7259ec7b004SRick Macklem 	FREE((caddr_t)rp, M_NFSRVCACHE);
7269ec7b004SRick Macklem 	newnfsstats.srvcache_size--;
7279ec7b004SRick Macklem }
7289ec7b004SRick Macklem 
#ifdef notdef
/*
 * Empty the whole request cache.  Intended to be called when the last nfsd
 * terminates.  (Currently compiled out by the surrounding #ifdef notdef.)
 *
 * With the cache mutex held, every entry on every bucket of nfsrvhashtbl is
 * freed first, then every entry on every bucket of nfsrvudphashtbl.  The
 * size statistic and the saved-TCP-reply counter are then reset to zero.
 */
APPLESTATIC void
nfsrvd_cleancache(void)
{
	struct nfsrvcache *ent;
	int bucket;

	NFSLOCKCACHE();
	/*
	 * nfsrc_freecache() unlinks the entry from its hash chain, so
	 * repeatedly freeing the head drains each chain front to back.
	 */
	for (bucket = 0; bucket < NFSRVCACHE_HASHSIZE; bucket++) {
		while ((ent = LIST_FIRST(&nfsrvhashtbl[bucket])) != NULL)
			nfsrc_freecache(ent);
	}
	for (bucket = 0; bucket < NFSRVCACHE_HASHSIZE; bucket++) {
		while ((ent = LIST_FIRST(&nfsrvudphashtbl[bucket])) != NULL)
			nfsrc_freecache(ent);
	}
	newnfsstats.srvcache_size = 0;
	nfsrc_tcpsavedreplies = 0;
	NFSUNLOCKCACHE();
}
#endif	/* notdef */
7559ec7b004SRick Macklem 
7569ec7b004SRick Macklem /*
7579ec7b004SRick Macklem  * The basic rule is to get rid of entries that are expired.
7589ec7b004SRick Macklem  */
7599ec7b004SRick Macklem static void
7609ec7b004SRick Macklem nfsrc_trimcache(u_int64_t sockref, struct socket *so)
7619ec7b004SRick Macklem {
7629ec7b004SRick Macklem 	struct nfsrvcache *rp, *nextrp;
7639ec7b004SRick Macklem 	int i;
7649ec7b004SRick Macklem 
7659ec7b004SRick Macklem 	NFSLOCKCACHE();
7669ec7b004SRick Macklem 	TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
7679ec7b004SRick Macklem 		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
7689ec7b004SRick Macklem 		     && rp->rc_refcnt == 0
7699ec7b004SRick Macklem 		     && ((rp->rc_flag & RC_REFCNT) ||
7709ec7b004SRick Macklem 			 NFSD_MONOSEC > rp->rc_timestamp ||
7719ec7b004SRick Macklem 			 nfsrc_udpcachesize > nfsrc_udphighwater))
7729ec7b004SRick Macklem 			nfsrc_freecache(rp);
7739ec7b004SRick Macklem 	}
7749ec7b004SRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
7759ec7b004SRick Macklem 		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
7769ec7b004SRick Macklem 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
7779ec7b004SRick Macklem 			     && rp->rc_refcnt == 0
7789ec7b004SRick Macklem 			     && ((rp->rc_flag & RC_REFCNT) ||
7799ec7b004SRick Macklem 				 NFSD_MONOSEC > rp->rc_timestamp ||
7809ec7b004SRick Macklem 				 nfsrc_activesocket(rp, sockref, so)))
7819ec7b004SRick Macklem 				nfsrc_freecache(rp);
7829ec7b004SRick Macklem 		}
7839ec7b004SRick Macklem 	}
7849ec7b004SRick Macklem 	NFSUNLOCKCACHE();
7859ec7b004SRick Macklem }
7869ec7b004SRick Macklem 
7879ec7b004SRick Macklem /*
7889ec7b004SRick Macklem  * Add a seqid# reference to the cache entry.
7899ec7b004SRick Macklem  */
7909ec7b004SRick Macklem APPLESTATIC void
7919ec7b004SRick Macklem nfsrvd_refcache(struct nfsrvcache *rp)
7929ec7b004SRick Macklem {
7939ec7b004SRick Macklem 
7949ec7b004SRick Macklem 	NFSLOCKCACHE();
7959ec7b004SRick Macklem 	if (rp->rc_refcnt < 0)
7969ec7b004SRick Macklem 		panic("nfs cache refcnt");
7979ec7b004SRick Macklem 	rp->rc_refcnt++;
7989ec7b004SRick Macklem 	NFSUNLOCKCACHE();
7999ec7b004SRick Macklem }
8009ec7b004SRick Macklem 
8019ec7b004SRick Macklem /*
8029ec7b004SRick Macklem  * Dereference a seqid# cache entry.
8039ec7b004SRick Macklem  */
8049ec7b004SRick Macklem APPLESTATIC void
8059ec7b004SRick Macklem nfsrvd_derefcache(struct nfsrvcache *rp)
8069ec7b004SRick Macklem {
8079ec7b004SRick Macklem 
8089ec7b004SRick Macklem 	NFSLOCKCACHE();
8099ec7b004SRick Macklem 	if (rp->rc_refcnt <= 0)
8109ec7b004SRick Macklem 		panic("nfs cache derefcnt");
8119ec7b004SRick Macklem 	rp->rc_refcnt--;
8129ec7b004SRick Macklem 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
8139ec7b004SRick Macklem 		nfsrc_freecache(rp);
8149ec7b004SRick Macklem 	NFSUNLOCKCACHE();
8159ec7b004SRick Macklem }
8169ec7b004SRick Macklem 
8179ec7b004SRick Macklem /*
8189ec7b004SRick Macklem  * Check to see if the socket is active.
8199ec7b004SRick Macklem  * Return 1 if the reply has been received/acknowledged by the client,
8209ec7b004SRick Macklem  * 0 otherwise.
8219ec7b004SRick Macklem  * XXX - Uses tcp internals.
8229ec7b004SRick Macklem  */
8239ec7b004SRick Macklem static int
8249ec7b004SRick Macklem nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
8259ec7b004SRick Macklem     struct socket *cur_so)
8269ec7b004SRick Macklem {
8279ec7b004SRick Macklem 	int ret = 0;
8289ec7b004SRick Macklem 
8299ec7b004SRick Macklem 	if (!(rp->rc_flag & RC_TCPSEQ))
8309ec7b004SRick Macklem 		return (ret);
8319ec7b004SRick Macklem 	/*
8329ec7b004SRick Macklem 	 * If the sockref is the same, it is the same TCP connection.
8339ec7b004SRick Macklem 	 */
8349ec7b004SRick Macklem 	if (cur_sockref == rp->rc_sockref)
8359ec7b004SRick Macklem 		ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
8369ec7b004SRick Macklem 	return (ret);
8379ec7b004SRick Macklem }
8389ec7b004SRick Macklem 
8399ec7b004SRick Macklem /*
8409ec7b004SRick Macklem  * Calculate the length of the mbuf list and a checksum on the first up to
8419ec7b004SRick Macklem  * NFSRVCACHE_CHECKLEN bytes.
8429ec7b004SRick Macklem  */
8439ec7b004SRick Macklem static int
8449ec7b004SRick Macklem nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
8459ec7b004SRick Macklem {
8469ec7b004SRick Macklem 	int len = 0, cklen;
8479ec7b004SRick Macklem 	mbuf_t m;
8489ec7b004SRick Macklem 
8499ec7b004SRick Macklem 	m = m1;
8509ec7b004SRick Macklem 	while (m) {
8519ec7b004SRick Macklem 		len += mbuf_len(m);
8529ec7b004SRick Macklem 		m = mbuf_next(m);
8539ec7b004SRick Macklem 	}
8549ec7b004SRick Macklem 	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
8559ec7b004SRick Macklem 	*cksum = in_cksum(m1, cklen);
8569ec7b004SRick Macklem 	return (len);
8579ec7b004SRick Macklem }
8589ec7b004SRick Macklem 
/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 *
 * Currently a placeholder: the body is empty and sockref is not used.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{

}
8679ec7b004SRick Macklem 
868