/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
/*
 * Here is the basic algorithm:
 * First, some design criteria I used:
 * - I think a false hit is more serious than a false miss
 * - A false hit for an RPC that has Op(s) that order via seqid# must be
 *   avoided at all cost
 * - A valid hit will probably happen a long time after the original reply
 *   and the TCP socket that the original request was received on will no
 *   longer be active
 *   (The long time delay implies to me that LRU is not appropriate.)
 * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
 *   in them as well as minimizing the risk of redoing retried non-idempotent
 *   Ops.
 * Because it is biased towards avoiding false hits, multiple entries with
 * the same xid are to be expected, especially for the case of the entry
 * in the cache being related to a seqid# sequenced Op.
 *
 * The basic algorithm I'm about to code up:
 * - Null RPCs bypass the cache and are just done
 * For TCP
 * 	- key on <xid, NFS version> (as noted above, there can be several
 * 				     entries with the same key)
 * 	When a request arrives:
 * 		For all that match key
 * 		- if RPC# != OR request_size !=
 * 			- not a match with this one
 * 		- if NFSv4 and received on same TCP socket OR
 *			received on a TCP connection created before the
 *			entry was cached
 * 			- not a match with this one
 * 			(V2,3 clients might retry on same TCP socket)
 * 		- calculate checksum on first N bytes of NFS XDR
 * 		- if checksum !=
 * 			- not a match for this one
 * 		If any of the remaining ones that match has a
 * 			seqid_refcnt > 0
 * 			- not a match (go do RPC, using new cache entry)
 * 		If one match left
 * 			- a hit (reply from cache)
 * 		else
 * 			- miss (go do RPC, using new cache entry)
 *
 * 	During processing of NFSv4 request:
 * 		- set a flag when a non-idempotent Op is processed
 * 		- when an Op that uses a seqid# (Open,...) is processed
 * 			- if same seqid# as referenced entry in cache
 * 				- free new cache entry
 * 				- reply from referenced cache entry
 * 			  else if next seqid# in order
 * 				- free referenced cache entry
 * 				- increment seqid_refcnt on new cache entry
 * 				- set pointer from Openowner/Lockowner to
 * 					new cache entry (aka reference it)
 * 			  else if first seqid# in sequence
 * 				- increment seqid_refcnt on new cache entry
 * 				- set pointer from Openowner/Lockowner to
 * 					new cache entry (aka reference it)
 *
 * 	At end of RPC processing:
 * 		- if seqid_refcnt > 0 OR flagged non-idempotent on new
 * 			cache entry
 * 			- save reply in cache entry
 * 			- calculate checksum on first N bytes of NFS XDR
 * 				request
 * 			- note op and length of XDR request (in bytes)
 * 			- timestamp it
 * 		  else
 * 			- free new cache entry
 * 		- Send reply (noting info for socket activity check, below)
 *
 * 	For cache entries saved above:
 * 		- if saved since seqid_refcnt was > 0
 * 			- free when seqid_refcnt decrements to 0
 * 			  (when next one in sequence is processed above, or
 * 			   when Openowner/Lockowner is discarded)
 * 		  else { non-idempotent Op(s) }
 * 			- free when
 * 				- some further activity observed on same
 * 					socket
 * 				  (I'm not yet sure how I'm going to do
 * 				   this. Maybe look at the TCP connection
 * 				   to see if the send_tcp_sequence# is well
 * 				   past sent reply OR K additional RPCs
 * 				   replied on same socket OR?)
 * 			  OR
 * 				- when very old (hours, days, weeks?)
 *
 * For UDP (v2, 3 only), pretty much the old way:
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 *
 * When a Request arrives:
 * - if a match with entry via key
 * 	- if RPC marked In_progress
 * 		- discard request (don't send reply)
 * 	  else
 * 		- reply from cache
 * 		- timestamp cache entry
 *   else
 * 	- add entry to cache, marked In_progress
 * 	- do RPC
 * 	- when RPC done
 * 		- if RPC# non-idempotent
 * 			- mark entry Done (not In_progress)
 * 			- save reply
 * 			- timestamp cache entry
 * 		  else
 * 			- free cache entry
 * 		- send reply
 *
 * Later, entries with saved replies are free'd a short time (few minutes)
 * after reply sent (timestamp).
 * Reference: Chet Juszczak, "Improving the Performance and Correctness
 *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 *		pages 53-63. San Diego, February 1989.
 *	 for the UDP case.
 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
 *	for TCP. For V3, a reply won't be saved when the flood level is
 *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
 *	that case. This level should be set high enough that this almost
 *	never happens.
 */
1599ec7b004SRick Macklem #include <fs/nfs/nfsport.h>
1609ec7b004SRick Macklem 
16193c5875bSRick Macklem extern struct mtx nfsrc_udpmtx;
1627e44856eSRick Macklem 
1637e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsrvhashhead *, nfsrvudphashtbl);
1647e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsrchash_bucket *, nfsrchash_table);
1657e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsrchash_bucket *, nfsrcahash_table);
1667e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
1677e44856eSRick Macklem 
1687e44856eSRick Macklem NFSD_VNET_DEFINE(int, nfsrc_floodlevel) = NFSRVCACHE_FLOODLEVEL;
1697e44856eSRick Macklem NFSD_VNET_DEFINE(int, nfsrc_tcpsavedreplies) = 0;
1709ec7b004SRick Macklem 
17193c5875bSRick Macklem SYSCTL_DECL(_vfs_nfsd);
17293c5875bSRick Macklem 
17393c5875bSRick Macklem static u_int	nfsrc_tcphighwater = 0;
17493c5875bSRick Macklem static int
sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)17593c5875bSRick Macklem sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
17693c5875bSRick Macklem {
17793c5875bSRick Macklem 	int error, newhighwater;
17893c5875bSRick Macklem 
17993c5875bSRick Macklem 	newhighwater = nfsrc_tcphighwater;
18093c5875bSRick Macklem 	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
18193c5875bSRick Macklem 	if (error != 0 || req->newptr == NULL)
18293c5875bSRick Macklem 		return (error);
18393c5875bSRick Macklem 	if (newhighwater < 0)
18493c5875bSRick Macklem 		return (EINVAL);
1857e44856eSRick Macklem 	if (newhighwater >= NFSD_VNET(nfsrc_floodlevel))
1867e44856eSRick Macklem 		NFSD_VNET(nfsrc_floodlevel) = newhighwater + newhighwater / 5;
18793c5875bSRick Macklem 	nfsrc_tcphighwater = newhighwater;
18893c5875bSRick Macklem 	return (0);
18993c5875bSRick Macklem }
1907493134eSMateusz Guzik SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater,
1917493134eSMateusz Guzik     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrc_tcphighwater),
1927493134eSMateusz Guzik     sysctl_tcphighwater, "IU", "High water mark for TCP cache entries");
19393c5875bSRick Macklem 
19493c5875bSRick Macklem static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
19593c5875bSRick Macklem SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
19693c5875bSRick Macklem     &nfsrc_udphighwater, 0,
19793c5875bSRick Macklem     "High water mark for UDP cache entries");
19893c5875bSRick Macklem static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
19993c5875bSRick Macklem SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
20093c5875bSRick Macklem     &nfsrc_tcptimeout, 0,
20193c5875bSRick Macklem     "Timeout for TCP entries in the DRC");
20293c5875bSRick Macklem static u_int nfsrc_tcpnonidempotent = 1;
20393c5875bSRick Macklem SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
20493c5875bSRick Macklem     &nfsrc_tcpnonidempotent, 0,
20593c5875bSRick Macklem     "Enable the DRC for NFS over TCP");
20693c5875bSRick Macklem 
2077e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(int, nfsrc_udpcachesize) = 0;
2087e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(TAILQ_HEAD(, nfsrvcache), nfsrvudplru);
20993c5875bSRick Macklem 
2109ec7b004SRick Macklem /*
2119ec7b004SRick Macklem  * and the reverse mapping from generic to Version 2 procedure numbers
2129ec7b004SRick Macklem  */
2139ec7b004SRick Macklem static int newnfsv2_procid[NFS_V3NPROCS] = {
2149ec7b004SRick Macklem 	NFSV2PROC_NULL,
2159ec7b004SRick Macklem 	NFSV2PROC_GETATTR,
2169ec7b004SRick Macklem 	NFSV2PROC_SETATTR,
2179ec7b004SRick Macklem 	NFSV2PROC_LOOKUP,
2189ec7b004SRick Macklem 	NFSV2PROC_NOOP,
2199ec7b004SRick Macklem 	NFSV2PROC_READLINK,
2209ec7b004SRick Macklem 	NFSV2PROC_READ,
2219ec7b004SRick Macklem 	NFSV2PROC_WRITE,
2229ec7b004SRick Macklem 	NFSV2PROC_CREATE,
2239ec7b004SRick Macklem 	NFSV2PROC_MKDIR,
2249ec7b004SRick Macklem 	NFSV2PROC_SYMLINK,
2259ec7b004SRick Macklem 	NFSV2PROC_CREATE,
2269ec7b004SRick Macklem 	NFSV2PROC_REMOVE,
2279ec7b004SRick Macklem 	NFSV2PROC_RMDIR,
2289ec7b004SRick Macklem 	NFSV2PROC_RENAME,
2299ec7b004SRick Macklem 	NFSV2PROC_LINK,
2309ec7b004SRick Macklem 	NFSV2PROC_READDIR,
2319ec7b004SRick Macklem 	NFSV2PROC_NOOP,
2329ec7b004SRick Macklem 	NFSV2PROC_STATFS,
2339ec7b004SRick Macklem 	NFSV2PROC_NOOP,
2349ec7b004SRick Macklem 	NFSV2PROC_NOOP,
2359ec7b004SRick Macklem 	NFSV2PROC_NOOP,
2369ec7b004SRick Macklem };
2379ec7b004SRick Macklem 
/*
 * Hash a 32-bit key into a bucket index.  The same hash selects the bucket
 * in the UDP table, the TCP table (keyed by xid) and the "ack" table (which
 * nfsrvd_sentcache() keys by rc_sockref).
 */
#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define	NFSRCUDPHASH(xid) \
	(&NFSD_VNET(nfsrvudphashtbl)[nfsrc_hash(xid)])
#define	NFSRCHASH(xid) \
	(&NFSD_VNET(nfsrchash_table)[nfsrc_hash(xid)].tbl)
#define	NFSRCAHASH(xid) (&NFSD_VNET(nfsrcahash_table)[nfsrc_hash(xid)])
#define	TRUE	1
#define	FALSE	0
#define	NFSRVCACHE_CHECKLEN	100

2489ec7b004SRick Macklem /* True iff the rpc reply is an nfs status ONLY! */
2499ec7b004SRick Macklem static int nfsv2_repstat[NFS_V3NPROCS] = {
2509ec7b004SRick Macklem 	FALSE,
2519ec7b004SRick Macklem 	FALSE,
2529ec7b004SRick Macklem 	FALSE,
2539ec7b004SRick Macklem 	FALSE,
2549ec7b004SRick Macklem 	FALSE,
2559ec7b004SRick Macklem 	FALSE,
2569ec7b004SRick Macklem 	FALSE,
2579ec7b004SRick Macklem 	FALSE,
2589ec7b004SRick Macklem 	FALSE,
2599ec7b004SRick Macklem 	FALSE,
2609ec7b004SRick Macklem 	TRUE,
2619ec7b004SRick Macklem 	TRUE,
2629ec7b004SRick Macklem 	TRUE,
2639ec7b004SRick Macklem 	TRUE,
2649ec7b004SRick Macklem 	FALSE,
2659ec7b004SRick Macklem 	TRUE,
2669ec7b004SRick Macklem 	FALSE,
2679ec7b004SRick Macklem 	FALSE,
2689ec7b004SRick Macklem 	FALSE,
2699ec7b004SRick Macklem 	FALSE,
2709ec7b004SRick Macklem 	FALSE,
2719ec7b004SRick Macklem 	FALSE,
2729ec7b004SRick Macklem };
2739ec7b004SRick Macklem 
/*
 * Address family of the client address stored in a cache entry:
 * AF_INET6 when RC_INETIPV6 is set in rc_flag, otherwise AF_INET.
 */
#define	NETFAMILY(rp) \
		(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

2809ec7b004SRick Macklem /* local functions */
2819ec7b004SRick Macklem static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
2829ec7b004SRick Macklem static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
2839ec7b004SRick Macklem static void nfsrc_lock(struct nfsrvcache *rp);
2849ec7b004SRick Macklem static void nfsrc_unlock(struct nfsrvcache *rp);
2859ec7b004SRick Macklem static void nfsrc_wanted(struct nfsrvcache *rp);
2869ec7b004SRick Macklem static void nfsrc_freecache(struct nfsrvcache *rp);
287ae070589SRick Macklem static int nfsrc_getlenandcksum(struct mbuf *m1, u_int16_t *cksum);
2889ec7b004SRick Macklem static void nfsrc_marksametcpconn(u_int64_t);
2899ec7b004SRick Macklem 
2909ec7b004SRick Macklem /*
29193c5875bSRick Macklem  * Return the correct mutex for this cache entry.
29293c5875bSRick Macklem  */
29393c5875bSRick Macklem static __inline struct mtx *
nfsrc_cachemutex(struct nfsrvcache * rp)29493c5875bSRick Macklem nfsrc_cachemutex(struct nfsrvcache *rp)
29593c5875bSRick Macklem {
29693c5875bSRick Macklem 
29793c5875bSRick Macklem 	if ((rp->rc_flag & RC_UDP) != 0)
29893c5875bSRick Macklem 		return (&nfsrc_udpmtx);
2997e44856eSRick Macklem 	return (&NFSD_VNET(nfsrchash_table)[nfsrc_hash(rp->rc_xid)].mtx);
30093c5875bSRick Macklem }
30193c5875bSRick Macklem 
30293c5875bSRick Macklem /*
3039ec7b004SRick Macklem  * Initialize the server request cache list
3049ec7b004SRick Macklem  */
305b9cc3262SRyan Moeller void
nfsrvd_initcache(void)3069ec7b004SRick Macklem nfsrvd_initcache(void)
3079ec7b004SRick Macklem {
3089ec7b004SRick Macklem 	int i;
3099ec7b004SRick Macklem 
3107e44856eSRick Macklem 	NFSD_VNET(nfsrvudphashtbl) = malloc(sizeof(struct nfsrvhashhead) *
3117e44856eSRick Macklem 	    NFSRVCACHE_HASHSIZE, M_NFSRVCACHE, M_WAITOK | M_ZERO);
3127e44856eSRick Macklem 	NFSD_VNET(nfsrchash_table) = malloc(sizeof(struct nfsrchash_bucket) *
3137e44856eSRick Macklem 	    NFSRVCACHE_HASHSIZE, M_NFSRVCACHE, M_WAITOK | M_ZERO);
3147e44856eSRick Macklem 	NFSD_VNET(nfsrcahash_table) = malloc(sizeof(struct nfsrchash_bucket) *
3157e44856eSRick Macklem 	    NFSRVCACHE_HASHSIZE, M_NFSRVCACHE, M_WAITOK | M_ZERO);
3169ec7b004SRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
3177e44856eSRick Macklem 		mtx_init(&NFSD_VNET(nfsrchash_table)[i].mtx, "nfsrtc", NULL,
3187e44856eSRick Macklem 		    MTX_DEF);
3197e44856eSRick Macklem 		mtx_init(&NFSD_VNET(nfsrcahash_table)[i].mtx, "nfsrtca", NULL,
3207e44856eSRick Macklem 		    MTX_DEF);
3219ec7b004SRick Macklem 	}
3227e44856eSRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
3237e44856eSRick Macklem 		LIST_INIT(&NFSD_VNET(nfsrvudphashtbl)[i]);
3247e44856eSRick Macklem 		LIST_INIT(&NFSD_VNET(nfsrchash_table)[i].tbl);
3257e44856eSRick Macklem 		LIST_INIT(&NFSD_VNET(nfsrcahash_table)[i].tbl);
3267e44856eSRick Macklem 	}
3277e44856eSRick Macklem 	TAILQ_INIT(&NFSD_VNET(nfsrvudplru));
3287e44856eSRick Macklem 	NFSD_VNET(nfsrc_tcpsavedreplies) = 0;
3297e44856eSRick Macklem 	NFSD_VNET(nfsrc_udpcachesize) = 0;
3309ec7b004SRick Macklem }
3319ec7b004SRick Macklem 
3329ec7b004SRick Macklem /*
3339ec7b004SRick Macklem  * Get a cache entry for this request. Basically just malloc a new one
3349ec7b004SRick Macklem  * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
3359ec7b004SRick Macklem  */
336b9cc3262SRyan Moeller int
nfsrvd_getcache(struct nfsrv_descript * nd)337d473bac7SAlexander Motin nfsrvd_getcache(struct nfsrv_descript *nd)
3389ec7b004SRick Macklem {
3399ec7b004SRick Macklem 	struct nfsrvcache *newrp;
3409ec7b004SRick Macklem 	int ret;
3419ec7b004SRick Macklem 
3429ec7b004SRick Macklem 	if (nd->nd_procnum == NFSPROC_NULL)
3439ec7b004SRick Macklem 		panic("nfsd cache null");
344222daa42SConrad Meyer 	newrp = malloc(sizeof (struct nfsrvcache),
3459ec7b004SRick Macklem 	    M_NFSRVCACHE, M_WAITOK);
3469ec7b004SRick Macklem 	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
3479ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV4)
3489ec7b004SRick Macklem 		newrp->rc_flag = RC_NFSV4;
3499ec7b004SRick Macklem 	else if (nd->nd_flag & ND_NFSV3)
3509ec7b004SRick Macklem 		newrp->rc_flag = RC_NFSV3;
3519ec7b004SRick Macklem 	else
3529ec7b004SRick Macklem 		newrp->rc_flag = RC_NFSV2;
3539ec7b004SRick Macklem 	newrp->rc_xid = nd->nd_retxid;
3549ec7b004SRick Macklem 	newrp->rc_proc = nd->nd_procnum;
3559ec7b004SRick Macklem 	newrp->rc_sockref = nd->nd_sockref;
3569ec7b004SRick Macklem 	newrp->rc_cachetime = nd->nd_tcpconntime;
3579ec7b004SRick Macklem 	if (nd->nd_flag & ND_SAMETCPCONN)
3589ec7b004SRick Macklem 		newrp->rc_flag |= RC_SAMETCPCONN;
3599ec7b004SRick Macklem 	if (nd->nd_nam2 != NULL) {
3609ec7b004SRick Macklem 		newrp->rc_flag |= RC_UDP;
3619ec7b004SRick Macklem 		ret = nfsrc_getudp(nd, newrp);
3629ec7b004SRick Macklem 	} else {
3639ec7b004SRick Macklem 		ret = nfsrc_gettcp(nd, newrp);
3649ec7b004SRick Macklem 	}
365a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
3669ec7b004SRick Macklem 	return (ret);
3679ec7b004SRick Macklem }
3689ec7b004SRick Macklem 
3699ec7b004SRick Macklem /*
3709ec7b004SRick Macklem  * For UDP (v2, v3):
3719ec7b004SRick Macklem  * - key on <xid, NFS version, RPC#, Client host ip#>
3729ec7b004SRick Macklem  *   (at most one entry for each key)
3739ec7b004SRick Macklem  */
3749ec7b004SRick Macklem static int
nfsrc_getudp(struct nfsrv_descript * nd,struct nfsrvcache * newrp)3759ec7b004SRick Macklem nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
3769ec7b004SRick Macklem {
3779ec7b004SRick Macklem 	struct nfsrvcache *rp;
3789ec7b004SRick Macklem 	struct sockaddr_in *saddr;
3799ec7b004SRick Macklem 	struct sockaddr_in6 *saddr6;
3809ec7b004SRick Macklem 	struct nfsrvhashhead *hp;
3819ec7b004SRick Macklem 	int ret = 0;
38293c5875bSRick Macklem 	struct mtx *mutex;
3839ec7b004SRick Macklem 
38493c5875bSRick Macklem 	mutex = nfsrc_cachemutex(newrp);
3859ec7b004SRick Macklem 	hp = NFSRCUDPHASH(newrp->rc_xid);
3869ec7b004SRick Macklem loop:
38793c5875bSRick Macklem 	mtx_lock(mutex);
3889ec7b004SRick Macklem 	LIST_FOREACH(rp, hp, rc_hash) {
3899ec7b004SRick Macklem 	    if (newrp->rc_xid == rp->rc_xid &&
3909ec7b004SRick Macklem 		newrp->rc_proc == rp->rc_proc &&
3919ec7b004SRick Macklem 		(newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
3929ec7b004SRick Macklem 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
3939ec7b004SRick Macklem 			if ((rp->rc_flag & RC_LOCKED) != 0) {
3949ec7b004SRick Macklem 				rp->rc_flag |= RC_WANTED;
39593c5875bSRick Macklem 				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
39693c5875bSRick Macklem 				    "nfsrc", 10 * hz);
3979ec7b004SRick Macklem 				goto loop;
3989ec7b004SRick Macklem 			}
3999ec7b004SRick Macklem 			if (rp->rc_flag == 0)
4009ec7b004SRick Macklem 				panic("nfs udp cache0");
4019ec7b004SRick Macklem 			rp->rc_flag |= RC_LOCKED;
4027e44856eSRick Macklem 			TAILQ_REMOVE(&NFSD_VNET(nfsrvudplru), rp, rc_lru);
4037e44856eSRick Macklem 			TAILQ_INSERT_TAIL(&NFSD_VNET(nfsrvudplru), rp, rc_lru);
4049ec7b004SRick Macklem 			if (rp->rc_flag & RC_INPROG) {
405b039ca07SRick Macklem 				NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits++;
40693c5875bSRick Macklem 				mtx_unlock(mutex);
4079ec7b004SRick Macklem 				ret = RC_DROPIT;
4089ec7b004SRick Macklem 			} else if (rp->rc_flag & RC_REPSTATUS) {
4099ec7b004SRick Macklem 				/*
4109ec7b004SRick Macklem 				 * V2 only.
4119ec7b004SRick Macklem 				 */
412b039ca07SRick Macklem 				NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++;
41393c5875bSRick Macklem 				mtx_unlock(mutex);
4149ec7b004SRick Macklem 				nfsrvd_rephead(nd);
4159ec7b004SRick Macklem 				*(nd->nd_errp) = rp->rc_status;
4169ec7b004SRick Macklem 				ret = RC_REPLY;
4179ec7b004SRick Macklem 				rp->rc_timestamp = NFSD_MONOSEC +
4189ec7b004SRick Macklem 					NFSRVCACHE_UDPTIMEOUT;
4199ec7b004SRick Macklem 			} else if (rp->rc_flag & RC_REPMBUF) {
420b039ca07SRick Macklem 				NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++;
42193c5875bSRick Macklem 				mtx_unlock(mutex);
4229ec7b004SRick Macklem 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
423eb1b1807SGleb Smirnoff 					M_COPYALL, M_WAITOK);
4249ec7b004SRick Macklem 				ret = RC_REPLY;
4259ec7b004SRick Macklem 				rp->rc_timestamp = NFSD_MONOSEC +
4269ec7b004SRick Macklem 					NFSRVCACHE_UDPTIMEOUT;
4279ec7b004SRick Macklem 			} else {
4289ec7b004SRick Macklem 				panic("nfs udp cache1");
4299ec7b004SRick Macklem 			}
4309ec7b004SRick Macklem 			nfsrc_unlock(rp);
431222daa42SConrad Meyer 			free(newrp, M_NFSRVCACHE);
432a9285ae5SZack Kirsch 			goto out;
4339ec7b004SRick Macklem 		}
4349ec7b004SRick Macklem 	}
435b039ca07SRick Macklem 	NFSD_VNET(nfsstatsv1_p)->srvcache_misses++;
436b039ca07SRick Macklem 	atomic_add_int(&NFSD_VNET(nfsstatsv1_p)->srvcache_size, 1);
4377e44856eSRick Macklem 	NFSD_VNET(nfsrc_udpcachesize)++;
4389ec7b004SRick Macklem 
4399ec7b004SRick Macklem 	newrp->rc_flag |= RC_INPROG;
4409ec7b004SRick Macklem 	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
4419ec7b004SRick Macklem 	if (saddr->sin_family == AF_INET)
4429ec7b004SRick Macklem 		newrp->rc_inet = saddr->sin_addr.s_addr;
4439ec7b004SRick Macklem 	else if (saddr->sin_family == AF_INET6) {
4449ec7b004SRick Macklem 		saddr6 = (struct sockaddr_in6 *)saddr;
4459ec7b004SRick Macklem 		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
4469ec7b004SRick Macklem 		    sizeof (struct in6_addr));
447d5ad6625SRick Macklem 		newrp->rc_flag |= RC_INETIPV6;
4489ec7b004SRick Macklem 	}
4499ec7b004SRick Macklem 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
4507e44856eSRick Macklem 	TAILQ_INSERT_TAIL(&NFSD_VNET(nfsrvudplru), newrp, rc_lru);
45193c5875bSRick Macklem 	mtx_unlock(mutex);
4529ec7b004SRick Macklem 	nd->nd_rp = newrp;
453a9285ae5SZack Kirsch 	ret = RC_DOIT;
454a9285ae5SZack Kirsch 
455a9285ae5SZack Kirsch out:
456a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
457a9285ae5SZack Kirsch 	return (ret);
4589ec7b004SRick Macklem }
4599ec7b004SRick Macklem 
4609ec7b004SRick Macklem /*
4619ec7b004SRick Macklem  * Update a request cache entry after the rpc has been done
4629ec7b004SRick Macklem  */
463b9cc3262SRyan Moeller struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript * nd)464d473bac7SAlexander Motin nfsrvd_updatecache(struct nfsrv_descript *nd)
4659ec7b004SRick Macklem {
4669ec7b004SRick Macklem 	struct nfsrvcache *rp;
4679ec7b004SRick Macklem 	struct nfsrvcache *retrp = NULL;
468ae070589SRick Macklem 	struct mbuf *m;
46993c5875bSRick Macklem 	struct mtx *mutex;
4709ec7b004SRick Macklem 
4719ec7b004SRick Macklem 	rp = nd->nd_rp;
4729ec7b004SRick Macklem 	if (!rp)
4739ec7b004SRick Macklem 		panic("nfsrvd_updatecache null rp");
4749ec7b004SRick Macklem 	nd->nd_rp = NULL;
47593c5875bSRick Macklem 	mutex = nfsrc_cachemutex(rp);
47693c5875bSRick Macklem 	mtx_lock(mutex);
4779ec7b004SRick Macklem 	nfsrc_lock(rp);
4789ec7b004SRick Macklem 	if (!(rp->rc_flag & RC_INPROG))
4799ec7b004SRick Macklem 		panic("nfsrvd_updatecache not inprog");
4809ec7b004SRick Macklem 	rp->rc_flag &= ~RC_INPROG;
4819ec7b004SRick Macklem 	if (rp->rc_flag & RC_UDP) {
4827e44856eSRick Macklem 		TAILQ_REMOVE(&NFSD_VNET(nfsrvudplru), rp, rc_lru);
4837e44856eSRick Macklem 		TAILQ_INSERT_TAIL(&NFSD_VNET(nfsrvudplru), rp, rc_lru);
4849ec7b004SRick Macklem 	}
4859ec7b004SRick Macklem 
4869ec7b004SRick Macklem 	/*
4879ec7b004SRick Macklem 	 * Reply from cache is a special case returned by nfsrv_checkseqid().
4889ec7b004SRick Macklem 	 */
4899ec7b004SRick Macklem 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
490b039ca07SRick Macklem 		NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++;
49193c5875bSRick Macklem 		mtx_unlock(mutex);
4929ec7b004SRick Macklem 		nd->nd_repstat = 0;
4939ec7b004SRick Macklem 		if (nd->nd_mreq)
4949f6624d3SRick Macklem 			m_freem(nd->nd_mreq);
4959ec7b004SRick Macklem 		if (!(rp->rc_flag & RC_REPMBUF))
4969ec7b004SRick Macklem 			panic("reply from cache");
4979ec7b004SRick Macklem 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
498eb1b1807SGleb Smirnoff 		    M_COPYALL, M_WAITOK);
49993c5875bSRick Macklem 		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
5009ec7b004SRick Macklem 		nfsrc_unlock(rp);
501a9285ae5SZack Kirsch 		goto out;
5029ec7b004SRick Macklem 	}
5039ec7b004SRick Macklem 
5049ec7b004SRick Macklem 	/*
5059ec7b004SRick Macklem 	 * If rc_refcnt > 0, save it
5069ec7b004SRick Macklem 	 * For UDP, save it if ND_SAVEREPLY is set
5079ec7b004SRick Macklem 	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
5089ec7b004SRick Macklem 	 */
5099ec7b004SRick Macklem 	if (nd->nd_repstat != NFSERR_DONTREPLY &&
5109ec7b004SRick Macklem 	    (rp->rc_refcnt > 0 ||
5119ec7b004SRick Macklem 	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
5129ec7b004SRick Macklem 	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
5137e44856eSRick Macklem 	      NFSD_VNET(nfsrc_tcpsavedreplies) <= NFSD_VNET(nfsrc_floodlevel) &&
5149ec7b004SRick Macklem 	      nfsrc_tcpnonidempotent))) {
5159ec7b004SRick Macklem 		if (rp->rc_refcnt > 0) {
5169ec7b004SRick Macklem 			if (!(rp->rc_flag & RC_NFSV4))
5179ec7b004SRick Macklem 				panic("update_cache refcnt");
5189ec7b004SRick Macklem 			rp->rc_flag |= RC_REFCNT;
5199ec7b004SRick Macklem 		}
5209ec7b004SRick Macklem 		if ((nd->nd_flag & ND_NFSV2) &&
5219ec7b004SRick Macklem 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
5229ec7b004SRick Macklem 			rp->rc_status = nd->nd_repstat;
5239ec7b004SRick Macklem 			rp->rc_flag |= RC_REPSTATUS;
52493c5875bSRick Macklem 			mtx_unlock(mutex);
5259ec7b004SRick Macklem 		} else {
5269ec7b004SRick Macklem 			if (!(rp->rc_flag & RC_UDP)) {
5277e44856eSRick Macklem 			    atomic_add_int(&NFSD_VNET(nfsrc_tcpsavedreplies),
5287e44856eSRick Macklem 				1);
5297e44856eSRick Macklem 			    if (NFSD_VNET(nfsrc_tcpsavedreplies) >
530b039ca07SRick Macklem 				NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak)
531b039ca07SRick Macklem 				NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak =
5327e44856eSRick Macklem 				    NFSD_VNET(nfsrc_tcpsavedreplies);
5339ec7b004SRick Macklem 			}
53493c5875bSRick Macklem 			mtx_unlock(mutex);
535eb1b1807SGleb Smirnoff 			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
53693c5875bSRick Macklem 			mtx_lock(mutex);
5374e22c98aSRick Macklem 			rp->rc_reply = m;
5389ec7b004SRick Macklem 			rp->rc_flag |= RC_REPMBUF;
53993c5875bSRick Macklem 			mtx_unlock(mutex);
5409ec7b004SRick Macklem 		}
5419ec7b004SRick Macklem 		if (rp->rc_flag & RC_UDP) {
5429ec7b004SRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC +
5439ec7b004SRick Macklem 			    NFSRVCACHE_UDPTIMEOUT;
5449ec7b004SRick Macklem 			nfsrc_unlock(rp);
5459ec7b004SRick Macklem 		} else {
54693c5875bSRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
5479ec7b004SRick Macklem 			if (rp->rc_refcnt > 0)
5489ec7b004SRick Macklem 				nfsrc_unlock(rp);
5499ec7b004SRick Macklem 			else
5509ec7b004SRick Macklem 				retrp = rp;
5519ec7b004SRick Macklem 		}
5529ec7b004SRick Macklem 	} else {
5539ec7b004SRick Macklem 		nfsrc_freecache(rp);
55493c5875bSRick Macklem 		mtx_unlock(mutex);
5559ec7b004SRick Macklem 	}
556a9285ae5SZack Kirsch 
557a9285ae5SZack Kirsch out:
558a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
5599ec7b004SRick Macklem 	return (retrp);
5609ec7b004SRick Macklem }
5619ec7b004SRick Macklem 
5629ec7b004SRick Macklem /*
5639ec7b004SRick Macklem  * Invalidate and, if possible, free an in prog cache entry.
5649ec7b004SRick Macklem  * Must not sleep.
5659ec7b004SRick Macklem  */
566b9cc3262SRyan Moeller void
nfsrvd_delcache(struct nfsrvcache * rp)5679ec7b004SRick Macklem nfsrvd_delcache(struct nfsrvcache *rp)
5689ec7b004SRick Macklem {
56993c5875bSRick Macklem 	struct mtx *mutex;
5709ec7b004SRick Macklem 
57193c5875bSRick Macklem 	mutex = nfsrc_cachemutex(rp);
5729ec7b004SRick Macklem 	if (!(rp->rc_flag & RC_INPROG))
5739ec7b004SRick Macklem 		panic("nfsrvd_delcache not in prog");
57493c5875bSRick Macklem 	mtx_lock(mutex);
5759ec7b004SRick Macklem 	rp->rc_flag &= ~RC_INPROG;
5769ec7b004SRick Macklem 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
5779ec7b004SRick Macklem 		nfsrc_freecache(rp);
57893c5875bSRick Macklem 	mtx_unlock(mutex);
5799ec7b004SRick Macklem }
5809ec7b004SRick Macklem 
5819ec7b004SRick Macklem /*
5829ec7b004SRick Macklem  * Called after nfsrvd_updatecache() once the reply is sent, to update
583d473bac7SAlexander Motin  * the entry's sequence number and unlock it. The argument is
5849ec7b004SRick Macklem  * the pointer returned by nfsrvd_updatecache().
5859ec7b004SRick Macklem  */
586b9cc3262SRyan Moeller void
nfsrvd_sentcache(struct nfsrvcache * rp,int have_seq,uint32_t seq)5876103bae6SAlexander Motin nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
5889ec7b004SRick Macklem {
589d473bac7SAlexander Motin 	struct nfsrchash_bucket *hbp;
5909ec7b004SRick Macklem 
591d473bac7SAlexander Motin 	KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
5926103bae6SAlexander Motin 	if (have_seq) {
593d473bac7SAlexander Motin 		hbp = NFSRCAHASH(rp->rc_sockref);
594d473bac7SAlexander Motin 		mtx_lock(&hbp->mtx);
595d473bac7SAlexander Motin 		rp->rc_tcpseq = seq;
596d473bac7SAlexander Motin 		if (rp->rc_acked != RC_NO_ACK)
597d473bac7SAlexander Motin 			LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
598d473bac7SAlexander Motin 		rp->rc_acked = RC_NO_ACK;
599d473bac7SAlexander Motin 		mtx_unlock(&hbp->mtx);
6006103bae6SAlexander Motin 	}
6019ec7b004SRick Macklem 	nfsrc_unlock(rp);
6029ec7b004SRick Macklem }
6039ec7b004SRick Macklem 
6049ec7b004SRick Macklem /*
6059ec7b004SRick Macklem  * Get a cache entry for TCP
6069ec7b004SRick Macklem  * - key on <xid, nfs version>
6079ec7b004SRick Macklem  *   (allow multiple entries for a given key)
6089ec7b004SRick Macklem  */
6099ec7b004SRick Macklem static int
nfsrc_gettcp(struct nfsrv_descript * nd,struct nfsrvcache * newrp)6109ec7b004SRick Macklem nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
6119ec7b004SRick Macklem {
6129ec7b004SRick Macklem 	struct nfsrvcache *rp, *nextrp;
6139ec7b004SRick Macklem 	int i;
6149ec7b004SRick Macklem 	struct nfsrvcache *hitrp;
6159ec7b004SRick Macklem 	struct nfsrvhashhead *hp, nfsrc_templist;
6169ec7b004SRick Macklem 	int hit, ret = 0;
61793c5875bSRick Macklem 	struct mtx *mutex;
6189ec7b004SRick Macklem 
61993c5875bSRick Macklem 	mutex = nfsrc_cachemutex(newrp);
6209ec7b004SRick Macklem 	hp = NFSRCHASH(newrp->rc_xid);
6219ec7b004SRick Macklem 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
6229ec7b004SRick Macklem tryagain:
62393c5875bSRick Macklem 	mtx_lock(mutex);
6249ec7b004SRick Macklem 	hit = 1;
6259ec7b004SRick Macklem 	LIST_INIT(&nfsrc_templist);
6269ec7b004SRick Macklem 	/*
6279ec7b004SRick Macklem 	 * Get all the matches and put them on the temp list.
6289ec7b004SRick Macklem 	 */
6299ec7b004SRick Macklem 	rp = LIST_FIRST(hp);
6309ec7b004SRick Macklem 	while (rp != LIST_END(hp)) {
6319ec7b004SRick Macklem 		nextrp = LIST_NEXT(rp, rc_hash);
6329ec7b004SRick Macklem 		if (newrp->rc_xid == rp->rc_xid &&
6339ec7b004SRick Macklem 		    (!(rp->rc_flag & RC_INPROG) ||
6349ec7b004SRick Macklem 		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
6359ec7b004SRick Macklem 		      newrp->rc_sockref == rp->rc_sockref)) &&
6369ec7b004SRick Macklem 		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
6379ec7b004SRick Macklem 		    newrp->rc_proc == rp->rc_proc &&
6389ec7b004SRick Macklem 		    ((newrp->rc_flag & RC_NFSV4) &&
6399ec7b004SRick Macklem 		     newrp->rc_sockref != rp->rc_sockref &&
6409ec7b004SRick Macklem 		     newrp->rc_cachetime >= rp->rc_cachetime)
6419ec7b004SRick Macklem 		    && newrp->rc_reqlen == rp->rc_reqlen &&
6429ec7b004SRick Macklem 		    newrp->rc_cksum == rp->rc_cksum) {
6439ec7b004SRick Macklem 			LIST_REMOVE(rp, rc_hash);
6449ec7b004SRick Macklem 			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
6459ec7b004SRick Macklem 		}
6469ec7b004SRick Macklem 		rp = nextrp;
6479ec7b004SRick Macklem 	}
6489ec7b004SRick Macklem 
6499ec7b004SRick Macklem 	/*
6509ec7b004SRick Macklem 	 * Now, use nfsrc_templist to decide if there is a match.
6519ec7b004SRick Macklem 	 */
6529ec7b004SRick Macklem 	i = 0;
6539ec7b004SRick Macklem 	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
6549ec7b004SRick Macklem 		i++;
6559ec7b004SRick Macklem 		if (rp->rc_refcnt > 0) {
6569ec7b004SRick Macklem 			hit = 0;
6579ec7b004SRick Macklem 			break;
6589ec7b004SRick Macklem 		}
6599ec7b004SRick Macklem 	}
6609ec7b004SRick Macklem 	/*
6619ec7b004SRick Macklem 	 * Can be a hit only if one entry left.
6629ec7b004SRick Macklem 	 * Note possible hit entry and put nfsrc_templist back on hash
6639ec7b004SRick Macklem 	 * list.
6649ec7b004SRick Macklem 	 */
6659ec7b004SRick Macklem 	if (i != 1)
6669ec7b004SRick Macklem 		hit = 0;
6679ec7b004SRick Macklem 	hitrp = rp = LIST_FIRST(&nfsrc_templist);
6689ec7b004SRick Macklem 	while (rp != LIST_END(&nfsrc_templist)) {
6699ec7b004SRick Macklem 		nextrp = LIST_NEXT(rp, rc_hash);
6709ec7b004SRick Macklem 		LIST_REMOVE(rp, rc_hash);
6719ec7b004SRick Macklem 		LIST_INSERT_HEAD(hp, rp, rc_hash);
6729ec7b004SRick Macklem 		rp = nextrp;
6739ec7b004SRick Macklem 	}
6749ec7b004SRick Macklem 	if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist))
6759ec7b004SRick Macklem 		panic("nfs gettcp cache templist");
6769ec7b004SRick Macklem 
6779ec7b004SRick Macklem 	if (hit) {
6789ec7b004SRick Macklem 		rp = hitrp;
6799ec7b004SRick Macklem 		if ((rp->rc_flag & RC_LOCKED) != 0) {
6809ec7b004SRick Macklem 			rp->rc_flag |= RC_WANTED;
68193c5875bSRick Macklem 			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
68293c5875bSRick Macklem 			    "nfsrc", 10 * hz);
6839ec7b004SRick Macklem 			goto tryagain;
6849ec7b004SRick Macklem 		}
6859ec7b004SRick Macklem 		if (rp->rc_flag == 0)
6869ec7b004SRick Macklem 			panic("nfs tcp cache0");
6879ec7b004SRick Macklem 		rp->rc_flag |= RC_LOCKED;
6889ec7b004SRick Macklem 		if (rp->rc_flag & RC_INPROG) {
689b039ca07SRick Macklem 			NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits++;
69093c5875bSRick Macklem 			mtx_unlock(mutex);
6919ec7b004SRick Macklem 			if (newrp->rc_sockref == rp->rc_sockref)
6929ec7b004SRick Macklem 				nfsrc_marksametcpconn(rp->rc_sockref);
6939ec7b004SRick Macklem 			ret = RC_DROPIT;
6949ec7b004SRick Macklem 		} else if (rp->rc_flag & RC_REPSTATUS) {
6959ec7b004SRick Macklem 			/*
6969ec7b004SRick Macklem 			 * V2 only.
6979ec7b004SRick Macklem 			 */
698b039ca07SRick Macklem 			NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++;
69993c5875bSRick Macklem 			mtx_unlock(mutex);
7009ec7b004SRick Macklem 			if (newrp->rc_sockref == rp->rc_sockref)
7019ec7b004SRick Macklem 				nfsrc_marksametcpconn(rp->rc_sockref);
7029ec7b004SRick Macklem 			ret = RC_REPLY;
7039ec7b004SRick Macklem 			nfsrvd_rephead(nd);
7049ec7b004SRick Macklem 			*(nd->nd_errp) = rp->rc_status;
70593c5875bSRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
7069ec7b004SRick Macklem 		} else if (rp->rc_flag & RC_REPMBUF) {
707b039ca07SRick Macklem 			NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++;
70893c5875bSRick Macklem 			mtx_unlock(mutex);
7099ec7b004SRick Macklem 			if (newrp->rc_sockref == rp->rc_sockref)
7109ec7b004SRick Macklem 				nfsrc_marksametcpconn(rp->rc_sockref);
7119ec7b004SRick Macklem 			ret = RC_REPLY;
7129ec7b004SRick Macklem 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
713eb1b1807SGleb Smirnoff 				M_COPYALL, M_WAITOK);
71493c5875bSRick Macklem 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
7159ec7b004SRick Macklem 		} else {
7169ec7b004SRick Macklem 			panic("nfs tcp cache1");
7179ec7b004SRick Macklem 		}
7189ec7b004SRick Macklem 		nfsrc_unlock(rp);
719222daa42SConrad Meyer 		free(newrp, M_NFSRVCACHE);
720a9285ae5SZack Kirsch 		goto out;
7219ec7b004SRick Macklem 	}
722b039ca07SRick Macklem 	NFSD_VNET(nfsstatsv1_p)->srvcache_misses++;
723b039ca07SRick Macklem 	atomic_add_int(&NFSD_VNET(nfsstatsv1_p)->srvcache_size, 1);
7249ec7b004SRick Macklem 
7259ec7b004SRick Macklem 	/*
7269ec7b004SRick Macklem 	 * For TCP, multiple entries for a key are allowed, so don't
7279ec7b004SRick Macklem 	 * chain it into the hash table until done.
7289ec7b004SRick Macklem 	 */
7299ec7b004SRick Macklem 	newrp->rc_cachetime = NFSD_MONOSEC;
7309ec7b004SRick Macklem 	newrp->rc_flag |= RC_INPROG;
7319ec7b004SRick Macklem 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
73293c5875bSRick Macklem 	mtx_unlock(mutex);
7339ec7b004SRick Macklem 	nd->nd_rp = newrp;
734a9285ae5SZack Kirsch 	ret = RC_DOIT;
735a9285ae5SZack Kirsch 
736a9285ae5SZack Kirsch out:
737a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
738a9285ae5SZack Kirsch 	return (ret);
7399ec7b004SRick Macklem }
7409ec7b004SRick Macklem 
7419ec7b004SRick Macklem /*
7429ec7b004SRick Macklem  * Lock a cache entry.
7439ec7b004SRick Macklem  */
7449ec7b004SRick Macklem static void
nfsrc_lock(struct nfsrvcache * rp)7459ec7b004SRick Macklem nfsrc_lock(struct nfsrvcache *rp)
7469ec7b004SRick Macklem {
74793c5875bSRick Macklem 	struct mtx *mutex;
74893c5875bSRick Macklem 
74993c5875bSRick Macklem 	mutex = nfsrc_cachemutex(rp);
75093c5875bSRick Macklem 	mtx_assert(mutex, MA_OWNED);
7519ec7b004SRick Macklem 	while ((rp->rc_flag & RC_LOCKED) != 0) {
7529ec7b004SRick Macklem 		rp->rc_flag |= RC_WANTED;
75393c5875bSRick Macklem 		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
7549ec7b004SRick Macklem 	}
7559ec7b004SRick Macklem 	rp->rc_flag |= RC_LOCKED;
7569ec7b004SRick Macklem }
7579ec7b004SRick Macklem 
7589ec7b004SRick Macklem /*
7599ec7b004SRick Macklem  * Unlock a cache entry.
7609ec7b004SRick Macklem  */
7619ec7b004SRick Macklem static void
nfsrc_unlock(struct nfsrvcache * rp)7629ec7b004SRick Macklem nfsrc_unlock(struct nfsrvcache *rp)
7639ec7b004SRick Macklem {
76493c5875bSRick Macklem 	struct mtx *mutex;
7654e22c98aSRick Macklem 
76693c5875bSRick Macklem 	mutex = nfsrc_cachemutex(rp);
76793c5875bSRick Macklem 	mtx_lock(mutex);
7689ec7b004SRick Macklem 	rp->rc_flag &= ~RC_LOCKED;
7699ec7b004SRick Macklem 	nfsrc_wanted(rp);
77093c5875bSRick Macklem 	mtx_unlock(mutex);
7719ec7b004SRick Macklem }
7729ec7b004SRick Macklem 
7739ec7b004SRick Macklem /*
7749ec7b004SRick Macklem  * Wakeup anyone wanting entry.
7759ec7b004SRick Macklem  */
7769ec7b004SRick Macklem static void
nfsrc_wanted(struct nfsrvcache * rp)7779ec7b004SRick Macklem nfsrc_wanted(struct nfsrvcache *rp)
7789ec7b004SRick Macklem {
7799ec7b004SRick Macklem 	if (rp->rc_flag & RC_WANTED) {
7809ec7b004SRick Macklem 		rp->rc_flag &= ~RC_WANTED;
7819ec7b004SRick Macklem 		wakeup((caddr_t)rp);
7829ec7b004SRick Macklem 	}
7839ec7b004SRick Macklem }
7849ec7b004SRick Macklem 
7859ec7b004SRick Macklem /*
7869ec7b004SRick Macklem  * Free up the entry.
7879ec7b004SRick Macklem  * Must not sleep.
7889ec7b004SRick Macklem  */
7899ec7b004SRick Macklem static void
nfsrc_freecache(struct nfsrvcache * rp)7909ec7b004SRick Macklem nfsrc_freecache(struct nfsrvcache *rp)
7919ec7b004SRick Macklem {
792d473bac7SAlexander Motin 	struct nfsrchash_bucket *hbp;
7939ec7b004SRick Macklem 
7949ec7b004SRick Macklem 	LIST_REMOVE(rp, rc_hash);
7959ec7b004SRick Macklem 	if (rp->rc_flag & RC_UDP) {
7967e44856eSRick Macklem 		TAILQ_REMOVE(&NFSD_VNET(nfsrvudplru), rp, rc_lru);
7977e44856eSRick Macklem 		NFSD_VNET(nfsrc_udpcachesize)--;
798d473bac7SAlexander Motin 	} else if (rp->rc_acked != RC_NO_SEQ) {
799d473bac7SAlexander Motin 		hbp = NFSRCAHASH(rp->rc_sockref);
800d473bac7SAlexander Motin 		mtx_lock(&hbp->mtx);
801d473bac7SAlexander Motin 		if (rp->rc_acked == RC_NO_ACK)
802d473bac7SAlexander Motin 			LIST_REMOVE(rp, rc_ahash);
803d473bac7SAlexander Motin 		mtx_unlock(&hbp->mtx);
8049ec7b004SRick Macklem 	}
8059ec7b004SRick Macklem 	nfsrc_wanted(rp);
8069ec7b004SRick Macklem 	if (rp->rc_flag & RC_REPMBUF) {
8079f6624d3SRick Macklem 		m_freem(rp->rc_reply);
8089ec7b004SRick Macklem 		if (!(rp->rc_flag & RC_UDP))
8097e44856eSRick Macklem 			atomic_add_int(&NFSD_VNET(nfsrc_tcpsavedreplies), -1);
8109ec7b004SRick Macklem 	}
811222daa42SConrad Meyer 	free(rp, M_NFSRVCACHE);
812b039ca07SRick Macklem 	atomic_add_int(&NFSD_VNET(nfsstatsv1_p)->srvcache_size, -1);
8139ec7b004SRick Macklem }
8149ec7b004SRick Macklem 
8159ec7b004SRick Macklem /*
81652776c50SZack Kirsch  * Clean out the cache. Called when nfsserver module is unloaded.
8179ec7b004SRick Macklem  */
818b9cc3262SRyan Moeller void
nfsrvd_cleancache(void)8199ec7b004SRick Macklem nfsrvd_cleancache(void)
8209ec7b004SRick Macklem {
8219ec7b004SRick Macklem 	struct nfsrvcache *rp, *nextrp;
8229ec7b004SRick Macklem 	int i;
8239ec7b004SRick Macklem 
8249ec7b004SRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
8257e44856eSRick Macklem 		LIST_FOREACH_SAFE(rp, &NFSD_VNET(nfsrchash_table)[i].tbl,
8267e44856eSRick Macklem 		    rc_hash, nextrp)
8279ec7b004SRick Macklem 			nfsrc_freecache(rp);
8289ec7b004SRick Macklem 	}
8299ec7b004SRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
8307e44856eSRick Macklem 		LIST_FOREACH_SAFE(rp, &NFSD_VNET(nfsrvudphashtbl)[i], rc_hash,
8317e44856eSRick Macklem 		    nextrp) {
8329ec7b004SRick Macklem 			nfsrc_freecache(rp);
8339ec7b004SRick Macklem 		}
8349ec7b004SRick Macklem 	}
835b039ca07SRick Macklem 	NFSD_VNET(nfsstatsv1_p)->srvcache_size = 0;
8367e44856eSRick Macklem 	NFSD_VNET(nfsrc_tcpsavedreplies) = 0;
8379ec7b004SRick Macklem }
8389ec7b004SRick Macklem 
8391555cf04SAlexander Motin #define HISTSIZE	16
8409ec7b004SRick Macklem /*
8419ec7b004SRick Macklem  * The basic rule is to get rid of entries that are expired.
8429ec7b004SRick Macklem  */
843d473bac7SAlexander Motin void
nfsrc_trimcache(u_int64_t sockref,uint32_t snd_una,int final)844d473bac7SAlexander Motin nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
8459ec7b004SRick Macklem {
846d473bac7SAlexander Motin 	struct nfsrchash_bucket *hbp;
8479ec7b004SRick Macklem 	struct nfsrvcache *rp, *nextrp;
848d473bac7SAlexander Motin 	int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
84993c5875bSRick Macklem 	time_t thisstamp;
85093c5875bSRick Macklem 	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
851d473bac7SAlexander Motin 	static int onethread = 0, oneslot = 0;
852d473bac7SAlexander Motin 
853d473bac7SAlexander Motin 	if (sockref != 0) {
854d473bac7SAlexander Motin 		hbp = NFSRCAHASH(sockref);
855d473bac7SAlexander Motin 		mtx_lock(&hbp->mtx);
856d473bac7SAlexander Motin 		LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
857d473bac7SAlexander Motin 			if (sockref == rp->rc_sockref) {
858d473bac7SAlexander Motin 				if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
859d473bac7SAlexander Motin 					rp->rc_acked = RC_ACK;
860d473bac7SAlexander Motin 					LIST_REMOVE(rp, rc_ahash);
861d473bac7SAlexander Motin 				} else if (final) {
862d473bac7SAlexander Motin 					rp->rc_acked = RC_NACK;
863d473bac7SAlexander Motin 					LIST_REMOVE(rp, rc_ahash);
864d473bac7SAlexander Motin 				}
865d473bac7SAlexander Motin 			}
866d473bac7SAlexander Motin 		}
867d473bac7SAlexander Motin 		mtx_unlock(&hbp->mtx);
868d473bac7SAlexander Motin 	}
8699ec7b004SRick Macklem 
87093c5875bSRick Macklem 	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
87193c5875bSRick Macklem 		return;
87293c5875bSRick Macklem 	if (NFSD_MONOSEC != udp_lasttrim ||
8737e44856eSRick Macklem 	    NFSD_VNET(nfsrc_udpcachesize) >= (nfsrc_udphighwater +
87493c5875bSRick Macklem 	    nfsrc_udphighwater / 2)) {
87593c5875bSRick Macklem 		mtx_lock(&nfsrc_udpmtx);
87693c5875bSRick Macklem 		udp_lasttrim = NFSD_MONOSEC;
8777e44856eSRick Macklem 		TAILQ_FOREACH_SAFE(rp, &NFSD_VNET(nfsrvudplru), rc_lru,
8787e44856eSRick Macklem 		    nextrp) {
8799ec7b004SRick Macklem 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
8809ec7b004SRick Macklem 			     && rp->rc_refcnt == 0
8819ec7b004SRick Macklem 			     && ((rp->rc_flag & RC_REFCNT) ||
88293c5875bSRick Macklem 				 udp_lasttrim > rp->rc_timestamp ||
8837e44856eSRick Macklem 				 NFSD_VNET(nfsrc_udpcachesize) >
8847e44856eSRick Macklem 				 nfsrc_udphighwater))
8859ec7b004SRick Macklem 				nfsrc_freecache(rp);
8869ec7b004SRick Macklem 		}
88793c5875bSRick Macklem 		mtx_unlock(&nfsrc_udpmtx);
88893c5875bSRick Macklem 	}
88993c5875bSRick Macklem 	if (NFSD_MONOSEC != tcp_lasttrim ||
8907e44856eSRick Macklem 	    NFSD_VNET(nfsrc_tcpsavedreplies) >= nfsrc_tcphighwater) {
891d473bac7SAlexander Motin 		force = nfsrc_tcphighwater / 4;
892d473bac7SAlexander Motin 		if (force > 0 &&
8937e44856eSRick Macklem 		    NFSD_VNET(nfsrc_tcpsavedreplies) + force >=
8947e44856eSRick Macklem 		    nfsrc_tcphighwater) {
8951555cf04SAlexander Motin 			for (i = 0; i < HISTSIZE; i++)
89693c5875bSRick Macklem 				time_histo[i] = 0;
897d473bac7SAlexander Motin 			i = 0;
89845e18ea7SAlexander Motin 			lastslot = NFSRVCACHE_HASHSIZE - 1;
899d473bac7SAlexander Motin 		} else {
900d473bac7SAlexander Motin 			force = 0;
901d473bac7SAlexander Motin 			if (NFSD_MONOSEC != tcp_lasttrim) {
902d473bac7SAlexander Motin 				i = 0;
903d473bac7SAlexander Motin 				lastslot = NFSRVCACHE_HASHSIZE - 1;
904d473bac7SAlexander Motin 			} else {
905d473bac7SAlexander Motin 				lastslot = i = oneslot;
906d473bac7SAlexander Motin 				if (++oneslot >= NFSRVCACHE_HASHSIZE)
907d473bac7SAlexander Motin 					oneslot = 0;
908d473bac7SAlexander Motin 			}
909d473bac7SAlexander Motin 		}
9101555cf04SAlexander Motin 		tto = nfsrc_tcptimeout;
91193c5875bSRick Macklem 		tcp_lasttrim = NFSD_MONOSEC;
912d473bac7SAlexander Motin 		for (; i <= lastslot; i++) {
9137e44856eSRick Macklem 			mtx_lock(&NFSD_VNET(nfsrchash_table)[i].mtx);
9147e44856eSRick Macklem 			LIST_FOREACH_SAFE(rp,
9157e44856eSRick Macklem 			    &NFSD_VNET(nfsrchash_table)[i].tbl, rc_hash,
91693c5875bSRick Macklem 			    nextrp) {
91793c5875bSRick Macklem 				if (!(rp->rc_flag &
91893c5875bSRick Macklem 				     (RC_INPROG|RC_LOCKED|RC_WANTED))
91993c5875bSRick Macklem 				     && rp->rc_refcnt == 0) {
9201555cf04SAlexander Motin 					if ((rp->rc_flag & RC_REFCNT) ||
9211555cf04SAlexander Motin 					    tcp_lasttrim > rp->rc_timestamp ||
922d473bac7SAlexander Motin 					    rp->rc_acked == RC_ACK) {
9231555cf04SAlexander Motin 						nfsrc_freecache(rp);
9241555cf04SAlexander Motin 						continue;
9251555cf04SAlexander Motin 					}
9261555cf04SAlexander Motin 
927d473bac7SAlexander Motin 					if (force == 0)
9281555cf04SAlexander Motin 						continue;
92993c5875bSRick Macklem 					/*
93093c5875bSRick Macklem 					 * The timestamps range from roughly the
93193c5875bSRick Macklem 					 * present (tcp_lasttrim) to the present
93293c5875bSRick Macklem 					 * + nfsrc_tcptimeout. Generate a simple
93393c5875bSRick Macklem 					 * histogram of where the timeouts fall.
93493c5875bSRick Macklem 					 */
93593c5875bSRick Macklem 					j = rp->rc_timestamp - tcp_lasttrim;
9361555cf04SAlexander Motin 					if (j >= tto)
9371555cf04SAlexander Motin 						j = HISTSIZE - 1;
9381555cf04SAlexander Motin 					else if (j < 0)
93993c5875bSRick Macklem 						j = 0;
9401555cf04SAlexander Motin 					else
9411555cf04SAlexander Motin 						j = j * HISTSIZE / tto;
94293c5875bSRick Macklem 					time_histo[j]++;
9439ec7b004SRick Macklem 				}
9449ec7b004SRick Macklem 			}
9457e44856eSRick Macklem 			mtx_unlock(&NFSD_VNET(nfsrchash_table)[i].mtx);
94693c5875bSRick Macklem 		}
947d473bac7SAlexander Motin 		if (force) {
94893c5875bSRick Macklem 			/*
94993c5875bSRick Macklem 			 * Trim some more with a smaller timeout of as little
95093c5875bSRick Macklem 			 * as 20% of nfsrc_tcptimeout to try and get below
95193c5875bSRick Macklem 			 * 80% of the nfsrc_tcphighwater.
95293c5875bSRick Macklem 			 */
95393c5875bSRick Macklem 			k = 0;
9541555cf04SAlexander Motin 			for (i = 0; i < (HISTSIZE - 2); i++) {
95593c5875bSRick Macklem 				k += time_histo[i];
956d473bac7SAlexander Motin 				if (k > force)
95793c5875bSRick Macklem 					break;
95893c5875bSRick Macklem 			}
9591555cf04SAlexander Motin 			k = tto * (i + 1) / HISTSIZE;
96093c5875bSRick Macklem 			if (k < 1)
96193c5875bSRick Macklem 				k = 1;
96293c5875bSRick Macklem 			thisstamp = tcp_lasttrim + k;
96393c5875bSRick Macklem 			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
9647e44856eSRick Macklem 				mtx_lock(&NFSD_VNET(nfsrchash_table)[i].mtx);
9657e44856eSRick Macklem 				LIST_FOREACH_SAFE(rp,
9667e44856eSRick Macklem 				    &NFSD_VNET(nfsrchash_table)[i].tbl,
96793c5875bSRick Macklem 				    rc_hash, nextrp) {
96893c5875bSRick Macklem 					if (!(rp->rc_flag &
96993c5875bSRick Macklem 					     (RC_INPROG|RC_LOCKED|RC_WANTED))
97093c5875bSRick Macklem 					     && rp->rc_refcnt == 0
97193c5875bSRick Macklem 					     && ((rp->rc_flag & RC_REFCNT) ||
97293c5875bSRick Macklem 						 thisstamp > rp->rc_timestamp ||
973d473bac7SAlexander Motin 						 rp->rc_acked == RC_ACK))
97493c5875bSRick Macklem 						nfsrc_freecache(rp);
97593c5875bSRick Macklem 				}
9767e44856eSRick Macklem 				mtx_unlock(&NFSD_VNET(nfsrchash_table)[i].mtx);
97793c5875bSRick Macklem 			}
97893c5875bSRick Macklem 		}
97993c5875bSRick Macklem 	}
98093c5875bSRick Macklem 	atomic_store_rel_int(&onethread, 0);
9819ec7b004SRick Macklem }
9829ec7b004SRick Macklem 
9839ec7b004SRick Macklem /*
9849ec7b004SRick Macklem  * Add a seqid# reference to the cache entry.
9859ec7b004SRick Macklem  */
986b9cc3262SRyan Moeller void
nfsrvd_refcache(struct nfsrvcache * rp)9879ec7b004SRick Macklem nfsrvd_refcache(struct nfsrvcache *rp)
9889ec7b004SRick Macklem {
98993c5875bSRick Macklem 	struct mtx *mutex;
9909ec7b004SRick Macklem 
991c59e4cc3SRick Macklem 	if (rp == NULL)
992c59e4cc3SRick Macklem 		/* For NFSv4.1, there is no cache entry. */
993c59e4cc3SRick Macklem 		return;
99493c5875bSRick Macklem 	mutex = nfsrc_cachemutex(rp);
99593c5875bSRick Macklem 	mtx_lock(mutex);
9969ec7b004SRick Macklem 	if (rp->rc_refcnt < 0)
9979ec7b004SRick Macklem 		panic("nfs cache refcnt");
9989ec7b004SRick Macklem 	rp->rc_refcnt++;
99993c5875bSRick Macklem 	mtx_unlock(mutex);
10009ec7b004SRick Macklem }
10019ec7b004SRick Macklem 
10029ec7b004SRick Macklem /*
10039ec7b004SRick Macklem  * Dereference a seqid# cache entry.
10049ec7b004SRick Macklem  */
1005b9cc3262SRyan Moeller void
nfsrvd_derefcache(struct nfsrvcache * rp)10069ec7b004SRick Macklem nfsrvd_derefcache(struct nfsrvcache *rp)
10079ec7b004SRick Macklem {
100893c5875bSRick Macklem 	struct mtx *mutex;
10099ec7b004SRick Macklem 
101093c5875bSRick Macklem 	mutex = nfsrc_cachemutex(rp);
101193c5875bSRick Macklem 	mtx_lock(mutex);
10129ec7b004SRick Macklem 	if (rp->rc_refcnt <= 0)
10139ec7b004SRick Macklem 		panic("nfs cache derefcnt");
10149ec7b004SRick Macklem 	rp->rc_refcnt--;
10159ec7b004SRick Macklem 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
10169ec7b004SRick Macklem 		nfsrc_freecache(rp);
101793c5875bSRick Macklem 	mtx_unlock(mutex);
10189ec7b004SRick Macklem }
10199ec7b004SRick Macklem 
10209ec7b004SRick Macklem /*
10219ec7b004SRick Macklem  * Calculate the length of the mbuf list and a checksum on the first up to
10229ec7b004SRick Macklem  * NFSRVCACHE_CHECKLEN bytes.
10239ec7b004SRick Macklem  */
10249ec7b004SRick Macklem static int
nfsrc_getlenandcksum(struct mbuf * m1,u_int16_t * cksum)1025ae070589SRick Macklem nfsrc_getlenandcksum(struct mbuf *m1, u_int16_t *cksum)
10269ec7b004SRick Macklem {
10279ec7b004SRick Macklem 	int len = 0, cklen;
1028ae070589SRick Macklem 	struct mbuf *m;
10299ec7b004SRick Macklem 
10309ec7b004SRick Macklem 	m = m1;
10319ec7b004SRick Macklem 	while (m) {
10329f6624d3SRick Macklem 		len += m->m_len;
10339f6624d3SRick Macklem 		m = m->m_next;
10349ec7b004SRick Macklem 	}
10359ec7b004SRick Macklem 	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
10369ec7b004SRick Macklem 	*cksum = in_cksum(m1, cklen);
10379ec7b004SRick Macklem 	return (len);
10389ec7b004SRick Macklem }
10399ec7b004SRick Macklem 
/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 *
 * The body is empty: sockref is accepted and nothing is done with it.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{
	(void)sockref;
}
1048