xref: /illumos-gate/usr/src/uts/common/inet/cc.h (revision ac05f74f)
1 /*
2  * Copyright (c) 2007-2008
3  * 	Swinburne University of Technology, Melbourne, Australia.
4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5  * Copyright (c) 2010 The FreeBSD Foundation
6  * All rights reserved.
7  * Copyright (c) 2017 by Delphix. All rights reserved.
8  *
9  * This software was developed at the Centre for Advanced Internet
10  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
11  * James Healy, made possible in part by a grant from the Cisco University
12  * Research Program Fund at Community Foundation Silicon Valley.
13  *
14  * Portions of this software were developed at the Centre for Advanced
15  * Internet Architectures, Swinburne University of Technology, Melbourne,
16  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  * $FreeBSD$
40  */
41 
42 /*
43  * This software was first released in 2007 by James Healy and Lawrence Stewart
44  * whilst working on the NewTCP research project at Swinburne University of
45  * Technology's Centre for Advanced Internet Architectures, Melbourne,
46  * Australia, which was made possible in part by a grant from the Cisco
47  * University Research Program Fund at Community Foundation Silicon Valley.
48  * More details are available at:
49  *   http://caia.swin.edu.au/urp/newtcp/
50  */
51 
52 #ifndef _NETINET_CC_H_
53 #define	_NETINET_CC_H_
54 
55 #ifdef	__cplusplus
56 extern "C" {
57 #endif
58 
59 #include <netinet/tcp.h>
60 #include <sys/queue.h>
61 #include <sys/rwlock.h>
62 
/*
 * Maximum length of a congestion control algorithm name; this constant also
 * sizes the name[] member of struct cc_algo.
 */
#define	CC_ALGO_NAME_MAX	16

/* Name of the default congestion control algorithm. */
#define	CC_DEFAULT_ALGO_NAME	"sunreno"
66 
/* Transport control blocks; this header only ever uses pointers to them. */
struct tcp_s;
struct sctp_s;

/* Completed further down in this header; named here for the prototypes. */
struct cc_algo;

/*
 * CC housekeeping functions.
 *
 * NOTE(review): judging by the names, these look up an algorithm by name and
 * add/remove an algorithm from the set of known ones.  The implementations
 * are not part of this header, so confirm return-value conventions there.
 */
extern struct cc_algo *cc_load_algo(const char *name);
extern int	cc_register_algo(struct cc_algo *add_cc);
extern int	cc_deregister_algo(struct cc_algo *remove_cc);
74 
75 /*
76  * Wrapper around transport structs that contain same-named congestion
77  * control variables. Allows algos to be shared amongst multiple CC aware
78  * transports.
79  *
80  * In theory, this code (from FreeBSD) can be used to support pluggable
81  * congestion control for sctp as well as tcp.  However, the support for sctp
82  * in FreeBSD is incomplete, and in practice "type" is ignored.  cc_module.h
83  * provides a CCV macro which implementations can use to get a variable out of
84  * the protocol-appropriate structure.
85  *
86  * If FreeBSD eventually does extend support for pluggable congestion control
87  * to sctp, we'll need to make sure we're setting "type" appropriately or use
88  * a definition of CCV that ignores it.
89  */
90 struct cc_var {
91 	void		*cc_data; /* Per-connection private algorithm data. */
92 	int		bytes_this_ack; /* # bytes acked by the current ACK. */
93 	int		t_bytes_acked; /* # bytes acked during current RTT */
94 	tcp_seq		curack; /* Most recent ACK. */
95 	uint32_t	flags; /* Flags for cc_var (see below) */
96 	int		type; /* Indicates which ptr is valid in ccvc. */
97 	union ccv_container {
98 		struct tcp_s	*tcp;
99 		struct sctp_s	*sctp;
100 	} ccvc;
101 	uint16_t	nsegs; /* # segments coalesced into current chain. */
102 };
103 
/*
 * cc_var flags.
 *
 * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
 *   according to the Appropriate Byte Counting spec, defined in RFC 3465.
 * CCF_RTO is set while in slow-start due to a retransmission timeout, and
 *   stays enabled for the duration of that slow-start phase.
 */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_FASTRECOVERY	0x0004	/* in NewReno Fast Recovery */
#define	CCF_WASFRECOVERY	0x0008	/* was in NewReno Fast Recovery */
#define	CCF_CONGRECOVERY	0x0010	/* congestion recovery mode */
#define	CCF_WASCRECOVERY	0x0020	/* was in congestion recovery */
#define	CCF_RTO			0x0040	/* in slow-start due to timeout */

/*
 * Helpers to test, set and clear the recovery bits of a flags word.
 *
 * IN_*(flags) yields the masked bits: non-zero when at least one of the
 * tested bits is set, zero otherwise.  The value is not normalized to 0/1.
 *
 * ENTER_*(flags) and EXIT_*(flags) modify their argument in place, so it
 * must be a modifiable lvalue.
 *
 * Every argument and every expansion is parenthesized so that the macros
 * behave like function calls regardless of the operators in the argument
 * or around the invocation, e.g. IN_FASTRECOVERY(a | b) tests (a | b).
 */
#define	IN_FASTRECOVERY(flags)		((flags) & CCF_FASTRECOVERY)
#define	ENTER_FASTRECOVERY(flags)	((flags) |= CCF_FASTRECOVERY)
#define	EXIT_FASTRECOVERY(flags)	((flags) &= ~CCF_FASTRECOVERY)

#define	IN_CONGRECOVERY(flags)		((flags) & CCF_CONGRECOVERY)
#define	ENTER_CONGRECOVERY(flags)	((flags) |= CCF_CONGRECOVERY)
#define	EXIT_CONGRECOVERY(flags)	((flags) &= ~CCF_CONGRECOVERY)

#define	IN_RECOVERY(flags)	\
	((flags) & (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	ENTER_RECOVERY(flags)	\
	((flags) |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	EXIT_RECOVERY(flags)	\
	((flags) &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY))
133 
/*
 * ACK types passed to the ack_received() hook.
 *
 * CC_ACK: the ACK acknowledges previously unACKed data.
 * CC_DUPACK: a duplicate ACK was received.  The conditions under which an
 *   ACK is considered a duplicate ACK are defined in RFC 5681.
 * CC_PARTIALACK, CC_SACK: defined, but marked "not yet".
 */
#define	CC_ACK		0x0001	/* Regular in sequence ACK. */
#define	CC_DUPACK	0x0002	/* Duplicate ACK. */
#define	CC_PARTIALACK	0x0004	/* Not yet. */
#define	CC_SACK		0x0008	/* Not yet. */
145 
146 /*
147  * Congestion signal types passed to the cong_signal() hook. The highest order 8
148  * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
149  * congestion signal types.
150  *
151  * The congestion signals defined here cover the following situations:
152  * CC_ECN: A packet with an Explicit Congestion Notification was received
153  *   See RFC 3168.
 * CC_RTO: A retransmission timeout (RTO) occurred.
155  * CC_RTO_ERR: An ACK was received for a sequence number after we fired an RTO
156  *   for that sequence number
157  * CC_NDUPACK: Trigger fast retransmit based on the assumption that receiving
158  *   N duplicate ACKs indicates packet loss rather than reordering.  Fast
159  *   retransmit is followed by fast recovery.  Fast retransmit and recovery
160  *   were originally described in RFC 2581 and were updated by RFC3782
161  *   (NewReno).  In both RFC2581 and RFC3782, N is 3.
162  */
/* Congestion signals shared by all algorithms. */
#define	CC_ECN		0x00000001	/* ECN marked packet received. */
#define	CC_RTO		0x00000002	/* RTO fired. */
#define	CC_RTO_ERR	0x00000004	/* RTO fired in error. */
#define	CC_NDUPACK	0x00000008	/* Threshold of dupack's reached. */

/* A signal with any of these (highest order 8) bits set is algo-private. */
#define	CC_SIGPRIVMASK	0xFF000000
169 
170 /*
171  * Structure to hold data and function pointers that together represent a
172  * congestion control algorithm.
173  */
174 struct cc_algo {
175 	char	name[CC_ALGO_NAME_MAX];
176 
177 	/* Init CC state for a new control block. */
178 	int	(*cb_init)(struct cc_var *ccv);
179 
180 	/* Cleanup CC state for a terminating control block. */
181 	void	(*cb_destroy)(struct cc_var *ccv);
182 
183 	/* Init variables for a newly established connection. */
184 	void	(*conn_init)(struct cc_var *ccv);
185 
186 	/* Called on receipt of an ack. */
187 	void	(*ack_received)(struct cc_var *ccv, uint16_t type);
188 
189 	/* Called on detection of a congestion signal. */
190 	void	(*cong_signal)(struct cc_var *ccv, uint32_t type);
191 
192 	/* Called after exiting congestion recovery. */
193 	void	(*post_recovery)(struct cc_var *ccv);
194 
195 	/* Called when data transfer resumes after an idle period. */
196 	void	(*after_idle)(struct cc_var *ccv);
197 
198 	STAILQ_ENTRY(cc_algo) entries;
199 };
200 
/*
 * Callback type taken by cc_walk_algos(): it receives an opaque pointer and
 * an algorithm, and returns an int.
 *
 * NOTE(review): presumably the void * handed to cc_walk_algos() is passed
 * through to the callback unchanged, and the callback's return value controls
 * or becomes the result of the walk -- confirm against the implementation.
 */
typedef int cc_walk_func_t(void *arg, struct cc_algo *algo);
extern int	cc_walk_algos(cc_walk_func_t *func, void *arg);
203 
/*
 * Accessors for the congestion control state kept in a TCP control block.
 * Both take a pointer to the control block and expand to an lvalue.
 */

/* The control block's CC algorithm pointer (its tcp_cc_algo member). */
#define	CC_ALGO(tcp)	((tcp)->tcp_cc_algo)

/* The algorithm's private data pointer (cc_data of the tcp_ccv member). */
#define	CC_DATA(tcp)	((tcp)->tcp_ccv.cc_data)
209 
210 #ifdef	__cplusplus
211 }
212 #endif
213 
214 #endif /* _NETINET_CC_H_ */
215