1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 3 * The Regents of the University of California. 4 * Copyright (c) 2007-2008,2010 5 * Swinburne University of Technology, Melbourne, Australia. 6 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 7 * Copyright (c) 2010 The FreeBSD Foundation 8 * All rights reserved. 9 * Copyright (c) 2017 by Delphix. All rights reserved. 10 * Copyright 2020 RackTop Systems, Inc. 11 * 12 * This software was developed at the Centre for Advanced Internet 13 * Architectures, Swinburne University of Technology, by Lawrence Stewart, James 14 * Healy and David Hayes, made possible in part by a grant from the Cisco 15 * University Research Program Fund at Community Foundation Silicon Valley. 16 * 17 * Portions of this software were developed at the Centre for Advanced 18 * Internet Architectures, Swinburne University of Technology, Melbourne, 19 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 20 * 21 * Redistribution and use in source and binary forms, with or without 22 * modification, are permitted provided that the following conditions 23 * are met: 24 * 1. Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * 2. Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in the 28 * documentation and/or other materials provided with the distribution. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

#include <sys/errno.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/cc.h>
#include <inet/cc/cc_module.h>

/* Congestion-control framework callbacks implemented by this module. */
static void newreno_ack_received(struct cc_var *ccv, uint16_t type);
static void newreno_after_idle(struct cc_var *ccv);
static void newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void newreno_post_recovery(struct cc_var *ccv);

/* Miscellaneous-module linkage for the loadable-module framework. */
static struct modlmisc cc_newreno_modlmisc = {
	&mod_miscops,
	"New Reno Congestion Control"
};

static struct modlinkage cc_newreno_modlinkage = {
	MODREV_1,
	&cc_newreno_modlmisc,
	NULL
};

/*
 * The NewReno algorithm descriptor registered with the cc framework.
 * Callbacks not set here (e.g. conn_init/cb_init) are left to the
 * framework's defaults.
 */
struct cc_algo newreno_cc_algo = {
	.name = "newreno",
	.ack_received = newreno_ack_received,
	.after_idle = newreno_after_idle,
	.cong_signal = newreno_cong_signal,
	.post_recovery = newreno_post_recovery,
};

/*
 * Module load entry point: register the algorithm with the cc framework
 * first, then install the module linkage.  If mod_install() fails, the
 * algorithm registration is rolled back so no dangling algo is left behind.
 */
int
_init(void)
{
	int err;

	if ((err = cc_register_algo(&newreno_cc_algo)) == 0) {
		if ((err = mod_install(&cc_newreno_modlinkage)) != 0)
			(void) cc_deregister_algo(&newreno_cc_algo);
	}
	return (err);
}

/*
 * Module unload entry point.  Unloading is refused unconditionally;
 * connections may still reference the algorithm.
 */
int
_fini(void)
{
	/* XXX Not unloadable for now */
	return (EBUSY);
}

/* Module information entry point. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&cc_newreno_modlinkage, modinfop));
}

/*
 * ACK processing: grow the congestion window on a regular in-order ACK,
 * provided we are not in recovery and the window is currently the
 * limiting factor (CCF_CWND_LIMITED).
 */
static void
newreno_ack_received(struct cc_var *ccv, uint16_t type)
{
	if (type == CC_ACK && !IN_RECOVERY(ccv->flags) &&
	    (ccv->flags & CCF_CWND_LIMITED)) {
		uint_t cw = CCV(ccv, tcp_cwnd);
		uint_t incr = CCV(ccv, tcp_mss);

		/*
		 * Regular in-order ACK, open the congestion window.
		 * Method depends on which congestion control state we're
		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
		 * enabled.
		 *
		 * slow start:  cwnd <= ssthresh
		 * cong avoid:  cwnd > ssthresh
		 *
		 * slow start and ABC (RFC 3465):
		 *   Grow cwnd exponentially by the amount of data
		 *   ACKed capping the max increment per ACK to
		 *   (abc_l_var * maxseg) bytes.
		 *
		 * slow start without ABC (RFC 5681):
		 *   Grow cwnd exponentially by maxseg per ACK.
		 *
		 * cong avoid and ABC (RFC 3465):
		 *   Grow cwnd linearly by maxseg per RTT for each
		 *   cwnd worth of ACKed data.
		 *
		 * cong avoid without ABC (RFC 5681):
		 *   Grow cwnd linearly by approximately maxseg per RTT using
		 *   maxseg^2 / cwnd per ACK as the increment.
		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
		 *   avoid capping cwnd.
		 */
		if (cw > CCV(ccv, tcp_cwnd_ssthresh)) {
			/* Congestion avoidance. */
			if (CC_ABC(ccv)) {
				/*
				 * Only grow once per cwnd's worth of ACKed
				 * data; CCF_ABC_SENTAWND is set by the
				 * framework when that much has been ACKed.
				 */
				if (ccv->flags & CCF_ABC_SENTAWND)
					ccv->flags &= ~CCF_ABC_SENTAWND;
				else
					incr = 0;
			} else
				incr = max((incr * incr / cw), 1);
		} else if (CC_ABC(ccv)) {
			/*
			 * In slow-start with ABC enabled and no RTO in sight?
			 * (Must not use abc_l_var > 1 if slow starting after
			 * an RTO.)
			 */
			if (ccv->flags & CCF_RTO) {
				incr = min(ccv->bytes_this_ack,
				    CCV(ccv, tcp_mss));
			} else {
				incr = min(ccv->bytes_this_ack,
				    CC_ABC_L_VAR(ccv) * CCV(ccv, tcp_mss));
			}

		}
		/* ABC is on by default, so incr equals 0 frequently. */
		if (incr > 0)
			CCV(ccv, tcp_cwnd) = min(cw + incr,
			    TCP_MAXWIN << CCV(ccv, tcp_snd_ws));
	}
}

/*
 * Reduce cwnd to the restart window after an idle period, per
 * RFC 5681 Section 4.1.
 */
static void
newreno_after_idle(struct cc_var *ccv)
{
	int rw;

	/*
	 * If we've been idle for more than one retransmit timeout the old
	 * congestion window is no longer current and we have to reduce it to
	 * the restart window before we can transmit again.
	 *
	 * The restart window is the initial window or the last CWND, whichever
	 * is smaller.
	 *
	 * This is done to prevent us from flooding the path with a full CWND at
	 * wirespeed, overloading router and switch buffers along the way.
	 *
	 * See RFC5681 Section 4.1. "Restarting Idle Connections".
	 */
	if (CCV(ccv, tcp_init_cwnd) != 0) {
		/*
		 * The TCP_INIT_CWND socket option was used to override the
		 * default.
		 */
		rw = CCV(ccv, tcp_init_cwnd) * CCV(ccv, tcp_mss);
	} else if (CCSV(ccv, tcps_slow_start_initial) != 0) {
		/* The _slow_start_initial tunable was explicitly set. */
		rw = min(TCP_MAX_INIT_CWND, CCSV(ccv, tcps_slow_start_initial))
		    * CCV(ccv, tcp_mss);
	} else {
		/* Do RFC 3390 */
		rw = min(4 * CCV(ccv, tcp_mss),
		    max(2 * CCV(ccv, tcp_mss), 4380));
	}

	CCV(ccv, tcp_cwnd) = min(rw, CCV(ccv, tcp_cwnd));
}

/*
 * Perform any necessary tasks before we enter congestion recovery.
 */
static void
newreno_cong_signal(struct cc_var *ccv, uint32_t type)
{
	uint32_t cwin, ssthresh_on_loss;
	uint32_t mss;

	cwin = CCV(ccv, tcp_cwnd);
	mss = CCV(ccv, tcp_mss);
	/*
	 * New ssthresh: half the amount of outstanding data (snxt - suna),
	 * floored at 2 segments and rounded down to an MSS multiple.
	 */
	ssthresh_on_loss =
	    max((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) / 2 / mss, 2)
	    * mss;

	/* Catch algos which mistakenly leak private signal types. */
	ASSERT((type & CC_SIGPRIVMASK) == 0);

	/* Halve cwnd, also floored at 2 segments and MSS-aligned. */
	cwin = max(cwin / 2 / mss, 2) * mss;

	switch (type) {
	case CC_NDUPACK:
		if (!IN_FASTRECOVERY(ccv->flags)) {
			if (!IN_CONGRECOVERY(ccv->flags)) {
				CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
				CCV(ccv, tcp_cwnd) = cwin;
			}
			ENTER_RECOVERY(ccv->flags);
		}
		break;
	case CC_ECN:
		if (!IN_CONGRECOVERY(ccv->flags)) {
			CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
			CCV(ccv, tcp_cwnd) = cwin;
			ENTER_CONGRECOVERY(ccv->flags);
		}
		break;
	case CC_RTO:
		/* On retransmit timeout, restart from one segment. */
		CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
		CCV(ccv, tcp_cwnd) = mss;
		break;
	}
}

/*
 * Perform any necessary tasks before we exit congestion recovery.
 */
static void
newreno_post_recovery(struct cc_var *ccv)
{
	uint32_t pipe;

	if (IN_FASTRECOVERY(ccv->flags)) {
		/*
		 * Fast recovery will conclude after returning from this
		 * function. Window inflation should have left us with
		 * approximately cwnd_ssthresh outstanding data. But in case we
		 * would be inclined to send a burst, better to do it via the
		 * slow start mechanism.
		 */
		pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
		if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
			/*
			 * Ensure that cwnd does not collapse to 1 MSS under
			 * adverse conditions. Implements RFC6582
			 */
			CCV(ccv, tcp_cwnd) = MAX(pipe, CCV(ccv, tcp_mss)) +
			    CCV(ccv, tcp_mss);
		} else if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
		}
	}
}