1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 * @(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $
 * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.17 2008/03/30 20:39:01 dillon Exp $
 */

#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/thread.h>
#include <sys/globaldata.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>

#include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */

#include <net/route.h>
#include <net/netmsg2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_timer2.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Task bits accumulated in netmsg_tcp_timer.tt_tasks.  Each callout
 * callback sets its bit and sends one message; the protocol thread
 * then dispatches all pending tasks in tcp_timer_handler().
 */
#define TCP_TIMER_REXMT		0x01
#define TCP_TIMER_PERSIST	0x02
#define TCP_TIMER_KEEP		0x04
#define TCP_TIMER_2MSL		0x08
#define TCP_TIMER_DELACK	0x10

static struct tcpcb	*tcp_timer_rexmt_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_persist_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_keep_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_2msl_handler(struct tcpcb *);
static struct tcpcb	*tcp_timer_delack_handler(struct tcpcb *);

/*
 * Dispatch table mapping each timer task bit to its handler, in the
 * order the tasks are serviced by tcp_timer_handler().  A handler
 * returns NULL when it destroyed the tcpcb, which stops dispatch.
 */
static const struct tcp_timer {
	uint32_t	tt_task;
	struct tcpcb	*(*tt_handler)(struct tcpcb *);
} tcp_timer_handlers[] = {
	{ TCP_TIMER_DELACK,	tcp_timer_delack_handler },
	{ TCP_TIMER_REXMT,	tcp_timer_rexmt_handler },
	{ TCP_TIMER_PERSIST,	tcp_timer_persist_handler },
	{ TCP_TIMER_KEEP,	tcp_timer_keep_handler },
	{ TCP_TIMER_2MSL,	tcp_timer_2msl_handler },
	{ 0, NULL }
};

/*
 * Sysctl conversion handler: the backing kernel variable is kept in
 * ticks while the sysctl is presented in milliseconds.  Rejects any
 * new value that would round down to less than one tick.
 */
static int
sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
{
	int error, s, tt;

	tt = *(int *)oidp->oid_arg1;
	/* Export: ticks -> milliseconds (64-bit intermediate avoids overflow) */
	s = (int)((int64_t)tt * 1000 / hz);

	error = sysctl_handle_int(oidp, &s, 0, req);
	if (error || !req->newptr)
		return (error);

	/* Import: milliseconds -> ticks */
	tt = (int)((int64_t)s * hz / 1000);
	if (tt < 1)
		return (EINVAL);

	*(int *)oidp->oid_arg1 = tt;
	return (0);
}

int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I",
    "Time to establish TCP connection");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I",
    "Time before TCP keepalive probes begin");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I",
    "Time between TCP keepalive probes");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
    CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive, 0, "Assume SO_KEEPALIVE on all TCP connections");

/* max idle probes */
int	tcp_keepcnt = TCPTV_KEEPCNT;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW,
    &tcp_keepcnt, 0, "Maximum number of keepalive probes to be sent");

static int	tcp_do_eifel_response = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel_response, CTLFLAG_RW,
    &tcp_do_eifel_response, 0, "Eifel response algorithm (RFC 4015)");

int	tcp_eifel_rtoinc = 2;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, eifel_rtoinc, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_eifel_rtoinc, 0, sysctl_msec_to_ticks, "I",
    "Eifel response RTO increment");

/* max idle time in persist */
int	tcp_maxpersistidle;

/*
 * Cancel all timers for TCP tp.
 */
void
tcp_canceltimers(struct tcpcb *tp)
{
	tcp_callout_stop(tp, tp->tt_2msl);
	tcp_callout_stop(tp, tp->tt_persist);
	tcp_callout_stop(tp, tp->tt_keep);
	tcp_callout_stop(tp, tp->tt_rexmt);
}

/*
 * Mark a timer task as pending and, if the timer message is not
 * already in flight (MSGF_DONE set means it has been replied and is
 * idle), send it to the tcpcb's owning protocol thread.
 *
 * Caller should be in critical section.
 */
static void
tcp_send_timermsg(struct tcpcb *tp, uint32_t task)
{
	struct netmsg_tcp_timer *tmsg = tp->tt_msg;

	KKASSERT(tmsg != NULL && tmsg->tt_cpuid == mycpuid &&
	    tmsg->tt_tcb != NULL);

	tmsg->tt_tasks |= task;
	if (tmsg->tt_msg.lmsg.ms_flags & MSGF_DONE)
		lwkt_sendmsg(tmsg->tt_msgport, &tmsg->tt_msg.lmsg);
}

/* Backoff multipliers indexed by t_rxtshift (SYN retransmits). */
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

/* More aggressive SYN backoff, selected via tcp_low_rtobase. */
int	tcp_syn_backoff_low[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 2, 4, 8, 8, 16, 16, 32, 64, 64, 64, 64 };

/* Backoff multipliers for established-connection retransmits. */
int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };

static int	tcp_totbackoff = 511;	/* sum of tcp_backoff[] */

/*
 * Delayed-ACK timer fired: force an immediate ACK out.
 *
 * Caller should be in critical section.
 */
static struct tcpcb *
tcp_timer_delack_handler(struct tcpcb *tp)
{
	tp->t_flags |= TF_ACKNOW;
	tcpstat.tcps_delack++;
	tcp_output(tp);
	return tp;
}

/*
 * TCP timer processing.
 *
 * Callout callback for the delayed-ACK timer.  Like the other
 * callout callbacks below, it only validates the callout state
 * (a pending or inactive callout means it was rescheduled or
 * stopped racing with this callback) and forwards the task to the
 * protocol thread via the timer message.
 */
void
tcp_timer_delack(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct callout *co = &tp->tt_delack->tc_callout;

	crit_enter();
	if (callout_pending(co) || !callout_active(co)) {
		crit_exit();
		return;
	}
	callout_deactivate(co);
	tcp_send_timermsg(tp, TCP_TIMER_DELACK);
	crit_exit();
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_2msl_handler(struct tcpcb *tp)
{
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 */
	if (tp->t_state != TCPS_TIME_WAIT &&
	    (ticks - tp->t_rcvtime) <= tp->t_maxidle) {
		tcp_callout_reset(tp, tp->tt_2msl, tp->t_keepintvl,
		    tcp_timer_2msl);
	} else {
		tp = tcp_close(tp);
	}

#ifdef TCPDEBUG
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

/* Callout callback for the 2MSL timer; defers work to the protocol thread. */
void
tcp_timer_2msl(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct callout *co = &tp->tt_2msl->tc_callout;

	crit_enter();
	if (callout_pending(co) || !callout_active(co)) {
		crit_exit();
		return;
	}
	callout_deactivate(co);
	tcp_send_timermsg(tp, TCP_TIMER_2MSL);
	crit_exit();
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_keep_handler(struct tcpcb *tp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate = tp->t_state;
#endif

	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || (tp->t_flags & TF_KEEPALIVE) ||
	     (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE)) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tp->t_keepidle + tp->t_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcp_maketemplate(tp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			tcp_freetemplate(t_template);
		}
		tcp_callout_reset(tp, tp->tt_keep, tp->t_keepintvl,
		    tcp_timer_keep);
	} else {
		tcp_callout_reset(tp, tp->tt_keep, tp->t_keepidle,
		    tcp_timer_keep);
	}

#ifdef TCPDEBUG
	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;

dropit:
	tcpstat.tcps_keepdrops++;
	tp = tcp_drop(tp, ETIMEDOUT);

#ifdef TCPDEBUG
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

/* Callout callback for the keepalive timer; defers work to the protocol thread. */
void
tcp_timer_keep(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct callout *co = &tp->tt_keep->tc_callout;

	crit_enter();
	if (callout_pending(co) || !callout_active(co)) {
		crit_exit();
		return;
	}
	callout_deactivate(co);
	tcp_send_timermsg(tp, TCP_TIMER_KEEP);
	crit_exit();
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_persist_handler(struct tcpcb *tp)
{
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		tp = tcp_drop(tp, ETIMEDOUT);
		goto out;
	}
	tcp_setpersist(tp);
	/* TF_FORCE makes tcp_output() send a window probe */
	tp->t_flags |= TF_FORCE;
	tcp_output(tp);
	tp->t_flags &= ~TF_FORCE;

out:
#ifdef TCPDEBUG
	if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

/* Callout callback for the persist timer; defers work to the protocol thread. */
void
tcp_timer_persist(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct callout *co = &tp->tt_persist->tc_callout;

	crit_enter();
	if (callout_pending(co) || !callout_active(co)) {
		crit_exit();
		return;
	}
	callout_deactivate(co);
	tcp_send_timermsg(tp, TCP_TIMER_PERSIST);
	crit_exit();
}

/*
 * Snapshot congestion-control and RTT state before the first
 * retransmit so it can be restored by tcp_revert_congestion_state()
 * if the retransmit later proves spurious (Eifel response).
 */
void
tcp_save_congestion_state(struct tcpcb *tp)
{
	tp->snd_cwnd_prev = tp->snd_cwnd;
	tp->snd_wacked_prev = tp->snd_wacked;
	tp->snd_ssthresh_prev = tp->snd_ssthresh;
	tp->snd_recover_prev = tp->snd_recover;

	/*
	 * Saved srtt includes the configured Eifel RTO increment so a
	 * later rebase starts from a slightly inflated RTT estimate.
	 */
	tp->t_rxtcur_prev = tp->t_rxtcur;
	tp->t_srtt_prev = tp->t_srtt +
	    (tcp_eifel_rtoinc << TCP_RTT_SHIFT);
	tp->t_rttvar_prev = tp->t_rttvar;
	tp->snd_max_prev = tp->snd_max;
	tp->t_flags &= ~TF_REBASERTO;

	if (IN_FASTRECOVERY(tp))
		tp->t_flags |= TF_WASFRECOVERY;
	else
		tp->t_flags &= ~TF_WASFRECOVERY;
	if (tp->t_flags & TF_RCVD_TSTMP) {
		tp->t_rexmtTS = ticks;
		tp->t_flags |= TF_FIRSTACCACK;
	}
#ifdef later
	tcp_sack_save_scoreboard(&tp->scb);
#endif
}

/*
 * Restore the state saved by tcp_save_congestion_state() after a
 * retransmit was determined to be spurious ("bad"), and account the
 * bad retransmit in the statistics.
 */
void
tcp_revert_congestion_state(struct tcpcb *tp)
{
	tp->snd_cwnd = tp->snd_cwnd_prev;
	tp->snd_wacked = tp->snd_wacked_prev;
	tp->snd_ssthresh = tp->snd_ssthresh_prev;
	tp->snd_recover = tp->snd_recover_prev;
	if (tp->t_flags & TF_WASFRECOVERY)
		ENTER_FASTRECOVERY(tp);
	if (tp->t_flags & TF_FASTREXMT) {
		++tcpstat.tcps_sndfastrexmitbad;
		if (tp->t_flags & TF_EARLYREXMT)
			++tcpstat.tcps_sndearlyrexmitbad;
	} else {
		/* Spurious RTO: optionally schedule an RTO rebase (RFC 4015) */
		++tcpstat.tcps_sndrtobad;
		tp->snd_last = ticks;
		if (tcp_do_eifel_response)
			tp->t_flags |= TF_REBASERTO;
	}
	tp->t_badrxtwin = 0;
	tp->t_rxtshift = 0;
	tp->snd_nxt = tp->snd_max;
#ifdef later
	tcp_sack_revert_scoreboard(&tp->scb, tp->snd_una);
#endif
}

/* Caller should be in critical section */
static struct tcpcb *
tcp_timer_rexmt_handler(struct tcpcb *tp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;
#endif

#ifdef TCPDEBUG
	ostate = tp->t_state;
#endif
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		tp = tcp_drop(tp, tp->t_softerror ?
		    tp->t_softerror : ETIMEDOUT);
		goto out;
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * first retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		tcp_save_congestion_state(tp);
		tp->t_flags &= ~(TF_FASTREXMT | TF_EARLYREXMT);
	}
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
		/*
		 * Record the time that we spent in SYN or SYN|ACK
		 * retransmition.
		 *
		 * Needed by RFC3390 and RFC6298.
		 */
		tp->t_rxtsyn += tp->t_rxtcur;
	}
	/* Throw away SACK blocks on a RTO, as specified by RFC2018. */
	tcp_sack_cleanup(&tp->scb);
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT) {
		if (tcp_low_rtobase) {
			rexmt = TCP_REXMTVAL(tp) *
			    tcp_syn_backoff_low[tp->t_rxtshift];
		} else {
			rexmt = TCP_REXMTVAL(tp) *
			    tcp_syn_backoff[tp->t_rxtshift];
		}
	} else {
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	}
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * If losing, let the lower level know and try for
	 * a better route.  Also, if we backed off this far,
	 * our srtt estimate is probably bogus.  Clobber it
	 * so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		in_losing(tp->t_inpcb);
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->rexmt_high = tp->snd_una;
	tp->t_flags &= ~TF_SACKRESCUED;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (the minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;

		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_wacked = 0;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	tcp_output(tp);

out:
#ifdef TCPDEBUG
	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return tp;
}

/* Callout callback for the retransmit timer; defers work to the protocol thread. */
void
tcp_timer_rexmt(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct callout *co = &tp->tt_rexmt->tc_callout;

	crit_enter();
	if (callout_pending(co) || !callout_active(co)) {
		crit_exit();
		return;
	}
	callout_deactivate(co);
	tcp_send_timermsg(tp, TCP_TIMER_REXMT);
	crit_exit();
}

/*
 * Netmsg handler run in the tcpcb's protocol thread: replies to the
 * message immediately (so new timer events can be queued), then runs
 * every handler whose task bit was pending.  Dispatch stops early if
 * a handler destroys the tcpcb (returns NULL) or no tasks remain.
 */
static void
tcp_timer_handler(netmsg_t msg)
{
	struct netmsg_tcp_timer *tmsg = (struct netmsg_tcp_timer *)msg;
	const struct tcp_timer *tt;
	struct tcpcb *tp;

	crit_enter();

	KKASSERT(tmsg->tt_cpuid == mycpuid && tmsg->tt_tcb != NULL);
	tp = tmsg->tt_tcb;

	/* Save pending tasks and reset the tasks in message */
	tmsg->tt_running_tasks = tmsg->tt_tasks;
	tmsg->tt_prev_tasks = tmsg->tt_tasks;
	tmsg->tt_tasks = 0;

	/* Reply ASAP */
	lwkt_replymsg(&tmsg->tt_msg.lmsg, 0);

	if (tmsg->tt_running_tasks == 0) {
		/*
		 * All of the timers are cancelled when the message
		 * is pending; bail out.
		 */
		crit_exit();
		return;
	}

	for (tt = tcp_timer_handlers; tt->tt_handler != NULL; ++tt) {
		if ((tmsg->tt_running_tasks & tt->tt_task) == 0)
			continue;

		tmsg->tt_running_tasks &= ~tt->tt_task;
		tp = tt->tt_handler(tp);
		if (tp == NULL)
			break;

		if (tmsg->tt_running_tasks == 0)	/* nothing left to do */
			break;
	}

	crit_exit();
}

/*
 * Initialize the tcpcb's timer message, binding it to the current
 * cpu and the given target msgport.  The message is dropable so it
 * can be discarded in tcp_destroy_timermsg() while still queued.
 */
void
tcp_create_timermsg(struct tcpcb *tp, struct lwkt_port *msgport)
{
	struct netmsg_tcp_timer *tmsg = tp->tt_msg;

	netmsg_init(&tmsg->tt_msg, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, tcp_timer_handler);
	tmsg->tt_cpuid = mycpuid;
	tmsg->tt_msgport = msgport;
	tmsg->tt_tcb = tp;
	tmsg->tt_tasks = 0;
}

/*
 * Tear down the tcpcb's timer message, dropping it if it is still
 * queued to the protocol thread.
 */
void
tcp_destroy_timermsg(struct tcpcb *tp)
{
	struct netmsg_tcp_timer *tmsg = tp->tt_msg;

	if (tmsg == NULL ||		/* listen socket */
	    tmsg->tt_tcb == NULL)	/* only tcp_attach() is called */
		return;

	KKASSERT(tmsg->tt_cpuid == mycpuid);
	crit_enter();
	if ((tmsg->tt_msg.lmsg.ms_flags & MSGF_DONE) == 0) {
		/*
		 * This message is still pending to be processed;
		 * drop it.
		 */
		lwkt_dropmsg(&tmsg->tt_msg.lmsg);
	}
	crit_exit();
}

/* Initialize one tcp_callout and record its timer task bit. */
static __inline void
tcp_callout_init(struct tcp_callout *tc, uint32_t task)
{
	callout_init_mp(&tc->tc_callout);
	tc->tc_task = task;
}

/* Initialize all five per-connection timers for tp. */
void
tcp_inittimers(struct tcpcb *tp)
{
	tcp_callout_init(tp->tt_rexmt, TCP_TIMER_REXMT);
	tcp_callout_init(tp->tt_persist, TCP_TIMER_PERSIST);
	tcp_callout_init(tp->tt_keep, TCP_TIMER_KEEP);
	tcp_callout_init(tp->tt_2msl, TCP_TIMER_2MSL);
	tcp_callout_init(tp->tt_delack, TCP_TIMER_DELACK);
}