1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 63 * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $ 64 * $DragonFly: src/sys/netinet/tcp_timer.c,v 1.17 2008/03/30 20:39:01 dillon Exp $ 65 */ 66 67 #include "opt_compat.h" 68 #include "opt_inet6.h" 69 #include "opt_tcpdebug.h" 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/kernel.h> 74 #include <sys/mbuf.h> 75 #include <sys/sysctl.h> 76 #include <sys/socket.h> 77 #include <sys/socketvar.h> 78 #include <sys/protosw.h> 79 #include <sys/thread.h> 80 #include <sys/globaldata.h> 81 #include <sys/thread2.h> 82 #include <sys/msgport2.h> 83 84 #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ 85 86 #include <net/route.h> 87 #include <net/netmsg2.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/in_pcb.h> 92 #ifdef INET6 93 #include <netinet6/in6_pcb.h> 94 #endif 95 #include <netinet/ip_var.h> 96 #include <netinet/tcp.h> 97 #include <netinet/tcp_fsm.h> 98 #include <netinet/tcp_seq.h> 99 #include <netinet/tcp_timer.h> 100 #include <netinet/tcp_timer2.h> 101 #include <netinet/tcp_var.h> 102 #include <netinet/tcpip.h> 103 #ifdef TCPDEBUG 104 #include <netinet/tcp_debug.h> 105 #endif 106 107 #define TCP_TIMER_REXMT 0x01 108 #define TCP_TIMER_PERSIST 0x02 109 #define TCP_TIMER_KEEP 0x04 110 #define TCP_TIMER_2MSL 0x08 111 #define TCP_TIMER_DELACK 0x10 112 113 static struct tcpcb *tcp_timer_rexmt_handler(struct tcpcb *); 114 static struct tcpcb *tcp_timer_persist_handler(struct tcpcb *); 115 static struct tcpcb *tcp_timer_keep_handler(struct tcpcb *); 116 static struct tcpcb *tcp_timer_2msl_handler(struct tcpcb *); 117 static struct tcpcb *tcp_timer_delack_handler(struct tcpcb *); 118 119 static const struct tcp_timer { 120 uint32_t tt_task; 121 struct tcpcb *(*tt_handler)(struct tcpcb *); 122 } tcp_timer_handlers[] = { 123 { TCP_TIMER_DELACK, tcp_timer_delack_handler }, 124 { TCP_TIMER_REXMT, tcp_timer_rexmt_handler }, 125 { TCP_TIMER_PERSIST, tcp_timer_persist_handler }, 126 { TCP_TIMER_KEEP, tcp_timer_keep_handler }, 127 { TCP_TIMER_2MSL, tcp_timer_2msl_handler }, 128 { 0, NULL } 129 }; 130 131 static int 132 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 133 { 134 int error, s, tt; 135 136 tt = *(int *)oidp->oid_arg1; 137 s = (int)((int64_t)tt * 1000 / hz); 138 139 error = sysctl_handle_int(oidp, &s, 0, req); 140 if (error || !req->newptr) 141 return (error); 142 143 tt = (int)((int64_t)s * hz / 1000); 144 if (tt < 1) 145 return (EINVAL); 146 147 *(int *)oidp->oid_arg1 = tt; 148 return (0); 149 } 150 151 int tcp_keepinit; 152 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 153 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "Time to establish TCP connection"); 154 155 int tcp_keepidle; 156 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 157 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "Time before TCP keepalive probes begin"); 158 159 int tcp_keepintvl; 160 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 161 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "Time between TCP keepalive probes"); 162 163 int tcp_delacktime; 164 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, 165 CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 166 "Time before a delayed ACK is sent"); 167 168 int tcp_msl; 169 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 170 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 171 172 int tcp_rexmit_min; 173 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 174 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout"); 175 176 int tcp_rexmit_slop; 177 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 178 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 179 "Retransmission Timer Slop"); 180 181 static int always_keepalive = 1; 182 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 183 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 184 185 /* max idle probes */ 186 int tcp_keepcnt = TCPTV_KEEPCNT; 187 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, 188 &tcp_keepcnt, 0, "Maximum number of keepalive probes to be sent"); 189 190 static int tcp_do_eifel_response = 1; 191 SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel_response, CTLFLAG_RW, 192 &tcp_do_eifel_response, 0, "Eifel response algorithm (RFC 4015)"); 193 194 int tcp_eifel_rtoinc = 2; 195 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, eifel_rtoinc, CTLTYPE_INT|CTLFLAG_RW, 196 &tcp_eifel_rtoinc, 0, sysctl_msec_to_ticks, "I", 197 "Eifel response RTO increment"); 198 199 /* max idle time in persist */ 200 int tcp_maxpersistidle; 201 202 /* 203 * Cancel all timers for TCP tp. 204 */ 205 void 206 tcp_canceltimers(struct tcpcb *tp) 207 { 208 tcp_callout_stop(tp, tp->tt_2msl); 209 tcp_callout_stop(tp, tp->tt_persist); 210 tcp_callout_stop(tp, tp->tt_keep); 211 tcp_callout_stop(tp, tp->tt_rexmt); 212 } 213 214 /* 215 * Caller should be in critical section 216 */ 217 static void 218 tcp_send_timermsg(struct tcpcb *tp, uint32_t task) 219 { 220 struct netmsg_tcp_timer *tmsg = tp->tt_msg; 221 222 KKASSERT(tmsg != NULL && tmsg->tt_cpuid == mycpuid && 223 tmsg->tt_tcb != NULL); 224 225 tmsg->tt_tasks |= task; 226 if (tmsg->tt_msg.lmsg.ms_flags & MSGF_DONE) 227 lwkt_sendmsg_oncpu(tmsg->tt_msgport, &tmsg->tt_msg.lmsg); 228 } 229 230 int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 231 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 232 233 int tcp_syn_backoff_low[TCP_MAXRXTSHIFT + 1] = 234 { 1, 1, 2, 4, 8, 8, 16, 16, 32, 64, 64, 64, 64 }; 235 236 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 237 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 238 239 static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 240 241 /* Caller should be in critical section */ 242 static struct tcpcb * 243 tcp_timer_delack_handler(struct tcpcb *tp) 244 { 245 tp->t_flags |= TF_ACKNOW; 246 tcpstat.tcps_delack++; 247 tcp_output(tp); 248 return tp; 249 } 250 251 /* 252 * TCP timer processing. 253 */ 254 void 255 tcp_timer_delack(void *xtp) 256 { 257 struct tcpcb *tp = xtp; 258 struct callout *co = &tp->tt_delack->tc_callout; 259 260 crit_enter(); 261 if (callout_pending(co) || !callout_active(co)) { 262 crit_exit(); 263 return; 264 } 265 callout_deactivate(co); 266 tcp_send_timermsg(tp, TCP_TIMER_DELACK); 267 crit_exit(); 268 } 269 270 /* Caller should be in critical section */ 271 static struct tcpcb * 272 tcp_timer_2msl_handler(struct tcpcb *tp) 273 { 274 #ifdef TCPDEBUG 275 int ostate; 276 #endif 277 278 #ifdef TCPDEBUG 279 ostate = tp->t_state; 280 #endif 281 /* 282 * 2 MSL timeout in shutdown went off. If we're closed but 283 * still waiting for peer to close and connection has been idle 284 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 285 * control block. Otherwise, check again in a bit. 286 */ 287 if (tp->t_state != TCPS_TIME_WAIT && 288 (ticks - tp->t_rcvtime) <= tp->t_maxidle) { 289 tcp_callout_reset(tp, tp->tt_2msl, tp->t_keepintvl, 290 tcp_timer_2msl); 291 } else { 292 tp = tcp_close(tp); 293 } 294 295 #ifdef TCPDEBUG 296 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 297 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 298 #endif 299 return tp; 300 } 301 302 void 303 tcp_timer_2msl(void *xtp) 304 { 305 struct tcpcb *tp = xtp; 306 struct callout *co = &tp->tt_2msl->tc_callout; 307 308 crit_enter(); 309 if (callout_pending(co) || !callout_active(co)) { 310 crit_exit(); 311 return; 312 } 313 callout_deactivate(co); 314 tcp_send_timermsg(tp, TCP_TIMER_2MSL); 315 crit_exit(); 316 } 317 318 /* Caller should be in critical section */ 319 static struct tcpcb * 320 tcp_timer_keep_handler(struct tcpcb *tp) 321 { 322 struct tcptemp *t_template; 323 #ifdef TCPDEBUG 324 int ostate = tp->t_state; 325 #endif 326 327 /* 328 * Keep-alive timer went off; send something 329 * or drop connection if idle for too long. 330 */ 331 tcpstat.tcps_keeptimeo++; 332 if (tp->t_state < TCPS_ESTABLISHED) 333 goto dropit; 334 if ((always_keepalive || (tp->t_flags & TF_KEEPALIVE) || 335 (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE)) && 336 tp->t_state <= TCPS_CLOSING) { 337 if ((ticks - tp->t_rcvtime) >= tp->t_keepidle + tp->t_maxidle) 338 goto dropit; 339 /* 340 * Send a packet designed to force a response 341 * if the peer is up and reachable: 342 * either an ACK if the connection is still alive, 343 * or an RST if the peer has closed the connection 344 * due to timeout or reboot. 345 * Using sequence number tp->snd_una-1 346 * causes the transmitted zero-length segment 347 * to lie outside the receive window; 348 * by the protocol spec, this requires the 349 * correspondent TCP to respond. 350 */ 351 tcpstat.tcps_keepprobe++; 352 t_template = tcp_maketemplate(tp); 353 if (t_template) { 354 tcp_respond(tp, t_template->tt_ipgen, 355 &t_template->tt_t, NULL, 356 tp->rcv_nxt, tp->snd_una - 1, 0); 357 tcp_freetemplate(t_template); 358 } 359 tcp_callout_reset(tp, tp->tt_keep, tp->t_keepintvl, 360 tcp_timer_keep); 361 } else { 362 tcp_callout_reset(tp, tp->tt_keep, tp->t_keepidle, 363 tcp_timer_keep); 364 } 365 366 #ifdef TCPDEBUG 367 if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 368 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 369 #endif 370 return tp; 371 372 dropit: 373 tcpstat.tcps_keepdrops++; 374 tp = tcp_drop(tp, ETIMEDOUT); 375 376 #ifdef TCPDEBUG 377 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 378 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 379 #endif 380 return tp; 381 } 382 383 void 384 tcp_timer_keep(void *xtp) 385 { 386 struct tcpcb *tp = xtp; 387 struct callout *co = &tp->tt_keep->tc_callout; 388 389 crit_enter(); 390 if (callout_pending(co) || !callout_active(co)) { 391 crit_exit(); 392 return; 393 } 394 callout_deactivate(co); 395 tcp_send_timermsg(tp, TCP_TIMER_KEEP); 396 crit_exit(); 397 } 398 399 /* Caller should be in critical section */ 400 static struct tcpcb * 401 tcp_timer_persist_handler(struct tcpcb *tp) 402 { 403 #ifdef TCPDEBUG 404 int ostate; 405 #endif 406 407 #ifdef TCPDEBUG 408 ostate = tp->t_state; 409 #endif 410 /* 411 * Persistance timer into zero window. 412 * Force a byte to be output, if possible. 413 */ 414 tcpstat.tcps_persisttimeo++; 415 /* 416 * Hack: if the peer is dead/unreachable, we do not 417 * time out if the window is closed. After a full 418 * backoff, drop the connection if the idle time 419 * (no responses to probes) reaches the maximum 420 * backoff that we would use if retransmitting. 421 */ 422 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 423 ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 424 (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 425 tcpstat.tcps_persistdrop++; 426 tp = tcp_drop(tp, ETIMEDOUT); 427 goto out; 428 } 429 tcp_setpersist(tp); 430 tp->t_flags |= TF_FORCE; 431 tcp_output(tp); 432 tp->t_flags &= ~TF_FORCE; 433 434 out: 435 #ifdef TCPDEBUG 436 if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 437 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 438 #endif 439 return tp; 440 } 441 442 void 443 tcp_timer_persist(void *xtp) 444 { 445 struct tcpcb *tp = xtp; 446 struct callout *co = &tp->tt_persist->tc_callout; 447 448 crit_enter(); 449 if (callout_pending(co) || !callout_active(co)){ 450 crit_exit(); 451 return; 452 } 453 callout_deactivate(co); 454 tcp_send_timermsg(tp, TCP_TIMER_PERSIST); 455 crit_exit(); 456 } 457 458 void 459 tcp_save_congestion_state(struct tcpcb *tp) 460 { 461 /* 462 * Record connection's current states so that they could be 463 * recovered, if this turns out to be a spurious retransmit. 464 */ 465 tp->snd_cwnd_prev = tp->snd_cwnd; 466 tp->snd_wacked_prev = tp->snd_wacked; 467 tp->snd_ssthresh_prev = tp->snd_ssthresh; 468 tp->snd_recover_prev = tp->snd_recover; 469 470 /* 471 * State for Eifel response after spurious timeout retransmit 472 * is detected. We save the current value of snd_max even if 473 * we are called from fast retransmit code, so if RTO needs 474 * rebase, it will be rebased using the RTT of segment that 475 * is not sent during possible congestion. 476 */ 477 tp->snd_max_prev = tp->snd_max; 478 479 if (IN_FASTRECOVERY(tp)) 480 tp->rxt_flags |= TRXT_F_WASFRECOVERY; 481 else 482 tp->rxt_flags &= ~TRXT_F_WASFRECOVERY; 483 if (tp->t_flags & TF_RCVD_TSTMP) { 484 /* States for Eifel detection */ 485 tp->t_rexmtTS = ticks; 486 tp->rxt_flags |= TRXT_F_FIRSTACCACK; 487 } 488 #ifdef later 489 tcp_sack_save_scoreboard(&tp->scb); 490 #endif 491 } 492 493 void 494 tcp_revert_congestion_state(struct tcpcb *tp) 495 { 496 tp->snd_cwnd = tp->snd_cwnd_prev; 497 tp->snd_wacked = tp->snd_wacked_prev; 498 tp->snd_ssthresh = tp->snd_ssthresh_prev; 499 tp->snd_recover = tp->snd_recover_prev; 500 if (tp->rxt_flags & TRXT_F_WASFRECOVERY) 501 ENTER_FASTRECOVERY(tp); 502 if (tp->rxt_flags & TRXT_F_FASTREXMT) { 503 ++tcpstat.tcps_sndfastrexmitbad; 504 if (tp->rxt_flags & TRXT_F_EARLYREXMT) 505 ++tcpstat.tcps_sndearlyrexmitbad; 506 } else { 507 ++tcpstat.tcps_sndrtobad; 508 tp->snd_last = ticks; 509 if (tcp_do_eifel_response) 510 tp->rxt_flags |= TRXT_F_REBASERTO; 511 } 512 tp->t_badrxtwin = 0; 513 tp->t_rxtshift = 0; 514 tp->snd_nxt = tp->snd_max; 515 #ifdef later 516 tcp_sack_revert_scoreboard(&tp->scb, tp->snd_una); 517 #endif 518 } 519 520 /* Caller should be in critical section */ 521 static struct tcpcb * 522 tcp_timer_rexmt_handler(struct tcpcb *tp) 523 { 524 int rexmt; 525 #ifdef TCPDEBUG 526 int ostate; 527 #endif 528 529 #ifdef TCPDEBUG 530 ostate = tp->t_state; 531 #endif 532 /* 533 * Retransmission timer went off. Message has not 534 * been acked within retransmit interval. Back off 535 * to a longer retransmit interval and retransmit one segment. 536 */ 537 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 538 tp->t_rxtshift = TCP_MAXRXTSHIFT; 539 tcpstat.tcps_timeoutdrop++; 540 tp = tcp_drop(tp, tp->t_softerror ? 541 tp->t_softerror : ETIMEDOUT); 542 goto out; 543 } 544 if (tp->t_rxtshift == 1) { 545 /* 546 * First retransmit. 547 */ 548 549 /* 550 * State for "RTT based spurious timeout retransmit detection" 551 * 552 * RTT based spurious timeout retransmit detection: 553 * A retransmit is considered spurious if an ACK for this 554 * segment is received within RTT/2 interval; the assumption 555 * here is that the ACK was already in flight. See 556 * "On Estimating End-to-End Network Path Properties" by 557 * Allman and Paxson for more details. 558 */ 559 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 560 561 /* 562 * States for Eifel response after spurious timeout retransmit 563 * is detected. 564 */ 565 tp->t_rxtcur_prev = tp->t_rxtcur; 566 tp->t_srtt_prev = tp->t_srtt + 567 (tcp_eifel_rtoinc << TCP_RTT_SHIFT); 568 tp->t_rttvar_prev = tp->t_rttvar; 569 570 tcp_save_congestion_state(tp); 571 tp->rxt_flags &= ~(TRXT_F_FASTREXMT | TRXT_F_EARLYREXMT | 572 TRXT_F_REBASERTO); 573 } 574 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 575 /* 576 * Record the time that we spent in SYN or SYN|ACK 577 * retransmition. 578 * 579 * Needed by RFC3390 and RFC6298. 580 */ 581 tp->t_rxtsyn += tp->t_rxtcur; 582 } 583 /* Throw away SACK blocks on a RTO, as specified by RFC2018. */ 584 tcp_sack_discard(tp); 585 tcpstat.tcps_rexmttimeo++; 586 if (tp->t_state == TCPS_SYN_SENT) { 587 if (tcp_low_rtobase) { 588 rexmt = TCP_REXMTVAL(tp) * 589 tcp_syn_backoff_low[tp->t_rxtshift]; 590 } else { 591 rexmt = TCP_REXMTVAL(tp) * 592 tcp_syn_backoff[tp->t_rxtshift]; 593 } 594 } else { 595 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 596 } 597 TCPT_RANGESET(tp->t_rxtcur, rexmt, 598 tp->t_rttmin, TCPTV_REXMTMAX); 599 /* 600 * If losing, let the lower level know and try for 601 * a better route. Also, if we backed off this far, 602 * our srtt estimate is probably bogus. Clobber it 603 * so we'll take the next rtt measurement as our srtt; 604 * move the current srtt into rttvar to keep the current 605 * retransmit times until then. 606 */ 607 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 608 #ifdef INET6 609 if (INP_ISIPV6(tp->t_inpcb)) 610 in6_losing(tp->t_inpcb); 611 else 612 #endif 613 in_losing(tp->t_inpcb); 614 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 615 tp->t_srtt = 0; 616 } 617 tp->snd_nxt = tp->snd_una; 618 tp->snd_recover = tp->snd_max; 619 /* 620 * Force a segment to be sent. 621 */ 622 tp->t_flags |= TF_ACKNOW; 623 /* 624 * If timing a segment in this window, stop the timer. 625 */ 626 tp->t_rtttime = 0; 627 /* 628 * Close the congestion window down to one segment 629 * (we'll open it by one segment for each ack we get). 630 * Since we probably have a window's worth of unacked 631 * data accumulated, this "slow start" keeps us from 632 * dumping all that data as back-to-back packets (which 633 * might overwhelm an intermediate gateway). 634 * 635 * There are two phases to the opening: Initially we 636 * open by one mss on each ack. This makes the window 637 * size increase exponentially with time. If the 638 * window is larger than the path can handle, this 639 * exponential growth results in dropped packet(s) 640 * almost immediately. To get more time between 641 * drops but still "push" the network to take advantage 642 * of improving conditions, we switch from exponential 643 * to linear window opening at some threshhold size. 644 * For a threshhold, we use half the current window 645 * size, truncated to a multiple of the mss. 646 * 647 * (the minimum cwnd that will give us exponential 648 * growth is 2 mss. We don't allow the threshhold 649 * to go below this.) 650 */ 651 { 652 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 653 654 if (win < 2) 655 win = 2; 656 tp->snd_cwnd = tp->t_maxseg; 657 tp->snd_wacked = 0; 658 tp->snd_ssthresh = win * tp->t_maxseg; 659 tp->t_dupacks = 0; 660 } 661 EXIT_FASTRECOVERY(tp); 662 tcp_output(tp); 663 664 out: 665 #ifdef TCPDEBUG 666 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 667 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 668 #endif 669 return tp; 670 } 671 672 void 673 tcp_timer_rexmt(void *xtp) 674 { 675 struct tcpcb *tp = xtp; 676 struct callout *co = &tp->tt_rexmt->tc_callout; 677 678 crit_enter(); 679 if (callout_pending(co) || !callout_active(co)) { 680 crit_exit(); 681 return; 682 } 683 callout_deactivate(co); 684 tcp_send_timermsg(tp, TCP_TIMER_REXMT); 685 crit_exit(); 686 } 687 688 static void 689 tcp_timer_handler(netmsg_t msg) 690 { 691 struct netmsg_tcp_timer *tmsg = (struct netmsg_tcp_timer *)msg; 692 const struct tcp_timer *tt; 693 struct tcpcb *tp; 694 695 crit_enter(); 696 697 KKASSERT(tmsg->tt_cpuid == mycpuid && tmsg->tt_tcb != NULL); 698 tp = tmsg->tt_tcb; 699 700 /* Save pending tasks and reset the tasks in message */ 701 tmsg->tt_running_tasks = tmsg->tt_tasks; 702 tmsg->tt_prev_tasks = tmsg->tt_tasks; 703 tmsg->tt_tasks = 0; 704 705 /* Reply ASAP */ 706 lwkt_replymsg(&tmsg->tt_msg.lmsg, 0); 707 708 if (tmsg->tt_running_tasks == 0) { 709 /* 710 * All of the timers are cancelled when the message 711 * is pending; bail out. 712 */ 713 crit_exit(); 714 return; 715 } 716 717 for (tt = tcp_timer_handlers; tt->tt_handler != NULL; ++tt) { 718 if ((tmsg->tt_running_tasks & tt->tt_task) == 0) 719 continue; 720 721 tmsg->tt_running_tasks &= ~tt->tt_task; 722 tp = tt->tt_handler(tp); 723 if (tp == NULL) 724 break; 725 726 if (tmsg->tt_running_tasks == 0) /* nothing left to do */ 727 break; 728 } 729 730 crit_exit(); 731 } 732 733 void 734 tcp_create_timermsg(struct tcpcb *tp, struct lwkt_port *msgport) 735 { 736 struct netmsg_tcp_timer *tmsg = tp->tt_msg; 737 738 netmsg_init(&tmsg->tt_msg, NULL, &netisr_adone_rport, 739 MSGF_DROPABLE | MSGF_PRIORITY, tcp_timer_handler); 740 tmsg->tt_cpuid = mycpuid; 741 tmsg->tt_msgport = msgport; 742 tmsg->tt_tcb = tp; 743 tmsg->tt_tasks = 0; 744 } 745 746 void 747 tcp_destroy_timermsg(struct tcpcb *tp) 748 { 749 struct netmsg_tcp_timer *tmsg = tp->tt_msg; 750 751 if (tmsg == NULL || /* listen socket */ 752 tmsg->tt_tcb == NULL) /* only tcp_attach() is called */ 753 return; 754 755 KKASSERT(tmsg->tt_cpuid == mycpuid); 756 757 /* 758 * This message is still pending to be processed; 759 * drop it. Optimized. 760 */ 761 crit_enter(); 762 if ((tmsg->tt_msg.lmsg.ms_flags & MSGF_DONE) == 0) { 763 lwkt_dropmsg(&tmsg->tt_msg.lmsg); 764 } 765 crit_exit(); 766 } 767 768 static __inline void 769 tcp_callout_init(struct tcp_callout *tc, uint32_t task) 770 { 771 callout_init_mp(&tc->tc_callout); 772 tc->tc_task = task; 773 } 774 775 void 776 tcp_inittimers(struct tcpcb *tp) 777 { 778 tcp_callout_init(tp->tt_rexmt, TCP_TIMER_REXMT); 779 tcp_callout_init(tp->tt_persist, TCP_TIMER_PERSIST); 780 tcp_callout_init(tp->tt_keep, TCP_TIMER_KEEP); 781 tcp_callout_init(tp->tt_2msl, TCP_TIMER_2MSL); 782 tcp_callout_init(tp->tt_delack, TCP_TIMER_DELACK); 783 } 784