1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 63 * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.14 2003/02/03 02:33:41 hsu Exp $ 64 */ 65 66 #include "opt_inet6.h" 67 #include "opt_tcpdebug.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/mbuf.h> 73 #include <sys/sysctl.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/thread.h> 78 #include <sys/globaldata.h> 79 #include <sys/thread2.h> 80 #include <sys/msgport2.h> 81 82 #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ 83 84 #include <net/route.h> 85 #include <net/netmsg2.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_systm.h> 89 #include <netinet/in_pcb.h> 90 #ifdef INET6 91 #include <netinet6/in6_pcb.h> 92 #endif 93 #include <netinet/ip_var.h> 94 #include <netinet/tcp.h> 95 #include <netinet/tcp_fsm.h> 96 #include <netinet/tcp_seq.h> 97 #include <netinet/tcp_timer.h> 98 #include <netinet/tcp_timer2.h> 99 #include <netinet/tcp_var.h> 100 #include <netinet/tcpip.h> 101 #ifdef TCPDEBUG 102 #include <netinet/tcp_debug.h> 103 #endif 104 105 #define TCP_TIMER_REXMT 0x01 106 #define TCP_TIMER_PERSIST 0x02 107 #define TCP_TIMER_KEEP 0x04 108 #define TCP_TIMER_2MSL 0x08 109 #define TCP_TIMER_DELACK 0x10 110 111 static struct tcpcb *tcp_timer_rexmt_handler(struct tcpcb *); 112 static struct tcpcb *tcp_timer_persist_handler(struct tcpcb *); 113 static struct tcpcb *tcp_timer_keep_handler(struct tcpcb *); 114 static struct tcpcb *tcp_timer_2msl_handler(struct tcpcb *); 115 static struct tcpcb *tcp_timer_delack_handler(struct tcpcb *); 116 117 static const struct tcp_timer { 118 uint32_t tt_task; 119 struct tcpcb *(*tt_handler)(struct tcpcb *); 120 } tcp_timer_handlers[] = { 121 { TCP_TIMER_DELACK, tcp_timer_delack_handler }, 122 { TCP_TIMER_REXMT, tcp_timer_rexmt_handler }, 123 { TCP_TIMER_PERSIST, tcp_timer_persist_handler }, 124 { TCP_TIMER_KEEP, tcp_timer_keep_handler }, 125 { TCP_TIMER_2MSL, tcp_timer_2msl_handler }, 126 { 0, NULL } 127 }; 128 129 static int 130 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 131 { 132 int error, s, tt; 133 134 tt = *(int *)oidp->oid_arg1; 135 s = (int)((int64_t)tt * 1000 / hz); 136 137 error = sysctl_handle_int(oidp, &s, 0, req); 138 if (error || !req->newptr) 139 return (error); 140 141 tt = (int)((int64_t)s * hz / 1000); 142 if (tt < 1) 143 return (EINVAL); 144 145 *(int *)oidp->oid_arg1 = tt; 146 return (0); 147 } 148 149 int tcp_keepinit; 150 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 151 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "Time to establish TCP connection"); 152 153 int tcp_keepidle; 154 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 155 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "Time before TCP keepalive probes begin"); 156 157 int tcp_keepintvl; 158 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 159 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "Time between TCP keepalive probes"); 160 161 int tcp_delacktime; 162 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, 163 CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 164 "Time before a delayed ACK is sent"); 165 166 int tcp_msl; 167 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 168 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 169 170 int tcp_rexmit_min; 171 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 172 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout"); 173 174 int tcp_rexmit_slop; 175 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 176 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 177 "Retransmission Timer Slop"); 178 179 static int always_keepalive = 1; 180 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 181 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 182 183 /* max idle probes */ 184 int tcp_keepcnt = TCPTV_KEEPCNT; 185 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, 186 &tcp_keepcnt, 0, "Maximum number of keepalive probes to be sent"); 187 188 static int tcp_do_eifel_response = 1; 189 SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel_response, CTLFLAG_RW, 190 &tcp_do_eifel_response, 0, "Eifel response algorithm (RFC 4015)"); 191 192 int tcp_eifel_rtoinc = 2; 193 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, eifel_rtoinc, CTLTYPE_INT|CTLFLAG_RW, 194 &tcp_eifel_rtoinc, 0, sysctl_msec_to_ticks, "I", 195 "Eifel response RTO increment"); 196 197 /* max idle time in persist */ 198 int tcp_maxpersistidle; 199 200 /* 201 * Cancel all timers for TCP tp. 202 */ 203 void 204 tcp_canceltimers(struct tcpcb *tp) 205 { 206 tcp_callout_stop(tp, tp->tt_2msl); 207 tcp_callout_stop(tp, tp->tt_persist); 208 tcp_callout_stop(tp, tp->tt_keep); 209 tcp_callout_stop(tp, tp->tt_rexmt); 210 } 211 212 /* 213 * Caller should be in critical section 214 */ 215 static void 216 tcp_send_timermsg(struct tcpcb *tp, uint32_t task) 217 { 218 struct netmsg_tcp_timer *tmsg = tp->tt_msg; 219 220 KKASSERT(tmsg != NULL && tmsg->tt_cpuid == mycpuid && 221 tmsg->tt_tcb != NULL); 222 223 tmsg->tt_tasks |= task; 224 if (tmsg->tt_msg.lmsg.ms_flags & MSGF_DONE) 225 lwkt_sendmsg_oncpu(tmsg->tt_msgport, &tmsg->tt_msg.lmsg); 226 } 227 228 int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 229 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 230 231 int tcp_syn_backoff_low[TCP_MAXRXTSHIFT + 1] = 232 { 1, 1, 2, 4, 8, 8, 16, 16, 32, 64, 64, 64, 64 }; 233 234 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 235 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 236 237 static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 238 239 /* Caller should be in critical section */ 240 static struct tcpcb * 241 tcp_timer_delack_handler(struct tcpcb *tp) 242 { 243 tp->t_flags |= TF_ACKNOW; 244 tcpstat.tcps_delack++; 245 tcp_output(tp); 246 return tp; 247 } 248 249 /* 250 * TCP timer processing. 251 */ 252 void 253 tcp_timer_delack(void *xtp) 254 { 255 struct tcpcb *tp = xtp; 256 struct callout *co = &tp->tt_delack->tc_callout; 257 258 crit_enter(); 259 if (callout_pending(co) || !callout_active(co)) { 260 crit_exit(); 261 return; 262 } 263 callout_deactivate(co); 264 tcp_send_timermsg(tp, TCP_TIMER_DELACK); 265 crit_exit(); 266 } 267 268 /* Caller should be in critical section */ 269 static struct tcpcb * 270 tcp_timer_2msl_handler(struct tcpcb *tp) 271 { 272 #ifdef TCPDEBUG 273 int ostate; 274 #endif 275 276 #ifdef TCPDEBUG 277 ostate = tp->t_state; 278 #endif 279 /* 280 * 2 MSL timeout in shutdown went off. If we're closed but 281 * still waiting for peer to close and connection has been idle 282 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 283 * control block. Otherwise, check again in a bit. 284 */ 285 if (tp->t_state != TCPS_TIME_WAIT && 286 (ticks - tp->t_rcvtime) <= tp->t_maxidle) { 287 tcp_callout_reset(tp, tp->tt_2msl, tp->t_keepintvl, 288 tcp_timer_2msl); 289 } else { 290 tp = tcp_close(tp); 291 } 292 293 #ifdef TCPDEBUG 294 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 295 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 296 #endif 297 return tp; 298 } 299 300 void 301 tcp_timer_2msl(void *xtp) 302 { 303 struct tcpcb *tp = xtp; 304 struct callout *co = &tp->tt_2msl->tc_callout; 305 306 crit_enter(); 307 if (callout_pending(co) || !callout_active(co)) { 308 crit_exit(); 309 return; 310 } 311 callout_deactivate(co); 312 tcp_send_timermsg(tp, TCP_TIMER_2MSL); 313 crit_exit(); 314 } 315 316 /* Caller should be in critical section */ 317 static struct tcpcb * 318 tcp_timer_keep_handler(struct tcpcb *tp) 319 { 320 struct tcptemp *t_template; 321 #ifdef TCPDEBUG 322 int ostate = tp->t_state; 323 #endif 324 325 /* 326 * Keep-alive timer went off; send something 327 * or drop connection if idle for too long. 328 */ 329 tcpstat.tcps_keeptimeo++; 330 if (tp->t_state < TCPS_ESTABLISHED) 331 goto dropit; 332 if ((always_keepalive || (tp->t_flags & TF_KEEPALIVE) || 333 (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE)) && 334 tp->t_state <= TCPS_CLOSING) { 335 if ((ticks - tp->t_rcvtime) >= tp->t_keepidle + tp->t_maxidle) 336 goto dropit; 337 /* 338 * Send a packet designed to force a response 339 * if the peer is up and reachable: 340 * either an ACK if the connection is still alive, 341 * or an RST if the peer has closed the connection 342 * due to timeout or reboot. 343 * Using sequence number tp->snd_una-1 344 * causes the transmitted zero-length segment 345 * to lie outside the receive window; 346 * by the protocol spec, this requires the 347 * correspondent TCP to respond. 348 */ 349 tcpstat.tcps_keepprobe++; 350 t_template = tcp_maketemplate(tp); 351 if (t_template) { 352 tcp_respond(tp, t_template->tt_ipgen, 353 &t_template->tt_t, NULL, 354 tp->rcv_nxt, tp->snd_una - 1, 0); 355 tcp_freetemplate(t_template); 356 } 357 tcp_callout_reset(tp, tp->tt_keep, tp->t_keepintvl, 358 tcp_timer_keep); 359 } else { 360 tcp_callout_reset(tp, tp->tt_keep, tp->t_keepidle, 361 tcp_timer_keep); 362 } 363 364 #ifdef TCPDEBUG 365 if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 366 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 367 #endif 368 return tp; 369 370 dropit: 371 tcpstat.tcps_keepdrops++; 372 tp = tcp_drop(tp, ETIMEDOUT); 373 374 #ifdef TCPDEBUG 375 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 376 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 377 #endif 378 return tp; 379 } 380 381 void 382 tcp_timer_keep(void *xtp) 383 { 384 struct tcpcb *tp = xtp; 385 struct callout *co = &tp->tt_keep->tc_callout; 386 387 crit_enter(); 388 if (callout_pending(co) || !callout_active(co)) { 389 crit_exit(); 390 return; 391 } 392 callout_deactivate(co); 393 tcp_send_timermsg(tp, TCP_TIMER_KEEP); 394 crit_exit(); 395 } 396 397 /* Caller should be in critical section */ 398 static struct tcpcb * 399 tcp_timer_persist_handler(struct tcpcb *tp) 400 { 401 #ifdef TCPDEBUG 402 int ostate; 403 #endif 404 405 #ifdef TCPDEBUG 406 ostate = tp->t_state; 407 #endif 408 /* 409 * Persistance timer into zero window. 410 * Force a byte to be output, if possible. 411 */ 412 tcpstat.tcps_persisttimeo++; 413 /* 414 * Hack: if the peer is dead/unreachable, we do not 415 * time out if the window is closed. After a full 416 * backoff, drop the connection if the idle time 417 * (no responses to probes) reaches the maximum 418 * backoff that we would use if retransmitting. 419 */ 420 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 421 ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 422 (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 423 tcpstat.tcps_persistdrop++; 424 tp = tcp_drop(tp, ETIMEDOUT); 425 goto out; 426 } 427 tcp_setpersist(tp); 428 tp->t_flags |= TF_FORCE; 429 tcp_output(tp); 430 tp->t_flags &= ~TF_FORCE; 431 432 out: 433 #ifdef TCPDEBUG 434 if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 435 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 436 #endif 437 return tp; 438 } 439 440 void 441 tcp_timer_persist(void *xtp) 442 { 443 struct tcpcb *tp = xtp; 444 struct callout *co = &tp->tt_persist->tc_callout; 445 446 crit_enter(); 447 if (callout_pending(co) || !callout_active(co)){ 448 crit_exit(); 449 return; 450 } 451 callout_deactivate(co); 452 tcp_send_timermsg(tp, TCP_TIMER_PERSIST); 453 crit_exit(); 454 } 455 456 void 457 tcp_save_congestion_state(struct tcpcb *tp) 458 { 459 /* 460 * Record connection's current states so that they could be 461 * recovered, if this turns out to be a spurious retransmit. 462 */ 463 tp->snd_cwnd_prev = tp->snd_cwnd; 464 tp->snd_wacked_prev = tp->snd_wacked; 465 tp->snd_ssthresh_prev = tp->snd_ssthresh; 466 tp->snd_recover_prev = tp->snd_recover; 467 468 /* 469 * State for Eifel response after spurious timeout retransmit 470 * is detected. We save the current value of snd_max even if 471 * we are called from fast retransmit code, so if RTO needs 472 * rebase, it will be rebased using the RTT of segment that 473 * is not sent during possible congestion. 474 */ 475 tp->snd_max_prev = tp->snd_max; 476 477 if (IN_FASTRECOVERY(tp)) 478 tp->rxt_flags |= TRXT_F_WASFRECOVERY; 479 else 480 tp->rxt_flags &= ~TRXT_F_WASFRECOVERY; 481 if (tp->t_flags & TF_RCVD_TSTMP) { 482 /* States for Eifel detection */ 483 tp->t_rexmtTS = ticks; 484 tp->rxt_flags |= TRXT_F_FIRSTACCACK; 485 } 486 #ifdef later 487 tcp_sack_save_scoreboard(&tp->scb); 488 #endif 489 } 490 491 void 492 tcp_revert_congestion_state(struct tcpcb *tp) 493 { 494 tp->snd_cwnd = tp->snd_cwnd_prev; 495 tp->snd_wacked = tp->snd_wacked_prev; 496 tp->snd_ssthresh = tp->snd_ssthresh_prev; 497 tp->snd_recover = tp->snd_recover_prev; 498 if (tp->rxt_flags & TRXT_F_WASFRECOVERY) 499 ENTER_FASTRECOVERY(tp); 500 if (tp->rxt_flags & TRXT_F_FASTREXMT) { 501 ++tcpstat.tcps_sndfastrexmitbad; 502 if (tp->rxt_flags & TRXT_F_EARLYREXMT) 503 ++tcpstat.tcps_sndearlyrexmitbad; 504 } else { 505 ++tcpstat.tcps_sndrtobad; 506 tp->snd_last = ticks; 507 if (tcp_do_eifel_response) 508 tp->rxt_flags |= TRXT_F_REBASERTO; 509 } 510 tp->t_badrxtwin = 0; 511 tp->t_rxtshift = 0; 512 tp->snd_nxt = tp->snd_max; 513 #ifdef later 514 tcp_sack_revert_scoreboard(&tp->scb, tp->snd_una); 515 #endif 516 } 517 518 /* Caller should be in critical section */ 519 static struct tcpcb * 520 tcp_timer_rexmt_handler(struct tcpcb *tp) 521 { 522 int rexmt; 523 #ifdef TCPDEBUG 524 int ostate; 525 #endif 526 527 #ifdef TCPDEBUG 528 ostate = tp->t_state; 529 #endif 530 /* 531 * Retransmission timer went off. Message has not 532 * been acked within retransmit interval. Back off 533 * to a longer retransmit interval and retransmit one segment. 534 */ 535 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 536 tp->t_rxtshift = TCP_MAXRXTSHIFT; 537 tcpstat.tcps_timeoutdrop++; 538 tp = tcp_drop(tp, tp->t_softerror ? 539 tp->t_softerror : ETIMEDOUT); 540 goto out; 541 } 542 if (tp->t_rxtshift == 1) { 543 /* 544 * First retransmit. 545 */ 546 547 /* 548 * State for "RTT based spurious timeout retransmit detection" 549 * 550 * RTT based spurious timeout retransmit detection: 551 * A retransmit is considered spurious if an ACK for this 552 * segment is received within RTT/2 interval; the assumption 553 * here is that the ACK was already in flight. See 554 * "On Estimating End-to-End Network Path Properties" by 555 * Allman and Paxson for more details. 556 */ 557 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 558 559 /* 560 * States for Eifel response after spurious timeout retransmit 561 * is detected. 562 */ 563 tp->t_rxtcur_prev = tp->t_rxtcur; 564 tp->t_srtt_prev = tp->t_srtt + 565 (tcp_eifel_rtoinc << TCP_RTT_SHIFT); 566 tp->t_rttvar_prev = tp->t_rttvar; 567 568 tcp_save_congestion_state(tp); 569 tp->rxt_flags &= ~(TRXT_F_FASTREXMT | TRXT_F_EARLYREXMT | 570 TRXT_F_REBASERTO); 571 } 572 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 573 /* 574 * Record the time that we spent in SYN or SYN|ACK 575 * retransmition. 576 * 577 * Needed by RFC3390 and RFC6298. 578 */ 579 tp->t_rxtsyn += tp->t_rxtcur; 580 } 581 /* Throw away SACK blocks on a RTO, as specified by RFC2018. */ 582 tcp_sack_discard(tp); 583 tcpstat.tcps_rexmttimeo++; 584 if (tp->t_state == TCPS_SYN_SENT) { 585 if (tcp_low_rtobase) { 586 rexmt = TCP_REXMTVAL(tp) * 587 tcp_syn_backoff_low[tp->t_rxtshift]; 588 } else { 589 rexmt = TCP_REXMTVAL(tp) * 590 tcp_syn_backoff[tp->t_rxtshift]; 591 } 592 } else { 593 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 594 } 595 TCPT_RANGESET(tp->t_rxtcur, rexmt, 596 tp->t_rttmin, TCPTV_REXMTMAX); 597 /* 598 * If losing, let the lower level know and try for 599 * a better route. Also, if we backed off this far, 600 * our srtt estimate is probably bogus. Clobber it 601 * so we'll take the next rtt measurement as our srtt; 602 * move the current srtt into rttvar to keep the current 603 * retransmit times until then. 604 */ 605 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 606 #ifdef INET6 607 if (INP_ISIPV6(tp->t_inpcb)) 608 in6_losing(tp->t_inpcb); 609 else 610 #endif 611 in_losing(tp->t_inpcb); 612 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 613 tp->t_srtt = 0; 614 } 615 tp->snd_nxt = tp->snd_una; 616 tp->snd_recover = tp->snd_max; 617 /* 618 * Force a segment to be sent. 619 */ 620 tp->t_flags |= TF_ACKNOW; 621 /* 622 * If timing a segment in this window, stop the timer. 623 */ 624 tp->t_rtttime = 0; 625 /* 626 * Close the congestion window down to one segment 627 * (we'll open it by one segment for each ack we get). 628 * Since we probably have a window's worth of unacked 629 * data accumulated, this "slow start" keeps us from 630 * dumping all that data as back-to-back packets (which 631 * might overwhelm an intermediate gateway). 632 * 633 * There are two phases to the opening: Initially we 634 * open by one mss on each ack. This makes the window 635 * size increase exponentially with time. If the 636 * window is larger than the path can handle, this 637 * exponential growth results in dropped packet(s) 638 * almost immediately. To get more time between 639 * drops but still "push" the network to take advantage 640 * of improving conditions, we switch from exponential 641 * to linear window opening at some threshhold size. 642 * For a threshhold, we use half the current window 643 * size, truncated to a multiple of the mss. 644 * 645 * (the minimum cwnd that will give us exponential 646 * growth is 2 mss. We don't allow the threshhold 647 * to go below this.) 648 */ 649 { 650 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 651 652 if (win < 2) 653 win = 2; 654 tp->snd_cwnd = tp->t_maxseg; 655 tp->snd_wacked = 0; 656 tp->snd_ssthresh = win * tp->t_maxseg; 657 tp->t_dupacks = 0; 658 } 659 EXIT_FASTRECOVERY(tp); 660 tcp_output(tp); 661 662 out: 663 #ifdef TCPDEBUG 664 if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 665 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 666 #endif 667 return tp; 668 } 669 670 void 671 tcp_timer_rexmt(void *xtp) 672 { 673 struct tcpcb *tp = xtp; 674 struct callout *co = &tp->tt_rexmt->tc_callout; 675 676 crit_enter(); 677 if (callout_pending(co) || !callout_active(co)) { 678 crit_exit(); 679 return; 680 } 681 callout_deactivate(co); 682 tcp_send_timermsg(tp, TCP_TIMER_REXMT); 683 crit_exit(); 684 } 685 686 static void 687 tcp_timer_handler(netmsg_t msg) 688 { 689 struct netmsg_tcp_timer *tmsg = (struct netmsg_tcp_timer *)msg; 690 const struct tcp_timer *tt; 691 struct tcpcb *tp; 692 693 crit_enter(); 694 695 KKASSERT(tmsg->tt_cpuid == mycpuid && tmsg->tt_tcb != NULL); 696 tp = tmsg->tt_tcb; 697 698 /* Save pending tasks and reset the tasks in message */ 699 tmsg->tt_running_tasks = tmsg->tt_tasks; 700 tmsg->tt_prev_tasks = tmsg->tt_tasks; 701 tmsg->tt_tasks = 0; 702 703 /* Reply ASAP */ 704 lwkt_replymsg(&tmsg->tt_msg.lmsg, 0); 705 706 if (tmsg->tt_running_tasks == 0) { 707 /* 708 * All of the timers are cancelled when the message 709 * is pending; bail out. 710 */ 711 crit_exit(); 712 return; 713 } 714 715 for (tt = tcp_timer_handlers; tt->tt_handler != NULL; ++tt) { 716 if ((tmsg->tt_running_tasks & tt->tt_task) == 0) 717 continue; 718 719 tmsg->tt_running_tasks &= ~tt->tt_task; 720 tp = tt->tt_handler(tp); 721 if (tp == NULL) 722 break; 723 724 if (tmsg->tt_running_tasks == 0) /* nothing left to do */ 725 break; 726 } 727 728 crit_exit(); 729 } 730 731 void 732 tcp_create_timermsg(struct tcpcb *tp, struct lwkt_port *msgport) 733 { 734 struct netmsg_tcp_timer *tmsg = tp->tt_msg; 735 736 netmsg_init(&tmsg->tt_msg, NULL, &netisr_adone_rport, 737 MSGF_DROPABLE | MSGF_PRIORITY, tcp_timer_handler); 738 tmsg->tt_cpuid = mycpuid; 739 tmsg->tt_msgport = msgport; 740 tmsg->tt_tcb = tp; 741 tmsg->tt_tasks = 0; 742 } 743 744 void 745 tcp_destroy_timermsg(struct tcpcb *tp) 746 { 747 struct netmsg_tcp_timer *tmsg = tp->tt_msg; 748 749 if (tmsg == NULL || /* listen socket */ 750 tmsg->tt_tcb == NULL) /* only tcp_attach() is called */ 751 return; 752 753 KKASSERT(tmsg->tt_cpuid == mycpuid); 754 755 /* 756 * This message is still pending to be processed; 757 * drop it. Optimized. 758 */ 759 crit_enter(); 760 if ((tmsg->tt_msg.lmsg.ms_flags & MSGF_DONE) == 0) { 761 lwkt_dropmsg(&tmsg->tt_msg.lmsg); 762 } 763 crit_exit(); 764 } 765 766 static __inline void 767 tcp_callout_init(struct tcp_callout *tc, uint32_t task) 768 { 769 callout_init_mp(&tc->tc_callout); 770 tc->tc_task = task; 771 } 772 773 void 774 tcp_inittimers(struct tcpcb *tp) 775 { 776 tcp_callout_init(tp->tt_rexmt, TCP_TIMER_REXMT); 777 tcp_callout_init(tp->tt_persist, TCP_TIMER_PERSIST); 778 tcp_callout_init(tp->tt_keep, TCP_TIMER_KEEP); 779 tcp_callout_init(tp->tt_2msl, TCP_TIMER_2MSL); 780 tcp_callout_init(tp->tt_delack, TCP_TIMER_DELACK); 781 } 782