#ifdef RCSIDENT
static char rcsident[] = "$Header: tcp_procs.c,v 1.32 85/07/31 09:34:27 walsh Exp $";
#endif

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/mbuf.h"
#include "../h/socket.h"
#include "../h/socketvar.h"
#include "../h/syslog.h"

#include "../net/if.h"
#include "../net/route.h"

#include "../bbnnet/in.h"
#include "../bbnnet/in_pcb.h"
#include "../bbnnet/in_var.h"
#include "../bbnnet/net.h"
#include "../bbnnet/fsm.h"
#include "../bbnnet/tcp.h"
#include "../bbnnet/seq.h"
#include "../bbnnet/ip.h"
#include "../bbnnet/macros.h"
#ifdef HMPTRAPS
#include "../bbnnet/hmp_traps.h"
#endif

/*
 * TCP finite state machine procedures.
 *
 * Called from finite state machine action routines, these do most of the work
 * of the protocol.  They in turn call primitive routines (in tcp_prim) to
 * perform lower level functions.
 */


/*
 * Release every protocol-level resource hanging off an Internet PCB's
 * tcpcb: the out-of-order resequencing queue (t_rcv_next ring), the
 * chain of received-but-unprocessed segments (t_rcv_unack, linked by
 * m_act), saved out-of-band data, the prototype packet header template,
 * and finally the tcpcb itself.
 *
 * This works cooperatively with t_close for freeing up data on receive/send
 * buffers.  Passed to in_pcbdetach() as the per-protocol disconnect hook;
 * clears inp_ppcb first so the tcpcb cannot be reached again.
 */
tcp_pcbdisconnect(inp)
struct inpcb *inp;
{
	register struct tcpcb *tp;

	if (tp = (struct tcpcb *) inp->inp_ppcb)
	{
		inp->inp_ppcb = (caddr_t) NULL;

		/*
		 * free all data on receive queues
		 *
		 * The resequencing queue is a circular list headed by the
		 * tcpcb itself, so (struct th *)tp is the end marker.
		 */
		{
			register struct th *t, *next;

			t = tp->t_rcv_next;
			while (t != (struct th *)tp)
			{
				next = t->t_next;
				m_freem(dtom(t));
				t = next;
			}
		}
		{
			register struct mbuf *m, *next;

			m = tp->t_rcv_unack;
			while (m != NULL)
			{
				next = m->m_act;
				m_freem(m);
				m = next;
			}
		}

		if (tp->oob_data)
			m_freem(tp->oob_data);

		if (tp->t_template)
			m_free(dtom(tp->t_template));

		m_free(dtom(tp));
	}
}

/*
 * Delete TCB and free all resources used by the connection.  Called after
 * the close protocol is complete.
 *
 * "state" is actually a user error code (e.g. ENETRESET) despite the name;
 * it is posted to so_error only when the user did not abort the connection
 * himself.
 */
t_close(tp, state)
register struct tcpcb *tp;
short state;
{
	register struct inpcb *inp;

	/*
	 * in_pcbdetach() calls soisdisconnected(), which wakes up the
	 * process if it's sleeping.  Need only pass user error code if
	 * appropriate (like ENETRESET) and hope he'll close the file
	 * descriptor.  Don't need to clear timers since they're in the
	 * tcpcb to be deleted.
	 */
	inp = tp->t_in_pcb;
	if (!tp->usr_abort)
		inp->inp_socket->so_error = state;
	in_pcbdetach(inp, tcp_pcbdisconnect);
}

short max_ack_skipped = 1;

/*
 * We are in a position where, perhaps, we should send a TCP segment (packet).
 * The important decisions are:
 *	1) How big a segment should we send?  This is important since most
 *	   overhead occurs at the packet level (interrupts, queueing,
 *	   header field checks...) and not at the byte level.
 *	2) Is it worth it to send this packet?  Are we sending enough data
 *	   or would we be better off waiting for some more to queue up?
 *
 * The above requirements are the point of view when called in response to
 * a user's write request.  We are also called on packet arrival in order
 * to send an ack (with piggy-backed data), and to respond to window openings
 * by sending any pent up data.
 *
 * Send a TCP segment.  Send data from left window edge of send buffer up to
 * window size or end (whichever is less).  Set retransmission timers.
 *
 * The Ford/Nagle algorithms might be thought of (if outstanding data, only
 * send if packet would be large), but they are primarily for telnet and
 * that doesn't go with ideas in comments down by push.  Has idea of tcp
 * changed since RFC?
 *
 * "ctl" is TCP_CTL when the caller needs an ACK to get out even if no data
 * can be sent, TCP_DATA otherwise.  Returns TRUE iff a data-bearing segment
 * was sent without error.
 */
send_tcp(tp, ctl)
register struct tcpcb *tp;
int ctl;
{
	register sequence last, wind;
	register int snd_flags;
	register int len;
	struct sockbuf *sosnd;
	int forced, error;
	int sendalot;

	sosnd = &tp->t_in_pcb->inp_socket->so_snd;
	sendalot = FALSE;
	snd_flags = 0;
	tp->snd_lst = tp->snd_nxt;
	forced = FALSE;
	/*
	 * Send SYN if this is first data (ISS)
	 */
	if (SEQ_EQ(tp->snd_nxt, tp->iss))
	{
		snd_flags |= T_SYN;
		tp->snd_lst++;
	}
	/*
	 * Get seq # of last datum in send buffer
	 */
	last = tp->snd_una;
	if (!tp->syn_acked)
		last++;			/* don't forget SYN */
	last += sosnd->sb_cc;
	/*
	 * If no data to send in buffer, just do FIN check, otherwise see
	 * how much we should send in segment.
	 */
	if (SEQ_GEQ(tp->snd_nxt, last))
	{
		/*
		 * should send FIN?  don't unless haven't already sent one
		 * (seq_fin == iss means no FIN has been assigned a sequence
		 * number yet).
		 */
		if (tp->snd_fin &&
		    (SEQ_EQ(tp->seq_fin, tp->iss) ||
		    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
		{
			snd_flags |= T_FIN;
			tp->seq_fin = tp->snd_lst++;
		}
	}
	else if (tp->syn_acked)
	{
		/*
		 * We can't send more than we have (SYN + data represented
		 * by last).  Nor can we send more than the other end is
		 * prepared to receive (represented by the window in snd_wnd
		 * and wind).
		 *
		 * Only send a segment if there is something in the buffer,
		 * and a non-zero window has been received.
		 */
		wind = tp->snd_una + tp->snd_wnd;
		tp->snd_lst = SEQ_MIN(last, wind);

		/*
		 * Make sure the segment is not larger than the remote end
		 * can handle.  Though they may advertise a 4K window, perhaps
		 * they can only fill it 512 bytes at a time due to some
		 * buffering or device driver constraint.
		 *
		 * If we're both on the local net, the maxseg is probably the
		 * mtu of the local network, and this will avoid some IP
		 * fragmentation.
		 *
		 * ">=" so that set sendalot.
		 */
		if ((len = tp->snd_lst - tp->snd_nxt) >= tp->t_maxseg)
		{
			tp->snd_lst -= len - tp->t_maxseg;
			sendalot = TRUE;
		}

		/*
		 * If we're not on the same net or on similar nets immediately
		 * connected by a gateway, the negotiated maxseg may cause
		 * fragmentation.  Fragmentation per se is not bad, but
		 * tinygrams can cause problems and are inefficient.  So,
		 * send something that if it fragments, will produce reasonably
		 * sized fragments.  Avoid excessive fragmentation to reduce
		 * probability datagram fails to reassemble.
		 */
		if (tp->t_maxfrag)
		{
			len = tp->t_maxfrag*3;
			if ((tp->snd_lst - tp->snd_nxt) > len)
			{
				tp->snd_lst = tp->snd_nxt + len;
				sendalot = TRUE;
			}
		}

		if (SEQ_GT(tp->snd_end, tp->snd_una) &&
		    SEQ_LEQ(tp->snd_end, tp->snd_lst))
			/*
			 * There is data to send, and it should be PUSHed.
			 * PUSHed segments avoid the SWS algorithm since it
			 * might delay transmission.  PUSHed data MUST go
			 * out ASAP.  Note: To avoid performance degradation,
			 * bulk data transfers should not have PUSH on.
			 */
			snd_flags |= T_PUSH;
		else if (tp->snd_wnd > 0)
		{
			/*
			 * Avoid the silly window syndrome (sending small
			 * packets).  Making sure the usable window is at
			 * least some % of the offered window ensures we're
			 * sending a relatively (for this connection) good
			 * sized segment.
			 *
			 * If sbspace(sosnd) == 0, then the user
			 * is blocked for send resources, and we won't be
			 * able to send a larger packet later, so send it now.
			 * (Hmm, still true?  How about the wakeup after we
			 * trim the acked data?)
			 *
			 * SWS and persistence interaction
			 * If there is outstanding data, snd_nxt - snd_una
			 * will be > 0, we'll have retransmit timers running
			 * forcing eventual window updates.  If there is
			 * no outstanding data, then we'll send some and
			 * start up the retransmit timers.  So, any time
			 * we run through this segment of code instead of
			 * the next one, we've got some good timers running.
			 *
			 * Setting snd_lst back to snd_nxt suppresses the
			 * send entirely (len computes to 0 below).
			 */
			if (!tp->rexmt && !tp->ack_due && !tp->snd_fin &&
			    !sendalot &&
			    sbspace(sosnd) > 0 &&
			    ((100*(tp->snd_nxt-tp->snd_una))/tp->snd_wnd)
			      > tp->sws_qff)
				tp->snd_lst = tp->snd_nxt;
		}
		else
		{
			/*
			 * We have stuff to send, but can't since the other
			 * end can't handle it right now.  We start up the
			 * persistence timer in case their window opening
			 * ack is lost.  When the timer goes off, we send
			 * a byte to force a window update.  Wait for timer
			 * in order to give him a chance to deal with the
			 * remotely buffered data and send us an update.
			 * (We'll get here on acks that stop rxmit timers
			 * but that contain zero window since remote user
			 * has not picked up data yet.)
			 *
			 * If we're called due to a write() or packet arrival,
			 * this is how we enter the persistence state.  If
			 * we're called because the persist timer went off,
			 * the timer is restarted to keep persisting.
			 */
			if (tp->t_timers[TPERSIST] == 0)
				tp->t_timers[TPERSIST] = MIN(TCP_tvMAXPERSIST,
				    MAX(TCP_tvMINPERSIST, tp->t_srtt*3));

			if (tp->force_one)
			{
				/* persist timer went off */
				tp->snd_lst = tp->snd_nxt + 1;
				forced = TRUE;
			}
		}

		/* must send FIN and no more data left to send after this */

		if (tp->snd_fin && !forced && SEQ_EQ(tp->snd_lst, last) &&
		    (SEQ_EQ(tp->seq_fin, tp->iss) ||
		    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
		{
			snd_flags |= T_FIN;
			tp->seq_fin = tp->snd_lst++;
		}
	}

	/*
	 * Now, we have determined how large a segment to send if our only
	 * purpose is to get data to the other side.  If there is something
	 * to send, do it and update timers for rexmt.
	 */
	len = tp->snd_lst - tp->snd_nxt;
	if (len > 0)
	{	/* then SEQ_LT(tp->snd_nxt, tp->snd_lst) */

		error = send_pkt (tp, snd_flags, len);

		/*
		 * SEQ_LEQ(snd_nxt, t_xmt_val): if this is a retransmission
		 * of the round-trip-time measuring byte, then remeasure the
		 * round trip time.  (Keep rtt from drifting upward on lossy
		 * networks.)
		 *
		 * SEQ_GT(snd_una, t_xmt_val): Measure the rtt if the last
		 * timed byte has been acked.
		 */
		if (tp->syn_acked && (SEQ_LEQ(tp->snd_nxt, tp->t_xmt_val) ||
		    SEQ_GT(tp->snd_una, tp->t_xmt_val)))
		{
			if (tp->t_srtt != 0)
				tp->t_timers[TXMT] = 0;
			tp->t_xmt_val = tp->snd_nxt;
		}

		/*
		 * If the window was full, and we're just forcing a byte
		 * out to try to get a new window, then don't use
		 * retransmission timeouts.  The other side can take as
		 * long as it wants to process the data it's currently got.
		 */
		if (! forced)
		{
			/*
			 * Set timers for retransmission.  If we already have
			 * some outstanding data, then don't reset timer.  Think
			 * of case where send one byte every 1/4 second and only
			 * first byte is lost.  Would need to wait until filled
			 * window before retransmission timer started to decrease
			 * and go off.
			 */
			if (tp->t_timers[TREXMT] == 0)
				tp->t_timers[TREXMT] = tp->t_rxmitime;

			if (tp->t_timers[TREXMTTL] == 0)
				tp->t_timers[TREXMTTL] = tp->t_rttltimeo;

			/*
			 * and remember that next segment out begins
			 * further into the stream if this one got out.
			 */
			if (! error)
				tp->snd_nxt = tp->snd_lst;
		}

#if T_DELACK > 0
		t_cancel(tp, TDELACK);
		tp->force_ack = FALSE;
		tp->ack_skipped = 0;
#endif
		tp->ack_due = FALSE;
		tp->snd_hi = SEQ_MAX(tp->snd_lst, tp->snd_hi);
		if (!error)
			return(TRUE);
	}

	/*
	 * If ctl, make sure to send something so ACK gets through.  Attempt
	 * to reduce ACK traffic by delaying ACKs with no data slightly.
	 * Naive ack traffic can account for about 10% of what the receiving
	 * tcp is doing.
	 *
	 * Bidirectional connection (telnet) => ack piggy backs application's
	 * response.
	 *
	 * Unidirectional connection (ftp) => advertise large enough window
	 * so that either #skipped (tp->ack_skipped) or our estimate of what he
	 * thinks window is cause ack.  The estimate assumes most packets get
	 * through.  This also assumes that the sender buffers enough to take
	 * advantage of the estimated usable window, so we'll assume a minimum
	 * send buffer provided by his operating system.  (Remember, his OS has
	 * to buffer it until we ack it.)
	 *
	 * So, test assumes his send buffer > MINTCPBUF bytes large
	 * and his silly window algorithm cuts in at < 50% of window.
	 *
	 * Use of the fasttimeout facility is a possibility.
	 */
	if (ctl == TCP_CTL)
	{
#if T_DELACK > 0
		if (tp->force_ack ||
		    (tp->ack_skipped >= max_ack_skipped) ||
		    ((tp->rcv_nxt - tp->lastack) > MIN(MINTCPBUF, tp->rcv_wnd>>1)))
		{
			(void) send_pkt(tp, 0, 0);
			t_cancel(tp, TDELACK);
			tp->force_ack = FALSE;
			tp->ack_skipped = 0;
			tp->ack_due = FALSE;
			tcpstat.t_ackonly ++;
		}
		else
		{
			tp->ack_skipped ++;
			if (tp->t_timers[TDELACK] == 0)
				tp->t_timers[TDELACK] = T_DELACK;
		}
#else
		(void) send_pkt(tp, 0, 0);
		tp->ack_due = FALSE;
		tcpstat.t_ackonly ++;
#endif
	}
	return(FALSE);
}

/*
 * Process incoming ACKs.  Remove data from send queue up to acknowledgement.
 * Also handles round-trip timer for retransmissions and acknowledgement of
 * SYN, and clears the urgent flag if required.
 */

#ifdef BBNPING
#define BBNPING_RESET(inp, len) \
	if (len > 0){ \
		/* \
		 * We've sent him NEW data, perhaps by a gateway, that he \
		 * has successfully received.  If that's the case, then \
		 * we know the route works and we don't have to ping that \
		 * gateway. \
		 * \
		 * see check_ping() \
		 */ \
		register struct rtentry *rt; \
\
		if (rt = inp->inp_route.ro_rt) \
			if (rt->rt_flags & RTF_GATEWAY) \
				rt->irt_pings = (-1); \
	}
#else
#define BBNPING_RESET(x,y) /* */
#endif

/*
 * Debugging guard: clamp the number of acked bytes to what is actually
 * in the send buffer before sbdrop(), logging any inconsistency.
 * Compiled away unless MBUF_DEBUG.
 */
#ifdef MBUF_DEBUG
#define LENCHECK \
	if ((len > sosnd->sb_cc) || (len < 0)){ \
		printf("len %d sb_cc %d flags 0x%x state %d\n", \
		    len, sosnd->sb_cc, n->t_flags, tp->t_state); \
		if (len < 0) \
			len = 0; \
		else \
			len = sosnd->sb_cc; \
	}
#else
#define LENCHECK /* */
#endif

/* 60/40 weighted average of latest measurement and smoothed rtt */
#define smooth(tp) (((75*(tp)->t_timers[TXMT]) + (125*(tp)->t_srtt)) / 200)

/*
 * Body of ACK processing, expanded in rcv_tcp().  Expects tp (tcpcb) and
 * n (incoming segment header) in scope; n->t_ackno is known to advance
 * snd_una.  Trims acked data from the send buffer, updates rtt estimate,
 * notes SYN/FIN acknowledgement and cancels retransmit timers.
 */
#define RCV_ACK(tp, n) \
{ \
	register struct inpcb *inp; \
	register struct sockbuf *sosnd; \
	register len; \
\
	inp = tp->t_in_pcb; \
	sosnd = &inp->inp_socket->so_snd; \
	len = n->t_ackno - tp->snd_una; \
\
	tp->snd_una = n->t_ackno; \
	if (SEQ_GT(tp->snd_una, tp->snd_nxt)) \
		tp->snd_nxt = tp->snd_una; \
\
	/* \
	 * if urgent data has been acked, reset urgent flag \
	 */ \
\
	if (tp->snd_urg && SEQ_GEQ(tp->snd_una, tp->snd_urp)) \
		tp->snd_urg = FALSE; \
\
	if (tp->syn_acked) { \
		/* if timed message has been acknowledged, use the time to set \
		   the retransmission time value, exponential decay, 60/40 \
		   weighted average */ \
\
		if (SEQ_GEQ(tp->snd_una, tp->t_xmt_val)) { \
			if (tp->t_srtt == 0) \
				tp->t_srtt = tp->t_timers[TXMT]; \
			else \
				tp->t_srtt = smooth(tp); \
			tp->t_rxmitime = MIN(TCP_tvRXMAX, \
			    MAX(TCP_tvRXMIN, (3*tp->t_srtt)/2)); \
		} \
	} else { \
		/* handle ack of opening syn (tell user) */ \
\
		if (SEQ_GT(tp->snd_una, tp->iss)) { \
			tp->syn_acked = TRUE; \
			len--;			/* ignore SYN */ \
			t_cancel(tp, TINIT);	/* cancel init timer */ \
		} \
	} \
\
	/* remove acknowledged data from send buff */ \
	if (ack_fin(tp, n)) \
		len --; \
	LENCHECK \
	sbdrop (sosnd, len); \
	BBNPING_RESET(inp, len) \
	sbwakeup (sosnd);	/* wakeup iff > x% of buffering avail? */ \
\
	/* handle ack of closing fin */ \
\
	if (SEQ_NEQ(tp->seq_fin, tp->iss) && SEQ_GT(tp->snd_una, tp->seq_fin)) \
		tp->snd_fin = FALSE; \
	t_cancel(tp, TREXMT);	/* cancel retransmit timer */ \
	t_cancel(tp, TREXMTTL);	/* cancel retransmit too long timer */ \
	tp->cancelled = TRUE; \
}


/*
 * Process incoming segments.
 *
 * "n" is the arriving segment (TCP header overlay), "ctl" is TCP_DATA for
 * segments whose text/URG should be processed, TCP_CTL otherwise.  Handles
 * SYN, ACK (via RCV_ACK), window updates, urgent pointer, data (rcv_text)
 * and FIN, then sends any required ACK or pent-up data.
 */
rcv_tcp(tp, n, ctl)
register struct tcpcb *tp;
register struct th *n;
int ctl;
{
	int sentsomedata;

	tp->dropped_txt = FALSE;
	tp->ack_due = FALSE;
	tp->new_window = FALSE;
	/*
	 * Process SYN
	 */
	if (!tp->syn_rcvd && n->t_flags&T_SYN)
	{
		tp->snd_wl = tp->rcv_urp = tp->irs = n->t_seq;
		tp->rcv_urpend = tp->rcv_urp -1;
		tp->rcv_nxt = n->t_seq + 1;
		tp->syn_rcvd = TRUE;
		tp->ack_due = TRUE;
	}

	if (tp->syn_rcvd)
	{
		/*
		 * Process ACK if data not already acked previously.  (Take
		 * ACKed data off send queue, and reset rexmt timers).
		 */
		if (n->t_flags&T_ACK && SEQ_GT(n->t_ackno, tp->snd_una))
			RCV_ACK(tp, n)

		/*
		 * Check for new window.  rcv_ack did not change syn_rcvd.
		 */
		if (SEQ_GEQ(n->t_seq, tp->snd_wl))
		{
			tp->snd_wl = n->t_seq;
			tp->snd_wnd = n->t_win;
			tp->new_window = TRUE;
			t_cancel(tp, TPERSIST);	/* cancel persist timer */
		}
	}

	/*
	 * For data packets only (vs. ctl), process data and URG.
	 */
	if (ctl == TCP_DATA)
	{
		/*
		 * Remember how much urgent data for present_data
		 */
		if (n->t_flags & T_URG)
		{
			/*
			 * if last <= urpend, then is a retransmission
			 * bytes [n->t_seq ... last] are urgent
			 */
			register sequence last;

			last = n->t_seq + n->t_urp;
			if (SEQ_GT(last, tp->rcv_urpend))
			{
				/*
				 * Can only remember one contiguous region.
				 */
				if (SEQ_GT(n->t_seq, tp->rcv_urpend+1))
				{
					struct socket *so;

					tp->rcv_urp = n->t_seq;
					if (tp->oob_data)
					{
						m_freem(tp->oob_data);
						tp->oob_data = NULL;
					}

					so = tp->t_in_pcb->inp_socket;
					so->so_oobmark = so->so_rcv.sb_cc +
					    (tp->rcv_urp-tp->rcv_nxt);
					if (so->so_oobmark == 0)
						so->so_state |= SS_RCVATMARK;
				}
				tp->rcv_urpend = last;
			}
		}

		if (n->t_len != 0)
			rcv_text(tp, n);	/* accept and sequence data */

		/*
		 * Delay extraction of out-of-band data until
		 * present_data() so don't have to worry about
		 * duplication...
		 */

#ifdef bsd41
		/*
		 * Process PUSH, mark end of data chain.
		 *
		 * Not done in 4.2.  TCP is a byte stream, without record
		 * boundries, so don't have to mark for sbappend(), which
		 * preserves marks, and soreceive(), which terminates reads
		 * at marks.  Data IS pushed nevertheless since soreceive
		 * gives the user all that is available and returns.
		 *
		 * NOTE(review): "m" is not declared in this function; this
		 * bsd41-only branch looks like it would not compile as-is —
		 * confirm before enabling bsd41.
		 */
		if (n->t_flags&T_PUSH && !tp->dropped_txt &&
		    tp->t_rcv_prev != (struct th *)tp)
		{

			/* Find last mbuf on received data chain and mark */

			m = dtom(tp->t_rcv_prev);
			if (m != NULL)
			{
				while (m->m_next != NULL)
					m = m->m_next;
				m->m_act = (struct mbuf *) 1;
			}
		}
#endif
	}
	/*
	 * Process FIN, check for duplicates and make sure all data is in.
	 */
	if (n->t_flags&T_FIN && !tp->dropped_txt)
	{
		if (tp->fin_rcvd)
			tp->ack_due = TRUE;
		else
		{
			/*
			 * Check if we really have FIN
			 * (rcv buf filled in, no drops)
			 */
			register sequence last;

			last = firstempty(tp);
			if ((tp->t_rcv_prev == (struct th *)tp &&
			    SEQ_EQ(last, t_end(n)+1)) ||
			    SEQ_EQ(last, t_end(tp->t_rcv_prev)+1))
			{
				tp->fin_rcvd = TRUE;
				uwake(tp->t_in_pcb);
			}
			/*
			 * If FIN, then set to ACK: incr rcv_nxt, since FIN
			 * occupies sequence space
			 */
			if (tp->fin_rcvd && SEQ_GEQ(tp->rcv_nxt, last))
			{
				tp->rcv_nxt = last + 1;
				tp->ack_due = TRUE;
			}
		}
	}
	/*
	 * If ACK required or rcv window has changed, try to send something.
	 */
	sentsomedata = FALSE;
	if (tp->ack_due)
		sentsomedata = send_tcp(tp, TCP_CTL);
	else if (tp->new_window)
		sentsomedata = send_tcp(tp, TCP_DATA);
	/*
	 * tp->cancelled => retransmit, rttl timers are now zero
	 *
	 * If didn't send any data, might not have retransmit, rttl timers
	 * running.  If we still have unACKed data and we turned off
	 * the timers above, then ensure timers are running.
	 */
	if (!sentsomedata && is_unacked(tp) && tp->cancelled)
	{
		tp->t_timers[TREXMT] = tp->t_rxmitime;
		tp->t_timers[TREXMTTL] = tp->t_rttltimeo;
		tp->cancelled = FALSE;
	}
}

#undef BBNPING_RESET
#undef LENCHECK

/*
 * Process incoming data.  Put the segments on sequencing queue in order,
 * taking care of overlaps and duplicates.  Data is removed from sequence
 * queue by present_data when sequence is complete (no holes at top).
 * Drop data that falls outside buffer quota if tight for space.  Otherwise,
 * process and recycle data held in tcp_input.
 *
 * On return, tp->ack_due is set and tp->rcv_nxt has been advanced past any
 * newly contiguous data (FIRSTEMPTY).  Sets tcp_net_keep when the incoming
 * mbuf chain has been queued and must not be freed by the caller.
 */
rcv_text(tp, t)
register struct tcpcb *tp;
register struct th *t;
{
	register i;
	register struct sockbuf *sorcv;
	register struct mbuf *m;
	register struct th *q;
	struct th *p;
	struct mbuf *n;
	struct th *savq;
	int j, oldkeep;
	sequence last;

	/* throw away any data we have already received */

	if ((i = tp->rcv_nxt - t->t_seq) > 0)
	{
		if (i < t->t_len)
		{
			t->t_seq += i;
			t->t_len -= i;
			m_adj(dtom(t), i);
		}
		else
		{
			/* entirely duplicate segment */
			tp->t_olddata++;
			tp->ack_due = TRUE;	/* send ack just in case */
#ifdef HMPTRAPS
			/* hmp_trap(T_TCP_DUP, (caddr_t)0,0); */
#endif
			return;
		}
	}

	last = t_end(t);	/* last seq # in incoming seg */

	/* # buffers available to con */

	sorcv = &tp->t_in_pcb->inp_socket->so_rcv;
	i = sbspace(sorcv);
	if (i < 0)
		i = 0;

	/* enough resources to process segment?  used to walk mbuf chain to
	 * count up data bytes.  let's be smart and use t_len */

	j = t->t_len;
	if (j > i)
	{

		/* if segment preceeds top of sequencing queue, try to take
		   buffers from bottom of queue */

		q = tp->t_rcv_next;
		if (q != (struct th *)tp && SEQ_LT(tp->rcv_nxt, q->t_seq) &&
		    SEQ_LT(t->t_seq, q->t_seq))

			for (p = tp->t_rcv_prev; i < j && p != (struct th *)tp;)
			{
				savq = p->t_prev;
				TCP_DEQ(p, tp);
#ifdef HMPTRAPS
				/* hmp_trap(T_TCP_UDROP, (caddr_t)0,0); */
#endif
				for (m = dtom(p); m != NULL; m = m_free(m))
					i += m->m_len;
				p = savq;
			}

		/* if still not enough room, drop text from end of new segment */

		if (j > i)
		{

			for (m = dtom(t); i > 0 && m != NULL; m = m->m_next)
				i -= m->m_len;

			while (m != NULL)
			{
				t->t_len -= m->m_len;
				last -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			}
			tp->dropped_txt = TRUE;
#ifdef HMPTRAPS
			/* hmp_trap(T_TCP_RDROP, (caddr_t)0,0); */
#endif
			/* nothing left after trimming? */
			if (SEQ_LT(last, t->t_seq))
				return;
		}
	}

	/* merge incoming data into the sequence queue */

	q = tp->t_rcv_next;	/* -> top of sequencing queue */

	/* skip frags which new doesn't overlap at end */

	while ((q != (struct th *)tp) && SEQ_GT(t->t_seq, t_end(q)))
		q = q->t_next;

	if (q == (struct th *)tp)
	{	/* frag at end of chain */

		if (SEQ_GEQ(last, tp->rcv_nxt))
		{
			tcp_net_keep = TRUE;
			TCP_ENQ(t, tp->t_rcv_prev, tp);
		}

	}
	else
	{

#ifdef HMPTRAPS
		/* we've received an out-of-order packet: trap! */

		/* hmp_trap(T_TCP_ORDER, (caddr_t)0,0); */

#endif
		/* frag doesn't overlap any on chain */

		if (SEQ_LT(last, q->t_seq))
		{
			tcp_net_keep = TRUE;
			TCP_ENQ(t, q->t_prev, tp);

		/* new overlaps beginning of next frag only */

		}
		else if (SEQ_LT(last, t_end(q)))
		{
			/* trim overlap from the tail of the new segment */
			if ((i = last - q->t_seq + 1) < t->t_len)
			{
				t->t_len -= i;
				m_adj(dtom(t), -i);
				tcp_net_keep = TRUE;
				TCP_ENQ(t, q->t_prev, tp);
			}

		/* new overlaps end of previous frag */

		}
		else
		{
			savq = q;
			if (SEQ_LEQ(t->t_seq, q->t_seq))
			{	/* complete cover */
				savq = q->t_prev;
				TCP_DEQ(q, tp);
				m_freem(dtom(q));

			}
			else
			{	/* overlap: trim head of the new segment */
				if ((i = t_end(q) - t->t_seq + 1) < t->t_len)
				{
					t->t_seq += i;
					t->t_len -= i;
					m_adj(dtom(t), i);
				}
				else
					t->t_len = 0;
			}

			/* new overlaps at beginning of successor frags */

			q = savq->t_next;
			while ((q != (struct th *)tp) && (t->t_len != 0) &&
			    SEQ_LEQ(q->t_seq, last))

				/* complete cover */

				if (SEQ_LEQ(t_end(q), last))
				{
					p = q->t_next;
					TCP_DEQ(q, tp);
					m_freem(dtom(q));
					q = p;
				}
				else
				{	/* overlap */
					if ((i = last-q->t_seq+1) < t->t_len)
					{
						t->t_len -= i;
						m_adj(dtom(t), -i);
					}
					else
						t->t_len = 0;
					break;
				}

			/* enqueue whatever is left of new before successors */

			if (t->t_len != 0)
			{
				tcp_net_keep = TRUE;
				TCP_ENQ(t, savq, tp);
			}
		}
	}

	/* set to ack completed data (no gaps) */

	FIRSTEMPTY(tp, tp->rcv_nxt);
	tp->ack_due = TRUE;

	/* if any room remaining in rcv buf, take any unprocessed
	   messages and schedule for later processing */

	if ((m = tp->t_rcv_unack) != NULL && (i = sbspace(sorcv)) > 0)
		do
		{

			/* schedule work request */

			t = mtod(m, struct th *);
			j = (t->t_off << TCP_OFFSHIFT) + sizeof(struct ip);
			m->m_off += j;
			m->m_len -= j;
			tp->t_rcv_unack = m->m_act;
			m->m_act = (struct mbuf *)0;
			/* w_alloc may clobber tcp_net_keep; preserve it */
			oldkeep = tcp_net_keep;
			tcpstat.t_unack++;
			w_alloc(INRECV, 0, tp, t);
			tcp_net_keep = oldkeep;

			/* remaining buffer space */

			for (n = m; n != NULL; n = n->m_next)
				i -= n->m_len;
		}
		while ((m = tp->t_rcv_unack) != NULL && i > 0);
}

/*
 * Send a reset segment.
 *
 * "n" is the offending incoming segment; its ACK field (if any) supplies
 * the sequence number of the RST.  For a "wildcard" listener the local/
 * foreign addresses and port are temporarily borrowed from the incoming
 * packet so a header template can be built, then restored afterwards.
 */
send_rst(tp, n)
register struct tcpcb *tp;
register struct th *n;
{
	register struct inpcb *inp;
	struct in_addr src, dst;
	u_short port;
	int temp_rst;

	/* don't send a reset in response to a reset */

	if (n->t_flags&T_RST || (inp = tp->t_in_pcb) == NULL)
		return;

	tp->snd_rst = TRUE;
	temp_rst = FALSE;
	if (n->t_flags&T_ACK)
		tp->snd_nxt = n->t_ackno;

	/* if reset required from "wildcard" listener, take addresses and
	   port from incoming packet */

	if (inp->inp_laddr.s_addr == 0 || inp->inp_faddr.s_addr == 0 ||
	    inp->inp_fport == 0)
	{
		src = inp->inp_laddr;
		dst = inp->inp_faddr;
		port = inp->inp_fport;
		inp->inp_laddr = n->t_d;
		inp->inp_faddr = n->t_s;
		inp->inp_fport = n->t_src;
		tp->t_template = tcp_template(tp);
		temp_rst = TRUE;
	}
	tp->syn_rcvd = FALSE;
	if (tp->t_template)
		(void) send_pkt(tp, 0, 0);
	else
		printf("send_rst: no template\n");
	tp->ack_due = FALSE;
	tp->snd_rst = FALSE;
#if T_DELACK > 0
	tp->force_ack = FALSE;
	t_cancel(tp, TDELACK);
	tp->ack_skipped = 0;
#endif

	/* restore "wildcard" addresses */

	if (temp_rst)
	{
		inp->inp_laddr = src;
		inp->inp_faddr = dst;
		inp->inp_fport = port;
		tp->snd_nxt = tp->iss;
		/* drop the route acquired for the borrowed addresses */
		if (inp->inp_route.ro_rt != NULL)
		{
			rtfree(inp->inp_route.ro_rt);
			inp->inp_route.ro_rt = NULL;
		}
		if (tp->t_template)
		{
			m_free(dtom(tp->t_template));
			tp->t_template = NULL;
		}
	}
}

/*
 * Pull the urgent bytes out of the receive stream.
 *
 * "mp" heads an mbuf chain of sequenced data about to be appended to the
 * receive buffer; the urgent region [rcv_urp, rcv_urpend] lies somewhere
 * within it (so_oobmark bytes past what is already in sorcv).  The urgent
 * bytes are copied to tp->oob_data and removed in place from the chain.
 * Returns the (possibly shortened) chain head.
 */
struct mbuf *extract_oob(tp, mp, sorcv)
struct tcpcb *tp;
struct mbuf *mp;
struct sockbuf *sorcv;
{
	struct socket *so;
	struct mbuf *top, *here, *m;
	int off, len, tmp;

	m = mp;
	so = tp->t_in_pcb->inp_socket;
	/*
	 * skip over bytes that preceed out of band data.
	 */
	if ((off = so->so_oobmark - sorcv->sb_cc) < 0)
	{
		log(LOG_INFO, "extract_oob: neg off\n");
		tp->rcv_urpend = tp->rcv_urp = tp->irs;
		return (mp);
	}

	while (m && (off > 0))
	{
		if (m->m_len <= off)
		{
			off -= m->m_len;
			m = m->m_next;
		}
		else
			break;
	}

	if (!m)
		return (mp);

	/*
	 * copy out of band data.  removing it from input stream.
	 */
	len = tp->rcv_urpend - tp->rcv_urp + 1;	/* # urgent bytes */
	top = here = NULL;
	while (m && (len > 0))
	{
		char *p;
		struct mbuf *newm;
		int dropped;

		tmp = MIN(m->m_len - off, len);
		/* tmp == # urgent bytes in this mbuf */
		len -= tmp;
		tp->rcv_urp += tmp;

		p = mtod(m, caddr_t) + off;	/* points at first urgent byte */
		dropped = FALSE;

		while (tmp > 0)
		{
			unsigned nbytes;

			/* in case this mbuf uses pages */
			nbytes = MIN(tmp, MLEN);

			if (! dropped)
			{
				if (newm = m_get(M_WAIT, MT_DATA))
				{
					bcopy (p, mtod(newm, char *), nbytes);
					newm->m_len = nbytes;

					if (!top)
						top = here = newm;
					else
					{
						here->m_next = newm;
						here = here->m_next;
					}
				}
				else
					/* potential unreliability */
					dropped = TRUE;
			}

			/* slide trailing bytes down over the urgent ones */
			bcopy(p+nbytes, p, (unsigned)(m->m_len -off -nbytes));
			m->m_len -= nbytes;
			tmp -= nbytes;
		}

		if (m->m_len <= 0)
		{
			/*
			 * So soreceive never sees a zero length mbuf
			 * with m_act set.  (PUSHED URGENT data packet)
			 */
			if (m == mp)
				mp = m = m_free(m);
			else
				m = m_free(m);
		}
		else
			m = m->m_next;

		off = 0;
	}

	if (top)
	{
		if (tp->oob_data)
			m_cat (tp->oob_data, top);
		else
			tp->oob_data = top;
		sohasoutofband(so);
	}

	return (mp);
}

/*
 * Accept data for the user to receive.  Moves data from sequenced tcp
 * segments from the sequencing queue to the user's receive queue (in the
 * ucb).  Observes locking on receive queue.
 */
present_data(tp)
register struct tcpcb *tp;
{
	PRESENT_DATA(tp)
}