1 #ifdef RCSIDENT 2 static char rcsident[] = "$Header: tcp_input.c,v 1.25 85/07/31 09:33:47 walsh Exp $"; 3 #endif 4 5 #include "../h/param.h" 6 #include "../h/dir.h" 7 #include "../h/user.h" 8 #include "../h/kernel.h" 9 #include "../h/inode.h" 10 #include "../h/mbuf.h" 11 #include "../h/socket.h" 12 #include "../h/socketvar.h" 13 #include "../h/syslog.h" 14 15 #include "../net/if.h" 16 #include "../net/route.h" 17 18 #include "../bbnnet/in.h" 19 #include "../bbnnet/net.h" 20 #include "../bbnnet/in_pcb.h" 21 #include "../bbnnet/in_var.h" 22 #include "../bbnnet/fsm.h" 23 #include "../bbnnet/tcp.h" 24 #include "../bbnnet/seq.h" 25 #include "../bbnnet/ip.h" 26 #include "../bbnnet/fsmdef.h" 27 #include "../bbnnet/macros.h" 28 #include "../bbnnet/nopcb.h" 29 #ifdef HMPTRAPS 30 #include "../bbnnet/hmp_traps.h" 31 #endif 32 33 #ifdef HMPTRAPS 34 #define HMP_TRAP(a,b,c) hmp_trap(a,b,c) 35 #else 36 #define HMP_TRAP(a,b,c) 37 #endif 38 39 extern int nosum; 40 extern struct inpcb tcp; 41 42 /* 43 * net preproc (66,67,68,69,70,71,72,73,74,75,76) 44 * 45 * macro form of former function netprepr() 46 * 47 * tp valid tcpcb 48 * n valid th 49 * inp valid inpcb ( == tp->t_in_pcb ) 50 */ 51 #define NETPREPR(tp, n, inp, retval) \ 52 { \ 53 retval = (-1); /* assume bad */ \ 54 /* tell caller to eat segment (unacceptable) */ \ 55 \ 56 switch (tp->t_state) { \ 57 case LISTEN: \ 58 /* Ignore resets, ACKs cause resets, must have SYN. */ \ 59 if (n->t_flags&T_RST) \ 60 break; \ 61 else if (n->t_flags&T_ACK) \ 62 send_rst(tp, n); \ 63 else if (n->t_flags&T_SYN) \ 64 retval = SAME; \ 65 break; \ 66 \ 67 case SYN_SENT: \ 68 /* Bad ACKs cause resets, good resets close, must have SYN. */ \ 69 if (n->t_flags&T_ACK && (SEQ_GEQ(tp->iss, n->t_ackno) || \ 70 SEQ_GT(n->t_ackno, tp->snd_hi))) \ 71 send_rst(tp, n); \ 72 else if (n->t_flags&T_RST) { \ 73 if (n->t_flags&T_ACK) { \ 74 t_close(tp, ECONNREFUSED); \ 75 retval = CLOSED; \ 76 } \ 77 } else if (n->t_flags&T_SYN) \ 78 retval = SAME; \ 79 break; \ 80 \ 81 case 0: \ 82 /* \ 83 * after bind, but before we've had a chance to \ 84 * listen or connect \ 85 */ \ 86 break; \ 87 \ 88 default: \ 89 { struct sockbuf *sorcv; sequence xend; \ 90 /* \ 91 * Part of packet must fall in window. \ 92 * This allows for segments that are partially retransmits \ 93 * and partially new. \ 94 * otherwise just ACK and drop. \ 95 */ \ 96 sorcv = &inp->inp_socket->so_rcv; \ 97 xend = n->t_seq; \ 98 if (n->t_len) \ 99 /* remember, could be an ACK-only packet */ \ 100 xend += n->t_len -1; \ 101 if (n->t_flags & T_FIN) \ 102 xend ++; /* in case FIN + rxmitted data (TOPS-20) */ \ 103 if (SEQ_LT(xend, tp->rcv_nxt) || \ 104 SEQ_GEQ(n->t_seq, tp->rcv_nxt + sbspace(sorcv))) { \ 105 tp->t_preproc++; \ 106 send_tcp(tp, TCP_CTL); \ 107 HMP_TRAP(T_TCP_WINDOW, (caddr_t)0,0); \ 108 /* \ 109 * Due to 4.2BSD net architecture, don't need to send \ 110 * L_SYN_RCVD socket back to LISTEN on reset since server \ 111 * socket and communication paths are separate. \ 112 */ \ 113 } else if (n->t_flags&T_RST) { \ 114 t_close(tp, ENETRESET); \ 115 retval = CLOSED; \ 116 /* No SYNs allowed unless *SYN_RCVD */ \ 117 } else if ((n->t_flags&T_SYN) && (tp->t_state >= ESTAB)) { \ 118 send_rst(tp, n); \ 119 t_close(tp, ENETRESET); \ 120 retval = CLOSED; \ 121 /* \ 122 * Must have good ACK. Bad ACKs cause resets only in \ 123 * SYN_RCVD states. In other states, this may be a slow pkt? \ 124 */ \ 125 } else if (n->t_flags&T_ACK) \ 126 if (SEQ_GT(tp->snd_una, n->t_ackno) || \ 127 SEQ_GT(n->t_ackno, tp->snd_hi)) { \ 128 if (tp->t_state == SYN_RCVD || \ 129 tp->t_state == L_SYN_RCVD) \ 130 send_rst(tp, n); \ 131 } else { \ 132 /* \ 133 * Acceptable segment: \ 134 * Reset no activity timer on established and \ 135 * closing connections. \ 136 */ \ 137 if (tp->t_state >= ESTAB) \ 138 tp->t_timers[TNOACT] = tp->t_noact; \ 139 retval = SAME; \ 140 } } } } 141 142 143 int tcp_net_keep; 144 145 /* 146 * This is the scheduler for the tcp machine. It is called 147 * from the lower network levels, either directly from the 148 * internet level, in case of input from the network; or 149 * indirectly from netmain, in case of user or timer events 150 * which awaken the main loop. 151 */ 152 tcp_input(mp, fragsize) 153 register struct mbuf *mp; 154 int fragsize; 155 { 156 register struct th *tp; 157 register int hlen; 158 register struct tcpcb *t; 159 register struct inpcb *inp; 160 struct mbuf *m; 161 int i, tlen; 162 struct work w; 163 u_short cks; 164 165 tcpstat.t_total ++; 166 167 /* 168 * see ip_input() 169 */ 170 if ((mp->m_off > MMAXOFF) || (mp->m_len < sizeof(struct th))) 171 { 172 if ((mp = m_pullup(mp, sizeof(struct th))) == NULL) 173 { 174 tcpstat.t_tooshort ++; 175 return; 176 } 177 } 178 179 /* set up needed info from ip header, note that beginning 180 of tcp header struct overlaps ip header. ip options 181 have been removed by ip level option processing */ 182 183 tp = mtod(mp, struct th *); 184 185 /* make sure header does not overflow mbuf */ 186 187 hlen = tp->t_off << TCP_OFFSHIFT; 188 if (hlen < TCPSIZE) 189 { 190 ip_log ((struct ip *) tp, "tcp t_off too small"); 191 netlog(mp); 192 return; 193 } 194 if (hlen > mp->m_len) 195 { 196 if ((mp = m_pullup(mp, hlen)) == NULL) 197 { 198 ip_log((struct ip *) tp, "tcp header overflow"); 199 #ifdef HMPTRAPS 200 /* hmp_trap(T_TCP_OVFLO, (caddr_t)0, 0); */ 201 #else 202 /* netlog(mp); */ 203 #endif 204 return; 205 } 206 tp = mtod(mp, struct th *); 207 } 208 209 tlen = ((struct ip *)tp)->ip_len; 210 tp->t_len = htons((u_short)tlen); 211 tp->t_next = NULL; 212 tp->t_prev = NULL; 213 tp->t_x1 = 0; 214 215 /* 216 * do checksum calculation, drop seg if bad 217 */ 218 i = (u_short)tp->t_sum; 219 tp->t_sum = 0; 220 if (i != (cks = (u_short)in_cksum(mp, tlen + sizeof(struct ip)))) 221 { 222 tcpstat.t_badsum++; 223 if (! nosum) 224 { 225 #ifdef HMPTRAPS 226 /* hmp_trap(T_TCP_CKSUM, (caddr_t)0,0); */ 227 #endif 228 inet_cksum_err ("tcp", (struct ip *) tp, (u_long) i, (u_long) cks); 229 netlog(mp); 230 return; 231 } 232 } 233 234 /* find a tcb for incoming message */ 235 inp = in_pcblookup(&tcp, tp->t_s.s_addr, tp->t_src, 236 tp->t_d.s_addr, tp->t_dst, TRUE); 237 238 if ((inp != NULL) && ((t = (struct tcpcb *)inp->inp_ppcb) != NULL)) 239 { 240 /* found a tcp for message */ 241 /* byte swap header */ 242 243 if ((int)(tp->t_len = tlen - hlen) < 0) 244 { 245 ip_log((struct ip *) tp, "tcp header length"); 246 #ifdef HMPTRAPS 247 /* hmp_trap(T_TCP_HLEN, (caddr_t)0,0); */ 248 #else 249 netlog(mp); 250 #endif 251 return; 252 } 253 tp->t_seq = ntohl(tp->t_seq); 254 tp->t_ackno = ntohl(tp->t_ackno); 255 tp->t_win = ntohs((u_short)tp->t_win); 256 tp->t_urp = ntohs((u_short)tp->t_urp); 257 258 /* record the max fragment size */ 259 260 t->t_maxfrag = MAX(t->t_maxfrag, fragsize); 261 262 /* do TCP option processing */ 263 264 if (hlen > TCPSIZE) 265 tcp_opt(t, tp, hlen); 266 267 /* check seg seq #, do RST processing */ 268 269 NETPREPR(t, tp, inp, i); 270 if (i != SAME) 271 { 272 /* segment failed preprocessing. Drop it and 273 * possibly enter new state. For now, always 274 * returns SAME/-1/CLOSED 275 */ 276 m_freem(mp); 277 /* 278 if ((i != -1) && (i != CLOSED)) 279 t->t_state = i; 280 */ 281 } 282 else 283 { 284 if (sbspace(&inp->inp_socket->so_rcv) <= 0 && 285 tp->t_len != 0) 286 { 287 /* 288 * The user's receive q is full. Either the 289 * remote TCP is not paying attention to the 290 * window, or this is a persistence packet. 291 * 292 * The first reason was once common with 293 * TOPS-20. Let's conserve network resources 294 * by holding onto the packet in the unack q. 295 * Place it at the end of the list. 296 */ 297 mp->m_act = NULL; 298 if ((m = t->t_rcv_unack) != NULL) 299 { 300 while (m->m_act != NULL) 301 m = m->m_act; 302 m->m_act = mp; 303 } 304 else 305 t->t_rcv_unack = mp; 306 307 /* 308 * ACK if it was a window probe, just in case 309 * they have a TNOACT timer running. 310 */ 311 send_tcp(t, TCP_CTL); 312 } 313 else 314 { 315 int act, newstate; 316 struct socket *so; 317 318 /* set up work entry for seg, and call 319 the fsm to process it */ 320 321 hlen += sizeof(struct ip); 322 mp->m_off += hlen; 323 mp->m_len -= hlen; 324 325 /** HAND CODED action() CALL **/ 326 327 w.w_type = INRECV; 328 w.w_tcb = t; 329 w.w_dat = (char *)tp; 330 331 /* get index of action routine from 332 * transition table 333 */ 334 act = fstab[t->t_state][INRECV]; 335 336 /* invalid state transition, just 337 * print a message and ignore */ 338 339 if (act == 0) 340 { 341 log(LOG_INFO, "tcp bad state: tcb=%x state=%d INRECV\n", t, t->t_state); 342 m_freem(mp); 343 return; 344 } 345 346 so = t->t_in_pcb->inp_socket; 347 tcp_net_keep = FALSE; 348 newstate = (*fsactab[act])(&w); 349 350 /* debugging info */ 351 TCP_DEBUG (so, t, &w, act, newstate); 352 353 /* if CLOSED, lost tcpcb */ 354 if ((newstate != SAME) && (newstate != CLOSED)) 355 t->t_state = newstate; 356 if (! tcp_net_keep) 357 m_freem(mp); 358 359 /** END action() **/ 360 } 361 } 362 } 363 else 364 /* nobody wants it */ 365 send_uncon_rst (tp, mp, tlen, hlen); 366 } 367 368 send_uncon_rst (n, mp, tlen, hlen) 369 register struct th *n; 370 register struct mbuf *mp; 371 { 372 struct in_addr tempinaddr; 373 u_short tempport; 374 int error; 375 376 /* make sure we don't send a RST in response to an RST */ 377 378 if (n->t_flags & T_RST) 379 { 380 m_freem(mp); 381 return; 382 } 383 384 /* free everything but the header */ 385 386 m_freem(mp->m_next); 387 mp->m_next = NULL; 388 mp->m_len = sizeof(struct th); 389 390 /* form a reset from the packet and send */ 391 392 tempinaddr = n->t_d; 393 n->t_d = n->t_s; 394 n->t_s = tempinaddr; 395 396 tempport = n->t_src; 397 n->t_src = n->t_dst; 398 n->t_dst = tempport; 399 400 if (n->t_flags&T_ACK) 401 n->t_seq = n->t_ackno; 402 else 403 { 404 n->t_ackno = htonl((u_long) 405 ntohl((u_long)n->t_seq) 406 + tlen - hlen 407 + (n->t_flags&T_SYN ? 1 : 0)); 408 n->t_seq = 0; 409 } 410 n->t_flags = (n->t_flags&T_ACK) ? T_RST : T_RST+T_ACK; 411 n->t_len = htons((u_short)TCPSIZE); 412 n->t_off = TCPSIZE >> TCP_OFFSHIFT; 413 n->t_sum = in_cksum(mp, sizeof(struct th)); 414 415 NOPCB_IPSEND (mp, TCPSIZE, FALSE, error); 416 tcpstat.t_badsegs++; 417 418 #ifdef lint 419 error = error; 420 #endif 421 } 422 423 /* 424 * Entry into TCP finite state machine 425 */ 426 action(wp) 427 register struct work *wp; 428 { 429 register act, newstate; 430 register struct tcpcb *tp; 431 register struct socket *so; 432 433 tp = wp->w_tcb; 434 so = tp->t_in_pcb->inp_socket; 435 436 ACTION (tp, so, wp, wp->w_type, wp->w_dat, act, newstate); 437 return(newstate); 438 } 439 440 441 struct mbuf *tcpdebuf; 442 int tcprint; 443 444 /* 445 * Write a record in the tcp debugging log 446 */ 447 tcp_debug(tp, wp, newstate) 448 register struct tcpcb *tp; 449 register struct work *wp; 450 register newstate; 451 { 452 register struct t_debug *dp; 453 register struct mbuf *m; 454 455 #ifdef TCPDEBUG 456 if (tcprint) 457 { 458 /* 459 * Print debugging info directly on the console (use this for 460 * intial testing only). 461 */ 462 printf("TCP(%x) %s X %s", tp, tcpstates[tp->t_state], 463 tcpinputs[wp->w_type]); 464 465 if (wp->w_type == ISTIMER) 466 printf("(%s)", tcptimers[wp->w_stype]); 467 468 printf(" --> %s", 469 tcpstates[ (newstate > 0) ? newstate : tp->t_state]); 470 471 if (newstate < 0) 472 printf(" (FAILED)\n"); 473 else 474 putchar('\n', 0); 475 } 476 #endif 477 478 /* 479 * Get an mbuf to write the debugging record into. If we don't already 480 * have one, allocate a new one. 481 */ 482 if ((m = tcpdebuf) == NULL) 483 { 484 register struct mbuf *c; 485 486 if ((tcpdebuf = m = m_get(M_DONTWAIT, MT_DATA)) == NULL) 487 return; 488 /* 489 * If possible, use a cluster so that we need to wake up the 490 * raw listener less often and reduce likelihood he misses 491 * some information. 492 */ 493 MCLGET(c, 1); 494 if (c) 495 { 496 m->m_off = ((int) c) - ((int) m); 497 m->m_act = (struct mbuf *) TCDBLEN; 498 } 499 else 500 m->m_act = (struct mbuf *) TDBLEN; 501 m->m_len = 0; 502 } 503 504 dp = (struct t_debug *) (mtod(m, char *) + m->m_len); 505 /* 506 * Set up the debugging record. 507 */ 508 dp->t_iptime = iptime(); 509 dp->t_input = wp->w_type; 510 dp->t_timer = wp->w_stype; 511 dp->t_newstate = newstate; 512 if (tp != NULL) 513 { 514 dp->t_oldstate = tp->t_state; 515 dp->t_tcb = (*tp); /* structure copy */ 516 } 517 else 518 dp->t_oldstate = 0; 519 520 if (wp->w_type == INRECV) 521 { 522 register struct th *n; 523 524 n = (struct th *)wp->w_dat; 525 dp->t_hdr = (*n); /* structure copy */ 526 } 527 /* 528 * If the mbuf is full, dispatch it to a raw listener. 529 * Also flush if the connection we're debugging closes so that 530 * packet-printer/systems analyst sees final transitions. 531 */ 532 m->m_len += sizeof(struct t_debug); 533 if ((m->m_len >= ((int) m->m_act)) || (newstate == CLOSED)) 534 { 535 m->m_act = 0; 536 tcpdebuglog(m); 537 tcpdebuf = NULL; 538 } 539 } 540