1 #ifdef RCSIDENT 2 static char rcsident[] = "$Header: tcp_prim.c,v 1.23 85/07/31 09:34:04 walsh Exp $"; 3 #endif 4 5 #include "../h/param.h" 6 #include "../h/errno.h" 7 #include "../h/mbuf.h" 8 #include "../h/socket.h" 9 #include "../h/socketvar.h" 10 #include "../h/protosw.h" 11 12 #include "../net/if.h" 13 #include "../net/route.h" 14 15 #include "../bbnnet/in.h" 16 #include "../bbnnet/in_pcb.h" 17 #include "../bbnnet/in_var.h" 18 #include "../bbnnet/net.h" 19 #include "../bbnnet/fsm.h" 20 #include "../bbnnet/tcp.h" 21 #include "../bbnnet/seq.h" 22 #include "../bbnnet/ip.h" 23 #include "../bbnnet/macros.h" 24 #include "../bbnnet/sws.h" 25 26 extern struct inpcb tcp; 27 28 /* 29 * TCP finite state machine primitives 30 * 31 * These routines are called from the procedures in tcp_procs.c to do low 32 * level protocol functions. 33 */ 34 35 /* 36 * The hope is that a bcopy will fill in most tcp/ip header fields quicker 37 * than a step by step stuffing of each individually when we have to send 38 * some info. 39 */ 40 struct th *tcp_template(tp) 41 struct tcpcb *tp; 42 { 43 register struct mbuf *m; 44 register struct th *t; 45 register struct inpcb *inp; 46 47 m = m_getclr(M_WAIT, MT_HEADER); 48 if (m == NULL) 49 return ((struct th *) NULL); 50 m->m_len = sizeof (struct th); 51 t = mtod(m, struct th *); 52 inp = tp->t_in_pcb; 53 54 t->t_pr = IPPROTO_TCP; 55 t->t_s = inp->inp_laddr; 56 t->t_d = inp->inp_faddr; 57 t->t_src = inp->inp_lport; 58 t->t_dst = inp->inp_fport; 59 t->t_off = TCPSIZE >> TCP_OFFSHIFT; 60 61 return (t); 62 } 63 64 #ifdef GPROF 65 long tcp_pkt_size[2*1024]; 66 #endif 67 68 /* 69 * Send a tcp segment 70 */ 71 send_pkt(tp, flags, len) 72 register struct tcpcb *tp; 73 register int flags; 74 register int len; /* in sequence units: includes SYN, FIN */ 75 { 76 register struct th *t; 77 register struct inpcb *inp; 78 register struct mbuf *m; 79 struct mbuf *dat; 80 int i; 81 struct sockbuf *sorcv; 82 short *p; 83 struct th *tmpt; 84 85 inp = tp->t_in_pcb; 86 87 /* 88 * Adjust data length for SYN and FIN. 89 */ 90 if (flags & T_FIN) 91 len--; 92 if (flags & T_SYN) 93 len--; 94 95 /* 96 * and get a copy of the data for this transmission 97 */ 98 dat = (struct mbuf *) NULL; 99 if (len > 0) 100 { 101 int off; 102 103 off = tp->snd_nxt - tp->snd_una; 104 if (! tp->syn_acked) 105 if (! (flags & T_SYN)) 106 off --; 107 if ((dat = m_copy (inp->inp_socket->so_snd.sb_mb, off, len)) == NULL) 108 return (ENOBUFS); 109 } 110 111 #ifdef MBUF_DEBUG 112 m = m_get(M_WAIT, MT_HEADER); 113 #else 114 MGET(m, M_WAIT, MT_HEADER); 115 #endif 116 if (m == NULL) 117 return(ENOBUFS); 118 /* 119 * Build tcp leader at bottom of new buffer to leave room for lower 120 * level leaders. Leave an extra four bytes for TCP max segment size 121 * option, which is sent in SYN packets. 122 * Align header for memory access speed in checksumming... 123 */ 124 m->m_off = (MMAXOFF - sizeof(struct th) - TCP_MAXSEG_OPTLEN) & 125 ~(sizeof(long) -1); 126 m->m_len = sizeof(struct th); 127 m->m_next = dat; 128 t = mtod(m, struct th *); 129 130 if (tp->t_template == NULL) 131 panic("send_pkt"); 132 bcopy((caddr_t)tp->t_template, (caddr_t)t, sizeof (struct th)); 133 134 #ifndef NOTCPOPTS 135 /* 136 * Insert maximum segment size option for SYN. 137 */ 138 if (flags & T_SYN) 139 { 140 /* 141 * may not have a route yet. 142 */ 143 if (inp->inp_route.ro_rt) 144 { 145 m->m_len += TCP_MAXSEG_OPTLEN; 146 /* SYN occupies seq space */ 147 len += TCP_MAXSEG_OPTLEN; 148 t->t_off = (TCPSIZE + TCP_MAXSEG_OPTLEN) >> TCP_OFFSHIFT; 149 p = (short *)((int)t + sizeof(struct th)); 150 *p++ = htons((u_short)TCP_MAXSEG_OPTHDR); 151 *p = htons((u_short)inp->inp_route.ro_rt->rt_ifp->if_mtu 152 - TCPIPMAX); 153 } 154 } 155 #endif 156 157 t->t_len = htons((u_short)len + TCPSIZE); 158 t->t_seq = htonl(tp->snd_nxt); 159 t->t_ackno = htonl(tp->rcv_nxt); 160 161 #ifdef GPROF 162 if (len < (sizeof(tcp_pkt_size)/sizeof(tcp_pkt_size[0])) - TCPSIZE) 163 tcp_pkt_size[len+TCPSIZE] ++; 164 else 165 tcp_pkt_size[0] ++; 166 #endif 167 168 if (tp->snd_rst) 169 { 170 flags |= T_RST; 171 flags &= ~T_SYN; 172 } 173 174 if (tp->snd_urg) 175 { 176 if (SEQ_GT(tp->snd_urp, tp->snd_nxt)) 177 { 178 short up; 179 180 /* 181 * SEQ_LEQ(tp->snd_urp, tp->snd_nxt+len) 182 * Strictly speaking, we should be able to have the 183 * urgent pointer point into the stream beyond the 184 * current end of packet, but this is just in case 185 * some implementation is not ready for that. 186 */ 187 flags |= T_URG; 188 up = MIN(len -1, tp->snd_urp - tp->snd_nxt -1); 189 t->t_urp = htons((u_short)up); 190 } 191 } 192 193 if (tp->syn_rcvd) 194 { 195 #if T_DELACK > 0 196 tp->lastack = tp->rcv_nxt; 197 #endif 198 flags |= T_ACK; 199 } 200 201 t->t_flags = flags; 202 203 /* Calculate the window we should advertise */ 204 205 sorcv = &inp->inp_socket->so_rcv; 206 /* 207 * Count bytes left in user rcv queue, and reduce by sequencing queue 208 * Counting the sequencing q contracts the window when packets are 209 * lost (== when the network is over-loaded). 210 */ 211 i = sbspace(sorcv) - tp->t_rcv_len; 212 /* 213 * apply receiver's solution to SWS in case sender does not have such 214 * an algorithm. One 8th was determined by benchmarks writing 2k 215 * buffers on an Ethernet connection. It has a slightly negative 216 * influence on IMP networks when writing 1k buffers. 217 * 218 * (sorcv->sb_hiwat >> 3) limited by 256 == 2k / 8, since application 219 * may adjust its buffering to large amounts via ioctl call. An eighth 220 * of a large number may be a reasonable sized packet to send. 221 * 222 * Only apply this algorithm if are getting packets in order, 223 * so that advertisement of 0 window does not prevent retransmission 224 * of dropped packet. 225 */ 226 tmpt = tp->t_rcv_next; 227 if ((i < MIN(256, (sorcv->sb_hiwat >> 3))) && 228 ((tmpt == (struct th *) tp) || SEQ_LEQ(tmpt->t_seq, tp->rcv_nxt))) 229 i = 0; 230 else 231 { 232 /* 233 * if this connection gets fragmented, constrain the windowsize 234 */ 235 if (tp->t_maxfrag) 236 i = MIN(i, tp->t_maxfrag*3); 237 238 if (i < 0) 239 i = 0; 240 } 241 242 #if T_DELACK > 0 243 tp->rcv_wnd = i; 244 #endif 245 t->t_win = htons((u_short)i); 246 /* 247 * If we sent a zero window, we should try to send a non-zero ACK ASAP. 248 */ 249 if (i == 0) 250 tp->sent_zero = TRUE; 251 else 252 tp->sent_zero = FALSE; 253 254 t->t_sum = in_cksum(m, len + sizeof(struct th)); 255 256 if (inp->inp_socket->so_options & SO_DEBUG) 257 { 258 struct work w; 259 260 w.w_type = INRECV; /* not really. use -1 newstate */ 261 w.w_tcb = tp; 262 w.w_dat = (char *)t; 263 tcp_debug(tp, &w, -1); 264 } 265 266 /* 267 * and ship packet off via IP. Remember that since this protocol 268 * involves retransmissions, errors can occur asynchronous to a 269 * (write) system call, and that therefore we can not send the 270 * error all the way back up through subroutine return values. We 271 * must also post it back via advise_user() at some point, and this 272 * looks like a good point to try it. 273 */ 274 { 275 int error; 276 277 error = ip_send(inp, m, len+TCPSIZE, FALSE); 278 if (error) 279 /* 280 * Since we use retransmissions, don't need to tell user 281 * process about this. (Can be as simple as interface 282 * or host structure queues are too long due to current 283 * heavy traffic. Backing off will take care of that.) 284 */ 285 if (error != ENOBUFS) 286 advise_user(inp->inp_socket, error); 287 return (error); 288 } 289 } 290 291 /* 292 * Find the first empty spot in rcv buffer 293 */ 294 sequence firstempty(tp) 295 register struct tcpcb *tp; 296 { 297 sequence retval; 298 299 FIRSTEMPTY(tp, retval); 300 return(retval); 301 } 302 303 304 /* 305 * TCP timer update routine 306 */ 307 tcp_timeo() 308 { 309 register struct inpcb *inp, *next; 310 register struct tcpcb *tp; 311 register i; 312 register s; 313 extern sequence tcp_iss; /* tcp initial send seq # */ 314 static int tcpmins; /* tcp minute timer */ 315 316 /* search through tcb and update active timers */ 317 s = splnet(); 318 inp = tcp.inp_next; 319 while (inp != &tcp) 320 { 321 next = inp->inp_next; 322 if (tp = inptotcpcb(inp)) 323 { 324 if (tp->sws_qff < SWS_QFF_DEF) 325 tp->sws_qff ++; 326 327 for (i = TINIT; i <= TDELACK; i++) 328 if (tp->t_timers[i] != 0 && --tp->t_timers[i] == 0) 329 { 330 struct work w; 331 332 w.w_type = ISTIMER; 333 w.w_stype = i; 334 w.w_tcb = tp; 335 w.w_dat = (char *) NULL; 336 if (action(&w) == CLOSED) 337 goto next_tcb; 338 } 339 340 if (tp->t_timers[TXMT] < MAX_TCPTIMERVAL-1) 341 tp->t_timers[TXMT]++; 342 343 if (tcpmins == 0) 344 { 345 if (tp->t_timers[TNOACT] != 0 && --tp->t_timers[TNOACT] == 0) 346 w_alloc(ISTIMER, TNOACT, tp, 0); 347 } 348 } 349 next_tcb: 350 inp = next; 351 } 352 splx(s); 353 354 if (--tcpmins < 0) 355 tcpmins = 120-1; /* zero-origin strikes again */ 356 tcp_iss += ISSINCR; /* increment iss */ 357 } 358 359 360 /* 361 * Do TCP option processing 362 */ 363 tcp_opt(tp, t, hlen) 364 register struct tcpcb *tp; 365 register struct th *t; 366 int hlen; 367 { 368 register char *p; 369 register i, j, len; 370 371 p = (char *)((int)t + sizeof(struct th)); /* -> at options */ 372 373 if ((i = hlen - TCPSIZE) > 0) 374 { /* any options */ 375 376 while (i > 0) 377 378 switch (*p++) 379 { 380 case TCP_END_OPT: 381 default: /* garbage: throw away rest */ 382 return; 383 384 case TCP_NOP_OPT: 385 i--; 386 break; 387 388 case TCP_MAXSEG_OPT: /* max segment size */ 389 if (t->t_flags&T_SYN && !tp->syn_rcvd) 390 { 391 len = ntohs(*(u_short *)((int)p + 1)); 392 /* 393 * may not have a route yet 394 */ 395 if (!tp->t_in_pcb->inp_route.ro_rt) 396 /* in LISTEN state */ 397 tp->t_maxseg = len; 398 else 399 /* connecting to server */ 400 tp->t_maxseg = 401 MIN(tp->t_in_pcb->inp_route.ro_rt->rt_ifp->if_mtu - 402 TCPIPMAX, len); 403 tp->t_maxseg -= tp->t_in_pcb->inp_optlen; 404 } 405 if ((j = *p) == 0) 406 break; 407 i -= j; 408 p += j - 1; 409 } 410 } 411 } 412 413 /* 414 * Called at splimp from uipc_mbuf.c 415 * Network needs some space freed! Remove unprocessed packets. 416 */ 417 tcp_drain() 418 { 419 register struct inpcb *inp; 420 register struct tcpcb *tp; 421 register struct mbuf *m; 422 423 for (inp = tcp.inp_next; inp != &tcp; inp = inp->inp_next) 424 { 425 tp = (struct tcpcb *)inp->inp_ppcb; 426 427 if (tp == NULL) 428 continue; 429 430 while (m = tp->t_rcv_unack) 431 { 432 tp->t_rcv_unack = m->m_act; 433 m->m_act = (struct mbuf *)NULL; 434 m_freem (m); 435 } 436 } 437 } 438