1 #ifdef RCSIDENT
2 static char rcsident[] = "$Header: tcp_prim.c,v 1.23 85/07/31 09:34:04 walsh Exp $";
3 #endif
4
5 #include "../h/param.h"
6 #include "../h/errno.h"
7 #include "../h/mbuf.h"
8 #include "../h/socket.h"
9 #include "../h/socketvar.h"
10 #include "../h/protosw.h"
11
12 #include "../net/if.h"
13 #include "../net/route.h"
14
15 #include "../bbnnet/in.h"
16 #include "../bbnnet/in_pcb.h"
17 #include "../bbnnet/in_var.h"
18 #include "../bbnnet/net.h"
19 #include "../bbnnet/fsm.h"
20 #include "../bbnnet/tcp.h"
21 #include "../bbnnet/seq.h"
22 #include "../bbnnet/ip.h"
23 #include "../bbnnet/macros.h"
24 #include "../bbnnet/sws.h"
25
26 extern struct inpcb tcp;
27
28 /*
29 * TCP finite state machine primitives
30 *
31 * These routines are called from the procedures in tcp_procs.c to do low
32 * level protocol functions.
33 */
34
35 /*
36 * The hope is that a bcopy will fill in most tcp/ip header fields quicker
37 * than a step by step stuffing of each individually when we have to send
38 * some info.
39 */
tcp_template(tp)40 struct th *tcp_template(tp)
41 struct tcpcb *tp;
42 {
43 register struct mbuf *m;
44 register struct th *t;
45 register struct inpcb *inp;
46
47 m = m_getclr(M_WAIT, MT_HEADER);
48 if (m == NULL)
49 return ((struct th *) NULL);
50 m->m_len = sizeof (struct th);
51 t = mtod(m, struct th *);
52 inp = tp->t_in_pcb;
53
54 t->t_pr = IPPROTO_TCP;
55 t->t_s = inp->inp_laddr;
56 t->t_d = inp->inp_faddr;
57 t->t_src = inp->inp_lport;
58 t->t_dst = inp->inp_fport;
59 t->t_off = TCPSIZE >> TCP_OFFSHIFT;
60
61 return (t);
62 }
63
#if defined(GPROF)
/*
 * Profiling histogram of transmitted segment sizes.  send_pkt() bumps
 * slot (len + TCPSIZE); sizes that do not fit are counted in slot 0.
 */
long tcp_pkt_size[2 * 1024];
#endif
67
68 /*
69 * Send a tcp segment
70 */
send_pkt(tp,flags,len)71 send_pkt(tp, flags, len)
72 register struct tcpcb *tp;
73 register int flags;
74 register int len; /* in sequence units: includes SYN, FIN */
75 {
76 register struct th *t;
77 register struct inpcb *inp;
78 register struct mbuf *m;
79 struct mbuf *dat;
80 int i;
81 struct sockbuf *sorcv;
82 short *p;
83 struct th *tmpt;
84
85 inp = tp->t_in_pcb;
86
87 /*
88 * Adjust data length for SYN and FIN.
89 */
90 if (flags & T_FIN)
91 len--;
92 if (flags & T_SYN)
93 len--;
94
95 /*
96 * and get a copy of the data for this transmission
97 */
98 dat = (struct mbuf *) NULL;
99 if (len > 0)
100 {
101 int off;
102
103 off = tp->snd_nxt - tp->snd_una;
104 if (! tp->syn_acked)
105 if (! (flags & T_SYN))
106 off --;
107 if ((dat = m_copy (inp->inp_socket->so_snd.sb_mb, off, len)) == NULL)
108 return (ENOBUFS);
109 }
110
111 #ifdef MBUF_DEBUG
112 m = m_get(M_WAIT, MT_HEADER);
113 #else
114 MGET(m, M_WAIT, MT_HEADER);
115 #endif
116 if (m == NULL)
117 return(ENOBUFS);
118 /*
119 * Build tcp leader at bottom of new buffer to leave room for lower
120 * level leaders. Leave an extra four bytes for TCP max segment size
121 * option, which is sent in SYN packets.
122 * Align header for memory access speed in checksumming...
123 */
124 m->m_off = (MMAXOFF - sizeof(struct th) - TCP_MAXSEG_OPTLEN) &
125 ~(sizeof(long) -1);
126 m->m_len = sizeof(struct th);
127 m->m_next = dat;
128 t = mtod(m, struct th *);
129
130 if (tp->t_template == NULL)
131 panic("send_pkt");
132 bcopy((caddr_t)tp->t_template, (caddr_t)t, sizeof (struct th));
133
134 #ifndef NOTCPOPTS
135 /*
136 * Insert maximum segment size option for SYN.
137 */
138 if (flags & T_SYN)
139 {
140 /*
141 * may not have a route yet.
142 */
143 if (inp->inp_route.ro_rt)
144 {
145 m->m_len += TCP_MAXSEG_OPTLEN;
146 /* SYN occupies seq space */
147 len += TCP_MAXSEG_OPTLEN;
148 t->t_off = (TCPSIZE + TCP_MAXSEG_OPTLEN) >> TCP_OFFSHIFT;
149 p = (short *)((int)t + sizeof(struct th));
150 *p++ = htons((u_short)TCP_MAXSEG_OPTHDR);
151 *p = htons((u_short)inp->inp_route.ro_rt->rt_ifp->if_mtu
152 - TCPIPMAX);
153 }
154 }
155 #endif
156
157 t->t_len = htons((u_short)len + TCPSIZE);
158 t->t_seq = htonl(tp->snd_nxt);
159 t->t_ackno = htonl(tp->rcv_nxt);
160
161 #ifdef GPROF
162 if (len < (sizeof(tcp_pkt_size)/sizeof(tcp_pkt_size[0])) - TCPSIZE)
163 tcp_pkt_size[len+TCPSIZE] ++;
164 else
165 tcp_pkt_size[0] ++;
166 #endif
167
168 if (tp->snd_rst)
169 {
170 flags |= T_RST;
171 flags &= ~T_SYN;
172 }
173
174 if (tp->snd_urg)
175 {
176 if (SEQ_GT(tp->snd_urp, tp->snd_nxt))
177 {
178 short up;
179
180 /*
181 * SEQ_LEQ(tp->snd_urp, tp->snd_nxt+len)
182 * Strictly speaking, we should be able to have the
183 * urgent pointer point into the stream beyond the
184 * current end of packet, but this is just in case
185 * some implementation is not ready for that.
186 */
187 flags |= T_URG;
188 up = MIN(len -1, tp->snd_urp - tp->snd_nxt -1);
189 t->t_urp = htons((u_short)up);
190 }
191 }
192
193 if (tp->syn_rcvd)
194 {
195 #if T_DELACK > 0
196 tp->lastack = tp->rcv_nxt;
197 #endif
198 flags |= T_ACK;
199 }
200
201 t->t_flags = flags;
202
203 /* Calculate the window we should advertise */
204
205 sorcv = &inp->inp_socket->so_rcv;
206 /*
207 * Count bytes left in user rcv queue, and reduce by sequencing queue
208 * Counting the sequencing q contracts the window when packets are
209 * lost (== when the network is over-loaded).
210 */
211 i = sbspace(sorcv) - tp->t_rcv_len;
212 /*
213 * apply receiver's solution to SWS in case sender does not have such
214 * an algorithm. One 8th was determined by benchmarks writing 2k
215 * buffers on an Ethernet connection. It has a slightly negative
216 * influence on IMP networks when writing 1k buffers.
217 *
218 * (sorcv->sb_hiwat >> 3) limited by 256 == 2k / 8, since application
219 * may adjust its buffering to large amounts via ioctl call. An eighth
220 * of a large number may be a reasonable sized packet to send.
221 *
222 * Only apply this algorithm if are getting packets in order,
223 * so that advertisement of 0 window does not prevent retransmission
224 * of dropped packet.
225 */
226 tmpt = tp->t_rcv_next;
227 if ((i < MIN(256, (sorcv->sb_hiwat >> 3))) &&
228 ((tmpt == (struct th *) tp) || SEQ_LEQ(tmpt->t_seq, tp->rcv_nxt)))
229 i = 0;
230 else
231 {
232 /*
233 * if this connection gets fragmented, constrain the windowsize
234 */
235 if (tp->t_maxfrag)
236 i = MIN(i, tp->t_maxfrag*3);
237
238 if (i < 0)
239 i = 0;
240 }
241
242 #if T_DELACK > 0
243 tp->rcv_wnd = i;
244 #endif
245 t->t_win = htons((u_short)i);
246 /*
247 * If we sent a zero window, we should try to send a non-zero ACK ASAP.
248 */
249 if (i == 0)
250 tp->sent_zero = TRUE;
251 else
252 tp->sent_zero = FALSE;
253
254 t->t_sum = in_cksum(m, len + sizeof(struct th));
255
256 if (inp->inp_socket->so_options & SO_DEBUG)
257 {
258 struct work w;
259
260 w.w_type = INRECV; /* not really. use -1 newstate */
261 w.w_tcb = tp;
262 w.w_dat = (char *)t;
263 tcp_debug(tp, &w, -1);
264 }
265
266 /*
267 * and ship packet off via IP. Remember that since this protocol
268 * involves retransmissions, errors can occur asynchronous to a
269 * (write) system call, and that therefore we can not send the
270 * error all the way back up through subroutine return values. We
271 * must also post it back via advise_user() at some point, and this
272 * looks like a good point to try it.
273 */
274 {
275 int error;
276
277 error = ip_send(inp, m, len+TCPSIZE, FALSE);
278 if (error)
279 /*
280 * Since we use retransmissions, don't need to tell user
281 * process about this. (Can be as simple as interface
282 * or host structure queues are too long due to current
283 * heavy traffic. Backing off will take care of that.)
284 */
285 if (error != ENOBUFS)
286 advise_user(inp->inp_socket, error);
287 return (error);
288 }
289 }
290
291 /*
292 * Find the first empty spot in rcv buffer
293 */
firstempty(tp)294 sequence firstempty(tp)
295 register struct tcpcb *tp;
296 {
297 sequence retval;
298
299 FIRSTEMPTY(tp, retval);
300 return(retval);
301 }
302
303
304 /*
305 * TCP timer update routine
306 */
tcp_timeo()307 tcp_timeo()
308 {
309 register struct inpcb *inp, *next;
310 register struct tcpcb *tp;
311 register i;
312 register s;
313 extern sequence tcp_iss; /* tcp initial send seq # */
314 static int tcpmins; /* tcp minute timer */
315
316 /* search through tcb and update active timers */
317 s = splnet();
318 inp = tcp.inp_next;
319 while (inp != &tcp)
320 {
321 next = inp->inp_next;
322 if (tp = inptotcpcb(inp))
323 {
324 if (tp->sws_qff < SWS_QFF_DEF)
325 tp->sws_qff ++;
326
327 for (i = TINIT; i <= TDELACK; i++)
328 if (tp->t_timers[i] != 0 && --tp->t_timers[i] == 0)
329 {
330 struct work w;
331
332 w.w_type = ISTIMER;
333 w.w_stype = i;
334 w.w_tcb = tp;
335 w.w_dat = (char *) NULL;
336 if (action(&w) == CLOSED)
337 goto next_tcb;
338 }
339
340 if (tp->t_timers[TXMT] < MAX_TCPTIMERVAL-1)
341 tp->t_timers[TXMT]++;
342
343 if (tcpmins == 0)
344 {
345 if (tp->t_timers[TNOACT] != 0 && --tp->t_timers[TNOACT] == 0)
346 w_alloc(ISTIMER, TNOACT, tp, 0);
347 }
348 }
349 next_tcb:
350 inp = next;
351 }
352 splx(s);
353
354 if (--tcpmins < 0)
355 tcpmins = 120-1; /* zero-origin strikes again */
356 tcp_iss += ISSINCR; /* increment iss */
357 }
358
359
360 /*
361 * Do TCP option processing
362 */
tcp_opt(tp,t,hlen)363 tcp_opt(tp, t, hlen)
364 register struct tcpcb *tp;
365 register struct th *t;
366 int hlen;
367 {
368 register char *p;
369 register i, j, len;
370
371 p = (char *)((int)t + sizeof(struct th)); /* -> at options */
372
373 if ((i = hlen - TCPSIZE) > 0)
374 { /* any options */
375
376 while (i > 0)
377
378 switch (*p++)
379 {
380 case TCP_END_OPT:
381 default: /* garbage: throw away rest */
382 return;
383
384 case TCP_NOP_OPT:
385 i--;
386 break;
387
388 case TCP_MAXSEG_OPT: /* max segment size */
389 if (t->t_flags&T_SYN && !tp->syn_rcvd)
390 {
391 len = ntohs(*(u_short *)((int)p + 1));
392 /*
393 * may not have a route yet
394 */
395 if (!tp->t_in_pcb->inp_route.ro_rt)
396 /* in LISTEN state */
397 tp->t_maxseg = len;
398 else
399 /* connecting to server */
400 tp->t_maxseg =
401 MIN(tp->t_in_pcb->inp_route.ro_rt->rt_ifp->if_mtu -
402 TCPIPMAX, len);
403 tp->t_maxseg -= tp->t_in_pcb->inp_optlen;
404 }
405 if ((j = *p) == 0)
406 break;
407 i -= j;
408 p += j - 1;
409 }
410 }
411 }
412
413 /*
414 * Called at splimp from uipc_mbuf.c
415 * Network needs some space freed! Remove unprocessed packets.
416 */
tcp_drain()417 tcp_drain()
418 {
419 register struct inpcb *inp;
420 register struct tcpcb *tp;
421 register struct mbuf *m;
422
423 for (inp = tcp.inp_next; inp != &tcp; inp = inp->inp_next)
424 {
425 tp = (struct tcpcb *)inp->inp_ppcb;
426
427 if (tp == NULL)
428 continue;
429
430 while (m = tp->t_rcv_unack)
431 {
432 tp->t_rcv_unack = m->m_act;
433 m->m_act = (struct mbuf *)NULL;
434 m_freem (m);
435 }
436 }
437 }
438