1 #ifdef	RCSIDENT
2 static char rcsident[] = "$Header: tcp_prim.c,v 1.23 85/07/31 09:34:04 walsh Exp $";
3 #endif
4 
5 #include "../h/param.h"
6 #include "../h/errno.h"
7 #include "../h/mbuf.h"
8 #include "../h/socket.h"
9 #include "../h/socketvar.h"
10 #include "../h/protosw.h"
11 
12 #include "../net/if.h"
13 #include "../net/route.h"
14 
15 #include "../bbnnet/in.h"
16 #include "../bbnnet/in_pcb.h"
17 #include "../bbnnet/in_var.h"
18 #include "../bbnnet/net.h"
19 #include "../bbnnet/fsm.h"
20 #include "../bbnnet/tcp.h"
21 #include "../bbnnet/seq.h"
22 #include "../bbnnet/ip.h"
23 #include "../bbnnet/macros.h"
24 #include "../bbnnet/sws.h"
25 
26 extern struct inpcb tcp;
27 
28 /*
29  * TCP finite state machine primitives
30  *
31  * These routines are called from the procedures in tcp_procs.c to do low
32  * level protocol functions.
33  */
34 
35 /*
36  * The hope is that a bcopy will fill in most tcp/ip header fields quicker
37  * than a step by step stuffing of each individually when we have to send
38  * some info.
39  */
40 struct th *tcp_template(tp)
41 struct tcpcb	*tp;
42 {
43     register struct mbuf	*m;
44     register struct th	*t;
45     register struct inpcb	*inp;
46 
47     m = m_getclr(M_WAIT, MT_HEADER);
48     if (m == NULL)
49 	return ((struct th *) NULL);
50     m->m_len = sizeof (struct th);
51     t = mtod(m, struct th *);
52     inp = tp->t_in_pcb;
53 
54     t->t_pr = IPPROTO_TCP;
55     t->t_s = inp->inp_laddr;
56     t->t_d = inp->inp_faddr;
57     t->t_src = inp->inp_lport;
58     t->t_dst = inp->inp_fport;
59     t->t_off = TCPSIZE >> TCP_OFFSHIFT;
60 
61     return (t);
62 }
63 
64 #ifdef GPROF
65 long tcp_pkt_size[2*1024];
66 #endif
67 
68 /*
69  * Send a tcp segment
70  */
71 send_pkt(tp, flags, len)
72 register struct tcpcb *tp;
73 register int flags;
74 register int len;	/* in sequence units: includes SYN, FIN */
75 {
76     register struct th *t;
77     register struct inpcb *inp;
78     register struct mbuf *m;
79     struct mbuf *dat;
80     int i;
81     struct sockbuf *sorcv;
82     short *p;
83     struct th *tmpt;
84 
85     inp = tp->t_in_pcb;
86 
87     /*
88      * Adjust data length for SYN and FIN.
89      */
90     if (flags & T_FIN)
91 	len--;
92     if (flags & T_SYN)
93 	len--;
94 
95     /*
96      * and get a copy of the data for this transmission
97      */
98     dat = (struct mbuf *) NULL;
99     if (len > 0)
100     {
101 	int	off;
102 
103 	off = tp->snd_nxt - tp->snd_una;
104 	if (! tp->syn_acked)
105 	    if (! (flags & T_SYN))
106 		off --;
107 	if ((dat = m_copy (inp->inp_socket->so_snd.sb_mb, off, len)) == NULL)
108 	    return (ENOBUFS);
109     }
110 
111 #ifdef MBUF_DEBUG
112     m = m_get(M_WAIT, MT_HEADER);
113 #else
114     MGET(m, M_WAIT, MT_HEADER);
115 #endif
116     if (m == NULL)
117 	return(ENOBUFS);
118     /*
119      * Build tcp leader at bottom of new buffer to leave room for lower
120      * level leaders.  Leave an extra four bytes for TCP max segment size
121      * option, which is sent in SYN packets.
122      * Align header for memory access speed in checksumming...
123      */
124     m->m_off = (MMAXOFF - sizeof(struct th) - TCP_MAXSEG_OPTLEN) &
125 	~(sizeof(long) -1);
126     m->m_len = sizeof(struct th);
127     m->m_next = dat;
128     t = mtod(m, struct th *);
129 
130     if (tp->t_template == NULL)
131 	panic("send_pkt");
132     bcopy((caddr_t)tp->t_template, (caddr_t)t, sizeof (struct th));
133 
134 #ifndef NOTCPOPTS
135     /*
136      * Insert maximum segment size option for SYN.
137      */
138     if (flags & T_SYN)
139     {
140 	/*
141 	 * may not have a route yet.
142 	 */
143 	if (inp->inp_route.ro_rt)
144 	{
145 	    m->m_len += TCP_MAXSEG_OPTLEN;
146 	    /* SYN occupies seq space */
147 	    len += TCP_MAXSEG_OPTLEN;
148 	    t->t_off = (TCPSIZE + TCP_MAXSEG_OPTLEN) >> TCP_OFFSHIFT;
149 	    p = (short *)((int)t + sizeof(struct th));
150 	    *p++ = htons((u_short)TCP_MAXSEG_OPTHDR);
151 	    *p = htons((u_short)inp->inp_route.ro_rt->rt_ifp->if_mtu
152 		- TCPIPMAX);
153 	}
154     }
155 #endif
156 
157     t->t_len   = htons((u_short)len + TCPSIZE);
158     t->t_seq   = htonl(tp->snd_nxt);
159     t->t_ackno = htonl(tp->rcv_nxt);
160 
161 #ifdef GPROF
162     if (len < (sizeof(tcp_pkt_size)/sizeof(tcp_pkt_size[0])) - TCPSIZE)
163 	tcp_pkt_size[len+TCPSIZE] ++;
164     else
165 	tcp_pkt_size[0] ++;
166 #endif
167 
168     if (tp->snd_rst)
169     {
170 	flags |= T_RST;
171 	flags &= ~T_SYN;
172     }
173 
174     if (tp->snd_urg)
175     {
176 	if (SEQ_GT(tp->snd_urp, tp->snd_nxt))
177 	{
178 	    short	up;
179 
180 	    /*
181 	     * SEQ_LEQ(tp->snd_urp, tp->snd_nxt+len)
182 	     * Strictly speaking, we should be able to have the
183 	     * urgent pointer point into the stream beyond the
184 	     * current end of packet, but this is just in case
185 	     * some implementation is not ready for that.
186 	     */
187 	    flags |= T_URG;
188 	    up = MIN(len -1, tp->snd_urp - tp->snd_nxt -1);
189 	    t->t_urp = htons((u_short)up);
190 	}
191     }
192 
193     if (tp->syn_rcvd)
194     {
195 #if T_DELACK > 0
196 	tp->lastack = tp->rcv_nxt;
197 #endif
198 	flags |= T_ACK;
199     }
200 
201     t->t_flags = flags;
202 
203     /* Calculate the window we should advertise */
204 
205     sorcv = &inp->inp_socket->so_rcv;
206     /*
207      * Count bytes left in user rcv queue, and reduce by sequencing queue
208      * Counting the sequencing q contracts the window when packets are
209      * lost (== when the network is over-loaded).
210      */
211     i = sbspace(sorcv) - tp->t_rcv_len;
212     /*
213      * apply receiver's solution to SWS in case sender does not have such
214      * an algorithm.  One 8th was determined by benchmarks writing 2k
215      * buffers on an Ethernet connection.  It has a slightly negative
216      * influence on IMP networks when writing 1k buffers.
217      *
218      * (sorcv->sb_hiwat >> 3) limited by 256 == 2k / 8, since application
219      * may adjust its buffering to large amounts via ioctl call.  An eighth
220      * of a large number may be a reasonable sized packet to send.
221      *
222      * Only apply this algorithm if are getting packets in order,
223      * so that advertisement of 0 window does not prevent retransmission
224      * of dropped packet.
225      */
226     tmpt = tp->t_rcv_next;
227     if ((i < MIN(256, (sorcv->sb_hiwat >> 3))) &&
228 	((tmpt == (struct th *) tp) || SEQ_LEQ(tmpt->t_seq, tp->rcv_nxt)))
229 	i = 0;
230     else
231     {
232 	/*
233 	 * if this connection gets fragmented, constrain the windowsize
234 	 */
235 	if (tp->t_maxfrag)
236 	    i = MIN(i, tp->t_maxfrag*3);
237 
238 	if (i < 0)
239 	    i = 0;
240     }
241 
242 #if T_DELACK > 0
243     tp->rcv_wnd = i;
244 #endif
245     t->t_win = htons((u_short)i);
246     /*
247      * If we sent a zero window, we should try to send a non-zero ACK ASAP.
248      */
249     if (i == 0)
250 	tp->sent_zero = TRUE;
251     else
252 	tp->sent_zero = FALSE;
253 
254     t->t_sum = in_cksum(m, len + sizeof(struct th));
255 
256     if (inp->inp_socket->so_options & SO_DEBUG)
257     {
258 	struct work w;
259 
260 	w.w_type = INRECV;	/* not really. use -1 newstate */
261 	w.w_tcb  = tp;
262 	w.w_dat  = (char *)t;
263 	tcp_debug(tp, &w, -1);
264     }
265 
266     /*
267      * and ship packet off via IP.  Remember that since this protocol
268      * involves retransmissions, errors can occur asynchronous to a
269      * (write) system call, and that therefore we can not send the
270      * error all the way back up through subroutine return values.  We
271      * must also post it back via advise_user() at some point, and this
272      * looks like a good point to try it.
273      */
274     {
275 	int	error;
276 
277 	error = ip_send(inp, m, len+TCPSIZE, FALSE);
278 	if (error)
279 	    /*
280 	     * Since we use retransmissions, don't need to tell user
281 	     * process about this.  (Can be as simple as interface
282 	     * or host structure queues are too long due to current
283 	     * heavy traffic.  Backing off will take care of that.)
284 	     */
285 	    if (error != ENOBUFS)
286 		advise_user(inp->inp_socket, error);
287 	return (error);
288     }
289 }
290 
291 /*
292  * Find the first empty spot in rcv buffer
293  */
294 sequence firstempty(tp)
295 register struct tcpcb *tp;
296 {
297     sequence	retval;
298 
299     FIRSTEMPTY(tp, retval);
300     return(retval);
301 }
302 
303 
304 /*
305  * TCP timer update routine
306  */
307 tcp_timeo()
308 {
309     register struct inpcb *inp, *next;
310     register struct tcpcb *tp;
311     register i;
312     register s;
313     extern sequence tcp_iss;	/* tcp initial send seq # */
314     static int tcpmins;	/* tcp minute timer */
315 
316     /* search through tcb and update active timers */
317     s = splnet();
318     inp = tcp.inp_next;
319     while (inp != &tcp)
320     {
321 	next = inp->inp_next;
322 	if (tp = inptotcpcb(inp))
323 	{
324 	    if (tp->sws_qff < SWS_QFF_DEF)
325 		tp->sws_qff ++;
326 
327 	    for (i = TINIT; i <= TDELACK; i++)
328 		if (tp->t_timers[i] != 0 && --tp->t_timers[i] == 0)
329 		{
330 		    struct work w;
331 
332 		    w.w_type = ISTIMER;
333 		    w.w_stype = i;
334 		    w.w_tcb = tp;
335 		    w.w_dat = (char *) NULL;
336 		    if (action(&w) == CLOSED)
337 			goto next_tcb;
338 		}
339 
340 	    if (tp->t_timers[TXMT] < MAX_TCPTIMERVAL-1)
341 		tp->t_timers[TXMT]++;
342 
343 	    if (tcpmins == 0)
344 	    {
345 		if (tp->t_timers[TNOACT] != 0 && --tp->t_timers[TNOACT] == 0)
346 		    w_alloc(ISTIMER, TNOACT, tp, 0);
347 	    }
348 	}
349 next_tcb:
350 	inp = next;
351     }
352     splx(s);
353 
354     if (--tcpmins < 0)
355 	tcpmins = 120-1;	/* zero-origin strikes again */
356     tcp_iss += ISSINCR;	/* increment iss */
357 }
358 
359 
360 /*
361  * Do TCP option processing
362  */
363 tcp_opt(tp, t, hlen)
364 register struct tcpcb *tp;
365 register struct th *t;
366 int hlen;
367 {
368     register char *p;
369     register i, j, len;
370 
371     p = (char *)((int)t + sizeof(struct th));	/* -> at options */
372 
373     if ((i = hlen - TCPSIZE) > 0)
374     {			/* any options */
375 
376 	while (i > 0)
377 
378 	    switch (*p++)
379 	{
380 	  case TCP_END_OPT:
381 	  default:	/* garbage: throw away rest */
382 	    return;
383 
384 	  case TCP_NOP_OPT:
385 	    i--;
386 	    break;
387 
388 	  case TCP_MAXSEG_OPT:	/* max segment size */
389 	    if (t->t_flags&T_SYN && !tp->syn_rcvd)
390 	    {
391 		len = ntohs(*(u_short *)((int)p + 1));
392 		/*
393 		 * may not have a route yet
394 		 */
395 		if (!tp->t_in_pcb->inp_route.ro_rt)
396 		    /* in LISTEN state */
397 		    tp->t_maxseg = len;
398 		else
399 		    /* connecting to server */
400 		    tp->t_maxseg =
401 		    MIN(tp->t_in_pcb->inp_route.ro_rt->rt_ifp->if_mtu -
402 		    TCPIPMAX, len);
403 		tp->t_maxseg -= tp->t_in_pcb->inp_optlen;
404 	    }
405 	    if ((j = *p) == 0)
406 		break;
407 	    i -= j;
408 	    p += j - 1;
409 	}
410     }
411 }
412 
413 /*
414  * Called at splimp from uipc_mbuf.c
415  * Network needs some space freed!  Remove unprocessed packets.
416  */
417 tcp_drain()
418 {
419     register struct inpcb *inp;
420     register struct tcpcb *tp;
421     register struct mbuf *m;
422 
423     for (inp = tcp.inp_next; inp != &tcp; inp = inp->inp_next)
424     {
425 	tp = (struct tcpcb *)inp->inp_ppcb;
426 
427 	if (tp == NULL)
428 	    continue;
429 
430 	while (m = tp->t_rcv_unack)
431 	{
432 	    tp->t_rcv_unack = m->m_act;
433 	    m->m_act = (struct mbuf *)NULL;
434 	    m_freem (m);
435 	}
436     }
437 }
438