1 #ifdef	RCSIDENT
2 static char rcsident[] = "$Header: tcp_input.c,v 1.25 85/07/31 09:33:47 walsh Exp $";
3 #endif
4 
5 #include "../h/param.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/kernel.h"
9 #include "../h/inode.h"
10 #include "../h/mbuf.h"
11 #include "../h/socket.h"
12 #include "../h/socketvar.h"
13 #include "../h/syslog.h"
14 
15 #include "../net/if.h"
16 #include "../net/route.h"
17 
18 #include "../bbnnet/in.h"
19 #include "../bbnnet/net.h"
20 #include "../bbnnet/in_pcb.h"
21 #include "../bbnnet/in_var.h"
22 #include "../bbnnet/fsm.h"
23 #include "../bbnnet/tcp.h"
24 #include "../bbnnet/seq.h"
25 #include "../bbnnet/ip.h"
26 #include "../bbnnet/fsmdef.h"
27 #include "../bbnnet/macros.h"
28 #include "../bbnnet/nopcb.h"
29 #ifdef	HMPTRAPS
30 #include "../bbnnet/hmp_traps.h"
31 #endif
32 
/*
 * HMP_TRAP(a,b,c) forwards its arguments to hmp_trap() when the kernel is
 * configured with HMPTRAPS.  Without HMPTRAPS it expands to nothing, so
 * the arguments are not evaluated at all.
 */
#ifndef HMPTRAPS
#define HMP_TRAP(a,b,c)
#else
#define HMP_TRAP(a,b,c)	hmp_trap(a,b,c)
#endif
38 
39 extern int nosum;
40 extern struct inpcb tcp;
41 
/*
 * net preproc (66,67,68,69,70,71,72,73,74,75,76)
 *
 * NETPREPR -- macro form of the former function netprepr().  Screens an
 * incoming segment against the connection state before the segment is
 * handed to the finite state machine.
 *
 * tp		valid tcpcb
 * n		valid th (header of the incoming segment)
 * inp		valid inpcb ( == tp->t_in_pcb )
 * retval	lvalue that receives the verdict:
 *		    SAME	segment is acceptable
 *		    CLOSED	t_close() has been called on tp
 *		    -1		unacceptable, caller should eat the segment
 *
 * Depending on the state the macro may also call send_rst() or
 * send_tcp(tp, TCP_CTL), and it reloads the TNOACT timer for acceptable
 * segments in states >= ESTAB.
 *
 * The expansion is a bare brace block (not do/while), so it must be used
 * as a statement of its own, and it declares the locals "rcvbuf" and
 * "seg_end", which the arguments must not be named.
 */
#define NETPREPR(tp, n, inp, retval) \
{ \
	(retval) = (-1);	/* pessimistic default: drop the segment */ \
 \
	switch ((tp)->t_state) { \
	case LISTEN: \
		/* RST: ignore.  ACK: answer with RST.  Otherwise need SYN. */ \
		if (!((n)->t_flags & T_RST)) { \
			if ((n)->t_flags & T_ACK) \
				send_rst(tp, n); \
			else if ((n)->t_flags & T_SYN) \
				(retval) = SAME; \
		} \
		break; \
 \
	case SYN_SENT: \
		/* Bad ACK: RST.  RST with (good) ACK: close.  Else need SYN. */ \
		if (((n)->t_flags & T_ACK) && \
		    (SEQ_GEQ((tp)->iss, (n)->t_ackno) || \
		     SEQ_GT((n)->t_ackno, (tp)->snd_hi))) { \
			send_rst(tp, n); \
		} else if ((n)->t_flags & T_RST) { \
			if ((n)->t_flags & T_ACK) { \
				t_close(tp, ECONNREFUSED); \
				(retval) = CLOSED; \
			} \
		} else if ((n)->t_flags & T_SYN) { \
			(retval) = SAME; \
		} \
		break; \
 \
	case 0: \
		/* bound, but neither listening nor connecting yet */ \
		break; \
 \
	default: { \
		struct sockbuf *rcvbuf; \
		sequence seg_end; \
 \
		/* \
		 * Some part of the segment must lie inside the receive \
		 * window (it may be partly retransmission, partly new); \
		 * if none does, just ACK and drop. \
		 */ \
		rcvbuf = &(inp)->inp_socket->so_rcv; \
		seg_end = (n)->t_seq; \
		if ((n)->t_len)		/* may be an ACK-only packet */ \
			seg_end += (n)->t_len - 1; \
		if ((n)->t_flags & T_FIN) \
			seg_end++;	/* FIN + rxmitted data (TOPS-20) */ \
 \
		if (SEQ_LT(seg_end, (tp)->rcv_nxt) || \
		    SEQ_GEQ((n)->t_seq, (tp)->rcv_nxt + sbspace(rcvbuf))) { \
			(tp)->t_preproc++; \
			send_tcp(tp, TCP_CTL); \
			HMP_TRAP(T_TCP_WINDOW, (caddr_t)0,0); \
		} else if ((n)->t_flags & T_RST) { \
			/* \
			 * Due to 4.2BSD net architecture there is no need \
			 * to send an L_SYN_RCVD socket back to LISTEN on \
			 * reset: server socket and communication paths \
			 * are separate. \
			 */ \
			t_close(tp, ENETRESET); \
			(retval) = CLOSED; \
		} else if (((n)->t_flags & T_SYN) && \
			   ((tp)->t_state >= ESTAB)) { \
			/* no SYNs allowed unless *SYN_RCVD */ \
			send_rst(tp, n); \
			t_close(tp, ENETRESET); \
			(retval) = CLOSED; \
		} else if ((n)->t_flags & T_ACK) { \
			/* \
			 * Must have a good ACK.  Bad ACKs cause resets only \
			 * in the SYN_RCVD states; elsewhere this may just \
			 * be a slow packet. \
			 */ \
			if (SEQ_GT((tp)->snd_una, (n)->t_ackno) || \
			    SEQ_GT((n)->t_ackno, (tp)->snd_hi)) { \
				if ((tp)->t_state == SYN_RCVD || \
				    (tp)->t_state == L_SYN_RCVD) \
					send_rst(tp, n); \
			} else { \
				/* \
				 * Acceptable segment: restart the no \
				 * activity timer on established and \
				 * closing connections. \
				 */ \
				if ((tp)->t_state >= ESTAB) \
					(tp)->t_timers[TNOACT] = (tp)->t_noact; \
				(retval) = SAME; \
			} \
		} \
	    } \
	} \
}
141 
142 
143 int	tcp_net_keep;
144 
145 /*
146  * This is the scheduler for the tcp machine.  It is called
147  * from the lower network levels, either directly from the
148  * internet level, in case of input from the network; or
149  * indirectly from netmain, in case of user or timer events
150  * which awaken the main loop.
151  */
152 tcp_input(mp, fragsize)
153 register struct mbuf *mp;
154 int fragsize;
155 {
156     register struct th *tp;
157     register int hlen;
158     register struct tcpcb *t;
159     register struct inpcb *inp;
160     struct mbuf *m;
161     int i, tlen;
162     struct work w;
163     u_short cks;
164 
165     tcpstat.t_total ++;
166 
167     /*
168      * see ip_input()
169      */
170     if ((mp->m_off > MMAXOFF) || (mp->m_len < sizeof(struct th)))
171     {
172 	if ((mp = m_pullup(mp, sizeof(struct th))) == NULL)
173 	{
174 	    tcpstat.t_tooshort ++;
175 	    return;
176 	}
177     }
178 
179     /* set up needed info from ip header, note that beginning
180        of tcp header struct overlaps ip header.  ip options
181        have been removed by ip level option processing */
182 
183     tp = mtod(mp, struct th *);
184 
185     /* make sure header does not overflow mbuf */
186 
187     hlen = tp->t_off << TCP_OFFSHIFT;
188     if (hlen < TCPSIZE)
189     {
190 	ip_log ((struct ip *) tp, "tcp t_off too small");
191 	netlog(mp);
192 	return;
193     }
194     if (hlen > mp->m_len)
195     {
196 	if ((mp = m_pullup(mp, hlen)) == NULL)
197 	{
198 	    ip_log((struct ip *) tp, "tcp header overflow");
199 #ifdef HMPTRAPS
200 	    /* hmp_trap(T_TCP_OVFLO, (caddr_t)0, 0); */
201 #else
202 	    /* netlog(mp); */
203 #endif
204 	    return;
205 	}
206 	tp = mtod(mp, struct th *);
207     }
208 
209     tlen = ((struct ip *)tp)->ip_len;
210     tp->t_len = htons((u_short)tlen);
211     tp->t_next = NULL;
212     tp->t_prev = NULL;
213     tp->t_x1 = 0;
214 
215     /*
216      * do checksum calculation, drop seg if bad
217      */
218     i = (u_short)tp->t_sum;
219     tp->t_sum = 0;
220     if (i != (cks = (u_short)in_cksum(mp, tlen + sizeof(struct ip))))
221     {
222 	tcpstat.t_badsum++;
223 	if (! nosum)
224 	{
225 #ifdef HMPTRAPS
226 	    /* hmp_trap(T_TCP_CKSUM, (caddr_t)0,0); */
227 #endif
228 	    inet_cksum_err ("tcp", (struct ip *) tp, (u_long) i, (u_long) cks);
229 	    netlog(mp);
230 	    return;
231 	}
232     }
233 
234     /* find a tcb for incoming message */
235     inp = in_pcblookup(&tcp, tp->t_s.s_addr, tp->t_src,
236 			     tp->t_d.s_addr, tp->t_dst, TRUE);
237 
238     if ((inp != NULL) && ((t = (struct tcpcb *)inp->inp_ppcb) != NULL))
239     {
240 	/* found a tcp for message */
241 	/* byte swap header */
242 
243 	if ((int)(tp->t_len = tlen - hlen) < 0)
244 	{
245 	    ip_log((struct ip *) tp, "tcp header length");
246 #ifdef HMPTRAPS
247 	    /* hmp_trap(T_TCP_HLEN, (caddr_t)0,0); */
248 #else
249 	    netlog(mp);
250 #endif
251 	    return;
252 	}
253 	tp->t_seq = ntohl(tp->t_seq);
254 	tp->t_ackno = ntohl(tp->t_ackno);
255 	tp->t_win = ntohs((u_short)tp->t_win);
256 	tp->t_urp = ntohs((u_short)tp->t_urp);
257 
258 	/* record the max fragment size */
259 
260 	t->t_maxfrag = MAX(t->t_maxfrag, fragsize);
261 
262 	/* do TCP option processing */
263 
264 	if (hlen > TCPSIZE)
265 	    tcp_opt(t, tp, hlen);
266 
267 	/* check seg seq #, do RST processing */
268 
269 	NETPREPR(t, tp, inp, i);
270 	if (i != SAME)
271 	{
272 	    /* segment failed preprocessing.  Drop it and
273 	     * possibly enter new state.  For now, always
274 	     * returns SAME/-1/CLOSED
275 	     */
276 	    m_freem(mp);
277 /*
278 	    if ((i != -1) && (i != CLOSED))
279 		t->t_state = i;
280 */
281 	}
282 	else
283 	{
284 	    if (sbspace(&inp->inp_socket->so_rcv) <= 0 &&
285 		tp->t_len != 0)
286 	    {
287 		/*
288 		 * The user's receive q is full.  Either the
289 		 * remote TCP is not paying attention to the
290 		 * window, or this is a persistence packet.
291 		 *
292 		 * The first reason was once common with
293 		 * TOPS-20.  Let's conserve network resources
294 		 * by holding onto the packet in the unack q.
295 		 * Place it at the end of the list.
296 		 */
297 		mp->m_act = NULL;
298 		if ((m = t->t_rcv_unack) != NULL)
299 		{
300 		    while (m->m_act != NULL)
301 			m = m->m_act;
302 		    m->m_act = mp;
303 		}
304 		else
305 		    t->t_rcv_unack = mp;
306 
307 		/*
308 		 * ACK if it was a window probe, just in case
309 		 * they have a TNOACT timer running.
310 		 */
311 		send_tcp(t, TCP_CTL);
312 	    }
313 	    else
314 	    {
315 		int	act, newstate;
316 		struct socket *so;
317 
318 		/* set up work entry for seg, and call
319 		   the fsm to process it */
320 
321 		hlen += sizeof(struct ip);
322 		mp->m_off += hlen;
323 		mp->m_len -= hlen;
324 
325 		/** HAND CODED action() CALL **/
326 
327 		w.w_type = INRECV;
328 		w.w_tcb = t;
329 		w.w_dat = (char *)tp;
330 
331 		/* get index of action routine from
332 		 * transition table
333 		 */
334 		act = fstab[t->t_state][INRECV];
335 
336 		/* invalid state transition, just
337 		 * print a message and ignore */
338 
339 		if (act == 0)
340 		{
341 		    log(LOG_INFO, "tcp bad state: tcb=%x state=%d INRECV\n", t, t->t_state);
342 		    m_freem(mp);
343 		    return;
344 		}
345 
346 		so = t->t_in_pcb->inp_socket;
347 		tcp_net_keep = FALSE;
348 		newstate = (*fsactab[act])(&w);
349 
350 		/* debugging info */
351 		TCP_DEBUG (so, t, &w, act, newstate);
352 
353 		/* if CLOSED, lost tcpcb */
354 		if ((newstate != SAME) && (newstate != CLOSED))
355 		    t->t_state = newstate;
356 		if (! tcp_net_keep)
357 		    m_freem(mp);
358 
359 		/** END action() **/
360 	    }
361 	}
362     }
363     else
364 	/* nobody wants it */
365 	send_uncon_rst (tp, mp, tlen, hlen);
366 }
367 
368 send_uncon_rst (n, mp, tlen, hlen)
369 register struct th	*n;
370 register struct mbuf	*mp;
371 {
372     struct in_addr tempinaddr;
373     u_short tempport;
374     int error;
375 
376     /* make sure we don't send a RST in response to an RST */
377 
378     if (n->t_flags & T_RST)
379     {
380 	m_freem(mp);
381 	return;
382     }
383 
384     /* free everything but the header */
385 
386     m_freem(mp->m_next);
387     mp->m_next = NULL;
388     mp->m_len = sizeof(struct th);
389 
390     /* form a reset from the packet and send */
391 
392     tempinaddr = n->t_d;
393     n->t_d = n->t_s;
394     n->t_s = tempinaddr;
395 
396     tempport = n->t_src;
397     n->t_src = n->t_dst;
398     n->t_dst = tempport;
399 
400     if (n->t_flags&T_ACK)
401 	n->t_seq = n->t_ackno;
402     else
403     {
404 	n->t_ackno = htonl((u_long)
405 	    ntohl((u_long)n->t_seq)
406 	    + tlen - hlen
407 	    + (n->t_flags&T_SYN ? 1 : 0));
408 	n->t_seq = 0;
409     }
410     n->t_flags	= (n->t_flags&T_ACK) ? T_RST : T_RST+T_ACK;
411     n->t_len	= htons((u_short)TCPSIZE);
412     n->t_off	= TCPSIZE >> TCP_OFFSHIFT;
413     n->t_sum	= in_cksum(mp, sizeof(struct th));
414 
415     NOPCB_IPSEND (mp, TCPSIZE, FALSE, error);
416     tcpstat.t_badsegs++;
417 
418 #ifdef lint
419     error = error;
420 #endif
421 }
422 
423 /*
424  * Entry into TCP finite state machine
425  */
426 action(wp)
427 register struct work *wp;
428 {
429     register act, newstate;
430     register struct tcpcb *tp;
431     register struct socket *so;
432 
433     tp = wp->w_tcb;
434     so = tp->t_in_pcb->inp_socket;
435 
436     ACTION (tp, so, wp, wp->w_type, wp->w_dat, act, newstate);
437     return(newstate);
438 }
439 
440 
441 struct mbuf *tcpdebuf;
442 int tcprint;
443 
444 /*
445  * Write a record in the tcp debugging log
446  */
447 tcp_debug(tp, wp, newstate)
448 register struct tcpcb *tp;
449 register struct work *wp;
450 register newstate;
451 {
452     register struct t_debug *dp;
453     register struct mbuf *m;
454 
455 #ifdef TCPDEBUG
456     if (tcprint)
457     {
458 	/*
459 	 * Print debugging info directly on the console (use this for
460 	 * intial testing only).
461 	 */
462 	printf("TCP(%x) %s X %s", tp, tcpstates[tp->t_state],
463 	    tcpinputs[wp->w_type]);
464 
465 	if (wp->w_type == ISTIMER)
466 	    printf("(%s)", tcptimers[wp->w_stype]);
467 
468 	printf(" --> %s",
469 	    tcpstates[ (newstate > 0) ? newstate : tp->t_state]);
470 
471 	if (newstate < 0)
472 	    printf(" (FAILED)\n");
473 	else
474 	    putchar('\n', 0);
475     }
476 #endif
477 
478     /*
479      * Get an mbuf to write the debugging record into.  If we don't already
480      * have one, allocate a new one.
481      */
482     if ((m = tcpdebuf) == NULL)
483     {
484 	register struct mbuf *c;
485 
486 	if ((tcpdebuf = m = m_get(M_DONTWAIT, MT_DATA)) == NULL)
487 	    return;
488 	/*
489 	 * If possible, use a cluster so that we need to wake up the
490 	 * raw listener less often and reduce likelihood he misses
491 	 * some information.
492 	 */
493 	MCLGET(c, 1);
494 	if (c)
495 	{
496 	    m->m_off = ((int) c) - ((int) m);
497 	    m->m_act = (struct mbuf *) TCDBLEN;
498 	}
499 	else
500 	    m->m_act = (struct mbuf *) TDBLEN;
501 	m->m_len = 0;
502     }
503 
504     dp = (struct t_debug *) (mtod(m, char *) + m->m_len);
505     /*
506      * Set up the debugging record.
507      */
508     dp->t_iptime	= iptime();
509     dp->t_input	= wp->w_type;
510     dp->t_timer	= wp->w_stype;
511     dp->t_newstate	= newstate;
512     if (tp != NULL)
513     {
514 	dp->t_oldstate = tp->t_state;
515 	dp->t_tcb = (*tp);	/* structure copy */
516     }
517     else
518 	dp->t_oldstate = 0;
519 
520     if (wp->w_type == INRECV)
521     {
522 	register struct th *n;
523 
524 	n = (struct th *)wp->w_dat;
525 	dp->t_hdr = (*n);	/* structure copy */
526     }
527     /*
528      * If the mbuf is full, dispatch it to a raw listener.
529      * Also flush if the connection we're debugging closes so that
530      * packet-printer/systems analyst sees final transitions.
531      */
532     m->m_len += sizeof(struct t_debug);
533     if ((m->m_len >= ((int) m->m_act)) || (newstate == CLOSED))
534     {
535 	m->m_act = 0;
536 	tcpdebuglog(m);
537 	tcpdebuf = NULL;
538     }
539 }
540