#ifdef	RCSIDENT
static char rcsident[] = "$Header: tcp_procs.c,v 1.32 85/07/31 09:34:27 walsh Exp $";
#endif

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/mbuf.h"
#include "../h/socket.h"
#include "../h/socketvar.h"
#include "../h/syslog.h"

#include "../net/if.h"
#include "../net/route.h"

#include "../bbnnet/in.h"
#include "../bbnnet/in_pcb.h"
#include "../bbnnet/in_var.h"
#include "../bbnnet/net.h"
#include "../bbnnet/fsm.h"
#include "../bbnnet/tcp.h"
#include "../bbnnet/seq.h"
#include "../bbnnet/ip.h"
#include "../bbnnet/macros.h"
#ifdef HMPTRAPS
#include "../bbnnet/hmp_traps.h"
#endif

/*
 * TCP finite state machine procedures.
 *
 * Called from finite state machine action routines, these do most of the work
 * of the protocol.  They in turn call primitive routines (in tcp_prim) to
 * perform lower level functions.
 */

/*
 * This works cooperatively with t_close for freeing up data on receive/send
 * buffers.
 */
tcp_pcbdisconnect(inp)
struct inpcb *inp;
{
    register struct tcpcb	*tp;

    if (tp = (struct tcpcb *) inp->inp_ppcb)
    {
	inp->inp_ppcb = (caddr_t) NULL;

	/*
	 * free all data on receive queues
	 */
	{
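	    /*
	     * Note: the sequencing queue is a circular, doubly linked
	     * list whose head is the tcpcb itself, so the
	     * (struct th *)tp comparison below is the end-of-list
	     * test, not a NULL check.
	     */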
	    register struct th	*t, *next;

	    t = tp->t_rcv_next;
	    while (t != (struct th *)tp)
	    {
		next = t->t_next;
		m_freem(dtom(t));
		t = next;
	    }
	}
	{
	    register struct mbuf	*m, *next;

	    m = tp->t_rcv_unack;
	    while (m != NULL)
	    {
		next = m->m_act;
		m_freem(m);
		m = next;
	    }
	}

	if (tp->oob_data)
	    m_freem(tp->oob_data);

	if (tp->t_template)
	    m_free(dtom(tp->t_template));

	m_free(dtom(tp));
    }
}

/*
 * Delete TCB and free all resources used by the connection.  Called after
 * the close protocol is complete.
 */
t_close(tp, state)
register struct tcpcb *tp;
short state;
{
    register struct inpcb *inp;

    /*
     * in_pcbdetach() calls soisdisconnected(), which wakes up the
     * process if it's sleeping.  Need only pass user error code if
     * appropriate (like ENETRESET) and hope he'll close the file
     * descriptor.  Don't need to clear timers since they're in the
     * tcpcb to be deleted.
     */
    inp = tp->t_in_pcb;
    if (!tp->usr_abort)
	inp->inp_socket->so_error = state;
    in_pcbdetach(inp, tcp_pcbdisconnect);
}

short max_ack_skipped = 1;

/*
 * We are in a position where, perhaps, we should send a TCP segment (packet).
 * The important decisions are:
 *	1)  How big a segment should we send?  This is important since most
 *		overhead occurs at the packet level (interrupts, queueing,
 *		header field checks...) and not at the byte level.
 *	2)  Is it worth it to send this packet?  Are we sending enough data,
 *		or would we be better off waiting for some more to queue up?
 *
 * The above considerations apply when we are called in response to a user's
 * write request.  We are also called on packet arrival in order to send an
 * ack (with piggy-backed data), and to respond to window openings by sending
 * any pent up data.
 *
 * Send a TCP segment.  Send data from the left window edge of the send
 * buffer up to the window size or the end of the buffer (whichever is less).
 * Set retransmission timers.
 *
 * The Ford/Nagle algorithm (if data is already outstanding, send only if the
 * packet would be large) comes to mind, but it is aimed primarily at telnet
 * traffic and conflicts with the ideas in the comments down by PUSH.  Has
 * the idea of TCP changed since the RFC?
 */
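
/*
 * Illustrative sketch only, never compiled and not referenced by the
 * code below: the core of the size decision in send_tcp() is clamping
 * the proposed segment to the queued data, the offered window, and the
 * negotiated maximum segment size.  The parameter names are local
 * stand-ins for the tcpcb fields of the same flavor; the sketch ignores
 * SYN/FIN sequence space and sequence-number wraparound (the real code
 * uses the SEQ_ macros for comparisons).
 */
#ifdef notdef
static unsigned
sketch_seglen(snd_nxt, snd_una, sb_cc, snd_wnd, maxseg)
unsigned snd_nxt, snd_una, sb_cc, snd_wnd, maxseg;
{
    unsigned last = snd_una + sb_cc;	/* just past last queued datum */
    unsigned wind = snd_una + snd_wnd;	/* just past usable window */
    unsigned lst = (last < wind) ? last : wind;
    unsigned len = (lst > snd_nxt) ? lst - snd_nxt : 0;

    if (len > maxseg)			/* peer's per-segment limit */
	len = maxseg;
    return (len);
}
#endif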
send_tcp(tp, ctl)
register struct tcpcb *tp;
int ctl;
{
    register sequence last, wind;
    register int snd_flags;
    register int len;
    struct sockbuf *sosnd;
    int forced, error;
    int sendalot;

    sosnd = &tp->t_in_pcb->inp_socket->so_snd;
    sendalot = FALSE;
    snd_flags = 0;
    tp->snd_lst = tp->snd_nxt;
    forced = FALSE;
    /*
     * Send SYN if this is first data (ISS)
     */
    if (SEQ_EQ(tp->snd_nxt, tp->iss))
    {
	snd_flags |= T_SYN;
	tp->snd_lst++;
    }
    /*
     * Get the seq # just past the last datum in the send buffer
     */
    last = tp->snd_una;
    if (!tp->syn_acked)
	last++;	/* don't forget SYN */
    last += sosnd->sb_cc;
    /*
     * If no data to send in buffer, just do FIN check, otherwise see
     * how much we should send in segment.
     */
    if (SEQ_GEQ(tp->snd_nxt, last))
    {
	/*
	 * Should we send a FIN?  Only if one is wanted and we
	 * haven't already sent it.
	 */
	if (tp->snd_fin &&
	    (SEQ_EQ(tp->seq_fin, tp->iss) ||
	    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
	{
	    snd_flags |= T_FIN;
	    tp->seq_fin = tp->snd_lst++;
	}
    }
    else if (tp->syn_acked)
    {
	/*
	 * We can't send more than we have (SYN + data represented
	 * by last).  Nor can we send more than the other end is
	 * prepared to receive (represented by the window in snd_wnd
	 * and wind).
	 *
	 * Only send a segment if there is something in the buffer,
	 * and a non-zero window has been received.
	 */
	wind = tp->snd_una + tp->snd_wnd;
	tp->snd_lst = SEQ_MIN(last, wind);

	/*
	 * Make sure the segment is not larger than the remote end
	 * can handle.  Though they may advertise a 4K window, perhaps
	 * they can only fill it 512 bytes at a time due to some
	 * buffering or device driver constraint.
	 *
	 * If we're both on the local net, the maxseg is probably the
	 * mtu of the local network, and this will avoid some IP
	 * fragmentation.
	 *
	 * ">=" so that sendalot gets set even when the data exactly
	 * fills one segment.
	 */
	if ((len = tp->snd_lst - tp->snd_nxt) >= tp->t_maxseg)
	{
	    tp->snd_lst -= len - tp->t_maxseg;
	    sendalot = TRUE;
	}
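
	/*
	 * E.g. (hypothetical numbers): with 1300 sendable bytes and a
	 * t_maxseg of 512, len = 1300, so snd_lst is pulled back by
	 * 1300 - 512 = 788 bytes to leave one full 512 byte segment.
	 * sendalot records that a full-sized segment is queued, which
	 * exempts it from the silly window check below.
	 */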

	/*
	 * If we're not on the same net or on similar nets immediately
	 * connected by a gateway, the negotiated maxseg may cause
	 * fragmentation.  Fragmentation per se is not bad, but
	 * tinygrams can cause problems and are inefficient.  So send
	 * something that, if it fragments, will produce reasonably
	 * sized fragments.  Avoiding excessive fragmentation also
	 * reduces the probability that the datagram fails to
	 * reassemble.
	 */
	if (tp->t_maxfrag)
	{
	    len = tp->t_maxfrag*3;
	    if ((tp->snd_lst - tp->snd_nxt) > len)
	    {
		tp->snd_lst = tp->snd_nxt + len;
		sendalot = TRUE;
	    }
	}
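
	/*
	 * E.g. (hypothetical numbers): with a t_maxfrag of 576 the
	 * segment is capped at 3*576 = 1728 bytes, so at worst it
	 * splits into three roughly 576 byte fragments rather than
	 * a train of tinygrams.
	 */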

	if (SEQ_GT(tp->snd_end, tp->snd_una) &&
	    SEQ_LEQ(tp->snd_end, tp->snd_lst))
	    /*
	     * There is data to send, and it should be PUSHed.
	     * PUSHed segments avoid the SWS algorithm since it
	     * might delay transmission.  PUSHed data MUST go
	     * out ASAP.  Note:  To avoid performance degradation,
	     * bulk data transfers should not have PUSH on.
	     */
	    snd_flags |= T_PUSH;
	else if (tp->snd_wnd > 0)
	{
	    /*
	     * Avoid the silly window syndrome (sending small
	     * packets).  Making sure the usable window is at
	     * least some % of the offered window ensures we're
	     * sending a relatively (for this connection) good
	     * sized segment.
	     *
	     * If sbspace(sosnd) == 0, then the user
	     * is blocked for send resources, and we won't be
	     * able to send a larger packet later, so send it now.
	     * (Hmm, still true?  How about the wakeup after we
	     * trim the acked data?)
	     *
	     *      SWS and persistence interaction
	     * If there is outstanding data, snd_nxt - snd_una
	     * will be > 0, and we'll have retransmit timers running
	     * forcing eventual window updates.  If there is
	     * no outstanding data, then we'll send some and
	     * start up the retransmit timers.  So, any time
	     * we run through this segment of code instead of
	     * the next one, we've got some good timers running.
	     */
	    if (!tp->rexmt && !tp->ack_due && !tp->snd_fin &&
		!sendalot &&
		sbspace(sosnd) > 0 &&
		((100*(tp->snd_nxt-tp->snd_una))/tp->snd_wnd)
		> tp->sws_qff)
		tp->snd_lst = tp->snd_nxt;
	}
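
	/*
	 * E.g. (hypothetical numbers): with an offered window of
	 * 4096 bytes and 2048 bytes already outstanding, the usable
	 * fraction is 100*2048/4096 = 50.  If sws_qff were 25, the
	 * 50 > 25 test holds and snd_lst is pulled back to snd_nxt:
	 * we send nothing now and wait for acks to open the window.
	 */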
	else
	{
	    /*
	     * We have stuff to send, but can't since the other
	     * end can't handle it right now.  We start up the
	     * persistence timer in case their window opening
	     * ack is lost.  When the timer goes off, we send
	     * a byte to force a window update.  Wait for timer
	     * in order to give him a chance to deal with the
	     * remotely buffered data and send us an update.
	     * (We'll get here on acks that stop rxmit timers
	     * but that contain zero window since remote user
	     * has not picked up data yet.)
	     *
	     * If we're called due to a write() or packet arrival,
	     * this is how we enter the persistence state.  If
	     * we're called because the persist timer went off,
	     * the timer is restarted to keep persisting.
	     */
	    if (tp->t_timers[TPERSIST] == 0)
		tp->t_timers[TPERSIST] = MIN(TCP_tvMAXPERSIST,
					     MAX(TCP_tvMINPERSIST, tp->t_srtt*3));

	    if (tp->force_one)
	    {
		/* persist timer went off */
		tp->snd_lst = tp->snd_nxt + 1;
		forced = TRUE;
	    }
	}
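
	/*
	 * The persist interval is 3*srtt clamped to the range
	 * [TCP_tvMINPERSIST, TCP_tvMAXPERSIST].  E.g. (hypothetical
	 * ticks): with t_srtt = 4 the timer starts at 12 ticks,
	 * unless that falls outside the clamp range.
	 */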

	/* send FIN if one is due and no data will remain to send after
	   this segment */

	if (tp->snd_fin && !forced && SEQ_EQ(tp->snd_lst, last) &&
	    (SEQ_EQ(tp->seq_fin, tp->iss) ||
	    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
	{
	    snd_flags |= T_FIN;
	    tp->seq_fin = tp->snd_lst++;
	}
    }

    /*
     * Now, we have determined how large a segment to send if our only
     * purpose is to get data to the other side.  If there is something
     * to send, do it and update timers for rexmt.
     */
    len = tp->snd_lst - tp->snd_nxt;
    if (len > 0)
    {	/* then SEQ_LT(tp->snd_nxt, tp->snd_lst) */

	error = send_pkt (tp, snd_flags, len);

	/*
	 * SEQ_LEQ(snd_nxt, t_xmt_val): if this is a retransmission
	 * of the round-trip-time measuring byte, then remeasure the
	 * round trip time.  (Keep rtt from drifting upward on lossy
	 * networks.)
	 *
	 * SEQ_GT(snd_una, t_xmt_val):  Measure the rtt if the last
	 * timed byte has been acked.
	 */
	if (tp->syn_acked && (SEQ_LEQ(tp->snd_nxt, tp->t_xmt_val) ||
	    SEQ_GT(tp->snd_una, tp->t_xmt_val)))
	{
	    if (tp->t_srtt != 0)
		tp->t_timers[TXMT] = 0;
	    tp->t_xmt_val = tp->snd_nxt;
	}

	/*
	 * If the window was full, and we're just forcing a byte
	 * out to try to get a new window, then don't use
	 * retransmission timeouts.  The other side can take as
	 * long as it wants to process the data it's currently got.
	 */
	if (! forced)
	{
	    /*
	     * Set timers for retransmission.  If we already have
	     * some outstanding data, then don't reset the timer.
	     * Consider the case where we send one byte every 1/4
	     * second and only the first byte is lost: if each send
	     * reset the timer, we would have to fill the window
	     * before the retransmission timer could run down and
	     * go off.
	     */
	    if (tp->t_timers[TREXMT] == 0)
		tp->t_timers[TREXMT] = tp->t_rxmitime;

	    if (tp->t_timers[TREXMTTL] == 0)
		tp->t_timers[TREXMTTL] = tp->t_rttltimeo;

	    /*
	     * and remember that the next segment out begins
	     * further into the stream if this one got out.
	     */
	    if (! error)
		tp->snd_nxt = tp->snd_lst;
	}

#if T_DELACK > 0
	t_cancel(tp, TDELACK);
	tp->force_ack = FALSE;
	tp->ack_skipped = 0;
#endif
	tp->ack_due = FALSE;
	tp->snd_hi = SEQ_MAX(tp->snd_lst, tp->snd_hi);
	if (!error)
	    return(TRUE);
    }

    /*
     * If ctl, make sure to send something so the ACK gets through.
     * Attempt to reduce ACK traffic by slightly delaying ACKs that
     * carry no data.  Naive ack traffic can account for about 10% of
     * what the receiving tcp is doing.
     *
     * Bidirectional connection (telnet) => the ack piggybacks on the
     * application's response.
     *
     * Unidirectional connection (ftp) => advertise a large enough window
     * that either the number of acks skipped (tp->ack_skipped) or our
     * estimate of what he thinks the window is causes an ack.  The
     * estimate assumes most packets get through.  This also assumes that
     * the sender buffers enough to take advantage of the estimated usable
     * window, so we'll assume a minimum send buffer provided by his
     * operating system.  (Remember, his OS has to buffer it until we ack
     * it.)
     *
     * So the test assumes his send buffer is at least MINTCPBUF bytes
     * and that his silly window algorithm cuts in below 50% of the window.
     *
     * Use of the fasttimeout facility is a possibility.
     */
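    /*
     * E.g. (hypothetical numbers): with rcv_wnd = 8192, the threshold
     * is MIN(MINTCPBUF, 4096); once the advance of rcv_nxt past
     * lastack exceeds that, or max_ack_skipped arrivals have gone
     * unacked, the ack goes out immediately instead of waiting for
     * the TDELACK timer.
     */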
    if (ctl == TCP_CTL)
    {
#if T_DELACK > 0
	if (tp->force_ack ||
	    (tp->ack_skipped >= max_ack_skipped) ||
	    ((tp->rcv_nxt - tp->lastack) > MIN(MINTCPBUF, tp->rcv_wnd>>1)))
	{
	    (void) send_pkt(tp, 0, 0);
	    t_cancel(tp, TDELACK);
	    tp->force_ack = FALSE;
	    tp->ack_skipped = 0;
	    tp->ack_due = FALSE;
	    tcpstat.t_ackonly ++;
	}
	else
	{
	    tp->ack_skipped ++;
	    if (tp->t_timers[TDELACK] == 0)
		tp->t_timers[TDELACK] = T_DELACK;
	}
#else
	(void) send_pkt(tp, 0, 0);
	tp->ack_due = FALSE;
	tcpstat.t_ackonly ++;
#endif
    }
    return(FALSE);
}

/*
 * Process incoming ACKs.  Remove data from the send queue up to the
 * acknowledgement.  Also handles the round-trip timer for retransmissions
 * and acknowledgement of SYN, and clears the urgent flag if required.
 */

#ifdef BBNPING
#define BBNPING_RESET(inp, len) \
	if (len > 0){ \
		/* \
		 * We've sent him NEW data, perhaps through a gateway, that \
		 * he has successfully received.  If that's the case, then \
		 * we know the route works and we don't have to ping that \
		 * gateway. \
		 * \
		 * see check_ping() \
		 */ \
		register struct rtentry *rt; \
 \
		if (rt = inp->inp_route.ro_rt) \
			if (rt->rt_flags & RTF_GATEWAY) \
				rt->irt_pings = (-1); \
	}
#else
#define BBNPING_RESET(x,y) /* */
#endif

#ifdef MBUF_DEBUG
#define LENCHECK \
	if ((len > sosnd->sb_cc) || (len < 0)){			\
		printf("len %d sb_cc %d flags 0x%x state %d\n",	\
		   len, sosnd->sb_cc, n->t_flags, tp->t_state);	\
		if (len < 0)					\
			len = 0;				\
		else						\
			len = sosnd->sb_cc;			\
	}
#else
#define LENCHECK /* */
#endif

#define smooth(tp) (((75*(tp)->t_timers[TXMT]) + (125*(tp)->t_srtt)) / 200)
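
/*
 * smooth() is a fixed-point weighted average: the new estimate is
 * 37.5% of the latest measurement (t_timers[TXMT]) plus 62.5% of the
 * old t_srtt.  E.g. (hypothetical ticks): with t_srtt = 10 and a new
 * sample of 20, (75*20 + 125*10)/200 = 2750/200 = 13 after the
 * integer divide.
 */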

#define RCV_ACK(tp, n) \
{ \
	register struct inpcb *inp; \
	register struct sockbuf *sosnd; \
	register len; \
 \
	inp	= tp->t_in_pcb; \
	sosnd	= &inp->inp_socket->so_snd; \
	len	= n->t_ackno - tp->snd_una;  \
 \
	tp->snd_una = n->t_ackno; \
	if (SEQ_GT(tp->snd_una, tp->snd_nxt))  \
		tp->snd_nxt = tp->snd_una; \
 \
	/* \
	 * if urgent data has been acked, reset urgent flag \
	 */ \
 \
	if (tp->snd_urg && SEQ_GEQ(tp->snd_una, tp->snd_urp)) \
		tp->snd_urg = FALSE; \
 \
	if (tp->syn_acked) { \
		/* if the timed message has been acknowledged, use the time \
		   to set the retransmission time value: exponential decay, \
		   a roughly 60/40 (actually 62.5/37.5) weighted average */ \
 \
		if (SEQ_GEQ(tp->snd_una, tp->t_xmt_val)) {			\
			if (tp->t_srtt == 0)					\
				tp->t_srtt = tp->t_timers[TXMT];		\
			else							\
				tp->t_srtt = smooth(tp);			\
			tp->t_rxmitime = MIN(TCP_tvRXMAX,			\
					   MAX(TCP_tvRXMIN, (3*tp->t_srtt)/2)); \
		} \
	} else { \
		/* handle ack of opening syn (tell user) */ \
 \
		if (SEQ_GT(tp->snd_una, tp->iss)) { \
			tp->syn_acked = TRUE; \
			len--;			/* ignore SYN */ \
			t_cancel(tp, TINIT);	/* cancel init timer */ \
		} \
	} \
 \
	/* remove acknowledged data from send buff */ \
	if (ack_fin(tp, n)) \
		len --; \
	LENCHECK \
	sbdrop (sosnd, len); \
	BBNPING_RESET(inp, len) \
	sbwakeup (sosnd);	/* wakeup iff > x% of buffering avail? */ \
 \
	/* handle ack of closing fin */ \
 \
	if (SEQ_NEQ(tp->seq_fin, tp->iss) && SEQ_GT(tp->snd_una, tp->seq_fin)) \
		tp->snd_fin = FALSE; \
	t_cancel(tp, TREXMT);          /* cancel retransmit timer */ \
	t_cancel(tp, TREXMTTL);        /* cancel retransmit too long timer */ \
	tp->cancelled = TRUE; \
}
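
/*
 * RCV_ACK expands to a braced block, so it is invoked without a
 * trailing semicolon; see its use in rcv_tcp() below.
 */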

/*
 * Process incoming segments
 */
rcv_tcp(tp, n, ctl)
register struct tcpcb *tp;
register struct th *n;
int ctl;
{
    int sentsomedata;

    tp->dropped_txt = FALSE;
    tp->ack_due = FALSE;
    tp->new_window = FALSE;
    /*
     * Process SYN
     */
    if (!tp->syn_rcvd && n->t_flags&T_SYN)
    {
	tp->snd_wl = tp->rcv_urp = tp->irs = n->t_seq;
	tp->rcv_urpend	= tp->rcv_urp -1;
	tp->rcv_nxt	= n->t_seq + 1;
	tp->syn_rcvd	= TRUE;
	tp->ack_due	= TRUE;
    }

    if (tp->syn_rcvd)
    {
	/*
	 * Process ACK if data not already acked previously.  (Take
	 * ACKed data off the send queue, and reset rexmt timers.)
	 */
	if (n->t_flags&T_ACK && SEQ_GT(n->t_ackno, tp->snd_una))
	    RCV_ACK(tp, n)

	/*
	 * Check for new window.  RCV_ACK did not change syn_rcvd.
	 */
	if (SEQ_GEQ(n->t_seq, tp->snd_wl))
	{
	    tp->snd_wl = n->t_seq;
	    tp->snd_wnd = n->t_win;
	    tp->new_window = TRUE;
	    t_cancel(tp, TPERSIST); /* cancel persist timer */
	}
    }

    /*
     * For data packets only (vs. ctl), process data and URG.
     */
    if (ctl == TCP_DATA)
    {
	/*
	 * Remember how much urgent data for present_data
	 */
	if (n->t_flags & T_URG)
	{
	    /*
	     * if last <= urpend, then this is a retransmission;
	     * bytes [n->t_seq ... last] are urgent
	     */
	    register sequence last;

	    last = n->t_seq + n->t_urp;
	    if (SEQ_GT(last, tp->rcv_urpend))
	    {
		/*
		 * Can only remember one contiguous region.
		 */
		if (SEQ_GT(n->t_seq, tp->rcv_urpend+1))
		{
		    struct socket *so;

		    tp->rcv_urp = n->t_seq;
		    if (tp->oob_data)
		    {
			m_freem(tp->oob_data);
			tp->oob_data = NULL;
		    }

		    so = tp->t_in_pcb->inp_socket;
		    so->so_oobmark = so->so_rcv.sb_cc +
			(tp->rcv_urp-tp->rcv_nxt);
		    if (so->so_oobmark == 0)
			so->so_state |= SS_RCVATMARK;
		}
		tp->rcv_urpend = last;
	    }
	}

	if (n->t_len != 0)
	    rcv_text(tp, n);	/* accept and sequence data */

	/*
	 * Delay extraction of out-of-band data until
	 * present_data() so we don't have to worry about
	 * duplication...
	 */

#ifdef bsd41
	/*
	 * Process PUSH, mark end of data chain.
	 *
	 * Not done in 4.2.  TCP is a byte stream, without record
	 * boundaries, so we don't have to mark for sbappend(), which
	 * preserves marks, or soreceive(), which terminates reads
	 * at marks.  Data IS pushed nevertheless, since soreceive
	 * gives the user all that is available and returns.
	 */
	if (n->t_flags&T_PUSH && !tp->dropped_txt &&
	    tp->t_rcv_prev != (struct th *)tp)
	{

	    /* Find last mbuf on received data chain and mark */

	    m = dtom(tp->t_rcv_prev);
	    if (m != NULL)
	    {
		while (m->m_next != NULL)
		    m = m->m_next;
		m->m_act = (struct mbuf *) 1;
	    }
	}
#endif
    }
    /*
     * Process FIN, check for duplicates and make sure all data is in.
     */
    if (n->t_flags&T_FIN && !tp->dropped_txt)
    {
	if (tp->fin_rcvd)
	    tp->ack_due = TRUE;
	else
	{
	    /*
	     * Check if we really have the FIN
	     * (rcv buf filled in, no drops)
	     */
	    register sequence last;

	    last = firstempty(tp);
	    if ((tp->t_rcv_prev == (struct th *)tp &&
		SEQ_EQ(last, t_end(n)+1)) ||
		SEQ_EQ(last, t_end(tp->t_rcv_prev)+1))
	    {
		tp->fin_rcvd = TRUE;
		uwake(tp->t_in_pcb);
	    }
	    /*
	     * If FIN, then set to ACK: incr rcv_nxt, since FIN
	     * occupies sequence space
	     */
	    if (tp->fin_rcvd && SEQ_GEQ(tp->rcv_nxt, last))
	    {
		tp->rcv_nxt = last + 1;
		tp->ack_due = TRUE;
	    }
	}
    }
    /*
     * If an ACK is required or the rcv window has changed, try to send
     * something.
     */
    sentsomedata = FALSE;
    if (tp->ack_due)
	sentsomedata = send_tcp(tp, TCP_CTL);
    else if (tp->new_window)
	sentsomedata = send_tcp(tp, TCP_DATA);
    /*
     * tp->cancelled => the retransmit and rttl timers are now zero.
     *
     * If we didn't send any data, we might not have the retransmit and
     * rttl timers running.  If we still have unACKed data and we turned
     * off the timers above, then ensure the timers are running.
     */
    if (!sentsomedata && is_unacked(tp) && tp->cancelled)
    {
	tp->t_timers[TREXMT] = tp->t_rxmitime;
	tp->t_timers[TREXMTTL] = tp->t_rttltimeo;
	tp->cancelled = FALSE;
    }
}

#undef BBNPING_RESET
#undef LENCHECK

/*
 * Process incoming data.  Put the segments on the sequencing queue in order,
 * taking care of overlaps and duplicates.  Data is removed from the sequence
 * queue by present_data when the sequence is complete (no holes at the top).
 * Drop data that falls outside the buffer quota if tight for space.
 * Otherwise, process and recycle data held in tcp_input.
 */
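
/*
 * Overlap trimming sketch (illustrative).  m_adj() drops bytes from
 * the front of an mbuf chain when its count is positive and from the
 * back when it is negative, so rcv_text() trims the new segment t
 * against a queued fragment q like this:
 *
 *	queued q:             [q->t_seq ........ t_end(q)]
 *	t overlaps q's tail:           [t_seq ............. last]
 *		i = t_end(q) - t->t_seq + 1;  m_adj(dtom(t), i);
 *	t overlaps q's head:  [t_seq ......... last]
 *		i = last - q->t_seq + 1;      m_adj(dtom(t), -i);
 */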
rcv_text(tp, t)
register struct tcpcb *tp;
register struct th *t;
{
    register i;
    register struct sockbuf *sorcv;
    register struct mbuf *m;
    register struct th *q;
    struct th *p;
    struct mbuf *n;
    struct th *savq;
    int j, oldkeep;
    sequence last;

    /* throw away any data we have already received */

    if ((i = tp->rcv_nxt - t->t_seq) > 0)
    {
	if (i < t->t_len)
	{
	    t->t_seq += i;
	    t->t_len -= i;
	    m_adj(dtom(t), i);
	}
	else
	{
	    tp->t_olddata++;
	    tp->ack_due = TRUE;	/* send ack just in case */
#ifdef HMPTRAPS
	    /* hmp_trap(T_TCP_DUP, (caddr_t)0,0); */
#endif
	    return;
	}
    }

    last = t_end(t); /* last seq # in incoming seg */

    /* # buffers available to con */

    sorcv = &tp->t_in_pcb->inp_socket->so_rcv;
    i = sbspace(sorcv);
    if (i < 0)
	i = 0;

    /* Enough resources to process the segment?  We used to walk the
     * mbuf chain to count up data bytes; be smart and use t_len. */

    j = t->t_len;
    if (j > i)
    {

	/* if the segment precedes the top of the sequencing queue, try
	   to take buffers from the bottom of the queue */

	q = tp->t_rcv_next;
	if (q != (struct th *)tp && SEQ_LT(tp->rcv_nxt, q->t_seq) &&
	    SEQ_LT(t->t_seq, q->t_seq))

	    for (p = tp->t_rcv_prev; i < j && p != (struct th *)tp;)
	    {
		savq = p->t_prev;
		TCP_DEQ(p, tp);
#ifdef HMPTRAPS
		/* hmp_trap(T_TCP_UDROP, (caddr_t)0,0); */
#endif
		for (m = dtom(p); m != NULL; m = m_free(m))
		    i += m->m_len;
		p = savq;
	    }

	/* if still not enough room, drop text from the end of the new
	   segment */

	if (j > i)
	{

	    for (m = dtom(t); i > 0 && m != NULL; m = m->m_next)
		i -= m->m_len;

	    while (m != NULL)
	    {
		t->t_len -= m->m_len;
		last -= m->m_len;
		m->m_len = 0;
		m = m->m_next;
	    }
	    tp->dropped_txt = TRUE;
#ifdef HMPTRAPS
	    /* hmp_trap(T_TCP_RDROP, (caddr_t)0,0); */
#endif
	    if (SEQ_LT(last, t->t_seq))
		return;
	}
    }

    /* merge incoming data into the sequence queue */

    q = tp->t_rcv_next;	/* -> top of sequencing queue */

    /* skip frags that the new segment doesn't overlap (they end before
       it begins) */

    while ((q != (struct th *)tp) && SEQ_GT(t->t_seq, t_end(q)))
	q = q->t_next;

    if (q == (struct th *)tp)
    {		/* frag at end of chain */

	if (SEQ_GEQ(last, tp->rcv_nxt))
	{
	    tcp_net_keep = TRUE;
	    TCP_ENQ(t, tp->t_rcv_prev, tp);
	}

    }
    else
    {

#ifdef HMPTRAPS
	/* we've received an out-of-order packet: trap! */

	/* hmp_trap(T_TCP_ORDER, (caddr_t)0,0); */

#endif
	/* frag doesn't overlap any on chain */

	if (SEQ_LT(last, q->t_seq))
	{
	    tcp_net_keep = TRUE;
	    TCP_ENQ(t, q->t_prev, tp);

	    /* new overlaps beginning of next frag only */

	}
	else if (SEQ_LT(last, t_end(q)))
	{
	    if ((i = last - q->t_seq + 1) < t->t_len)
	    {
		t->t_len -= i;
		m_adj(dtom(t), -i);
		tcp_net_keep = TRUE;
		TCP_ENQ(t, q->t_prev, tp);
	    }

	    /* new overlaps end of previous frag */

	}
	else
	{
	    savq = q;
	    if (SEQ_LEQ(t->t_seq, q->t_seq))
	    {  /* complete cover */
		savq = q->t_prev;
		TCP_DEQ(q, tp);
		m_freem(dtom(q));

	    }
	    else
	    {						/* overlap */
		if ((i = t_end(q) - t->t_seq + 1) < t->t_len)
		{
		    t->t_seq += i;
		    t->t_len -= i;
		    m_adj(dtom(t), i);
		}
		else
		    t->t_len = 0;
	    }

	    /* new overlaps at beginning of successor frags */

	    q = savq->t_next;
	    while ((q != (struct th *)tp) && (t->t_len != 0) &&
		SEQ_LEQ(q->t_seq, last))

		/* complete cover */

		if (SEQ_LEQ(t_end(q), last))
		{
		    p = q->t_next;
		    TCP_DEQ(q, tp);
		    m_freem(dtom(q));
		    q = p;
		}
		else
		{		/* overlap */
		    if ((i = last-q->t_seq+1) < t->t_len)
		    {
			t->t_len -= i;
			m_adj(dtom(t), -i);
		    }
		    else
			t->t_len = 0;
		    break;
		}

	    /* enqueue whatever is left of new before successors */

	    if (t->t_len != 0)
	    {
		tcp_net_keep = TRUE;
		TCP_ENQ(t, savq, tp);
	    }
	}
    }

    /* set to ack completed data (no gaps) */

    FIRSTEMPTY(tp, tp->rcv_nxt);
    tp->ack_due = TRUE;

    /* if any room remains in the rcv buf, take any unprocessed
       messages and schedule them for later processing */

    if ((m = tp->t_rcv_unack) != NULL && (i = sbspace(sorcv)) > 0)
    do
    {

	/* schedule work request */

	t = mtod(m, struct th *);
	j = (t->t_off << TCP_OFFSHIFT) + sizeof(struct ip);
	m->m_off += j;
	m->m_len -= j;
	tp->t_rcv_unack = m->m_act;
	m->m_act = (struct mbuf *)0;
	oldkeep = tcp_net_keep;
	tcpstat.t_unack++;
	w_alloc(INRECV, 0, tp, t);
	tcp_net_keep = oldkeep;

	/* remaining buffer space */

	for (n = m; n != NULL; n = n->m_next)
	    i -= n->m_len;
    }
    while ((m = tp->t_rcv_unack) != NULL && i > 0);
}

/*
 * Send a reset segment
 */
send_rst(tp, n)
register struct tcpcb *tp;
register struct th *n;
{
    register struct inpcb *inp;
    struct in_addr src, dst;
    u_short port;
    int temp_rst;

    /* don't send a reset in response to a reset */

    if (n->t_flags&T_RST || (inp = tp->t_in_pcb) == NULL)
	return;

    tp->snd_rst = TRUE;
    temp_rst = FALSE;
    if (n->t_flags&T_ACK)
	tp->snd_nxt = n->t_ackno;

    /* if a reset is required from a "wildcard" listener, take the
       addresses and port from the incoming packet */

    if (inp->inp_laddr.s_addr == 0 || inp->inp_faddr.s_addr == 0 ||
	inp->inp_fport == 0)
    {
	src = inp->inp_laddr;
	dst = inp->inp_faddr;
	port = inp->inp_fport;
	inp->inp_laddr = n->t_d;
	inp->inp_faddr = n->t_s;
	inp->inp_fport = n->t_src;
	tp->t_template = tcp_template(tp);
	temp_rst = TRUE;
    }
    tp->syn_rcvd = FALSE;
    if (tp->t_template)
	(void) send_pkt(tp, 0, 0);
    else
	printf("send_rst: no template\n");
    tp->ack_due = FALSE;
    tp->snd_rst = FALSE;
#if T_DELACK > 0
    tp->force_ack = FALSE;
    t_cancel(tp, TDELACK);
    tp->ack_skipped = 0;
#endif

    /* restore "wildcard" addresses */

    if (temp_rst)
    {
	inp->inp_laddr = src;
	inp->inp_faddr = dst;
	inp->inp_fport = port;
	tp->snd_nxt = tp->iss;
	if (inp->inp_route.ro_rt != NULL)
	{
	    rtfree(inp->inp_route.ro_rt);
	    inp->inp_route.ro_rt = NULL;
	}
	if (tp->t_template)
	{
	    m_free(dtom(tp->t_template));
	    tp->t_template = NULL;
	}
    }
}

struct mbuf *extract_oob(tp, mp, sorcv)
struct tcpcb	*tp;
struct mbuf	*mp;
struct sockbuf	*sorcv;
{
    struct socket	*so;
    struct mbuf	*top, *here, *m;
    int off, len, tmp;

    m = mp;
    so = tp->t_in_pcb->inp_socket;
    /*
     * skip over bytes that precede the out of band data
     */
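    /*
     * so_oobmark counts bytes from the front of the receive queue to
     * the urgent mark, and sb_cc bytes are already queued ahead of
     * this chain, so off is where the urgent data starts within mp.
     * E.g. (hypothetical numbers): so_oobmark = 100 and sb_cc = 80
     * put the first urgent byte 20 bytes into mp.
     */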
    if ((off = so->so_oobmark - sorcv->sb_cc) < 0)
    {
	log(LOG_INFO, "extract_oob:  neg off\n");
	tp->rcv_urpend = tp->rcv_urp = tp->irs;
	return (mp);
    }

    while (m && (off > 0))
    {
	if (m->m_len <= off)
	{
	    off -= m->m_len;
	    m = m->m_next;
	}
	else
	    break;
    }

    if (!m)
	return (mp);

    /*
     * Copy the out of band data, removing it from the input stream.
     */
    len = tp->rcv_urpend - tp->rcv_urp + 1; /* # urgent bytes */
    top = here = NULL;
    while (m && (len > 0))
    {
	char	*p;
	struct mbuf *newm;
	int dropped;

	tmp = MIN(m->m_len - off, len);
	/* tmp == # urgent bytes in this mbuf */
	len -= tmp;
	tp->rcv_urp += tmp;

	p = mtod(m, caddr_t) + off; /* points at first urgent byte */
	dropped = FALSE;

	while (tmp > 0)
	{
	    unsigned nbytes;

	    /* in case this mbuf uses pages */
	    nbytes = MIN(tmp, MLEN);

	    if (! dropped)
	    {
		if (newm = m_get(M_WAIT, MT_DATA))
		{
		    bcopy (p, mtod(newm, char *), nbytes);
		    newm->m_len = nbytes;

		    if (!top)
			top = here = newm;
		    else
		    {
			here->m_next = newm;
			here = here->m_next;
		    }
		}
		else
		    /* potential unreliability */
		    dropped = TRUE;
	    }

	    bcopy(p+nbytes, p, (unsigned)(m->m_len -off -nbytes));
	    m->m_len -= nbytes;
	    tmp -= nbytes;
	}

	if (m->m_len <= 0)
	{
	    /*
	     * So soreceive never sees a zero length mbuf
	     * with m_act set.  (PUSHED URGENT data packet)
	     */
	    if (m == mp)
		mp = m = m_free(m);
	    else
		m = m_free(m);
	}
	else
	    m = m->m_next;

	off = 0;
    }

    if (top)
    {
	if (tp->oob_data)
	    m_cat (tp->oob_data, top);
	else
	    tp->oob_data = top;
	sohasoutofband(so);
    }

    return (mp);
}

/*
 * Accept data for the user to receive.  Moves data from sequenced tcp
 * segments on the sequencing queue to the user's receive queue (in the
 * ucb).  Observes locking on the receive queue.
 */
present_data(tp)
register struct tcpcb *tp;
{
    PRESENT_DATA(tp)
}