1 #ifdef RCSIDENT
2 static char rcsident[] = "$Header: tcp_procs.c,v 1.32 85/07/31 09:34:27 walsh Exp $";
3 #endif
4
5 #include "../h/param.h"
6 #include "../h/systm.h"
7 #include "../h/mbuf.h"
8 #include "../h/socket.h"
9 #include "../h/socketvar.h"
10 #include "../h/syslog.h"
11
12 #include "../net/if.h"
13 #include "../net/route.h"
14
15 #include "../bbnnet/in.h"
16 #include "../bbnnet/in_pcb.h"
17 #include "../bbnnet/in_var.h"
18 #include "../bbnnet/net.h"
19 #include "../bbnnet/fsm.h"
20 #include "../bbnnet/tcp.h"
21 #include "../bbnnet/seq.h"
22 #include "../bbnnet/ip.h"
23 #include "../bbnnet/macros.h"
24 #ifdef HMPTRAPS
25 #include "../bbnnet/hmp_traps.h"
26 #endif
27
/*
 * TCP finite state machine procedures.
 *
 * Called from finite state machine action routines, these do most of the work
 * of the protocol.  They in turn call primitive routines (in tcp_prim) to
 * perform lower level functions.
 */
35
36
/*
 * Release everything a connection's tcpcb holds: queued receive data,
 * unprocessed packets, out-of-band data, the header template, and the
 * tcpcb itself.  This works cooperatively with t_close for freeing up
 * data on receive/send buffers; it is handed to in_pcbdetach() as the
 * per-protocol disconnect routine (see t_close below).
 *
 * inp - the internet protocol control block being detached.
 */
tcp_pcbdisconnect(inp)
struct inpcb *inp;
{
	register struct tcpcb *tp;

	/* nothing to do if the tcpcb was already detached */
	if (tp = (struct tcpcb *) inp->inp_ppcb)
	{
		inp->inp_ppcb = (caddr_t) NULL;

		/*
		 * free all data on receive queues
		 */
		{
			register struct th *t, *next;

			/*
			 * The sequencing queue is circular and headed by the
			 * tcpcb itself, hence the (struct th *)tp sentinel.
			 * Grab t_next before m_freem() releases the segment.
			 */
			t = tp->t_rcv_next;
			while (t != (struct th *)tp)
			{
				next = t->t_next;
				m_freem(dtom(t));
				t = next;
			}
		}
		{
			register struct mbuf *m, *next;

			/*
			 * Packets received but never processed, chained
			 * through m_act (see the t_rcv_unack drain loop at
			 * the bottom of rcv_text()).
			 */
			m = tp->t_rcv_unack;
			while (m != NULL)
			{
				next = m->m_act;
				m_freem(m);
				m = next;
			}
		}

		/* urgent (out-of-band) data saved for the user */
		if (tp->oob_data)
			m_freem(tp->oob_data);

		/* prototype header built by tcp_template() */
		if (tp->t_template)
			m_free(dtom(tp->t_template));

		/* finally, the control block itself */
		m_free(dtom(tp));
	}
}
85
86 /*
87 * Delete TCB and free all resources used by the connection. Called after
88 * the close protocol is complete.
89 */
t_close(tp,state)90 t_close(tp, state)
91 register struct tcpcb *tp;
92 short state;
93 {
94 register struct inpcb *inp;
95
96 /*
97 * in_pcbdetach() calls soisdisconnected(), which wakes up the
98 * process if it's sleeping. Need only pass user error code if
99 * appropriate (like ENETRESET) and hope he'll close the file
100 * descriptor. Don't need to clear timers since they're in the
101 * tcpcb to be deleted.
102 */
103 inp = tp->t_in_pcb;
104 if (!tp->usr_abort)
105 inp->inp_socket->so_error = state;
106 in_pcbdetach(inp, tcp_pcbdisconnect);
107 }
108
109 short max_ack_skipped = 1;
110
/*
 * We are in a position where, perhaps, we should send a TCP segment (packet).
 * The important decisions are:
 *	1) How big a segment should we send?  This is important since most
 *	   overhead occurs at the packet level (interrupts, queueing,
 *	   header field checks...) and not at the byte level.
 *	2) Is it worth it to send this packet?  Are we sending enough data
 *	   or would we be better off waiting for some more to queue up?
 *
 * The above requirements are the point of view when called in response to
 * a user's write request.  We are also called on packet arrival in order
 * to send an ack (with piggy-backed data), and to respond to window openings
 * by sending any pent up data.
 *
 * Send a TCP segment.  Send data from left window edge of send buffer up to
 * window size or end (whichever is less).  Set retransmission timers.
 *
 * The Ford/Nagle algorithms might be thought of (if outstanding data, only
 * send if packet would be large), but they are primarily for telnet and
 * that doesn't go with ideas in comments down by push.  Has idea of tcp
 * changed since RFC?
 *
 * tp  - the connection's TCP control block.
 * ctl - TCP_CTL when called to get a (possibly ACK-only) segment out,
 *       TCP_DATA otherwise (see the callers in rcv_tcp()).
 *
 * Returns TRUE iff a data-bearing segment was handed to send_pkt()
 * without error; FALSE otherwise.
 */
send_tcp(tp, ctl)
register struct tcpcb *tp;
int ctl;
{
	register sequence last, wind;
	register int snd_flags;
	register int len;
	struct sockbuf *sosnd;
	int forced, error;
	int sendalot;

	sosnd = &tp->t_in_pcb->inp_socket->so_snd;
	sendalot = FALSE;
	snd_flags = 0;
	tp->snd_lst = tp->snd_nxt;
	forced = FALSE;
	/*
	 * Send SYN if this is first data (ISS)
	 */
	if (SEQ_EQ(tp->snd_nxt, tp->iss))
	{
		snd_flags |= T_SYN;
		tp->snd_lst++;
	}
	/*
	 * Get seq # of last datum in send buffer
	 */
	last = tp->snd_una;
	if (!tp->syn_acked)
		last++;		/* don't forget SYN */
	last += sosnd->sb_cc;
	/*
	 * If no data to send in buffer, just do FIN check, otherwise see
	 * how much we should send in segment.
	 */
	if (SEQ_GEQ(tp->snd_nxt, last))
	{
		/*
		 * should send FIN?  don't unless haven't already sent one
		 * (seq_fin == iss means no FIN has yet been assigned a
		 * sequence number).
		 */
		if (tp->snd_fin &&
		    (SEQ_EQ(tp->seq_fin, tp->iss) ||
		    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
		{
			snd_flags |= T_FIN;
			tp->seq_fin = tp->snd_lst++;
		}
	}
	else if (tp->syn_acked)
	{
		/*
		 * We can't send more than we have (SYN + data represented
		 * by last).  Nor can we send more than the other end is
		 * prepared to receive (represented by the window in snd_wnd
		 * and wind).
		 *
		 * Only send a segment if there is something in the buffer,
		 * and a non-zero window has been received.
		 */
		wind = tp->snd_una + tp->snd_wnd;
		tp->snd_lst = SEQ_MIN(last, wind);

		/*
		 * Make sure the segment is not larger than the remote end
		 * can handle.  Though they may advertise a 4K window, perhaps
		 * they can only fill it 512 bytes at a time due to some
		 * buffering or device driver constraint.
		 *
		 * If we're both on the local net, the maxseg is probably the
		 * mtu of the local network, and this will avoid some IP
		 * fragmentation.
		 *
		 * ">=" so that set sendalot.
		 */
		if ((len = tp->snd_lst - tp->snd_nxt) >= tp->t_maxseg)
		{
			tp->snd_lst -= len - tp->t_maxseg;
			sendalot = TRUE;
		}

		/*
		 * If we're not on the same net or on similar nets immediately
		 * connected by a gateway, the negotiated maxseg may cause
		 * fragmentation.  Fragmentation per se is not bad, but
		 * tinygrams can cause problems and are inefficient.  So,
		 * send something that if it fragments, will produce reasonably
		 * sized fragments.  Avoid excessive fragmentation to reduce
		 * probability datagram fails to reassemble.
		 */
		if (tp->t_maxfrag)
		{
			len = tp->t_maxfrag*3;
			if ((tp->snd_lst - tp->snd_nxt) > len)
			{
				tp->snd_lst = tp->snd_nxt + len;
				sendalot = TRUE;
			}
		}

		if (SEQ_GT(tp->snd_end, tp->snd_una) &&
		    SEQ_LEQ(tp->snd_end, tp->snd_lst))
			/*
			 * There is data to send, and it should be PUSHed.
			 * PUSHed segments avoid the SWS algorithm since it
			 * might delay transmission.  PUSHed data MUST go
			 * out ASAP.  Note: To avoid performance degradation,
			 * bulk data transfers should not have PUSH on.
			 */
			snd_flags |= T_PUSH;
		else if (tp->snd_wnd > 0)
		{
			/*
			 * Avoid the silly window syndrome (sending small
			 * packets).  Making sure the usable window is at
			 * least some % of the offered window ensures we're
			 * sending a relatively (for this connection) good
			 * sized segment.
			 *
			 * If sbspace(sosnd) == 0, then the user
			 * is blocked for send resources, and we won't be
			 * able to send a larger packet later, so send it now.
			 * (Hmm, still true?  How about the wakeup after we
			 * trim the acked data?)
			 *
			 * SWS and persistence interaction
			 * If there is outstanding data, snd_nxt - snd_una
			 * will be > 0, we'll have retransmit timers running
			 * forcing eventual window updates.  If there is
			 * no outstanding data, then we'll send some and
			 * start up the retransmit timers.  So, any time
			 * we run through this segment of code instead of
			 * the next one, we've got some good timers running.
			 */
			if (!tp->rexmt && !tp->ack_due && !tp->snd_fin &&
			    !sendalot &&
			    sbspace(sosnd) > 0 &&
			    ((100*(tp->snd_nxt-tp->snd_una))/tp->snd_wnd)
			      > tp->sws_qff)
				tp->snd_lst = tp->snd_nxt;
		}
		else
		{
			/*
			 * We have stuff to send, but can't since the other
			 * end can't handle it right now.  We start up the
			 * persistence timer in case their window opening
			 * ack is lost.  When the timer goes off, we send
			 * a byte to force a window update.  Wait for timer
			 * in order to give him a chance to deal with the
			 * remotely buffered data and send us an update.
			 * (We'll get here on acks that stop rxmit timers
			 * but that contain zero window since remote user
			 * has not picked up data yet.)
			 *
			 * If we're called due to a write() or packet arrival,
			 * this is how we enter the persistence state.  If
			 * we're called because the persist timer went off,
			 * the timer is restarted to keep persisting.
			 */
			if (tp->t_timers[TPERSIST] == 0)
				tp->t_timers[TPERSIST] = MIN(TCP_tvMAXPERSIST,
				    MAX(TCP_tvMINPERSIST, tp->t_srtt*3));

			if (tp->force_one)
			{
				/* persist timer went off */
				tp->snd_lst = tp->snd_nxt + 1;
				forced = TRUE;
			}
		}

		/* must send FIN and no more data left to send after this */

		if (tp->snd_fin && !forced && SEQ_EQ(tp->snd_lst, last) &&
		    (SEQ_EQ(tp->seq_fin, tp->iss) ||
		    SEQ_LEQ(tp->snd_nxt, tp->seq_fin)))
		{
			snd_flags |= T_FIN;
			tp->seq_fin = tp->snd_lst++;
		}
	}

	/*
	 * Now, we have determined how large a segment to send if our only
	 * purpose is to get data to the other side.  If there is something
	 * to send, do it and update timers for rexmt.
	 */
	len = tp->snd_lst - tp->snd_nxt;
	if (len > 0)
	{	/* then SEQ_LT(tp->snd_nxt, tp->snd_lst) */

		error = send_pkt (tp, snd_flags, len);

		/*
		 * SEQ_LEQ(snd_nxt, t_xmt_val): if this is a retransmission
		 * of the round-trip-time measuring byte, then remeasure the
		 * round trip time.  (Keep rtt from drifting upward on lossy
		 * networks.)
		 *
		 * SEQ_GT(snd_una, t_xmt_val): Measure the rtt if the last
		 * timed byte has been acked.
		 */
		if (tp->syn_acked && (SEQ_LEQ(tp->snd_nxt, tp->t_xmt_val) ||
		    SEQ_GT(tp->snd_una, tp->t_xmt_val)))
		{
			if (tp->t_srtt != 0)
				tp->t_timers[TXMT] = 0;
			tp->t_xmt_val = tp->snd_nxt;
		}

		/*
		 * If the window was full, and we're just forcing a byte
		 * out to try to get a new window, then don't use
		 * retransmission timeouts.  The other side can take as
		 * long as it wants to process the data it's currently got.
		 */
		if (! forced)
		{
			/*
			 * Set timers for retransmission.  If we already have
			 * some outstanding data, then don't reset timer.  Think
			 * of case where send one byte every 1/4 second and only
			 * first byte is lost.  Would need to wait until filled
			 * window before retransmission timer started to decrease
			 * and go off.
			 */
			if (tp->t_timers[TREXMT] == 0)
				tp->t_timers[TREXMT] = tp->t_rxmitime;

			if (tp->t_timers[TREXMTTL] == 0)
				tp->t_timers[TREXMTTL] = tp->t_rttltimeo;

			/*
			 * and remember that next segment out begins
			 * further into the stream if this one got out.
			 */
			if (! error)
				tp->snd_nxt = tp->snd_lst;
		}

#if T_DELACK > 0
		t_cancel(tp, TDELACK);
		tp->force_ack = FALSE;
		tp->ack_skipped = 0;
#endif
		/* the segment just sent carried our current ACK */
		tp->ack_due = FALSE;
		tp->snd_hi = SEQ_MAX(tp->snd_lst, tp->snd_hi);
		if (!error)
			return(TRUE);
	}

	/*
	 * If ctl, make sure to send something so ACK gets through.  Attempt
	 * to reduce ACK traffic by delaying ACKs with no data slightly.
	 * Naive ack traffic can account for about 10% of what the receiving
	 * tcp is doing.
	 *
	 * Bidirectional connection (telnet) => ack piggy backs application's
	 * response.
	 *
	 * Unidirectional connection (ftp) => advertise large enough window
	 * so that either #skipped (tp->ack_skipped) or our estimate of what he
	 * thinks window is cause ack.  The estimate assumes most packets get
	 * through.  This also assumes that the sender buffers enough to take
	 * advantage of the estimated usable window, so we'll assume a minimum
	 * send buffer provided by his operating system.  (Remember, his OS has
	 * to buffer it until we ack it.)
	 *
	 * So, test assumes his send buffer > MINTCPBUF bytes large
	 * and his silly window algorithm cuts in at < 50% of window.
	 *
	 * Use of the fasttimeout facility is a possibility.
	 */
	if (ctl == TCP_CTL)
	{
#if T_DELACK > 0
		if (tp->force_ack ||
		    (tp->ack_skipped >= max_ack_skipped) ||
		    ((tp->rcv_nxt - tp->lastack) > MIN(MINTCPBUF, tp->rcv_wnd>>1)))
		{
			(void) send_pkt(tp, 0, 0);
			t_cancel(tp, TDELACK);
			tp->force_ack = FALSE;
			tp->ack_skipped = 0;
			tp->ack_due = FALSE;
			tcpstat.t_ackonly ++;
		}
		else
		{
			/* postpone the ACK; TDELACK will force it out later */
			tp->ack_skipped ++;
			if (tp->t_timers[TDELACK] == 0)
				tp->t_timers[TDELACK] = T_DELACK;
		}
#else
		(void) send_pkt(tp, 0, 0);
		tp->ack_due = FALSE;
		tcpstat.t_ackonly ++;
#endif
	}
	return(FALSE);
}
434
/*
 * Process incoming ACKs.  Remove data from send queue up to acknowledgement.
 * Also handles round-trip timer for retransmissions and acknowledgement of
 * SYN, and clears the urgent flag if required.
 *
 * The machinery below is written as macros (expanded in rcv_tcp()) --
 * presumably to avoid call overhead on the per-packet path; see the
 * matching #undefs after rcv_tcp().
 */

#ifdef BBNPING
/*
 * Acknowledgement of new data proves the route works, so suppress
 * pinging of the first-hop gateway.
 */
#define BBNPING_RESET(inp, len) \
    if (len > 0){ \
	/* \
	 * We've sent him NEW data, perhaps by a gateway, that he \
	 * has successfully received.  If that's the case, then \
	 * we know the route works and we don't have to ping that \
	 * gateway. \
	 * \
	 * see check_ping() \
	 */ \
	register struct rtentry *rt; \
 \
	if (rt = inp->inp_route.ro_rt) \
	    if (rt->rt_flags & RTF_GATEWAY) \
		rt->irt_pings = (-1); \
    }
#else
#define BBNPING_RESET(x,y) /* */
#endif

#ifdef MBUF_DEBUG
/*
 * Sanity-check the computed ACKed-byte count against the send buffer
 * before sbdrop(); clamps len into range after logging.
 */
#define LENCHECK \
    if ((len > sosnd->sb_cc) || (len < 0)){ \
	printf("len %d sb_cc %d flags 0x%x state %d\n", \
		len, sosnd->sb_cc, n->t_flags, tp->t_state); \
	if (len < 0) \
	    len = 0; \
	else \
	    len = sosnd->sb_cc; \
    }
#else
#define LENCHECK /* */
#endif

/*
 * Exponentially-smoothed round trip time: new srtt is a weighted average
 * of the latest measurement (TXMT timer) and the old estimate, weights
 * 75/200 and 125/200 respectively (integer arithmetic).
 */
#define smooth(tp) (((75*(tp)->t_timers[TXMT]) + (125*(tp)->t_srtt)) / 200)

/*
 * Advance snd_una to the ACKed point, recompute the rtt estimate when the
 * timed byte is covered, note ACK of our SYN/FIN, drop ACKed bytes from
 * the send buffer and wake the writer, and cancel retransmit timers
 * (setting tp->cancelled so rcv_tcp() can restart them if need be).
 * NOTE(review): assumes caller established SEQ_GT(n->t_ackno, tp->snd_una).
 */
#define RCV_ACK(tp, n) \
{ \
    register struct inpcb *inp; \
    register struct sockbuf *sosnd; \
    register len; \
 \
    inp = tp->t_in_pcb; \
    sosnd = &inp->inp_socket->so_snd; \
    len = n->t_ackno - tp->snd_una; \
 \
    tp->snd_una = n->t_ackno; \
    if (SEQ_GT(tp->snd_una, tp->snd_nxt)) \
	tp->snd_nxt = tp->snd_una; \
 \
    /* \
     * if urgent data has been acked, reset urgent flag \
     */ \
 \
    if (tp->snd_urg && SEQ_GEQ(tp->snd_una, tp->snd_urp)) \
	tp->snd_urg = FALSE; \
 \
    if (tp->syn_acked) { \
	/* if timed message has been acknowledged, use the time to set \
	   the retransmission time value, exponential decay, 60/40 \
	   weighted average */ \
 \
	if (SEQ_GEQ(tp->snd_una, tp->t_xmt_val)) { \
	    if (tp->t_srtt == 0) \
		tp->t_srtt = tp->t_timers[TXMT]; \
	    else \
		tp->t_srtt = smooth(tp); \
	    tp->t_rxmitime = MIN(TCP_tvRXMAX, \
			MAX(TCP_tvRXMIN, (3*tp->t_srtt)/2)); \
	} \
    } else { \
	/* handle ack of opening syn (tell user) */ \
 \
	if (SEQ_GT(tp->snd_una, tp->iss)) { \
	    tp->syn_acked = TRUE; \
	    len--;		/* ignore SYN */ \
	    t_cancel(tp, TINIT);	/* cancel init timer */ \
	} \
    } \
 \
    /* remove acknowledged data from send buff */ \
    if (ack_fin(tp, n)) \
	len --; \
    LENCHECK \
    sbdrop (sosnd, len); \
    BBNPING_RESET(inp, len) \
    sbwakeup (sosnd);	/* wakeup iff > x% of buffering avail? */ \
 \
    /* handle ack of closing fin */ \
 \
    if (SEQ_NEQ(tp->seq_fin, tp->iss) && SEQ_GT(tp->snd_una, tp->seq_fin)) \
	tp->snd_fin = FALSE; \
    t_cancel(tp, TREXMT);	/* cancel retransmit timer */ \
    t_cancel(tp, TREXMTTL);	/* cancel retransmit too long timer */ \
    tp->cancelled = TRUE; \
}
538
539
/*
 * Process incoming segments.
 *
 * Handles SYN, ACK (via the RCV_ACK macro), window updates, urgent
 * pointer bookkeeping, text (via rcv_text()), and FIN; then decides
 * whether to transmit in response and ensures retransmit timers are
 * running if unACKed data remains.
 *
 * tp  - the connection's TCP control block.
 * n   - the incoming segment header (with data chained behind it).
 * ctl - TCP_DATA if data/URG processing is wanted, TCP_CTL otherwise.
 */
rcv_tcp(tp, n, ctl)
register struct tcpcb *tp;
register struct th *n;
int ctl;
{
	int sentsomedata;

	tp->dropped_txt = FALSE;
	tp->ack_due = FALSE;
	tp->new_window = FALSE;
	/*
	 * Process SYN
	 */
	if (!tp->syn_rcvd && n->t_flags&T_SYN)
	{
		/* take initial receive sequence variables from the SYN */
		tp->snd_wl = tp->rcv_urp = tp->irs = n->t_seq;
		tp->rcv_urpend = tp->rcv_urp -1;
		tp->rcv_nxt = n->t_seq + 1;	/* SYN occupies sequence space */
		tp->syn_rcvd = TRUE;
		tp->ack_due = TRUE;
	}

	if (tp->syn_rcvd)
	{
		/*
		 * Process ACK if data not already acked previously.  (Take
		 * ACKed data off send queue, and reset rexmt timers).
		 * RCV_ACK is a statement-block macro; no semicolon.
		 */
		if (n->t_flags&T_ACK && SEQ_GT(n->t_ackno, tp->snd_una))
			RCV_ACK(tp, n)

		/*
		 * Check for new window.  rcv_ack did not change syn_rcvd.
		 */
		if (SEQ_GEQ(n->t_seq, tp->snd_wl))
		{
			tp->snd_wl = n->t_seq;
			tp->snd_wnd = n->t_win;
			tp->new_window = TRUE;
			t_cancel(tp, TPERSIST);		/* cancel persist timer */
		}
	}

	/*
	 * For data packets only (vs. ctl), process data and URG.
	 */
	if (ctl == TCP_DATA)
	{
		/*
		 * Remember how much urgent data for present_data
		 */
		if (n->t_flags & T_URG)
		{
			/*
			 * if last <= urpend, then is a retransmission
			 * bytes [n->t_seq ... last] are urgent
			 */
			register sequence last;

			last = n->t_seq + n->t_urp;
			if (SEQ_GT(last, tp->rcv_urpend))
			{
				/*
				 * Can only remember one contiguous region.
				 */
				if (SEQ_GT(n->t_seq, tp->rcv_urpend+1))
				{
					struct socket *so;

					/* new, disjoint urgent region: discard
					 * any saved OOB data and restart */
					tp->rcv_urp = n->t_seq;
					if (tp->oob_data)
					{
						m_freem(tp->oob_data);
						tp->oob_data = NULL;
					}

					so = tp->t_in_pcb->inp_socket;
					so->so_oobmark = so->so_rcv.sb_cc +
					    (tp->rcv_urp-tp->rcv_nxt);
					if (so->so_oobmark == 0)
						so->so_state |= SS_RCVATMARK;
				}
				tp->rcv_urpend = last;
			}
		}

		if (n->t_len != 0)
			rcv_text(tp, n);	/* accept and sequence data */

		/*
		 * Delay extraction of out-of-band data until
		 * present_data() so don't have to worry about
		 * duplication...
		 */

#ifdef bsd41
		/*
		 * Process PUSH, mark end of data chain.
		 *
		 * Not done in 4.2.  TCP is a byte stream, without record
		 * boundries, so don't have to mark for sbappend(), which
		 * preserves marks, and soreceive(), which terminates reads
		 * at marks.  Data IS pushed nevertheless since soreceive
		 * gives the user all that is available and returns.
		 */
		if (n->t_flags&T_PUSH && !tp->dropped_txt &&
		    tp->t_rcv_prev != (struct th *)tp)
		{

			/* Find last mbuf on received data chain and mark */

			m = dtom(tp->t_rcv_prev);
			if (m != NULL)
			{
				while (m->m_next != NULL)
					m = m->m_next;
				m->m_act = (struct mbuf *) 1;
			}
		}
#endif
	}
	/*
	 * Process FIN, check for duplicates and make sure all data is in.
	 */
	if (n->t_flags&T_FIN && !tp->dropped_txt)
	{
		if (tp->fin_rcvd)
			tp->ack_due = TRUE;
		else
		{
			/*
			 * Check if we really have FIN
			 * (rcv buf filled in, no drops)
			 */
			register sequence last;

			last = firstempty(tp);
			if ((tp->t_rcv_prev == (struct th *)tp &&
			    SEQ_EQ(last, t_end(n)+1)) ||
			    SEQ_EQ(last, t_end(tp->t_rcv_prev)+1))
			{
				tp->fin_rcvd = TRUE;
				uwake(tp->t_in_pcb);
			}
			/*
			 * If FIN, then set to ACK: incr rcv_nxt, since FIN
			 * occupies sequence space
			 */
			if (tp->fin_rcvd && SEQ_GEQ(tp->rcv_nxt, last))
			{
				tp->rcv_nxt = last + 1;
				tp->ack_due = TRUE;
			}
		}
	}
	/*
	 * If ACK required or rcv window has changed, try to send something.
	 */
	sentsomedata = FALSE;
	if (tp->ack_due)
		sentsomedata = send_tcp(tp, TCP_CTL);
	else if (tp->new_window)
		sentsomedata = send_tcp(tp, TCP_DATA);
	/*
	 * tp->cancelled => retransmit, rttl timers are now zero
	 *
	 * If didn't send any data, might not have retransmit, rttl timers
	 * running.  If we still have unACKed data and we turned off
	 * the timers above, then ensure timers are running.
	 */
	if (!sentsomedata && is_unacked(tp) && tp->cancelled)
	{
		tp->t_timers[TREXMT] = tp->t_rxmitime;
		tp->t_timers[TREXMTTL] = tp->t_rttltimeo;
		tp->cancelled = FALSE;
	}
}
720
/* the ACK-processing macros are only needed by rcv_tcp() above */
#undef BBNPING_RESET
#undef LENCHECK

/*
 * Process incoming data.  Put the segments on sequencing queue in order,
 * taking care of overlaps and duplicates.  Data is removed from sequence
 * queue by present_data when sequence is complete (no holes at top).
 * Drop data that falls outside buffer quota if tight for space.  Otherwise,
 * process and recycle data held in tcp_input.
 *
 * tp - the connection's TCP control block.
 * t  - header of the incoming segment; its mbuf chain holds the text.
 */
rcv_text(tp, t)
register struct tcpcb *tp;
register struct th *t;
{
	register i;
	register struct sockbuf *sorcv;
	register struct mbuf *m;
	register struct th *q;
	struct th *p;
	struct mbuf *n;
	struct th *savq;
	int j, oldkeep;
	sequence last;

	/* throw away any data we have already received */

	if ((i = tp->rcv_nxt - t->t_seq) > 0)
	{
		if (i < t->t_len)
		{
			/* partial duplicate: trim the old prefix */
			t->t_seq += i;
			t->t_len -= i;
			m_adj(dtom(t), i);
		}
		else
		{
			/* entirely old data */
			tp->t_olddata++;
			tp->ack_due = TRUE;	/* send ack just in case */
#ifdef HMPTRAPS
			/* hmp_trap(T_TCP_DUP, (caddr_t)0,0); */
#endif
			return;
		}
	}

	last = t_end(t);	/* last seq # in incoming seg */

	/* # buffers available to con */

	sorcv = &tp->t_in_pcb->inp_socket->so_rcv;
	i = sbspace(sorcv);
	if (i < 0)
		i = 0;

	/* enough resources to process segment?  used to walk mbuf chain to
	 * count up data bytes.  let's be smart and use t_len */

	j = t->t_len;
	if (j > i)
	{

		/* if segment preceeds top of sequencing queue, try to take
		   buffers from bottom of queue */

		q = tp->t_rcv_next;
		if (q != (struct th *)tp && SEQ_LT(tp->rcv_nxt, q->t_seq) &&
		    SEQ_LT(t->t_seq, q->t_seq))

			/* reclaim from the tail until we have room or run out */
			for (p = tp->t_rcv_prev; i < j && p != (struct th *)tp;)
			{
				savq = p->t_prev;
				TCP_DEQ(p, tp);
#ifdef HMPTRAPS
				/* hmp_trap(T_TCP_UDROP, (caddr_t)0,0); */
#endif
				for (m = dtom(p); m != NULL; m = m_free(m))
					i += m->m_len;
				p = savq;
			}

		/* if still not enough room, drop text from end of new segment */

		if (j > i)
		{

			/* skip the part that fits... */
			for (m = dtom(t); i > 0 && m != NULL; m = m->m_next)
				i -= m->m_len;

			/* ...and zero out the rest of the chain */
			while (m != NULL)
			{
				t->t_len -= m->m_len;
				last -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			}
			tp->dropped_txt = TRUE;
#ifdef HMPTRAPS
			/* hmp_trap(T_TCP_RDROP, (caddr_t)0,0); */
#endif
			if (SEQ_LT(last, t->t_seq))
				return;
		}
	}

	/* merge incoming data into the sequence queue */

	q = tp->t_rcv_next;	/* -> top of sequencing queue */

	/* skip frags which new doesn't overlap at end */

	while ((q != (struct th *)tp) && SEQ_GT(t->t_seq, t_end(q)))
		q = q->t_next;

	if (q == (struct th *)tp)
	{	/* frag at end of chain */

		if (SEQ_GEQ(last, tp->rcv_nxt))
		{
			tcp_net_keep = TRUE;
			TCP_ENQ(t, tp->t_rcv_prev, tp);
		}

	}
	else
	{

#ifdef HMPTRAPS
		/* we've received an out-of-order packet: trap! */

		/* hmp_trap(T_TCP_ORDER, (caddr_t)0,0); */

#endif
		/* frag doesn't overlap any on chain */

		if (SEQ_LT(last, q->t_seq))
		{
			tcp_net_keep = TRUE;
			TCP_ENQ(t, q->t_prev, tp);

		/* new overlaps beginning of next frag only */

		}
		else if (SEQ_LT(last, t_end(q)))
		{
			/* trim the duplicated tail off the new segment */
			if ((i = last - q->t_seq + 1) < t->t_len)
			{
				t->t_len -= i;
				m_adj(dtom(t), -i);
				tcp_net_keep = TRUE;
				TCP_ENQ(t, q->t_prev, tp);
			}

		/* new overlaps end of previous frag */

		}
		else
		{
			savq = q;
			if (SEQ_LEQ(t->t_seq, q->t_seq))
			{	/* complete cover */
				savq = q->t_prev;
				TCP_DEQ(q, tp);
				m_freem(dtom(q));

			}
			else
			{	/* overlap */
				/* trim the duplicated head off the new segment */
				if ((i = t_end(q) - t->t_seq + 1) < t->t_len)
				{
					t->t_seq += i;
					t->t_len -= i;
					m_adj(dtom(t), i);
				}
				else
					t->t_len = 0;
			}

			/* new overlaps at beginning of successor frags */

			q = savq->t_next;
			while ((q != (struct th *)tp) && (t->t_len != 0) &&
			    SEQ_LEQ(q->t_seq, last))

				/* complete cover */

				if (SEQ_LEQ(t_end(q), last))
				{
					p = q->t_next;
					TCP_DEQ(q, tp);
					m_freem(dtom(q));
					q = p;
				}
				else
				{	/* overlap */
					if ((i = last-q->t_seq+1) < t->t_len)
					{
						t->t_len -= i;
						m_adj(dtom(t), -i);
					}
					else
						t->t_len = 0;
					break;
				}

			/* enqueue whatever is left of new before successors */

			if (t->t_len != 0)
			{
				tcp_net_keep = TRUE;
				TCP_ENQ(t, savq, tp);
			}
		}
	}

	/* set to ack completed data (no gaps) */

	FIRSTEMPTY(tp, tp->rcv_nxt);
	tp->ack_due = TRUE;

	/* if any room remaining in rcv buf, take any unprocessed
	   messages and schedule for later processing */

	if ((m = tp->t_rcv_unack) != NULL && (i = sbspace(sorcv)) > 0)
		do
		{

			/* schedule work request */

			t = mtod(m, struct th *);
			/* strip IP + TCP headers before requeueing */
			j = (t->t_off << TCP_OFFSHIFT) + sizeof(struct ip);
			m->m_off += j;
			m->m_len -= j;
			tp->t_rcv_unack = m->m_act;
			m->m_act = (struct mbuf *)0;
			oldkeep = tcp_net_keep;
			tcpstat.t_unack++;
			w_alloc(INRECV, 0, tp, t);
			tcp_net_keep = oldkeep;

			/* remaining buffer space */

			for (n = m; n != NULL; n = n->m_next)
				i -= n->m_len;
		}
		while ((m = tp->t_rcv_unack) != NULL && i > 0);
}
967
/*
 * Send a reset segment in response to segment n on connection tp.
 *
 * For a "wildcard" listener the local/foreign addresses and foreign port
 * are temporarily borrowed from the offending packet so the RST can be
 * addressed, then restored afterwards (along with snd_nxt, the cached
 * route, and the temporary header template).
 */
send_rst(tp, n)
register struct tcpcb *tp;
register struct th *n;
{
	register struct inpcb *inp;
	struct in_addr src, dst;
	u_short port;
	int temp_rst;

	/* don't send a reset in response to a reset */

	if (n->t_flags&T_RST || (inp = tp->t_in_pcb) == NULL)
		return;

	tp->snd_rst = TRUE;
	temp_rst = FALSE;
	/* if his ACK tells us what he expects, reset from there */
	if (n->t_flags&T_ACK)
		tp->snd_nxt = n->t_ackno;

	/* if reset required from "wildcard" listener, take addresses and
	   port from incoming packet */

	if (inp->inp_laddr.s_addr == 0 || inp->inp_faddr.s_addr == 0 ||
	    inp->inp_fport == 0)
	{
		/* save current (wildcard) values for restoration below */
		src = inp->inp_laddr;
		dst = inp->inp_faddr;
		port = inp->inp_fport;
		inp->inp_laddr = n->t_d;
		inp->inp_faddr = n->t_s;
		inp->inp_fport = n->t_src;
		tp->t_template = tcp_template(tp);
		temp_rst = TRUE;
	}
	tp->syn_rcvd = FALSE;
	if (tp->t_template)
		(void) send_pkt(tp, 0, 0);
	else
		printf("send_rst: no template\n");
	tp->ack_due = FALSE;
	tp->snd_rst = FALSE;
#if T_DELACK > 0
	tp->force_ack = FALSE;
	t_cancel(tp, TDELACK);
	tp->ack_skipped = 0;
#endif

	/* restore "wildcard" addresses */

	if (temp_rst)
	{
		inp->inp_laddr = src;
		inp->inp_faddr = dst;
		inp->inp_fport = port;
		tp->snd_nxt = tp->iss;
		/* the route was for the borrowed destination; drop it */
		if (inp->inp_route.ro_rt != NULL)
		{
			rtfree(inp->inp_route.ro_rt);
			inp->inp_route.ro_rt = NULL;
		}
		if (tp->t_template)
		{
			m_free(dtom(tp->t_template));
			tp->t_template = NULL;
		}
	}
}
1038
extract_oob(tp,mp,sorcv)1039 struct mbuf *extract_oob(tp, mp, sorcv)
1040 struct tcpcb *tp;
1041 struct mbuf *mp;
1042 struct sockbuf *sorcv;
1043 {
1044 struct socket *so;
1045 struct mbuf *top, *here, *m;
1046 int off, len, tmp;
1047
1048 m = mp;
1049 so = tp->t_in_pcb->inp_socket;
1050 /*
1051 * skip over bytes that preceed out of band data.
1052 */
1053 if ((off = so->so_oobmark - sorcv->sb_cc) < 0)
1054 {
1055 log(LOG_INFO, "extract_oob: neg off\n");
1056 tp->rcv_urpend = tp->rcv_urp = tp->irs;
1057 return (mp);
1058 }
1059
1060 while (m && (off > 0))
1061 {
1062 if (m->m_len <= off)
1063 {
1064 off -= m->m_len;
1065 m = m->m_next;
1066 }
1067 else
1068 break;
1069 }
1070
1071 if (!m)
1072 return (mp);
1073
1074 /*
1075 * copy out of band data. removing it from input stream.
1076 */
1077 len = tp->rcv_urpend - tp->rcv_urp + 1; /* # urgent bytes */
1078 top = here = NULL;
1079 while (m && (len > 0))
1080 {
1081 char *p;
1082 struct mbuf *newm;
1083 int dropped;
1084
1085 tmp = MIN(m->m_len - off, len);
1086 /* tmp == # urgent bytes in this mbuf */
1087 len -= tmp;
1088 tp->rcv_urp += tmp;
1089
1090 p = mtod(m, caddr_t) + off; /* points at first urgent byte */
1091 dropped = FALSE;
1092
1093 while (tmp > 0)
1094 {
1095 unsigned nbytes;
1096
1097 /* in case this mbuf uses pages */
1098 nbytes = MIN(tmp, MLEN);
1099
1100 if (! dropped)
1101 {
1102 if (newm = m_get(M_WAIT, MT_DATA))
1103 {
1104 bcopy (p, mtod(newm, char *), nbytes);
1105 newm->m_len = nbytes;
1106
1107 if (!top)
1108 top = here = newm;
1109 else
1110 {
1111 here->m_next = newm;
1112 here = here->m_next;
1113 }
1114 }
1115 else
1116 /* potential unreliability */
1117 dropped = TRUE;
1118 }
1119
1120 bcopy(p+nbytes, p, (unsigned)(m->m_len -off -nbytes));
1121 m->m_len -= nbytes;
1122 tmp -= nbytes;
1123 }
1124
1125 if (m->m_len <= 0)
1126 {
1127 /*
1128 * So soreceive never sees a zero length mbuf
1129 * with m_act set. (PUSHED URGENT data packet)
1130 */
1131 if (m == mp)
1132 mp = m = m_free(m);
1133 else
1134 m = m_free(m);
1135 }
1136 else
1137 m = m->m_next;
1138
1139 off = 0;
1140 }
1141
1142 if (top)
1143 {
1144 if (tp->oob_data)
1145 m_cat (tp->oob_data, top);
1146 else
1147 tp->oob_data = top;
1148 sohasoutofband(so);
1149 }
1150
1151 return (mp);
1152 }
1153
/*
 * Accept data for the user to receive.  Moves data from sequenced tcp
 * segments from the sequencing queue to the user's receive queue (in the
 * ucb).  Observes locking on receive queue.
 */
present_data(tp)
register struct tcpcb *tp;
{
	/*
	 * All of the work is done by the PRESENT_DATA statement macro
	 * (defined in an included header -- presumably macros.h or tcp.h;
	 * TODO(review): confirm), hence no trailing semicolon.
	 */
	PRESENT_DATA(tp)
}
1164