xref: /original-bsd/sys/netinet/tcp_output.c (revision 7a7a259e)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)tcp_output.c	6.12 (Berkeley) 01/22/86
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mbuf.h"
12 #include "protosw.h"
13 #include "socket.h"
14 #include "socketvar.h"
15 #include "errno.h"
16 
17 #include "../net/route.h"
18 
19 #include "in.h"
20 #include "in_pcb.h"
21 #include "in_systm.h"
22 #include "ip.h"
23 #include "ip_var.h"
24 #include "tcp.h"
25 #define	TCPOUTFLAGS
26 #include "tcp_fsm.h"
27 #include "tcp_seq.h"
28 #include "tcp_timer.h"
29 #include "tcp_var.h"
30 #include "tcpip.h"
31 #include "tcp_debug.h"
32 
33 /*
34  * Initial options.
35  */
36 u_char	tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
37 
38 /*
39  * Tcp output routine: figure out what should be sent and send it.
40  */
41 tcp_output(tp)
42 	register struct tcpcb *tp;
43 {
44 	register struct socket *so = tp->t_inpcb->inp_socket;
45 	register int len, win;
46 	struct mbuf *m0;
47 	int off, flags, error;
48 	register struct mbuf *m;
49 	register struct tcpiphdr *ti;
50 	u_char *opt;
51 	unsigned optlen = 0;
52 	int idle, sendalot;
53 
54 	/*
55 	 * Determine length of data that should be transmitted,
56 	 * and flags that will be used.
57 	 * If there is some data or critical controls (SYN, RST)
58 	 * to send, then transmit; otherwise, investigate further.
59 	 */
60 	idle = (tp->snd_max == tp->snd_una);
61 again:
62 	sendalot = 0;
63 	off = tp->snd_nxt - tp->snd_una;
64 	win = MIN(tp->snd_wnd, tp->snd_cwnd);
65 	/*
66 	 * If in persist timeout with window of 0, send 1 byte.
67 	 * Otherwise, if window is small but nonzero
68 	 * and timer expired, we will send what we can
69 	 * and go to transmit state.
70 	 */
71 	if (tp->t_force) {
72 		if (win == 0)
73 			win = 1;
74 		else {
75 			tp->t_timer[TCPT_PERSIST] = 0;
76 			tp->t_rxtshift = 0;
77 		}
78 	}
79 
80 	len = MIN(so->so_snd.sb_cc, win) - off;
81 	if (len < 0)
82 		return (0);	/* ??? */	/* past FIN */
83 	if (len > tp->t_maxseg) {
84 		len = tp->t_maxseg;
85 		/*
86 		 * Don't send more than one segment if retransmitting
87 		 * (or persisting, but then we shouldn't be here).
88 		 */
89 		if (tp->t_rxtshift == 0)
90 			sendalot = 1;
91 	}
92 
93 	win = sbspace(&so->so_rcv);
94 	flags = tcp_outflags[tp->t_state];
95 	if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc)
96 		flags &= ~TH_FIN;
97 	if (flags & (TH_SYN|TH_RST|TH_FIN))
98 		goto send;
99 
100 	/*
101 	 * Send if we owe peer an ACK.
102 	 */
103 	if (tp->t_flags&TF_ACKNOW)
104 		goto send;
105 	if (SEQ_GT(tp->snd_up, tp->snd_una))
106 		goto send;
107 
108 	/*
109 	 * Sender silly window avoidance.  If connection is idle
110 	 * and can send all data, a maximum segment,
111 	 * at least a maximum default-size segment do it,
112 	 * or are forced, do it; otherwise don't bother.
113 	 * If peer's buffer is tiny, then send
114 	 * when window is at least half open.
115 	 * If retransmitting (possibly after persist timer forced us
116 	 * to send into a small window), then must resend.
117 	 */
118 	if (len) {
119 		if (len == tp->t_maxseg || len >= TCP_MSS)	/* a lot */
120 			goto send;
121 		if ((idle || tp->t_flags & TF_NODELAY) &&
122 		    len + off >= so->so_snd.sb_cc)
123 			goto send;
124 		if (tp->t_force)
125 			goto send;
126 		if (len >= tp->max_sndwnd / 2)
127 			goto send;
128 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
129 			goto send;
130 	} else
131 		/*
132 		 * If window shrank after we sent into it,
133 		 * cancel pending retransmit.  We will enter
134 		 * persist state below.
135 		 */
136 		if (off == 0 && SEQ_LT(tp->snd_nxt, tp->snd_max))
137 			tp->t_timer[TCPT_REXMT] = 0;
138 
139 
140 	/*
141 	 * Compare available window to amount of window
142 	 * known to peer (as advertised window less
143 	 * next expected input.)  If the difference is 35% or more of the
144 	 * maximum possible window, then want to send a window update to peer.
145 	 */
146 	if (win > 0 &&
147 	    ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35))
148 		goto send;
149 
150 	/*
151 	 * TCP window updates are not reliable, rather a polling protocol
152 	 * using ``persist'' packets is used to insure receipt of window
153 	 * updates.  The three ``states'' for the output side are:
154 	 *	idle			not doing retransmits or persists
155 	 *	persisting		to move a small or zero window
156 	 *	(re)transmitting	and thereby not persisting
157 	 *
158 	 * tp->t_timer[TCPT_PERSIST]
159 	 *	is set when we are in persist state.
160 	 * tp->t_force
161 	 *	is set when we are called to send a persist packet.
162 	 * tp->t_timer[TCPT_REXMT]
163 	 *	is set when we are retransmitting
164 	 * The output side is idle when both timers are zero.
165 	 *
166 	 * If send window is too small, there is data to transmit, and no
167 	 * retransmit or persist is pending, then go to persist state.
168 	 * If nothing happens soon, send when timer expires:
169 	 * if window is nonzero, transmit what we can,
170 	 * otherwise force out a byte.
171 	 */
172 	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
173 	    tp->t_timer[TCPT_PERSIST] == 0) {
174 		tp->t_rxtshift = 0;
175 		tcp_setpersist(tp);
176 	}
177 
178 	/*
179 	 * No reason to send a segment, just return.
180 	 */
181 	return (0);
182 
183 send:
184 	/*
185 	 * Grab a header mbuf, attaching a copy of data to
186 	 * be transmitted, and initialize the header from
187 	 * the template for sends on this connection.
188 	 */
189 	MGET(m, M_DONTWAIT, MT_HEADER);
190 	if (m == NULL)
191 		return (ENOBUFS);
192 	m->m_off = MMAXOFF - sizeof (struct tcpiphdr);
193 	m->m_len = sizeof (struct tcpiphdr);
194 	if (len) {
195 		m->m_next = m_copy(so->so_snd.sb_mb, off, len);
196 		if (m->m_next == 0)
197 			len = 0;
198 	}
199 	ti = mtod(m, struct tcpiphdr *);
200 	if (tp->t_template == 0)
201 		panic("tcp_output");
202 	bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
203 
204 	/*
205 	 * Fill in fields, remembering maximum advertised
206 	 * window for use in delaying messages about window sizes.
207 	 */
208 	ti->ti_seq = htonl(tp->snd_nxt);
209 	ti->ti_ack = htonl(tp->rcv_nxt);
210 	/*
211 	 * Before ESTABLISHED, force sending of initial options
212 	 * unless TCP set to not do any options.
213 	 */
214 	opt = NULL;
215 	if (tp->t_state < TCPS_ESTABLISHED && (tp->t_flags & TF_NOOPT) == 0) {
216 		int mss;
217 
218 		mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
219 		if (mss > IP_MSS - sizeof(struct tcpiphdr)) {
220 			opt = tcp_initopt;
221 			optlen = sizeof (tcp_initopt);
222 			*(u_short *)(opt + 2) = htons(mss);
223 		}
224 	} else if (tp->t_tcpopt) {
225 		opt = mtod(tp->t_tcpopt, u_char *);
226 		optlen = tp->t_tcpopt->m_len;
227 	}
228 	if (opt) {
229 		m0 = m->m_next;
230 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
231 		if (m->m_next == 0) {
232 			(void) m_free(m);
233 			m_freem(m0);
234 			return (ENOBUFS);
235 		}
236 		m->m_next->m_next = m0;
237 		m0 = m->m_next;
238 		m0->m_len = optlen;
239 		bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
240 		opt = (u_char *)(mtod(m0, caddr_t) + optlen);
241 		while (m0->m_len & 0x3) {
242 			*opt++ = TCPOPT_EOL;
243 			m0->m_len++;
244 		}
245 		optlen = m0->m_len;
246 		ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
247 	}
248 	ti->ti_flags = flags;
249 	/*
250 	 * Calculate receive window.  Don't shrink window,
251 	 * but avoid silly window syndrome.
252 	 */
253 	if (win < so->so_rcv.sb_hiwat / 4 && win < tp->t_maxseg)
254 		win = 0;
255 	if (win < (int)(tp->rcv_adv - tp->rcv_nxt))
256 		win = (int)(tp->rcv_adv - tp->rcv_nxt);
257 	ti->ti_win = htons((u_short)win);
258 	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
259 		ti->ti_urp = htons(tp->snd_up - tp->snd_nxt);
260 		ti->ti_flags |= TH_URG;
261 	} else
262 		/*
263 		 * If no urgent pointer to send, then we pull
264 		 * the urgent pointer to the left edge of the send window
265 		 * so that it doesn't drift into the send window on sequence
266 		 * number wraparound.
267 		 */
268 		tp->snd_up = tp->snd_una;		/* drag it along */
269 	/*
270 	 * If anything to send and we can send it all, set PUSH.
271 	 * (This will keep happy those implementations which only
272 	 * give data to the user when a buffer fills or a PUSH comes in.)
273 	 */
274 	if (len && off+len == so->so_snd.sb_cc)
275 		ti->ti_flags |= TH_PUSH;
276 
277 	/*
278 	 * Put TCP length in extended header, and then
279 	 * checksum extended header and data.
280 	 */
281 	if (len + optlen)
282 		ti->ti_len = htons((u_short)(sizeof(struct tcphdr) +
283 		    optlen + len));
284 	ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len);
285 
286 	/*
287 	 * In transmit state, time the transmission and arrange for
288 	 * the retransmit.  In persist state, just set snd_max.
289 	 */
290 	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
291 		/*
292 		 * Advance snd_nxt over sequence space of this segment.
293 		 */
294 		if (flags & (TH_SYN|TH_FIN))
295 			tp->snd_nxt++;
296 		tp->snd_nxt += len;
297 		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
298 			tp->snd_max = tp->snd_nxt;
299 			/*
300 			 * Time this transmission if not a retransmission and
301 			 * not currently timing anything.
302 			 */
303 			if (tp->t_rtt == 0) {
304 				tp->t_rtt = 1;
305 				tp->t_rtseq = tp->snd_nxt - len;
306 			}
307 		}
308 
309 		/*
310 		 * Set retransmit timer if not currently set,
311 		 * and not doing a keep-alive probe.
312 		 * Initial value for retransmit timer is tcp_beta*tp->t_srtt.
313 		 * Initialize shift counter which is used for exponential
314 		 * backoff of retransmit time.
315 		 */
316 		if (tp->t_timer[TCPT_REXMT] == 0 &&
317 		    tp->snd_nxt != tp->snd_una) {
318 			TCPT_RANGESET(tp->t_timer[TCPT_REXMT],
319 			    tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX);
320 			tp->t_rxtshift = 0;
321 		}
322 		tp->t_timer[TCPT_PERSIST] = 0;
323 	} else {
324 		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
325 			tp->snd_max = tp->snd_nxt + len;
326 	}
327 
328 	/*
329 	 * Trace.
330 	 */
331 	if (so->so_options & SO_DEBUG)
332 		tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
333 
334 	/*
335 	 * Fill in IP length and desired time to live and
336 	 * send to IP level.
337 	 */
338 	((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
339 	((struct ip *)ti)->ip_ttl = TCP_TTL;
340 	if (so->so_options & SO_DONTROUTE)
341 		error =
342 		   ip_output(m, tp->t_inpcb->inp_options, (struct route *)0,
343 			IP_ROUTETOIF);
344 	else
345 		error = ip_output(m, tp->t_inpcb->inp_options,
346 		    &tp->t_inpcb->inp_route, 0);
347 	if (error)
348 		return (error);
349 
350 	/*
351 	 * Data sent (as far as we can tell).
352 	 * If this advertises a larger window than any other segment,
353 	 * then remember the size of the advertised window.
354 	 * Any pending ACK has now been sent.
355 	 */
356 	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
357 		tp->rcv_adv = tp->rcv_nxt + win;
358 	tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
359 	if (sendalot)
360 		goto again;
361 	return (0);
362 }
363 
364 tcp_setpersist(tp)
365 	register struct tcpcb *tp;
366 {
367 
368 	if (tp->t_timer[TCPT_REXMT])
369 		panic("tcp_output REXMT");
370 	/*
371 	 * Start/restart persistance timer.
372 	 */
373 	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
374 	    ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift,
375 	    TCPTV_PERSMIN, TCPTV_MAX);
376 	tp->t_rxtshift++;
377 	if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
378 		tp->t_rxtshift = 0;
379 }
380