xref: /original-bsd/sys/netiso/tp_subr.c (revision be1f24e8)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)tp_subr.c	7.19 (Berkeley) 09/21/92
8  */
9 
10 /***********************************************************
11 		Copyright IBM Corporation 1987
12 
13                       All Rights Reserved
14 
15 Permission to use, copy, modify, and distribute this software and its
16 documentation for any purpose and without fee is hereby granted,
17 provided that the above copyright notice appear in all copies and that
18 both that copyright notice and this permission notice appear in
19 supporting documentation, and that the name of IBM not be
20 used in advertising or publicity pertaining to distribution of the
21 software without specific, written prior permission.
22 
23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29 SOFTWARE.
30 
31 ******************************************************************/
32 
33 /*
34  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35  */
36 /*
37  * ARGO TP
38  *
39  * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40  * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41  *
42  * The main work of data transfer is done here.
43  * These routines are called from tp.trans.
44  * They include the routines that check the validity of acks and Xacks,
45  * (tp_goodack() and tp_goodXack() )
46  * take packets from socket buffers and send them (tp_send()),
47  * drop the data from the socket buffers (tp_sbdrop()),
48  * and put incoming packet data into socket buffers (tp_stash()).
49  */
50 
51 #include "param.h"
52 #include "systm.h"
53 #include "mbuf.h"
54 #include "socket.h"
55 #include "socketvar.h"
56 #include "protosw.h"
57 #include "errno.h"
58 #include "types.h"
59 #include "time.h"
60 #include "kernel.h"
61 
62 #include "tp_ip.h"
63 #include "iso.h"
64 #include "argo_debug.h"
65 #include "tp_timer.h"
66 #include "tp_param.h"
67 #include "tp_stat.h"
68 #include "tp_pcb.h"
69 #include "tp_tpdu.h"
70 #include "tp_trace.h"
71 #include "tp_meas.h"
72 #include "tp_seq.h"
73 
/* Routines this file calls before, or without, a visible definition. */
int		tp_emit();
int		tp_sbdrop();
void	tp_send();

/*
 * Number of consecutive duplicate AKs at which tp_goodack()
 * retransmits starting from tp_snduna.
 */
int		tprexmtthresh = 3;

/* System clock tick counter; defined outside this file. */
extern int	ticks;
78 
79 /*
80  * CALLED FROM:
81  *	tp.trans, when an XAK arrives
82  * FUNCTION and ARGUMENTS:
83  * 	Determines if the sequence number (seq) from the XAK
84  * 	acks anything new.  If so, drop the appropriate tpdu
85  * 	from the XPD send queue.
86  * RETURN VALUE:
87  * 	Returns 1 if it did this, 0 if the ack caused no action.
88  */
89 int
90 tp_goodXack(tpcb, seq)
91 	struct tp_pcb	*tpcb;
92 	SeqNum 			seq;
93 {
94 
95 	IFTRACE(D_XPD)
96 		tptraceTPCB(TPPTgotXack,
97 			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
98 			tpcb->tp_snduna);
99 	ENDTRACE
100 
101 	if ( seq == tpcb->tp_Xuna ) {
102 			tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
103 
104 			/* DROP 1 packet from the Xsnd socket buf - just so happens
105 			 * that only one packet can be there at any time
106 			 * so drop the whole thing.  If you allow > 1 packet
107 			 * the socket buffer, then you'll have to keep
108 			 * track of how many characters went w/ each XPD tpdu, so this
109 			 * will get messier
110 			 */
111 			IFDEBUG(D_XPD)
112 				dump_mbuf(tpcb->tp_Xsnd.sb_mb,
113 					"tp_goodXack Xsnd before sbdrop");
114 			ENDDEBUG
115 
116 			IFTRACE(D_XPD)
117 				tptraceTPCB(TPPTmisc,
118 					"goodXack: dropping cc ",
119 					(int)(tpcb->tp_Xsnd.sb_cc),
120 					0,0,0);
121 			ENDTRACE
122 			sbdroprecord(&tpcb->tp_Xsnd);
123 			return 1;
124 	}
125 	return 0;
126 }
127 
128 /*
129  * CALLED FROM:
130  *  tp_good_ack()
131  * FUNCTION and ARGUMENTS:
132  *  updates
133  *  smoothed average round trip time (*rtt)
134  *  roundtrip time variance (*rtv) - actually deviation, not variance
135  *  given the new value (diff)
136  * RETURN VALUE:
137  * void
138  */
139 
140 void
141 tp_rtt_rtv(tpcb)
142 register struct tp_pcb *tpcb;
143 {
144 	int old = tpcb->tp_rtt;
145 	int delta, elapsed = ticks - tpcb->tp_rttemit;
146 
147 	if (tpcb->tp_rtt != 0) {
148 		/*
149 		 * rtt is the smoothed round trip time in machine clock ticks (hz).
150 		 * It is stored as a fixed point number, unscaled (unlike the tcp
151 		 * srtt).  The rationale here is that it is only significant to the
152 		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
153 		 * so there is no need to scale.  The smoothing is done according
154 		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
155 		 */
156 		delta = elapsed - tpcb->tp_rtt;
157 		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
158 			tpcb->tp_rtt = 1;
159 		/*
160 		 * rtv is a smoothed accumulated mean difference, unscaled
161 		 * for reasons expressed above.
162 		 * It is smoothed with an alpha of .75, and the round trip timer
163 		 * will be set to rtt + 4*rtv, also as TCP does.
164 		 */
165 		if (delta < 0)
166 			delta = -delta;
167 		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
168 			tpcb->tp_rtv = 1;
169 	} else {
170 		/*
171 		 * No rtt measurement yet - use the unsmoothed rtt.
172 		 * Set the variance to half the rtt (so our first
173 		 * retransmit happens at 3*rtt)
174 		 */
175 		tpcb->tp_rtt = elapsed;
176 		tpcb->tp_rtv = elapsed >> 1;
177 	}
178 	tpcb->tp_rttemit = 0;
179 	tpcb->tp_rxtshift = 0;
180 	/*
181 	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
182 	 * Because of the way we do the smoothing, srtt and rttvar
183 	 * will each average +1/2 tick of bias.  When we compute
184 	 * the retransmit timer, we want 1/2 tick of rounding and
185 	 * 1 extra tick because of +-1/2 tick uncertainty in the
186 	 * firing of the timer.  The bias will give us exactly the
187 	 * 1.5 tick we need.  But, because the bias is
188 	 * statistical, we have to test that we don't drop below
189 	 * the minimum feasible timer (which is 2 ticks)."
190 	 */
191 	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
192 		tpcb->tp_peer_acktime, 128 /* XXX */);
193 	IFDEBUG(D_RTT)
194 		printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
195 			"tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
196 	ENDDEBUG
197 	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
198 }
199 
200 /*
201  * CALLED FROM:
202  *  tp.trans when an AK arrives
203  * FUNCTION and ARGUMENTS:
204  * 	Given (cdt), the credit from the AK tpdu, and
205  *	(seq), the sequence number from the AK tpdu,
206  *  tp_goodack() determines if the AK acknowledges something in the send
207  * 	window, and if so, drops the appropriate packets from the retransmission
208  *  list, computes the round trip time, and updates the retransmission timer
209  *  based on the new smoothed round trip time.
210  * RETURN VALUE:
211  * 	Returns 1 if
212  * 	EITHER it actually acked something heretofore unacknowledged
213  * 	OR no news but the credit should be processed.
214  * 	If something heretofore unacked was acked with this sequence number,
215  * 	the appropriate tpdus are dropped from the retransmission control list,
216  * 	by calling tp_sbdrop().
217  * 	No need to see the tpdu itself.
218  */
219 int
220 tp_goodack(tpcb, cdt, seq, subseq)
221 	register struct tp_pcb	*tpcb;
222 	u_int					cdt;
223 	register SeqNum			seq;
224 	u_int					subseq;
225 {
226 	int 	old_fcredit;
227 	int 	bang = 0; 	/* bang --> ack for something heretofore unacked */
228 	u_int	bytes_acked;
229 
230 	IFDEBUG(D_ACKRECV)
231 		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
232 			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
233 	ENDDEBUG
234 	IFTRACE(D_ACKRECV)
235 		tptraceTPCB(TPPTgotack,
236 			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
237 	ENDTRACE
238 
239 	IFPERF(tpcb)
240 		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
241 	ENDPERF
242 
243 	if (seq == tpcb->tp_snduna) {
244 		if (subseq < tpcb->tp_r_subseq ||
245 			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
246 		discard_the_ack:
247 			IFDEBUG(D_ACKRECV)
248 				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
249 					tpcb, subseq, tpcb->tp_r_subseq);
250 			ENDDEBUG
251 			goto done;
252 		}
253 		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
254 			tpcb->tp_r_subseq = subseq;
255 			if (tpcb->tp_timer[TM_data_retrans] == 0)
256 				tpcb->tp_dupacks = 0;
257 			else if (++tpcb->tp_dupacks == tprexmtthresh) {
258 				/* partner went out of his way to signal with different
259 				   subsequences that he has the same lack of an expected
260 				   packet.  This may be an early indiciation of a loss */
261 
262 				SeqNum onxt = tpcb->tp_sndnxt;
263 				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
264 				u_int win = min(tpcb->tp_fcredit,
265 							tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
266 				IFDEBUG(D_ACKRECV)
267 					printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
268 						"goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
269 				ENDDEBUG
270 				if (win < 2)
271 					win = 2;
272 				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
273 				tpcb->tp_timer[TM_data_retrans] = 0;
274 				tpcb->tp_rttemit = 0;
275 				tpcb->tp_sndnxt = tpcb->tp_snduna;
276 				tpcb->tp_sndnxt_m = 0;
277 				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
278 				tp_send(tpcb);
279 				tpcb->tp_cong_win = tpcb->tp_ssthresh +
280 					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
281 				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
282 					tpcb->tp_sndnxt = onxt;
283 					tpcb->tp_sndnxt_m = onxt_m;
284 				}
285 
286 			} else if (tpcb->tp_dupacks > tprexmtthresh) {
287 				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
288 			}
289 			goto done;
290 		}
291 	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
292 		goto discard_the_ack;
293 	/*
294 	 * If the congestion window was inflated to account
295 	 * for the other side's cached packets, retract it.
296 	 */
297 	if (tpcb->tp_dupacks > tprexmtthresh &&
298 		tpcb->tp_cong_win > tpcb->tp_ssthresh)
299 			tpcb->tp_cong_win = tpcb->tp_ssthresh;
300 	tpcb->tp_r_subseq = subseq;
301 	old_fcredit = tpcb->tp_fcredit;
302 	tpcb->tp_fcredit = cdt;
303 	if (cdt > tpcb->tp_maxfcredit)
304 		tpcb->tp_maxfcredit = cdt;
305 	tpcb->tp_dupacks = 0;
306 
307 	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
308 
309 		tpsbcheck(tpcb, 0);
310 		bytes_acked = tp_sbdrop(tpcb, seq);
311 		tpsbcheck(tpcb, 1);
312 		/*
313 		 * If transmit timer is running and timed sequence
314 		 * number was acked, update smoothed round trip time.
315 		 * Since we now have an rtt measurement, cancel the
316 		 * timer backoff (cf., Phil Karn's retransmit alg.).
317 		 * Recompute the initial retransmit timer.
318 		 */
319 		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
320 			tp_rtt_rtv(tpcb);
321 		/*
322 		 * If all outstanding data is acked, stop retransmit timer.
323 		 * If there is more data to be acked, restart retransmit
324 		 * timer, using current (possibly backed-off) value.
325 		 * OSI combines the keepalive and persistance functions.
326 		 * So, there is no persistance timer per se, to restart.
327 		 */
328 		if (tpcb->tp_class != TP_CLASS_0)
329 			tpcb->tp_timer[TM_data_retrans] =
330 				(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
331 		/*
332 		 * When new data is acked, open the congestion window.
333 		 * If the window gives us less than ssthresh packets
334 		 * in flight, open exponentially (maxseg per packet).
335 		 * Otherwise open linearly: maxseg per window
336 		 * (maxseg^2 / cwnd per packet), plus a constant
337 		 * fraction of a packet (maxseg/8) to help larger windows
338 		 * open quickly enough.
339 		 */
340 		{
341 			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
342 
343 			incr = min(incr, bytes_acked);
344 			if (cw > tpcb->tp_ssthresh)
345 				incr = incr * incr / cw + incr / 8;
346 			tpcb->tp_cong_win =
347 				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
348 		}
349 		tpcb->tp_snduna = seq;
350 		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
351 				tpcb->tp_sndnxt = seq;
352 				tpcb->tp_sndnxt_m = 0;
353 		}
354 		bang++;
355 	}
356 
357 	if( cdt != 0 && old_fcredit == 0 ) {
358 		tpcb->tp_sendfcc = 1;
359 	}
360 	if (cdt == 0) {
361 		if (old_fcredit != 0)
362 			IncStat(ts_zfcdt);
363 		/* The following might mean that the window shrunk */
364 		if (tpcb->tp_timer[TM_data_retrans]) {
365 			tpcb->tp_timer[TM_data_retrans] = 0;
366 			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
367 			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
368 				tpcb->tp_sndnxt = tpcb->tp_snduna;
369 				tpcb->tp_sndnxt_m = 0;
370 			}
371 		}
372 	}
373 	tpcb->tp_fcredit = cdt;
374 	bang |= (old_fcredit < cdt);
375 
376 done:
377 	IFDEBUG(D_ACKRECV)
378 		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
379 			bang, cdt, old_fcredit, tpcb->tp_cong_win);
380 	ENDDEBUG
381 	/* if (bang) XXXXX Very bad to remove this test, but somethings broken */
382 		tp_send(tpcb);
383 	return (bang);
384 }
385 
386 /*
387  * CALLED FROM:
388  *  tp_goodack()
389  * FUNCTION and ARGUMENTS:
390  *  drops everything up TO but not INCLUDING seq # (seq)
391  *  from the retransmission queue.
392  */
393 tp_sbdrop(tpcb, seq)
394 	register struct 	tp_pcb 			*tpcb;
395 	SeqNum					seq;
396 {
397 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
398 	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
399 	int	oldcc = sb->sb_cc, oldi = i;
400 
401 	if (i >= tpcb->tp_seqhalf)
402 		printf("tp_spdropping too much -- should panic");
403 	while (i-- > 0)
404 		sbdroprecord(sb);
405 	IFDEBUG(D_ACKRECV)
406 		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
407 			oldi, oldcc - sb->sb_cc, tpcb, seq);
408 	ENDDEBUG
409 	if (sb->sb_flags & SB_NOTIFY)
410 		sowwakeup(tpcb->tp_sock);
411 	return (oldcc - sb->sb_cc);
412 }
413 
414 /*
415  * CALLED FROM:
416  * 	tp.trans on user send request, arrival of AK and arrival of XAK
417  * FUNCTION and ARGUMENTS:
418  * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
419  * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
420  * 			c) it hits seq number (highseq) limited by cong or credit.
421  *
422  * 	If you want XPD to buffer > 1 du per socket buffer, you can
423  * 	modifiy this to issue XPD tpdus also, but then it'll have
424  * 	to take some argument(s) to distinguish between the type of DU to
425  * 	hand tp_emit.
426  *
427  * 	When something is sent for the first time, its time-of-send
428  * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
429  *  When the ack arrives, the smoothed round-trip time is figured
430  *  using this value.
431  */
432 void
433 tp_send(tpcb)
434 	register struct tp_pcb	*tpcb;
435 {
436 	register int			len;
437 	register struct mbuf	*m;
438 	struct mbuf				*mb = 0;
439 	struct 	sockbuf			*sb = &tpcb->tp_sock->so_snd;
440 	unsigned int			eotsdu = 0;
441 	SeqNum					highseq, checkseq;
442 	int						idle, idleticks, off, cong_win;
443 #ifdef TP_PERF_MEAS
444 	int			 			send_start_time = ticks;
445 	SeqNum					oldnxt = tpcb->tp_sndnxt;
446 #endif TP_PERF_MEAS
447 
448 	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
449 	if (idle) {
450 		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
451 		if (idleticks > tpcb->tp_dt_ticks)
452 			/*
453 			 * We have been idle for "a while" and no acks are
454 			 * expected to clock out any data we send --
455 			 * slow start to get ack "clock" running again.
456 			 */
457 			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
458 	}
459 
460 	cong_win = tpcb->tp_cong_win;
461 	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
462 	if (tpcb->tp_Xsnd.sb_mb)
463 		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
464 
465 	IFDEBUG(D_DATA)
466 		printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
467 				tpcb, tpcb->tp_sndnxt, cong_win, highseq);
468 	ENDDEBUG
469 	IFTRACE(D_DATA)
470 		tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
471 			tpcb->tp_sndnew,  tpcb->tp_snduna, 0, 0);
472 		tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
473 			tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
474 	ENDTRACE
475 	IFTRACE(D_DATA)
476 		tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
477 			tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
478 	ENDTRACE
479 
480 	if (tpcb->tp_sndnxt_m)
481 		m = tpcb->tp_sndnxt_m;
482 	else {
483 		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
484 		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
485 			off--;
486 	}
487 send:
488 	/*
489 	 * Avoid silly window syndrome here . . . figure out how!
490 	 */
491 	checkseq = tpcb->tp_sndnum;
492 	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
493 		checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
494 
495 	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
496 
497 		eotsdu = (m->m_flags & M_EOR) != 0;
498 		len = m->m_pkthdr.len;
499 		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
500 			len < (tpcb->tp_l_tpdusize / 2))
501 				break;  /* Nagle . . . . . */
502 		cong_win -= len;
503 		/* make a copy - mb goes into the retransmission list
504 		 * while m gets emitted.  m_copy won't copy a zero-length mbuf.
505 		 */
506 		mb = m;
507 		m = m_copy(mb, 0, M_COPYALL);
508 		if (m == MNULL)
509 				break;
510 		IFTRACE(D_STASH)
511 			tptraceTPCB( TPPTmisc,
512 				"tp_send mcopy nxt high eotsdu len",
513 				tpcb->tp_sndnxt, highseq, eotsdu, len);
514 		ENDTRACE
515 
516 		IFDEBUG(D_DATA)
517 			printf("tp_sending tpcb 0x%x nxt 0x%x\n",
518 				tpcb, tpcb->tp_sndnxt);
519 		ENDDEBUG
520 		/* when headers are precomputed, may need to fill
521 			   in checksum here */
522 		if (tpcb->tp_sock->so_error =
523 			tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
524 			/* error */
525 			break;
526 		}
527 		m = mb->m_nextpkt;
528 		tpcb->tp_sndnxt_m = m;
529 		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
530 			SEQ_INC(tpcb, tpcb->tp_sndnew);
531 			/*
532 			 * Time this transmission if not a retransmission and
533 			 * not currently timing anything.
534 			 */
535 			if (tpcb->tp_rttemit == 0) {
536 				tpcb->tp_rttemit = ticks;
537 				tpcb->tp_rttseq = tpcb->tp_sndnxt;
538 			}
539 			tpcb->tp_sndnxt = tpcb->tp_sndnew;
540 		} else
541 			SEQ_INC(tpcb, tpcb->tp_sndnxt);
542 		/*
543 		 * Set retransmit timer if not currently set.
544 		 * Initial value for retransmit timer is smoothed
545 		 * round-trip time + 2 * round-trip time variance.
546 		 * Initialize shift counter which is used for backoff
547 		 * of retransmit time.
548 		 */
549 		if (tpcb->tp_timer[TM_data_retrans] == 0 &&
550 			tpcb->tp_class != TP_CLASS_0) {
551 			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
552 			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
553 			tpcb->tp_rxtshift = 0;
554 		}
555 	}
556 	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
557 		tpcb->tp_oktonagle = 0;
558 #ifdef TP_PERF_MEAS
559 	IFPERF(tpcb)
560 		{
561 			register int npkts;
562 			int	 elapsed = ticks - send_start_time, *t;
563 			struct timeval now;
564 
565 			npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
566 
567 			if (npkts > 0)
568 				tpcb->tp_Nwindow++;
569 
570 			if (npkts > TP_PM_MAX)
571 				npkts = TP_PM_MAX;
572 
573 			t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
574 			*t += (t - elapsed) >> TP_RTT_ALPHA;
575 
576 			if (mb == 0) {
577 				IncPStat(tpcb, tps_win_lim_by_data[npkts] );
578 			} else {
579 				IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
580 				/* not true with congestion-window being used */
581 			}
582 			now.tv_sec = elapsed / hz;
583 			now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
584 			tpmeas( tpcb->tp_lref,
585 					TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
586 		}
587 	ENDPERF
588 #endif TP_PERF_MEAS
589 
590 
591 	IFTRACE(D_DATA)
592 		tptraceTPCB( TPPTmisc,
593 			"tp_send at end: new nxt eotsdu error",
594 			tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
595 
596 	ENDTRACE
597 }
598 
599 int TPNagleok;
600 int TPNagled;
601 
602 tp_packetize(tpcb, m, eotsdu)
603 register struct tp_pcb *tpcb;
604 register struct mbuf *m;
605 int eotsdu;
606 {
607 	register struct mbuf *n;
608 	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
609 	int	maxsize = tpcb->tp_l_tpdusize
610 			- tp_headersize(DT_TPDU_type, tpcb)
611 			- (tpcb->tp_use_checksum?4:0) ;
612 	int totlen = m->m_pkthdr.len;
613 	struct mbuf *m_split();
614 	/*
615 	 * Pre-packetize the data in the sockbuf
616 	 * according to negotiated mtu.  Do it here
617 	 * where we can safely wait for mbufs.
618 	 *
619 	 * This presumes knowledge of sockbuf conventions.
620 	 * TODO: allocate space for header and fill it in (once!).
621 	 */
622 	IFDEBUG(D_DATA)
623 		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
624 			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
625 	ENDTRACE
626 	if (tpcb->tp_oktonagle) {
627 		if ((n = sb->sb_mb) == 0)
628 			panic("tp_packetize");
629 		while (n->m_act)
630 			n = n->m_act;
631 		if (n->m_flags & M_EOR)
632 			panic("tp_packetize 2");
633 		SEQ_INC(tpcb, tpcb->tp_sndnum);
634 		if (totlen + n->m_pkthdr.len < maxsize) {
635 			/* There is an unsent packet with space, combine data */
636 			struct mbuf *old_n = n;
637 			tpsbcheck(tpcb,3);
638 			n->m_pkthdr.len += totlen;
639 			while (n->m_next)
640 				n = n->m_next;
641 			sbcompress(sb, m, n);
642 			tpsbcheck(tpcb,4);
643 			n = old_n;
644 			TPNagled++;
645 			goto out;
646 		}
647 	}
648 	while (m) {
649 		n = m;
650 		if (totlen > maxsize) {
651 			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
652 				panic("tp_packetize");
653 		} else
654 			m = 0;
655 		totlen -= maxsize;
656 		tpsbcheck(tpcb, 5);
657 		sbappendrecord(sb, n);
658 		tpsbcheck(tpcb, 6);
659 		SEQ_INC(tpcb, tpcb->tp_sndnum);
660 	}
661 out:
662 	if (eotsdu) {
663 		n->m_flags |= M_EOR;  /* XXX belongs at end */
664 		tpcb->tp_oktonagle = 0;
665 	} else {
666 		SEQ_DEC(tpcb, tpcb->tp_sndnum);
667 		tpcb->tp_oktonagle = 1;
668 		TPNagleok++;
669 	}
670 	IFDEBUG(D_DATA)
671 		printf("SEND out: oktonagle %d sndnum 0x%x\n",
672 			tpcb->tp_oktonagle, tpcb->tp_sndnum);
673 	ENDTRACE
674 	return 0;
675 }
676 
677 
678 /*
679  * NAME: tp_stash()
680  * CALLED FROM:
681  *	tp.trans on arrival of a DT tpdu
682  * FUNCTION, ARGUMENTS, and RETURN VALUE:
683  * 	Returns 1 if
684  *		a) something new arrived and it's got eotsdu_reached bit on,
685  * 		b) this arrival was caused other out-of-sequence things to be
686  *    	accepted, or
687  * 		c) this arrival is the highest seq # for which we last gave credit
688  *   	(sender just sent a whole window)
689  *  In other words, returns 1 if tp should send an ack immediately, 0 if
690  *  the ack can wait a while.
691  *
692  * Note: this implementation no longer renegs on credit, (except
693  * when debugging option D_RENEG is on, for the purpose of testing
694  * ack subsequencing), so we don't  need to check for incoming tpdus
695  * being in a reneged portion of the window.
696  */
697 
698 tp_stash(tpcb, e)
699 	register struct tp_pcb		*tpcb;
700 	register struct tp_event	*e;
701 {
702 	register int		ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
703 									/* 0--> delay acks until full window */
704 									/* 1--> ack each tpdu */
705 #ifndef lint
706 #define E e->ATTR(DT_TPDU)
707 #else lint
708 #define E e->ev_union.EV_DT_TPDU
709 #endif lint
710 
711 	if ( E.e_eot ) {
712 		register struct mbuf *n = E.e_data;
713 		n->m_flags |= M_EOR;
714 		n->m_act = 0;
715 	}
716 		IFDEBUG(D_STASH)
717 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
718 				"stash: so_rcv before appending");
719 			dump_mbuf(E.e_data,
720 				"stash: e_data before appending");
721 		ENDDEBUG
722 
723 	IFPERF(tpcb)
724 		PStat(tpcb, Nb_from_ll) += E.e_datalen;
725 		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
726 			E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
727 	ENDPERF
728 
729 	if (E.e_seq == tpcb->tp_rcvnxt) {
730 
731 		IFDEBUG(D_STASH)
732 			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
733 			E.e_seq, E.e_datalen, E.e_eot);
734 		ENDDEBUG
735 
736 		IFTRACE(D_STASH)
737 			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
738 			E.e_seq, E.e_datalen, E.e_eot, 0);
739 		ENDTRACE
740 
741 		SET_DELACK(tpcb);
742 
743 		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
744 
745 		SEQ_INC( tpcb, tpcb->tp_rcvnxt );
746 		/*
747 		 * move chains from the reassembly queue to the socket buffer
748 		 */
749 		if (tpcb->tp_rsycnt) {
750 			register struct mbuf **mp;
751 			struct mbuf **mplim;
752 
753 			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
754 			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
755 
756 			while (tpcb->tp_rsycnt && *mp) {
757 				sbappend(&tpcb->tp_sock->so_rcv, *mp);
758 				tpcb->tp_rsycnt--;
759 				*mp = 0;
760 				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
761 				ack_reason |= ACK_REORDER;
762 				if (++mp == mplim)
763 					mp = tpcb->tp_rsyq;
764 			}
765 		}
766 		IFDEBUG(D_STASH)
767 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
768 				"stash: so_rcv after appending");
769 		ENDDEBUG
770 
771 	} else {
772 		register struct mbuf **mp;
773 		SeqNum uwe;
774 
775 		IFTRACE(D_STASH)
776 			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
777 			E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
778 		ENDTRACE
779 
780 		if (tpcb->tp_rsyq == 0)
781 			tp_rsyset(tpcb);
782 		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
783 		if (tpcb->tp_rsyq == 0 ||
784 						!IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
785 			ack_reason = ACK_DONT;
786 			m_freem(E.e_data);
787 		} else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
788 			IFDEBUG(D_STASH)
789 				printf("tp_stash - drop & ack\n");
790 			ENDDEBUG
791 
792 			/* retransmission - drop it and force an ack */
793 			IncStat(ts_dt_dup);
794 			IFPERF(tpcb)
795 				IncPStat(tpcb, tps_n_ack_cuz_dup);
796 			ENDPERF
797 
798 			m_freem(E.e_data);
799 			ack_reason |= ACK_DUP;
800 		} else {
801 			*mp = E.e_data;
802 			tpcb->tp_rsycnt++;
803 			ack_reason = ACK_DONT;
804 		}
805 	}
806 	/* there were some comments of historical interest here. */
807 	{
808 		LOCAL_CREDIT(tpcb);
809 
810 		if ( E.e_seq ==  tpcb->tp_sent_uwe )
811 			ack_reason |= ACK_STRAT_FULLWIN;
812 
813 		IFTRACE(D_STASH)
814 			tptraceTPCB(TPPTmisc,
815 				"end of stash, eot, ack_reason, sent_uwe ",
816 				E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
817 		ENDTRACE
818 
819 		if ( ack_reason == ACK_DONT ) {
820 			IncStat( ts_ackreason[ACK_DONT] );
821 			return 0;
822 		} else {
823 			IFPERF(tpcb)
824 				if(ack_reason & ACK_STRAT_EACH) {
825 					IncPStat(tpcb, tps_n_ack_cuz_strat);
826 				} else if(ack_reason & ACK_STRAT_FULLWIN) {
827 					IncPStat(tpcb, tps_n_ack_cuz_fullwin);
828 				} else if(ack_reason & ACK_REORDER) {
829 					IncPStat(tpcb, tps_n_ack_cuz_reorder);
830 				}
831 				tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
832 							SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
833 			ENDPERF
834 			{
835 				register int i;
836 
837 				/* keep track of all reasons that apply */
838 				for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
839 					if( ack_reason & (1<<i) )
840 						IncStat( ts_ackreason[i] );
841 				}
842 			}
843 			return 1;
844 		}
845 	}
846 }
847 
848 /*
849  * tp_rsyflush - drop all the packets on the reassembly queue.
850  * Do this when closing the socket, or when somebody has changed
851  * the space avaible in the receive socket (XXX).
852  */
853 tp_rsyflush(tpcb)
854 register struct tp_pcb *tpcb;
855 {
856 	register struct mbuf *m, **mp;
857 	if (tpcb->tp_rsycnt) {
858 		for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
859 									 --mp >= tpcb->tp_rsyq; )
860 			if (*mp) {
861 				tpcb->tp_rsycnt--;
862 				m_freem(*mp);
863 			}
864 		if (tpcb->tp_rsycnt) {
865 			printf("tp_rsyflush %x\n", tpcb);
866 			tpcb->tp_rsycnt = 0;
867 		}
868 	}
869 	free((caddr_t)tpcb->tp_rsyq, M_PCB);
870 	tpcb->tp_rsyq = 0;
871 }
872 
873 tp_rsyset(tpcb)
874 register struct tp_pcb *tpcb;
875 {
876 	register struct socket *so = tpcb->tp_sock;
877 	int maxcredit  = tpcb->tp_xtd_format ? 0xffff : 0xf;
878 	int old_credit = tpcb->tp_maxlcredit;
879 	caddr_t	rsyq;
880 
881 	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
882 		  (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
883 
884 	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
885 		return;
886 	maxcredit *= sizeof(struct mbuf *);
887 	if (tpcb->tp_rsyq)
888 		tp_rsyflush(tpcb);
889 	if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
890 		bzero(rsyq, maxcredit);
891 	tpcb->tp_rsyq = (struct mbuf **)rsyq;
892 }
893 
894 tpsbcheck(tpcb, i)
895 struct tp_pcb *tpcb;
896 {
897 	register struct mbuf *n, *m;
898 	register int len = 0, mbcnt = 0, pktlen;
899 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
900 
901 	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
902 		if ((n->m_flags & M_PKTHDR) == 0)
903 			panic("tpsbcheck nohdr");
904 		pktlen = len + n->m_pkthdr.len;
905 	    for (m = n; m; m = m->m_next) {
906 			len += m->m_len;
907 			mbcnt += MSIZE;
908 			if (m->m_flags & M_EXT)
909 				mbcnt += m->m_ext.ext_size;
910 		}
911 		if (len != pktlen) {
912 			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
913 				i, len, pktlen, n);
914 			panic("tpsbcheck short");
915 		}
916 	}
917 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
918 		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
919 		    mbcnt, sb->sb_mbcnt);
920 		panic("tpsbcheck");
921 	}
922 }
923