xref: /original-bsd/sys/netiso/tp_subr.c (revision e59fb703)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)tp_subr.c	7.17 (Berkeley) 10/08/91
8  */
9 
10 /***********************************************************
11 		Copyright IBM Corporation 1987
12 
13                       All Rights Reserved
14 
15 Permission to use, copy, modify, and distribute this software and its
16 documentation for any purpose and without fee is hereby granted,
17 provided that the above copyright notice appear in all copies and that
18 both that copyright notice and this permission notice appear in
19 supporting documentation, and that the name of IBM not be
20 used in advertising or publicity pertaining to distribution of the
21 software without specific, written prior permission.
22 
23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29 SOFTWARE.
30 
31 ******************************************************************/
32 
33 /*
34  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35  */
36 /*
37  * ARGO TP
38  *
39  * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40  * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41  *
42  * The main work of data transfer is done here.
43  * These routines are called from tp.trans.
44  * They include the routines that check the validity of acks and Xacks,
45  * (tp_goodack() and tp_goodXack() )
46  * take packets from socket buffers and send them (tp_send()),
47  * drop the data from the socket buffers (tp_sbdrop()),
48  * and put incoming packet data into socket buffers (tp_stash()).
49  */
50 
51 #include "param.h"
52 #include "mbuf.h"
53 #include "socket.h"
54 #include "socketvar.h"
55 #include "protosw.h"
56 #include "errno.h"
57 #include "types.h"
58 #include "time.h"
59 #include "kernel.h"
60 
61 #include "tp_ip.h"
62 #include "iso.h"
63 #include "argo_debug.h"
64 #include "tp_timer.h"
65 #include "tp_param.h"
66 #include "tp_stat.h"
67 #include "tp_pcb.h"
68 #include "tp_tpdu.h"
69 #include "tp_trace.h"
70 #include "tp_meas.h"
71 #include "tp_seq.h"
72 
/*
 * Routines used in this file before (or without) a visible definition.
 * tp_sbdrop() and tp_send() are defined further down; tp_emit() is not
 * defined in this part of the file.
 */
int		tp_emit();
int		tp_sbdrop();
void	tp_send();

/*
 * Number of consecutive duplicate acks (same sequence number and credit,
 * newer subsequence) at which tp_goodack() retransmits without waiting
 * for the retransmission timer.
 */
int		tprexmtthresh = 3;

/* clock tick counter, defined elsewhere; used to time round trips */
extern int	ticks;
77 
78 /*
79  * CALLED FROM:
80  *	tp.trans, when an XAK arrives
81  * FUNCTION and ARGUMENTS:
82  * 	Determines if the sequence number (seq) from the XAK
83  * 	acks anything new.  If so, drop the appropriate tpdu
84  * 	from the XPD send queue.
85  * RETURN VALUE:
86  * 	Returns 1 if it did this, 0 if the ack caused no action.
87  */
88 int
89 tp_goodXack(tpcb, seq)
90 	struct tp_pcb	*tpcb;
91 	SeqNum 			seq;
92 {
93 
94 	IFTRACE(D_XPD)
95 		tptraceTPCB(TPPTgotXack,
96 			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
97 			tpcb->tp_snduna);
98 	ENDTRACE
99 
100 	if ( seq == tpcb->tp_Xuna ) {
101 			tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
102 
103 			/* DROP 1 packet from the Xsnd socket buf - just so happens
104 			 * that only one packet can be there at any time
105 			 * so drop the whole thing.  If you allow > 1 packet
106 			 * the socket buffer, then you'll have to keep
107 			 * track of how many characters went w/ each XPD tpdu, so this
108 			 * will get messier
109 			 */
110 			IFDEBUG(D_XPD)
111 				dump_mbuf(tpcb->tp_Xsnd.sb_mb,
112 					"tp_goodXack Xsnd before sbdrop");
113 			ENDDEBUG
114 
115 			IFTRACE(D_XPD)
116 				tptraceTPCB(TPPTmisc,
117 					"goodXack: dropping cc ",
118 					(int)(tpcb->tp_Xsnd.sb_cc),
119 					0,0,0);
120 			ENDTRACE
121 			sbdroprecord(&tpcb->tp_Xsnd);
122 			return 1;
123 	}
124 	return 0;
125 }
126 
127 /*
128  * CALLED FROM:
129  *  tp_good_ack()
130  * FUNCTION and ARGUMENTS:
131  *  updates
132  *  smoothed average round trip time (*rtt)
133  *  roundtrip time variance (*rtv) - actually deviation, not variance
134  *  given the new value (diff)
135  * RETURN VALUE:
136  * void
137  */
138 
139 void
140 tp_rtt_rtv(tpcb)
141 register struct tp_pcb *tpcb;
142 {
143 	int old = tpcb->tp_rtt;
144 	int delta, elapsed = ticks - tpcb->tp_rttemit;
145 
146 	if (tpcb->tp_rtt != 0) {
147 		/*
148 		 * rtt is the smoothed round trip time in machine clock ticks (hz).
149 		 * It is stored as a fixed point number, unscaled (unlike the tcp
150 		 * srtt).  The rationale here is that it is only significant to the
151 		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
152 		 * so there is no need to scale.  The smoothing is done according
153 		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
154 		 */
155 		delta = elapsed - tpcb->tp_rtt;
156 		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
157 			tpcb->tp_rtt = 1;
158 		/*
159 		 * rtv is a smoothed accumulated mean difference, unscaled
160 		 * for reasons expressed above.
161 		 * It is smoothed with an alpha of .75, and the round trip timer
162 		 * will be set to rtt + 4*rtv, also as TCP does.
163 		 */
164 		if (delta < 0)
165 			delta = -delta;
166 		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
167 			tpcb->tp_rtv = 1;
168 	} else {
169 		/*
170 		 * No rtt measurement yet - use the unsmoothed rtt.
171 		 * Set the variance to half the rtt (so our first
172 		 * retransmit happens at 3*rtt)
173 		 */
174 		tpcb->tp_rtt = elapsed;
175 		tpcb->tp_rtv = elapsed >> 1;
176 	}
177 	tpcb->tp_rttemit = 0;
178 	tpcb->tp_rxtshift = 0;
179 	/*
180 	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
181 	 * Because of the way we do the smoothing, srtt and rttvar
182 	 * will each average +1/2 tick of bias.  When we compute
183 	 * the retransmit timer, we want 1/2 tick of rounding and
184 	 * 1 extra tick because of +-1/2 tick uncertainty in the
185 	 * firing of the timer.  The bias will give us exactly the
186 	 * 1.5 tick we need.  But, because the bias is
187 	 * statistical, we have to test that we don't drop below
188 	 * the minimum feasible timer (which is 2 ticks)."
189 	 */
190 	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
191 		tpcb->tp_peer_acktime, 128 /* XXX */);
192 	IFDEBUG(D_RTT)
193 		printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
194 			"tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
195 	ENDDEBUG
196 	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
197 }
198 
199 /*
200  * CALLED FROM:
201  *  tp.trans when an AK arrives
202  * FUNCTION and ARGUMENTS:
203  * 	Given (cdt), the credit from the AK tpdu, and
204  *	(seq), the sequence number from the AK tpdu,
205  *  tp_goodack() determines if the AK acknowledges something in the send
206  * 	window, and if so, drops the appropriate packets from the retransmission
207  *  list, computes the round trip time, and updates the retransmission timer
208  *  based on the new smoothed round trip time.
209  * RETURN VALUE:
210  * 	Returns 1 if
211  * 	EITHER it actually acked something heretofore unacknowledged
212  * 	OR no news but the credit should be processed.
213  * 	If something heretofore unacked was acked with this sequence number,
214  * 	the appropriate tpdus are dropped from the retransmission control list,
215  * 	by calling tp_sbdrop().
216  * 	No need to see the tpdu itself.
217  */
218 int
219 tp_goodack(tpcb, cdt, seq, subseq)
220 	register struct tp_pcb	*tpcb;
221 	u_int					cdt;
222 	register SeqNum			seq;
223 	u_int					subseq;
224 {
225 	int 	old_fcredit;
226 	int 	bang = 0; 	/* bang --> ack for something heretofore unacked */
227 	u_int	bytes_acked;
228 
229 	IFDEBUG(D_ACKRECV)
230 		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
231 			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
232 	ENDDEBUG
233 	IFTRACE(D_ACKRECV)
234 		tptraceTPCB(TPPTgotack,
235 			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
236 	ENDTRACE
237 
238 	IFPERF(tpcb)
239 		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
240 	ENDPERF
241 
242 	if (seq == tpcb->tp_snduna) {
243 		if (subseq < tpcb->tp_r_subseq ||
244 			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
245 		discard_the_ack:
246 			IFDEBUG(D_ACKRECV)
247 				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
248 					tpcb, subseq, tpcb->tp_r_subseq);
249 			ENDDEBUG
250 			goto done;
251 		}
252 		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
253 			tpcb->tp_r_subseq = subseq;
254 			if (tpcb->tp_timer[TM_data_retrans] == 0)
255 				tpcb->tp_dupacks = 0;
256 			else if (++tpcb->tp_dupacks == tprexmtthresh) {
257 				/* partner went out of his way to signal with different
258 				   subsequences that he has the same lack of an expected
259 				   packet.  This may be an early indiciation of a loss */
260 
261 				SeqNum onxt = tpcb->tp_sndnxt;
262 				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
263 				u_int win = min(tpcb->tp_fcredit,
264 							tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
265 				IFDEBUG(D_ACKRECV)
266 					printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
267 						"goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
268 				ENDDEBUG
269 				if (win < 2)
270 					win = 2;
271 				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
272 				tpcb->tp_timer[TM_data_retrans] = 0;
273 				tpcb->tp_rttemit = 0;
274 				tpcb->tp_sndnxt = tpcb->tp_snduna;
275 				tpcb->tp_sndnxt_m = 0;
276 				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
277 				tp_send(tpcb);
278 				tpcb->tp_cong_win = tpcb->tp_ssthresh +
279 					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
280 				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
281 					tpcb->tp_sndnxt = onxt;
282 					tpcb->tp_sndnxt_m = onxt_m;
283 				}
284 
285 			} else if (tpcb->tp_dupacks > tprexmtthresh) {
286 				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
287 			}
288 			goto done;
289 		}
290 	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
291 		goto discard_the_ack;
292 	/*
293 	 * If the congestion window was inflated to account
294 	 * for the other side's cached packets, retract it.
295 	 */
296 	if (tpcb->tp_dupacks > tprexmtthresh &&
297 		tpcb->tp_cong_win > tpcb->tp_ssthresh)
298 			tpcb->tp_cong_win = tpcb->tp_ssthresh;
299 	tpcb->tp_r_subseq = subseq;
300 	old_fcredit = tpcb->tp_fcredit;
301 	tpcb->tp_fcredit = cdt;
302 	if (cdt > tpcb->tp_maxfcredit)
303 		tpcb->tp_maxfcredit = cdt;
304 	tpcb->tp_dupacks = 0;
305 
306 	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
307 
308 		tpsbcheck(tpcb, 0);
309 		bytes_acked = tp_sbdrop(tpcb, seq);
310 		tpsbcheck(tpcb, 1);
311 		/*
312 		 * If transmit timer is running and timed sequence
313 		 * number was acked, update smoothed round trip time.
314 		 * Since we now have an rtt measurement, cancel the
315 		 * timer backoff (cf., Phil Karn's retransmit alg.).
316 		 * Recompute the initial retransmit timer.
317 		 */
318 		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
319 			tp_rtt_rtv(tpcb);
320 		/*
321 		 * If all outstanding data is acked, stop retransmit timer.
322 		 * If there is more data to be acked, restart retransmit
323 		 * timer, using current (possibly backed-off) value.
324 		 * OSI combines the keepalive and persistance functions.
325 		 * So, there is no persistance timer per se, to restart.
326 		 */
327 		tpcb->tp_timer[TM_data_retrans] =
328 			(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
329 		/*
330 		 * When new data is acked, open the congestion window.
331 		 * If the window gives us less than ssthresh packets
332 		 * in flight, open exponentially (maxseg per packet).
333 		 * Otherwise open linearly: maxseg per window
334 		 * (maxseg^2 / cwnd per packet), plus a constant
335 		 * fraction of a packet (maxseg/8) to help larger windows
336 		 * open quickly enough.
337 		 */
338 		{
339 			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
340 
341 			incr = min(incr, bytes_acked);
342 			if (cw > tpcb->tp_ssthresh)
343 				incr = incr * incr / cw + incr / 8;
344 			tpcb->tp_cong_win =
345 				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
346 		}
347 		tpcb->tp_snduna = seq;
348 		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
349 				tpcb->tp_sndnxt = seq;
350 				tpcb->tp_sndnxt_m = 0;
351 		}
352 		bang++;
353 	}
354 
355 	if( cdt != 0 && old_fcredit == 0 ) {
356 		tpcb->tp_sendfcc = 1;
357 	}
358 	if (cdt == 0) {
359 		if (old_fcredit != 0)
360 			IncStat(ts_zfcdt);
361 		/* The following might mean that the window shrunk */
362 		if (tpcb->tp_timer[TM_data_retrans]) {
363 			tpcb->tp_timer[TM_data_retrans] = 0;
364 			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
365 			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
366 				tpcb->tp_sndnxt = tpcb->tp_snduna;
367 				tpcb->tp_sndnxt_m = 0;
368 			}
369 		}
370 	}
371 	tpcb->tp_fcredit = cdt;
372 	bang |= (old_fcredit < cdt);
373 
374 done:
375 	IFDEBUG(D_ACKRECV)
376 		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
377 			bang, cdt, old_fcredit, tpcb->tp_cong_win);
378 	ENDDEBUG
379 	/* if (bang) XXXXX Very bad to remove this test, but somethings broken */
380 		tp_send(tpcb);
381 	return (bang);
382 }
383 
384 /*
385  * CALLED FROM:
386  *  tp_goodack()
387  * FUNCTION and ARGUMENTS:
388  *  drops everything up TO but not INCLUDING seq # (seq)
389  *  from the retransmission queue.
390  */
391 tp_sbdrop(tpcb, seq)
392 	register struct 	tp_pcb 			*tpcb;
393 	SeqNum					seq;
394 {
395 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
396 	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
397 	int	oldcc = sb->sb_cc, oldi = i;
398 
399 	if (i >= tpcb->tp_seqhalf)
400 		printf("tp_spdropping too much -- should panic");
401 	while (i-- > 0)
402 		sbdroprecord(sb);
403 	IFDEBUG(D_ACKRECV)
404 		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
405 			oldi, oldcc - sb->sb_cc, tpcb, seq);
406 	ENDDEBUG
407 	if (sb->sb_flags & SB_NOTIFY)
408 		sowwakeup(tpcb->tp_sock);
409 	return (oldcc - sb->sb_cc);
410 }
411 
412 /*
413  * CALLED FROM:
414  * 	tp.trans on user send request, arrival of AK and arrival of XAK
415  * FUNCTION and ARGUMENTS:
416  * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
417  * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
418  * 			c) it hits seq number (highseq) limited by cong or credit.
419  *
420  * 	If you want XPD to buffer > 1 du per socket buffer, you can
421  * 	modifiy this to issue XPD tpdus also, but then it'll have
422  * 	to take some argument(s) to distinguish between the type of DU to
423  * 	hand tp_emit.
424  *
425  * 	When something is sent for the first time, its time-of-send
426  * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
427  *  When the ack arrives, the smoothed round-trip time is figured
428  *  using this value.
429  */
430 void
431 tp_send(tpcb)
432 	register struct tp_pcb	*tpcb;
433 {
434 	register int			len;
435 	register struct mbuf	*m;
436 	struct mbuf				*mb = 0;
437 	struct 	sockbuf			*sb = &tpcb->tp_sock->so_snd;
438 	unsigned int			eotsdu = 0;
439 	SeqNum					highseq, checkseq;
440 	int						idle, idleticks, off, cong_win;
441 #ifdef TP_PERF_MEAS
442 	int			 			send_start_time = ticks;
443 	SeqNum					oldnxt = tpcb->tp_sndnxt;
444 #endif TP_PERF_MEAS
445 
446 	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
447 	if (idle) {
448 		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
449 		if (idleticks > tpcb->tp_dt_ticks)
450 			/*
451 			 * We have been idle for "a while" and no acks are
452 			 * expected to clock out any data we send --
453 			 * slow start to get ack "clock" running again.
454 			 */
455 			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
456 	}
457 
458 	cong_win = tpcb->tp_cong_win;
459 	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
460 	if (tpcb->tp_Xsnd.sb_mb)
461 		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
462 
463 	IFDEBUG(D_DATA)
464 		printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
465 				tpcb, tpcb->tp_sndnxt, cong_win, highseq);
466 	ENDDEBUG
467 	IFTRACE(D_DATA)
468 		tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
469 			tpcb->tp_sndnew,  tpcb->tp_snduna, 0, 0);
470 		tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
471 			tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
472 	ENDTRACE
473 	IFTRACE(D_DATA)
474 		tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
475 			tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
476 	ENDTRACE
477 
478 	if (tpcb->tp_sndnxt_m)
479 		m = tpcb->tp_sndnxt_m;
480 	else {
481 		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
482 		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
483 			off--;
484 	}
485 send:
486 	/*
487 	 * Avoid silly window syndrome here . . . figure out how!
488 	 */
489 	checkseq = tpcb->tp_sndnum;
490 	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
491 		checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
492 
493 	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
494 
495 		eotsdu = (m->m_flags & M_EOR) != 0;
496 		len = m->m_pkthdr.len;
497 		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
498 			len < (tpcb->tp_l_tpdusize / 2))
499 				break;  /* Nagle . . . . . */
500 		cong_win -= len;
501 		/* make a copy - mb goes into the retransmission list
502 		 * while m gets emitted.  m_copy won't copy a zero-length mbuf.
503 		 */
504 		mb = m;
505 		m = m_copy(mb, 0, M_COPYALL);
506 		if (m == MNULL)
507 				break;
508 		IFTRACE(D_STASH)
509 			tptraceTPCB( TPPTmisc,
510 				"tp_send mcopy nxt high eotsdu len",
511 				tpcb->tp_sndnxt, highseq, eotsdu, len);
512 		ENDTRACE
513 
514 		IFDEBUG(D_DATA)
515 			printf("tp_sending tpcb 0x%x nxt 0x%x\n",
516 				tpcb, tpcb->tp_sndnxt);
517 		ENDDEBUG
518 		/* when headers are precomputed, may need to fill
519 			   in checksum here */
520 		if (tpcb->tp_sock->so_error =
521 			tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
522 			/* error */
523 			break;
524 		}
525 		m = mb->m_nextpkt;
526 		tpcb->tp_sndnxt_m = m;
527 		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
528 			SEQ_INC(tpcb, tpcb->tp_sndnew);
529 			/*
530 			 * Time this transmission if not a retransmission and
531 			 * not currently timing anything.
532 			 */
533 			if (tpcb->tp_rttemit == 0) {
534 				tpcb->tp_rttemit = ticks;
535 				tpcb->tp_rttseq = tpcb->tp_sndnxt;
536 			}
537 			tpcb->tp_sndnxt = tpcb->tp_sndnew;
538 		} else
539 			SEQ_INC(tpcb, tpcb->tp_sndnxt);
540 		/*
541 		 * Set retransmit timer if not currently set.
542 		 * Initial value for retransmit timer is smoothed
543 		 * round-trip time + 2 * round-trip time variance.
544 		 * Initialize shift counter which is used for backoff
545 		 * of retransmit time.
546 		 */
547 		if (tpcb->tp_timer[TM_data_retrans] == 0) {
548 			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
549 			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
550 			tpcb->tp_rxtshift = 0;
551 		}
552 	}
553 	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
554 		tpcb->tp_oktonagle = 0;
555 #ifdef TP_PERF_MEAS
556 	IFPERF(tpcb)
557 		{
558 			register int npkts;
559 			int	 elapsed = ticks - send_start_time, *t;
560 			struct timeval now;
561 
562 			npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
563 
564 			if (npkts > 0)
565 				tpcb->tp_Nwindow++;
566 
567 			if (npkts > TP_PM_MAX)
568 				npkts = TP_PM_MAX;
569 
570 			t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
571 			*t += (t - elapsed) >> TP_RTT_ALPHA;
572 
573 			if (mb == 0) {
574 				IncPStat(tpcb, tps_win_lim_by_data[npkts] );
575 			} else {
576 				IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
577 				/* not true with congestion-window being used */
578 			}
579 			now.tv_sec = elapsed / hz;
580 			now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
581 			tpmeas( tpcb->tp_lref,
582 					TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
583 		}
584 	ENDPERF
585 #endif TP_PERF_MEAS
586 
587 
588 	IFTRACE(D_DATA)
589 		tptraceTPCB( TPPTmisc,
590 			"tp_send at end: new nxt eotsdu error",
591 			tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
592 
593 	ENDTRACE
594 }
595 
596 int TPNagleok;
597 int TPNagled;
598 
599 tp_packetize(tpcb, m, eotsdu)
600 register struct tp_pcb *tpcb;
601 register struct mbuf *m;
602 int eotsdu;
603 {
604 	register struct mbuf *n;
605 	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
606 	int	maxsize = tpcb->tp_l_tpdusize
607 			- tp_headersize(DT_TPDU_type, tpcb)
608 			- (tpcb->tp_use_checksum?4:0) ;
609 	int totlen = m->m_pkthdr.len;
610 	struct mbuf *m_split();
611 	/*
612 	 * Pre-packetize the data in the sockbuf
613 	 * according to negotiated mtu.  Do it here
614 	 * where we can safely wait for mbufs.
615 	 *
616 	 * This presumes knowledge of sockbuf conventions.
617 	 * TODO: allocate space for header and fill it in (once!).
618 	 */
619 	IFDEBUG(D_DATA)
620 		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
621 			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
622 	ENDTRACE
623 	if (tpcb->tp_oktonagle) {
624 		if ((n = sb->sb_mb) == 0)
625 			panic("tp_packetize");
626 		while (n->m_act)
627 			n = n->m_act;
628 		if (n->m_flags & M_EOR)
629 			panic("tp_packetize 2");
630 		SEQ_INC(tpcb, tpcb->tp_sndnum);
631 		if (totlen + n->m_pkthdr.len < maxsize) {
632 			/* There is an unsent packet with space, combine data */
633 			struct mbuf *old_n = n;
634 			tpsbcheck(tpcb,3);
635 			n->m_pkthdr.len += totlen;
636 			while (n->m_next)
637 				n = n->m_next;
638 			sbcompress(sb, m, n);
639 			tpsbcheck(tpcb,4);
640 			n = old_n;
641 			TPNagled++;
642 			goto out;
643 		}
644 	}
645 	while (m) {
646 		n = m;
647 		if (totlen > maxsize) {
648 			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
649 				panic("tp_packetize");
650 		} else
651 			m = 0;
652 		totlen -= maxsize;
653 		tpsbcheck(tpcb, 5);
654 		sbappendrecord(sb, n);
655 		tpsbcheck(tpcb, 6);
656 		SEQ_INC(tpcb, tpcb->tp_sndnum);
657 	}
658 out:
659 	if (eotsdu) {
660 		n->m_flags |= M_EOR;  /* XXX belongs at end */
661 		tpcb->tp_oktonagle = 0;
662 	} else {
663 		SEQ_DEC(tpcb, tpcb->tp_sndnum);
664 		tpcb->tp_oktonagle = 1;
665 		TPNagleok++;
666 	}
667 	IFDEBUG(D_DATA)
668 		printf("SEND out: oktonagle %d sndnum 0x%x\n",
669 			tpcb->tp_oktonagle, tpcb->tp_sndnum);
670 	ENDTRACE
671 	return 0;
672 }
673 
674 
675 /*
676  * NAME: tp_stash()
677  * CALLED FROM:
678  *	tp.trans on arrival of a DT tpdu
679  * FUNCTION, ARGUMENTS, and RETURN VALUE:
680  * 	Returns 1 if
681  *		a) something new arrived and it's got eotsdu_reached bit on,
682  * 		b) this arrival was caused other out-of-sequence things to be
683  *    	accepted, or
684  * 		c) this arrival is the highest seq # for which we last gave credit
685  *   	(sender just sent a whole window)
686  *  In other words, returns 1 if tp should send an ack immediately, 0 if
687  *  the ack can wait a while.
688  *
689  * Note: this implementation no longer renegs on credit, (except
690  * when debugging option D_RENEG is on, for the purpose of testing
691  * ack subsequencing), so we don't  need to check for incoming tpdus
692  * being in a reneged portion of the window.
693  */
694 
695 tp_stash(tpcb, e)
696 	register struct tp_pcb		*tpcb;
697 	register struct tp_event	*e;
698 {
699 	register int		ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
700 									/* 0--> delay acks until full window */
701 									/* 1--> ack each tpdu */
702 #ifndef lint
703 #define E e->ATTR(DT_TPDU)
704 #else lint
705 #define E e->ev_union.EV_DT_TPDU
706 #endif lint
707 
708 	if ( E.e_eot ) {
709 		register struct mbuf *n = E.e_data;
710 		n->m_flags |= M_EOR;
711 		n->m_act = 0;
712 	}
713 		IFDEBUG(D_STASH)
714 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
715 				"stash: so_rcv before appending");
716 			dump_mbuf(E.e_data,
717 				"stash: e_data before appending");
718 		ENDDEBUG
719 
720 	IFPERF(tpcb)
721 		PStat(tpcb, Nb_from_ll) += E.e_datalen;
722 		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
723 			E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
724 	ENDPERF
725 
726 	if (E.e_seq == tpcb->tp_rcvnxt) {
727 
728 		IFDEBUG(D_STASH)
729 			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
730 			E.e_seq, E.e_datalen, E.e_eot);
731 		ENDDEBUG
732 
733 		IFTRACE(D_STASH)
734 			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
735 			E.e_seq, E.e_datalen, E.e_eot, 0);
736 		ENDTRACE
737 
738 		SET_DELACK(tpcb);
739 
740 		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
741 
742 		SEQ_INC( tpcb, tpcb->tp_rcvnxt );
743 		/*
744 		 * move chains from the reassembly queue to the socket buffer
745 		 */
746 		if (tpcb->tp_rsycnt) {
747 			register struct mbuf **mp;
748 			struct mbuf **mplim;
749 
750 			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
751 			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
752 
753 			while (tpcb->tp_rsycnt && *mp) {
754 				sbappend(&tpcb->tp_sock->so_rcv, *mp);
755 				tpcb->tp_rsycnt--;
756 				*mp = 0;
757 				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
758 				ack_reason |= ACK_REORDER;
759 				if (++mp == mplim)
760 					mp = tpcb->tp_rsyq;
761 			}
762 		}
763 		IFDEBUG(D_STASH)
764 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
765 				"stash: so_rcv after appending");
766 		ENDDEBUG
767 
768 	} else {
769 		register struct mbuf **mp;
770 		SeqNum uwe;
771 
772 		IFTRACE(D_STASH)
773 			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
774 			E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
775 		ENDTRACE
776 
777 		if (tpcb->tp_rsyq == 0)
778 			tp_rsyset(tpcb);
779 		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
780 		if (tpcb->tp_rsyq == 0 ||
781 						!IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
782 			ack_reason = ACK_DONT;
783 			m_freem(E.e_data);
784 		} else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
785 			IFDEBUG(D_STASH)
786 				printf("tp_stash - drop & ack\n");
787 			ENDDEBUG
788 
789 			/* retransmission - drop it and force an ack */
790 			IncStat(ts_dt_dup);
791 			IFPERF(tpcb)
792 				IncPStat(tpcb, tps_n_ack_cuz_dup);
793 			ENDPERF
794 
795 			m_freem(E.e_data);
796 			ack_reason |= ACK_DUP;
797 		} else {
798 			*mp = E.e_data;
799 			tpcb->tp_rsycnt++;
800 			ack_reason = ACK_DONT;
801 		}
802 	}
803 	/* there were some comments of historical interest here. */
804 	{
805 		LOCAL_CREDIT(tpcb);
806 
807 		if ( E.e_seq ==  tpcb->tp_sent_uwe )
808 			ack_reason |= ACK_STRAT_FULLWIN;
809 
810 		IFTRACE(D_STASH)
811 			tptraceTPCB(TPPTmisc,
812 				"end of stash, eot, ack_reason, sent_uwe ",
813 				E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
814 		ENDTRACE
815 
816 		if ( ack_reason == ACK_DONT ) {
817 			IncStat( ts_ackreason[ACK_DONT] );
818 			return 0;
819 		} else {
820 			IFPERF(tpcb)
821 				if(ack_reason & ACK_STRAT_EACH) {
822 					IncPStat(tpcb, tps_n_ack_cuz_strat);
823 				} else if(ack_reason & ACK_STRAT_FULLWIN) {
824 					IncPStat(tpcb, tps_n_ack_cuz_fullwin);
825 				} else if(ack_reason & ACK_REORDER) {
826 					IncPStat(tpcb, tps_n_ack_cuz_reorder);
827 				}
828 				tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
829 							SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
830 			ENDPERF
831 			{
832 				register int i;
833 
834 				/* keep track of all reasons that apply */
835 				for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
836 					if( ack_reason & (1<<i) )
837 						IncStat( ts_ackreason[i] );
838 				}
839 			}
840 			return 1;
841 		}
842 	}
843 }
844 
845 /*
846  * tp_rsyflush - drop all the packets on the reassembly queue.
847  * Do this when closing the socket, or when somebody has changed
848  * the space avaible in the receive socket (XXX).
849  */
850 tp_rsyflush(tpcb)
851 register struct tp_pcb *tpcb;
852 {
853 	register struct mbuf *m, **mp;
854 	if (tpcb->tp_rsycnt) {
855 		for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
856 									 --mp >= tpcb->tp_rsyq; )
857 			if (*mp) {
858 				tpcb->tp_rsycnt--;
859 				m_freem(*mp);
860 			}
861 		if (tpcb->tp_rsycnt)
862 			panic("tp_rsyflush");
863 	}
864 	free((caddr_t)tpcb->tp_rsyq, M_PCB);
865 	tpcb->tp_rsyq = 0;
866 }
867 
868 tp_rsyset(tpcb)
869 register struct tp_pcb *tpcb;
870 {
871 	register struct socket *so = tpcb->tp_sock;
872 	int maxcredit  = tpcb->tp_xtd_format ? 0xffff : 0xf;
873 	int old_credit = tpcb->tp_maxlcredit;
874 	caddr_t	rsyq;
875 
876 	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
877 		  (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
878 
879 	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
880 		return;
881 	maxcredit *= sizeof(struct mbuf *);
882 	if (tpcb->tp_rsyq)
883 		tp_rsyflush(tpcb);
884 	if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
885 		bzero(rsyq, maxcredit);
886 	tpcb->tp_rsyq = (struct mbuf **)rsyq;
887 }
888 
889 tpsbcheck(tpcb, i)
890 struct tp_pcb *tpcb;
891 {
892 	register struct mbuf *n, *m;
893 	register int len = 0, mbcnt = 0, pktlen;
894 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
895 
896 	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
897 		if ((n->m_flags & M_PKTHDR) == 0)
898 			panic("tpsbcheck nohdr");
899 		pktlen = len + n->m_pkthdr.len;
900 	    for (m = n; m; m = m->m_next) {
901 			len += m->m_len;
902 			mbcnt += MSIZE;
903 			if (m->m_flags & M_EXT)
904 				mbcnt += m->m_ext.ext_size;
905 		}
906 		if (len != pktlen) {
907 			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
908 				i, len, pktlen, n);
909 			panic("tpsbcheck short");
910 		}
911 	}
912 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
913 		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
914 		    mbcnt, sb->sb_mbcnt);
915 		panic("tpsbcheck");
916 	}
917 }
918