xref: /original-bsd/sys/netiso/tp_subr.c (revision 3705696b)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)tp_subr.c	8.1 (Berkeley) 06/10/93
8  */
9 
10 /***********************************************************
11 		Copyright IBM Corporation 1987
12 
13                       All Rights Reserved
14 
15 Permission to use, copy, modify, and distribute this software and its
16 documentation for any purpose and without fee is hereby granted,
17 provided that the above copyright notice appear in all copies and that
18 both that copyright notice and this permission notice appear in
19 supporting documentation, and that the name of IBM not be
20 used in advertising or publicity pertaining to distribution of the
21 software without specific, written prior permission.
22 
23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29 SOFTWARE.
30 
31 ******************************************************************/
32 
33 /*
34  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35  */
36 /*
37  * ARGO TP
38  *
39  * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40  * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41  *
42  * The main work of data transfer is done here.
43  * These routines are called from tp.trans.
44  * They include the routines that check the validity of acks and Xacks,
45  * (tp_goodack() and tp_goodXack() )
46  * take packets from socket buffers and send them (tp_send()),
47  * drop the data from the socket buffers (tp_sbdrop()),
48  * and put incoming packet data into socket buffers (tp_stash()).
49  */
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/mbuf.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/protosw.h>
57 #include <sys/errno.h>
58 #include <sys/time.h>
59 #include <sys/kernel.h>
60 
61 #include <netiso/tp_ip.h>
62 #include <netiso/iso.h>
63 #include <netiso/argo_debug.h>
64 #include <netiso/tp_timer.h>
65 #include <netiso/tp_param.h>
66 #include <netiso/tp_stat.h>
67 #include <netiso/tp_pcb.h>
68 #include <netiso/tp_tpdu.h>
69 #include <netiso/tp_trace.h>
70 #include <netiso/tp_meas.h>
71 #include <netiso/tp_seq.h>
72 
/*
 * Old-style declarations for routines used before they are defined;
 * tp_emit() is not defined in this part of the file.
 */
int		tp_emit(), tp_sbdrop();
/*
 * Duplicate-ack threshold: when tp_goodack() has counted this many
 * consecutive duplicate acks (tp_dupacks) it resends starting at
 * tp_snduna instead of waiting for the retransmit timer.
 */
int		tprexmtthresh = 3;
/* Clock tick counter; sampled here for round-trip and send-time measurement. */
extern int	ticks;
void	tp_send();
77 
78 /*
79  * CALLED FROM:
80  *	tp.trans, when an XAK arrives
81  * FUNCTION and ARGUMENTS:
82  * 	Determines if the sequence number (seq) from the XAK
83  * 	acks anything new.  If so, drop the appropriate tpdu
84  * 	from the XPD send queue.
85  * RETURN VALUE:
86  * 	Returns 1 if it did this, 0 if the ack caused no action.
87  */
88 int
89 tp_goodXack(tpcb, seq)
90 	struct tp_pcb	*tpcb;
91 	SeqNum 			seq;
92 {
93 
94 	IFTRACE(D_XPD)
95 		tptraceTPCB(TPPTgotXack,
96 			seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
97 			tpcb->tp_snduna);
98 	ENDTRACE
99 
100 	if ( seq == tpcb->tp_Xuna ) {
101 			tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
102 
103 			/* DROP 1 packet from the Xsnd socket buf - just so happens
104 			 * that only one packet can be there at any time
105 			 * so drop the whole thing.  If you allow > 1 packet
106 			 * the socket buffer, then you'll have to keep
107 			 * track of how many characters went w/ each XPD tpdu, so this
108 			 * will get messier
109 			 */
110 			IFDEBUG(D_XPD)
111 				dump_mbuf(tpcb->tp_Xsnd.sb_mb,
112 					"tp_goodXack Xsnd before sbdrop");
113 			ENDDEBUG
114 
115 			IFTRACE(D_XPD)
116 				tptraceTPCB(TPPTmisc,
117 					"goodXack: dropping cc ",
118 					(int)(tpcb->tp_Xsnd.sb_cc),
119 					0,0,0);
120 			ENDTRACE
121 			sbdroprecord(&tpcb->tp_Xsnd);
122 			return 1;
123 	}
124 	return 0;
125 }
126 
127 /*
128  * CALLED FROM:
129  *  tp_good_ack()
130  * FUNCTION and ARGUMENTS:
131  *  updates
132  *  smoothed average round trip time (*rtt)
133  *  roundtrip time variance (*rtv) - actually deviation, not variance
134  *  given the new value (diff)
135  * RETURN VALUE:
136  * void
137  */
138 
139 void
140 tp_rtt_rtv(tpcb)
141 register struct tp_pcb *tpcb;
142 {
143 	int old = tpcb->tp_rtt;
144 	int delta, elapsed = ticks - tpcb->tp_rttemit;
145 
146 	if (tpcb->tp_rtt != 0) {
147 		/*
148 		 * rtt is the smoothed round trip time in machine clock ticks (hz).
149 		 * It is stored as a fixed point number, unscaled (unlike the tcp
150 		 * srtt).  The rationale here is that it is only significant to the
151 		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
152 		 * so there is no need to scale.  The smoothing is done according
153 		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
154 		 */
155 		delta = elapsed - tpcb->tp_rtt;
156 		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
157 			tpcb->tp_rtt = 1;
158 		/*
159 		 * rtv is a smoothed accumulated mean difference, unscaled
160 		 * for reasons expressed above.
161 		 * It is smoothed with an alpha of .75, and the round trip timer
162 		 * will be set to rtt + 4*rtv, also as TCP does.
163 		 */
164 		if (delta < 0)
165 			delta = -delta;
166 		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
167 			tpcb->tp_rtv = 1;
168 	} else {
169 		/*
170 		 * No rtt measurement yet - use the unsmoothed rtt.
171 		 * Set the variance to half the rtt (so our first
172 		 * retransmit happens at 3*rtt)
173 		 */
174 		tpcb->tp_rtt = elapsed;
175 		tpcb->tp_rtv = elapsed >> 1;
176 	}
177 	tpcb->tp_rttemit = 0;
178 	tpcb->tp_rxtshift = 0;
179 	/*
180 	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
181 	 * Because of the way we do the smoothing, srtt and rttvar
182 	 * will each average +1/2 tick of bias.  When we compute
183 	 * the retransmit timer, we want 1/2 tick of rounding and
184 	 * 1 extra tick because of +-1/2 tick uncertainty in the
185 	 * firing of the timer.  The bias will give us exactly the
186 	 * 1.5 tick we need.  But, because the bias is
187 	 * statistical, we have to test that we don't drop below
188 	 * the minimum feasible timer (which is 2 ticks)."
189 	 */
190 	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
191 		tpcb->tp_peer_acktime, 128 /* XXX */);
192 	IFDEBUG(D_RTT)
193 		printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
194 			"tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
195 	ENDDEBUG
196 	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
197 }
198 
199 /*
200  * CALLED FROM:
201  *  tp.trans when an AK arrives
202  * FUNCTION and ARGUMENTS:
203  * 	Given (cdt), the credit from the AK tpdu, and
204  *	(seq), the sequence number from the AK tpdu,
205  *  tp_goodack() determines if the AK acknowledges something in the send
206  * 	window, and if so, drops the appropriate packets from the retransmission
207  *  list, computes the round trip time, and updates the retransmission timer
208  *  based on the new smoothed round trip time.
209  * RETURN VALUE:
210  * 	Returns 1 if
211  * 	EITHER it actually acked something heretofore unacknowledged
212  * 	OR no news but the credit should be processed.
213  * 	If something heretofore unacked was acked with this sequence number,
214  * 	the appropriate tpdus are dropped from the retransmission control list,
215  * 	by calling tp_sbdrop().
216  * 	No need to see the tpdu itself.
217  */
218 int
219 tp_goodack(tpcb, cdt, seq, subseq)
220 	register struct tp_pcb	*tpcb;
221 	u_int					cdt;
222 	register SeqNum			seq;
223 	u_int					subseq;
224 {
225 	int 	old_fcredit;
226 	int 	bang = 0; 	/* bang --> ack for something heretofore unacked */
227 	u_int	bytes_acked;
228 
229 	IFDEBUG(D_ACKRECV)
230 		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
231 			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
232 	ENDDEBUG
233 	IFTRACE(D_ACKRECV)
234 		tptraceTPCB(TPPTgotack,
235 			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
236 	ENDTRACE
237 
238 	IFPERF(tpcb)
239 		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
240 	ENDPERF
241 
242 	if (seq == tpcb->tp_snduna) {
243 		if (subseq < tpcb->tp_r_subseq ||
244 			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
245 		discard_the_ack:
246 			IFDEBUG(D_ACKRECV)
247 				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
248 					tpcb, subseq, tpcb->tp_r_subseq);
249 			ENDDEBUG
250 			goto done;
251 		}
252 		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
253 			tpcb->tp_r_subseq = subseq;
254 			if (tpcb->tp_timer[TM_data_retrans] == 0)
255 				tpcb->tp_dupacks = 0;
256 			else if (++tpcb->tp_dupacks == tprexmtthresh) {
257 				/* partner went out of his way to signal with different
258 				   subsequences that he has the same lack of an expected
259 				   packet.  This may be an early indiciation of a loss */
260 
261 				SeqNum onxt = tpcb->tp_sndnxt;
262 				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
263 				u_int win = min(tpcb->tp_fcredit,
264 							tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
265 				IFDEBUG(D_ACKRECV)
266 					printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
267 						"goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
268 				ENDDEBUG
269 				if (win < 2)
270 					win = 2;
271 				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
272 				tpcb->tp_timer[TM_data_retrans] = 0;
273 				tpcb->tp_rttemit = 0;
274 				tpcb->tp_sndnxt = tpcb->tp_snduna;
275 				tpcb->tp_sndnxt_m = 0;
276 				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
277 				tp_send(tpcb);
278 				tpcb->tp_cong_win = tpcb->tp_ssthresh +
279 					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
280 				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
281 					tpcb->tp_sndnxt = onxt;
282 					tpcb->tp_sndnxt_m = onxt_m;
283 				}
284 
285 			} else if (tpcb->tp_dupacks > tprexmtthresh) {
286 				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
287 			}
288 			goto done;
289 		}
290 	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
291 		goto discard_the_ack;
292 	/*
293 	 * If the congestion window was inflated to account
294 	 * for the other side's cached packets, retract it.
295 	 */
296 	if (tpcb->tp_dupacks > tprexmtthresh &&
297 		tpcb->tp_cong_win > tpcb->tp_ssthresh)
298 			tpcb->tp_cong_win = tpcb->tp_ssthresh;
299 	tpcb->tp_r_subseq = subseq;
300 	old_fcredit = tpcb->tp_fcredit;
301 	tpcb->tp_fcredit = cdt;
302 	if (cdt > tpcb->tp_maxfcredit)
303 		tpcb->tp_maxfcredit = cdt;
304 	tpcb->tp_dupacks = 0;
305 
306 	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
307 
308 		tpsbcheck(tpcb, 0);
309 		bytes_acked = tp_sbdrop(tpcb, seq);
310 		tpsbcheck(tpcb, 1);
311 		/*
312 		 * If transmit timer is running and timed sequence
313 		 * number was acked, update smoothed round trip time.
314 		 * Since we now have an rtt measurement, cancel the
315 		 * timer backoff (cf., Phil Karn's retransmit alg.).
316 		 * Recompute the initial retransmit timer.
317 		 */
318 		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
319 			tp_rtt_rtv(tpcb);
320 		/*
321 		 * If all outstanding data is acked, stop retransmit timer.
322 		 * If there is more data to be acked, restart retransmit
323 		 * timer, using current (possibly backed-off) value.
324 		 * OSI combines the keepalive and persistance functions.
325 		 * So, there is no persistance timer per se, to restart.
326 		 */
327 		if (tpcb->tp_class != TP_CLASS_0)
328 			tpcb->tp_timer[TM_data_retrans] =
329 				(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
330 		/*
331 		 * When new data is acked, open the congestion window.
332 		 * If the window gives us less than ssthresh packets
333 		 * in flight, open exponentially (maxseg per packet).
334 		 * Otherwise open linearly: maxseg per window
335 		 * (maxseg^2 / cwnd per packet), plus a constant
336 		 * fraction of a packet (maxseg/8) to help larger windows
337 		 * open quickly enough.
338 		 */
339 		{
340 			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
341 
342 			incr = min(incr, bytes_acked);
343 			if (cw > tpcb->tp_ssthresh)
344 				incr = incr * incr / cw + incr / 8;
345 			tpcb->tp_cong_win =
346 				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
347 		}
348 		tpcb->tp_snduna = seq;
349 		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
350 				tpcb->tp_sndnxt = seq;
351 				tpcb->tp_sndnxt_m = 0;
352 		}
353 		bang++;
354 	}
355 
356 	if( cdt != 0 && old_fcredit == 0 ) {
357 		tpcb->tp_sendfcc = 1;
358 	}
359 	if (cdt == 0) {
360 		if (old_fcredit != 0)
361 			IncStat(ts_zfcdt);
362 		/* The following might mean that the window shrunk */
363 		if (tpcb->tp_timer[TM_data_retrans]) {
364 			tpcb->tp_timer[TM_data_retrans] = 0;
365 			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
366 			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
367 				tpcb->tp_sndnxt = tpcb->tp_snduna;
368 				tpcb->tp_sndnxt_m = 0;
369 			}
370 		}
371 	}
372 	tpcb->tp_fcredit = cdt;
373 	bang |= (old_fcredit < cdt);
374 
375 done:
376 	IFDEBUG(D_ACKRECV)
377 		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
378 			bang, cdt, old_fcredit, tpcb->tp_cong_win);
379 	ENDDEBUG
380 	/* if (bang) XXXXX Very bad to remove this test, but somethings broken */
381 		tp_send(tpcb);
382 	return (bang);
383 }
384 
385 /*
386  * CALLED FROM:
387  *  tp_goodack()
388  * FUNCTION and ARGUMENTS:
389  *  drops everything up TO but not INCLUDING seq # (seq)
390  *  from the retransmission queue.
391  */
392 tp_sbdrop(tpcb, seq)
393 	register struct 	tp_pcb 			*tpcb;
394 	SeqNum					seq;
395 {
396 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
397 	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
398 	int	oldcc = sb->sb_cc, oldi = i;
399 
400 	if (i >= tpcb->tp_seqhalf)
401 		printf("tp_spdropping too much -- should panic");
402 	while (i-- > 0)
403 		sbdroprecord(sb);
404 	IFDEBUG(D_ACKRECV)
405 		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
406 			oldi, oldcc - sb->sb_cc, tpcb, seq);
407 	ENDDEBUG
408 	if (sb->sb_flags & SB_NOTIFY)
409 		sowwakeup(tpcb->tp_sock);
410 	return (oldcc - sb->sb_cc);
411 }
412 
413 /*
414  * CALLED FROM:
415  * 	tp.trans on user send request, arrival of AK and arrival of XAK
416  * FUNCTION and ARGUMENTS:
417  * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
418  * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
419  * 			c) it hits seq number (highseq) limited by cong or credit.
420  *
421  * 	If you want XPD to buffer > 1 du per socket buffer, you can
422  * 	modifiy this to issue XPD tpdus also, but then it'll have
423  * 	to take some argument(s) to distinguish between the type of DU to
424  * 	hand tp_emit.
425  *
426  * 	When something is sent for the first time, its time-of-send
427  * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
428  *  When the ack arrives, the smoothed round-trip time is figured
429  *  using this value.
430  */
431 void
432 tp_send(tpcb)
433 	register struct tp_pcb	*tpcb;
434 {
435 	register int			len;
436 	register struct mbuf	*m;
437 	struct mbuf				*mb = 0;
438 	struct 	sockbuf			*sb = &tpcb->tp_sock->so_snd;
439 	unsigned int			eotsdu = 0;
440 	SeqNum					highseq, checkseq;
441 	int						idle, idleticks, off, cong_win;
442 #ifdef TP_PERF_MEAS
443 	int			 			send_start_time = ticks;
444 	SeqNum					oldnxt = tpcb->tp_sndnxt;
445 #endif /* TP_PERF_MEAS */
446 
447 	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
448 	if (idle) {
449 		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
450 		if (idleticks > tpcb->tp_dt_ticks)
451 			/*
452 			 * We have been idle for "a while" and no acks are
453 			 * expected to clock out any data we send --
454 			 * slow start to get ack "clock" running again.
455 			 */
456 			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
457 	}
458 
459 	cong_win = tpcb->tp_cong_win;
460 	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
461 	if (tpcb->tp_Xsnd.sb_mb)
462 		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
463 
464 	IFDEBUG(D_DATA)
465 		printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
466 				tpcb, tpcb->tp_sndnxt, cong_win, highseq);
467 	ENDDEBUG
468 	IFTRACE(D_DATA)
469 		tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
470 			tpcb->tp_sndnew,  tpcb->tp_snduna, 0, 0);
471 		tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
472 			tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
473 	ENDTRACE
474 	IFTRACE(D_DATA)
475 		tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
476 			tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
477 	ENDTRACE
478 
479 	if (tpcb->tp_sndnxt_m)
480 		m = tpcb->tp_sndnxt_m;
481 	else {
482 		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
483 		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
484 			off--;
485 	}
486 send:
487 	/*
488 	 * Avoid silly window syndrome here . . . figure out how!
489 	 */
490 	checkseq = tpcb->tp_sndnum;
491 	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
492 		checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
493 
494 	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
495 
496 		eotsdu = (m->m_flags & M_EOR) != 0;
497 		len = m->m_pkthdr.len;
498 		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
499 			len < (tpcb->tp_l_tpdusize / 2))
500 				break;  /* Nagle . . . . . */
501 		cong_win -= len;
502 		/* make a copy - mb goes into the retransmission list
503 		 * while m gets emitted.  m_copy won't copy a zero-length mbuf.
504 		 */
505 		mb = m;
506 		m = m_copy(mb, 0, M_COPYALL);
507 		if (m == MNULL)
508 				break;
509 		IFTRACE(D_STASH)
510 			tptraceTPCB( TPPTmisc,
511 				"tp_send mcopy nxt high eotsdu len",
512 				tpcb->tp_sndnxt, highseq, eotsdu, len);
513 		ENDTRACE
514 
515 		IFDEBUG(D_DATA)
516 			printf("tp_sending tpcb 0x%x nxt 0x%x\n",
517 				tpcb, tpcb->tp_sndnxt);
518 		ENDDEBUG
519 		/* when headers are precomputed, may need to fill
520 			   in checksum here */
521 		if (tpcb->tp_sock->so_error =
522 			tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
523 			/* error */
524 			break;
525 		}
526 		m = mb->m_nextpkt;
527 		tpcb->tp_sndnxt_m = m;
528 		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
529 			SEQ_INC(tpcb, tpcb->tp_sndnew);
530 			/*
531 			 * Time this transmission if not a retransmission and
532 			 * not currently timing anything.
533 			 */
534 			if (tpcb->tp_rttemit == 0) {
535 				tpcb->tp_rttemit = ticks;
536 				tpcb->tp_rttseq = tpcb->tp_sndnxt;
537 			}
538 			tpcb->tp_sndnxt = tpcb->tp_sndnew;
539 		} else
540 			SEQ_INC(tpcb, tpcb->tp_sndnxt);
541 		/*
542 		 * Set retransmit timer if not currently set.
543 		 * Initial value for retransmit timer is smoothed
544 		 * round-trip time + 2 * round-trip time variance.
545 		 * Initialize shift counter which is used for backoff
546 		 * of retransmit time.
547 		 */
548 		if (tpcb->tp_timer[TM_data_retrans] == 0 &&
549 			tpcb->tp_class != TP_CLASS_0) {
550 			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
551 			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
552 			tpcb->tp_rxtshift = 0;
553 		}
554 	}
555 	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
556 		tpcb->tp_oktonagle = 0;
557 #ifdef TP_PERF_MEAS
558 	IFPERF(tpcb)
559 		{
560 			register int npkts;
561 			int	 elapsed = ticks - send_start_time, *t;
562 			struct timeval now;
563 
564 			npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
565 
566 			if (npkts > 0)
567 				tpcb->tp_Nwindow++;
568 
569 			if (npkts > TP_PM_MAX)
570 				npkts = TP_PM_MAX;
571 
572 			t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
573 			*t += (t - elapsed) >> TP_RTT_ALPHA;
574 
575 			if (mb == 0) {
576 				IncPStat(tpcb, tps_win_lim_by_data[npkts] );
577 			} else {
578 				IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
579 				/* not true with congestion-window being used */
580 			}
581 			now.tv_sec = elapsed / hz;
582 			now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
583 			tpmeas( tpcb->tp_lref,
584 					TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
585 		}
586 	ENDPERF
587 #endif /* TP_PERF_MEAS */
588 
589 
590 	IFTRACE(D_DATA)
591 		tptraceTPCB( TPPTmisc,
592 			"tp_send at end: new nxt eotsdu error",
593 			tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
594 
595 	ENDTRACE
596 }
597 
/*
 * Counters kept by tp_packetize():
 * TPNagleok counts calls that ended without an end-of-tsdu, leaving the
 * last queued packet open for more data;
 * TPNagled counts calls whose data was merged into such a packet.
 */
int TPNagleok;
int TPNagled;
600 
601 tp_packetize(tpcb, m, eotsdu)
602 register struct tp_pcb *tpcb;
603 register struct mbuf *m;
604 int eotsdu;
605 {
606 	register struct mbuf *n;
607 	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
608 	int	maxsize = tpcb->tp_l_tpdusize
609 			- tp_headersize(DT_TPDU_type, tpcb)
610 			- (tpcb->tp_use_checksum?4:0) ;
611 	int totlen = m->m_pkthdr.len;
612 	struct mbuf *m_split();
613 	/*
614 	 * Pre-packetize the data in the sockbuf
615 	 * according to negotiated mtu.  Do it here
616 	 * where we can safely wait for mbufs.
617 	 *
618 	 * This presumes knowledge of sockbuf conventions.
619 	 * TODO: allocate space for header and fill it in (once!).
620 	 */
621 	IFDEBUG(D_DATA)
622 		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
623 			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
624 	ENDTRACE
625 	if (tpcb->tp_oktonagle) {
626 		if ((n = sb->sb_mb) == 0)
627 			panic("tp_packetize");
628 		while (n->m_act)
629 			n = n->m_act;
630 		if (n->m_flags & M_EOR)
631 			panic("tp_packetize 2");
632 		SEQ_INC(tpcb, tpcb->tp_sndnum);
633 		if (totlen + n->m_pkthdr.len < maxsize) {
634 			/* There is an unsent packet with space, combine data */
635 			struct mbuf *old_n = n;
636 			tpsbcheck(tpcb,3);
637 			n->m_pkthdr.len += totlen;
638 			while (n->m_next)
639 				n = n->m_next;
640 			sbcompress(sb, m, n);
641 			tpsbcheck(tpcb,4);
642 			n = old_n;
643 			TPNagled++;
644 			goto out;
645 		}
646 	}
647 	while (m) {
648 		n = m;
649 		if (totlen > maxsize) {
650 			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
651 				panic("tp_packetize");
652 		} else
653 			m = 0;
654 		totlen -= maxsize;
655 		tpsbcheck(tpcb, 5);
656 		sbappendrecord(sb, n);
657 		tpsbcheck(tpcb, 6);
658 		SEQ_INC(tpcb, tpcb->tp_sndnum);
659 	}
660 out:
661 	if (eotsdu) {
662 		n->m_flags |= M_EOR;  /* XXX belongs at end */
663 		tpcb->tp_oktonagle = 0;
664 	} else {
665 		SEQ_DEC(tpcb, tpcb->tp_sndnum);
666 		tpcb->tp_oktonagle = 1;
667 		TPNagleok++;
668 	}
669 	IFDEBUG(D_DATA)
670 		printf("SEND out: oktonagle %d sndnum 0x%x\n",
671 			tpcb->tp_oktonagle, tpcb->tp_sndnum);
672 	ENDTRACE
673 	return 0;
674 }
675 
676 
677 /*
678  * NAME: tp_stash()
679  * CALLED FROM:
680  *	tp.trans on arrival of a DT tpdu
681  * FUNCTION, ARGUMENTS, and RETURN VALUE:
682  * 	Returns 1 if
683  *		a) something new arrived and it's got eotsdu_reached bit on,
684  * 		b) this arrival was caused other out-of-sequence things to be
685  *    	accepted, or
686  * 		c) this arrival is the highest seq # for which we last gave credit
687  *   	(sender just sent a whole window)
688  *  In other words, returns 1 if tp should send an ack immediately, 0 if
689  *  the ack can wait a while.
690  *
691  * Note: this implementation no longer renegs on credit, (except
692  * when debugging option D_RENEG is on, for the purpose of testing
693  * ack subsequencing), so we don't  need to check for incoming tpdus
694  * being in a reneged portion of the window.
695  */
696 
697 tp_stash(tpcb, e)
698 	register struct tp_pcb		*tpcb;
699 	register struct tp_event	*e;
700 {
701 	register int		ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
702 									/* 0--> delay acks until full window */
703 									/* 1--> ack each tpdu */
704 #ifndef lint
705 #define E e->ATTR(DT_TPDU)
706 #else /* lint */
707 #define E e->ev_union.EV_DT_TPDU
708 #endif /* lint */
709 
710 	if ( E.e_eot ) {
711 		register struct mbuf *n = E.e_data;
712 		n->m_flags |= M_EOR;
713 		n->m_act = 0;
714 	}
715 		IFDEBUG(D_STASH)
716 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
717 				"stash: so_rcv before appending");
718 			dump_mbuf(E.e_data,
719 				"stash: e_data before appending");
720 		ENDDEBUG
721 
722 	IFPERF(tpcb)
723 		PStat(tpcb, Nb_from_ll) += E.e_datalen;
724 		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
725 			E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
726 	ENDPERF
727 
728 	if (E.e_seq == tpcb->tp_rcvnxt) {
729 
730 		IFDEBUG(D_STASH)
731 			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
732 			E.e_seq, E.e_datalen, E.e_eot);
733 		ENDDEBUG
734 
735 		IFTRACE(D_STASH)
736 			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
737 			E.e_seq, E.e_datalen, E.e_eot, 0);
738 		ENDTRACE
739 
740 		SET_DELACK(tpcb);
741 
742 		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
743 
744 		SEQ_INC( tpcb, tpcb->tp_rcvnxt );
745 		/*
746 		 * move chains from the reassembly queue to the socket buffer
747 		 */
748 		if (tpcb->tp_rsycnt) {
749 			register struct mbuf **mp;
750 			struct mbuf **mplim;
751 
752 			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
753 			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
754 
755 			while (tpcb->tp_rsycnt && *mp) {
756 				sbappend(&tpcb->tp_sock->so_rcv, *mp);
757 				tpcb->tp_rsycnt--;
758 				*mp = 0;
759 				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
760 				ack_reason |= ACK_REORDER;
761 				if (++mp == mplim)
762 					mp = tpcb->tp_rsyq;
763 			}
764 		}
765 		IFDEBUG(D_STASH)
766 			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
767 				"stash: so_rcv after appending");
768 		ENDDEBUG
769 
770 	} else {
771 		register struct mbuf **mp;
772 		SeqNum uwe;
773 
774 		IFTRACE(D_STASH)
775 			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
776 			E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
777 		ENDTRACE
778 
779 		if (tpcb->tp_rsyq == 0)
780 			tp_rsyset(tpcb);
781 		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
782 		if (tpcb->tp_rsyq == 0 ||
783 						!IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
784 			ack_reason = ACK_DONT;
785 			m_freem(E.e_data);
786 		} else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
787 			IFDEBUG(D_STASH)
788 				printf("tp_stash - drop & ack\n");
789 			ENDDEBUG
790 
791 			/* retransmission - drop it and force an ack */
792 			IncStat(ts_dt_dup);
793 			IFPERF(tpcb)
794 				IncPStat(tpcb, tps_n_ack_cuz_dup);
795 			ENDPERF
796 
797 			m_freem(E.e_data);
798 			ack_reason |= ACK_DUP;
799 		} else {
800 			*mp = E.e_data;
801 			tpcb->tp_rsycnt++;
802 			ack_reason = ACK_DONT;
803 		}
804 	}
805 	/* there were some comments of historical interest here. */
806 	{
807 		LOCAL_CREDIT(tpcb);
808 
809 		if ( E.e_seq ==  tpcb->tp_sent_uwe )
810 			ack_reason |= ACK_STRAT_FULLWIN;
811 
812 		IFTRACE(D_STASH)
813 			tptraceTPCB(TPPTmisc,
814 				"end of stash, eot, ack_reason, sent_uwe ",
815 				E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
816 		ENDTRACE
817 
818 		if ( ack_reason == ACK_DONT ) {
819 			IncStat( ts_ackreason[ACK_DONT] );
820 			return 0;
821 		} else {
822 			IFPERF(tpcb)
823 				if(ack_reason & ACK_STRAT_EACH) {
824 					IncPStat(tpcb, tps_n_ack_cuz_strat);
825 				} else if(ack_reason & ACK_STRAT_FULLWIN) {
826 					IncPStat(tpcb, tps_n_ack_cuz_fullwin);
827 				} else if(ack_reason & ACK_REORDER) {
828 					IncPStat(tpcb, tps_n_ack_cuz_reorder);
829 				}
830 				tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
831 							SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
832 			ENDPERF
833 			{
834 				register int i;
835 
836 				/* keep track of all reasons that apply */
837 				for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
838 					if( ack_reason & (1<<i) )
839 						IncStat( ts_ackreason[i] );
840 				}
841 			}
842 			return 1;
843 		}
844 	}
845 }
846 
847 /*
848  * tp_rsyflush - drop all the packets on the reassembly queue.
849  * Do this when closing the socket, or when somebody has changed
850  * the space avaible in the receive socket (XXX).
851  */
852 tp_rsyflush(tpcb)
853 register struct tp_pcb *tpcb;
854 {
855 	register struct mbuf *m, **mp;
856 	if (tpcb->tp_rsycnt) {
857 		for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
858 									 --mp >= tpcb->tp_rsyq; )
859 			if (*mp) {
860 				tpcb->tp_rsycnt--;
861 				m_freem(*mp);
862 			}
863 		if (tpcb->tp_rsycnt) {
864 			printf("tp_rsyflush %x\n", tpcb);
865 			tpcb->tp_rsycnt = 0;
866 		}
867 	}
868 	free((caddr_t)tpcb->tp_rsyq, M_PCB);
869 	tpcb->tp_rsyq = 0;
870 }
871 
872 tp_rsyset(tpcb)
873 register struct tp_pcb *tpcb;
874 {
875 	register struct socket *so = tpcb->tp_sock;
876 	int maxcredit  = tpcb->tp_xtd_format ? 0xffff : 0xf;
877 	int old_credit = tpcb->tp_maxlcredit;
878 	caddr_t	rsyq;
879 
880 	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
881 		  (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
882 
883 	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
884 		return;
885 	maxcredit *= sizeof(struct mbuf *);
886 	if (tpcb->tp_rsyq)
887 		tp_rsyflush(tpcb);
888 	if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
889 		bzero(rsyq, maxcredit);
890 	tpcb->tp_rsyq = (struct mbuf **)rsyq;
891 }
892 
893 tpsbcheck(tpcb, i)
894 struct tp_pcb *tpcb;
895 {
896 	register struct mbuf *n, *m;
897 	register int len = 0, mbcnt = 0, pktlen;
898 	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
899 
900 	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
901 		if ((n->m_flags & M_PKTHDR) == 0)
902 			panic("tpsbcheck nohdr");
903 		pktlen = len + n->m_pkthdr.len;
904 	    for (m = n; m; m = m->m_next) {
905 			len += m->m_len;
906 			mbcnt += MSIZE;
907 			if (m->m_flags & M_EXT)
908 				mbcnt += m->m_ext.ext_size;
909 		}
910 		if (len != pktlen) {
911 			printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
912 				i, len, pktlen, n);
913 			panic("tpsbcheck short");
914 		}
915 	}
916 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
917 		printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
918 		    mbcnt, sb->sb_mbcnt);
919 		panic("tpsbcheck");
920 	}
921 }
922