1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
7 * @(#)tp_subr.c 8.1 (Berkeley) 06/10/93
8 */
9
10 /***********************************************************
11 Copyright IBM Corporation 1987
12
13 All Rights Reserved
14
15 Permission to use, copy, modify, and distribute this software and its
16 documentation for any purpose and without fee is hereby granted,
17 provided that the above copyright notice appear in all copies and that
18 both that copyright notice and this permission notice appear in
19 supporting documentation, and that the name of IBM not be
20 used in advertising or publicity pertaining to distribution of the
21 software without specific, written prior permission.
22
23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29 SOFTWARE.
30
31 ******************************************************************/
32
33 /*
34 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35 */
36 /*
37 * ARGO TP
38 *
39 * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40 * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41 *
42 * The main work of data transfer is done here.
43 * These routines are called from tp.trans.
44 * They include the routines that check the validity of acks and Xacks,
45 * (tp_goodack() and tp_goodXack() )
46 * take packets from socket buffers and send them (tp_send()),
47 * drop the data from the socket buffers (tp_sbdrop()),
48 * and put incoming packet data into socket buffers (tp_stash()).
49 */
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/mbuf.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/protosw.h>
57 #include <sys/errno.h>
58 #include <sys/time.h>
59 #include <sys/kernel.h>
60
61 #include <netiso/tp_ip.h>
62 #include <netiso/iso.h>
63 #include <netiso/argo_debug.h>
64 #include <netiso/tp_timer.h>
65 #include <netiso/tp_param.h>
66 #include <netiso/tp_stat.h>
67 #include <netiso/tp_pcb.h>
68 #include <netiso/tp_tpdu.h>
69 #include <netiso/tp_trace.h>
70 #include <netiso/tp_meas.h>
71 #include <netiso/tp_seq.h>
72
/* Routines referenced before their definition (old-style declarations). */
int		tp_emit();
int		tp_sbdrop();
void	tp_send();

/* Clock tick counter; used in this file to time round trips. */
extern int	ticks;

/*
 * Number of consecutive duplicate acks at which tp_goodack()
 * retransmits without waiting for the retransmit timer.
 */
int		tprexmtthresh = 3;
77
78 /*
79 * CALLED FROM:
80 * tp.trans, when an XAK arrives
81 * FUNCTION and ARGUMENTS:
82 * Determines if the sequence number (seq) from the XAK
83 * acks anything new. If so, drop the appropriate tpdu
84 * from the XPD send queue.
85 * RETURN VALUE:
86 * Returns 1 if it did this, 0 if the ack caused no action.
87 */
88 int
tp_goodXack(tpcb,seq)89 tp_goodXack(tpcb, seq)
90 struct tp_pcb *tpcb;
91 SeqNum seq;
92 {
93
94 IFTRACE(D_XPD)
95 tptraceTPCB(TPPTgotXack,
96 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
97 tpcb->tp_snduna);
98 ENDTRACE
99
100 if ( seq == tpcb->tp_Xuna ) {
101 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
102
103 /* DROP 1 packet from the Xsnd socket buf - just so happens
104 * that only one packet can be there at any time
105 * so drop the whole thing. If you allow > 1 packet
106 * the socket buffer, then you'll have to keep
107 * track of how many characters went w/ each XPD tpdu, so this
108 * will get messier
109 */
110 IFDEBUG(D_XPD)
111 dump_mbuf(tpcb->tp_Xsnd.sb_mb,
112 "tp_goodXack Xsnd before sbdrop");
113 ENDDEBUG
114
115 IFTRACE(D_XPD)
116 tptraceTPCB(TPPTmisc,
117 "goodXack: dropping cc ",
118 (int)(tpcb->tp_Xsnd.sb_cc),
119 0,0,0);
120 ENDTRACE
121 sbdroprecord(&tpcb->tp_Xsnd);
122 return 1;
123 }
124 return 0;
125 }
126
127 /*
128 * CALLED FROM:
129 * tp_good_ack()
130 * FUNCTION and ARGUMENTS:
131 * updates
132 * smoothed average round trip time (*rtt)
133 * roundtrip time variance (*rtv) - actually deviation, not variance
134 * given the new value (diff)
135 * RETURN VALUE:
136 * void
137 */
138
139 void
tp_rtt_rtv(tpcb)140 tp_rtt_rtv(tpcb)
141 register struct tp_pcb *tpcb;
142 {
143 int old = tpcb->tp_rtt;
144 int delta, elapsed = ticks - tpcb->tp_rttemit;
145
146 if (tpcb->tp_rtt != 0) {
147 /*
148 * rtt is the smoothed round trip time in machine clock ticks (hz).
149 * It is stored as a fixed point number, unscaled (unlike the tcp
150 * srtt). The rationale here is that it is only significant to the
151 * nearest unit of slowtimo, which is at least 8 machine clock ticks
152 * so there is no need to scale. The smoothing is done according
153 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
154 */
155 delta = elapsed - tpcb->tp_rtt;
156 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
157 tpcb->tp_rtt = 1;
158 /*
159 * rtv is a smoothed accumulated mean difference, unscaled
160 * for reasons expressed above.
161 * It is smoothed with an alpha of .75, and the round trip timer
162 * will be set to rtt + 4*rtv, also as TCP does.
163 */
164 if (delta < 0)
165 delta = -delta;
166 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
167 tpcb->tp_rtv = 1;
168 } else {
169 /*
170 * No rtt measurement yet - use the unsmoothed rtt.
171 * Set the variance to half the rtt (so our first
172 * retransmit happens at 3*rtt)
173 */
174 tpcb->tp_rtt = elapsed;
175 tpcb->tp_rtv = elapsed >> 1;
176 }
177 tpcb->tp_rttemit = 0;
178 tpcb->tp_rxtshift = 0;
179 /*
180 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
181 * Because of the way we do the smoothing, srtt and rttvar
182 * will each average +1/2 tick of bias. When we compute
183 * the retransmit timer, we want 1/2 tick of rounding and
184 * 1 extra tick because of +-1/2 tick uncertainty in the
185 * firing of the timer. The bias will give us exactly the
186 * 1.5 tick we need. But, because the bias is
187 * statistical, we have to test that we don't drop below
188 * the minimum feasible timer (which is 2 ticks)."
189 */
190 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
191 tpcb->tp_peer_acktime, 128 /* XXX */);
192 IFDEBUG(D_RTT)
193 printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
194 "tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
195 ENDDEBUG
196 tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
197 }
198
199 /*
200 * CALLED FROM:
201 * tp.trans when an AK arrives
202 * FUNCTION and ARGUMENTS:
203 * Given (cdt), the credit from the AK tpdu, and
204 * (seq), the sequence number from the AK tpdu,
205 * tp_goodack() determines if the AK acknowledges something in the send
206 * window, and if so, drops the appropriate packets from the retransmission
207 * list, computes the round trip time, and updates the retransmission timer
208 * based on the new smoothed round trip time.
209 * RETURN VALUE:
210 * Returns 1 if
211 * EITHER it actually acked something heretofore unacknowledged
212 * OR no news but the credit should be processed.
213 * If something heretofore unacked was acked with this sequence number,
214 * the appropriate tpdus are dropped from the retransmission control list,
215 * by calling tp_sbdrop().
216 * No need to see the tpdu itself.
217 */
218 int
tp_goodack(tpcb,cdt,seq,subseq)219 tp_goodack(tpcb, cdt, seq, subseq)
220 register struct tp_pcb *tpcb;
221 u_int cdt;
222 register SeqNum seq;
223 u_int subseq;
224 {
225 int old_fcredit;
226 int bang = 0; /* bang --> ack for something heretofore unacked */
227 u_int bytes_acked;
228
229 IFDEBUG(D_ACKRECV)
230 printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
231 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
232 ENDDEBUG
233 IFTRACE(D_ACKRECV)
234 tptraceTPCB(TPPTgotack,
235 seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
236 ENDTRACE
237
238 IFPERF(tpcb)
239 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
240 ENDPERF
241
242 if (seq == tpcb->tp_snduna) {
243 if (subseq < tpcb->tp_r_subseq ||
244 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
245 discard_the_ack:
246 IFDEBUG(D_ACKRECV)
247 printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
248 tpcb, subseq, tpcb->tp_r_subseq);
249 ENDDEBUG
250 goto done;
251 }
252 if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
253 tpcb->tp_r_subseq = subseq;
254 if (tpcb->tp_timer[TM_data_retrans] == 0)
255 tpcb->tp_dupacks = 0;
256 else if (++tpcb->tp_dupacks == tprexmtthresh) {
257 /* partner went out of his way to signal with different
258 subsequences that he has the same lack of an expected
259 packet. This may be an early indiciation of a loss */
260
261 SeqNum onxt = tpcb->tp_sndnxt;
262 struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
263 u_int win = min(tpcb->tp_fcredit,
264 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
265 IFDEBUG(D_ACKRECV)
266 printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
267 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
268 ENDDEBUG
269 if (win < 2)
270 win = 2;
271 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
272 tpcb->tp_timer[TM_data_retrans] = 0;
273 tpcb->tp_rttemit = 0;
274 tpcb->tp_sndnxt = tpcb->tp_snduna;
275 tpcb->tp_sndnxt_m = 0;
276 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
277 tp_send(tpcb);
278 tpcb->tp_cong_win = tpcb->tp_ssthresh +
279 tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
280 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
281 tpcb->tp_sndnxt = onxt;
282 tpcb->tp_sndnxt_m = onxt_m;
283 }
284
285 } else if (tpcb->tp_dupacks > tprexmtthresh) {
286 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
287 }
288 goto done;
289 }
290 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
291 goto discard_the_ack;
292 /*
293 * If the congestion window was inflated to account
294 * for the other side's cached packets, retract it.
295 */
296 if (tpcb->tp_dupacks > tprexmtthresh &&
297 tpcb->tp_cong_win > tpcb->tp_ssthresh)
298 tpcb->tp_cong_win = tpcb->tp_ssthresh;
299 tpcb->tp_r_subseq = subseq;
300 old_fcredit = tpcb->tp_fcredit;
301 tpcb->tp_fcredit = cdt;
302 if (cdt > tpcb->tp_maxfcredit)
303 tpcb->tp_maxfcredit = cdt;
304 tpcb->tp_dupacks = 0;
305
306 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
307
308 tpsbcheck(tpcb, 0);
309 bytes_acked = tp_sbdrop(tpcb, seq);
310 tpsbcheck(tpcb, 1);
311 /*
312 * If transmit timer is running and timed sequence
313 * number was acked, update smoothed round trip time.
314 * Since we now have an rtt measurement, cancel the
315 * timer backoff (cf., Phil Karn's retransmit alg.).
316 * Recompute the initial retransmit timer.
317 */
318 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
319 tp_rtt_rtv(tpcb);
320 /*
321 * If all outstanding data is acked, stop retransmit timer.
322 * If there is more data to be acked, restart retransmit
323 * timer, using current (possibly backed-off) value.
324 * OSI combines the keepalive and persistance functions.
325 * So, there is no persistance timer per se, to restart.
326 */
327 if (tpcb->tp_class != TP_CLASS_0)
328 tpcb->tp_timer[TM_data_retrans] =
329 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
330 /*
331 * When new data is acked, open the congestion window.
332 * If the window gives us less than ssthresh packets
333 * in flight, open exponentially (maxseg per packet).
334 * Otherwise open linearly: maxseg per window
335 * (maxseg^2 / cwnd per packet), plus a constant
336 * fraction of a packet (maxseg/8) to help larger windows
337 * open quickly enough.
338 */
339 {
340 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
341
342 incr = min(incr, bytes_acked);
343 if (cw > tpcb->tp_ssthresh)
344 incr = incr * incr / cw + incr / 8;
345 tpcb->tp_cong_win =
346 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
347 }
348 tpcb->tp_snduna = seq;
349 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
350 tpcb->tp_sndnxt = seq;
351 tpcb->tp_sndnxt_m = 0;
352 }
353 bang++;
354 }
355
356 if( cdt != 0 && old_fcredit == 0 ) {
357 tpcb->tp_sendfcc = 1;
358 }
359 if (cdt == 0) {
360 if (old_fcredit != 0)
361 IncStat(ts_zfcdt);
362 /* The following might mean that the window shrunk */
363 if (tpcb->tp_timer[TM_data_retrans]) {
364 tpcb->tp_timer[TM_data_retrans] = 0;
365 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
366 if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
367 tpcb->tp_sndnxt = tpcb->tp_snduna;
368 tpcb->tp_sndnxt_m = 0;
369 }
370 }
371 }
372 tpcb->tp_fcredit = cdt;
373 bang |= (old_fcredit < cdt);
374
375 done:
376 IFDEBUG(D_ACKRECV)
377 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
378 bang, cdt, old_fcredit, tpcb->tp_cong_win);
379 ENDDEBUG
380 /* if (bang) XXXXX Very bad to remove this test, but somethings broken */
381 tp_send(tpcb);
382 return (bang);
383 }
384
385 /*
386 * CALLED FROM:
387 * tp_goodack()
388 * FUNCTION and ARGUMENTS:
389 * drops everything up TO but not INCLUDING seq # (seq)
390 * from the retransmission queue.
391 */
tp_sbdrop(tpcb,seq)392 tp_sbdrop(tpcb, seq)
393 register struct tp_pcb *tpcb;
394 SeqNum seq;
395 {
396 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
397 register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
398 int oldcc = sb->sb_cc, oldi = i;
399
400 if (i >= tpcb->tp_seqhalf)
401 printf("tp_spdropping too much -- should panic");
402 while (i-- > 0)
403 sbdroprecord(sb);
404 IFDEBUG(D_ACKRECV)
405 printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
406 oldi, oldcc - sb->sb_cc, tpcb, seq);
407 ENDDEBUG
408 if (sb->sb_flags & SB_NOTIFY)
409 sowwakeup(tpcb->tp_sock);
410 return (oldcc - sb->sb_cc);
411 }
412
413 /*
414 * CALLED FROM:
415 * tp.trans on user send request, arrival of AK and arrival of XAK
416 * FUNCTION and ARGUMENTS:
417 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
418 * Emits until a) runs out of data, or b) runs into an XPD mark, or
419 * c) it hits seq number (highseq) limited by cong or credit.
420 *
421 * If you want XPD to buffer > 1 du per socket buffer, you can
422 * modifiy this to issue XPD tpdus also, but then it'll have
423 * to take some argument(s) to distinguish between the type of DU to
424 * hand tp_emit.
425 *
426 * When something is sent for the first time, its time-of-send
427 * is stashed (in system clock ticks rather than pf_slowtimo ticks).
428 * When the ack arrives, the smoothed round-trip time is figured
429 * using this value.
430 */
431 void
tp_send(tpcb)432 tp_send(tpcb)
433 register struct tp_pcb *tpcb;
434 {
435 register int len;
436 register struct mbuf *m;
437 struct mbuf *mb = 0;
438 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
439 unsigned int eotsdu = 0;
440 SeqNum highseq, checkseq;
441 int idle, idleticks, off, cong_win;
442 #ifdef TP_PERF_MEAS
443 int send_start_time = ticks;
444 SeqNum oldnxt = tpcb->tp_sndnxt;
445 #endif /* TP_PERF_MEAS */
446
447 idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
448 if (idle) {
449 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
450 if (idleticks > tpcb->tp_dt_ticks)
451 /*
452 * We have been idle for "a while" and no acks are
453 * expected to clock out any data we send --
454 * slow start to get ack "clock" running again.
455 */
456 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
457 }
458
459 cong_win = tpcb->tp_cong_win;
460 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
461 if (tpcb->tp_Xsnd.sb_mb)
462 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
463
464 IFDEBUG(D_DATA)
465 printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
466 tpcb, tpcb->tp_sndnxt, cong_win, highseq);
467 ENDDEBUG
468 IFTRACE(D_DATA)
469 tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
470 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
471 tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
472 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
473 ENDTRACE
474 IFTRACE(D_DATA)
475 tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
476 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
477 ENDTRACE
478
479 if (tpcb->tp_sndnxt_m)
480 m = tpcb->tp_sndnxt_m;
481 else {
482 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
483 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
484 off--;
485 }
486 send:
487 /*
488 * Avoid silly window syndrome here . . . figure out how!
489 */
490 checkseq = tpcb->tp_sndnum;
491 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
492 checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
493
494 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
495
496 eotsdu = (m->m_flags & M_EOR) != 0;
497 len = m->m_pkthdr.len;
498 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
499 len < (tpcb->tp_l_tpdusize / 2))
500 break; /* Nagle . . . . . */
501 cong_win -= len;
502 /* make a copy - mb goes into the retransmission list
503 * while m gets emitted. m_copy won't copy a zero-length mbuf.
504 */
505 mb = m;
506 m = m_copy(mb, 0, M_COPYALL);
507 if (m == MNULL)
508 break;
509 IFTRACE(D_STASH)
510 tptraceTPCB( TPPTmisc,
511 "tp_send mcopy nxt high eotsdu len",
512 tpcb->tp_sndnxt, highseq, eotsdu, len);
513 ENDTRACE
514
515 IFDEBUG(D_DATA)
516 printf("tp_sending tpcb 0x%x nxt 0x%x\n",
517 tpcb, tpcb->tp_sndnxt);
518 ENDDEBUG
519 /* when headers are precomputed, may need to fill
520 in checksum here */
521 if (tpcb->tp_sock->so_error =
522 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
523 /* error */
524 break;
525 }
526 m = mb->m_nextpkt;
527 tpcb->tp_sndnxt_m = m;
528 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
529 SEQ_INC(tpcb, tpcb->tp_sndnew);
530 /*
531 * Time this transmission if not a retransmission and
532 * not currently timing anything.
533 */
534 if (tpcb->tp_rttemit == 0) {
535 tpcb->tp_rttemit = ticks;
536 tpcb->tp_rttseq = tpcb->tp_sndnxt;
537 }
538 tpcb->tp_sndnxt = tpcb->tp_sndnew;
539 } else
540 SEQ_INC(tpcb, tpcb->tp_sndnxt);
541 /*
542 * Set retransmit timer if not currently set.
543 * Initial value for retransmit timer is smoothed
544 * round-trip time + 2 * round-trip time variance.
545 * Initialize shift counter which is used for backoff
546 * of retransmit time.
547 */
548 if (tpcb->tp_timer[TM_data_retrans] == 0 &&
549 tpcb->tp_class != TP_CLASS_0) {
550 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
551 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
552 tpcb->tp_rxtshift = 0;
553 }
554 }
555 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
556 tpcb->tp_oktonagle = 0;
557 #ifdef TP_PERF_MEAS
558 IFPERF(tpcb)
559 {
560 register int npkts;
561 int elapsed = ticks - send_start_time, *t;
562 struct timeval now;
563
564 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
565
566 if (npkts > 0)
567 tpcb->tp_Nwindow++;
568
569 if (npkts > TP_PM_MAX)
570 npkts = TP_PM_MAX;
571
572 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
573 *t += (t - elapsed) >> TP_RTT_ALPHA;
574
575 if (mb == 0) {
576 IncPStat(tpcb, tps_win_lim_by_data[npkts] );
577 } else {
578 IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
579 /* not true with congestion-window being used */
580 }
581 now.tv_sec = elapsed / hz;
582 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
583 tpmeas( tpcb->tp_lref,
584 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
585 }
586 ENDPERF
587 #endif /* TP_PERF_MEAS */
588
589
590 IFTRACE(D_DATA)
591 tptraceTPCB( TPPTmisc,
592 "tp_send at end: new nxt eotsdu error",
593 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
594
595 ENDTRACE
596 }
597
598 int TPNagleok;
599 int TPNagled;
600
tp_packetize(tpcb,m,eotsdu)601 tp_packetize(tpcb, m, eotsdu)
602 register struct tp_pcb *tpcb;
603 register struct mbuf *m;
604 int eotsdu;
605 {
606 register struct mbuf *n;
607 register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
608 int maxsize = tpcb->tp_l_tpdusize
609 - tp_headersize(DT_TPDU_type, tpcb)
610 - (tpcb->tp_use_checksum?4:0) ;
611 int totlen = m->m_pkthdr.len;
612 struct mbuf *m_split();
613 /*
614 * Pre-packetize the data in the sockbuf
615 * according to negotiated mtu. Do it here
616 * where we can safely wait for mbufs.
617 *
618 * This presumes knowledge of sockbuf conventions.
619 * TODO: allocate space for header and fill it in (once!).
620 */
621 IFDEBUG(D_DATA)
622 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
623 maxsize, totlen, eotsdu, tpcb->tp_sndnum);
624 ENDTRACE
625 if (tpcb->tp_oktonagle) {
626 if ((n = sb->sb_mb) == 0)
627 panic("tp_packetize");
628 while (n->m_act)
629 n = n->m_act;
630 if (n->m_flags & M_EOR)
631 panic("tp_packetize 2");
632 SEQ_INC(tpcb, tpcb->tp_sndnum);
633 if (totlen + n->m_pkthdr.len < maxsize) {
634 /* There is an unsent packet with space, combine data */
635 struct mbuf *old_n = n;
636 tpsbcheck(tpcb,3);
637 n->m_pkthdr.len += totlen;
638 while (n->m_next)
639 n = n->m_next;
640 sbcompress(sb, m, n);
641 tpsbcheck(tpcb,4);
642 n = old_n;
643 TPNagled++;
644 goto out;
645 }
646 }
647 while (m) {
648 n = m;
649 if (totlen > maxsize) {
650 if ((m = m_split(n, maxsize, M_WAIT)) == 0)
651 panic("tp_packetize");
652 } else
653 m = 0;
654 totlen -= maxsize;
655 tpsbcheck(tpcb, 5);
656 sbappendrecord(sb, n);
657 tpsbcheck(tpcb, 6);
658 SEQ_INC(tpcb, tpcb->tp_sndnum);
659 }
660 out:
661 if (eotsdu) {
662 n->m_flags |= M_EOR; /* XXX belongs at end */
663 tpcb->tp_oktonagle = 0;
664 } else {
665 SEQ_DEC(tpcb, tpcb->tp_sndnum);
666 tpcb->tp_oktonagle = 1;
667 TPNagleok++;
668 }
669 IFDEBUG(D_DATA)
670 printf("SEND out: oktonagle %d sndnum 0x%x\n",
671 tpcb->tp_oktonagle, tpcb->tp_sndnum);
672 ENDTRACE
673 return 0;
674 }
675
676
677 /*
678 * NAME: tp_stash()
679 * CALLED FROM:
680 * tp.trans on arrival of a DT tpdu
681 * FUNCTION, ARGUMENTS, and RETURN VALUE:
682 * Returns 1 if
683 * a) something new arrived and it's got eotsdu_reached bit on,
684 * b) this arrival was caused other out-of-sequence things to be
685 * accepted, or
686 * c) this arrival is the highest seq # for which we last gave credit
687 * (sender just sent a whole window)
688 * In other words, returns 1 if tp should send an ack immediately, 0 if
689 * the ack can wait a while.
690 *
691 * Note: this implementation no longer renegs on credit, (except
692 * when debugging option D_RENEG is on, for the purpose of testing
693 * ack subsequencing), so we don't need to check for incoming tpdus
694 * being in a reneged portion of the window.
695 */
696
tp_stash(tpcb,e)697 tp_stash(tpcb, e)
698 register struct tp_pcb *tpcb;
699 register struct tp_event *e;
700 {
701 register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
702 /* 0--> delay acks until full window */
703 /* 1--> ack each tpdu */
704 #ifndef lint
705 #define E e->ATTR(DT_TPDU)
706 #else /* lint */
707 #define E e->ev_union.EV_DT_TPDU
708 #endif /* lint */
709
710 if ( E.e_eot ) {
711 register struct mbuf *n = E.e_data;
712 n->m_flags |= M_EOR;
713 n->m_act = 0;
714 }
715 IFDEBUG(D_STASH)
716 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
717 "stash: so_rcv before appending");
718 dump_mbuf(E.e_data,
719 "stash: e_data before appending");
720 ENDDEBUG
721
722 IFPERF(tpcb)
723 PStat(tpcb, Nb_from_ll) += E.e_datalen;
724 tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
725 E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
726 ENDPERF
727
728 if (E.e_seq == tpcb->tp_rcvnxt) {
729
730 IFDEBUG(D_STASH)
731 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
732 E.e_seq, E.e_datalen, E.e_eot);
733 ENDDEBUG
734
735 IFTRACE(D_STASH)
736 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
737 E.e_seq, E.e_datalen, E.e_eot, 0);
738 ENDTRACE
739
740 SET_DELACK(tpcb);
741
742 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
743
744 SEQ_INC( tpcb, tpcb->tp_rcvnxt );
745 /*
746 * move chains from the reassembly queue to the socket buffer
747 */
748 if (tpcb->tp_rsycnt) {
749 register struct mbuf **mp;
750 struct mbuf **mplim;
751
752 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
753 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
754
755 while (tpcb->tp_rsycnt && *mp) {
756 sbappend(&tpcb->tp_sock->so_rcv, *mp);
757 tpcb->tp_rsycnt--;
758 *mp = 0;
759 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
760 ack_reason |= ACK_REORDER;
761 if (++mp == mplim)
762 mp = tpcb->tp_rsyq;
763 }
764 }
765 IFDEBUG(D_STASH)
766 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
767 "stash: so_rcv after appending");
768 ENDDEBUG
769
770 } else {
771 register struct mbuf **mp;
772 SeqNum uwe;
773
774 IFTRACE(D_STASH)
775 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
776 E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
777 ENDTRACE
778
779 if (tpcb->tp_rsyq == 0)
780 tp_rsyset(tpcb);
781 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
782 if (tpcb->tp_rsyq == 0 ||
783 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
784 ack_reason = ACK_DONT;
785 m_freem(E.e_data);
786 } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
787 IFDEBUG(D_STASH)
788 printf("tp_stash - drop & ack\n");
789 ENDDEBUG
790
791 /* retransmission - drop it and force an ack */
792 IncStat(ts_dt_dup);
793 IFPERF(tpcb)
794 IncPStat(tpcb, tps_n_ack_cuz_dup);
795 ENDPERF
796
797 m_freem(E.e_data);
798 ack_reason |= ACK_DUP;
799 } else {
800 *mp = E.e_data;
801 tpcb->tp_rsycnt++;
802 ack_reason = ACK_DONT;
803 }
804 }
805 /* there were some comments of historical interest here. */
806 {
807 LOCAL_CREDIT(tpcb);
808
809 if ( E.e_seq == tpcb->tp_sent_uwe )
810 ack_reason |= ACK_STRAT_FULLWIN;
811
812 IFTRACE(D_STASH)
813 tptraceTPCB(TPPTmisc,
814 "end of stash, eot, ack_reason, sent_uwe ",
815 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
816 ENDTRACE
817
818 if ( ack_reason == ACK_DONT ) {
819 IncStat( ts_ackreason[ACK_DONT] );
820 return 0;
821 } else {
822 IFPERF(tpcb)
823 if(ack_reason & ACK_STRAT_EACH) {
824 IncPStat(tpcb, tps_n_ack_cuz_strat);
825 } else if(ack_reason & ACK_STRAT_FULLWIN) {
826 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
827 } else if(ack_reason & ACK_REORDER) {
828 IncPStat(tpcb, tps_n_ack_cuz_reorder);
829 }
830 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
831 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
832 ENDPERF
833 {
834 register int i;
835
836 /* keep track of all reasons that apply */
837 for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
838 if( ack_reason & (1<<i) )
839 IncStat( ts_ackreason[i] );
840 }
841 }
842 return 1;
843 }
844 }
845 }
846
847 /*
848 * tp_rsyflush - drop all the packets on the reassembly queue.
849 * Do this when closing the socket, or when somebody has changed
850 * the space avaible in the receive socket (XXX).
851 */
tp_rsyflush(tpcb)852 tp_rsyflush(tpcb)
853 register struct tp_pcb *tpcb;
854 {
855 register struct mbuf *m, **mp;
856 if (tpcb->tp_rsycnt) {
857 for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
858 --mp >= tpcb->tp_rsyq; )
859 if (*mp) {
860 tpcb->tp_rsycnt--;
861 m_freem(*mp);
862 }
863 if (tpcb->tp_rsycnt) {
864 printf("tp_rsyflush %x\n", tpcb);
865 tpcb->tp_rsycnt = 0;
866 }
867 }
868 free((caddr_t)tpcb->tp_rsyq, M_PCB);
869 tpcb->tp_rsyq = 0;
870 }
871
tp_rsyset(tpcb)872 tp_rsyset(tpcb)
873 register struct tp_pcb *tpcb;
874 {
875 register struct socket *so = tpcb->tp_sock;
876 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
877 int old_credit = tpcb->tp_maxlcredit;
878 caddr_t rsyq;
879
880 tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
881 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
882
883 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
884 return;
885 maxcredit *= sizeof(struct mbuf *);
886 if (tpcb->tp_rsyq)
887 tp_rsyflush(tpcb);
888 if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
889 bzero(rsyq, maxcredit);
890 tpcb->tp_rsyq = (struct mbuf **)rsyq;
891 }
892
893 tpsbcheck(tpcb, i)
894 struct tp_pcb *tpcb;
895 {
896 register struct mbuf *n, *m;
897 register int len = 0, mbcnt = 0, pktlen;
898 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
899
900 for (n = sb->sb_mb; n; n = n->m_nextpkt) {
901 if ((n->m_flags & M_PKTHDR) == 0)
902 panic("tpsbcheck nohdr");
903 pktlen = len + n->m_pkthdr.len;
904 for (m = n; m; m = m->m_next) {
905 len += m->m_len;
906 mbcnt += MSIZE;
907 if (m->m_flags & M_EXT)
908 mbcnt += m->m_ext.ext_size;
909 }
910 if (len != pktlen) {
911 printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
912 i, len, pktlen, n);
913 panic("tpsbcheck short");
914 }
915 }
916 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
917 printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
918 mbcnt, sb->sb_mbcnt);
919 panic("tpsbcheck");
920 }
921 }
922