1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)tp_subr.c 7.17 (Berkeley) 10/08/91 8 */ 9 10 /*********************************************************** 11 Copyright IBM Corporation 1987 12 13 All Rights Reserved 14 15 Permission to use, copy, modify, and distribute this software and its 16 documentation for any purpose and without fee is hereby granted, 17 provided that the above copyright notice appear in all copies and that 18 both that copyright notice and this permission notice appear in 19 supporting documentation, and that the name of IBM not be 20 used in advertising or publicity pertaining to distribution of the 21 software without specific, written prior permission. 22 23 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 24 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL 25 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR 26 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 27 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 28 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 29 SOFTWARE. 30 31 ******************************************************************/ 32 33 /* 34 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison 35 */ 36 /* 37 * ARGO TP 38 * 39 * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $ 40 * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $ 41 * 42 * The main work of data transfer is done here. 43 * These routines are called from tp.trans. 44 * They include the routines that check the validity of acks and Xacks, 45 * (tp_goodack() and tp_goodXack() ) 46 * take packets from socket buffers and send them (tp_send()), 47 * drop the data from the socket buffers (tp_sbdrop()), 48 * and put incoming packet data into socket buffers (tp_stash()). 49 */ 50 51 #include "param.h" 52 #include "mbuf.h" 53 #include "socket.h" 54 #include "socketvar.h" 55 #include "protosw.h" 56 #include "errno.h" 57 #include "types.h" 58 #include "time.h" 59 #include "kernel.h" 60 61 #include "tp_ip.h" 62 #include "iso.h" 63 #include "argo_debug.h" 64 #include "tp_timer.h" 65 #include "tp_param.h" 66 #include "tp_stat.h" 67 #include "tp_pcb.h" 68 #include "tp_tpdu.h" 69 #include "tp_trace.h" 70 #include "tp_meas.h" 71 #include "tp_seq.h" 72 73 int tp_emit(), tp_sbdrop(); 74 int tprexmtthresh = 3; 75 extern int ticks; 76 void tp_send(); 77 78 /* 79 * CALLED FROM: 80 * tp.trans, when an XAK arrives 81 * FUNCTION and ARGUMENTS: 82 * Determines if the sequence number (seq) from the XAK 83 * acks anything new. If so, drop the appropriate tpdu 84 * from the XPD send queue. 85 * RETURN VALUE: 86 * Returns 1 if it did this, 0 if the ack caused no action. 87 */ 88 int 89 tp_goodXack(tpcb, seq) 90 struct tp_pcb *tpcb; 91 SeqNum seq; 92 { 93 94 IFTRACE(D_XPD) 95 tptraceTPCB(TPPTgotXack, 96 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew, 97 tpcb->tp_snduna); 98 ENDTRACE 99 100 if ( seq == tpcb->tp_Xuna ) { 101 tpcb->tp_Xuna = tpcb->tp_Xsndnxt; 102 103 /* DROP 1 packet from the Xsnd socket buf - just so happens 104 * that only one packet can be there at any time 105 * so drop the whole thing. If you allow > 1 packet 106 * the socket buffer, then you'll have to keep 107 * track of how many characters went w/ each XPD tpdu, so this 108 * will get messier 109 */ 110 IFDEBUG(D_XPD) 111 dump_mbuf(tpcb->tp_Xsnd.sb_mb, 112 "tp_goodXack Xsnd before sbdrop"); 113 ENDDEBUG 114 115 IFTRACE(D_XPD) 116 tptraceTPCB(TPPTmisc, 117 "goodXack: dropping cc ", 118 (int)(tpcb->tp_Xsnd.sb_cc), 119 0,0,0); 120 ENDTRACE 121 sbdroprecord(&tpcb->tp_Xsnd); 122 return 1; 123 } 124 return 0; 125 } 126 127 /* 128 * CALLED FROM: 129 * tp_good_ack() 130 * FUNCTION and ARGUMENTS: 131 * updates 132 * smoothed average round trip time (*rtt) 133 * roundtrip time variance (*rtv) - actually deviation, not variance 134 * given the new value (diff) 135 * RETURN VALUE: 136 * void 137 */ 138 139 void 140 tp_rtt_rtv(tpcb) 141 register struct tp_pcb *tpcb; 142 { 143 int old = tpcb->tp_rtt; 144 int delta, elapsed = ticks - tpcb->tp_rttemit; 145 146 if (tpcb->tp_rtt != 0) { 147 /* 148 * rtt is the smoothed round trip time in machine clock ticks (hz). 149 * It is stored as a fixed point number, unscaled (unlike the tcp 150 * srtt). The rationale here is that it is only significant to the 151 * nearest unit of slowtimo, which is at least 8 machine clock ticks 152 * so there is no need to scale. The smoothing is done according 153 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8). 154 */ 155 delta = elapsed - tpcb->tp_rtt; 156 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0) 157 tpcb->tp_rtt = 1; 158 /* 159 * rtv is a smoothed accumulated mean difference, unscaled 160 * for reasons expressed above. 161 * It is smoothed with an alpha of .75, and the round trip timer 162 * will be set to rtt + 4*rtv, also as TCP does. 163 */ 164 if (delta < 0) 165 delta = -delta; 166 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0) 167 tpcb->tp_rtv = 1; 168 } else { 169 /* 170 * No rtt measurement yet - use the unsmoothed rtt. 171 * Set the variance to half the rtt (so our first 172 * retransmit happens at 3*rtt) 173 */ 174 tpcb->tp_rtt = elapsed; 175 tpcb->tp_rtv = elapsed >> 1; 176 } 177 tpcb->tp_rttemit = 0; 178 tpcb->tp_rxtshift = 0; 179 /* 180 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar. 181 * Because of the way we do the smoothing, srtt and rttvar 182 * will each average +1/2 tick of bias. When we compute 183 * the retransmit timer, we want 1/2 tick of rounding and 184 * 1 extra tick because of +-1/2 tick uncertainty in the 185 * firing of the timer. The bias will give us exactly the 186 * 1.5 tick we need. But, because the bias is 187 * statistical, we have to test that we don't drop below 188 * the minimum feasible timer (which is 2 ticks)." 189 */ 190 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb), 191 tpcb->tp_peer_acktime, 128 /* XXX */); 192 IFDEBUG(D_RTT) 193 printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n", 194 "tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old); 195 ENDDEBUG 196 tpcb->tp_rxtcur = tpcb->tp_dt_ticks; 197 } 198 199 /* 200 * CALLED FROM: 201 * tp.trans when an AK arrives 202 * FUNCTION and ARGUMENTS: 203 * Given (cdt), the credit from the AK tpdu, and 204 * (seq), the sequence number from the AK tpdu, 205 * tp_goodack() determines if the AK acknowledges something in the send 206 * window, and if so, drops the appropriate packets from the retransmission 207 * list, computes the round trip time, and updates the retransmission timer 208 * based on the new smoothed round trip time. 209 * RETURN VALUE: 210 * Returns 1 if 211 * EITHER it actually acked something heretofore unacknowledged 212 * OR no news but the credit should be processed. 213 * If something heretofore unacked was acked with this sequence number, 214 * the appropriate tpdus are dropped from the retransmission control list, 215 * by calling tp_sbdrop(). 216 * No need to see the tpdu itself. 217 */ 218 int 219 tp_goodack(tpcb, cdt, seq, subseq) 220 register struct tp_pcb *tpcb; 221 u_int cdt; 222 register SeqNum seq; 223 u_int subseq; 224 { 225 int old_fcredit; 226 int bang = 0; /* bang --> ack for something heretofore unacked */ 227 u_int bytes_acked; 228 229 IFDEBUG(D_ACKRECV) 230 printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n", 231 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt); 232 ENDDEBUG 233 IFTRACE(D_ACKRECV) 234 tptraceTPCB(TPPTgotack, 235 seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq); 236 ENDTRACE 237 238 IFPERF(tpcb) 239 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0); 240 ENDPERF 241 242 if (seq == tpcb->tp_snduna) { 243 if (subseq < tpcb->tp_r_subseq || 244 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) { 245 discard_the_ack: 246 IFDEBUG(D_ACKRECV) 247 printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n", 248 tpcb, subseq, tpcb->tp_r_subseq); 249 ENDDEBUG 250 goto done; 251 } 252 if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) { 253 tpcb->tp_r_subseq = subseq; 254 if (tpcb->tp_timer[TM_data_retrans] == 0) 255 tpcb->tp_dupacks = 0; 256 else if (++tpcb->tp_dupacks == tprexmtthresh) { 257 /* partner went out of his way to signal with different 258 subsequences that he has the same lack of an expected 259 packet. This may be an early indiciation of a loss */ 260 261 SeqNum onxt = tpcb->tp_sndnxt; 262 struct mbuf *onxt_m = tpcb->tp_sndnxt_m; 263 u_int win = min(tpcb->tp_fcredit, 264 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2; 265 IFDEBUG(D_ACKRECV) 266 printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n", 267 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt); 268 ENDDEBUG 269 if (win < 2) 270 win = 2; 271 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize; 272 tpcb->tp_timer[TM_data_retrans] = 0; 273 tpcb->tp_rttemit = 0; 274 tpcb->tp_sndnxt = tpcb->tp_snduna; 275 tpcb->tp_sndnxt_m = 0; 276 tpcb->tp_cong_win = tpcb->tp_l_tpdusize; 277 tp_send(tpcb); 278 tpcb->tp_cong_win = tpcb->tp_ssthresh + 279 tpcb->tp_dupacks * tpcb->tp_l_tpdusize; 280 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) { 281 tpcb->tp_sndnxt = onxt; 282 tpcb->tp_sndnxt_m = onxt_m; 283 } 284 285 } else if (tpcb->tp_dupacks > tprexmtthresh) { 286 tpcb->tp_cong_win += tpcb->tp_l_tpdusize; 287 } 288 goto done; 289 } 290 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna)) 291 goto discard_the_ack; 292 /* 293 * If the congestion window was inflated to account 294 * for the other side's cached packets, retract it. 295 */ 296 if (tpcb->tp_dupacks > tprexmtthresh && 297 tpcb->tp_cong_win > tpcb->tp_ssthresh) 298 tpcb->tp_cong_win = tpcb->tp_ssthresh; 299 tpcb->tp_r_subseq = subseq; 300 old_fcredit = tpcb->tp_fcredit; 301 tpcb->tp_fcredit = cdt; 302 if (cdt > tpcb->tp_maxfcredit) 303 tpcb->tp_maxfcredit = cdt; 304 tpcb->tp_dupacks = 0; 305 306 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) { 307 308 tpsbcheck(tpcb, 0); 309 bytes_acked = tp_sbdrop(tpcb, seq); 310 tpsbcheck(tpcb, 1); 311 /* 312 * If transmit timer is running and timed sequence 313 * number was acked, update smoothed round trip time. 314 * Since we now have an rtt measurement, cancel the 315 * timer backoff (cf., Phil Karn's retransmit alg.). 316 * Recompute the initial retransmit timer. 317 */ 318 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq)) 319 tp_rtt_rtv(tpcb); 320 /* 321 * If all outstanding data is acked, stop retransmit timer. 322 * If there is more data to be acked, restart retransmit 323 * timer, using current (possibly backed-off) value. 324 * OSI combines the keepalive and persistance functions. 325 * So, there is no persistance timer per se, to restart. 326 */ 327 tpcb->tp_timer[TM_data_retrans] = 328 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur; 329 /* 330 * When new data is acked, open the congestion window. 331 * If the window gives us less than ssthresh packets 332 * in flight, open exponentially (maxseg per packet). 333 * Otherwise open linearly: maxseg per window 334 * (maxseg^2 / cwnd per packet), plus a constant 335 * fraction of a packet (maxseg/8) to help larger windows 336 * open quickly enough. 337 */ 338 { 339 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize; 340 341 incr = min(incr, bytes_acked); 342 if (cw > tpcb->tp_ssthresh) 343 incr = incr * incr / cw + incr / 8; 344 tpcb->tp_cong_win = 345 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat); 346 } 347 tpcb->tp_snduna = seq; 348 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) { 349 tpcb->tp_sndnxt = seq; 350 tpcb->tp_sndnxt_m = 0; 351 } 352 bang++; 353 } 354 355 if( cdt != 0 && old_fcredit == 0 ) { 356 tpcb->tp_sendfcc = 1; 357 } 358 if (cdt == 0) { 359 if (old_fcredit != 0) 360 IncStat(ts_zfcdt); 361 /* The following might mean that the window shrunk */ 362 if (tpcb->tp_timer[TM_data_retrans]) { 363 tpcb->tp_timer[TM_data_retrans] = 0; 364 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks; 365 if (tpcb->tp_sndnxt != tpcb->tp_snduna) { 366 tpcb->tp_sndnxt = tpcb->tp_snduna; 367 tpcb->tp_sndnxt_m = 0; 368 } 369 } 370 } 371 tpcb->tp_fcredit = cdt; 372 bang |= (old_fcredit < cdt); 373 374 done: 375 IFDEBUG(D_ACKRECV) 376 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n", 377 bang, cdt, old_fcredit, tpcb->tp_cong_win); 378 ENDDEBUG 379 /* if (bang) XXXXX Very bad to remove this test, but somethings broken */ 380 tp_send(tpcb); 381 return (bang); 382 } 383 384 /* 385 * CALLED FROM: 386 * tp_goodack() 387 * FUNCTION and ARGUMENTS: 388 * drops everything up TO but not INCLUDING seq # (seq) 389 * from the retransmission queue. 390 */ 391 tp_sbdrop(tpcb, seq) 392 register struct tp_pcb *tpcb; 393 SeqNum seq; 394 { 395 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 396 register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna); 397 int oldcc = sb->sb_cc, oldi = i; 398 399 if (i >= tpcb->tp_seqhalf) 400 printf("tp_spdropping too much -- should panic"); 401 while (i-- > 0) 402 sbdroprecord(sb); 403 IFDEBUG(D_ACKRECV) 404 printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n", 405 oldi, oldcc - sb->sb_cc, tpcb, seq); 406 ENDDEBUG 407 if (sb->sb_flags & SB_NOTIFY) 408 sowwakeup(tpcb->tp_sock); 409 return (oldcc - sb->sb_cc); 410 } 411 412 /* 413 * CALLED FROM: 414 * tp.trans on user send request, arrival of AK and arrival of XAK 415 * FUNCTION and ARGUMENTS: 416 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt). 417 * Emits until a) runs out of data, or b) runs into an XPD mark, or 418 * c) it hits seq number (highseq) limited by cong or credit. 419 * 420 * If you want XPD to buffer > 1 du per socket buffer, you can 421 * modifiy this to issue XPD tpdus also, but then it'll have 422 * to take some argument(s) to distinguish between the type of DU to 423 * hand tp_emit. 424 * 425 * When something is sent for the first time, its time-of-send 426 * is stashed (in system clock ticks rather than pf_slowtimo ticks). 427 * When the ack arrives, the smoothed round-trip time is figured 428 * using this value. 429 */ 430 void 431 tp_send(tpcb) 432 register struct tp_pcb *tpcb; 433 { 434 register int len; 435 register struct mbuf *m; 436 struct mbuf *mb = 0; 437 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 438 unsigned int eotsdu = 0; 439 SeqNum highseq, checkseq; 440 int idle, idleticks, off, cong_win; 441 #ifdef TP_PERF_MEAS 442 int send_start_time = ticks; 443 SeqNum oldnxt = tpcb->tp_sndnxt; 444 #endif TP_PERF_MEAS 445 446 idle = (tpcb->tp_snduna == tpcb->tp_sndnew); 447 if (idle) { 448 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact]; 449 if (idleticks > tpcb->tp_dt_ticks) 450 /* 451 * We have been idle for "a while" and no acks are 452 * expected to clock out any data we send -- 453 * slow start to get ack "clock" running again. 454 */ 455 tpcb->tp_cong_win = tpcb->tp_l_tpdusize; 456 } 457 458 cong_win = tpcb->tp_cong_win; 459 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna); 460 if (tpcb->tp_Xsnd.sb_mb) 461 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew); 462 463 IFDEBUG(D_DATA) 464 printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n", 465 tpcb, tpcb->tp_sndnxt, cong_win, highseq); 466 ENDDEBUG 467 IFTRACE(D_DATA) 468 tptraceTPCB( TPPTmisc, "tp_send sndnew snduna", 469 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0); 470 tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin", 471 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win); 472 ENDTRACE 473 IFTRACE(D_DATA) 474 tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin", 475 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win); 476 ENDTRACE 477 478 if (tpcb->tp_sndnxt_m) 479 m = tpcb->tp_sndnxt_m; 480 else { 481 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna); 482 for (m = sb->sb_mb; m && off > 0; m = m->m_next) 483 off--; 484 } 485 send: 486 /* 487 * Avoid silly window syndrome here . . . figure out how! 488 */ 489 checkseq = tpcb->tp_sndnum; 490 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq)) 491 checkseq = highseq; /* i.e. DON'T retain highest assigned packet */ 492 493 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) { 494 495 eotsdu = (m->m_flags & M_EOR) != 0; 496 len = m->m_pkthdr.len; 497 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 && 498 len < (tpcb->tp_l_tpdusize / 2)) 499 break; /* Nagle . . . . . */ 500 cong_win -= len; 501 /* make a copy - mb goes into the retransmission list 502 * while m gets emitted. m_copy won't copy a zero-length mbuf. 503 */ 504 mb = m; 505 m = m_copy(mb, 0, M_COPYALL); 506 if (m == MNULL) 507 break; 508 IFTRACE(D_STASH) 509 tptraceTPCB( TPPTmisc, 510 "tp_send mcopy nxt high eotsdu len", 511 tpcb->tp_sndnxt, highseq, eotsdu, len); 512 ENDTRACE 513 514 IFDEBUG(D_DATA) 515 printf("tp_sending tpcb 0x%x nxt 0x%x\n", 516 tpcb, tpcb->tp_sndnxt); 517 ENDDEBUG 518 /* when headers are precomputed, may need to fill 519 in checksum here */ 520 if (tpcb->tp_sock->so_error = 521 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) { 522 /* error */ 523 break; 524 } 525 m = mb->m_nextpkt; 526 tpcb->tp_sndnxt_m = m; 527 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) { 528 SEQ_INC(tpcb, tpcb->tp_sndnew); 529 /* 530 * Time this transmission if not a retransmission and 531 * not currently timing anything. 532 */ 533 if (tpcb->tp_rttemit == 0) { 534 tpcb->tp_rttemit = ticks; 535 tpcb->tp_rttseq = tpcb->tp_sndnxt; 536 } 537 tpcb->tp_sndnxt = tpcb->tp_sndnew; 538 } else 539 SEQ_INC(tpcb, tpcb->tp_sndnxt); 540 /* 541 * Set retransmit timer if not currently set. 542 * Initial value for retransmit timer is smoothed 543 * round-trip time + 2 * round-trip time variance. 544 * Initialize shift counter which is used for backoff 545 * of retransmit time. 546 */ 547 if (tpcb->tp_timer[TM_data_retrans] == 0) { 548 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks; 549 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks; 550 tpcb->tp_rxtshift = 0; 551 } 552 } 553 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum)) 554 tpcb->tp_oktonagle = 0; 555 #ifdef TP_PERF_MEAS 556 IFPERF(tpcb) 557 { 558 register int npkts; 559 int elapsed = ticks - send_start_time, *t; 560 struct timeval now; 561 562 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt); 563 564 if (npkts > 0) 565 tpcb->tp_Nwindow++; 566 567 if (npkts > TP_PM_MAX) 568 npkts = TP_PM_MAX; 569 570 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]); 571 *t += (t - elapsed) >> TP_RTT_ALPHA; 572 573 if (mb == 0) { 574 IncPStat(tpcb, tps_win_lim_by_data[npkts] ); 575 } else { 576 IncPStat(tpcb, tps_win_lim_by_cdt[npkts] ); 577 /* not true with congestion-window being used */ 578 } 579 now.tv_sec = elapsed / hz; 580 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz; 581 tpmeas( tpcb->tp_lref, 582 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts); 583 } 584 ENDPERF 585 #endif TP_PERF_MEAS 586 587 588 IFTRACE(D_DATA) 589 tptraceTPCB( TPPTmisc, 590 "tp_send at end: new nxt eotsdu error", 591 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error); 592 593 ENDTRACE 594 } 595 596 int TPNagleok; 597 int TPNagled; 598 599 tp_packetize(tpcb, m, eotsdu) 600 register struct tp_pcb *tpcb; 601 register struct mbuf *m; 602 int eotsdu; 603 { 604 register struct mbuf *n; 605 register struct sockbuf *sb = &tpcb->tp_sock->so_snd; 606 int maxsize = tpcb->tp_l_tpdusize 607 - tp_headersize(DT_TPDU_type, tpcb) 608 - (tpcb->tp_use_checksum?4:0) ; 609 int totlen = m->m_pkthdr.len; 610 struct mbuf *m_split(); 611 /* 612 * Pre-packetize the data in the sockbuf 613 * according to negotiated mtu. Do it here 614 * where we can safely wait for mbufs. 615 * 616 * This presumes knowledge of sockbuf conventions. 617 * TODO: allocate space for header and fill it in (once!). 618 */ 619 IFDEBUG(D_DATA) 620 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n", 621 maxsize, totlen, eotsdu, tpcb->tp_sndnum); 622 ENDTRACE 623 if (tpcb->tp_oktonagle) { 624 if ((n = sb->sb_mb) == 0) 625 panic("tp_packetize"); 626 while (n->m_act) 627 n = n->m_act; 628 if (n->m_flags & M_EOR) 629 panic("tp_packetize 2"); 630 SEQ_INC(tpcb, tpcb->tp_sndnum); 631 if (totlen + n->m_pkthdr.len < maxsize) { 632 /* There is an unsent packet with space, combine data */ 633 struct mbuf *old_n = n; 634 tpsbcheck(tpcb,3); 635 n->m_pkthdr.len += totlen; 636 while (n->m_next) 637 n = n->m_next; 638 sbcompress(sb, m, n); 639 tpsbcheck(tpcb,4); 640 n = old_n; 641 TPNagled++; 642 goto out; 643 } 644 } 645 while (m) { 646 n = m; 647 if (totlen > maxsize) { 648 if ((m = m_split(n, maxsize, M_WAIT)) == 0) 649 panic("tp_packetize"); 650 } else 651 m = 0; 652 totlen -= maxsize; 653 tpsbcheck(tpcb, 5); 654 sbappendrecord(sb, n); 655 tpsbcheck(tpcb, 6); 656 SEQ_INC(tpcb, tpcb->tp_sndnum); 657 } 658 out: 659 if (eotsdu) { 660 n->m_flags |= M_EOR; /* XXX belongs at end */ 661 tpcb->tp_oktonagle = 0; 662 } else { 663 SEQ_DEC(tpcb, tpcb->tp_sndnum); 664 tpcb->tp_oktonagle = 1; 665 TPNagleok++; 666 } 667 IFDEBUG(D_DATA) 668 printf("SEND out: oktonagle %d sndnum 0x%x\n", 669 tpcb->tp_oktonagle, tpcb->tp_sndnum); 670 ENDTRACE 671 return 0; 672 } 673 674 675 /* 676 * NAME: tp_stash() 677 * CALLED FROM: 678 * tp.trans on arrival of a DT tpdu 679 * FUNCTION, ARGUMENTS, and RETURN VALUE: 680 * Returns 1 if 681 * a) something new arrived and it's got eotsdu_reached bit on, 682 * b) this arrival was caused other out-of-sequence things to be 683 * accepted, or 684 * c) this arrival is the highest seq # for which we last gave credit 685 * (sender just sent a whole window) 686 * In other words, returns 1 if tp should send an ack immediately, 0 if 687 * the ack can wait a while. 688 * 689 * Note: this implementation no longer renegs on credit, (except 690 * when debugging option D_RENEG is on, for the purpose of testing 691 * ack subsequencing), so we don't need to check for incoming tpdus 692 * being in a reneged portion of the window. 693 */ 694 695 tp_stash(tpcb, e) 696 register struct tp_pcb *tpcb; 697 register struct tp_event *e; 698 { 699 register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH; 700 /* 0--> delay acks until full window */ 701 /* 1--> ack each tpdu */ 702 #ifndef lint 703 #define E e->ATTR(DT_TPDU) 704 #else lint 705 #define E e->ev_union.EV_DT_TPDU 706 #endif lint 707 708 if ( E.e_eot ) { 709 register struct mbuf *n = E.e_data; 710 n->m_flags |= M_EOR; 711 n->m_act = 0; 712 } 713 IFDEBUG(D_STASH) 714 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 715 "stash: so_rcv before appending"); 716 dump_mbuf(E.e_data, 717 "stash: e_data before appending"); 718 ENDDEBUG 719 720 IFPERF(tpcb) 721 PStat(tpcb, Nb_from_ll) += E.e_datalen; 722 tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time, 723 E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen); 724 ENDPERF 725 726 if (E.e_seq == tpcb->tp_rcvnxt) { 727 728 IFDEBUG(D_STASH) 729 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n", 730 E.e_seq, E.e_datalen, E.e_eot); 731 ENDDEBUG 732 733 IFTRACE(D_STASH) 734 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot", 735 E.e_seq, E.e_datalen, E.e_eot, 0); 736 ENDTRACE 737 738 SET_DELACK(tpcb); 739 740 sbappend(&tpcb->tp_sock->so_rcv, E.e_data); 741 742 SEQ_INC( tpcb, tpcb->tp_rcvnxt ); 743 /* 744 * move chains from the reassembly queue to the socket buffer 745 */ 746 if (tpcb->tp_rsycnt) { 747 register struct mbuf **mp; 748 struct mbuf **mplim; 749 750 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit); 751 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit; 752 753 while (tpcb->tp_rsycnt && *mp) { 754 sbappend(&tpcb->tp_sock->so_rcv, *mp); 755 tpcb->tp_rsycnt--; 756 *mp = 0; 757 SEQ_INC(tpcb, tpcb->tp_rcvnxt); 758 ack_reason |= ACK_REORDER; 759 if (++mp == mplim) 760 mp = tpcb->tp_rsyq; 761 } 762 } 763 IFDEBUG(D_STASH) 764 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 765 "stash: so_rcv after appending"); 766 ENDDEBUG 767 768 } else { 769 register struct mbuf **mp; 770 SeqNum uwe; 771 772 IFTRACE(D_STASH) 773 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt", 774 E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0); 775 ENDTRACE 776 777 if (tpcb->tp_rsyq == 0) 778 tp_rsyset(tpcb); 779 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit); 780 if (tpcb->tp_rsyq == 0 || 781 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) { 782 ack_reason = ACK_DONT; 783 m_freem(E.e_data); 784 } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) { 785 IFDEBUG(D_STASH) 786 printf("tp_stash - drop & ack\n"); 787 ENDDEBUG 788 789 /* retransmission - drop it and force an ack */ 790 IncStat(ts_dt_dup); 791 IFPERF(tpcb) 792 IncPStat(tpcb, tps_n_ack_cuz_dup); 793 ENDPERF 794 795 m_freem(E.e_data); 796 ack_reason |= ACK_DUP; 797 } else { 798 *mp = E.e_data; 799 tpcb->tp_rsycnt++; 800 ack_reason = ACK_DONT; 801 } 802 } 803 /* there were some comments of historical interest here. */ 804 { 805 LOCAL_CREDIT(tpcb); 806 807 if ( E.e_seq == tpcb->tp_sent_uwe ) 808 ack_reason |= ACK_STRAT_FULLWIN; 809 810 IFTRACE(D_STASH) 811 tptraceTPCB(TPPTmisc, 812 "end of stash, eot, ack_reason, sent_uwe ", 813 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0); 814 ENDTRACE 815 816 if ( ack_reason == ACK_DONT ) { 817 IncStat( ts_ackreason[ACK_DONT] ); 818 return 0; 819 } else { 820 IFPERF(tpcb) 821 if(ack_reason & ACK_STRAT_EACH) { 822 IncPStat(tpcb, tps_n_ack_cuz_strat); 823 } else if(ack_reason & ACK_STRAT_FULLWIN) { 824 IncPStat(tpcb, tps_n_ack_cuz_fullwin); 825 } else if(ack_reason & ACK_REORDER) { 826 IncPStat(tpcb, tps_n_ack_cuz_reorder); 827 } 828 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0, 829 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0); 830 ENDPERF 831 { 832 register int i; 833 834 /* keep track of all reasons that apply */ 835 for( i=1; i<_ACK_NUM_REASONS_ ;i++) { 836 if( ack_reason & (1<<i) ) 837 IncStat( ts_ackreason[i] ); 838 } 839 } 840 return 1; 841 } 842 } 843 } 844 845 /* 846 * tp_rsyflush - drop all the packets on the reassembly queue. 847 * Do this when closing the socket, or when somebody has changed 848 * the space avaible in the receive socket (XXX). 849 */ 850 tp_rsyflush(tpcb) 851 register struct tp_pcb *tpcb; 852 { 853 register struct mbuf *m, **mp; 854 if (tpcb->tp_rsycnt) { 855 for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit; 856 --mp >= tpcb->tp_rsyq; ) 857 if (*mp) { 858 tpcb->tp_rsycnt--; 859 m_freem(*mp); 860 } 861 if (tpcb->tp_rsycnt) 862 panic("tp_rsyflush"); 863 } 864 free((caddr_t)tpcb->tp_rsyq, M_PCB); 865 tpcb->tp_rsyq = 0; 866 } 867 868 tp_rsyset(tpcb) 869 register struct tp_pcb *tpcb; 870 { 871 register struct socket *so = tpcb->tp_sock; 872 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf; 873 int old_credit = tpcb->tp_maxlcredit; 874 caddr_t rsyq; 875 876 tpcb->tp_maxlcredit = maxcredit = min(maxcredit, 877 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize); 878 879 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0) 880 return; 881 maxcredit *= sizeof(struct mbuf *); 882 if (tpcb->tp_rsyq) 883 tp_rsyflush(tpcb); 884 if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT)) 885 bzero(rsyq, maxcredit); 886 tpcb->tp_rsyq = (struct mbuf **)rsyq; 887 } 888 889 tpsbcheck(tpcb, i) 890 struct tp_pcb *tpcb; 891 { 892 register struct mbuf *n, *m; 893 register int len = 0, mbcnt = 0, pktlen; 894 struct sockbuf *sb = &tpcb->tp_sock->so_snd; 895 896 for (n = sb->sb_mb; n; n = n->m_nextpkt) { 897 if ((n->m_flags & M_PKTHDR) == 0) 898 panic("tpsbcheck nohdr"); 899 pktlen = len + n->m_pkthdr.len; 900 for (m = n; m; m = m->m_next) { 901 len += m->m_len; 902 mbcnt += MSIZE; 903 if (m->m_flags & M_EXT) 904 mbcnt += m->m_ext.ext_size; 905 } 906 if (len != pktlen) { 907 printf("test %d; len %d != pktlen %d on mbuf 0x%x\n", 908 i, len, pktlen, n); 909 panic("tpsbcheck short"); 910 } 911 } 912 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 913 printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc, 914 mbcnt, sb->sb_mbcnt); 915 panic("tpsbcheck"); 916 } 917 } 918