1 /* $NetBSD: altq_rmclass.c,v 1.5 2001/11/12 23:14:22 lukem Exp $ */ 2 /* $KAME: altq_rmclass.c,v 1.9 2000/12/14 08:12:46 thorpej Exp $ */ 3 4 /* 5 * Copyright (c) 1991-1997 Regents of the University of California. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the Network Research 19 * Group at Lawrence Berkeley Laboratory. 20 * 4. Neither the name of the University nor of the Laboratory may be used 21 * to endorse or promote products derived from this software without 22 * specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * LBL code modified by speer@eng.sun.com, May 1977. 37 * For questions and/or comments, please send mail to cbq@ee.lbl.gov 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.5 2001/11/12 23:14:22 lukem Exp $"); 42 43 #ident "@(#)rm_class.c 1.48 97/12/05 SMI" 44 45 #if defined(__FreeBSD__) || defined(__NetBSD__) 46 #include "opt_altq.h" 47 #if (__FreeBSD__ != 2) 48 #include "opt_inet.h" 49 #ifdef __FreeBSD__ 50 #include "opt_inet6.h" 51 #endif 52 #endif 53 #endif /* __FreeBSD__ || __NetBSD__ */ 54 #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ 55 56 #include <sys/param.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/socket.h> 60 #include <sys/systm.h> 61 #include <sys/errno.h> 62 #include <sys/time.h> 63 #include <sys/kernel.h> 64 65 #include <net/if.h> 66 #include <netinet/in.h> 67 #include <netinet/in_systm.h> 68 #include <netinet/ip.h> 69 70 #include <altq/altq.h> 71 #include <altq/altq_rmclass.h> 72 #include <altq/altq_rmclass_debug.h> 73 #include <altq/altq_red.h> 74 #include <altq/altq_rio.h> 75 76 /* 77 * Local Macros 78 */ 79 80 #define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } 81 82 /* 83 * Local routines. 84 */ 85 86 static int rmc_satisfied __P((struct rm_class *, struct timeval *)); 87 static void rmc_wrr_set_weights __P((struct rm_ifdat *)); 88 static void rmc_depth_compute __P((struct rm_class *)); 89 static void rmc_depth_recompute __P((rm_class_t *)); 90 91 static mbuf_t *_rmc_wrr_dequeue_next __P((struct rm_ifdat *, int)); 92 static mbuf_t *_rmc_prr_dequeue_next __P((struct rm_ifdat *, int)); 93 94 static int _rmc_addq __P((rm_class_t *, mbuf_t *)); 95 static void _rmc_dropq __P((rm_class_t *)); 96 static mbuf_t *_rmc_getq __P((rm_class_t *)); 97 static mbuf_t *_rmc_pollq __P((rm_class_t *)); 98 99 static int rmc_under_limit __P((struct rm_class *, struct timeval *)); 100 static void rmc_tl_satisfied __P((struct rm_ifdat *, struct timeval *)); 101 static void rmc_drop_action __P((struct rm_class *)); 102 static void rmc_restart __P((struct rm_class *)); 103 static void rmc_root_overlimit __P((struct rm_class *, struct rm_class *)); 104 105 #define BORROW_OFFTIME 106 /* 107 * BORROW_OFFTIME (experimental): 108 * borrow the offtime of the class borrowing from. 109 * the reason is that when its own offtime is set, the class is unable 110 * to borrow much, especially when cutoff is taking effect. 111 * but when the borrowed class is overloaded (advidle is close to minidle), 112 * use the borrowing class's offtime to avoid overload. 113 */ 114 #define ADJUST_CUTOFF 115 /* 116 * ADJUST_CUTOFF (experimental): 117 * if no underlimit class is found due to cutoff, increase cutoff and 118 * retry the scheduling loop. 119 * also, don't invoke delay_actions while cutoff is taking effect, 120 * since a sleeping class won't have a chance to be scheduled in the 121 * next loop. 122 * 123 * now heuristics for setting the top-level variable (cutoff_) becomes: 124 * 1. if a packet arrives for a not-overlimit class, set cutoff 125 * to the depth of the class. 126 * 2. if cutoff is i, and a packet arrives for an overlimit class 127 * with an underlimit ancestor at a lower level than i (say j), 128 * then set cutoff to j. 129 * 3. at scheduling a packet, if there is no underlimit class 130 * due to the current cutoff level, increase cutoff by 1 and 131 * then try to schedule again. 132 */ 133 134 /* 135 * rm_class_t * 136 * rmc_newclass(...) - Create a new resource management class at priority 137 * 'pri' on the interface given by 'ifd'. 138 * 139 * nsecPerByte is the data rate of the interface in nanoseconds/byte. 140 * E.g., 800 for a 10Mb/s ethernet. If the class gets less 141 * than 100% of the bandwidth, this number should be the 142 * 'effective' rate for the class. Let f be the 143 * bandwidth fraction allocated to this class, and let 144 * nsPerByte be the data rate of the output link in 145 * nanoseconds/byte. Then nsecPerByte is set to 146 * nsPerByte / f. E.g., 1600 (= 800 / .5) 147 * for a class that gets 50% of an ethernet's bandwidth. 148 * 149 * action the routine to call when the class is over limit. 150 * 151 * maxq max allowable queue size for class (in packets). 152 * 153 * parent parent class pointer. 154 * 155 * borrow class to borrow from (should be either 'parent' or null). 156 * 157 * maxidle max value allowed for class 'idle' time estimate (this 158 * parameter determines how large an initial burst of packets 159 * can be before overlimit action is invoked. 160 * 161 * offtime how long 'delay' action will delay when class goes over 162 * limit (this parameter determines the steady-state burst 163 * size when a class is running over its limit). 164 * 165 * Maxidle and offtime have to be computed from the following: If the 166 * average packet size is s, the bandwidth fraction allocated to this 167 * class is f, we want to allow b packet bursts, and the gain of the 168 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: 169 * 170 * ptime = s * nsPerByte * (1 - f) / f 171 * maxidle = ptime * (1 - g^b) / g^b 172 * minidle = -ptime * (1 / (f - 1)) 173 * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) 174 * 175 * Operationally, it's convenient to specify maxidle & offtime in units 176 * independent of the link bandwidth so the maxidle & offtime passed to 177 * this routine are the above values multiplied by 8*f/(1000*nsPerByte). 178 * (The constant factor is a scale factor needed to make the parameters 179 * integers. This scaling also means that the 'unscaled' values of 180 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, 181 * not nanoseconds.) Also note that the 'idle' filter computation keeps 182 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of 183 * maxidle also must be scaled upward by this value. Thus, the passed 184 * values for maxidle and offtime can be computed as follows: 185 * 186 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) 187 * offtime = offtime * 8 / (1000 * nsecPerByte) 188 * 189 * When USE_HRTIME is employed, then maxidle and offtime become: 190 * maxidle = maxilde * (8.0 / nsecPerByte); 191 * offtime = offtime * (8.0 / nsecPerByte); 192 */ 193 194 struct rm_class * 195 rmc_newclass(pri, ifd, nsecPerByte, action, maxq, parent, borrow, 196 maxidle, minidle, offtime, pktsize, flags) 197 int pri; 198 struct rm_ifdat *ifd; 199 u_int nsecPerByte; 200 void (*action)(rm_class_t *, rm_class_t *); 201 int maxq; 202 struct rm_class *parent; 203 struct rm_class *borrow; 204 u_int maxidle; 205 int minidle; 206 u_int offtime; 207 int pktsize; 208 int flags; 209 { 210 struct rm_class *cl; 211 struct rm_class *peer; 212 int s; 213 214 if (pri >= RM_MAXPRIO) 215 return (NULL); 216 #ifndef ALTQ_RED 217 if (flags & RMCF_RED) { 218 printf("rmc_newclass: RED not configured for CBQ!\n"); 219 return (NULL); 220 } 221 #endif 222 #ifndef ALTQ_RIO 223 if (flags & RMCF_RIO) { 224 printf("rmc_newclass: RIO not configured for CBQ!\n"); 225 return (NULL); 226 } 227 #endif 228 229 MALLOC(cl, struct rm_class *, sizeof(struct rm_class), 230 M_DEVBUF, M_WAITOK); 231 if (cl == NULL) 232 return (NULL); 233 bzero(cl, sizeof(struct rm_class)); 234 CALLOUT_INIT(&cl->callout_); 235 MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t), 236 M_DEVBUF, M_WAITOK); 237 if (cl->q_ == NULL) { 238 FREE(cl, M_DEVBUF); 239 return (NULL); 240 } 241 bzero(cl->q_, sizeof(class_queue_t)); 242 243 /* 244 * Class initialization. 245 */ 246 cl->children_ = NULL; 247 cl->parent_ = parent; 248 cl->borrow_ = borrow; 249 cl->leaf_ = 1; 250 cl->ifdat_ = ifd; 251 cl->pri_ = pri; 252 cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 253 cl->depth_ = 0; 254 cl->qthresh_ = 0; 255 cl->ns_per_byte_ = nsecPerByte; 256 257 qlimit(cl->q_) = maxq; 258 qtype(cl->q_) = Q_DROPHEAD; 259 qlen(cl->q_) = 0; 260 cl->flags_ = flags; 261 262 #if 1 /* minidle is also scaled in ALTQ */ 263 cl->minidle_ = (minidle * (int)nsecPerByte) / 8; 264 if (cl->minidle_ > 0) 265 cl->minidle_ = 0; 266 #else 267 cl->minidle_ = minidle; 268 #endif 269 cl->maxidle_ = (maxidle * nsecPerByte) / 8; 270 if (cl->maxidle_ == 0) 271 cl->maxidle_ = 1; 272 #if 1 /* offtime is also scaled in ALTQ */ 273 cl->avgidle_ = cl->maxidle_; 274 cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 275 if (cl->offtime_ == 0) 276 cl->offtime_ = 1; 277 #else 278 cl->avgidle_ = 0; 279 cl->offtime_ = (offtime * nsecPerByte) / 8; 280 #endif 281 cl->overlimit = action; 282 283 #ifdef ALTQ_RED 284 if (flags & (RMCF_RED|RMCF_RIO)) { 285 int red_flags, red_pkttime; 286 287 red_flags = 0; 288 if (flags & RMCF_ECN) 289 red_flags |= REDF_ECN; 290 if (flags & RMCF_FLOWVALVE) 291 red_flags |= REDF_FLOWVALVE; 292 #ifdef ALTQ_RIO 293 if (flags & RMCF_CLEARDSCP) 294 red_flags |= RIOF_CLEARDSCP; 295 #endif 296 red_pkttime = nsecPerByte * pktsize / 1000; 297 298 if (flags & RMCF_RED) { 299 cl->red_ = red_alloc(0, 0, 0, 0, 300 red_flags, red_pkttime); 301 if (cl->red_ != NULL) 302 qtype(cl->q_) = Q_RED; 303 } 304 #ifdef ALTQ_RIO 305 else { 306 cl->red_ = (red_t *)rio_alloc(0, NULL, 307 red_flags, red_pkttime); 308 if (cl->red_ != NULL) 309 qtype(cl->q_) = Q_RIO; 310 } 311 #endif 312 } 313 #endif /* ALTQ_RED */ 314 315 /* 316 * put the class into the class tree 317 */ 318 s = splnet(); 319 if ((peer = ifd->active_[pri]) != NULL) { 320 /* find the last class at this pri */ 321 cl->peer_ = peer; 322 while (peer->peer_ != ifd->active_[pri]) 323 peer = peer->peer_; 324 peer->peer_ = cl; 325 } else { 326 ifd->active_[pri] = cl; 327 cl->peer_ = cl; 328 } 329 330 if (cl->parent_) { 331 cl->next_ = parent->children_; 332 parent->children_ = cl; 333 parent->leaf_ = 0; 334 } 335 336 /* 337 * Compute the depth of this class and it's ancestors in the class 338 * hierarchy. 339 */ 340 rmc_depth_compute(cl); 341 342 /* 343 * If CBQ's WRR is enabled, then initailize the class WRR state. 344 */ 345 if (ifd->wrr_) { 346 ifd->num_[pri]++; 347 ifd->alloc_[pri] += cl->allotment_; 348 rmc_wrr_set_weights(ifd); 349 } 350 splx(s); 351 return (cl); 352 } 353 354 int 355 rmc_modclass(cl, nsecPerByte, maxq, maxidle, minidle, offtime, pktsize) 356 struct rm_class *cl; 357 u_int nsecPerByte; 358 int maxq; 359 u_int maxidle; 360 int minidle; 361 u_int offtime; 362 int pktsize; 363 { 364 struct rm_ifdat *ifd; 365 u_int old_allotment; 366 int s; 367 368 ifd = cl->ifdat_; 369 old_allotment = cl->allotment_; 370 371 s = splnet(); 372 cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 373 cl->qthresh_ = 0; 374 cl->ns_per_byte_ = nsecPerByte; 375 376 qlimit(cl->q_) = maxq; 377 378 #if 1 /* minidle is also scaled in ALTQ */ 379 cl->minidle_ = (minidle * nsecPerByte) / 8; 380 if (cl->minidle_ > 0) 381 cl->minidle_ = 0; 382 #else 383 cl->minidle_ = minidle; 384 #endif 385 cl->maxidle_ = (maxidle * nsecPerByte) / 8; 386 if (cl->maxidle_ == 0) 387 cl->maxidle_ = 1; 388 #if 1 /* offtime is also scaled in ALTQ */ 389 cl->avgidle_ = cl->maxidle_; 390 cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 391 if (cl->offtime_ == 0) 392 cl->offtime_ = 1; 393 #else 394 cl->avgidle_ = 0; 395 cl->offtime_ = (offtime * nsecPerByte) / 8; 396 #endif 397 398 /* 399 * If CBQ's WRR is enabled, then initailize the class WRR state. 400 */ 401 if (ifd->wrr_) { 402 ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; 403 rmc_wrr_set_weights(ifd); 404 } 405 splx(s); 406 return (0); 407 } 408 409 /* 410 * static void 411 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes 412 * the appropriate run robin weights for the CBQ weighted round robin 413 * algorithm. 414 * 415 * Returns: NONE 416 */ 417 418 static void 419 rmc_wrr_set_weights(ifd) 420 struct rm_ifdat *ifd; 421 { 422 int i; 423 struct rm_class *cl, *clh; 424 425 for (i = 0; i < RM_MAXPRIO; i++) { 426 /* 427 * This is inverted from that of the simulator to 428 * maintain precision. 429 */ 430 if (ifd->num_[i] == 0) 431 ifd->M_[i] = 0; 432 else 433 ifd->M_[i] = ifd->alloc_[i] / 434 (ifd->num_[i] * ifd->maxpkt_); 435 /* 436 * Compute the weigthed allotment for each class. 437 * This takes the expensive div instruction out 438 * of the main loop for the wrr scheduling path. 439 * These only get recomputed when a class comes or 440 * goes. 441 */ 442 if (ifd->active_[i] != NULL) { 443 clh = cl = ifd->active_[i]; 444 do { 445 /* safe-guard for slow link or alloc_ == 0 */ 446 if (ifd->M_[i] == 0) 447 cl->w_allotment_ = 0; 448 else 449 cl->w_allotment_ = cl->allotment_ / 450 ifd->M_[i]; 451 cl = cl->peer_; 452 } while ((cl != NULL) && (cl != clh)); 453 } 454 } 455 } 456 457 int 458 rmc_get_weight(ifd, pri) 459 struct rm_ifdat *ifd; 460 int pri; 461 { 462 if ((pri >= 0) && (pri < RM_MAXPRIO)) 463 return (ifd->M_[pri]); 464 else 465 return (0); 466 } 467 468 /* 469 * static void 470 * rmc_depth_compute(struct rm_class *cl) - This function computes the 471 * appropriate depth of class 'cl' and its ancestors. 472 * 473 * Returns: NONE 474 */ 475 476 static void 477 rmc_depth_compute(cl) 478 struct rm_class *cl; 479 { 480 rm_class_t *t = cl, *p; 481 482 /* 483 * Recompute the depth for the branch of the tree. 484 */ 485 while (t != NULL) { 486 p = t->parent_; 487 if (p && (t->depth_ >= p->depth_)) { 488 p->depth_ = t->depth_ + 1; 489 t = p; 490 } else 491 t = NULL; 492 } 493 } 494 495 /* 496 * static void 497 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes 498 * the depth of the tree after a class has been deleted. 499 * 500 * Returns: NONE 501 */ 502 503 static void 504 rmc_depth_recompute(rm_class_t *cl) 505 { 506 #if 1 /* ALTQ */ 507 rm_class_t *p, *t; 508 509 p = cl; 510 while (p != NULL) { 511 if ((t = p->children_) == NULL) { 512 p->depth_ = 0; 513 } else { 514 int cdepth = 0; 515 516 while (t != NULL) { 517 if (t->depth_ > cdepth) 518 cdepth = t->depth_; 519 t = t->next_; 520 } 521 522 if (p->depth_ == cdepth + 1) 523 /* no change to this parent */ 524 return; 525 526 p->depth_ = cdepth + 1; 527 } 528 529 p = p->parent_; 530 } 531 #else 532 rm_class_t *t; 533 534 if (cl->depth_ >= 1) { 535 if (cl->children_ == NULL) { 536 cl->depth_ = 0; 537 } else if ((t = cl->children_) != NULL) { 538 while (t != NULL) { 539 if (t->children_ != NULL) 540 rmc_depth_recompute(t); 541 t = t->next_; 542 } 543 } else 544 rmc_depth_compute(cl); 545 } 546 #endif 547 } 548 549 /* 550 * void 551 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This 552 * function deletes a class from the link-sharing stucture and frees 553 * all resources associated with the class. 554 * 555 * Returns: NONE 556 */ 557 558 void 559 rmc_delete_class(ifd, cl) 560 struct rm_ifdat *ifd; 561 struct rm_class *cl; 562 { 563 struct rm_class *p, *head, *previous; 564 int s; 565 566 ASSERT(cl->children_ == NULL); 567 568 if (cl->sleeping_) 569 CALLOUT_STOP(&cl->callout_); 570 571 s = splnet(); 572 /* 573 * Free packets in the packet queue. 574 * XXX - this may not be a desired behavior. Packets should be 575 * re-queued. 576 */ 577 rmc_dropall(cl); 578 579 /* 580 * If the class has a parent, then remove the class from the 581 * class from the parent's children chain. 582 */ 583 if (cl->parent_ != NULL) { 584 head = cl->parent_->children_; 585 p = previous = head; 586 if (head->next_ == NULL) { 587 ASSERT(head == cl); 588 cl->parent_->children_ = NULL; 589 cl->parent_->leaf_ = 1; 590 } else while (p != NULL) { 591 if (p == cl) { 592 if (cl == head) 593 cl->parent_->children_ = cl->next_; 594 else 595 previous->next_ = cl->next_; 596 cl->next_ = NULL; 597 p = NULL; 598 } else { 599 previous = p; 600 p = p->next_; 601 } 602 } 603 } 604 605 /* 606 * Delete class from class priority peer list. 607 */ 608 if ((p = ifd->active_[cl->pri_]) != NULL) { 609 /* 610 * If there is more than one member of this priority 611 * level, then look for class(cl) in the priority level. 612 */ 613 if (p != p->peer_) { 614 while (p->peer_ != cl) 615 p = p->peer_; 616 p->peer_ = cl->peer_; 617 618 if (ifd->active_[cl->pri_] == cl) 619 ifd->active_[cl->pri_] = cl->peer_; 620 } else { 621 ASSERT(p == cl); 622 ifd->active_[cl->pri_] = NULL; 623 } 624 } 625 626 /* 627 * Recompute the WRR weights. 628 */ 629 if (ifd->wrr_) { 630 ifd->alloc_[cl->pri_] -= cl->allotment_; 631 ifd->num_[cl->pri_]--; 632 rmc_wrr_set_weights(ifd); 633 } 634 635 /* 636 * Re-compute the depth of the tree. 637 */ 638 #if 1 /* ALTQ */ 639 rmc_depth_recompute(cl->parent_); 640 #else 641 rmc_depth_recompute(ifd->root_); 642 #endif 643 644 splx(s); 645 646 /* 647 * Free the class structure. 648 */ 649 if (cl->red_ != NULL) { 650 #ifdef ALTQ_RIO 651 if (q_is_rio(cl->q_)) 652 rio_destroy((rio_t *)cl->red_); 653 #endif 654 #ifdef ALTQ_RED 655 if (q_is_red(cl->q_)) 656 red_destroy(cl->red_); 657 #endif 658 } 659 FREE(cl->q_, M_DEVBUF); 660 FREE(cl, M_DEVBUF); 661 } 662 663 664 /* 665 * void 666 * rmc_init(...) - Initialize the resource management data structures 667 * associated with the output portion of interface 'ifp'. 'ifd' is 668 * where the structures will be built (for backwards compatibility, the 669 * structures aren't kept in the ifnet struct). 'nsecPerByte' 670 * gives the link speed (inverse of bandwidth) in nanoseconds/byte. 671 * 'restart' is the driver-specific routine that the generic 'delay 672 * until under limit' action will call to restart output. `maxq' 673 * is the queue size of the 'link' & 'default' classes. 'maxqueued' 674 * is the maximum number of packets that the resource management 675 * code will allow to be queued 'downstream' (this is typically 1). 676 * 677 * Returns: NONE 678 */ 679 680 void 681 rmc_init(ifq, ifd, nsecPerByte, restart, maxq, maxqueued, maxidle, 682 minidle, offtime, flags) 683 struct ifaltq *ifq; 684 struct rm_ifdat *ifd; 685 u_int nsecPerByte; 686 void (*restart)(struct ifaltq *); 687 int maxq, maxqueued; 688 u_int maxidle; 689 int minidle; 690 u_int offtime; 691 int flags; 692 { 693 int i, mtu; 694 695 /* 696 * Initialize the CBQ traciing/debug facility. 697 */ 698 CBQTRACEINIT(); 699 700 bzero((char *)ifd, sizeof (*ifd)); 701 mtu = ifq->altq_ifp->if_mtu; 702 ifd->ifq_ = ifq; 703 ifd->restart = restart; 704 ifd->maxqueued_ = maxqueued; 705 ifd->ns_per_byte_ = nsecPerByte; 706 ifd->maxpkt_ = mtu; 707 ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; 708 ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; 709 #if 1 710 ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; 711 if (mtu * nsecPerByte > 10 * 1000000) 712 ifd->maxiftime_ /= 4; 713 #endif 714 715 reset_cutoff(ifd); 716 CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); 717 718 /* 719 * Initialize the CBQ's WRR state. 720 */ 721 for (i = 0; i < RM_MAXPRIO; i++) { 722 ifd->alloc_[i] = 0; 723 ifd->M_[i] = 0; 724 ifd->num_[i] = 0; 725 ifd->na_[i] = 0; 726 ifd->active_[i] = NULL; 727 } 728 729 /* 730 * Initialize current packet state. 731 */ 732 ifd->qi_ = 0; 733 ifd->qo_ = 0; 734 for (i = 0; i < RM_MAXQUEUED; i++) { 735 ifd->class_[i] = NULL; 736 ifd->curlen_[i] = 0; 737 ifd->borrowed_[i] = NULL; 738 } 739 740 /* 741 * Create the root class of the link-sharing structure. 742 */ 743 if ((ifd->root_ = rmc_newclass(0, ifd, 744 nsecPerByte, 745 rmc_root_overlimit, maxq, 0, 0, 746 maxidle, minidle, offtime, 747 0, 0)) == NULL) { 748 printf("rmc_init: root class not allocated\n"); 749 return ; 750 } 751 ifd->root_->depth_ = 0; 752 } 753 754 /* 755 * void 756 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by 757 * mbuf 'm' to queue for resource class 'cl'. This routine is called 758 * by a driver's if_output routine. This routine must be called with 759 * output packet completion interrupts locked out (to avoid racing with 760 * rmc_dequeue_next). 761 * 762 * Returns: 0 on successful queueing 763 * -1 when packet drop occurs 764 */ 765 int 766 rmc_queue_packet(cl, m) 767 struct rm_class *cl; 768 mbuf_t *m; 769 { 770 struct timeval now; 771 struct rm_ifdat *ifd = cl->ifdat_; 772 int cpri = cl->pri_; 773 int is_empty = qempty(cl->q_); 774 775 RM_GETTIME(now); 776 if (ifd->cutoff_ > 0) { 777 if (TV_LT(&cl->undertime_, &now)) { 778 if (ifd->cutoff_ > cl->depth_) 779 ifd->cutoff_ = cl->depth_; 780 CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); 781 } 782 #if 1 /* ALTQ */ 783 else { 784 /* 785 * the class is overlimit. if the class has 786 * underlimit ancestors, set cutoff to the lowest 787 * depth among them. 788 */ 789 struct rm_class *borrow = cl->borrow_; 790 791 while (borrow != NULL && 792 borrow->depth_ < ifd->cutoff_) { 793 if (TV_LT(&borrow->undertime_, &now)) { 794 ifd->cutoff_ = borrow->depth_; 795 CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); 796 break; 797 } 798 borrow = borrow->borrow_; 799 } 800 } 801 #else /* !ALTQ */ 802 else if ((ifd->cutoff_ > 1) && cl->borrow_) { 803 if (TV_LT(&cl->borrow_->undertime_, &now)) { 804 ifd->cutoff_ = cl->borrow_->depth_; 805 CBQTRACE(rmc_queue_packet, 'ffob', 806 cl->borrow_->depth_); 807 } 808 } 809 #endif /* !ALTQ */ 810 } 811 812 if (_rmc_addq(cl, m) < 0) 813 /* failed */ 814 return (-1); 815 816 if (is_empty) { 817 CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle); 818 ifd->na_[cpri]++; 819 } 820 821 if (qlen(cl->q_) > qlimit(cl->q_)) { 822 /* note: qlimit can be set to 0 or 1 */ 823 rmc_drop_action(cl); 824 return (-1); 825 } 826 return (0); 827 } 828 829 /* 830 * void 831 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all 832 * classes to see if there are satified. 833 */ 834 835 static void 836 rmc_tl_satisfied(ifd, now) 837 struct rm_ifdat *ifd; 838 struct timeval *now; 839 { 840 int i; 841 rm_class_t *p, *bp; 842 843 for (i = RM_MAXPRIO - 1; i >= 0; i--) { 844 if ((bp = ifd->active_[i]) != NULL) { 845 p = bp; 846 do { 847 if (!rmc_satisfied(p, now)) { 848 ifd->cutoff_ = p->depth_; 849 return; 850 } 851 p = p->peer_; 852 } while (p != bp); 853 } 854 } 855 856 reset_cutoff(ifd); 857 } 858 859 /* 860 * rmc_satisfied - Return 1 of the class is satisfied. O, otherwise. 861 */ 862 863 static int 864 rmc_satisfied(cl, now) 865 struct rm_class *cl; 866 struct timeval *now; 867 { 868 rm_class_t *p; 869 870 if (cl == NULL) 871 return (1); 872 if (TV_LT(now, &cl->undertime_)) 873 return (1); 874 if (cl->depth_ == 0) { 875 if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) 876 return (0); 877 else 878 return (1); 879 } 880 if (cl->children_ != NULL) { 881 p = cl->children_; 882 while (p != NULL) { 883 if (!rmc_satisfied(p, now)) 884 return (0); 885 p = p->next_; 886 } 887 } 888 889 return (1); 890 } 891 892 /* 893 * Return 1 if class 'cl' is under limit or can borrow from a parent, 894 * 0 if overlimit. As a side-effect, this routine will invoke the 895 * class overlimit action if the class if overlimit. 896 */ 897 898 static int 899 rmc_under_limit(cl, now) 900 struct rm_class *cl; 901 struct timeval *now; 902 { 903 rm_class_t *p = cl; 904 rm_class_t *top; 905 struct rm_ifdat *ifd = cl->ifdat_; 906 907 ifd->borrowed_[ifd->qi_] = NULL; 908 /* 909 * If cl is the root class, then always return that it is 910 * underlimit. Otherwise, check to see if the class is underlimit. 911 */ 912 if (cl->parent_ == NULL) 913 return (1); 914 915 if (cl->sleeping_) { 916 if (TV_LT(now, &cl->undertime_)) 917 return (0); 918 919 CALLOUT_STOP(&cl->callout_); 920 cl->sleeping_ = 0; 921 cl->undertime_.tv_sec = 0; 922 return (1); 923 } 924 925 top = NULL; 926 while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { 927 if (((cl = cl->borrow_) == NULL) || 928 (cl->depth_ > ifd->cutoff_)) { 929 #ifdef ADJUST_CUTOFF 930 if (cl != NULL) 931 /* cutoff is taking effect, just 932 return false without calling 933 the delay action. */ 934 return (0); 935 #endif 936 #ifdef BORROW_OFFTIME 937 /* 938 * check if the class can borrow offtime too. 939 * borrow offtime from the top of the borrow 940 * chain if the top class is not overloaded. 941 */ 942 if (cl != NULL) { 943 /* cutoff is taking effect, use this class as top. */ 944 top = cl; 945 CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_); 946 } 947 if (top != NULL && top->avgidle_ == top->minidle_) 948 top = NULL; 949 p->overtime_ = *now; 950 (p->overlimit)(p, top); 951 #else 952 p->overtime_ = *now; 953 (p->overlimit)(p, NULL); 954 #endif 955 return (0); 956 } 957 top = cl; 958 } 959 960 if (cl != p) 961 ifd->borrowed_[ifd->qi_] = cl; 962 return (1); 963 } 964 965 /* 966 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to 967 * Packet-by-packet round robin. 968 * 969 * The heart of the weigthed round-robin scheduler, which decides which 970 * class next gets to send a packet. Highest priority first, then 971 * weighted round-robin within priorites. 972 * 973 * Each able-to-send class gets to send until its byte allocation is 974 * exhausted. Thus, the active pointer is only changed after a class has 975 * exhausted its allocation. 976 * 977 * If the scheduler finds no class that is underlimit or able to borrow, 978 * then the first class found that had a nonzero queue and is allowed to 979 * borrow gets to send. 980 */ 981 982 static mbuf_t * 983 _rmc_wrr_dequeue_next(ifd, op) 984 struct rm_ifdat *ifd; 985 int op; 986 { 987 struct rm_class *cl = NULL, *first = NULL; 988 u_int deficit; 989 int cpri; 990 mbuf_t *m; 991 struct timeval now; 992 993 RM_GETTIME(now); 994 995 /* 996 * if the driver polls the top of the queue and then removes 997 * the polled packet, we must return the same packet. 998 */ 999 if (op == ALTDQ_REMOVE && ifd->pollcache_) { 1000 cl = ifd->pollcache_; 1001 cpri = cl->pri_; 1002 if (ifd->efficient_) { 1003 /* check if this class is overlimit */ 1004 if (cl->undertime_.tv_sec != 0 && 1005 rmc_under_limit(cl, &now) == 0) 1006 first = cl; 1007 } 1008 ifd->pollcache_ = NULL; 1009 goto _wrr_out; 1010 } 1011 else { 1012 /* mode == ALTDQ_POLL || pollcache == NULL */ 1013 ifd->pollcache_ = NULL; 1014 ifd->borrowed_[ifd->qi_] = NULL; 1015 } 1016 #ifdef ADJUST_CUTOFF 1017 _again: 1018 #endif 1019 for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { 1020 if (ifd->na_[cpri] == 0) 1021 continue; 1022 deficit = 0; 1023 /* 1024 * Loop through twice for a priority level, if some class 1025 * was unable to send a packet the first round because 1026 * of the weighted round-robin mechanism. 1027 * During the second loop at this level, deficit==2. 1028 * (This second loop is not needed if for every class, 1029 * "M[cl->pri_])" times "cl->allotment" is greater than 1030 * the byte size for the largest packet in the class.) 1031 */ 1032 _wrr_loop: 1033 cl = ifd->active_[cpri]; 1034 ASSERT(cl != NULL); 1035 do { 1036 if ((deficit < 2) && (cl->bytes_alloc_ <= 0)) 1037 cl->bytes_alloc_ += cl->w_allotment_; 1038 if (!qempty(cl->q_)) { 1039 if ((cl->undertime_.tv_sec == 0) || 1040 rmc_under_limit(cl, &now)) { 1041 if (cl->bytes_alloc_ > 0 || deficit > 1) 1042 goto _wrr_out; 1043 1044 /* underlimit but no alloc */ 1045 deficit = 1; 1046 #if 1 1047 ifd->borrowed_[ifd->qi_] = NULL; 1048 #endif 1049 } 1050 else if (first == NULL && cl->borrow_ != NULL) 1051 first = cl; /* borrowing candidate */ 1052 } 1053 1054 cl->bytes_alloc_ = 0; 1055 cl = cl->peer_; 1056 } while (cl != ifd->active_[cpri]); 1057 1058 if (deficit == 1) { 1059 /* first loop found an underlimit class with deficit */ 1060 /* Loop on same priority level, with new deficit. */ 1061 deficit = 2; 1062 goto _wrr_loop; 1063 } 1064 } 1065 1066 #ifdef ADJUST_CUTOFF 1067 /* 1068 * no underlimit class found. if cutoff is taking effect, 1069 * increase cutoff and try again. 1070 */ 1071 if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { 1072 ifd->cutoff_++; 1073 CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_); 1074 goto _again; 1075 } 1076 #endif /* ADJUST_CUTOFF */ 1077 /* 1078 * If LINK_EFFICIENCY is turned on, then the first overlimit 1079 * class we encounter will send a packet if all the classes 1080 * of the link-sharing structure are overlimit. 1081 */ 1082 reset_cutoff(ifd); 1083 CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_); 1084 1085 if (!ifd->efficient_ || first == NULL) 1086 return (NULL); 1087 1088 cl = first; 1089 cpri = cl->pri_; 1090 #if 0 /* too time-consuming for nothing */ 1091 if (cl->sleeping_) 1092 CALLOUT_STOP(&cl->callout_); 1093 cl->sleeping_ = 0; 1094 cl->undertime_.tv_sec = 0; 1095 #endif 1096 ifd->borrowed_[ifd->qi_] = cl->borrow_; 1097 ifd->cutoff_ = cl->borrow_->depth_; 1098 1099 /* 1100 * Deque the packet and do the book keeping... 1101 */ 1102 _wrr_out: 1103 if (op == ALTDQ_REMOVE) { 1104 m = _rmc_getq(cl); 1105 if (m == NULL) 1106 panic("_rmc_wrr_dequeue_next"); 1107 if (qempty(cl->q_)) 1108 ifd->na_[cpri]--; 1109 1110 /* 1111 * Update class statistics and link data. 1112 */ 1113 if (cl->bytes_alloc_ > 0) 1114 cl->bytes_alloc_ -= m_pktlen(m); 1115 1116 if ((cl->bytes_alloc_ <= 0) || first == cl) 1117 ifd->active_[cl->pri_] = cl->peer_; 1118 else 1119 ifd->active_[cl->pri_] = cl; 1120 1121 ifd->class_[ifd->qi_] = cl; 1122 ifd->curlen_[ifd->qi_] = m_pktlen(m); 1123 ifd->now_[ifd->qi_] = now; 1124 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; 1125 ifd->queued_++; 1126 } else { 1127 /* mode == ALTDQ_PPOLL */ 1128 m = _rmc_pollq(cl); 1129 ifd->pollcache_ = cl; 1130 } 1131 return (m); 1132 } 1133 1134 /* 1135 * Dequeue & return next packet from the highest priority class that 1136 * has a packet to send & has enough allocation to send it. This 1137 * routine is called by a driver whenever it needs a new packet to 1138 * output. 1139 */ 1140 static mbuf_t * 1141 _rmc_prr_dequeue_next(ifd, op) 1142 struct rm_ifdat *ifd; 1143 int op; 1144 { 1145 mbuf_t *m; 1146 int cpri; 1147 struct rm_class *cl, *first = NULL; 1148 struct timeval now; 1149 1150 RM_GETTIME(now); 1151 1152 /* 1153 * if the driver polls the top of the queue and then removes 1154 * the polled packet, we must return the same packet. 1155 */ 1156 if (op == ALTDQ_REMOVE && ifd->pollcache_) { 1157 cl = ifd->pollcache_; 1158 cpri = cl->pri_; 1159 ifd->pollcache_ = NULL; 1160 goto _prr_out; 1161 } else { 1162 /* mode == ALTDQ_POLL || pollcache == NULL */ 1163 ifd->pollcache_ = NULL; 1164 ifd->borrowed_[ifd->qi_] = NULL; 1165 } 1166 #ifdef ADJUST_CUTOFF 1167 _again: 1168 #endif 1169 for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { 1170 if (ifd->na_[cpri] == 0) 1171 continue; 1172 cl = ifd->active_[cpri]; 1173 ASSERT(cl != NULL); 1174 do { 1175 if (!qempty(cl->q_)) { 1176 if ((cl->undertime_.tv_sec == 0) || 1177 rmc_under_limit(cl, &now)) 1178 goto _prr_out; 1179 if (first == NULL && cl->borrow_ != NULL) 1180 first = cl; 1181 } 1182 cl = cl->peer_; 1183 } while (cl != ifd->active_[cpri]); 1184 } 1185 1186 #ifdef ADJUST_CUTOFF 1187 /* 1188 * no underlimit class found. if cutoff is taking effect, increase 1189 * cutoff and try again. 1190 */ 1191 if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { 1192 ifd->cutoff_++; 1193 goto _again; 1194 } 1195 #endif /* ADJUST_CUTOFF */ 1196 /* 1197 * If LINK_EFFICIENCY is turned on, then the first overlimit 1198 * class we encounter will send a packet if all the classes 1199 * of the link-sharing structure are overlimit. 1200 */ 1201 reset_cutoff(ifd); 1202 if (!ifd->efficient_ || first == NULL) 1203 return (NULL); 1204 1205 cl = first; 1206 cpri = cl->pri_; 1207 #if 0 /* too time-consuming for nothing */ 1208 if (cl->sleeping_) 1209 CALLOUT_STOP(&cl->callout_); 1210 cl->sleeping_ = 0; 1211 cl->undertime_.tv_sec = 0; 1212 #endif 1213 ifd->borrowed_[ifd->qi_] = cl->borrow_; 1214 ifd->cutoff_ = cl->borrow_->depth_; 1215 1216 /* 1217 * Deque the packet and do the book keeping... 1218 */ 1219 _prr_out: 1220 if (op == ALTDQ_REMOVE) { 1221 m = _rmc_getq(cl); 1222 if (m == NULL) 1223 panic("_rmc_prr_dequeue_next"); 1224 if (qempty(cl->q_)) 1225 ifd->na_[cpri]--; 1226 1227 ifd->active_[cpri] = cl->peer_; 1228 1229 ifd->class_[ifd->qi_] = cl; 1230 ifd->curlen_[ifd->qi_] = m_pktlen(m); 1231 ifd->now_[ifd->qi_] = now; 1232 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; 1233 ifd->queued_++; 1234 } else { 1235 /* mode == ALTDQ_POLL */ 1236 m = _rmc_pollq(cl); 1237 ifd->pollcache_ = cl; 1238 } 1239 return (m); 1240 } 1241 1242 /* 1243 * mbuf_t * 1244 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function 1245 * is invoked by the packet driver to get the next packet to be 1246 * dequeued and output on the link. If WRR is enabled, then the 1247 * WRR dequeue next routine will determine the next packet to sent. 1248 * Otherwise, packet-by-packet round robin is invoked. 1249 * 1250 * Returns: NULL, if a packet is not available or if all 1251 * classes are overlimit. 1252 * 1253 * Otherwise, Pointer to the next packet. 1254 */ 1255 1256 mbuf_t * 1257 rmc_dequeue_next(ifd, mode) 1258 struct rm_ifdat *ifd; 1259 int mode; 1260 { 1261 if (ifd->queued_ >= ifd->maxqueued_) 1262 return (NULL); 1263 else if (ifd->wrr_) 1264 return (_rmc_wrr_dequeue_next(ifd, mode)); 1265 else 1266 return (_rmc_prr_dequeue_next(ifd, mode)); 1267 } 1268 1269 /* 1270 * Update the utilization estimate for the packet that just completed. 1271 * The packet's class & the parent(s) of that class all get their 1272 * estimators updated. This routine is called by the driver's output- 1273 * packet-completion interrupt service routine. 1274 */ 1275 1276 /* 1277 * a macro to approximate "divide by 1000" that gives 0.000999, 1278 * if a value has enough effective digits. 1279 * (on pentium, mul takes 9 cycles but div takes 46!) 1280 */ 1281 #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17)) 1282 void 1283 rmc_update_class_util(ifd) 1284 struct rm_ifdat *ifd; 1285 { 1286 int idle, avgidle, pktlen; 1287 int pkt_time, tidle; 1288 rm_class_t *cl, *borrowed; 1289 rm_class_t *borrows; 1290 struct timeval *nowp; 1291 1292 /* 1293 * Get the most recent completed class. 1294 */ 1295 if ((cl = ifd->class_[ifd->qo_]) == NULL) 1296 return; 1297 1298 pktlen = ifd->curlen_[ifd->qo_]; 1299 borrowed = ifd->borrowed_[ifd->qo_]; 1300 borrows = borrowed; 1301 1302 PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); 1303 1304 /* 1305 * Run estimator on class and it's ancesstors. 1306 */ 1307 /* 1308 * rm_update_class_util is designed to be called when the 1309 * transfer is completed from a xmit complete interrupt, 1310 * but most drivers don't implement an upcall for that. 1311 * so, just use estimated completion time. 1312 * as a result, ifd->qi_ and ifd->qo_ are always synced. 1313 */ 1314 nowp = &ifd->now_[ifd->qo_]; 1315 /* get pkt_time (for link) in usec */ 1316 #if 1 /* use approximation */ 1317 pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_; 1318 pkt_time = NSEC_TO_USEC(pkt_time); 1319 #else 1320 pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000; 1321 #endif 1322 #if 1 /* ALTQ4PPP */ 1323 if (TV_LT(nowp, &ifd->ifnow_)) { 1324 int iftime; 1325 1326 /* 1327 * make sure the estimated completion time does not go 1328 * too far. it can happen when the link layer supports 1329 * data compression or the interface speed is set to 1330 * a much lower value. 1331 */ 1332 TV_DELTA(&ifd->ifnow_, nowp, iftime); 1333 if (iftime+pkt_time < ifd->maxiftime_) { 1334 TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); 1335 } else { 1336 TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); 1337 } 1338 } else { 1339 TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); 1340 } 1341 #else 1342 if (TV_LT(nowp, &ifd->ifnow_)) { 1343 TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); 1344 } else { 1345 TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); 1346 } 1347 #endif 1348 1349 while (cl != NULL) { 1350 TV_DELTA(&ifd->ifnow_, &cl->last_, idle); 1351 if (idle >= 2000000) 1352 /* 1353 * this class is idle enough, reset avgidle. 1354 * (TV_DELTA returns 2000000 us when delta is large.) 1355 */ 1356 cl->avgidle_ = cl->maxidle_; 1357 1358 /* get pkt_time (for class) in usec */ 1359 #if 1 /* use approximation */ 1360 pkt_time = pktlen * cl->ns_per_byte_; 1361 pkt_time = NSEC_TO_USEC(pkt_time); 1362 #else 1363 pkt_time = pktlen * cl->ns_per_byte_ / 1000; 1364 #endif 1365 idle -= pkt_time; 1366 1367 avgidle = cl->avgidle_; 1368 avgidle += idle - (avgidle >> RM_FILTER_GAIN); 1369 cl->avgidle_ = avgidle; 1370 1371 /* Are we overlimit ? */ 1372 if (avgidle <= 0) { 1373 CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle); 1374 #if 1 /* ALTQ */ 1375 /* 1376 * need some lower bound for avgidle, otherwise 1377 * a borrowing class gets unbounded penalty. 1378 */ 1379 if (avgidle < cl->minidle_) 1380 avgidle = cl->avgidle_ = cl->minidle_; 1381 #endif 1382 /* set next idle to make avgidle 0 */ 1383 tidle = pkt_time + 1384 (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN); 1385 TV_ADD_DELTA(nowp, tidle, &cl->undertime_); 1386 ++cl->stats_.over; 1387 } else { 1388 cl->avgidle_ = 1389 (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle; 1390 cl->undertime_.tv_sec = 0; 1391 if (cl->sleeping_) { 1392 CALLOUT_STOP(&cl->callout_); 1393 cl->sleeping_ = 0; 1394 } 1395 } 1396 1397 if (borrows != NULL) { 1398 if (borrows != cl) 1399 ++cl->stats_.borrows; 1400 else 1401 borrows = NULL; 1402 } 1403 cl->last_ = ifd->ifnow_; 1404 cl->last_pkttime_ = pkt_time; 1405 1406 #if 1 1407 if (cl->parent_ == NULL) { 1408 /* take stats of root class */ 1409 PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); 1410 } 1411 #endif 1412 1413 cl = cl->parent_; 1414 } 1415 1416 /* 1417 * Check to see if cutoff needs to set to a new level. 1418 */ 1419 cl = ifd->class_[ifd->qo_]; 1420 if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) { 1421 #if 1 /* ALTQ */ 1422 if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) { 1423 rmc_tl_satisfied(ifd, nowp); 1424 CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); 1425 } else { 1426 ifd->cutoff_ = borrowed->depth_; 1427 CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); 1428 } 1429 #else /* !ALTQ */ 1430 if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) { 1431 reset_cutoff(ifd); 1432 #ifdef notdef 1433 rmc_tl_satisfied(ifd, &now); 1434 #endif 1435 CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); 1436 } else { 1437 ifd->cutoff_ = borrowed->depth_; 1438 CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); 1439 } 1440 #endif /* !ALTQ */ 1441 } 1442 1443 /* 1444 * Release class slot 1445 */ 1446 ifd->borrowed_[ifd->qo_] = NULL; 1447 ifd->class_[ifd->qo_] = NULL; 1448 ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_; 1449 ifd->queued_--; 1450 } 1451 1452 /* 1453 * void 1454 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific) 1455 * over-limit action routines. These get invoked by rmc_under_limit() 1456 * if a class with packets to send if over its bandwidth limit & can't 1457 * borrow from a parent class. 1458 * 1459 * Returns: NONE 1460 */ 1461 1462 static void 1463 rmc_drop_action(cl) 1464 struct rm_class *cl; 1465 { 1466 struct rm_ifdat *ifd = cl->ifdat_; 1467 1468 ASSERT(qlen(cl->q_) > 0); 1469 _rmc_dropq(cl); 1470 if (qempty(cl->q_)) 1471 ifd->na_[cl->pri_]--; 1472 } 1473 1474 void rmc_dropall(cl) 1475 struct rm_class *cl; 1476 { 1477 struct rm_ifdat *ifd = cl->ifdat_; 1478 1479 if (!qempty(cl->q_)) { 1480 _flushq(cl->q_); 1481 1482 ifd->na_[cl->pri_]--; 1483 } 1484 } 1485 1486 #if (__FreeBSD_version > 300000) 1487 /* hzto() is removed from FreeBSD-3.0 */ 1488 static int hzto __P((struct timeval *)); 1489 1490 static int 1491 hzto(tv) 1492 struct timeval *tv; 1493 { 1494 struct timeval t2; 1495 1496 getmicrotime(&t2); 1497 t2.tv_sec = tv->tv_sec - t2.tv_sec; 1498 t2.tv_usec = tv->tv_usec - t2.tv_usec; 1499 return (tvtohz(&t2)); 1500 } 1501 #endif /* __FreeBSD_version > 300000 */ 1502 1503 /* 1504 * void 1505 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ 1506 * delay action routine. It is invoked via rmc_under_limit when the 1507 * packet is discoverd to be overlimit. 1508 * 1509 * If the delay action is result of borrow class being overlimit, then 1510 * delay for the offtime of the borrowing class that is overlimit. 1511 * 1512 * Returns: NONE 1513 */ 1514 1515 void 1516 rmc_delay_action(cl, borrow) 1517 struct rm_class *cl, *borrow; 1518 { 1519 int delay, t, extradelay; 1520 1521 cl->stats_.overactions++; 1522 TV_DELTA(&cl->undertime_, &cl->overtime_, delay); 1523 #ifndef BORROW_OFFTIME 1524 delay += cl->offtime_; 1525 #endif 1526 1527 if (!cl->sleeping_) { 1528 CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle); 1529 #ifdef BORROW_OFFTIME 1530 if (borrow != NULL) 1531 extradelay = borrow->offtime_; 1532 else 1533 #endif 1534 extradelay = cl->offtime_; 1535 1536 #ifdef ALTQ 1537 /* 1538 * XXX recalculate suspend time: 1539 * current undertime is (tidle + pkt_time) calculated 1540 * from the last transmission. 1541 * tidle: time required to bring avgidle back to 0 1542 * pkt_time: target waiting time for this class 1543 * we need to replace pkt_time by offtime 1544 */ 1545 extradelay -= cl->last_pkttime_; 1546 #endif 1547 if (extradelay > 0) { 1548 TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); 1549 delay += extradelay; 1550 } 1551 1552 cl->sleeping_ = 1; 1553 cl->stats_.delays++; 1554 1555 /* 1556 * Since packets are phased randomly with respect to the 1557 * clock, 1 tick (the next clock tick) can be an arbitrarily 1558 * short time so we have to wait for at least two ticks. 1559 * NOTE: If there's no other traffic, we need the timer as 1560 * a 'backstop' to restart this class. 1561 */ 1562 if (delay > tick * 2) { 1563 #ifdef __FreeBSD__ 1564 /* FreeBSD rounds up the tick */ 1565 t = hzto(&cl->undertime_); 1566 #else 1567 /* other BSDs round down the tick */ 1568 t = hzto(&cl->undertime_) + 1; 1569 #endif 1570 } else 1571 t = 2; 1572 CALLOUT_RESET(&cl->callout_, t, 1573 (timeout_t *)rmc_restart, (caddr_t)cl); 1574 } 1575 } 1576 1577 /* 1578 * void 1579 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is 1580 * called by the system timer code & is responsible checking if the 1581 * class is still sleeping (it might have been restarted as a side 1582 * effect of the queue scan on a packet arrival) and, if so, restarting 1583 * output for the class. Inspecting the class state & restarting output 1584 * require locking the class structure. In general the driver is 1585 * responsible for locking but this is the only routine that is not 1586 * called directly or indirectly from the interface driver so it has 1587 * know about system locking conventions. Under bsd, locking is done 1588 * by raising IPL to splnet so that's what's implemented here. On a 1589 * different system this would probably need to be changed. 1590 * 1591 * Returns: NONE 1592 */ 1593 1594 static void 1595 rmc_restart(cl) 1596 struct rm_class *cl; 1597 { 1598 struct rm_ifdat *ifd = cl->ifdat_; 1599 int s; 1600 1601 s = splnet(); 1602 if (cl->sleeping_) { 1603 cl->sleeping_ = 0; 1604 cl->undertime_.tv_sec = 0; 1605 1606 if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) { 1607 CBQTRACE(rmc_restart, 'trts', cl->stats_.handle); 1608 (ifd->restart)(ifd->ifq_); 1609 } 1610 } 1611 splx(s); 1612 } 1613 1614 /* 1615 * void 1616 * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit 1617 * handling routine for the root class of the link sharing structure. 1618 * 1619 * Returns: NONE 1620 */ 1621 1622 static void 1623 rmc_root_overlimit(cl, borrow) 1624 struct rm_class *cl, *borrow; 1625 { 1626 panic("rmc_root_overlimit"); 1627 } 1628 1629 /* 1630 * Packet Queue handling routines. Eventually, this is to localize the 1631 * effects on the code whether queues are red queues or droptail 1632 * queues. 1633 */ 1634 1635 static int 1636 _rmc_addq(cl, m) 1637 rm_class_t *cl; 1638 mbuf_t *m; 1639 { 1640 #ifdef ALTQ_RIO 1641 if (q_is_rio(cl->q_)) 1642 return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); 1643 #endif 1644 #ifdef ALTQ_RED 1645 if (q_is_red(cl->q_)) 1646 return red_addq(cl->red_, cl->q_, m, cl->pktattr_); 1647 #endif /* ALTQ_RED */ 1648 1649 if (cl->flags_ & RMCF_CLEARDSCP) 1650 write_dsfield(m, cl->pktattr_, 0); 1651 1652 _addq(cl->q_, m); 1653 return (0); 1654 } 1655 1656 /* note: _rmc_dropq is not called for red */ 1657 static void 1658 _rmc_dropq(cl) 1659 rm_class_t *cl; 1660 { 1661 mbuf_t *m; 1662 1663 if ((m = _getq(cl->q_)) != NULL) 1664 m_freem(m); 1665 } 1666 1667 static mbuf_t * 1668 _rmc_getq(cl) 1669 rm_class_t *cl; 1670 { 1671 #ifdef ALTQ_RIO 1672 if (q_is_rio(cl->q_)) 1673 return rio_getq((rio_t *)cl->red_, cl->q_); 1674 #endif 1675 #ifdef ALTQ_RED 1676 if (q_is_red(cl->q_)) 1677 return red_getq(cl->red_, cl->q_); 1678 #endif 1679 return _getq(cl->q_); 1680 } 1681 1682 static mbuf_t * 1683 _rmc_pollq(cl) 1684 rm_class_t *cl; 1685 { 1686 return qhead(cl->q_); 1687 } 1688 1689 #ifdef CBQ_TRACE 1690 1691 struct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; 1692 struct cbqtrace *cbqtrace_ptr = NULL; 1693 int cbqtrace_count; 1694 1695 /* 1696 * DDB hook to trace cbq events: 1697 * the last 1024 events are held in a circular buffer. 1698 * use "call cbqtrace_dump(N)" to display 20 events from Nth event. 1699 */ 1700 void cbqtrace_dump(int); 1701 static char *rmc_funcname(void *); 1702 1703 static struct rmc_funcs { 1704 void *func; 1705 char *name; 1706 } rmc_funcs[] = 1707 { 1708 rmc_init, "rmc_init", 1709 rmc_queue_packet, "rmc_queue_packet", 1710 rmc_under_limit, "rmc_under_limit", 1711 rmc_update_class_util, "rmc_update_class_util", 1712 rmc_delay_action, "rmc_delay_action", 1713 rmc_restart, "rmc_restart", 1714 _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", 1715 NULL, NULL 1716 }; 1717 1718 static char *rmc_funcname(func) 1719 void *func; 1720 { 1721 struct rmc_funcs *fp; 1722 1723 for (fp = rmc_funcs; fp->func != NULL; fp++) 1724 if (fp->func == func) 1725 return (fp->name); 1726 return ("unknown"); 1727 } 1728 1729 void cbqtrace_dump(counter) 1730 int counter; 1731 { 1732 int i, *p; 1733 char *cp; 1734 1735 counter = counter % NCBQTRACE; 1736 p = (int *)&cbqtrace_buffer[counter]; 1737 1738 for (i=0; i<20; i++) { 1739 printf("[0x%x] ", *p++); 1740 printf("%s: ", rmc_funcname((void *)*p++)); 1741 cp = (char *)p++; 1742 printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); 1743 printf("%d\n",*p++); 1744 1745 if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) 1746 p = (int *)cbqtrace_buffer; 1747 } 1748 } 1749 #endif /* CBQ_TRACE */ 1750 1751 #endif /* ALTQ_CBQ */ 1752 1753 #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) 1754 #if !defined(__GNUC__) || defined(ALTQ_DEBUG) 1755 1756 void 1757 _addq(q, m) 1758 class_queue_t *q; 1759 mbuf_t *m; 1760 { 1761 mbuf_t *m0; 1762 1763 if ((m0 = qtail(q)) != NULL) 1764 m->m_nextpkt = m0->m_nextpkt; 1765 else 1766 m0 = m; 1767 m0->m_nextpkt = m; 1768 qtail(q) = m; 1769 qlen(q)++; 1770 } 1771 1772 mbuf_t * 1773 _getq(q) 1774 class_queue_t *q; 1775 { 1776 mbuf_t *m, *m0; 1777 1778 if ((m = qtail(q)) == NULL) 1779 return (NULL); 1780 if ((m0 = m->m_nextpkt) != m) 1781 m->m_nextpkt = m0->m_nextpkt; 1782 else { 1783 ASSERT(qlen(q) == 1); 1784 qtail(q) = NULL; 1785 } 1786 qlen(q)--; 1787 m0->m_nextpkt = NULL; 1788 return (m0); 1789 } 1790 1791 /* drop a packet at the tail of the queue */ 1792 mbuf_t * 1793 _getq_tail(q) 1794 class_queue_t *q; 1795 { 1796 mbuf_t *m, *m0, *prev; 1797 1798 if ((m = m0 = qtail(q)) == NULL) 1799 return NULL; 1800 do { 1801 prev = m0; 1802 m0 = m0->m_nextpkt; 1803 } while (m0 != m); 1804 prev->m_nextpkt = m->m_nextpkt; 1805 if (prev == m) { 1806 ASSERT(qlen(q) == 1); 1807 qtail(q) = NULL; 1808 } else 1809 qtail(q) = prev; 1810 qlen(q)--; 1811 m->m_nextpkt = NULL; 1812 return (m); 1813 } 1814 1815 /* randomly select a packet in the queue */ 1816 mbuf_t * 1817 _getq_random(q) 1818 class_queue_t *q; 1819 { 1820 struct mbuf *m; 1821 int i, n; 1822 1823 if ((m = qtail(q)) == NULL) 1824 return NULL; 1825 if (m->m_nextpkt == m) { 1826 ASSERT(qlen(q) == 1); 1827 qtail(q) = NULL; 1828 } else { 1829 struct mbuf *prev = NULL; 1830 1831 n = random() % qlen(q) + 1; 1832 for (i = 0; i < n; i++) { 1833 prev = m; 1834 m = m->m_nextpkt; 1835 } 1836 prev->m_nextpkt = m->m_nextpkt; 1837 if (m == qtail(q)) 1838 qtail(q) = prev; 1839 } 1840 qlen(q)--; 1841 m->m_nextpkt = NULL; 1842 return (m); 1843 } 1844 1845 void 1846 _removeq(q, m) 1847 class_queue_t *q; 1848 mbuf_t *m; 1849 { 1850 mbuf_t *m0, *prev; 1851 1852 m0 = qtail(q); 1853 do { 1854 prev = m0; 1855 m0 = m0->m_nextpkt; 1856 } while (m0 != m); 1857 prev->m_nextpkt = m->m_nextpkt; 1858 if (prev == m) 1859 qtail(q) = NULL; 1860 else if (qtail(q) == m) 1861 qtail(q) = prev; 1862 qlen(q)--; 1863 } 1864 1865 void 1866 _flushq(q) 1867 class_queue_t *q; 1868 { 1869 mbuf_t *m; 1870 1871 while ((m = _getq(q)) != NULL) 1872 m_freem(m); 1873 ASSERT(qlen(q) == 0); 1874 } 1875 1876 #endif /* !__GNUC__ || ALTQ_DEBUG */ 1877 #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ 1878