1 /* $NetBSD: altq_rio.c,v 1.4 2001/11/12 23:14:22 lukem Exp $ */ 2 /* $KAME: altq_rio.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */ 3 4 /* 5 * Copyright (C) 1998-2000 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 /* 30 * Copyright (c) 1990-1994 Regents of the University of California. 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. 
Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. All advertising materials mentioning features or use of this software 42 * must display the following acknowledgement: 43 * This product includes software developed by the Computer Systems 44 * Engineering Group at Lawrence Berkeley Laboratory. 45 * 4. Neither the name of the University nor of the Laboratory may be used 46 * to endorse or promote products derived from this software without 47 * specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: altq_rio.c,v 1.4 2001/11/12 23:14:22 lukem Exp $"); 64 65 #if defined(__FreeBSD__) || defined(__NetBSD__) 66 #include "opt_altq.h" 67 #if (__FreeBSD__ != 2) 68 #include "opt_inet.h" 69 #ifdef __FreeBSD__ 70 #include "opt_inet6.h" 71 #endif 72 #endif 73 #endif /* __FreeBSD__ || __NetBSD__ */ 74 #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ 75 76 #include <sys/param.h> 77 #include <sys/malloc.h> 78 #include <sys/mbuf.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/errno.h> 84 #include <sys/kernel.h> 85 86 #include <net/if.h> 87 #include <net/if_types.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/ip.h> 92 #ifdef INET6 93 #include <netinet/ip6.h> 94 #endif 95 96 #include <altq/altq.h> 97 #include <altq/altq_conf.h> 98 #include <altq/altq_cdnr.h> 99 #include <altq/altq_red.h> 100 #include <altq/altq_rio.h> 101 102 /* 103 * RIO: RED with IN/OUT bit 104 * described in 105 * "Explicit Allocation of Best Effort Packet Delivery Service" 106 * David D. Clark and Wenjia Fang, MIT Lab for Computer Science 107 * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} 108 * 109 * this implementation is extended to support more than 2 drop precedence 110 * values as described in RFC2597 (Assured Forwarding PHB Group). 111 * 112 */ 113 /* 114 * AF DS (differentiated service) codepoints. 115 * (classes can be mapped to CBQ or H-FSC classes.) 
 *
 *	  0   1   2   3   4   5   6   7
 *	+---+---+---+---+---+---+---+---+
 *	|   CLASS   |DropPre| 0 |  CU   |
 *	+---+---+---+---+---+---+---+---+
 *
 *	class 1: 001
 *	class 2: 010
 *	class 3: 011
 *	class 4: 100
 *
 *	low drop prec:    01
 *	medium drop prec: 10
 *	high drop prec:   11
 */

/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RIO_LIMIT	60	/* default max queue length */
#define	RIO_STATS		/* collect statistics */

/*
 * TV_DELTA(a, b, delta): compute delta = (a - b) in microseconds,
 * clamped to 60 seconds; complains if a < b (time went backwards).
 */
#define	TV_DELTA(a, b, delta) {					\
	register int	xxs;					\
								\
	delta = (a)->tv_usec - (b)->tv_usec; 			\
	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
		if (xxs < 0) { 					\
			printf("rm_class: bogus time values");	\
			delta = 60000000;			\
		} else if (xxs > 4)  {				\
			if (xxs > 60)				\
				delta = 60000000;		\
			else					\
				delta += xxs * 1000000;		\
		} else while (xxs > 0) {			\
			delta += 1000000;			\
			xxs--;					\
		}						\
	}							\
}

/* rio_list keeps all rio_queue_t's allocated.
 */
static rio_queue_t *rio_list = NULL;
/* default rio parameter values (one set per drop precedence level) */
static struct redparams default_rio_params[RIO_NDROPPREC] = {
  /* th_min,		 th_max,     inv_pmax */
  { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
  { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
  { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence */
};

/* internal function prototypes */
static int rio_enqueue __P((struct ifaltq *, struct mbuf *,
			    struct altq_pktattr *));
static struct mbuf *rio_dequeue __P((struct ifaltq *, int));
static int rio_request __P((struct ifaltq *, int, void *));
static int rio_detach __P((rio_queue_t *));
static int dscp2index __P((u_int8_t));

/*
 * rio device interface
 */
altqdev_decl(rio);

/*
 * device open: nothing to do here; all real setup is deferred to the
 * RIO_IF_ATTACH ioctl, when the discipline is attached to an interface.
 */
int
rioopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
	struct proc *p;
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

/*
 * device close: tear down every rio state still on rio_list.
 * Returns the first error encountered (later detaches are still attempted).
 */
int
rioclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
	struct proc *p;
{
	rio_queue_t *rqp;
	int err, error = 0;

	while ((rqp = rio_list) != NULL) {
		/* destroy all */
		err = rio_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

/*
 * ioctl entry point.  RIO_GETSTATS is unprivileged; every other command
 * requires super-user.  Commands look up the per-interface state by name
 * via altq_lookup() and return EBADF when no rio state is attached.
 */
int
rioioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	rio_queue_t *rqp;
	struct rio_interface *ifacep;
	struct ifnet *ifp;
	int	error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RIO_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
			return (error);
#else
		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
			return (error);
#endif
		break;
	}

	switch (cmd) {

	case RIO_ENABLE:
		ifacep = (struct rio_interface *)addr;
		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RIO_DISABLE:
		ifacep = (struct rio_interface *)addr;
		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RIO_IF_ATTACH:
		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize rio_queue_t */
		MALLOC(rqp, rio_queue_t *, sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}
		bzero(rqp, sizeof(rio_queue_t));

		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
		       M_DEVBUF, M_WAITOK);
		if (rqp->rq_q == NULL) {
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}
		bzero(rqp->rq_q, sizeof(class_queue_t));

		/* all-zero/NULL args select the built-in defaults */
		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
		if (rqp->rq_rio == NULL) {
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RIO_LIMIT;
		qtype(rqp->rq_q) = Q_RIO;

		/*
		 * set RIO to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
				    rio_enqueue, rio_dequeue, rio_request,
				    NULL, NULL);
		if (error) {
			/* unwind everything allocated above */
			rio_destroy(rqp->rq_rio);
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the rio list */
		rqp->rq_next = rio_list;
		rio_list = rqp;
		break;

	case RIO_IF_DETACH:
		ifacep = (struct rio_interface *)addr;
		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
			error = EBADF;
			break;
		}
		error = rio_detach(rqp);
		break;

	case RIO_GETSTATS:
		do {
			struct rio_stats *q_stats;
			rio_t *rp;
			int i;

			q_stats = (struct rio_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
					       ALTQT_RIO)) == NULL) {
				error = EBADF;
				break;
			}

			rp = rqp->rq_rio;

			q_stats->q_limit = qlimit(rqp->rq_q);
			q_stats->weight	= rp->rio_weight;
			q_stats->flags = rp->rio_flags;

			for (i = 0; i < RIO_NDROPPREC; i++) {
				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
				bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
				      sizeof(struct redstats));
				/* avg is kept scaled by the EWMA weight; unscale for export */
				q_stats->q_stats[i].q_avg =
				    rp->rio_precstate[i].avg >> rp->rio_wshift;

				q_stats->q_params[i].inv_pmax
					= rp->rio_precstate[i].inv_pmax;
				q_stats->q_params[i].th_min
					= rp->rio_precstate[i].th_min;
				q_stats->q_params[i].th_max
					= rp->rio_precstate[i].th_max;
			}
		} while (0);
		break;

	case RIO_CONFIG:
		do {
			struct rio_conf *fc;
			rio_t	*new;
			int s, limit, i;

			fc = (struct rio_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.rio_ifname,
					       ALTQT_RIO)) == NULL) {
				error = EBADF;
				break;
			}

			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
					fc->rio_flags, fc->rio_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			/* swap in the new rio state with interrupts blocked */
			s = splnet();
			_flushq(rqp->rq_q);
			/* queue limit must cover the largest th_max */
			limit = fc->rio_limit;
			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
			qlimit(rqp->rq_q) = limit;

			rio_destroy(rqp->rq_rio);
			rqp->rq_rio = new;

			splx(s);

			/* write back new values */
			fc->rio_limit = limit;
			for (i = 0; i < RIO_NDROPPREC; i++) {
				fc->q_params[i].inv_pmax =
					rqp->rq_rio->rio_precstate[i].inv_pmax;
				fc->q_params[i].th_min =
					rqp->rq_rio->rio_precstate[i].th_min;
				fc->q_params[i].th_max =
					rqp->rq_rio->rio_precstate[i].th_max;
			}
		} while (0);
		break;

	case RIO_SETDEFAULTS:
		do {
			struct redparams *rp;
			int i;

			rp = (struct redparams *)addr;
			for (i = 0; i < RIO_NDROPPREC; i++)
				default_rio_params[i] = rp[i];
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}

/*
 * detach a rio state from its interface: disable if enabled, detach from
 * altq, unlink from rio_list, then free everything allocated at attach.
 */
static int
rio_detach(rqp)
	rio_queue_t *rqp;
{
	rio_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	/* unlink rqp from the singly-linked rio_list */
	if (rio_list == rqp)
		rio_list = rqp->rq_next;
	else {
		for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("rio_detach: no state found in rio_list!\n");
	}

	rio_destroy(rqp->rq_rio);
	FREE(rqp->rq_q, M_DEVBUF);
	FREE(rqp, M_DEVBUF);
	return (error);
}

/*
 * rio support routines
 */
/* altq request handler: only ALTRQ_PURGE (flush the queue) is handled. */
static int
rio_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		_flushq(rqp->rq_q);
		if (ALTQ_IS_ENABLED(ifq))
			ifq->ifq_len = 0;
		break;
	}
	return (0);
}


/*
 * allocate and initialize a rio state.
 * weight == 0, params == NULL entries, and pkttime == 0 all select
 * built-in defaults; weight is rounded down to a power of 2.
 * Returns NULL on allocation failure.
 */
rio_t *
rio_alloc(weight, params, flags, pkttime)
	int	weight;
	struct redparams *params;
	int	flags, pkttime;
{
	rio_t 	*rp;
	int	w, i;
	int	npkts_per_sec;

	MALLOC(rp, rio_t *, sizeof(rio_t), M_DEVBUF, M_WAITOK);
	if (rp == NULL)
		return (NULL);
	bzero(rp, sizeof(rio_t));

	rp->rio_flags = flags;
	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->rio_pkttime = 800;
	else
		rp->rio_pkttime = pkttime;

	if (weight != 0)
		rp->rio_weight = weight;
	else {
		/* use default */
		rp->rio_weight = W_WEIGHT;

		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->rio_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->rio_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->rio_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->rio_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->rio_wshift = i;
	w = 1 << rp->rio_wshift;
	if (w != rp->rio_weight) {
		printf("invalid weight value %d for red! use %d\n",
		       rp->rio_weight, w);
		rp->rio_weight = w;
	}

	/* allocate weight table */
	rp->rio_wtab = wtab_alloc(rp->rio_weight);

	for (i = 0; i < RIO_NDROPPREC; i++) {
		struct dropprec_state *prec = &rp->rio_precstate[i];

		prec->avg = 0;
		prec->idle = 1;

		/* a zero in any per-precedence parameter means "use default" */
		if (params == NULL || params[i].inv_pmax == 0)
			prec->inv_pmax = default_rio_params[i].inv_pmax;
		else
			prec->inv_pmax = params[i].inv_pmax;
		if (params == NULL || params[i].th_min == 0)
			prec->th_min = default_rio_params[i].th_min;
		else
			prec->th_min = params[i].th_min;
		if (params == NULL || params[i].th_max == 0)
			prec->th_max = default_rio_params[i].th_max;
		else
			prec->th_max = params[i].th_max;

		/*
		 * th_min_s and th_max_s are scaled versions of th_min
		 * and th_max to be compared with avg.
		 */
		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);

		/*
		 * precompute probability denominator
		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
		 */
		prec->probd = (2 * (prec->th_max - prec->th_min)
			       * prec->inv_pmax) << FP_SHIFT;

		microtime(&prec->last);
	}

	return (rp);
}

/* free a rio state allocated by rio_alloc (including its weight table). */
void
rio_destroy(rp)
	rio_t *rp;
{
	wtab_destroy(rp->rio_wtab);
	FREE(rp, M_DEVBUF);
}

/*
 * copy per-precedence red statistics into sp[0..RIO_NDROPPREC-1];
 * caller must supply an array of RIO_NDROPPREC redstats.
 */
void
rio_getstats(rp, sp)
	rio_t *rp;
	struct redstats *sp;
{
	int	i;

	for (i = 0; i < RIO_NDROPPREC; i++) {
		bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
		sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
		sp++;
	}
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
/* altq enqueue handler: thin wrapper around rio_addq. */
static int
rio_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
	int error = 0;

	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
		ifq->ifq_len++;
	else
		error = ENOBUFS;	/* rio_addq already freed the mbuf */
	return error;
}

#if (RIO_NDROPPREC == 3)
/*
 * internally, a drop precedence value is converted to an index
 * starting from 0.
 * (drop-prec bits 01 -> 0, 10 -> 1, 11 -> 2; bits 00 also maps to 0)
 */
static int
dscp2index(u_int8_t dscp)
{
	int	dpindex = dscp & AF_DROPPRECMASK;

	if (dpindex == 0)
		return (0);
	return ((dpindex >> 3) - 1);
}
#endif

#if 1
/*
 * kludge: when a packet is dequeued, we need to know its drop precedence
 * in order to keep the queue length of each drop precedence.
 * use m_pkthdr.rcvif to pass this info to the dequeue side.
 * RIOM_GET_PRECINDEX also clears rcvif back to NULL.
 */
#define	RIOM_SET_PRECINDEX(m, idx)	\
	do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
#define	RIOM_GET_PRECINDEX(m)	\
	({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
	(m)->m_pkthdr.rcvif = NULL; idx; })
#endif

/*
 * RIO enqueue decision: classify the packet by its drop-precedence DSCP
 * bits, update the average queue estimators, and either append the mbuf
 * to q (return 0) or drop it (mbuf freed, return -1).
 */
int
rio_addq(rp, q, m, pktattr)
	rio_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	u_int8_t dsfield, odsfield;
	int dpindex, i, n, t;
	struct timeval now;
	struct dropprec_state *prec;

	dsfield = odsfield = read_dsfield(m, pktattr);
	dpindex = dscp2index(dsfield);

	/*
	 * update avg of the precedence states whose drop precedence
	 * is larger than or equal to the drop precedence of the packet
	 */
	now.tv_sec = 0;		/* sentinel: microtime() called lazily below */
	for (i = dpindex; i < RIO_NDROPPREC; i++) {
		prec = &rp->rio_precstate[i];
		avg = prec->avg;
		if (prec->idle) {
			/* queue was empty: decay avg for the idle period */
			prec->idle = 0;
			if (now.tv_sec == 0)
				microtime(&now);
			t = (now.tv_sec - prec->last.tv_sec);
			if (t > 60)
				avg = 0;
			else {
				t = t * 1000000 +
					(now.tv_usec - prec->last.tv_usec);
				n = t / rp->rio_pkttime;
				/* calculate (avg = (1 - Wq)^n * avg) */
				if (n > 0)
					avg = (avg >> FP_SHIFT) *
						pow_w(rp->rio_wtab, n);
			}
		}

		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
		prec->avg = avg;		/* save the new value */
		/*
		 * count keeps a tally of arriving traffic that has not
		 * been dropped.
		 */
		prec->count++;
	}

	prec = &rp->rio_precstate[dpindex];
	avg = prec->avg;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= prec->th_min_s && prec->qlen > 1) {
		if (avg >= prec->th_max_s) {
			/* avg >= th_max:  forced drop */
			droptype = DTYPE_FORCED;
		} else if (prec->old == 0) {
			/* first exceeds th_min */
			prec->count = 1;
			prec->old = 1;
		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
				      prec->probd, prec->count)) {
			/* unforced drop by red */
			droptype = DTYPE_EARLY;
		}
	} else {
		/* avg < th_min */
		prec->old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

	if (droptype != DTYPE_NODROP) {
		/* always drop incoming packet (as opposed to randomdrop) */
		for (i = dpindex; i < RIO_NDROPPREC; i++)
			rp->rio_precstate[i].count = 0;
#ifdef RIO_STATS
		if (droptype == DTYPE_EARLY)
			rp->q_stats[dpindex].drop_unforced++;
		else
			rp->q_stats[dpindex].drop_forced++;
		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
#endif
		m_freem(m);
		return (-1);
	}

	/* accepted: this packet counts toward its own and all higher levels */
	for (i = dpindex; i < RIO_NDROPPREC; i++)
		rp->rio_precstate[i].qlen++;

	/* save drop precedence index in mbuf hdr */
	RIOM_SET_PRECINDEX(m, dpindex);

	if (rp->rio_flags & RIOF_CLEARDSCP)
		dsfield &= ~DSCP_MASK;

	if (dsfield != odsfield)
		write_dsfield(m, pktattr, dsfield);

	_addq(q, m);

#ifdef RIO_STATS
	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
#endif
	return (0);
}

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

/*
 * altq dequeue handler.  ALTDQ_POLL returns the head without removing
 * it; otherwise the packet is removed via rio_getq and ifq_len updated.
 */
static struct mbuf *
rio_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
	struct mbuf *m = NULL;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	m = rio_getq(rqp->rq_rio, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return m;
}

/*
 * remove the next packet from q and decrement the per-precedence queue
 * lengths for its own and all higher drop-precedence levels (the index
 * was stashed in the mbuf header by RIOM_SET_PRECINDEX at enqueue).
 * Marks a level idle and timestamps it when its length drops to zero.
 */
struct mbuf *
rio_getq(rp, q)
	rio_t *rp;
	class_queue_t *q;
{
	struct mbuf *m;
	int dpindex, i;

	if ((m = _getq(q)) == NULL)
		return NULL;

	dpindex = RIOM_GET_PRECINDEX(m);
	for (i = dpindex; i < RIO_NDROPPREC; i++) {
		if (--rp->rio_precstate[i].qlen == 0) {
			if (rp->rio_precstate[i].idle == 0) {
				rp->rio_precstate[i].idle = 1;
				microtime(&rp->rio_precstate[i].last);
			}
		}
	}
	return (m);
}

#ifdef KLD_MODULE

static struct altqsw rio_sw =
	{"rio", rioopen, rioclose, rioioctl};

ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RIO */