/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different than priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it.  Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: RED etc. is per bucket, not overall.
 *	- better service curve support.
 *
 * EXAMPLE:
 *
 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
 *  queue std priority 3 bandwidth 200Kb \
 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
 *  queue bulk priority 2 bandwidth 100Kb \
 *	fairq (buckets 64, hogs 1Kb) qlimit 50
 *
 *  NOTE: When the aggregate bandwidth is less than the link bandwidth
 *	  any remaining bandwidth is dynamically assigned using the
 *	  existing bandwidth specs as weightings.
 *
 *  pass out on em0 from any to any keep state queue std
 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
 */
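/*
 * NOTE: illustration of the weighting (numbers derived from the example
 *	 above, not from measurements): with a 650Kb link, std at 200Kb
 *	 and bulk at 100Kb, the dequeue code favors the class with the
 *	 lowest current-rate/spec ratio, so when both queues are saturated
 *	 the leftover 350Kb is split roughly 2:1 and std converges toward
 *	 ~433Kb while bulk converges toward ~217Kb.  See the scale
 *	 computation in fairq_dequeue().
 */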
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/thread.h>

#include <net/if.h>
#include <net/ifq_var.h>
#include <netinet/in.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>
#include <net/altq/altq_fairq.h>

#include <sys/thread2.h>

#define FAIRQ_SUBQ_INDEX	ALTQ_SUBQ_INDEX_DEFAULT
#define FAIRQ_LOCK(ifq) \
    ALTQ_SQ_LOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])
#define FAIRQ_UNLOCK(ifq) \
    ALTQ_SQ_UNLOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])

/*
 * function prototypes
 */
static int	fairq_clear_interface(struct fairq_if *);
static int	fairq_request(struct ifaltq_subque *, int, void *);
static void	fairq_purge(struct fairq_if *);
static struct fairq_class *fairq_class_create(struct fairq_if *, int,
					int, u_int, struct fairq_opts *, int);
static int	fairq_class_destroy(struct fairq_class *);
static int	fairq_enqueue(struct ifaltq_subque *, struct mbuf *,
					struct altq_pktattr *);
static struct mbuf *fairq_dequeue(struct ifaltq_subque *, int);

static int	fairq_addq(struct fairq_class *, struct mbuf *, int hash);
static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
static fairq_bucket_t *fairq_selectq(struct fairq_class *);
static void	fairq_purgeq(struct fairq_class *);

static void	get_class_stats(struct fairq_classstats *,
					struct fairq_class *);
static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);

int
fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq)
{
	return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc, ifq_mapsubq_default,
	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
}
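/*
 * NOTE: expected call order (a sketch inferred from the ALTQ framework,
 *	 not documented in this file): pf creates the per-interface
 *	 discipline with fairq_add_altq(), instantiates one class per
 *	 queue definition via fairq_add_queue(), and finally wires the
 *	 enqueue/dequeue/request hooks in with fairq_pfattach() above.
 */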
int
fairq_add_altq(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifnet *ifp;

	ifnet_lock();

	if ((ifp = ifunit(a->ifname)) == NULL) {
		ifnet_unlock();
		return (EINVAL);
	}
	if (!ifq_is_ready(&ifp->if_snd)) {
		ifnet_unlock();
		return (ENODEV);
	}

	pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO);
	pif->pif_bandwidth = a->ifbandwidth;
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;
	ifq_purge_all(&ifp->if_snd);

	ifnet_unlock();

	/* keep the state in pf_altq */
	a->altq_disc = pif;

	return (0);
}

int
fairq_remove_altq(struct pf_altq *a)
{
	struct fairq_if *pif;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	fairq_clear_interface(pif);

	kfree(pif, M_ALTQ);
	return (0);
}

static int
fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	KKASSERT(a->priority < FAIRQ_MAXPRI);
	KKASSERT(a->qid != 0);

	if (pif->pif_classes[a->priority] != NULL)
		return (EBUSY);
	if (clh_to_clp(pif, a->qid) != NULL)
		return (EBUSY);

	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
			       &a->pq_u.fairq_opts, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

int
fairq_add_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* check parameters */
	if (a->priority >= FAIRQ_MAXPRI)
		return (EINVAL);
	if (a->qid == 0)
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_add_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

static int
fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
		return (EINVAL);

	return (fairq_class_destroy(cl));
}

int
fairq_remove_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_remove_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

int
fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	struct fairq_if *pif;
	struct fairq_class *cl;
	struct fairq_classstats stats;
	struct ifaltq *ifq;
	int error = 0;

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	ifnet_lock();

	/* XXX not MP safe */
	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) {
		ifnet_unlock();
		return (EBADF);
	}
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);

	if ((cl = clh_to_clp(pif, a->qid)) == NULL) {
		FAIRQ_UNLOCK(ifq);
		ifnet_unlock();
		return (EINVAL);
	}

	get_class_stats(&stats, cl);

	FAIRQ_UNLOCK(ifq);

	ifnet_unlock();

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);
	*nbytes = sizeof(stats);
	return (0);
}
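/*
 * NOTE: usage sketch (an assumption about the caller, not spelled out
 *	 here): userland queue statistics, e.g. those displayed by pfctl,
 *	 arrive through the pf ioctl path with *nbytes set to the buffer
 *	 size; on success the counters are copied out and *nbytes is
 *	 updated to sizeof(struct fairq_classstats).
 */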
/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
fairq_clear_interface(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	/* clear out the classes */
	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL)
			fairq_class_destroy(cl);
	}

	return (0);
}

static int
fairq_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;

	crit_enter();
	switch (req) {
	case ALTRQ_PURGE:
		if (ifsq_get_index(ifsq) == FAIRQ_SUBQ_INDEX) {
			fairq_purge(pif);
		} else {
			/*
			 * Race happened, the unrelated subqueue was
			 * picked during the packet scheduler transition.
			 */
			ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		}
		break;
	}
	crit_exit();
	return (0);
}

/* discard all the queued packets on the interface */
static void
fairq_purge(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
			fairq_purgeq(cl);
	}
	if (ifq_is_enabled(pif->pif_ifq))
		ALTQ_SQ_CNTR_RESET(&pif->pif_ifq->altq_subq[FAIRQ_SUBQ_INDEX]);
}

static struct fairq_class *
fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
		   u_int bandwidth, struct fairq_opts *opts, int qid)
{
	struct fairq_class *cl;
	int flags = opts->flags;
	u_int nbuckets = opts->nbuckets;
	int i;

#ifndef ALTQ_RED
	if (flags & FARF_RED) {
#ifdef ALTQ_DEBUG
		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
#endif
		return (NULL);
	}
#endif
	if (nbuckets == 0)
		nbuckets = 256;
	if (nbuckets > FAIRQ_MAX_BUCKETS)
		nbuckets = FAIRQ_MAX_BUCKETS;
	/*
	 * Enforce a power-of-2 bucket count: only for a power of 2 does
	 * nbuckets ^ (nbuckets - 1) set every bit up to the next power,
	 * i.e. equal (nbuckets << 1) - 1 (e.g. 64 ^ 63 == 127), so this
	 * loop rounds nbuckets up to the next power of 2.
	 */
	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
		++nbuckets;

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		crit_enter();
		if (cl->cl_head)
			fairq_purgeq(cl);
		crit_exit();
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	} else {
		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_nbuckets = nbuckets;
		cl->cl_nbucket_mask = nbuckets - 1;

		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
					 cl->cl_nbuckets,
					 M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_head = NULL;
	}

	pif->pif_classes[pri] = cl;
	if (flags & FARF_DEFAULTCLASS)
		pif->pif_default = cl;
	if (qlimit == 0)
		qlimit = 50;	/* use default */
	cl->cl_qlimit = qlimit;
	for (i = 0; i < cl->cl_nbuckets; ++i) {
		qlimit(&cl->cl_buckets[i].queue) = qlimit;
	}
	cl->cl_bandwidth = bandwidth / 8;	/* cvt to bytes per second */
	cl->cl_qtype = Q_DROPTAIL;
	cl->cl_flags = flags & FARF_USERFLAGS;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;
	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
	cl->cl_bw_current = 0;
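	/*
	 * NOTE: illustration of the hog threshold (numbers assumed, not
	 *	 from this file): 'hogs 1Kb' yields cl_hogs_m1 = 125
	 *	 bytes/sec.  A newly activated bucket moving less than that
	 *	 is placed at the head of the round-robin and may burst;
	 *	 buckets at or above it are rotated past on each dequeue.
	 *	 See fairq_addq() and fairq_selectq().
	 */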
#ifdef ALTQ_RED
	if (flags & (FARF_RED|FARF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & FARF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & FARF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		if (pif->pif_bandwidth < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime =
				(int64_t)pif->pif_ifq->altq_ifp->if_mtu *
				(1000 * 1000 * 1000) /
				(pif->pif_bandwidth / 8 + 1);
#ifdef ALTQ_RIO
		if (flags & FARF_RIO) {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
						red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RIO;
		} else
#endif
		if (flags & FARF_RED) {
			cl->cl_red = red_alloc(0, 0,
			    cl->cl_qlimit * 10 / 100,
			    cl->cl_qlimit * 30 / 100,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RED;
		}
	}
#endif /* ALTQ_RED */

	return (cl);
}

static int
fairq_class_destroy(struct fairq_class *cl)
{
	struct fairq_if *pif;
	int pri;

	crit_enter();

	if (cl->cl_head)
		fairq_purgeq(cl);

	pif = cl->cl_pif;
	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_poll_cache == cl)
		pif->pif_poll_cache = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}
	crit_exit();

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	}
	kfree(cl->cl_buckets, M_ALTQ);
	cl->cl_head = NULL;	/* sanity */
	cl->cl_buckets = NULL;	/* sanity */
	kfree(cl, M_ALTQ);

	return (0);
}

/*
 * fairq_enqueue is an enqueue function to be registered to
 * (*ifsq_enqueue) in struct ifaltq_subque.
 */
static int
fairq_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
	      struct altq_pktattr *pktattr)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	int error;
	int len;
	int hash;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		m_freem(m);
		return ENOBUFS;
	}

	crit_enter();

	/* grab class set by classifier */
	M_ASSERTPKTHDR(m);
	if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
		cl = clh_to_clp(pif, m->m_pkthdr.pf.qid);
		if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED)
			hash = (int)m->m_pkthdr.pf.state_hash;
		else
			hash = 0;
	} else {
		cl = NULL;
		hash = 0;
	}
	if (cl == NULL) {
		cl = pif->pif_default;
		if (cl == NULL) {
			m_freem(m);
			error = ENOBUFS;
			goto done;
		}
	}
	cl->cl_flags |= FARF_HAS_PACKETS;
	cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (fairq_addq(cl, m, hash) != 0) {
		/* drop occurred.  mbuf was freed in fairq_addq. */
		PKTCNTR_ADD(&cl->cl_dropcnt, len);
		error = ENOBUFS;
		goto done;
	}
	ALTQ_SQ_PKTCNT_INC(ifsq);
	error = 0;
done:
	crit_exit();
	return (error);
}
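/*
 * NOTE: classification sketch (illustrative, derived from the tags used
 *	 above): a rule like 'pass out ... keep state queue bulk' makes pf
 *	 stamp the mbuf with the qid of 'bulk' and, for stateful traffic,
 *	 a per-state hash (PF_TAG_STATE_HASHED).  fairq_enqueue() maps
 *	 qid -> class and state_hash -> bucket, so each flow gets its own
 *	 bucket, while unhashed packets (hash 0) are simply appended to
 *	 the tail bucket of the active list in fairq_addq().
 */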
/*
 * fairq_dequeue is a dequeue function to be registered to
 * (*ifsq_dequeue) in struct ifaltq_subque.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 */
static struct mbuf *
fairq_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	struct fairq_class *best_cl;
	struct mbuf *best_m;
	struct mbuf *m;
	uint64_t cur_time = read_machclk();
	uint64_t best_scale;
	uint64_t scale;
	int pri;
	int hit_limit;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		return NULL;
	}

	if (ifsq_is_empty(ifsq)) {
		/* no packet in the queue */
		return (NULL);
	}

	crit_enter();
	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
		best_cl = pif->pif_poll_cache;
		m = fairq_getq(best_cl, cur_time);
		pif->pif_poll_cache = NULL;
		if (m) {
			ALTQ_SQ_PKTCNT_DEC(ifsq);
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		}
	} else {
		best_cl = NULL;
		best_m = NULL;
		best_scale = 0xFFFFFFFFFFFFFFFFLLU;

		for (pri = pif->pif_maxpri; pri >= 0; pri--) {
			if ((cl = pif->pif_classes[pri]) == NULL)
				continue;
			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
				continue;
			m = fairq_pollq(cl, cur_time, &hit_limit);
			if (m == NULL) {
				cl->cl_flags &= ~FARF_HAS_PACKETS;
				continue;
			}

			/*
			 * We can halt the search immediately if the queue
			 * did not hit its bandwidth limit.
			 */
			if (hit_limit == 0) {
				best_cl = cl;
				best_m = m;
				break;
			}

			/*
			 * Otherwise calculate the scale factor and select
			 * the queue with the lowest scale factor.  This
			 * apportions any unused bandwidth weighted by
			 * the relative bandwidth specification.
			 *
			 * scale = (bw / max) with a multiple of 256.
			 *
			 * The calculation is refactored to reduce the
			 * chance of overflow.
			 */
			scale = cl->cl_bw_current * 16 /
				(cl->cl_bandwidth / 16 + 1);
			if (best_scale > scale) {
				best_cl = cl;
				best_m = m;
				best_scale = scale;
			}
		}

		if (op == ALTDQ_POLL) {
#ifdef foo
			/*
			 * Don't use poll cache; the poll/dequeue
			 * model is no longer applicable to SMP
			 * systems.  e.g.
			 *    CPU-A            CPU-B
			 *      :                :
			 *    poll               :
			 *      :              poll
			 *    dequeue (+)        :
			 *
			 * The dequeue at (+) will hit the poll
			 * cache set by CPU-B.
			 */
			pif->pif_poll_cache = best_cl;
#endif
			m = best_m;
		} else if (best_cl) {
			m = fairq_getq(best_cl, cur_time);
			KKASSERT(best_m == m);
			ALTQ_SQ_PKTCNT_DEC(ifsq);
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		} else {
			m = NULL;
		}
	}
	crit_exit();
	return (m);
}
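/*
 * NOTE: worked example of the scale selection above (numbers are
 *	 illustrative): a class moving 30000 bytes/sec against a 25000
 *	 bytes/sec spec scores 30000 * 16 / (25000 / 16 + 1) ~= 307; one
 *	 moving 30000 bytes/sec against a 50000 bytes/sec spec scores
 *	 ~153.  The lower score is further below its relative share and
 *	 wins, which is what apportions leftover bandwidth in proportion
 *	 to the configured specs.
 */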
static int
fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash)
{
	fairq_bucket_t *b;
	u_int hindex;
	uint64_t bw;

	/*
	 * If the packet doesn't have any keep state put it on the end of
	 * our queue.  XXX this can result in out of order delivery.
	 */
	if (hash == 0) {
		if (cl->cl_head)
			b = cl->cl_head->prev;
		else
			b = &cl->cl_buckets[0];
	} else {
		hindex = hash & cl->cl_nbucket_mask;
		b = &cl->cl_buckets[hindex];
	}

	/*
	 * Add the bucket to the end of the circular list of active buckets.
	 *
	 * As a special case we add the bucket to the beginning of the list
	 * instead of the end if it was not previously on the list and if
	 * its traffic is less than the hog level.
	 */
	if (b->in_use == 0) {
		b->in_use = 1;
		if (cl->cl_head == NULL) {
			cl->cl_head = b;
			cl->cl_advanced = 1;
			b->next = b;
			b->prev = b;
		} else {
			b->next = cl->cl_head;
			b->prev = cl->cl_head->prev;
			b->prev->next = b;
			b->next->prev = b;

			if (b->bw_delta && cl->cl_hogs_m1) {
				bw = b->bw_bytes * machclk_freq / b->bw_delta;
				if (bw < cl->cl_hogs_m1) {
					cl->cl_head = b;
					cl->cl_advanced = 1;
				}
			}
		}
	}

#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
	if (qlen(&b->queue) >= qlimit(&b->queue)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & FARF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(&b->queue, m);

	return (0);
}

static struct mbuf *
fairq_getq(struct fairq_class *cl, uint64_t cur_time)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	b = fairq_selectq(cl);
	if (b == NULL)
		m = NULL;
#ifdef ALTQ_RIO
	else if (cl->cl_qtype == Q_RIO)
		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
#endif
#ifdef ALTQ_RED
	else if (cl->cl_qtype == Q_RED)
		m = red_getq(cl->cl_red, &b->queue);
#endif
	else
		m = _getq(&b->queue);

	/*
	 * Calculate the BW change
	 */
	if (m != NULL) {
		uint64_t delta;

		/*
		 * Per-class bandwidth calculation
		 */
		delta = (cur_time - cl->cl_last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		cl->cl_bw_delta += delta;
		cl->cl_bw_bytes += m->m_pkthdr.len;
		cl->cl_last_time = cur_time;

		/*
		 * Cap delta at ~machclk_freq to avoid overflows.
		 */
		if (cl->cl_bw_delta > machclk_freq) {
			uint64_t f = cl->cl_bw_delta * 32 / machclk_freq;
			cl->cl_bw_delta = cl->cl_bw_delta * 16 / f;
			cl->cl_bw_bytes = cl->cl_bw_bytes * 16 / f;
		}

		/*
		 * Per-bucket bandwidth calculation.
		 */
		delta = (cur_time - b->last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		b->bw_delta += delta;
		b->bw_bytes += m->m_pkthdr.len;
		b->last_time = cur_time;

		/*
		 * Cap bw_delta at ~machclk_freq to avoid overflows.
		 */
		if (b->bw_delta > machclk_freq) {
			uint64_t f = b->bw_delta * 32 / machclk_freq;
			b->bw_delta = b->bw_delta * 16 / f;
			b->bw_bytes = b->bw_bytes * 16 / f;
		}
	}
	return (m);
}
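/*
 * NOTE: on the rescale in fairq_getq() (an explanatory sketch): once
 *	 bw_delta exceeds ~1 second worth of ticks, both bw_delta and
 *	 bw_bytes are divided by f/16, where f = bw_delta * 32 /
 *	 machclk_freq, pulling bw_delta back to roughly machclk_freq/2
 *	 while preserving the bytes/tick ratio.  The measured rates are
 *	 therefore moving averages over roughly the last second.
 */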
/*
 * Figure out what the next packet would be if there were no limits.  If
 * this class hits its bandwidth limit *hit_limit is set to non-zero,
 * otherwise it is set to 0.  The next mbuf is returned either way, or
 * NULL if the class has no queued packets.
 */
static struct mbuf *
fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
{
	fairq_bucket_t *b;
	struct mbuf *m;
	uint64_t delta;
	uint64_t bw;

	*hit_limit = 0;
	b = fairq_selectq(cl);
	if (b == NULL)
		return (NULL);
	m = qhead(&b->queue);
	if (m == NULL)
		return (NULL);
	cl->cl_advanced = 1;	/* so next select/get doesn't re-advance */

	/*
	 * Did this packet exceed the class bandwidth?
	 *
	 * Calculate the bandwidth component of the packet in bytes/sec.
	 * Avoid overflows when machclk_freq is very high.
	 */
	delta = cur_time - cl->cl_last_time;
	if (delta > machclk_freq * 8)
		delta = machclk_freq * 8;
	cl->cl_bw_delta += delta;
	cl->cl_last_time = cur_time;

	if (cl->cl_bw_delta) {
		bw = (cl->cl_bw_bytes + m->m_pkthdr.len) *
		     machclk_freq / cl->cl_bw_delta;
		if (bw > cl->cl_bandwidth)
			*hit_limit = 1;
		cl->cl_bw_current = bw;
#if 0
		kprintf("BW %6lld relative to %6llu %d queue %p\n",
			bw, cl->cl_bandwidth, *hit_limit, b);
#endif
	}
	return (m);
}

/*
 * Locate the next queue we want to pull a packet out of.  This code
 * is also responsible for removing empty buckets from the circular list.
 */
static
fairq_bucket_t *
fairq_selectq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	uint64_t bw;

	while ((b = cl->cl_head) != NULL) {
		/*
		 * Remove empty queues from consideration
		 */
		if (qempty(&b->queue)) {
			b->in_use = 0;
			cl->cl_head = b->next;
			cl->cl_advanced = 1;
			if (cl->cl_head == b) {
				cl->cl_head = NULL;
			} else {
				b->next->prev = b->prev;
				b->prev->next = b->next;
			}
			continue;
		}

		/*
		 * Advance the round robin.  Queues with bandwidths less
		 * than the hog bandwidth are allowed to burst.
		 *
		 * Don't advance twice if the previous head emptied.
		 */
		if (cl->cl_advanced) {
			cl->cl_advanced = 0;
			break;
		}
		if (cl->cl_hogs_m1 == 0) {
			cl->cl_head = b->next;
		} else if (b->bw_delta) {
			bw = b->bw_bytes * machclk_freq / b->bw_delta;
			if (bw >= cl->cl_hogs_m1)
				cl->cl_head = b->next;
		}

		/*
		 * Return the (possibly new) head.
		 */
		b = cl->cl_head;
		break;
	}
	return (b);
}

static void
fairq_purgeq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	while ((b = fairq_selectq(cl)) != NULL) {
		while ((m = _getq(&b->queue)) != NULL) {
			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
			m_freem(m);
		}
		KKASSERT(qlen(&b->queue) == 0);
	}
}

static void
get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
{
	fairq_bucket_t *b;

	sp->class_handle = cl->cl_handle;
	sp->qlimit = cl->cl_qlimit;
	sp->xmit_cnt = cl->cl_xmitcnt;
	sp->drop_cnt = cl->cl_dropcnt;
	sp->qtype = cl->cl_qtype;
	sp->qlength = 0;

	if (cl->cl_head) {
		b = cl->cl_head;
		do {
			sp->qlength += qlen(&b->queue);
			b = b->next;
		} while (b != cl->cl_head);
	}

#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
}

/* convert a class handle to the corresponding class pointer */
static struct fairq_class *
clh_to_clp(struct fairq_if *pif, uint32_t chandle)
{
	struct fairq_class *cl;
	int idx;

	if (chandle == 0)
		return (NULL);

	for (idx = pif->pif_maxpri; idx >= 0; idx--)
		if ((cl = pif->pif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

#endif /* ALTQ_FAIRQ */