/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different from priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it.  Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: RED etc. is per bucket, not overall.
 *	- better service curve support.
 *
 * EXAMPLE:
 *
 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
 *  queue std priority 3 bandwidth 200Kb \
 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
 *  queue bulk priority 2 bandwidth 100Kb \
 *	fairq (buckets 64, hogs 1Kb) qlimit 50
 *
 *  NOTE: When the aggregate bandwidth is less than the link bandwidth
 *	  any remaining bandwidth is dynamically assigned using the
 *	  existing bandwidth specs as weightings.
 *
 *  pass out on em0 from any to any keep state queue std
 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
 */
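
/*
 * Worked example of the NOTE above (hypothetical steady-state figures):
 * std is specified at 200Kb and bulk at 100Kb on a 650Kb link.  A class
 * running under its own spec is always serviced first, so each class is
 * guaranteed its configured rate.  Once both classes exceed their specs,
 * the dequeue path services the class with the lowest current/spec
 * ratio, driving the split toward the 2:1 weighting of the specs --
 * roughly 433Kb for std and 217Kb for bulk on a saturated link.
 */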

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/thread.h>

#include <net/if.h>
#include <net/ifq_var.h>
#include <netinet/in.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>
#include <net/altq/altq_fairq.h>

#include <sys/thread2.h>

#define FAIRQ_SUBQ_INDEX	ALTQ_SUBQ_INDEX_DEFAULT
#define FAIRQ_LOCK(ifq) \
    ALTQ_SQ_LOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])
#define FAIRQ_UNLOCK(ifq) \
    ALTQ_SQ_UNLOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])

/*
 * function prototypes
 */
static int	fairq_clear_interface(struct fairq_if *);
static int	fairq_request(struct ifaltq_subque *, int, void *);
static void	fairq_purge(struct fairq_if *);
static struct fairq_class *fairq_class_create(struct fairq_if *, int,
					int, u_int, struct fairq_opts *, int);
static int	fairq_class_destroy(struct fairq_class *);
static int	fairq_enqueue(struct ifaltq_subque *, struct mbuf *,
					struct altq_pktattr *);
static struct mbuf *fairq_dequeue(struct ifaltq_subque *, int);

static int	fairq_addq(struct fairq_class *, struct mbuf *, int hash);
static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
static fairq_bucket_t *fairq_selectq(struct fairq_class *);
static void	fairq_purgeq(struct fairq_class *);

static void	get_class_stats(struct fairq_classstats *,
					struct fairq_class *);
static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);

int
fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq)
{
	return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc, ifq_mapsubq_default,
	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
}

int
fairq_add_altq(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifnet *ifp;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);
	if (!ifq_is_ready(&ifp->if_snd))
		return (ENODEV);

	pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO);
	pif->pif_bandwidth = a->ifbandwidth;
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;
	ifq_purge_all(&ifp->if_snd);

	/* keep the state in pf_altq */
	a->altq_disc = pif;

	return (0);
}

int
fairq_remove_altq(struct pf_altq *a)
{
	struct fairq_if *pif;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	fairq_clear_interface(pif);

	kfree(pif, M_ALTQ);
	return (0);
}

static int
fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	KKASSERT(a->priority < FAIRQ_MAXPRI);
	KKASSERT(a->qid != 0);

	if (pif->pif_classes[a->priority] != NULL)
		return (EBUSY);
	if (clh_to_clp(pif, a->qid) != NULL)
		return (EBUSY);

	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
			       &a->pq_u.fairq_opts, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

int
fairq_add_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* check parameters */
	if (a->priority >= FAIRQ_MAXPRI)
		return (EINVAL);
	if (a->qid == 0)
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_add_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

static int
fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
		return (EINVAL);

	return (fairq_class_destroy(cl));
}

int
fairq_remove_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_remove_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

int
fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	struct fairq_if *pif;
	struct fairq_class *cl;
	struct fairq_classstats stats;
	struct ifaltq *ifq;
	int error = 0;

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
		return (EBADF);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);

	if ((cl = clh_to_clp(pif, a->qid)) == NULL) {
		FAIRQ_UNLOCK(ifq);
		return (EINVAL);
	}

	get_class_stats(&stats, cl);

	FAIRQ_UNLOCK(ifq);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);
	*nbytes = sizeof(stats);
	return (0);
}
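
/*
 * Illustrative contract for fairq_getqstats() (hypothetical caller shown
 * for documentation only; ubuf must point into user space because the
 * stats are written with copyout(9)):
 *
 *	int nbytes = sizeof(struct fairq_classstats);
 *
 *	error = fairq_getqstats(a, ubuf, &nbytes);
 *	if (error == 0)
 *		KKASSERT(nbytes == sizeof(struct fairq_classstats));
 */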

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
fairq_clear_interface(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	/* clear out the classes */
	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL)
			fairq_class_destroy(cl);
	}

	return (0);
}

static int
fairq_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;

	crit_enter();
	switch (req) {
	case ALTRQ_PURGE:
		if (ifsq_get_index(ifsq) == FAIRQ_SUBQ_INDEX) {
			fairq_purge(pif);
		} else {
			/*
			 * Race happened, the unrelated subqueue was
			 * picked during the packet scheduler transition.
			 */
			ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		}
		break;
	}
	crit_exit();
	return (0);
}

/* discard all the queued packets on the interface */
static void
fairq_purge(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
			fairq_purgeq(cl);
	}
	if (ifq_is_enabled(pif->pif_ifq))
		ALTQ_SQ_CNTR_RESET(&pif->pif_ifq->altq_subq[FAIRQ_SUBQ_INDEX]);
}

static struct fairq_class *
fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
		   u_int bandwidth, struct fairq_opts *opts, int qid)
{
	struct fairq_class *cl;
	int flags = opts->flags;
	u_int nbuckets = opts->nbuckets;
	int i;

#ifndef ALTQ_RED
	if (flags & FARF_RED) {
#ifdef ALTQ_DEBUG
		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
#endif
		return (NULL);
	}
#endif
	if (nbuckets == 0)
		nbuckets = 256;
	if (nbuckets > FAIRQ_MAX_BUCKETS)
		nbuckets = FAIRQ_MAX_BUCKETS;
	/* enforce power-of-2 size */
	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
		++nbuckets;

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		crit_enter();
		if (cl->cl_head)
			fairq_purgeq(cl);
		crit_exit();
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	} else {
		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_nbuckets = nbuckets;
		cl->cl_nbucket_mask = nbuckets - 1;

		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
					 cl->cl_nbuckets,
					 M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_head = NULL;
	}

	pif->pif_classes[pri] = cl;
	if (flags & FARF_DEFAULTCLASS)
		pif->pif_default = cl;
	if (qlimit == 0)
		qlimit = 50;	/* use default */
	cl->cl_qlimit = qlimit;
	for (i = 0; i < cl->cl_nbuckets; ++i) {
		qlimit(&cl->cl_buckets[i].queue) = qlimit;
	}
	cl->cl_bandwidth = bandwidth / 8;	/* cvt to bytes per second */
	cl->cl_qtype = Q_DROPTAIL;
	cl->cl_flags = flags & FARF_USERFLAGS;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;
	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
	cl->cl_bw_current = 0;

#ifdef ALTQ_RED
	if (flags & (FARF_RED|FARF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & FARF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & FARF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		if (pif->pif_bandwidth < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu *
				      (1000 * 1000 * 1000) /
				      (pif->pif_bandwidth / 8 + 1);
#ifdef ALTQ_RIO
		if (flags & FARF_RIO) {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
					red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RIO;
		} else
#endif
		if (flags & FARF_RED) {
			cl->cl_red = red_alloc(0, 0,
			    cl->cl_qlimit * 10 / 100,
			    cl->cl_qlimit * 30 / 100,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RED;
		}
	}
#endif /* ALTQ_RED */

	return (cl);
}
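
/*
 * Worked example for fairq_class_create() (hypothetical numbers): a
 * request for "buckets 100" is rounded up by the power-of-2 loop to
 * nbuckets = 128 (mask 0x7f), since (n ^ (n - 1)) == ((n << 1) - 1)
 * holds only when n is a power of 2.  A "bandwidth 200Kb" spec arrives
 * as 200000 bits/sec and is stored as cl_bandwidth = 25000 bytes/sec;
 * "hogs 1Kb" likewise becomes cl_hogs_m1 = 125 bytes/sec.
 */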

static int
fairq_class_destroy(struct fairq_class *cl)
{
	struct fairq_if *pif;
	int pri;

	crit_enter();

	if (cl->cl_head)
		fairq_purgeq(cl);

	pif = cl->cl_pif;
	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_poll_cache == cl)
		pif->pif_poll_cache = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}
	crit_exit();

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	}
	kfree(cl->cl_buckets, M_ALTQ);
	cl->cl_head = NULL;	/* sanity */
	cl->cl_buckets = NULL;	/* sanity */
	kfree(cl, M_ALTQ);

	return (0);
}

/*
 * fairq_enqueue is an enqueue function to be registered to
 * (*ifsq_enqueue) in struct ifaltq_subque.
 */
static int
fairq_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
	      struct altq_pktattr *pktattr)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	int error;
	int len;
	int hash;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		m_freem(m);
		return ENOBUFS;
	}

	crit_enter();

	/* grab class set by classifier */
	M_ASSERTPKTHDR(m);
	if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
		cl = clh_to_clp(pif, m->m_pkthdr.pf.qid);
		if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED)
			hash = (int)m->m_pkthdr.pf.state_hash;
		else
			hash = 0;
	} else {
		cl = NULL;
		hash = 0;
	}
	if (cl == NULL) {
		cl = pif->pif_default;
		if (cl == NULL) {
			m_freem(m);
			error = ENOBUFS;
			goto done;
		}
	}
	cl->cl_flags |= FARF_HAS_PACKETS;
	cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (fairq_addq(cl, m, hash) != 0) {
		/* drop occurred.  mbuf was freed in fairq_addq. */
		PKTCNTR_ADD(&cl->cl_dropcnt, len);
		error = ENOBUFS;
		goto done;
	}
	ALTQ_SQ_CNTR_INC(ifsq, len);
	error = 0;
done:
	crit_exit();
	return (error);
}
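
/*
 * Illustrative note on the classifier hand-off above (hypothetical
 * values): a packet whose pf state carries state_hash 0x12345, enqueued
 * on a class with 64 buckets, lands in bucket 0x12345 & 0x3f = 5 inside
 * fairq_addq().  All packets of that state share one bucket and stay in
 * order, while other states hash into other buckets and are drained
 * round-robin.
 */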

/*
 * fairq_dequeue is a dequeue function to be registered to
 * (*ifsq_dequeue) in struct ifaltq_subque.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	 from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 */
static struct mbuf *
fairq_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	struct fairq_class *best_cl;
	struct mbuf *best_m;
	struct mbuf *m;
	uint64_t cur_time = read_machclk();
	uint64_t best_scale;
	uint64_t scale;
	int pri;
	int hit_limit;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		return NULL;
	}

	if (ifsq_is_empty(ifsq)) {
		/* no packet in the queue */
		return (NULL);
	}

	crit_enter();
	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
		best_cl = pif->pif_poll_cache;
		m = fairq_getq(best_cl, cur_time);
		pif->pif_poll_cache = NULL;
		if (m) {
			ALTQ_SQ_CNTR_DEC(ifsq, m_pktlen(m));
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		}
	} else {
		best_cl = NULL;
		best_m = NULL;
		best_scale = 0xFFFFFFFFFFFFFFFFLLU;

		for (pri = pif->pif_maxpri; pri >= 0; pri--) {
			if ((cl = pif->pif_classes[pri]) == NULL)
				continue;
			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
				continue;
			m = fairq_pollq(cl, cur_time, &hit_limit);
			if (m == NULL) {
				cl->cl_flags &= ~FARF_HAS_PACKETS;
				continue;
			}

			/*
			 * We can halt the search immediately if the queue
			 * did not hit its bandwidth limit.
			 */
			if (hit_limit == 0) {
				best_cl = cl;
				best_m = m;
				break;
			}

			/*
			 * Otherwise calculate the scale factor and select
			 * the queue with the lowest scale factor.  This
			 * apportions any unused bandwidth weighted by
			 * the relative bandwidth specification.
			 *
			 * scale = (bw / max) with a multiple of 256.
			 *
			 * The calculation is refactored to reduce the
			 * chance of overflow.
			 */
			scale = cl->cl_bw_current * 16 /
				(cl->cl_bandwidth / 16 + 1);
			if (best_scale > scale) {
				best_cl = cl;
				best_m = m;
				best_scale = scale;
			}
		}

		if (op == ALTDQ_POLL) {
#ifdef foo
			/*
			 * Don't use poll cache; the poll/dequeue
			 * model is no longer applicable to SMP
			 * system.  e.g.
			 *    CPU-A            CPU-B
			 *      :                :
			 *     poll              :
			 *      :               poll
			 *     dequeue (+)       :
			 *
			 * The dequeue at (+) will hit the poll
			 * cache set by CPU-B.
			 */
			pif->pif_poll_cache = best_cl;
#endif
			m = best_m;
		} else if (best_cl) {
			m = fairq_getq(best_cl, cur_time);
			KKASSERT(best_m == m);
			ALTQ_SQ_CNTR_DEC(ifsq, m_pktlen(m));
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		} else {
			m = NULL;
		}
	}
	crit_exit();
	return (m);
}
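
/*
 * Worked example of the scale selection in fairq_dequeue() (hypothetical
 * rates): class A with cl_bandwidth = 25000 bytes/sec currently moving
 * 30000 bytes/sec gets scale = 30000 * 16 / (25000 / 16 + 1) = 307
 * (~256 * 1.2); class B with cl_bandwidth = 12500 at 20000 bytes/sec
 * gets scale = 20000 * 16 / (12500 / 16 + 1) = 409 (~256 * 1.6).  A has
 * the lower scale and is serviced, pushing the measured rates toward
 * the ratio of the configured specs.
 */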

static int
fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash)
{
	fairq_bucket_t *b;
	u_int hindex;
	uint64_t bw;

	/*
	 * If the packet doesn't have any keep state put it on the end of
	 * our queue.  XXX this can result in out of order delivery.
	 */
	if (hash == 0) {
		if (cl->cl_head)
			b = cl->cl_head->prev;
		else
			b = &cl->cl_buckets[0];
	} else {
		hindex = hash & cl->cl_nbucket_mask;
		b = &cl->cl_buckets[hindex];
	}

	/*
	 * Add the bucket to the end of the circular list of active buckets.
	 *
	 * As a special case we add the bucket to the beginning of the list
	 * instead of the end if it was not previously on the list and if
	 * its traffic is less than the hog level.
	 */
	if (b->in_use == 0) {
		b->in_use = 1;
		if (cl->cl_head == NULL) {
			cl->cl_head = b;
			cl->cl_advanced = 1;
			b->next = b;
			b->prev = b;
		} else {
			b->next = cl->cl_head;
			b->prev = cl->cl_head->prev;
			b->prev->next = b;
			b->next->prev = b;

			if (b->bw_delta && cl->cl_hogs_m1) {
				bw = b->bw_bytes * machclk_freq / b->bw_delta;
				if (bw < cl->cl_hogs_m1) {
					cl->cl_head = b;
					cl->cl_advanced = 1;
				}
			}
		}
	}

#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
	if (qlen(&b->queue) >= qlimit(&b->queue)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & FARF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(&b->queue, m);

	return (0);
}

static struct mbuf *
fairq_getq(struct fairq_class *cl, uint64_t cur_time)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	b = fairq_selectq(cl);
	if (b == NULL)
		m = NULL;
#ifdef ALTQ_RIO
	else if (cl->cl_qtype == Q_RIO)
		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
#endif
#ifdef ALTQ_RED
	else if (cl->cl_qtype == Q_RED)
		m = red_getq(cl->cl_red, &b->queue);
#endif
	else
		m = _getq(&b->queue);

	/*
	 * Calculate the BW change
	 */
	if (m != NULL) {
		uint64_t delta;

		/*
		 * Per-class bandwidth calculation
		 */
		delta = (cur_time - cl->cl_last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		cl->cl_bw_delta += delta;
		cl->cl_bw_bytes += m->m_pkthdr.len;
		cl->cl_last_time = cur_time;

		/*
		 * Cap delta at ~machclk_freq to avoid overflows.
		 */
		if (cl->cl_bw_delta > machclk_freq) {
			uint64_t f = cl->cl_bw_delta * 32 / machclk_freq;
			cl->cl_bw_delta = cl->cl_bw_delta * 16 / f;
			cl->cl_bw_bytes = cl->cl_bw_bytes * 16 / f;
		}

		/*
		 * Per-bucket bandwidth calculation.
		 */
		delta = (cur_time - b->last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		b->bw_delta += delta;
		b->bw_bytes += m->m_pkthdr.len;
		b->last_time = cur_time;

		/*
		 * Cap bw_delta at ~machclk_freq to avoid overflows.
		 */
		if (b->bw_delta > machclk_freq) {
			uint64_t f = b->bw_delta * 32 / machclk_freq;
			b->bw_delta = b->bw_delta * 16 / f;
			b->bw_bytes = b->bw_bytes * 16 / f;
		}
	}
	return (m);
}
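
/*
 * Worked example of the rescaling in fairq_getq() (hypothetical clock):
 * with machclk_freq = 1000000 ticks/sec, once cl_bw_delta reaches
 * 2000000 (two seconds of history), f = 2000000 * 32 / 1000000 = 64 and
 * both cl_bw_delta and cl_bw_bytes are multiplied by 16/64, shrinking
 * the measurement window to ~500000 ticks while preserving the
 * bytes-per-tick ratio -- the running bandwidth estimate behaves like a
 * coarse moving average.
 */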

/*
 * Figure out what the next packet would be if there were no limits.  If
 * this class hits its bandwidth limit *hit_limit is set to non-zero,
 * otherwise it is set to 0.  A non-NULL mbuf is returned either way.
 */
static struct mbuf *
fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
{
	fairq_bucket_t *b;
	struct mbuf *m;
	uint64_t delta;
	uint64_t bw;

	*hit_limit = 0;
	b = fairq_selectq(cl);
	if (b == NULL)
		return (NULL);
	m = qhead(&b->queue);
	cl->cl_advanced = 1;	/* so next select/get doesn't re-advance */

	/*
	 * Did this packet exceed the class bandwidth?
	 *
	 * Calculate the bandwidth component of the packet in bytes/sec.
	 * Avoid overflows when machclk_freq is very high.
	 */
	delta = cur_time - cl->cl_last_time;
	if (delta > machclk_freq * 8)
		delta = machclk_freq * 8;
	cl->cl_bw_delta += delta;
	cl->cl_last_time = cur_time;

	if (cl->cl_bw_delta) {
		bw = (cl->cl_bw_bytes + m->m_pkthdr.len) *
		     machclk_freq / cl->cl_bw_delta;
		if (bw > cl->cl_bandwidth)
			*hit_limit = 1;
		cl->cl_bw_current = bw;
#if 0
		kprintf("BW %6lld relative to %6llu %d queue %p\n",
			bw, cl->cl_bandwidth, *hit_limit, b);
#endif
	}
	return (m);
}

/*
 * Locate the next queue we want to pull a packet out of.  This code
 * is also responsible for removing empty buckets from the circular list.
 */
static fairq_bucket_t *
fairq_selectq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	uint64_t bw;

	while ((b = cl->cl_head) != NULL) {
		/*
		 * Remove empty queues from consideration
		 */
		if (qempty(&b->queue)) {
			b->in_use = 0;
			cl->cl_head = b->next;
			cl->cl_advanced = 1;
			if (cl->cl_head == b) {
				cl->cl_head = NULL;
			} else {
				b->next->prev = b->prev;
				b->prev->next = b->next;
			}
			continue;
		}

		/*
		 * Advance the round robin.  Queues with bandwidths less
		 * than the hog bandwidth are allowed to burst.
		 *
		 * Don't advance twice if the previous head emptied.
		 */
		if (cl->cl_advanced) {
			cl->cl_advanced = 0;
			break;
		}
		if (cl->cl_hogs_m1 == 0) {
			cl->cl_head = b->next;
		} else if (b->bw_delta) {
			bw = b->bw_bytes * machclk_freq / b->bw_delta;
			if (bw >= cl->cl_hogs_m1)
				cl->cl_head = b->next;
		}

		/*
		 * Return the (possibly new) head.
		 */
		b = cl->cl_head;
		break;
	}
	return (b);
}

static void
fairq_purgeq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	while ((b = fairq_selectq(cl)) != NULL) {
		while ((m = _getq(&b->queue)) != NULL) {
			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
			m_freem(m);
		}
		KKASSERT(qlen(&b->queue) == 0);
	}
}

static void
get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
{
	fairq_bucket_t *b;

	sp->class_handle = cl->cl_handle;
	sp->qlimit = cl->cl_qlimit;
	sp->xmit_cnt = cl->cl_xmitcnt;
	sp->drop_cnt = cl->cl_dropcnt;
	sp->qtype = cl->cl_qtype;
	sp->qlength = 0;

	if (cl->cl_head) {
		b = cl->cl_head;
		do {
			sp->qlength += qlen(&b->queue);
			b = b->next;
		} while (b != cl->cl_head);
	}

#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
}

/* convert a class handle to the corresponding class pointer */
static struct fairq_class *
clh_to_clp(struct fairq_if *pif, uint32_t chandle)
{
	struct fairq_class *cl;
	int idx;

	if (chandle == 0)
		return (NULL);

	for (idx = pif->pif_maxpri; idx >= 0; idx--)
		if ((cl = pif->pif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

#endif /* ALTQ_FAIRQ */