/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.
 * The fairq algorithm is completely different than priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it.  Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: red etc is per bucket, not overall.
 *	- better service curve support.
75 * 76 * EXAMPLE: 77 * 78 * altq on em0 fairq bandwidth 650Kb queue { std, bulk } 79 * queue std priority 3 bandwidth 200Kb \ 80 * fairq (buckets 64, default, hogs 1Kb) qlimit 50 81 * queue bulk priority 2 bandwidth 100Kb \ 82 * fairq (buckets 64, hogs 1Kb) qlimit 50 83 * 84 * NOTE: When the aggregate bandwidth is less than the link bandwidth 85 * any remaining bandwidth is dynamically assigned using the 86 * existing bandwidth specs as weightings. 87 * 88 * pass out on em0 from any to any keep state queue std 89 * pass out on em0 inet proto tcp ..... port ... keep state queue bulk 90 */ 91 #include "opt_altq.h" 92 #include "opt_inet.h" 93 #include "opt_inet6.h" 94 95 #ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ 96 97 #include <sys/param.h> 98 #include <sys/malloc.h> 99 #include <sys/mbuf.h> 100 #include <sys/socket.h> 101 #include <sys/sockio.h> 102 #include <sys/systm.h> 103 #include <sys/proc.h> 104 #include <sys/errno.h> 105 #include <sys/kernel.h> 106 #include <sys/queue.h> 107 #include <sys/thread.h> 108 109 #include <net/if.h> 110 #include <net/ifq_var.h> 111 #include <netinet/in.h> 112 113 #include <net/pf/pfvar.h> 114 #include <net/altq/altq.h> 115 #include <net/altq/altq_fairq.h> 116 117 #include <sys/thread2.h> 118 119 /* 120 * function prototypes 121 */ 122 static int fairq_clear_interface(struct fairq_if *); 123 static int fairq_request(struct ifaltq *, int, void *); 124 static void fairq_purge(struct fairq_if *); 125 static struct fairq_class *fairq_class_create(struct fairq_if *, int, 126 int, u_int, struct fairq_opts *, int); 127 static int fairq_class_destroy(struct fairq_class *); 128 static int fairq_enqueue(struct ifaltq *, struct mbuf *, 129 struct altq_pktattr *); 130 static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int); 131 132 static int fairq_addq(struct fairq_class *, struct mbuf *, int hash); 133 static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); 134 static struct mbuf *fairq_pollq(struct 
fairq_class *, uint64_t, int *); 135 static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); 136 static void fairq_purgeq(struct fairq_class *); 137 138 static void get_class_stats(struct fairq_classstats *, 139 struct fairq_class *); 140 static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); 141 142 int 143 fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq) 144 { 145 return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc, 146 fairq_enqueue, fairq_dequeue, 147 fairq_request, NULL, NULL); 148 } 149 150 int 151 fairq_add_altq(struct pf_altq *a) 152 { 153 struct fairq_if *pif; 154 struct ifnet *ifp; 155 156 if ((ifp = ifunit(a->ifname)) == NULL) 157 return (EINVAL); 158 if (!ifq_is_ready(&ifp->if_snd)) 159 return (ENODEV); 160 161 pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO); 162 pif->pif_bandwidth = a->ifbandwidth; 163 pif->pif_maxpri = -1; 164 pif->pif_ifq = &ifp->if_snd; 165 ifq_purge(&ifp->if_snd); 166 167 /* keep the state in pf_altq */ 168 a->altq_disc = pif; 169 170 return (0); 171 } 172 173 int 174 fairq_remove_altq(struct pf_altq *a) 175 { 176 struct fairq_if *pif; 177 178 if ((pif = a->altq_disc) == NULL) 179 return (EINVAL); 180 a->altq_disc = NULL; 181 182 fairq_clear_interface(pif); 183 184 kfree(pif, M_ALTQ); 185 return (0); 186 } 187 188 static int 189 fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif) 190 { 191 struct fairq_class *cl; 192 193 KKASSERT(a->priority < FAIRQ_MAXPRI); 194 KKASSERT(a->qid != 0); 195 196 if (pif->pif_classes[a->priority] != NULL) 197 return (EBUSY); 198 if (clh_to_clp(pif, a->qid) != NULL) 199 return (EBUSY); 200 201 cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, 202 &a->pq_u.fairq_opts, a->qid); 203 if (cl == NULL) 204 return (ENOMEM); 205 206 return (0); 207 } 208 209 int 210 fairq_add_queue(struct pf_altq *a) 211 { 212 struct fairq_if *pif; 213 struct ifaltq *ifq; 214 int error; 215 216 /* check parameters */ 217 if (a->priority >= FAIRQ_MAXPRI) 218 return 
(EINVAL); 219 if (a->qid == 0) 220 return (EINVAL); 221 222 /* XXX not MP safe */ 223 if ((pif = a->altq_disc) == NULL) 224 return (EINVAL); 225 ifq = pif->pif_ifq; 226 227 ALTQ_LOCK(ifq); 228 error = fairq_add_queue_locked(a, pif); 229 ALTQ_UNLOCK(ifq); 230 231 return error; 232 } 233 234 static int 235 fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif) 236 { 237 struct fairq_class *cl; 238 239 if ((cl = clh_to_clp(pif, a->qid)) == NULL) 240 return (EINVAL); 241 242 return (fairq_class_destroy(cl)); 243 } 244 245 int 246 fairq_remove_queue(struct pf_altq *a) 247 { 248 struct fairq_if *pif; 249 struct ifaltq *ifq; 250 int error; 251 252 /* XXX not MP safe */ 253 if ((pif = a->altq_disc) == NULL) 254 return (EINVAL); 255 ifq = pif->pif_ifq; 256 257 ALTQ_LOCK(ifq); 258 error = fairq_remove_queue_locked(a, pif); 259 ALTQ_UNLOCK(ifq); 260 261 return error; 262 } 263 264 int 265 fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) 266 { 267 struct fairq_if *pif; 268 struct fairq_class *cl; 269 struct fairq_classstats stats; 270 struct ifaltq *ifq; 271 int error = 0; 272 273 if (*nbytes < sizeof(stats)) 274 return (EINVAL); 275 276 /* XXX not MP safe */ 277 if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) 278 return (EBADF); 279 ifq = pif->pif_ifq; 280 281 ALTQ_LOCK(ifq); 282 283 if ((cl = clh_to_clp(pif, a->qid)) == NULL) { 284 ALTQ_UNLOCK(ifq); 285 return (EINVAL); 286 } 287 288 get_class_stats(&stats, cl); 289 290 ALTQ_UNLOCK(ifq); 291 292 if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) 293 return (error); 294 *nbytes = sizeof(stats); 295 return (0); 296 } 297 298 /* 299 * bring the interface back to the initial state by discarding 300 * all the filters and classes. 
301 */ 302 static int 303 fairq_clear_interface(struct fairq_if *pif) 304 { 305 struct fairq_class *cl; 306 int pri; 307 308 /* clear out the classes */ 309 for (pri = 0; pri <= pif->pif_maxpri; pri++) { 310 if ((cl = pif->pif_classes[pri]) != NULL) 311 fairq_class_destroy(cl); 312 } 313 314 return (0); 315 } 316 317 static int 318 fairq_request(struct ifaltq *ifq, int req, void *arg) 319 { 320 struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; 321 322 crit_enter(); 323 switch (req) { 324 case ALTRQ_PURGE: 325 fairq_purge(pif); 326 break; 327 } 328 crit_exit(); 329 return (0); 330 } 331 332 /* discard all the queued packets on the interface */ 333 static void 334 fairq_purge(struct fairq_if *pif) 335 { 336 struct fairq_class *cl; 337 int pri; 338 339 for (pri = 0; pri <= pif->pif_maxpri; pri++) { 340 if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) 341 fairq_purgeq(cl); 342 } 343 if (ifq_is_enabled(pif->pif_ifq)) 344 pif->pif_ifq->ifq_len = 0; 345 } 346 347 static struct fairq_class * 348 fairq_class_create(struct fairq_if *pif, int pri, int qlimit, 349 u_int bandwidth, struct fairq_opts *opts, int qid) 350 { 351 struct fairq_class *cl; 352 int flags = opts->flags; 353 u_int nbuckets = opts->nbuckets; 354 int i; 355 356 #ifndef ALTQ_RED 357 if (flags & FARF_RED) { 358 #ifdef ALTQ_DEBUG 359 kprintf("fairq_class_create: RED not configured for FAIRQ!\n"); 360 #endif 361 return (NULL); 362 } 363 #endif 364 if (nbuckets == 0) 365 nbuckets = 256; 366 if (nbuckets > FAIRQ_MAX_BUCKETS) 367 nbuckets = FAIRQ_MAX_BUCKETS; 368 /* enforce power-of-2 size */ 369 while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) 370 ++nbuckets; 371 372 if ((cl = pif->pif_classes[pri]) != NULL) { 373 /* modify the class instead of creating a new one */ 374 crit_enter(); 375 if (cl->cl_head) 376 fairq_purgeq(cl); 377 crit_exit(); 378 #ifdef ALTQ_RIO 379 if (cl->cl_qtype == Q_RIO) 380 rio_destroy((rio_t *)cl->cl_red); 381 #endif 382 #ifdef ALTQ_RED 383 if (cl->cl_qtype == 
Q_RED) 384 red_destroy(cl->cl_red); 385 #endif 386 } else { 387 cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO); 388 cl->cl_nbuckets = nbuckets; 389 cl->cl_nbucket_mask = nbuckets - 1; 390 391 cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) * 392 cl->cl_nbuckets, 393 M_ALTQ, M_WAITOK | M_ZERO); 394 cl->cl_head = NULL; 395 } 396 397 pif->pif_classes[pri] = cl; 398 if (flags & FARF_DEFAULTCLASS) 399 pif->pif_default = cl; 400 if (qlimit == 0) 401 qlimit = 50; /* use default */ 402 cl->cl_qlimit = qlimit; 403 for (i = 0; i < cl->cl_nbuckets; ++i) { 404 qlimit(&cl->cl_buckets[i].queue) = qlimit; 405 } 406 cl->cl_bandwidth = bandwidth / 8; /* cvt to bytes per second */ 407 cl->cl_qtype = Q_DROPTAIL; 408 cl->cl_flags = flags & FARF_USERFLAGS; 409 cl->cl_pri = pri; 410 if (pri > pif->pif_maxpri) 411 pif->pif_maxpri = pri; 412 cl->cl_pif = pif; 413 cl->cl_handle = qid; 414 cl->cl_hogs_m1 = opts->hogs_m1 / 8; 415 cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ 416 cl->cl_bw_current = 0; 417 418 #ifdef ALTQ_RED 419 if (flags & (FARF_RED|FARF_RIO)) { 420 int red_flags, red_pkttime; 421 422 red_flags = 0; 423 if (flags & FARF_ECN) 424 red_flags |= REDF_ECN; 425 #ifdef ALTQ_RIO 426 if (flags & FARF_CLEARDSCP) 427 red_flags |= RIOF_CLEARDSCP; 428 #endif 429 if (pif->pif_bandwidth < 8) 430 red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ 431 else 432 red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu 433 * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); 434 #ifdef ALTQ_RIO 435 if (flags & FARF_RIO) { 436 cl->cl_red = (red_t *)rio_alloc(0, NULL, 437 red_flags, red_pkttime); 438 if (cl->cl_red != NULL) 439 cl->cl_qtype = Q_RIO; 440 } else 441 #endif 442 if (flags & FARF_RED) { 443 cl->cl_red = red_alloc(0, 0, 444 cl->cl_qlimit * 10/100, 445 cl->cl_qlimit * 30/100, 446 red_flags, red_pkttime); 447 if (cl->cl_red != NULL) 448 cl->cl_qtype = Q_RED; 449 } 450 } 451 #endif /* ALTQ_RED */ 452 453 return (cl); 454 } 455 456 static int 457 fairq_class_destroy(struct fairq_class 
*cl) 458 { 459 struct fairq_if *pif; 460 int pri; 461 462 crit_enter(); 463 464 if (cl->cl_head) 465 fairq_purgeq(cl); 466 467 pif = cl->cl_pif; 468 pif->pif_classes[cl->cl_pri] = NULL; 469 if (pif->pif_poll_cache == cl) 470 pif->pif_poll_cache = NULL; 471 if (pif->pif_maxpri == cl->cl_pri) { 472 for (pri = cl->cl_pri; pri >= 0; pri--) 473 if (pif->pif_classes[pri] != NULL) { 474 pif->pif_maxpri = pri; 475 break; 476 } 477 if (pri < 0) 478 pif->pif_maxpri = -1; 479 } 480 crit_exit(); 481 482 if (cl->cl_red != NULL) { 483 #ifdef ALTQ_RIO 484 if (cl->cl_qtype == Q_RIO) 485 rio_destroy((rio_t *)cl->cl_red); 486 #endif 487 #ifdef ALTQ_RED 488 if (cl->cl_qtype == Q_RED) 489 red_destroy(cl->cl_red); 490 #endif 491 } 492 kfree(cl->cl_buckets, M_ALTQ); 493 cl->cl_head = NULL; /* sanity */ 494 cl->cl_polled = NULL; /* sanity */ 495 cl->cl_buckets = NULL; /* sanity */ 496 kfree(cl, M_ALTQ); 497 498 return (0); 499 } 500 501 /* 502 * fairq_enqueue is an enqueue function to be registered to 503 * (*altq_enqueue) in struct ifaltq. 
504 */ 505 static int 506 fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) 507 { 508 struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; 509 struct fairq_class *cl; 510 int error; 511 int len; 512 int hash; 513 514 crit_enter(); 515 516 /* grab class set by classifier */ 517 if ((m->m_flags & M_PKTHDR) == 0) { 518 /* should not happen */ 519 if_printf(ifq->altq_ifp, "altq: packet does not have pkthdr\n"); 520 m_freem(m); 521 error = ENOBUFS; 522 goto done; 523 } 524 525 if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) { 526 cl = clh_to_clp(pif, m->m_pkthdr.pf.qid); 527 if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED) 528 hash = (int)m->m_pkthdr.pf.state_hash; 529 else 530 hash = 0; 531 } else { 532 cl = NULL; 533 hash = 0; 534 } 535 if (cl == NULL) { 536 cl = pif->pif_default; 537 if (cl == NULL) { 538 m_freem(m); 539 error = ENOBUFS; 540 goto done; 541 } 542 } 543 cl->cl_flags |= FARF_HAS_PACKETS; 544 cl->cl_pktattr = NULL; 545 len = m_pktlen(m); 546 if (fairq_addq(cl, m, hash) != 0) { 547 /* drop occurred. mbuf was freed in fairq_addq. */ 548 PKTCNTR_ADD(&cl->cl_dropcnt, len); 549 error = ENOBUFS; 550 goto done; 551 } 552 ifq->ifq_len++; 553 error = 0; 554 done: 555 crit_exit(); 556 return (error); 557 } 558 559 /* 560 * fairq_dequeue is a dequeue function to be registered to 561 * (*altq_dequeue) in struct ifaltq. 562 * 563 * note: ALTDQ_POLL returns the next packet without removing the packet 564 * from the queue. ALTDQ_REMOVE is a normal dequeue operation. 565 * ALTDQ_REMOVE must return the same packet if called immediately 566 * after ALTDQ_POLL. 
567 */ 568 static struct mbuf * 569 fairq_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op) 570 { 571 struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; 572 struct fairq_class *cl; 573 struct fairq_class *best_cl; 574 struct mbuf *best_m; 575 struct mbuf *m; 576 uint64_t cur_time = read_machclk(); 577 u_int best_scale; 578 u_int scale; 579 int pri; 580 int hit_limit; 581 582 if (ifq_is_empty(ifq)) { 583 /* no packet in the queue */ 584 KKASSERT(mpolled == NULL); 585 return (NULL); 586 } 587 588 crit_enter(); 589 if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { 590 best_cl = pif->pif_poll_cache; 591 m = fairq_getq(best_cl, cur_time); 592 pif->pif_poll_cache = NULL; 593 if (m) { 594 ifq->ifq_len--; 595 PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); 596 } 597 } else { 598 best_cl = NULL; 599 best_m = NULL; 600 best_scale = 0xFFFFFFFFU; 601 602 for (pri = pif->pif_maxpri; pri >= 0; pri--) { 603 if ((cl = pif->pif_classes[pri]) == NULL) 604 continue; 605 if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) 606 continue; 607 m = fairq_pollq(cl, cur_time, &hit_limit); 608 if (m == NULL) { 609 cl->cl_flags &= ~FARF_HAS_PACKETS; 610 continue; 611 } 612 613 /* 614 * We can halt the search immediately if the queue 615 * did not hit its bandwidth limit. 616 */ 617 if (hit_limit == 0) { 618 best_cl = cl; 619 best_m = m; 620 break; 621 } 622 623 /* 624 * Otherwise calculate the scale factor and select 625 * the queue with the lowest scale factor. This 626 * apportions any unused bandwidth weighted by 627 * the relative bandwidth specification. 
628 */ 629 scale = cl->cl_bw_current * 100 / cl->cl_bandwidth; 630 if (scale < best_scale) { 631 best_cl = cl; 632 best_m = m; 633 best_scale = scale; 634 } 635 } 636 637 if (op == ALTDQ_POLL) { 638 pif->pif_poll_cache = best_cl; 639 m = best_m; 640 } else if (best_cl) { 641 m = fairq_getq(best_cl, cur_time); 642 KKASSERT(best_m == m); 643 ifq->ifq_len--; 644 PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); 645 } else { 646 m = NULL; 647 } 648 } 649 crit_exit(); 650 KKASSERT(mpolled == NULL || mpolled == m); 651 return (m); 652 } 653 654 static int 655 fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash) 656 { 657 fairq_bucket_t *b; 658 u_int hindex; 659 uint64_t bw; 660 661 /* 662 * If the packet doesn't have any keep state put it on the end of 663 * our queue. XXX this can result in out of order delivery. 664 */ 665 if (hash == 0) { 666 if (cl->cl_head) 667 b = cl->cl_head->prev; 668 else 669 b = &cl->cl_buckets[0]; 670 } else { 671 hindex = hash & cl->cl_nbucket_mask; 672 b = &cl->cl_buckets[hindex]; 673 } 674 675 /* 676 * Add the bucket to the end of the circular list of active buckets. 677 * 678 * As a special case we add the bucket to the beginning of the list 679 * instead of the end if it was not previously on the list and if 680 * its traffic is less then the hog level. 
681 */ 682 if (b->in_use == 0) { 683 b->in_use = 1; 684 if (cl->cl_head == NULL) { 685 cl->cl_head = b; 686 b->next = b; 687 b->prev = b; 688 } else { 689 b->next = cl->cl_head; 690 b->prev = cl->cl_head->prev; 691 b->prev->next = b; 692 b->next->prev = b; 693 694 if (b->bw_delta && cl->cl_hogs_m1) { 695 bw = b->bw_bytes * machclk_freq / b->bw_delta; 696 if (bw < cl->cl_hogs_m1) 697 cl->cl_head = b; 698 } 699 } 700 } 701 702 #ifdef ALTQ_RIO 703 if (cl->cl_qtype == Q_RIO) 704 return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); 705 #endif 706 #ifdef ALTQ_RED 707 if (cl->cl_qtype == Q_RED) 708 return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); 709 #endif 710 if (qlen(&b->queue) >= qlimit(&b->queue)) { 711 m_freem(m); 712 return (-1); 713 } 714 715 if (cl->cl_flags & FARF_CLEARDSCP) 716 write_dsfield(m, cl->cl_pktattr, 0); 717 718 _addq(&b->queue, m); 719 720 return (0); 721 } 722 723 static struct mbuf * 724 fairq_getq(struct fairq_class *cl, uint64_t cur_time) 725 { 726 fairq_bucket_t *b; 727 struct mbuf *m; 728 729 b = fairq_selectq(cl, 0); 730 if (b == NULL) 731 m = NULL; 732 #ifdef ALTQ_RIO 733 else if (cl->cl_qtype == Q_RIO) 734 m = rio_getq((rio_t *)cl->cl_red, &b->queue); 735 #endif 736 #ifdef ALTQ_RED 737 else if (cl->cl_qtype == Q_RED) 738 m = red_getq(cl->cl_red, &b->queue); 739 #endif 740 else 741 m = _getq(&b->queue); 742 743 /* 744 * Calculate the BW change 745 */ 746 if (m != NULL) { 747 uint64_t delta; 748 749 /* 750 * Per-class bandwidth calculation 751 */ 752 delta = (cur_time - cl->cl_last_time); 753 if (delta > machclk_freq * 8) 754 delta = machclk_freq * 8; 755 cl->cl_bw_delta += delta; 756 cl->cl_bw_bytes += m->m_pkthdr.len; 757 cl->cl_last_time = cur_time; 758 if (cl->cl_bw_delta > machclk_freq) { 759 cl->cl_bw_delta -= cl->cl_bw_delta >> 2; 760 cl->cl_bw_bytes -= cl->cl_bw_bytes >> 2; 761 } 762 763 /* 764 * Per-bucket bandwidth calculation 765 */ 766 delta = (cur_time - b->last_time); 767 if (delta > machclk_freq * 8) 768 
delta = machclk_freq * 8; 769 b->bw_delta += delta; 770 b->bw_bytes += m->m_pkthdr.len; 771 b->last_time = cur_time; 772 if (b->bw_delta > machclk_freq) { 773 b->bw_delta -= b->bw_delta >> 2; 774 b->bw_bytes -= b->bw_bytes >> 2; 775 } 776 } 777 return(m); 778 } 779 780 /* 781 * Figure out what the next packet would be if there were no limits. If 782 * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise 783 * it is set to 0. A non-NULL mbuf is returned either way. 784 */ 785 static struct mbuf * 786 fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) 787 { 788 fairq_bucket_t *b; 789 struct mbuf *m; 790 uint64_t delta; 791 uint64_t bw; 792 793 *hit_limit = 0; 794 b = fairq_selectq(cl, 1); 795 if (b == NULL) 796 return(NULL); 797 m = qhead(&b->queue); 798 799 /* 800 * Did this packet exceed the class bandwidth? Calculate the 801 * bandwidth component of the packet. 802 * 803 * - Calculate bytes per second 804 */ 805 delta = cur_time - cl->cl_last_time; 806 if (delta > machclk_freq * 8) 807 delta = machclk_freq * 8; 808 cl->cl_bw_delta += delta; 809 cl->cl_last_time = cur_time; 810 if (cl->cl_bw_delta) { 811 bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; 812 813 if (bw > cl->cl_bandwidth) 814 *hit_limit = 1; 815 cl->cl_bw_current = bw; 816 #if 0 817 kprintf("BW %6lld relative to %6u %d queue %p\n", 818 bw, cl->cl_bandwidth, *hit_limit, b); 819 #endif 820 } 821 return(m); 822 } 823 824 /* 825 * Locate the next queue we want to pull a packet out of. This code 826 * is also responsible for removing empty buckets from the circular list. 
827 */ 828 static 829 fairq_bucket_t * 830 fairq_selectq(struct fairq_class *cl, int ispoll) 831 { 832 fairq_bucket_t *b; 833 uint64_t bw; 834 835 if (ispoll == 0 && cl->cl_polled) { 836 b = cl->cl_polled; 837 cl->cl_polled = NULL; 838 return(b); 839 } 840 841 while ((b = cl->cl_head) != NULL) { 842 /* 843 * Remove empty queues from consideration 844 */ 845 if (qempty(&b->queue)) { 846 b->in_use = 0; 847 cl->cl_head = b->next; 848 if (cl->cl_head == b) { 849 cl->cl_head = NULL; 850 } else { 851 b->next->prev = b->prev; 852 b->prev->next = b->next; 853 } 854 continue; 855 } 856 857 /* 858 * Advance the round robin. Queues with bandwidths less 859 * then the hog bandwidth are allowed to burst. 860 */ 861 if (cl->cl_hogs_m1 == 0) { 862 cl->cl_head = b->next; 863 } else if (b->bw_delta) { 864 bw = b->bw_bytes * machclk_freq / b->bw_delta; 865 if (bw >= cl->cl_hogs_m1) { 866 cl->cl_head = b->next; 867 } 868 /* 869 * XXX TODO - 870 */ 871 } 872 873 /* 874 * Return bucket b. 875 */ 876 break; 877 } 878 if (ispoll) 879 cl->cl_polled = b; 880 return(b); 881 } 882 883 static void 884 fairq_purgeq(struct fairq_class *cl) 885 { 886 fairq_bucket_t *b; 887 struct mbuf *m; 888 889 while ((b = fairq_selectq(cl, 0)) != NULL) { 890 while ((m = _getq(&b->queue)) != NULL) { 891 PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); 892 m_freem(m); 893 } 894 KKASSERT(qlen(&b->queue) == 0); 895 } 896 } 897 898 static void 899 get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) 900 { 901 fairq_bucket_t *b; 902 903 sp->class_handle = cl->cl_handle; 904 sp->qlimit = cl->cl_qlimit; 905 sp->xmit_cnt = cl->cl_xmitcnt; 906 sp->drop_cnt = cl->cl_dropcnt; 907 sp->qtype = cl->cl_qtype; 908 sp->qlength = 0; 909 910 if (cl->cl_head) { 911 b = cl->cl_head; 912 do { 913 sp->qlength += qlen(&b->queue); 914 b = b->next; 915 } while (b != cl->cl_head); 916 } 917 918 #ifdef ALTQ_RED 919 if (cl->cl_qtype == Q_RED) 920 red_getstats(cl->cl_red, &sp->red[0]); 921 #endif 922 #ifdef ALTQ_RIO 923 if 
(cl->cl_qtype == Q_RIO) 924 rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); 925 #endif 926 } 927 928 /* convert a class handle to the corresponding class pointer */ 929 static struct fairq_class * 930 clh_to_clp(struct fairq_if *pif, uint32_t chandle) 931 { 932 struct fairq_class *cl; 933 int idx; 934 935 if (chandle == 0) 936 return (NULL); 937 938 for (idx = pif->pif_maxpri; idx >= 0; idx--) 939 if ((cl = pif->pif_classes[idx]) != NULL && 940 cl->cl_handle == chandle) 941 return (cl); 942 943 return (NULL); 944 } 945 946 #endif /* ALTQ_FAIRQ */ 947