/*
 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq. The fairq algorithm is completely different than priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it. Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: red etc is per bucket, not overall.
 *	- better service curve support.
75 * 76 * EXAMPLE: 77 * 78 * altq on em0 fairq bandwidth 650Kb queue { std, bulk } 79 * queue std priority 3 bandwidth 400Kb \ 80 * fairq (buckets 64, default, hogs 1Kb) qlimit 50 81 * queue bulk priority 2 bandwidth 100Kb \ 82 * fairq (buckets 64, hogs 1Kb) qlimit 50 83 * 84 * pass out on em0 from any to any keep state queue std 85 * pass out on em0 inet proto tcp ..... port ... keep state queue bulk 86 */ 87 #include "opt_altq.h" 88 #include "opt_inet.h" 89 #include "opt_inet6.h" 90 91 #ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ 92 93 #include <sys/param.h> 94 #include <sys/malloc.h> 95 #include <sys/mbuf.h> 96 #include <sys/socket.h> 97 #include <sys/sockio.h> 98 #include <sys/systm.h> 99 #include <sys/proc.h> 100 #include <sys/errno.h> 101 #include <sys/kernel.h> 102 #include <sys/queue.h> 103 #include <sys/thread.h> 104 105 #include <net/if.h> 106 #include <net/ifq_var.h> 107 #include <netinet/in.h> 108 109 #include <net/pf/pfvar.h> 110 #include <net/altq/altq.h> 111 #include <net/altq/altq_fairq.h> 112 113 #include <sys/thread2.h> 114 115 /* 116 * function prototypes 117 */ 118 static int fairq_clear_interface(struct fairq_if *); 119 static int fairq_request(struct ifaltq *, int, void *); 120 static void fairq_purge(struct fairq_if *); 121 static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); 122 static int fairq_class_destroy(struct fairq_class *); 123 static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 124 static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int); 125 126 static int fairq_addq(struct fairq_class *, struct mbuf *, int hash); 127 static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); 128 static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); 129 static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); 130 static void fairq_purgeq(struct fairq_class *); 131 132 static void get_class_stats(struct 
fairq_classstats *, struct fairq_class *); 133 static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); 134 135 int 136 fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq) 137 { 138 return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc, 139 fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL); 140 } 141 142 int 143 fairq_add_altq(struct pf_altq *a) 144 { 145 struct fairq_if *pif; 146 struct ifnet *ifp; 147 148 if ((ifp = ifunit(a->ifname)) == NULL) 149 return (EINVAL); 150 if (!ifq_is_ready(&ifp->if_snd)) 151 return (ENODEV); 152 153 pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO); 154 pif->pif_bandwidth = a->ifbandwidth; 155 pif->pif_maxpri = -1; 156 pif->pif_ifq = &ifp->if_snd; 157 ifq_purge(&ifp->if_snd); 158 159 /* keep the state in pf_altq */ 160 a->altq_disc = pif; 161 162 return (0); 163 } 164 165 int 166 fairq_remove_altq(struct pf_altq *a) 167 { 168 struct fairq_if *pif; 169 170 if ((pif = a->altq_disc) == NULL) 171 return (EINVAL); 172 a->altq_disc = NULL; 173 174 fairq_clear_interface(pif); 175 176 kfree(pif, M_ALTQ); 177 return (0); 178 } 179 180 static int 181 fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif) 182 { 183 struct fairq_class *cl; 184 185 KKASSERT(a->priority < FAIRQ_MAXPRI); 186 KKASSERT(a->qid != 0); 187 188 if (pif->pif_classes[a->priority] != NULL) 189 return (EBUSY); 190 if (clh_to_clp(pif, a->qid) != NULL) 191 return (EBUSY); 192 193 cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, 194 &a->pq_u.fairq_opts, a->qid); 195 if (cl == NULL) 196 return (ENOMEM); 197 198 return (0); 199 } 200 201 int 202 fairq_add_queue(struct pf_altq *a) 203 { 204 struct fairq_if *pif; 205 struct ifaltq *ifq; 206 int error; 207 208 /* check parameters */ 209 if (a->priority >= FAIRQ_MAXPRI) 210 return (EINVAL); 211 if (a->qid == 0) 212 return (EINVAL); 213 214 /* XXX not MP safe */ 215 if ((pif = a->altq_disc) == NULL) 216 return (EINVAL); 217 ifq = pif->pif_ifq; 218 219 ALTQ_LOCK(ifq); 220 error = 
fairq_add_queue_locked(a, pif); 221 ALTQ_UNLOCK(ifq); 222 223 return error; 224 } 225 226 static int 227 fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif) 228 { 229 struct fairq_class *cl; 230 231 if ((cl = clh_to_clp(pif, a->qid)) == NULL) 232 return (EINVAL); 233 234 return (fairq_class_destroy(cl)); 235 } 236 237 int 238 fairq_remove_queue(struct pf_altq *a) 239 { 240 struct fairq_if *pif; 241 struct ifaltq *ifq; 242 int error; 243 244 /* XXX not MP safe */ 245 if ((pif = a->altq_disc) == NULL) 246 return (EINVAL); 247 ifq = pif->pif_ifq; 248 249 ALTQ_LOCK(ifq); 250 error = fairq_remove_queue_locked(a, pif); 251 ALTQ_UNLOCK(ifq); 252 253 return error; 254 } 255 256 int 257 fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) 258 { 259 struct fairq_if *pif; 260 struct fairq_class *cl; 261 struct fairq_classstats stats; 262 struct ifaltq *ifq; 263 int error = 0; 264 265 if (*nbytes < sizeof(stats)) 266 return (EINVAL); 267 268 /* XXX not MP safe */ 269 if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) 270 return (EBADF); 271 ifq = pif->pif_ifq; 272 273 ALTQ_LOCK(ifq); 274 275 if ((cl = clh_to_clp(pif, a->qid)) == NULL) { 276 ALTQ_UNLOCK(ifq); 277 return (EINVAL); 278 } 279 280 get_class_stats(&stats, cl); 281 282 ALTQ_UNLOCK(ifq); 283 284 if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) 285 return (error); 286 *nbytes = sizeof(stats); 287 return (0); 288 } 289 290 /* 291 * bring the interface back to the initial state by discarding 292 * all the filters and classes. 
293 */ 294 static int 295 fairq_clear_interface(struct fairq_if *pif) 296 { 297 struct fairq_class *cl; 298 int pri; 299 300 /* clear out the classes */ 301 for (pri = 0; pri <= pif->pif_maxpri; pri++) { 302 if ((cl = pif->pif_classes[pri]) != NULL) 303 fairq_class_destroy(cl); 304 } 305 306 return (0); 307 } 308 309 static int 310 fairq_request(struct ifaltq *ifq, int req, void *arg) 311 { 312 struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; 313 314 crit_enter(); 315 switch (req) { 316 case ALTRQ_PURGE: 317 fairq_purge(pif); 318 break; 319 } 320 crit_exit(); 321 return (0); 322 } 323 324 /* discard all the queued packets on the interface */ 325 static void 326 fairq_purge(struct fairq_if *pif) 327 { 328 struct fairq_class *cl; 329 int pri; 330 331 for (pri = 0; pri <= pif->pif_maxpri; pri++) { 332 if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) 333 fairq_purgeq(cl); 334 } 335 if (ifq_is_enabled(pif->pif_ifq)) 336 pif->pif_ifq->ifq_len = 0; 337 } 338 339 static struct fairq_class * 340 fairq_class_create(struct fairq_if *pif, int pri, int qlimit, 341 u_int bandwidth, struct fairq_opts *opts, int qid) 342 { 343 struct fairq_class *cl; 344 int flags = opts->flags; 345 u_int nbuckets = opts->nbuckets; 346 int i; 347 348 #ifndef ALTQ_RED 349 if (flags & FARF_RED) { 350 #ifdef ALTQ_DEBUG 351 kprintf("fairq_class_create: RED not configured for FAIRQ!\n"); 352 #endif 353 return (NULL); 354 } 355 #endif 356 if (nbuckets == 0) 357 nbuckets = 256; 358 if (nbuckets > FAIRQ_MAX_BUCKETS) 359 nbuckets = FAIRQ_MAX_BUCKETS; 360 /* enforce power-of-2 size */ 361 while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) 362 ++nbuckets; 363 364 if ((cl = pif->pif_classes[pri]) != NULL) { 365 /* modify the class instead of creating a new one */ 366 crit_enter(); 367 if (cl->cl_head) 368 fairq_purgeq(cl); 369 crit_exit(); 370 #ifdef ALTQ_RIO 371 if (cl->cl_qtype == Q_RIO) 372 rio_destroy((rio_t *)cl->cl_red); 373 #endif 374 #ifdef ALTQ_RED 375 if (cl->cl_qtype == 
Q_RED) 376 red_destroy(cl->cl_red); 377 #endif 378 } else { 379 cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO); 380 cl->cl_nbuckets = nbuckets; 381 cl->cl_nbucket_mask = nbuckets - 1; 382 383 cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) * 384 cl->cl_nbuckets, 385 M_ALTQ, M_WAITOK | M_ZERO); 386 cl->cl_head = NULL; 387 } 388 389 pif->pif_classes[pri] = cl; 390 if (flags & FARF_DEFAULTCLASS) 391 pif->pif_default = cl; 392 if (qlimit == 0) 393 qlimit = 50; /* use default */ 394 cl->cl_qlimit = qlimit; 395 for (i = 0; i < cl->cl_nbuckets; ++i) { 396 qlimit(&cl->cl_buckets[i].queue) = qlimit; 397 } 398 cl->cl_bandwidth = bandwidth / 8; 399 cl->cl_qtype = Q_DROPTAIL; 400 cl->cl_flags = flags & FARF_USERFLAGS; 401 cl->cl_pri = pri; 402 if (pri > pif->pif_maxpri) 403 pif->pif_maxpri = pri; 404 cl->cl_pif = pif; 405 cl->cl_handle = qid; 406 cl->cl_hogs_m1 = opts->hogs_m1 / 8; 407 cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ 408 409 #ifdef ALTQ_RED 410 if (flags & (FARF_RED|FARF_RIO)) { 411 int red_flags, red_pkttime; 412 413 red_flags = 0; 414 if (flags & FARF_ECN) 415 red_flags |= REDF_ECN; 416 #ifdef ALTQ_RIO 417 if (flags & FARF_CLEARDSCP) 418 red_flags |= RIOF_CLEARDSCP; 419 #endif 420 if (pif->pif_bandwidth < 8) 421 red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ 422 else 423 red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu 424 * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); 425 #ifdef ALTQ_RIO 426 if (flags & FARF_RIO) { 427 cl->cl_red = (red_t *)rio_alloc(0, NULL, 428 red_flags, red_pkttime); 429 if (cl->cl_red != NULL) 430 cl->cl_qtype = Q_RIO; 431 } else 432 #endif 433 if (flags & FARF_RED) { 434 cl->cl_red = red_alloc(0, 0, 435 cl->cl_qlimit * 10/100, 436 cl->cl_qlimit * 30/100, 437 red_flags, red_pkttime); 438 if (cl->cl_red != NULL) 439 cl->cl_qtype = Q_RED; 440 } 441 } 442 #endif /* ALTQ_RED */ 443 444 return (cl); 445 } 446 447 static int 448 fairq_class_destroy(struct fairq_class *cl) 449 { 450 struct fairq_if *pif; 451 int pri; 452 
453 crit_enter(); 454 455 if (cl->cl_head) 456 fairq_purgeq(cl); 457 458 pif = cl->cl_pif; 459 pif->pif_classes[cl->cl_pri] = NULL; 460 if (pif->pif_poll_cache == cl) 461 pif->pif_poll_cache = NULL; 462 if (pif->pif_maxpri == cl->cl_pri) { 463 for (pri = cl->cl_pri; pri >= 0; pri--) 464 if (pif->pif_classes[pri] != NULL) { 465 pif->pif_maxpri = pri; 466 break; 467 } 468 if (pri < 0) 469 pif->pif_maxpri = -1; 470 } 471 crit_exit(); 472 473 if (cl->cl_red != NULL) { 474 #ifdef ALTQ_RIO 475 if (cl->cl_qtype == Q_RIO) 476 rio_destroy((rio_t *)cl->cl_red); 477 #endif 478 #ifdef ALTQ_RED 479 if (cl->cl_qtype == Q_RED) 480 red_destroy(cl->cl_red); 481 #endif 482 } 483 kfree(cl->cl_buckets, M_ALTQ); 484 cl->cl_head = NULL; /* sanity */ 485 cl->cl_polled = NULL; /* sanity */ 486 cl->cl_buckets = NULL; /* sanity */ 487 kfree(cl, M_ALTQ); 488 489 return (0); 490 } 491 492 /* 493 * fairq_enqueue is an enqueue function to be registered to 494 * (*altq_enqueue) in struct ifaltq. 495 */ 496 static int 497 fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) 498 { 499 struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; 500 struct fairq_class *cl; 501 int error; 502 int len; 503 int hash; 504 505 crit_enter(); 506 507 /* grab class set by classifier */ 508 if ((m->m_flags & M_PKTHDR) == 0) { 509 /* should not happen */ 510 if_printf(ifq->altq_ifp, "altq: packet does not have pkthdr\n"); 511 m_freem(m); 512 error = ENOBUFS; 513 goto done; 514 } 515 516 if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) { 517 cl = clh_to_clp(pif, m->m_pkthdr.pf.qid); 518 if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED) 519 hash = (int)m->m_pkthdr.pf.state_hash; 520 else 521 hash = 0; 522 } else { 523 cl = NULL; 524 hash = 0; 525 } 526 if (cl == NULL) { 527 cl = pif->pif_default; 528 if (cl == NULL) { 529 m_freem(m); 530 error = ENOBUFS; 531 goto done; 532 } 533 } 534 cl->cl_flags |= FARF_HAS_PACKETS; 535 cl->cl_pktattr = NULL; 536 len = m_pktlen(m); 537 if (fairq_addq(cl, 
m, hash) != 0) { 538 /* drop occurred. mbuf was freed in fairq_addq. */ 539 PKTCNTR_ADD(&cl->cl_dropcnt, len); 540 error = ENOBUFS; 541 goto done; 542 } 543 ifq->ifq_len++; 544 error = 0; 545 done: 546 crit_exit(); 547 return (error); 548 } 549 550 /* 551 * fairq_dequeue is a dequeue function to be registered to 552 * (*altq_dequeue) in struct ifaltq. 553 * 554 * note: ALTDQ_POLL returns the next packet without removing the packet 555 * from the queue. ALTDQ_REMOVE is a normal dequeue operation. 556 * ALTDQ_REMOVE must return the same packet if called immediately 557 * after ALTDQ_POLL. 558 */ 559 static struct mbuf * 560 fairq_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op) 561 { 562 struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; 563 struct fairq_class *cl; 564 struct fairq_class *best_cl; 565 struct mbuf *best_m; 566 struct mbuf *m; 567 uint64_t cur_time = read_machclk(); 568 int pri; 569 int hit_limit; 570 571 if (ifq_is_empty(ifq)) { 572 /* no packet in the queue */ 573 KKASSERT(mpolled == NULL); 574 return (NULL); 575 } 576 577 crit_enter(); 578 if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { 579 best_cl = pif->pif_poll_cache; 580 m = fairq_getq(best_cl, cur_time); 581 pif->pif_poll_cache = NULL; 582 if (m) { 583 ifq->ifq_len--; 584 PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); 585 } 586 } else { 587 best_cl = NULL; 588 best_m = NULL; 589 590 for (pri = pif->pif_maxpri; pri >= 0; pri--) { 591 if ((cl = pif->pif_classes[pri]) == NULL) 592 continue; 593 if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) 594 continue; 595 m = fairq_pollq(cl, cur_time, &hit_limit); 596 if (m == NULL) { 597 cl->cl_flags &= ~FARF_HAS_PACKETS; 598 continue; 599 } 600 601 /* 602 * Only override the best choice if we are under 603 * the BW limit. 604 */ 605 if (hit_limit == 0 || best_cl == NULL) { 606 best_cl = cl; 607 best_m = m; 608 } 609 610 /* 611 * Remember the highest priority mbuf in case we 612 * do not find any lower priority mbufs. 
613 */ 614 if (hit_limit) 615 continue; 616 break; 617 } 618 if (op == ALTDQ_POLL) { 619 pif->pif_poll_cache = best_cl; 620 m = best_m; 621 } else if (best_cl) { 622 m = fairq_getq(best_cl, cur_time); 623 KKASSERT(best_m == m); 624 ifq->ifq_len--; 625 PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); 626 } else { 627 m = NULL; 628 } 629 } 630 crit_exit(); 631 KKASSERT(mpolled == NULL || mpolled == m); 632 return (m); 633 } 634 635 static int 636 fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash) 637 { 638 fairq_bucket_t *b; 639 u_int hindex; 640 uint64_t bw; 641 642 /* 643 * If the packet doesn't have any keep state put it on the end of 644 * our queue. XXX this can result in out of order delivery. 645 */ 646 if (hash == 0) { 647 if (cl->cl_head) 648 b = cl->cl_head->prev; 649 else 650 b = &cl->cl_buckets[0]; 651 } else { 652 hindex = hash & cl->cl_nbucket_mask; 653 b = &cl->cl_buckets[hindex]; 654 } 655 656 /* 657 * Add the bucket to the end of the circular list of active buckets. 658 * 659 * As a special case we add the bucket to the beginning of the list 660 * instead of the end if it was not previously on the list and if 661 * its traffic is less then the hog level. 
662 */ 663 if (b->in_use == 0) { 664 b->in_use = 1; 665 if (cl->cl_head == NULL) { 666 cl->cl_head = b; 667 b->next = b; 668 b->prev = b; 669 } else { 670 b->next = cl->cl_head; 671 b->prev = cl->cl_head->prev; 672 b->prev->next = b; 673 b->next->prev = b; 674 675 if (b->bw_delta && cl->cl_hogs_m1) { 676 bw = b->bw_bytes * machclk_freq / b->bw_delta; 677 if (bw < cl->cl_hogs_m1) 678 cl->cl_head = b; 679 } 680 } 681 } 682 683 #ifdef ALTQ_RIO 684 if (cl->cl_qtype == Q_RIO) 685 return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); 686 #endif 687 #ifdef ALTQ_RED 688 if (cl->cl_qtype == Q_RED) 689 return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); 690 #endif 691 if (qlen(&b->queue) >= qlimit(&b->queue)) { 692 m_freem(m); 693 return (-1); 694 } 695 696 if (cl->cl_flags & FARF_CLEARDSCP) 697 write_dsfield(m, cl->cl_pktattr, 0); 698 699 _addq(&b->queue, m); 700 701 return (0); 702 } 703 704 static struct mbuf * 705 fairq_getq(struct fairq_class *cl, uint64_t cur_time) 706 { 707 fairq_bucket_t *b; 708 struct mbuf *m; 709 710 b = fairq_selectq(cl, 0); 711 if (b == NULL) 712 m = NULL; 713 #ifdef ALTQ_RIO 714 else if (cl->cl_qtype == Q_RIO) 715 m = rio_getq((rio_t *)cl->cl_red, &b->queue); 716 #endif 717 #ifdef ALTQ_RED 718 else if (cl->cl_qtype == Q_RED) 719 m = red_getq(cl->cl_red, &b->queue); 720 #endif 721 else 722 m = _getq(&b->queue); 723 724 /* 725 * Calculate the BW change 726 */ 727 if (m != NULL) { 728 uint64_t delta; 729 730 /* 731 * Per-class bandwidth calculation 732 */ 733 delta = (cur_time - cl->cl_last_time); 734 if (delta > machclk_freq * 8) 735 delta = machclk_freq * 8; 736 cl->cl_bw_delta += delta; 737 cl->cl_bw_bytes += m->m_pkthdr.len; 738 cl->cl_last_time = cur_time; 739 cl->cl_bw_delta -= cl->cl_bw_delta >> 3; 740 cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3; 741 742 /* 743 * Per-bucket bandwidth calculation 744 */ 745 delta = (cur_time - b->last_time); 746 if (delta > machclk_freq * 8) 747 delta = machclk_freq * 8; 748 b->bw_delta += 
delta; 749 b->bw_bytes += m->m_pkthdr.len; 750 b->last_time = cur_time; 751 b->bw_delta -= b->bw_delta >> 3; 752 b->bw_bytes -= b->bw_bytes >> 3; 753 } 754 return(m); 755 } 756 757 /* 758 * Figure out what the next packet would be if there were no limits. If 759 * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise 760 * it is set to 0. A non-NULL mbuf is returned either way. 761 */ 762 static struct mbuf * 763 fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) 764 { 765 fairq_bucket_t *b; 766 struct mbuf *m; 767 uint64_t delta; 768 uint64_t bw; 769 770 *hit_limit = 0; 771 b = fairq_selectq(cl, 1); 772 if (b == NULL) 773 return(NULL); 774 m = qhead(&b->queue); 775 776 /* 777 * Did this packet exceed the class bandwidth? Calculate the 778 * bandwidth component of the packet. 779 * 780 * - Calculate bytes per second 781 */ 782 delta = cur_time - cl->cl_last_time; 783 if (delta > machclk_freq * 8) 784 delta = machclk_freq * 8; 785 cl->cl_bw_delta += delta; 786 cl->cl_last_time = cur_time; 787 if (cl->cl_bw_delta) { 788 bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; 789 790 if (bw > cl->cl_bandwidth) 791 *hit_limit = 1; 792 #if 0 793 kprintf("BW %6lld relative to %6u %d queue %p\n", 794 bw, cl->cl_bandwidth, *hit_limit, b); 795 #endif 796 } 797 return(m); 798 } 799 800 /* 801 * Locate the next queue we want to pull a packet out of. This code 802 * is also responsible for removing empty buckets from the circular list. 
803 */ 804 static 805 fairq_bucket_t * 806 fairq_selectq(struct fairq_class *cl, int ispoll) 807 { 808 fairq_bucket_t *b; 809 uint64_t bw; 810 811 if (ispoll == 0 && cl->cl_polled) { 812 b = cl->cl_polled; 813 cl->cl_polled = NULL; 814 return(b); 815 } 816 817 while ((b = cl->cl_head) != NULL) { 818 /* 819 * Remove empty queues from consideration 820 */ 821 if (qempty(&b->queue)) { 822 b->in_use = 0; 823 cl->cl_head = b->next; 824 if (cl->cl_head == b) { 825 cl->cl_head = NULL; 826 } else { 827 b->next->prev = b->prev; 828 b->prev->next = b->next; 829 } 830 continue; 831 } 832 833 /* 834 * Advance the round robin. Queues with bandwidths less 835 * then the hog bandwidth are allowed to burst. 836 */ 837 if (cl->cl_hogs_m1 == 0) { 838 cl->cl_head = b->next; 839 } else if (b->bw_delta) { 840 bw = b->bw_bytes * machclk_freq / b->bw_delta; 841 if (bw >= cl->cl_hogs_m1) { 842 cl->cl_head = b->next; 843 } 844 /* 845 * XXX TODO - 846 */ 847 } 848 849 /* 850 * Return bucket b. 851 */ 852 break; 853 } 854 if (ispoll) 855 cl->cl_polled = b; 856 return(b); 857 } 858 859 static void 860 fairq_purgeq(struct fairq_class *cl) 861 { 862 fairq_bucket_t *b; 863 struct mbuf *m; 864 865 while ((b = fairq_selectq(cl, 0)) != NULL) { 866 while ((m = _getq(&b->queue)) != NULL) { 867 PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); 868 m_freem(m); 869 } 870 KKASSERT(qlen(&b->queue) == 0); 871 } 872 } 873 874 static void 875 get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) 876 { 877 fairq_bucket_t *b; 878 879 sp->class_handle = cl->cl_handle; 880 sp->qlimit = cl->cl_qlimit; 881 sp->xmit_cnt = cl->cl_xmitcnt; 882 sp->drop_cnt = cl->cl_dropcnt; 883 sp->qtype = cl->cl_qtype; 884 sp->qlength = 0; 885 886 if (cl->cl_head) { 887 b = cl->cl_head; 888 do { 889 sp->qlength += qlen(&b->queue); 890 b = b->next; 891 } while (b != cl->cl_head); 892 } 893 894 #ifdef ALTQ_RED 895 if (cl->cl_qtype == Q_RED) 896 red_getstats(cl->cl_red, &sp->red[0]); 897 #endif 898 #ifdef ALTQ_RIO 899 if 
(cl->cl_qtype == Q_RIO) 900 rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); 901 #endif 902 } 903 904 /* convert a class handle to the corresponding class pointer */ 905 static struct fairq_class * 906 clh_to_clp(struct fairq_if *pif, uint32_t chandle) 907 { 908 struct fairq_class *cl; 909 int idx; 910 911 if (chandle == 0) 912 return (NULL); 913 914 for (idx = pif->pif_maxpri; idx >= 0; idx--) 915 if ((cl = pif->pif_classes[idx]) != NULL && 916 cl->cl_handle == chandle) 917 return (cl); 918 919 return (NULL); 920 } 921 922 #endif /* ALTQ_FAIRQ */ 923