/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different from priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it.  Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: RED etc. is applied per-bucket, not overall.
 *	- better service curve support.
 *
 * EXAMPLE:
 *
 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
 *  queue std priority 3 bandwidth 200Kb \
 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
 *  queue bulk priority 2 bandwidth 100Kb \
 *	fairq (buckets 64, hogs 1Kb) qlimit 50
 *
 * NOTE: When the aggregate bandwidth is less than the link bandwidth
 *	 any remaining bandwidth is dynamically assigned using the
 *	 existing bandwidth specs as weightings.
 *
 *  pass out on em0 from any to any keep state queue std
 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
 */
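/*
 * Worked example of the dynamic weighting above (illustrative arithmetic,
 * not taken from this file): with the 650Kb link, std (200Kb) and bulk
 * (100Kb) leave 350Kb unclaimed.  The remainder is handed out in
 * proportion to the specs, i.e. 2:1, so when both queues are saturated
 * std can use roughly 200 + 233 = 433Kb and bulk 100 + 117 = 217Kb.
 */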
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/thread.h>

#include <net/if.h>
#include <net/ifq_var.h>
#include <netinet/in.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>
#include <net/altq/altq_fairq.h>

#include <sys/thread2.h>

#define FAIRQ_SUBQ_INDEX	ALTQ_SUBQ_INDEX_DEFAULT
#define FAIRQ_LOCK(ifq) \
	ALTQ_SQ_LOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])
#define FAIRQ_UNLOCK(ifq) \
	ALTQ_SQ_UNLOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])

/*
 * function prototypes
 */
static int	fairq_clear_interface(struct fairq_if *);
static int	fairq_request(struct ifaltq_subque *, int, void *);
static void	fairq_purge(struct fairq_if *);
static struct fairq_class *fairq_class_create(struct fairq_if *, int,
					int, u_int, struct fairq_opts *, int);
static int	fairq_class_destroy(struct fairq_class *);
static int	fairq_enqueue(struct ifaltq_subque *, struct mbuf *,
					struct altq_pktattr *);
static struct mbuf *fairq_dequeue(struct ifaltq_subque *, int);

static int	fairq_addq(struct fairq_class *, struct mbuf *, int hash);
static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
static fairq_bucket_t *fairq_selectq(struct fairq_class *);
static void	fairq_purgeq(struct fairq_class *);

static void	get_class_stats(struct fairq_classstats *,
					struct fairq_class *);
static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
int
fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq)
{
	return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc, ifq_mapsubq_default,
	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
}

int
fairq_add_altq(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifnet *ifp;

	ifnet_lock();

	if ((ifp = ifunit(a->ifname)) == NULL) {
		ifnet_unlock();
		return (EINVAL);
	}
	if (!ifq_is_ready(&ifp->if_snd)) {
		ifnet_unlock();
		return (ENODEV);
	}

	pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO);
	pif->pif_bandwidth = a->ifbandwidth;
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;
	ifq_purge_all(&ifp->if_snd);

	ifnet_unlock();

	/* keep the state in pf_altq */
	a->altq_disc = pif;

	return (0);
}

int
fairq_remove_altq(struct pf_altq *a)
{
	struct fairq_if *pif;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	fairq_clear_interface(pif);

	kfree(pif, M_ALTQ);
	return (0);
}

static int
fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	KKASSERT(a->priority < FAIRQ_MAXPRI);
	KKASSERT(a->qid != 0);

	if (pif->pif_classes[a->priority] != NULL)
		return (EBUSY);
	if (clh_to_clp(pif, a->qid) != NULL)
		return (EBUSY);

	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
			       &a->pq_u.fairq_opts, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

int
fairq_add_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* check parameters */
	if (a->priority >= FAIRQ_MAXPRI)
		return (EINVAL);
	if (a->qid == 0)
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_add_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

static int
fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
		return (EINVAL);

	return (fairq_class_destroy(cl));
}

int
fairq_remove_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_remove_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

int
fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	struct fairq_if *pif;
	struct fairq_class *cl;
	struct fairq_classstats stats;
	struct ifaltq *ifq;
	int error = 0;

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	ifnet_lock();

	/* XXX not MP safe */
	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) {
		ifnet_unlock();
		return (EBADF);
	}
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);

	if ((cl = clh_to_clp(pif, a->qid)) == NULL) {
		FAIRQ_UNLOCK(ifq);
		ifnet_unlock();
		return (EINVAL);
	}

	get_class_stats(&stats, cl);

	FAIRQ_UNLOCK(ifq);

	ifnet_unlock();

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);
	*nbytes = sizeof(stats);
	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
fairq_clear_interface(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	/* clear out the classes */
	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL)
			fairq_class_destroy(cl);
	}

	return (0);
}

static int
fairq_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;

	crit_enter();
	switch (req) {
	case ALTRQ_PURGE:
		if (ifsq_get_index(ifsq) == FAIRQ_SUBQ_INDEX) {
			fairq_purge(pif);
		} else {
			/*
			 * Race happened, the unrelated subqueue was
			 * picked during the packet scheduler transition.
			 */
			ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		}
		break;
	}
	crit_exit();
	return (0);
}

/* discard all the queued packets on the interface */
static void
fairq_purge(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
			fairq_purgeq(cl);
	}
	if (ifq_is_enabled(pif->pif_ifq))
		ALTQ_SQ_CNTR_RESET(&pif->pif_ifq->altq_subq[FAIRQ_SUBQ_INDEX]);
}

static struct fairq_class *
fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
		   u_int bandwidth, struct fairq_opts *opts, int qid)
{
	struct fairq_class *cl;
	int flags = opts->flags;
	u_int nbuckets = opts->nbuckets;
	int i;

#ifndef ALTQ_RED
	if (flags & FARF_RED) {
#ifdef ALTQ_DEBUG
		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
#endif
		return (NULL);
	}
#endif
	if (nbuckets == 0)
		nbuckets = 256;
	if (nbuckets > FAIRQ_MAX_BUCKETS)
		nbuckets = FAIRQ_MAX_BUCKETS;
	/*
	 * Enforce a power-of-2 bucket count: (n ^ (n - 1)) equals
	 * (n << 1) - 1 only when n is a power of 2, so bump n until
	 * the test passes (e.g. 100 becomes 128; 64 passes unchanged).
	 */
	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
		++nbuckets;

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		crit_enter();
		if (cl->cl_head)
			fairq_purgeq(cl);
		crit_exit();
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	} else {
		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_nbuckets = nbuckets;
		cl->cl_nbucket_mask = nbuckets - 1;

		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
					 cl->cl_nbuckets,
					 M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_head = NULL;
	}

	pif->pif_classes[pri] = cl;
	if (flags & FARF_DEFAULTCLASS)
		pif->pif_default = cl;
	if (qlimit == 0)
		qlimit = 50;	/* use default */
	cl->cl_qlimit = qlimit;
	for (i = 0; i < cl->cl_nbuckets; ++i) {
		qlimit(&cl->cl_buckets[i].queue) = qlimit;
	}
	cl->cl_bandwidth = bandwidth / 8;	/* cvt to bytes per second */
	cl->cl_qtype = Q_DROPTAIL;
	cl->cl_flags = flags & FARF_USERFLAGS;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;
	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
	cl->cl_bw_current = 0;

#ifdef ALTQ_RED
	if (flags & (FARF_RED|FARF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & FARF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & FARF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		if (pif->pif_bandwidth < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime =
				(int64_t)pif->pif_ifq->altq_ifp->if_mtu *
				(1000 * 1000 * 1000) /
				(pif->pif_bandwidth / 8 + 1);
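		/*
		 * Rough numbers for the pkttime above (an assumed
		 * example, not from the original): with a 1500 byte
		 * MTU and pif_bandwidth of 1Mb/s (125000 bytes/sec),
		 * red_pkttime = 1500 * 1e9 / 125001 ~= 12 msec per
		 * full-size packet.
		 */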
#ifdef ALTQ_RIO
		if (flags & FARF_RIO) {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
					red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RIO;
		} else
#endif
		if (flags & FARF_RED) {
			cl->cl_red = red_alloc(0, 0,
			    cl->cl_qlimit * 10 / 100,
			    cl->cl_qlimit * 30 / 100,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RED;
		}
	}
#endif /* ALTQ_RED */

	return (cl);
}

static int
fairq_class_destroy(struct fairq_class *cl)
{
	struct fairq_if *pif;
	int pri;

	crit_enter();

	if (cl->cl_head)
		fairq_purgeq(cl);

	pif = cl->cl_pif;
	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_poll_cache == cl)
		pif->pif_poll_cache = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}
	crit_exit();

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	}
	kfree(cl->cl_buckets, M_ALTQ);
	cl->cl_head = NULL;	/* sanity */
	cl->cl_buckets = NULL;	/* sanity */
	kfree(cl, M_ALTQ);

	return (0);
}

/*
 * fairq_enqueue is an enqueue function to be registered to
 * (*ifsq_enqueue) in struct ifaltq_subque.
 */
static int
fairq_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
	      struct altq_pktattr *pktattr)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	int error;
	int len;
	int hash;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		m_freem(m);
		return ENOBUFS;
	}

	crit_enter();

	/* grab class set by classifier */
	M_ASSERTPKTHDR(m);
	if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
		cl = clh_to_clp(pif, m->m_pkthdr.pf.qid);
		if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED)
			hash = (int)m->m_pkthdr.pf.state_hash;
		else
			hash = 0;
	} else {
		cl = NULL;
		hash = 0;
	}
	if (cl == NULL) {
		cl = pif->pif_default;
		if (cl == NULL) {
			m_freem(m);
			error = ENOBUFS;
			goto done;
		}
	}
	cl->cl_flags |= FARF_HAS_PACKETS;
	cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (fairq_addq(cl, m, hash) != 0) {
		/* drop occurred.  mbuf was freed in fairq_addq. */
		PKTCNTR_ADD(&cl->cl_dropcnt, len);
		error = ENOBUFS;
		goto done;
	}
	ALTQ_SQ_PKTCNT_INC(ifsq);
	error = 0;
done:
	crit_exit();
	return (error);
}

/*
 * fairq_dequeue is a dequeue function to be registered to
 * (*ifsq_dequeue) in struct ifaltq_subque.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 */
static struct mbuf *
fairq_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	struct fairq_class *best_cl;
	struct mbuf *best_m;
	struct mbuf *m;
	uint64_t cur_time = read_machclk();
	uint64_t best_scale;
	uint64_t scale;
	int pri;
	int hit_limit;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		return NULL;
	}

	if (ifsq_is_empty(ifsq)) {
		/* no packet in the queue */
		return (NULL);
	}

	crit_enter();
	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
		best_cl = pif->pif_poll_cache;
		m = fairq_getq(best_cl, cur_time);
		pif->pif_poll_cache = NULL;
		if (m) {
			ALTQ_SQ_PKTCNT_DEC(ifsq);
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		}
	} else {
		best_cl = NULL;
		best_m = NULL;
		best_scale = 0xFFFFFFFFFFFFFFFFLLU;

		for (pri = pif->pif_maxpri; pri >= 0; pri--) {
			if ((cl = pif->pif_classes[pri]) == NULL)
				continue;
			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
				continue;
			m = fairq_pollq(cl, cur_time, &hit_limit);
			if (m == NULL) {
				cl->cl_flags &= ~FARF_HAS_PACKETS;
				continue;
			}

			/*
			 * We can halt the search immediately if the queue
			 * did not hit its bandwidth limit.
			 */
			if (hit_limit == 0) {
				best_cl = cl;
				best_m = m;
				break;
			}

			/*
			 * Otherwise calculate the scale factor and select
			 * the queue with the lowest scale factor.  This
			 * apportions any unused bandwidth weighted by
			 * the relative bandwidth specification.
			 *
			 * scale = (bw / max), scaled up by 256.
			 *
			 * The calculation is refactored to reduce the
			 * chance of overflow.
			 */
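			/*
			 * Illustrative numbers for the refactored scale
			 * computation below (assumed values, not from
			 * the original): cl_bandwidth = 25000 bytes/sec
			 * and cl_bw_current = 30000 bytes/sec give
			 * 30000 * 16 / (25000 / 16 + 1) ~= 307, close
			 * to the exact (30000 / 25000) * 256 = 307.2.
			 */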
			scale = cl->cl_bw_current * 16 /
				(cl->cl_bandwidth / 16 + 1);
			if (best_scale > scale) {
				best_cl = cl;
				best_m = m;
				best_scale = scale;
			}
		}

		if (op == ALTDQ_POLL) {
#ifdef foo
			/*
			 * Don't use poll cache; the poll/dequeue
			 * model is no longer applicable to SMP
			 * system.  e.g.
			 *    CPU-A            CPU-B
			 *      :                :
			 *    poll               :
			 *      :              poll
			 *    dequeue (+)        :
			 *
			 * The dequeue at (+) will hit the poll
			 * cache set by CPU-B.
			 */
			pif->pif_poll_cache = best_cl;
#endif
			m = best_m;
		} else if (best_cl) {
			m = fairq_getq(best_cl, cur_time);
			KKASSERT(best_m == m);
			ALTQ_SQ_PKTCNT_DEC(ifsq);
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		} else {
			m = NULL;
		}
	}
	crit_exit();
	return (m);
}

static int
fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash)
{
	fairq_bucket_t *b;
	u_int hindex;
	uint64_t bw;

	/*
	 * If the packet doesn't have any keep state put it on the end of
	 * our queue.  XXX this can result in out of order delivery.
	 */
	if (hash == 0) {
		if (cl->cl_head)
			b = cl->cl_head->prev;
		else
			b = &cl->cl_buckets[0];
	} else {
		hindex = hash & cl->cl_nbucket_mask;
		b = &cl->cl_buckets[hindex];
	}

	/*
	 * Add the bucket to the end of the circular list of active buckets.
	 *
	 * As a special case we add the bucket to the beginning of the list
	 * instead of the end if it was not previously on the list and if
	 * its traffic is less than the hog level.
	 */
	if (b->in_use == 0) {
		b->in_use = 1;
		if (cl->cl_head == NULL) {
			cl->cl_head = b;
			cl->cl_advanced = 1;
			b->next = b;
			b->prev = b;
		} else {
			b->next = cl->cl_head;
			b->prev = cl->cl_head->prev;
			b->prev->next = b;
			b->next->prev = b;

			if (b->bw_delta && cl->cl_hogs_m1) {
				bw = b->bw_bytes * machclk_freq / b->bw_delta;
				if (bw < cl->cl_hogs_m1) {
					cl->cl_head = b;
					cl->cl_advanced = 1;
				}
			}
		}
	}

#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
	if (qlen(&b->queue) >= qlimit(&b->queue)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & FARF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(&b->queue, m);

	return (0);
}

static struct mbuf *
fairq_getq(struct fairq_class *cl, uint64_t cur_time)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	b = fairq_selectq(cl);
	if (b == NULL)
		m = NULL;
#ifdef ALTQ_RIO
	else if (cl->cl_qtype == Q_RIO)
		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
#endif
#ifdef ALTQ_RED
	else if (cl->cl_qtype == Q_RED)
		m = red_getq(cl->cl_red, &b->queue);
#endif
	else
		m = _getq(&b->queue);

	/*
	 * Calculate the BW change
	 */
	if (m != NULL) {
		uint64_t delta;

		/*
		 * Per-class bandwidth calculation
		 */
		delta = (cur_time - cl->cl_last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		cl->cl_bw_delta += delta;
		cl->cl_bw_bytes += m->m_pkthdr.len;
		cl->cl_last_time = cur_time;

		/*
		 * Once the accumulated window exceeds ~machclk_freq,
		 * rescale both cl_bw_delta and cl_bw_bytes by the same
		 * factor so the window shrinks to ~machclk_freq/2; the
		 * bytes/delta ratio (the measured bandwidth) is
		 * preserved and neither counter can overflow.
		 */
		if (cl->cl_bw_delta > machclk_freq) {
			uint64_t f = cl->cl_bw_delta * 32 / machclk_freq;
			cl->cl_bw_delta = cl->cl_bw_delta * 16 / f;
			cl->cl_bw_bytes = cl->cl_bw_bytes * 16 / f;
		}
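		/*
		 * Worked example of the rescale (assumed numbers, for
		 * illustration only): with machclk_freq at 1e9 and
		 * cl_bw_delta grown to 2e9, f = 2e9 * 32 / 1e9 = 64,
		 * so both counters are multiplied by 16/64 = 1/4 and
		 * the window drops back to 5e8 ticks (~machclk_freq/2).
		 */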

		/*
		 * Per-bucket bandwidth calculation.
		 */
		delta = (cur_time - b->last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		b->bw_delta += delta;
		b->bw_bytes += m->m_pkthdr.len;
		b->last_time = cur_time;

		/*
		 * Rescale bw_delta and bw_bytes the same way to keep
		 * the per-bucket window at ~machclk_freq and avoid
		 * overflows.
		 */
		if (b->bw_delta > machclk_freq) {
			uint64_t f = b->bw_delta * 32 / machclk_freq;
			b->bw_delta = b->bw_delta * 16 / f;
			b->bw_bytes = b->bw_bytes * 16 / f;
		}
	}
	return (m);
}

/*
 * Figure out what the next packet would be if there were no limits.  If
 * this class hits its bandwidth limit *hit_limit is set to non-zero,
 * otherwise it is set to 0.  A non-NULL mbuf is returned either way.
 */
static struct mbuf *
fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
{
	fairq_bucket_t *b;
	struct mbuf *m;
	uint64_t delta;
	uint64_t bw;

	*hit_limit = 0;
	b = fairq_selectq(cl);
	if (b == NULL)
		return (NULL);
	m = qhead(&b->queue);
	if (m == NULL)
		return (NULL);
	cl->cl_advanced = 1;	/* so next select/get doesn't re-advance */

	/*
	 * Did this packet exceed the class bandwidth?
	 *
	 * Calculate the bandwidth component of the packet in bytes/sec.
	 * Avoid overflows when machclk_freq is very high.
	 */
	delta = cur_time - cl->cl_last_time;
	if (delta > machclk_freq * 8)
		delta = machclk_freq * 8;
	cl->cl_bw_delta += delta;
	cl->cl_last_time = cur_time;

	if (cl->cl_bw_delta) {
		bw = (cl->cl_bw_bytes + m->m_pkthdr.len) *
		     machclk_freq / cl->cl_bw_delta;
		if (bw > cl->cl_bandwidth)
			*hit_limit = 1;
		cl->cl_bw_current = bw;
#if 0
		kprintf("BW %6lld relative to %6llu %d queue %p\n",
			bw, cl->cl_bandwidth, *hit_limit, b);
#endif
	}
	return (m);
}

/*
 * Locate the next queue we want to pull a packet out of.  This code
 * is also responsible for removing empty buckets from the circular list.
 */
static fairq_bucket_t *
fairq_selectq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	uint64_t bw;

	while ((b = cl->cl_head) != NULL) {
		/*
		 * Remove empty queues from consideration
		 */
		if (qempty(&b->queue)) {
			b->in_use = 0;
			cl->cl_head = b->next;
			cl->cl_advanced = 1;
			if (cl->cl_head == b) {
				cl->cl_head = NULL;
			} else {
				b->next->prev = b->prev;
				b->prev->next = b->next;
			}
			continue;
		}

		/*
		 * Advance the round robin.  Queues with bandwidths less
		 * than the hog bandwidth are allowed to burst.
		 *
		 * Don't advance twice if the previous head emptied.
		 */
		if (cl->cl_advanced) {
			cl->cl_advanced = 0;
			break;
		}
		if (cl->cl_hogs_m1 == 0) {
			cl->cl_head = b->next;
		} else if (b->bw_delta) {
			bw = b->bw_bytes * machclk_freq / b->bw_delta;
			if (bw >= cl->cl_hogs_m1)
				cl->cl_head = b->next;
		}

		/*
		 * Return the (possibly new) head.
		 */
		b = cl->cl_head;
		break;
	}
	return (b);
}

static void
fairq_purgeq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	while ((b = fairq_selectq(cl)) != NULL) {
		while ((m = _getq(&b->queue)) != NULL) {
			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
			m_freem(m);
		}
		KKASSERT(qlen(&b->queue) == 0);
	}
}

static void
get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
{
	fairq_bucket_t *b;

	sp->class_handle = cl->cl_handle;
	sp->qlimit = cl->cl_qlimit;
	sp->xmit_cnt = cl->cl_xmitcnt;
	sp->drop_cnt = cl->cl_dropcnt;
	sp->qtype = cl->cl_qtype;
	sp->qlength = 0;

	if (cl->cl_head) {
		b = cl->cl_head;
		do {
			sp->qlength += qlen(&b->queue);
			b = b->next;
		} while (b != cl->cl_head);
	}

#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
}

/* convert a class handle to the corresponding class pointer */
static struct fairq_class *
clh_to_clp(struct fairq_if *pif, uint32_t chandle)
{
	struct fairq_class *cl;
	int idx;

	if (chandle == 0)
		return (NULL);

	for (idx = pif->pif_maxpri; idx >= 0; idx--)
		if ((cl = pif->pif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

#endif /* ALTQ_FAIRQ */