xref: /dragonfly/sys/net/altq/altq_fairq.c (revision bcb3e04d)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
35  */
36 /*
37  * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
38  * fairq.  The fairq algorithm is completely different than priq, of course,
39  * but because I used priq's skeleton I believe I should include priq's
40  * copyright.
41  *
42  * Copyright (C) 2000-2003
43  *	Sony Computer Science Laboratories Inc.  All rights reserved.
44  *
45  * Redistribution and use in source and binary forms, with or without
46  * modification, are permitted provided that the following conditions
47  * are met:
48  * 1. Redistributions of source code must retain the above copyright
49  *    notice, this list of conditions and the following disclaimer.
50  * 2. Redistributions in binary form must reproduce the above copyright
51  *    notice, this list of conditions and the following disclaimer in the
52  *    documentation and/or other materials provided with the distribution.
53  *
54  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 /*
68  * FAIRQ - take traffic classified by keep state (hashed into
69  *	   pf->state_hash) and bucketize it.  Fairly extract
70  *	   the first packet from each bucket in a round-robin fashion.
71  *
72  * TODO - better overall qlimit support (right now it is per-bucket).
73  *	- NOTE: red etc is per bucket, not overall.
74  *	- better service curve support.
75  *
76  * EXAMPLE:
77  *
78  *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
79  *  queue std  priority 3 bandwidth 400Kb \
80  *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
81  *  queue bulk priority 2 bandwidth 100Kb \
82  *	fairq (buckets 64, hogs 1Kb) qlimit 50
83  *
84  *  pass out on em0 from any to any keep state queue std
85  *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
86  */
87 #include "opt_altq.h"
88 #include "opt_inet.h"
89 #include "opt_inet6.h"
90 
91 #ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */
92 
93 #include <sys/param.h>
94 #include <sys/malloc.h>
95 #include <sys/mbuf.h>
96 #include <sys/socket.h>
97 #include <sys/sockio.h>
98 #include <sys/systm.h>
99 #include <sys/proc.h>
100 #include <sys/errno.h>
101 #include <sys/kernel.h>
102 #include <sys/queue.h>
103 #include <sys/thread.h>
104 
105 #include <net/if.h>
106 #include <net/ifq_var.h>
107 #include <netinet/in.h>
108 
109 #include <net/pf/pfvar.h>
110 #include <net/altq/altq.h>
111 #include <net/altq/altq_fairq.h>
112 
113 #include <sys/thread2.h>
114 
115 /*
116  * function prototypes
117  */
118 static int	fairq_clear_interface(struct fairq_if *);
119 static int	fairq_request(struct ifaltq *, int, void *);
120 static void	fairq_purge(struct fairq_if *);
121 static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
122 static int	fairq_class_destroy(struct fairq_class *);
123 static int	fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
124 static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int);
125 
126 static int	fairq_addq(struct fairq_class *, struct mbuf *, int hash);
127 static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
128 static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
129 static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
130 static void	fairq_purgeq(struct fairq_class *);
131 
132 static void	get_class_stats(struct fairq_classstats *, struct fairq_class *);
133 static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
134 
135 int
136 fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq)
137 {
138 	return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc,
139 	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
140 }
141 
142 int
143 fairq_add_altq(struct pf_altq *a)
144 {
145 	struct fairq_if *pif;
146 	struct ifnet *ifp;
147 
148 	if ((ifp = ifunit(a->ifname)) == NULL)
149 		return (EINVAL);
150 	if (!ifq_is_ready(&ifp->if_snd))
151 		return (ENODEV);
152 
153 	pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO);
154 	pif->pif_bandwidth = a->ifbandwidth;
155 	pif->pif_maxpri = -1;
156 	pif->pif_ifq = &ifp->if_snd;
157 	ifq_purge(&ifp->if_snd);
158 
159 	/* keep the state in pf_altq */
160 	a->altq_disc = pif;
161 
162 	return (0);
163 }
164 
165 int
166 fairq_remove_altq(struct pf_altq *a)
167 {
168 	struct fairq_if *pif;
169 
170 	if ((pif = a->altq_disc) == NULL)
171 		return (EINVAL);
172 	a->altq_disc = NULL;
173 
174 	fairq_clear_interface(pif);
175 
176 	kfree(pif, M_ALTQ);
177 	return (0);
178 }
179 
180 static int
181 fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif)
182 {
183 	struct fairq_class *cl;
184 
185 	KKASSERT(a->priority < FAIRQ_MAXPRI);
186 	KKASSERT(a->qid != 0);
187 
188 	if (pif->pif_classes[a->priority] != NULL)
189 		return (EBUSY);
190 	if (clh_to_clp(pif, a->qid) != NULL)
191 		return (EBUSY);
192 
193 	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
194 			       &a->pq_u.fairq_opts, a->qid);
195 	if (cl == NULL)
196 		return (ENOMEM);
197 
198 	return (0);
199 }
200 
201 int
202 fairq_add_queue(struct pf_altq *a)
203 {
204 	struct fairq_if *pif;
205 	struct ifaltq *ifq;
206 	int error;
207 
208 	/* check parameters */
209 	if (a->priority >= FAIRQ_MAXPRI)
210 		return (EINVAL);
211 	if (a->qid == 0)
212 		return (EINVAL);
213 
214 	/* XXX not MP safe */
215 	if ((pif = a->altq_disc) == NULL)
216 		return (EINVAL);
217 	ifq = pif->pif_ifq;
218 
219 	ALTQ_LOCK(ifq);
220 	error = fairq_add_queue_locked(a, pif);
221 	ALTQ_UNLOCK(ifq);
222 
223 	return error;
224 }
225 
226 static int
227 fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif)
228 {
229 	struct fairq_class *cl;
230 
231 	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
232 		return (EINVAL);
233 
234 	return (fairq_class_destroy(cl));
235 }
236 
237 int
238 fairq_remove_queue(struct pf_altq *a)
239 {
240 	struct fairq_if *pif;
241 	struct ifaltq *ifq;
242 	int error;
243 
244 	/* XXX not MP safe */
245 	if ((pif = a->altq_disc) == NULL)
246 		return (EINVAL);
247 	ifq = pif->pif_ifq;
248 
249 	ALTQ_LOCK(ifq);
250 	error = fairq_remove_queue_locked(a, pif);
251 	ALTQ_UNLOCK(ifq);
252 
253 	return error;
254 }
255 
256 int
257 fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
258 {
259 	struct fairq_if *pif;
260 	struct fairq_class *cl;
261 	struct fairq_classstats stats;
262 	struct ifaltq *ifq;
263 	int error = 0;
264 
265 	if (*nbytes < sizeof(stats))
266 		return (EINVAL);
267 
268 	/* XXX not MP safe */
269 	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
270 		return (EBADF);
271 	ifq = pif->pif_ifq;
272 
273 	ALTQ_LOCK(ifq);
274 
275 	if ((cl = clh_to_clp(pif, a->qid)) == NULL) {
276 		ALTQ_UNLOCK(ifq);
277 		return (EINVAL);
278 	}
279 
280 	get_class_stats(&stats, cl);
281 
282 	ALTQ_UNLOCK(ifq);
283 
284 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
285 		return (error);
286 	*nbytes = sizeof(stats);
287 	return (0);
288 }
289 
290 /*
291  * bring the interface back to the initial state by discarding
292  * all the filters and classes.
293  */
294 static int
295 fairq_clear_interface(struct fairq_if *pif)
296 {
297 	struct fairq_class *cl;
298 	int pri;
299 
300 	/* clear out the classes */
301 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
302 		if ((cl = pif->pif_classes[pri]) != NULL)
303 			fairq_class_destroy(cl);
304 	}
305 
306 	return (0);
307 }
308 
309 static int
310 fairq_request(struct ifaltq *ifq, int req, void *arg)
311 {
312 	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
313 
314 	crit_enter();
315 	switch (req) {
316 	case ALTRQ_PURGE:
317 		fairq_purge(pif);
318 		break;
319 	}
320 	crit_exit();
321 	return (0);
322 }
323 
324 /* discard all the queued packets on the interface */
325 static void
326 fairq_purge(struct fairq_if *pif)
327 {
328 	struct fairq_class *cl;
329 	int pri;
330 
331 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
332 		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
333 			fairq_purgeq(cl);
334 	}
335 	if (ifq_is_enabled(pif->pif_ifq))
336 		pif->pif_ifq->ifq_len = 0;
337 }
338 
339 static struct fairq_class *
340 fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
341 		   u_int bandwidth, struct fairq_opts *opts, int qid)
342 {
343 	struct fairq_class *cl;
344 	int flags = opts->flags;
345 	u_int nbuckets = opts->nbuckets;
346 	int i;
347 
348 #ifndef ALTQ_RED
349 	if (flags & FARF_RED) {
350 #ifdef ALTQ_DEBUG
351 		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
352 #endif
353 		return (NULL);
354 	}
355 #endif
356 	if (nbuckets == 0)
357 		nbuckets = 256;
358 	if (nbuckets > FAIRQ_MAX_BUCKETS)
359 		nbuckets = FAIRQ_MAX_BUCKETS;
360 	/* enforce power-of-2 size */
361 	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
362 		++nbuckets;
363 
364 	if ((cl = pif->pif_classes[pri]) != NULL) {
365 		/* modify the class instead of creating a new one */
366 		crit_enter();
367 		if (cl->cl_head)
368 			fairq_purgeq(cl);
369 		crit_exit();
370 #ifdef ALTQ_RIO
371 		if (cl->cl_qtype == Q_RIO)
372 			rio_destroy((rio_t *)cl->cl_red);
373 #endif
374 #ifdef ALTQ_RED
375 		if (cl->cl_qtype == Q_RED)
376 			red_destroy(cl->cl_red);
377 #endif
378 	} else {
379 		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
380 		cl->cl_nbuckets = nbuckets;
381 		cl->cl_nbucket_mask = nbuckets - 1;
382 
383 		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
384 					 cl->cl_nbuckets,
385 					 M_ALTQ, M_WAITOK | M_ZERO);
386 		cl->cl_head = NULL;
387 	}
388 
389 	pif->pif_classes[pri] = cl;
390 	if (flags & FARF_DEFAULTCLASS)
391 		pif->pif_default = cl;
392 	if (qlimit == 0)
393 		qlimit = 50;  /* use default */
394 	cl->cl_qlimit = qlimit;
395 	for (i = 0; i < cl->cl_nbuckets; ++i) {
396 		qlimit(&cl->cl_buckets[i].queue) = qlimit;
397 	}
398 	cl->cl_bandwidth = bandwidth / 8;
399 	cl->cl_qtype = Q_DROPTAIL;
400 	cl->cl_flags = flags & FARF_USERFLAGS;
401 	cl->cl_pri = pri;
402 	if (pri > pif->pif_maxpri)
403 		pif->pif_maxpri = pri;
404 	cl->cl_pif = pif;
405 	cl->cl_handle = qid;
406 	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
407 	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
408 
409 #ifdef ALTQ_RED
410 	if (flags & (FARF_RED|FARF_RIO)) {
411 		int red_flags, red_pkttime;
412 
413 		red_flags = 0;
414 		if (flags & FARF_ECN)
415 			red_flags |= REDF_ECN;
416 #ifdef ALTQ_RIO
417 		if (flags & FARF_CLEARDSCP)
418 			red_flags |= RIOF_CLEARDSCP;
419 #endif
420 		if (pif->pif_bandwidth < 8)
421 			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
422 		else
423 			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
424 			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
425 #ifdef ALTQ_RIO
426 		if (flags & FARF_RIO) {
427 			cl->cl_red = (red_t *)rio_alloc(0, NULL,
428 						red_flags, red_pkttime);
429 			if (cl->cl_red != NULL)
430 				cl->cl_qtype = Q_RIO;
431 		} else
432 #endif
433 		if (flags & FARF_RED) {
434 			cl->cl_red = red_alloc(0, 0,
435 			    cl->cl_qlimit * 10/100,
436 			    cl->cl_qlimit * 30/100,
437 			    red_flags, red_pkttime);
438 			if (cl->cl_red != NULL)
439 				cl->cl_qtype = Q_RED;
440 		}
441 	}
442 #endif /* ALTQ_RED */
443 
444 	return (cl);
445 }
446 
447 static int
448 fairq_class_destroy(struct fairq_class *cl)
449 {
450 	struct fairq_if *pif;
451 	int pri;
452 
453 	crit_enter();
454 
455 	if (cl->cl_head)
456 		fairq_purgeq(cl);
457 
458 	pif = cl->cl_pif;
459 	pif->pif_classes[cl->cl_pri] = NULL;
460 	if (pif->pif_poll_cache == cl)
461 		pif->pif_poll_cache = NULL;
462 	if (pif->pif_maxpri == cl->cl_pri) {
463 		for (pri = cl->cl_pri; pri >= 0; pri--)
464 			if (pif->pif_classes[pri] != NULL) {
465 				pif->pif_maxpri = pri;
466 				break;
467 			}
468 		if (pri < 0)
469 			pif->pif_maxpri = -1;
470 	}
471 	crit_exit();
472 
473 	if (cl->cl_red != NULL) {
474 #ifdef ALTQ_RIO
475 		if (cl->cl_qtype == Q_RIO)
476 			rio_destroy((rio_t *)cl->cl_red);
477 #endif
478 #ifdef ALTQ_RED
479 		if (cl->cl_qtype == Q_RED)
480 			red_destroy(cl->cl_red);
481 #endif
482 	}
483 	kfree(cl->cl_buckets, M_ALTQ);
484 	cl->cl_head = NULL;	/* sanity */
485 	cl->cl_polled = NULL;	/* sanity */
486 	cl->cl_buckets = NULL;	/* sanity */
487 	kfree(cl, M_ALTQ);
488 
489 	return (0);
490 }
491 
492 /*
493  * fairq_enqueue is an enqueue function to be registered to
494  * (*altq_enqueue) in struct ifaltq.
495  */
496 static int
497 fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
498 {
499 	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
500 	struct fairq_class *cl;
501 	int error;
502 	int len;
503 	int hash;
504 
505 	crit_enter();
506 
507 	/* grab class set by classifier */
508 	if ((m->m_flags & M_PKTHDR) == 0) {
509 		/* should not happen */
510 		if_printf(ifq->altq_ifp, "altq: packet does not have pkthdr\n");
511 		m_freem(m);
512 		error = ENOBUFS;
513 		goto done;
514 	}
515 
516 	if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
517 		cl = clh_to_clp(pif, m->m_pkthdr.pf.qid);
518 		if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED)
519 			hash = (int)m->m_pkthdr.pf.state_hash;
520 		else
521 			hash = 0;
522 	} else {
523 		cl = NULL;
524 		hash = 0;
525 	}
526 	if (cl == NULL) {
527 		cl = pif->pif_default;
528 		if (cl == NULL) {
529 			m_freem(m);
530 			error = ENOBUFS;
531 			goto done;
532 		}
533 	}
534 	cl->cl_flags |= FARF_HAS_PACKETS;
535 	cl->cl_pktattr = NULL;
536 	len = m_pktlen(m);
537 	if (fairq_addq(cl, m, hash) != 0) {
538 		/* drop occurred.  mbuf was freed in fairq_addq. */
539 		PKTCNTR_ADD(&cl->cl_dropcnt, len);
540 		error = ENOBUFS;
541 		goto done;
542 	}
543 	ifq->ifq_len++;
544 	error = 0;
545 done:
546 	crit_exit();
547 	return (error);
548 }
549 
550 /*
551  * fairq_dequeue is a dequeue function to be registered to
552  * (*altq_dequeue) in struct ifaltq.
553  *
554  * note: ALTDQ_POLL returns the next packet without removing the packet
555  *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
556  *	ALTDQ_REMOVE must return the same packet if called immediately
557  *	after ALTDQ_POLL.
558  */
559 static struct mbuf *
560 fairq_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
561 {
562 	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
563 	struct fairq_class *cl;
564 	struct fairq_class *best_cl;
565 	struct mbuf *best_m;
566 	struct mbuf *m;
567 	uint64_t cur_time = read_machclk();
568 	int pri;
569 	int hit_limit;
570 
571 	if (ifq_is_empty(ifq)) {
572 		/* no packet in the queue */
573 		KKASSERT(mpolled == NULL);
574 		return (NULL);
575 	}
576 
577 	crit_enter();
578 	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
579 		best_cl = pif->pif_poll_cache;
580 		m = fairq_getq(best_cl, cur_time);
581 		pif->pif_poll_cache = NULL;
582 		if (m) {
583 			ifq->ifq_len--;
584 			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
585 		}
586 	} else {
587 		best_cl = NULL;
588 		best_m = NULL;
589 
590 		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
591 			if ((cl = pif->pif_classes[pri]) == NULL)
592 				continue;
593 			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
594 				continue;
595 			m = fairq_pollq(cl, cur_time, &hit_limit);
596 			if (m == NULL) {
597 				cl->cl_flags &= ~FARF_HAS_PACKETS;
598 				continue;
599 			}
600 
601 			/*
602 			 * Only override the best choice if we are under
603 			 * the BW limit.
604 			 */
605 			if (hit_limit == 0 || best_cl == NULL) {
606 				best_cl = cl;
607 				best_m = m;
608 			}
609 
610 			/*
611 			 * Remember the highest priority mbuf in case we
612 			 * do not find any lower priority mbufs.
613 			 */
614 			if (hit_limit)
615 				continue;
616 			break;
617 		}
618 		if (op == ALTDQ_POLL) {
619 			pif->pif_poll_cache = best_cl;
620 			m = best_m;
621 		} else if (best_cl) {
622 			m = fairq_getq(best_cl, cur_time);
623 			KKASSERT(best_m == m);
624 			ifq->ifq_len--;
625 			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
626 		} else {
627 			m = NULL;
628 		}
629 	}
630 	crit_exit();
631 	KKASSERT(mpolled == NULL || mpolled == m);
632 	return (m);
633 }
634 
635 static int
636 fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash)
637 {
638 	fairq_bucket_t *b;
639 	u_int hindex;
640 	uint64_t bw;
641 
642 	/*
643 	 * If the packet doesn't have any keep state put it on the end of
644 	 * our queue.  XXX this can result in out of order delivery.
645 	 */
646 	if (hash == 0) {
647 		if (cl->cl_head)
648 			b = cl->cl_head->prev;
649 		else
650 			b = &cl->cl_buckets[0];
651 	} else {
652 		hindex = hash & cl->cl_nbucket_mask;
653 		b = &cl->cl_buckets[hindex];
654 	}
655 
656 	/*
657 	 * Add the bucket to the end of the circular list of active buckets.
658 	 *
659 	 * As a special case we add the bucket to the beginning of the list
660 	 * instead of the end if it was not previously on the list and if
661 	 * its traffic is less then the hog level.
662 	 */
663 	if (b->in_use == 0) {
664 		b->in_use = 1;
665 		if (cl->cl_head == NULL) {
666 			cl->cl_head = b;
667 			b->next = b;
668 			b->prev = b;
669 		} else {
670 			b->next = cl->cl_head;
671 			b->prev = cl->cl_head->prev;
672 			b->prev->next = b;
673 			b->next->prev = b;
674 
675 			if (b->bw_delta && cl->cl_hogs_m1) {
676 				bw = b->bw_bytes * machclk_freq / b->bw_delta;
677 				if (bw < cl->cl_hogs_m1)
678 					cl->cl_head = b;
679 			}
680 		}
681 	}
682 
683 #ifdef ALTQ_RIO
684 	if (cl->cl_qtype == Q_RIO)
685 		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
686 #endif
687 #ifdef ALTQ_RED
688 	if (cl->cl_qtype == Q_RED)
689 		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
690 #endif
691 	if (qlen(&b->queue) >= qlimit(&b->queue)) {
692 		m_freem(m);
693 		return (-1);
694 	}
695 
696 	if (cl->cl_flags & FARF_CLEARDSCP)
697 		write_dsfield(m, cl->cl_pktattr, 0);
698 
699 	_addq(&b->queue, m);
700 
701 	return (0);
702 }
703 
704 static struct mbuf *
705 fairq_getq(struct fairq_class *cl, uint64_t cur_time)
706 {
707 	fairq_bucket_t *b;
708 	struct mbuf *m;
709 
710 	b = fairq_selectq(cl, 0);
711 	if (b == NULL)
712 		m = NULL;
713 #ifdef ALTQ_RIO
714 	else if (cl->cl_qtype == Q_RIO)
715 		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
716 #endif
717 #ifdef ALTQ_RED
718 	else if (cl->cl_qtype == Q_RED)
719 		m = red_getq(cl->cl_red, &b->queue);
720 #endif
721 	else
722 		m = _getq(&b->queue);
723 
724 	/*
725 	 * Calculate the BW change
726 	 */
727 	if (m != NULL) {
728 		uint64_t delta;
729 
730 		/*
731 		 * Per-class bandwidth calculation
732 		 */
733 		delta = (cur_time - cl->cl_last_time);
734 		if (delta > machclk_freq * 8)
735 			delta = machclk_freq * 8;
736 		cl->cl_bw_delta += delta;
737 		cl->cl_bw_bytes += m->m_pkthdr.len;
738 		cl->cl_last_time = cur_time;
739 		cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
740 		cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
741 
742 		/*
743 		 * Per-bucket bandwidth calculation
744 		 */
745 		delta = (cur_time - b->last_time);
746 		if (delta > machclk_freq * 8)
747 			delta = machclk_freq * 8;
748 		b->bw_delta += delta;
749 		b->bw_bytes += m->m_pkthdr.len;
750 		b->last_time = cur_time;
751 		b->bw_delta -= b->bw_delta >> 3;
752 		b->bw_bytes -= b->bw_bytes >> 3;
753 	}
754 	return(m);
755 }
756 
757 /*
758  * Figure out what the next packet would be if there were no limits.  If
759  * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise
760  * it is set to 0.  A non-NULL mbuf is returned either way.
761  */
762 static struct mbuf *
763 fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
764 {
765 	fairq_bucket_t *b;
766 	struct mbuf *m;
767 	uint64_t delta;
768 	uint64_t bw;
769 
770 	*hit_limit = 0;
771 	b = fairq_selectq(cl, 1);
772 	if (b == NULL)
773 		return(NULL);
774 	m = qhead(&b->queue);
775 
776 	/*
777 	 * Did this packet exceed the class bandwidth?  Calculate the
778 	 * bandwidth component of the packet.
779 	 *
780 	 * - Calculate bytes per second
781 	 */
782 	delta = cur_time - cl->cl_last_time;
783 	if (delta > machclk_freq * 8)
784 		delta = machclk_freq * 8;
785 	cl->cl_bw_delta += delta;
786 	cl->cl_last_time = cur_time;
787 	if (cl->cl_bw_delta) {
788 		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
789 
790 		if (bw > cl->cl_bandwidth)
791 			*hit_limit = 1;
792 #if 0
793 		kprintf("BW %6lld relative to %6u %d queue %p\n",
794 			bw, cl->cl_bandwidth, *hit_limit, b);
795 #endif
796 	}
797 	return(m);
798 }
799 
800 /*
801  * Locate the next queue we want to pull a packet out of.  This code
802  * is also responsible for removing empty buckets from the circular list.
803  */
804 static
805 fairq_bucket_t *
806 fairq_selectq(struct fairq_class *cl, int ispoll)
807 {
808 	fairq_bucket_t *b;
809 	uint64_t bw;
810 
811 	if (ispoll == 0 && cl->cl_polled) {
812 		b = cl->cl_polled;
813 		cl->cl_polled = NULL;
814 		return(b);
815 	}
816 
817 	while ((b = cl->cl_head) != NULL) {
818 		/*
819 		 * Remove empty queues from consideration
820 		 */
821 		if (qempty(&b->queue)) {
822 			b->in_use = 0;
823 			cl->cl_head = b->next;
824 			if (cl->cl_head == b) {
825 				cl->cl_head = NULL;
826 			} else {
827 				b->next->prev = b->prev;
828 				b->prev->next = b->next;
829 			}
830 			continue;
831 		}
832 
833 		/*
834 		 * Advance the round robin.  Queues with bandwidths less
835 		 * then the hog bandwidth are allowed to burst.
836 		 */
837 		if (cl->cl_hogs_m1 == 0) {
838 			cl->cl_head = b->next;
839 		} else if (b->bw_delta) {
840 			bw = b->bw_bytes * machclk_freq / b->bw_delta;
841 			if (bw >= cl->cl_hogs_m1) {
842 				cl->cl_head = b->next;
843 			}
844 			/*
845 			 * XXX TODO -
846 			 */
847 		}
848 
849 		/*
850 		 * Return bucket b.
851 		 */
852 		break;
853 	}
854 	if (ispoll)
855 		cl->cl_polled = b;
856 	return(b);
857 }
858 
859 static void
860 fairq_purgeq(struct fairq_class *cl)
861 {
862 	fairq_bucket_t *b;
863 	struct mbuf *m;
864 
865 	while ((b = fairq_selectq(cl, 0)) != NULL) {
866 		while ((m = _getq(&b->queue)) != NULL) {
867 			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
868 			m_freem(m);
869 		}
870 		KKASSERT(qlen(&b->queue) == 0);
871 	}
872 }
873 
874 static void
875 get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
876 {
877 	fairq_bucket_t *b;
878 
879 	sp->class_handle = cl->cl_handle;
880 	sp->qlimit = cl->cl_qlimit;
881 	sp->xmit_cnt = cl->cl_xmitcnt;
882 	sp->drop_cnt = cl->cl_dropcnt;
883 	sp->qtype = cl->cl_qtype;
884 	sp->qlength = 0;
885 
886 	if (cl->cl_head) {
887 		b = cl->cl_head;
888 		do {
889 			sp->qlength += qlen(&b->queue);
890 			b = b->next;
891 		} while (b != cl->cl_head);
892 	}
893 
894 #ifdef ALTQ_RED
895 	if (cl->cl_qtype == Q_RED)
896 		red_getstats(cl->cl_red, &sp->red[0]);
897 #endif
898 #ifdef ALTQ_RIO
899 	if (cl->cl_qtype == Q_RIO)
900 		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
901 #endif
902 }
903 
904 /* convert a class handle to the corresponding class pointer */
905 static struct fairq_class *
906 clh_to_clp(struct fairq_if *pif, uint32_t chandle)
907 {
908 	struct fairq_class *cl;
909 	int idx;
910 
911 	if (chandle == 0)
912 		return (NULL);
913 
914 	for (idx = pif->pif_maxpri; idx >= 0; idx--)
915 		if ((cl = pif->pif_classes[idx]) != NULL &&
916 		    cl->cl_handle == chandle)
917 			return (cl);
918 
919 	return (NULL);
920 }
921 
922 #endif /* ALTQ_FAIRQ */
923