/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different from priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it.  Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: RED etc. is per-bucket, not overall.
 *	- better service curve support.
 *
 * EXAMPLE:
 *
 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
 *  queue std  priority 3 bandwidth 200Kb \
 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
 *  queue bulk priority 2 bandwidth 100Kb \
 *	fairq (buckets 64, hogs 1Kb) qlimit 50
 *
 *	NOTE: When the aggregate bandwidth is less than the link bandwidth
 *	      any remaining bandwidth is dynamically assigned using the
 *	      existing bandwidth specs as weightings.
 *
 *  pass out on em0 from any to any keep state queue std
 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
 */
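/*
 * Bucket selection sketch (illustrative only, mirroring fairq_addq()
 * below): with a power-of-2 bucket count the pf state hash is reduced
 * to a bucket index with a simple mask,
 *
 *	hindex = hash & cl->cl_nbucket_mask;
 *	b = &cl->cl_buckets[hindex];
 *
 * where cl_nbucket_mask is nbuckets - 1.
 */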
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/thread.h>

#include <net/if.h>
#include <net/ifq_var.h>
#include <netinet/in.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>
#include <net/altq/altq_fairq.h>

#include <sys/thread2.h>

/*
 * function prototypes
 */
static int	fairq_clear_interface(struct fairq_if *);
static int	fairq_request(struct ifaltq *, int, void *);
static void	fairq_purge(struct fairq_if *);
static struct fairq_class *fairq_class_create(struct fairq_if *, int,
					int, u_int, struct fairq_opts *, int);
static int	fairq_class_destroy(struct fairq_class *);
static int	fairq_enqueue(struct ifaltq *, struct mbuf *,
					struct altq_pktattr *);
static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int);

static int	fairq_addq(struct fairq_class *, struct mbuf *, int hash);
static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
static void	fairq_purgeq(struct fairq_class *);

static void	get_class_stats(struct fairq_classstats *,
					struct fairq_class *);
static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);

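/*
 * Attach fairq to the given interface queue, registering this
 * discipline's enqueue, dequeue, and request handlers with ALTQ.
 */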
int
fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq)
{
	return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc,
			   fairq_enqueue, fairq_dequeue,
			   fairq_request, NULL, NULL);
}

int
fairq_add_altq(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifnet *ifp;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);
	if (!ifq_is_ready(&ifp->if_snd))
		return (ENODEV);

	pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO);
	pif->pif_bandwidth = a->ifbandwidth;
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;
	ifq_purge(&ifp->if_snd);

	/* keep the state in pf_altq */
	a->altq_disc = pif;

	return (0);
}

int
fairq_remove_altq(struct pf_altq *a)
{
	struct fairq_if *pif;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	fairq_clear_interface(pif);

	kfree(pif, M_ALTQ);
	return (0);
}

static int
fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	KKASSERT(a->priority < FAIRQ_MAXPRI);
	KKASSERT(a->qid != 0);

	if (pif->pif_classes[a->priority] != NULL)
		return (EBUSY);
	if (clh_to_clp(pif, a->qid) != NULL)
		return (EBUSY);

	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
			       &a->pq_u.fairq_opts, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

int
fairq_add_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* check parameters */
	if (a->priority >= FAIRQ_MAXPRI)
		return (EINVAL);
	if (a->qid == 0)
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	ALTQ_LOCK(ifq);
	error = fairq_add_queue_locked(a, pif);
	ALTQ_UNLOCK(ifq);

	return error;
}

static int
fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
		return (EINVAL);

	return (fairq_class_destroy(cl));
}

int
fairq_remove_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	ALTQ_LOCK(ifq);
	error = fairq_remove_queue_locked(a, pif);
	ALTQ_UNLOCK(ifq);

	return error;
}

int
fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	struct fairq_if *pif;
	struct fairq_class *cl;
	struct fairq_classstats stats;
	struct ifaltq *ifq;
	int error = 0;

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
		return (EBADF);
	ifq = pif->pif_ifq;

	ALTQ_LOCK(ifq);

	if ((cl = clh_to_clp(pif, a->qid)) == NULL) {
		ALTQ_UNLOCK(ifq);
		return (EINVAL);
	}

	get_class_stats(&stats, cl);

	ALTQ_UNLOCK(ifq);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);
	*nbytes = sizeof(stats);
	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
fairq_clear_interface(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	/* clear out the classes */
	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL)
			fairq_class_destroy(cl);
	}

	return (0);
}

static int
fairq_request(struct ifaltq *ifq, int req, void *arg)
{
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;

	crit_enter();
	switch (req) {
	case ALTRQ_PURGE:
		fairq_purge(pif);
		break;
	}
	crit_exit();
	return (0);
}

/* discard all the queued packets on the interface */
static void
fairq_purge(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
			fairq_purgeq(cl);
	}
	if (ifq_is_enabled(pif->pif_ifq))
		pif->pif_ifq->ifq_len = 0;
}

static struct fairq_class *
fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
		   u_int bandwidth, struct fairq_opts *opts, int qid)
{
	struct fairq_class *cl;
	int flags = opts->flags;
	u_int nbuckets = opts->nbuckets;
	int i;

#ifndef ALTQ_RED
	if (flags & FARF_RED) {
#ifdef ALTQ_DEBUG
		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
#endif
		return (NULL);
	}
#endif
	if (nbuckets == 0)
		nbuckets = 256;
	if (nbuckets > FAIRQ_MAX_BUCKETS)
		nbuckets = FAIRQ_MAX_BUCKETS;
	/* enforce power-of-2 size */
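	/* note: n ^ (n - 1) == (n << 1) - 1 only when n is a power of 2 */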
	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
		++nbuckets;

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		crit_enter();
		if (cl->cl_head)
			fairq_purgeq(cl);
		crit_exit();
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	} else {
		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_nbuckets = nbuckets;
		cl->cl_nbucket_mask = nbuckets - 1;

		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
					 cl->cl_nbuckets,
					 M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_head = NULL;
	}

	pif->pif_classes[pri] = cl;
	if (flags & FARF_DEFAULTCLASS)
		pif->pif_default = cl;
	if (qlimit == 0)
		qlimit = 50;  /* use default */
	cl->cl_qlimit = qlimit;
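	/* the limit applies per bucket, not to the class as a whole */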
	for (i = 0; i < cl->cl_nbuckets; ++i) {
		qlimit(&cl->cl_buckets[i].queue) = qlimit;
	}
	cl->cl_bandwidth = bandwidth / 8;	/* cvt to bytes per second */
	cl->cl_qtype = Q_DROPTAIL;
	cl->cl_flags = flags & FARF_USERFLAGS;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;
	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
	cl->cl_bw_current = 0;

#ifdef ALTQ_RED
	if (flags & (FARF_RED|FARF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & FARF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & FARF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
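		/*
		 * red_pkttime approximates the time, in nanoseconds, to
		 * transmit one MTU-sized packet at the interface bandwidth
		 * (pif_bandwidth is in bits/sec, hence the divide by 8).
		 */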
		if (pif->pif_bandwidth < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
#ifdef ALTQ_RIO
		if (flags & FARF_RIO) {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
						red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RIO;
		} else
#endif
		if (flags & FARF_RED) {
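			/*
			 * Default RED thresholds: min at 10% and max at 30%
			 * of the per-bucket queue limit.
			 */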
			cl->cl_red = red_alloc(0, 0,
			    cl->cl_qlimit * 10/100,
			    cl->cl_qlimit * 30/100,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RED;
		}
	}
#endif /* ALTQ_RED */

	return (cl);
}

static int
fairq_class_destroy(struct fairq_class *cl)
{
	struct fairq_if *pif;
	int pri;

	crit_enter();

	if (cl->cl_head)
		fairq_purgeq(cl);

	pif = cl->cl_pif;
	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_poll_cache == cl)
		pif->pif_poll_cache = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}
	crit_exit();

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	}
	kfree(cl->cl_buckets, M_ALTQ);
	cl->cl_head = NULL;	/* sanity */
	cl->cl_polled = NULL;	/* sanity */
	cl->cl_buckets = NULL;	/* sanity */
	kfree(cl, M_ALTQ);

	return (0);
}

/*
 * fairq_enqueue is an enqueue function to be registered to
 * (*altq_enqueue) in struct ifaltq.
 */
static int
fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	int error;
	int len;
	int hash;

	crit_enter();

	/* grab class set by classifier */
	if ((m->m_flags & M_PKTHDR) == 0) {
		/* should not happen */
		if_printf(ifq->altq_ifp, "altq: packet does not have pkthdr\n");
		m_freem(m);
		error = ENOBUFS;
		goto done;
	}

	if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
		cl = clh_to_clp(pif, m->m_pkthdr.pf.qid);
		if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED)
			hash = (int)m->m_pkthdr.pf.state_hash;
		else
			hash = 0;
	} else {
		cl = NULL;
		hash = 0;
	}
	if (cl == NULL) {
		cl = pif->pif_default;
		if (cl == NULL) {
			m_freem(m);
			error = ENOBUFS;
			goto done;
		}
	}
	cl->cl_flags |= FARF_HAS_PACKETS;
	cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (fairq_addq(cl, m, hash) != 0) {
		/* drop occurred.  mbuf was freed in fairq_addq. */
		PKTCNTR_ADD(&cl->cl_dropcnt, len);
		error = ENOBUFS;
		goto done;
	}
	ifq->ifq_len++;
	error = 0;
done:
	crit_exit();
	return (error);
}

/*
 * fairq_dequeue is a dequeue function to be registered to
 * (*altq_dequeue) in struct ifaltq.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 *	ALTDQ_REMOVE must return the same packet if called immediately
 *	after ALTDQ_POLL.
 */
static struct mbuf *
fairq_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
{
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	struct fairq_class *best_cl;
	struct mbuf *best_m;
	struct mbuf *m;
	uint64_t cur_time = read_machclk();
	u_int best_scale;
	u_int scale;
	int pri;
	int hit_limit;

	if (ifq_is_empty(ifq)) {
		/* no packet in the queue */
		KKASSERT(mpolled == NULL);
		return (NULL);
	}

	crit_enter();
	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
		best_cl = pif->pif_poll_cache;
		m = fairq_getq(best_cl, cur_time);
		pif->pif_poll_cache = NULL;
		if (m) {
			ifq->ifq_len--;
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		}
	} else {
		best_cl = NULL;
		best_m = NULL;
		best_scale = 0xFFFFFFFFU;

		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
			if ((cl = pif->pif_classes[pri]) == NULL)
				continue;
			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
				continue;
			m = fairq_pollq(cl, cur_time, &hit_limit);
			if (m == NULL) {
				cl->cl_flags &= ~FARF_HAS_PACKETS;
				continue;
			}

			/*
			 * We can halt the search immediately if the queue
			 * did not hit its bandwidth limit.
			 */
			if (hit_limit == 0) {
				best_cl = cl;
				best_m = m;
				break;
			}

			/*
			 * Otherwise calculate the scale factor and select
			 * the queue with the lowest scale factor.  This
			 * apportions any unused bandwidth weighted by
			 * the relative bandwidth specification.
			 */
			scale = cl->cl_bw_current * 100 / cl->cl_bandwidth;
			if (scale < best_scale) {
				best_cl = cl;
				best_m = m;
				best_scale = scale;
			}
		}

		if (op == ALTDQ_POLL) {
			pif->pif_poll_cache = best_cl;
			m = best_m;
		} else if (best_cl) {
			m = fairq_getq(best_cl, cur_time);
			KKASSERT(best_m == m);
			ifq->ifq_len--;
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		} else {
			m = NULL;
		}
	}
	crit_exit();
	KKASSERT(mpolled == NULL || mpolled == m);
	return (m);
}

static int
fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash)
{
	fairq_bucket_t *b;
	u_int hindex;
	uint64_t bw;

	/*
	 * If the packet doesn't have any keep state, put it at the end of
	 * our queue.  XXX this can result in out-of-order delivery.
	 */
	if (hash == 0) {
		if (cl->cl_head)
			b = cl->cl_head->prev;
		else
			b = &cl->cl_buckets[0];
	} else {
		hindex = hash & cl->cl_nbucket_mask;
		b = &cl->cl_buckets[hindex];
	}

	/*
	 * Add the bucket to the end of the circular list of active buckets.
	 *
	 * As a special case we add the bucket to the beginning of the list
	 * instead of the end if it was not previously on the list and if
	 * its traffic is less than the hog level.
	 */
	if (b->in_use == 0) {
		b->in_use = 1;
		if (cl->cl_head == NULL) {
			cl->cl_head = b;
			b->next = b;
			b->prev = b;
		} else {
			b->next = cl->cl_head;
			b->prev = cl->cl_head->prev;
			b->prev->next = b;
			b->next->prev = b;

			if (b->bw_delta && cl->cl_hogs_m1) {
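				/*
				 * bw_bytes over bw_delta machclk ticks,
				 * scaled by machclk_freq (ticks/sec), is
				 * this bucket's recent rate in bytes/sec;
				 * buckets under the hog threshold go to the
				 * list head so they are serviced first.
				 */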
				bw = b->bw_bytes * machclk_freq / b->bw_delta;
				if (bw < cl->cl_hogs_m1)
					cl->cl_head = b;
			}
		}
	}

#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
	if (qlen(&b->queue) >= qlimit(&b->queue)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & FARF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(&b->queue, m);

	return (0);
}

static struct mbuf *
fairq_getq(struct fairq_class *cl, uint64_t cur_time)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	b = fairq_selectq(cl, 0);
	if (b == NULL)
		m = NULL;
#ifdef ALTQ_RIO
	else if (cl->cl_qtype == Q_RIO)
		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
#endif
#ifdef ALTQ_RED
	else if (cl->cl_qtype == Q_RED)
		m = red_getq(cl->cl_red, &b->queue);
#endif
	else
		m = _getq(&b->queue);

	/*
	 * Calculate the BW change
	 */
	if (m != NULL) {
		uint64_t delta;

		/*
		 * Per-class bandwidth calculation
		 */
		delta = (cur_time - cl->cl_last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		cl->cl_bw_delta += delta;
		cl->cl_bw_bytes += m->m_pkthdr.len;
		cl->cl_last_time = cur_time;
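		/*
		 * Once more than a second's worth of ticks accumulates
		 * (machclk_freq ticks), decay both terms by 1/4 so that
		 * bw_bytes/bw_delta tracks a rough moving average of the
		 * recent bandwidth.
		 */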
		if (cl->cl_bw_delta > machclk_freq) {
			cl->cl_bw_delta -= cl->cl_bw_delta >> 2;
			cl->cl_bw_bytes -= cl->cl_bw_bytes >> 2;
		}

		/*
		 * Per-bucket bandwidth calculation
		 */
		delta = (cur_time - b->last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		b->bw_delta += delta;
		b->bw_bytes += m->m_pkthdr.len;
		b->last_time = cur_time;
		if (b->bw_delta > machclk_freq) {
			b->bw_delta -= b->bw_delta >> 2;
			b->bw_bytes -= b->bw_bytes >> 2;
		}
	}
	return(m);
}

/*
 * Figure out what the next packet would be if there were no limits.  If
 * this class hits its bandwidth limit *hit_limit is set to non-zero,
 * otherwise it is set to 0.  A non-NULL mbuf is returned either way;
 * NULL is returned only when the class has nothing queued.
 */
static struct mbuf *
fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
{
	fairq_bucket_t *b;
	struct mbuf *m;
	uint64_t delta;
	uint64_t bw;

	*hit_limit = 0;
	b = fairq_selectq(cl, 1);
	if (b == NULL)
		return(NULL);
	m = qhead(&b->queue);

	/*
	 * Did this packet exceed the class bandwidth?  Calculate the
	 * bandwidth component of the packet.
	 *
	 * - Calculate bytes per second
	 */
	delta = cur_time - cl->cl_last_time;
	if (delta > machclk_freq * 8)
		delta = machclk_freq * 8;
	cl->cl_bw_delta += delta;
	cl->cl_last_time = cur_time;
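	/*
	 * cl_bw_bytes accumulated over cl_bw_delta machclk ticks, scaled
	 * by machclk_freq (ticks/sec), yields the class's current rate in
	 * bytes/sec for comparison against cl_bandwidth.
	 */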
	if (cl->cl_bw_delta) {
		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;

		if (bw > cl->cl_bandwidth)
			*hit_limit = 1;
		cl->cl_bw_current = bw;
#if 0
		kprintf("BW %6lld relative to %6u %d queue %p\n",
			bw, cl->cl_bandwidth, *hit_limit, b);
#endif
	}
	return(m);
}

/*
 * Locate the next queue we want to pull a packet out of.  This code
 * is also responsible for removing empty buckets from the circular list.
 */
static
fairq_bucket_t *
fairq_selectq(struct fairq_class *cl, int ispoll)
{
	fairq_bucket_t *b;
	uint64_t bw;

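	/*
	 * An earlier ALTDQ_POLL already selected (and possibly rotated
	 * to) a bucket; return that same bucket so the matching
	 * ALTDQ_REMOVE dequeues exactly the packet the poll reported.
	 */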
	if (ispoll == 0 && cl->cl_polled) {
		b = cl->cl_polled;
		cl->cl_polled = NULL;
		return(b);
	}

	while ((b = cl->cl_head) != NULL) {
		/*
		 * Remove empty queues from consideration
		 */
		if (qempty(&b->queue)) {
			b->in_use = 0;
			cl->cl_head = b->next;
			if (cl->cl_head == b) {
				cl->cl_head = NULL;
			} else {
				b->next->prev = b->prev;
				b->prev->next = b->next;
			}
			continue;
		}

		/*
		 * Advance the round robin.  Queues with bandwidths less
		 * than the hog bandwidth are allowed to burst.
		 */
		if (cl->cl_hogs_m1 == 0) {
			cl->cl_head = b->next;
		} else if (b->bw_delta) {
			bw = b->bw_bytes * machclk_freq / b->bw_delta;
			if (bw >= cl->cl_hogs_m1) {
				cl->cl_head = b->next;
			}
			/*
			 * XXX TODO -
			 */
		}

		/*
		 * Return bucket b.
		 */
		break;
	}
	if (ispoll)
		cl->cl_polled = b;
	return(b);
}

static void
fairq_purgeq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	while ((b = fairq_selectq(cl, 0)) != NULL) {
		while ((m = _getq(&b->queue)) != NULL) {
			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
			m_freem(m);
		}
		KKASSERT(qlen(&b->queue) == 0);
	}
}

static void
get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
{
	fairq_bucket_t *b;

	sp->class_handle = cl->cl_handle;
	sp->qlimit = cl->cl_qlimit;
	sp->xmit_cnt = cl->cl_xmitcnt;
	sp->drop_cnt = cl->cl_dropcnt;
	sp->qtype = cl->cl_qtype;
	sp->qlength = 0;

	if (cl->cl_head) {
		b = cl->cl_head;
		do {
			sp->qlength += qlen(&b->queue);
			b = b->next;
		} while (b != cl->cl_head);
	}

#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
}

/* convert a class handle to the corresponding class pointer */
static struct fairq_class *
clh_to_clp(struct fairq_if *pif, uint32_t chandle)
{
	struct fairq_class *cl;
	int idx;

	if (chandle == 0)
		return (NULL);

	for (idx = pif->pif_maxpri; idx >= 0; idx--)
		if ((cl = pif->pif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

#endif /* ALTQ_FAIRQ */