xref: /openbsd/sys/net/fq_codel.c (revision d415bd75)
1 /* $OpenBSD: fq_codel.c,v 1.15 2022/01/02 22:36:03 jsg Exp $ */
2 
3 /*
4  * Copyright (c) 2017 Mike Belopuhov
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * Codel - The Controlled-Delay Active Queue Management algorithm
21  * IETF draft-ietf-aqm-codel-07
22  *
23  * Based on the algorithm by Kathleen Nichols and Van Jacobson with
24  * improvements from Dave Taht and Eric Dumazet.
25  */
26 
27 /*
28  * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
29  * IETF draft-ietf-aqm-fq-codel-06
30  *
31  * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
32  * Internet Architectures, Swinburne University of Technology, Melbourne,
33  * Australia.
34  */
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/socket.h>
39 #include <sys/mbuf.h>
40 #include <sys/queue.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/pfvar.h>
45 #include <net/fq_codel.h>
46 
47 /* #define FQCODEL_DEBUG 1 */
48 
49 #ifdef FQCODEL_DEBUG
50 #define DPRINTF(x...)		printf(x)
51 #else
52 #define DPRINTF(x...)
53 #endif
54 
/*
 * Per-flow CoDel queue state: the packet list itself plus the
 * bookkeeping required to run the CoDel control law over it.
 * All timestamps are in nanoseconds (see codel_gettime()).
 */
struct codel {
	struct mbuf_list q;		/* queued packets, FIFO order */

	unsigned int	 dropping:1;	/* Dropping state */
	unsigned int	 backlog:31;	/* Number of bytes in the queue */

	unsigned short	 drops;		/* Free running counter of drops */
	unsigned short	 ldrops;	/* Value from the previous run */

	int64_t		 start;		/* The moment queue was above target */
	int64_t		 next;		/* Next interval */
	int64_t		 delay;		/* Delay incurred by the last packet */
};
68 
/*
 * CoDel tunables shared by all flows of one fq-codel instance.
 */
struct codel_params {
	int64_t		 target;	/* acceptable queueing delay, ns */
	int64_t		 interval;	/* initial observation interval, ns */
	int		 quantum;	/* per-flow backlog floor, bytes */

	/* "100/sqrt(x)" control-law table, possibly rescaled (ns) */
	uint32_t	*intervals;
};
76 
77 void		 codel_initparams(struct codel_params *, unsigned int,
78 		    unsigned int, int);
79 void		 codel_freeparams(struct codel_params *);
80 void		 codel_enqueue(struct codel *, int64_t, struct mbuf *);
81 struct mbuf	*codel_dequeue(struct codel *, struct codel_params *, int64_t,
82 		    struct mbuf_list *, uint64_t *, uint64_t *);
83 struct mbuf	*codel_commit(struct codel *, struct mbuf *);
84 void		 codel_purge(struct codel *, struct mbuf_list *ml);
85 
/*
 * One fq-codel flow: a CoDel-managed queue plus the deficit
 * round-robin scheduling state used by first_flow()/next_flow().
 */
struct flow {
	struct codel		 cd;		/* per-flow CoDel queue */
	int			 active:1;	/* linked on newq/oldq? */
	int			 deficit:31;	/* DRR byte credit */
#ifdef FQCODEL_DEBUG
	uint16_t		 id;		/* flow index, debug only */
#endif
	SIMPLEQ_ENTRY(flow)	 flowentry;	/* newq/oldq linkage */
};
SIMPLEQ_HEAD(flowq, flow);
96 
/*
 * An fq-codel scheduler instance attached to one interface queue.
 */
struct fqcodel {
	struct flowq		 newq;		/* flows that just became active */
	struct flowq		 oldq;		/* flows already scheduled once */

	struct flow		*flows;		/* array of nflows flow slots */
	unsigned int		 qlength;	/* total packets across all flows */

	struct ifnet		*ifp;		/* owning interface (for MTU) */

	struct codel_params	 cparams;	/* shared CoDel tunables */

	unsigned int		 nflows;	/* number of flow slots */
	unsigned int		 qlimit;	/* aggregate packet limit */
	int			 quantum;	/* DRR quantum, bytes */

	unsigned int		 flags;
#define FQCF_FIXED_QUANTUM	  0x1		/* quantum set by pf, not MTU */

	/* stats */
	struct fqcodel_pktcntr   xmit_cnt;	/* transmitted pkts/bytes */
	struct fqcodel_pktcntr 	 drop_cnt;	/* dropped pkts/bytes */
};
119 
120 unsigned int	 fqcodel_idx(unsigned int, const struct mbuf *);
121 void		*fqcodel_alloc(unsigned int, void *);
122 void		 fqcodel_free(unsigned int, void *);
123 struct mbuf	*fqcodel_if_enq(struct ifqueue *, struct mbuf *);
124 struct mbuf	*fqcodel_if_deq_begin(struct ifqueue *, void **);
125 void		 fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *);
126 void		 fqcodel_if_purge(struct ifqueue *, struct mbuf_list *);
127 
128 struct mbuf	*fqcodel_enq(struct fqcodel *, struct mbuf *);
129 struct mbuf	*fqcodel_deq_begin(struct fqcodel *, void **,
130 		    struct mbuf_list *);
131 void		 fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *);
132 void		 fqcodel_purge(struct fqcodel *, struct mbuf_list *);
133 
134 /*
135  * ifqueue glue.
136  */
137 
138 static const struct ifq_ops fqcodel_ops = {
139 	fqcodel_idx,
140 	fqcodel_if_enq,
141 	fqcodel_if_deq_begin,
142 	fqcodel_if_deq_commit,
143 	fqcodel_if_purge,
144 	fqcodel_alloc,
145 	fqcodel_free
146 };
147 
148 const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;
149 
150 void		*fqcodel_pf_alloc(struct ifnet *);
151 int		 fqcodel_pf_addqueue(void *, struct pf_queuespec *);
152 void		 fqcodel_pf_free(void *);
153 int		 fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
154 unsigned int	 fqcodel_pf_qlength(void *);
155 struct mbuf *	 fqcodel_pf_enqueue(void *, struct mbuf *);
156 struct mbuf *	 fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
157 void		 fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
158 void		 fqcodel_pf_purge(void *, struct mbuf_list *);
159 
160 /*
161  * pf queue glue.
162  */
163 
164 static const struct pfq_ops fqcodel_pf_ops = {
165 	fqcodel_pf_alloc,
166 	fqcodel_pf_addqueue,
167 	fqcodel_pf_free,
168 	fqcodel_pf_qstats,
169 	fqcodel_pf_qlength,
170 	fqcodel_pf_enqueue,
171 	fqcodel_pf_deq_begin,
172 	fqcodel_pf_deq_commit,
173 	fqcodel_pf_purge
174 };
175 
176 const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops;
177 
178 /* Default aggregate queue depth */
179 static const unsigned int fqcodel_qlimit = 1024;
180 
181 /*
182  * CoDel implementation
183  */
184 
185 /* Delay target, 5ms */
186 static const int64_t codel_target = 5000000;
187 
188 /* Grace period after last drop, 16 100ms intervals */
189 static const int64_t codel_grace = 1600000000;
190 
191 /* First 399 "100 / sqrt(x)" intervals, ns precision */
192 static const uint32_t codel_intervals[] = {
193 	100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
194 	35355339,  33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
195 	25819889,  25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
196 	21320072,  20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
197 	18569534,  18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
198 	16666667,  16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
199 	15249857,  15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
200 	14142136,  14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
201 	13245324,  13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
202 	12500000,  12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
203 	11867817,  11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
204 	11322770,  11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
205 	10846523,  10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
206 	10425721,  10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
207 	10050378,  10000000, 9950372,  9901475,  9853293,  9805807,  9759001,
208 	9712859,   9667365,  9622504,  9578263,  9534626,  9491580,  9449112,
209 	9407209,   9365858,  9325048,  9284767,  9245003,  9205746,  9166985,
210 	9128709,   9090909,  9053575,  9016696,  8980265,  8944272,  8908708,
211 	8873565,   8838835,  8804509,  8770580,  8737041,  8703883,  8671100,
212 	8638684,   8606630,  8574929,  8543577,  8512565,  8481889,  8451543,
213 	8421519,   8391814,  8362420,  8333333,  8304548,  8276059,  8247861,
214 	8219949,   8192319,  8164966,  8137885,  8111071,  8084521,  8058230,
215 	8032193,   8006408,  7980869,  7955573,  7930516,  7905694,  7881104,
216 	7856742,   7832604,  7808688,  7784989,  7761505,  7738232,  7715167,
217 	7692308,   7669650,  7647191,  7624929,  7602859,  7580980,  7559289,
218 	7537784,   7516460,  7495317,  7474351,  7453560,  7432941,  7412493,
219 	7392213,   7372098,  7352146,  7332356,  7312724,  7293250,  7273930,
220 	7254763,   7235746,  7216878,  7198158,  7179582,  7161149,  7142857,
221 	7124705,   7106691,  7088812,  7071068,  7053456,  7035975,  7018624,
222 	7001400,   6984303,  6967330,  6950480,  6933752,  6917145,  6900656,
223 	6884284,   6868028,  6851887,  6835859,  6819943,  6804138,  6788442,
224 	6772855,   6757374,  6741999,  6726728,  6711561,  6696495,  6681531,
225 	6666667,   6651901,  6637233,  6622662,  6608186,  6593805,  6579517,
226 	6565322,   6551218,  6537205,  6523281,  6509446,  6495698,  6482037,
227 	6468462,   6454972,  6441566,  6428243,  6415003,  6401844,  6388766,
228 	6375767,   6362848,  6350006,  6337243,  6324555,  6311944,  6299408,
229 	6286946,   6274558,  6262243,  6250000,  6237829,  6225728,  6213698,
230 	6201737,   6189845,  6178021,  6166264,  6154575,  6142951,  6131393,
231 	6119901,   6108472,  6097108,  6085806,  6074567,  6063391,  6052275,
232 	6041221,   6030227,  6019293,  6008418,  5997601,  5986843,  5976143,
233 	5965500,   5954913,  5944383,  5933908,  5923489,  5913124,  5902813,
234 	5892557,   5882353,  5872202,  5862104,  5852057,  5842062,  5832118,
235 	5822225,   5812382,  5802589,  5792844,  5783149,  5773503,  5763904,
236 	5754353,   5744850,  5735393,  5725983,  5716620,  5707301,  5698029,
237 	5688801,   5679618,  5670480,  5661385,  5652334,  5643326,  5634362,
238 	5625440,   5616560,  5607722,  5598925,  5590170,  5581456,  5572782,
239 	5564149,   5555556,  5547002,  5538488,  5530013,  5521576,  5513178,
240 	5504819,   5496497,  5488213,  5479966,  5471757,  5463584,  5455447,
241 	5447347,   5439283,  5431254,  5423261,  5415304,  5407381,  5399492,
242 	5391639,   5383819,  5376033,  5368281,  5360563,  5352877,  5345225,
243 	5337605,   5330018,  5322463,  5314940,  5307449,  5299989,  5292561,
244 	5285164,   5277798,  5270463,  5263158,  5255883,  5248639,  5241424,
245 	5234239,   5227084,  5219958,  5212860,  5205792,  5198752,  5191741,
246 	5184758,   5177804,  5170877,  5163978,  5157106,  5150262,  5143445,
247 	5136655,   5129892,  5123155,  5116445,  5109761,  5103104,  5096472,
248 	5089866,   5083286,  5076731,  5070201,  5063697,  5057217,  5050763,
249 	5044333,   5037927,  5031546,  5025189,  5018856,  5012547,  5006262
250 };
251 
/*
 * Initialize the CoDel parameter block.
 *
 * When the configured interval exceeds the default 100ms, the static
 * control-law table is rescaled by interval/100ms into a private
 * allocation; otherwise the static table is shared (note the const
 * cast — codel_freeparams() checks pointer identity before freeing).
 * target/interval are presumably in nanoseconds, matching the table
 * and codel_target — confirm against the pf(4) caller.
 */
void
codel_initparams(struct codel_params *cp, unsigned int target,
    unsigned int interval, int quantum)
{
	uint64_t mult;
	unsigned int i;

	/*
	 * Update observation intervals table according to the configured
	 * initial interval value.
	 */
	if (interval > codel_intervals[0]) {
		/* Select either specified target or 5% of an interval */
		cp->target = MAX(target, interval / 5);
		cp->interval = interval;

		/* The coefficient is scaled up by a 1000 */
		mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];

		/* Prepare table of intervals */
		cp->intervals = mallocarray(nitems(codel_intervals),
		    sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < nitems(codel_intervals); i++)
			cp->intervals[i] = ((uint64_t)codel_intervals[i] *
			    mult) / 1000;
	} else {
		cp->target = MAX(target, codel_target);
		cp->interval = codel_intervals[0];
		cp->intervals = (uint32_t *)codel_intervals;
	}

	cp->quantum = quantum;
}
285 
286 void
287 codel_freeparams(struct codel_params *cp)
288 {
289 	if (cp->intervals != codel_intervals)
290 		free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
291 		    sizeof(codel_intervals[0]));
292 	cp->intervals = NULL;
293 }
294 
295 static inline void
296 codel_gettime(int64_t *now)
297 {
298 	struct timespec tv;
299 
300 	nanouptime(&tv);
301 	*now = tv.tv_sec * 1000000000LL + tv.tv_nsec;
302 }
303 
/* Number of bytes currently queued on this CoDel queue. */
static inline unsigned int
codel_backlog(struct codel *cd)
{
	return (cd->backlog);
}
309 
/* Number of packets currently queued on this CoDel queue. */
static inline unsigned int
codel_qlength(struct codel *cd)
{
	return (ml_len(&cd->q));
}
315 
/* Sojourn time (ns) of the most recently dequeued packet. */
static inline int64_t
codel_delay(struct codel *cd)
{
	return (cd->delay);
}
321 
/*
 * Append a packet to the queue, timestamping it so its sojourn time
 * can be computed at dequeue, and account its length in the backlog.
 */
void
codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
{
	m->m_pkthdr.ph_timestamp = now;

	ml_enqueue(&cd->q, m);
	cd->backlog += m->m_pkthdr.len;
}
330 
331 /*
332  * Select the next interval according to the number of drops
333  * in the current one relative to the provided timestamp.
334  */
/*
 * Select the next interval according to the number of drops
 * in the current one relative to the provided timestamp.
 *
 * The interval shrinks as 100ms/sqrt(drops) via the precomputed
 * table; past its last entry the smallest interval is reused.
 */
static inline void
control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
{
	unsigned int idx;

	idx = min(cd->drops, nitems(codel_intervals) - 1);
	cd->next = rts + cp->intervals[idx];
}
343 
344 /*
345  * Pick the next enqueued packet and determine the queueing delay
346  * as well as whether or not it's a good candidate for dropping
347  * from the queue.
348  *
349  * The decision whether to drop the packet or not is made based
350  * on the queueing delay target of 5ms and on the current queue
351  * length in bytes which shouldn't be less than the amount of data
352  * that arrives in a typical interarrival time (MTU-sized packets
353  * arriving spaced by the amount of time it takes to send such a
354  * packet on the bottleneck).
355  */
/*
 * Pick the next enqueued packet and determine the queueing delay
 * as well as whether or not it's a good candidate for dropping
 * from the queue.
 *
 * The decision whether to drop the packet or not is made based
 * on the queueing delay target of 5ms and on the current queue
 * length in bytes which shouldn't be less than the amount of data
 * that arrives in a typical interarrival time (MTU-sized packets
 * arriving spaced by the amount of time it takes to send such a
 * packet on the bottleneck).
 *
 * The packet is only peeked at here; removal happens later via
 * codel_commit().  *drop is set iff the head packet has been over
 * target for a full observation interval.
 */
static inline struct mbuf *
codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
    int *drop)
{
	struct mbuf *m;

	*drop = 0;

	m = MBUF_LIST_FIRST(&cd->q);
	if (m == NULL) {
		KASSERT(cd->backlog == 0);
		/* Empty queue, reset interval */
		cd->start = 0;
		return (NULL);
	}

	/* Below target delay, or too little data to judge congestion. */
	if (now - m->m_pkthdr.ph_timestamp < cp->target ||
	    cd->backlog <= cp->quantum) {
		/*
		 * The minimum delay decreased below the target, reset
		 * the current observation interval.
		 */
		cd->start = 0;
		return (m);
	}

	if (cd->start == 0) {
		/*
		 * This is the first packet to be delayed for more than
		 * the target, start the first observation interval after
		 * a single RTT and see if the minimum delay goes below
		 * the target within the interval, otherwise punish the
		 * next packet.
		 */
		cd->start = now + cp->interval;
	} else if (now > cd->start) {
		*drop = 1;
	}
	return (m);
}
396 
/* States of the codel_dequeue() per-call state machine. */
enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };

/*
 * Compute the next state of the dequeue state machine.
 *
 * FIRSTDROP always transitions to ACCEPTING (the packet after the
 * first drop is delivered).  While in the dropping state, a packet
 * below target moves to RECOVERY; one past the next-drop deadline
 * alternates DROPPING/CONTROL.  Outside the dropping state a drop
 * candidate enters FIRSTDROP, an empty queue RECOVERY, otherwise
 * the packet is simply accepted.
 */
static inline int
codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
    int state)
{
	if (state == FIRSTDROP)
		return (ACCEPTING);

	if (cd->dropping) {
		if (!drop)
			return (RECOVERY);
		else if (now >= cd->next)
			return (state == DROPPING ? CONTROL : DROPPING);
	} else if (drop)
		return (FIRSTDROP);

	if (m == NULL)
		return (RECOVERY);

	return (ACCEPTING);
}
419 
/*
 * Run the CoDel state machine on one flow and return the packet to
 * transmit, or NULL if the flow is empty.  Packets dropped along the
 * way are placed on free_ml and accounted via *dpkts/*dbytes.  The
 * returned packet is left at the head of the queue; the caller
 * removes it with codel_commit() once transmission is certain.
 */
struct mbuf *
codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
    struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
{
	struct mbuf *m;
	unsigned short delta;
	int drop, state, done = 0;

	state = INITIAL;

	while (!done) {
		m = codel_next_packet(cd, cp, now, &drop);
		state = codel_state_change(cd, now, m, drop, state);

		switch (state) {
		case FIRSTDROP:
			/* Drop the head packet and enter dropping state. */
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			cd->dropping = 1;

			/*
			 * If we're still within the grace period and not
			 * meeting our minimal delay target we treat this
			 * as a continuation of the previous observation
			 * interval and shrink it further.  Otherwise we
			 * start from the initial one.
			 */
			delta = cd->drops - cd->ldrops;
			if (delta > 1) {
				if (now < cd->next ||
				    now - cd->next < codel_grace)
					cd->drops = delta;
				else
					cd->drops = 1;
			} else
				cd->drops = 1;
			control_law(cd, cp, now);
			cd->ldrops = cd->drops;

			/* fetches the next packet and goes to ACCEPTING */
			break;
		case DROPPING:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);
			cd->drops++;

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			/* fetches the next packet and goes to CONTROL */
			break;
		case CONTROL:
			if (drop) {
				/* Schedule the next drop and re-evaluate. */
				control_law(cd, cp, cd->next);
				continue;
			}
			/* FALLTHROUGH */
		case RECOVERY:
			cd->dropping = 0;
			/* FALLTHROUGH */
		case ACCEPTING:
			done = 1;
			break;
		}
	}

	/* Record the sojourn time of the packet being handed out. */
	if (m != NULL)
		cd->delay = now - m->m_pkthdr.ph_timestamp;

	return (m);
}
495 
496 struct mbuf *
497 codel_commit(struct codel *cd, struct mbuf *m)
498 {
499 	struct mbuf *n;
500 
501 	n = ml_dequeue(&cd->q);
502 	if (m)
503 		KASSERT(n == m);
504 	KASSERT(n != NULL);
505 	KASSERT(cd->backlog >= n->m_pkthdr.len);
506 	cd->backlog -= n->m_pkthdr.len;
507 	return (n);
508 }
509 
/*
 * Move every queued packet onto the caller's list (for freeing)
 * and reset the byte backlog.
 */
void
codel_purge(struct codel *cd, struct mbuf_list *ml)
{
	ml_enlist(ml, &cd->q);
	cd->backlog = 0;
}
516 
517 /*
518  * FQ-CoDel implementation
519  */
520 
521 static inline struct flow *
522 classify_flow(struct fqcodel *fqc, struct mbuf *m)
523 {
524 	unsigned int index = 0;
525 
526 	if (m->m_pkthdr.csum_flags & M_FLOWID)
527 		index = m->m_pkthdr.ph_flowid % fqc->nflows;
528 
529 	DPRINTF("%s: %u\n", __func__, index);
530 
531 	return (&fqc->flows[index]);
532 }
533 
534 struct mbuf *
535 fqcodel_enq(struct fqcodel *fqc, struct mbuf *m)
536 {
537 	struct flow *flow;
538 	unsigned int backlog = 0;
539 	int64_t now;
540 	int i;
541 
542 	flow = classify_flow(fqc, m);
543 	if (flow == NULL)
544 		return (m);
545 
546 	codel_gettime(&now);
547 	codel_enqueue(&flow->cd, now, m);
548 	fqc->qlength++;
549 
550 	if (!flow->active) {
551 		SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
552 		flow->deficit = fqc->quantum;
553 		flow->active = 1;
554 		DPRINTF("%s: flow %u active deficit %d\n", __func__,
555 		    flow->id, flow->deficit);
556 	}
557 
558 	/*
559 	 * Check the limit for all queues and remove a packet
560 	 * from the longest one.
561 	 */
562 	if (fqc->qlength >= fqcodel_qlimit) {
563 		for (i = 0; i < fqc->nflows; i++) {
564 			if (codel_backlog(&fqc->flows[i].cd) > backlog) {
565 				flow = &fqc->flows[i];
566 				backlog = codel_backlog(&flow->cd);
567 			}
568 		}
569 
570 		KASSERT(flow != NULL);
571 		m = codel_commit(&flow->cd, NULL);
572 
573 		fqc->drop_cnt.packets++;
574 		fqc->drop_cnt.bytes += m->m_pkthdr.len;
575 
576 		fqc->qlength--;
577 
578 		DPRINTF("%s: dropping from flow %u\n", __func__,
579 		    flow->id);
580 		return (m);
581 	}
582 
583 	return (NULL);
584 }
585 
586 static inline struct flowq *
587 select_queue(struct fqcodel *fqc)
588 {
589 	struct flowq *fq = NULL;
590 
591 	if (!SIMPLEQ_EMPTY(&fqc->newq))
592 		fq = &fqc->newq;
593 	else if (!SIMPLEQ_EMPTY(&fqc->oldq))
594 		fq = &fqc->oldq;
595 	return (fq);
596 }
597 
/*
 * Deficit round robin: return the first active flow with a positive
 * byte credit, setting *fq to the list it was found on.  Flows that
 * have exhausted their deficit are topped up with one quantum and
 * rotated to the tail of the old list.  Returns NULL when no active
 * flow remains.
 */
static inline struct flow *
first_flow(struct fqcodel *fqc, struct flowq **fq)
{
	struct flow *flow;

	while ((*fq = select_queue(fqc)) != NULL) {
		while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
			if (flow->deficit <= 0) {
				flow->deficit += fqc->quantum;
				SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
				SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
				    flowentry);
				DPRINTF("%s: flow %u deficit %d\n", __func__,
				    flow->id, flow->deficit);
			} else
				return (flow);
		}
	}

	return (NULL);
}
619 
/*
 * Retire a flow that yielded no packet and pick the next candidate.
 * An empty flow from the new list is given another chance at the
 * tail of the old list (per the FQ-CoDel draft); an empty flow
 * already on the old list is deactivated.
 */
static inline struct flow *
next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
{
	SIMPLEQ_REMOVE_HEAD(*fq, flowentry);

	if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
		/* A packet was dropped, starve the queue */
		SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
		DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
		    flow->id, flow->deficit);
	} else {
		/* A packet was dropped on a starved queue, disable it */
		flow->active = 0;
		DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	return (first_flow(fqc, fq));
}
639 
/*
 * Start a dequeue: walk the flows in DRR order, running CoDel on
 * each, until one yields a packet.  Packets CoDel drops along the
 * way are collected on free_ml for the caller to free.  On success
 * the flow is stashed in *cookiep for fqcodel_deq_commit() and the
 * packet is returned still at the head of its queue.
 */
struct mbuf *
fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
    struct mbuf_list *free_ml)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct flowq *fq;
	struct flow *flow;
	struct mbuf *m;
	int64_t now;

	/* Track the interface MTU unless pf configured a fixed quantum. */
	if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
		fqc->quantum = fqc->ifp->if_mtu + max_linkhdr;

	codel_gettime(&now);

	for (flow = first_flow(fqc, &fq); flow != NULL;
	     flow = next_flow(fqc, flow, &fq)) {
		m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
		    &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);

		/* Account the packets CoDel dropped from this flow. */
		KASSERT(fqc->qlength >= ml_len(&ml));
		fqc->qlength -= ml_len(&ml);

		ml_enlist(free_ml, &ml);

		if (m != NULL) {
			flow->deficit -= m->m_pkthdr.len;
			DPRINTF("%s: flow %u deficit %d\n", __func__,
			    flow->id, flow->deficit);
			*cookiep = flow;
			return (m);
		}
	}

	return (NULL);
}
676 
677 void
678 fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie)
679 {
680 	struct flow *flow = cookie;
681 
682 	KASSERT(fqc->qlength > 0);
683 	fqc->qlength--;
684 
685 	fqc->xmit_cnt.packets++;
686 	fqc->xmit_cnt.bytes += m->m_pkthdr.len;
687 
688 	(void)codel_commit(&flow->cd, m);
689 }
690 
691 void
692 fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml)
693 {
694 	unsigned int i;
695 
696 	for (i = 0; i < fqc->nflows; i++)
697 		codel_purge(&fqc->flows[i].cd, ml);
698 	fqc->qlength = 0;
699 }
700 
701 struct mbuf *
702 fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m)
703 {
704 	return fqcodel_enq(ifq->ifq_q, m);
705 }
706 
/*
 * ifq_ops glue: begin a dequeue, freeing any packets CoDel dropped
 * through the ifqueue accounting.
 */
struct mbuf *
fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;

	m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml);
	ifq_mfreeml(ifq, &free_ml);
	return (m);
}
717 
718 void
719 fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
720 {
721 	return fqcodel_deq_commit(ifq->ifq_q, m, cookie);
722 }
723 
724 void
725 fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml)
726 {
727 	return fqcodel_purge(ifq->ifq_q, ml);
728 }
729 
730 void *
731 fqcodel_pf_alloc(struct ifnet *ifp)
732 {
733 	struct fqcodel *fqc;
734 
735 	fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO);
736 
737 	SIMPLEQ_INIT(&fqc->newq);
738 	SIMPLEQ_INIT(&fqc->oldq);
739 
740 	return (fqc);
741 }
742 
/*
 * pf glue: configure the instance from a pf queue specification.
 * Validates the flow count (1..65535), applies defaults for qlimit
 * and quantum (interface MTU plus link header when unset), builds
 * the CoDel parameters and allocates the flow array.  Returns 0 or
 * EINVAL.
 */
int
fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel *fqc = arg;

	if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > 0xffff)
		return (EINVAL);

	fqc->nflows = qs->flowqueue.flows;
	fqc->quantum = qs->flowqueue.quantum;
	if (qs->qlimit > 0)
		fqc->qlimit = qs->qlimit;
	else
		fqc->qlimit = fqcodel_qlimit;
	if (fqc->quantum > 0)
		fqc->flags |= FQCF_FIXED_QUANTUM;
	else
		fqc->quantum = ifp->if_mtu + max_linkhdr;

	codel_initparams(&fqc->cparams, qs->flowqueue.target,
	    qs->flowqueue.interval, fqc->quantum);

	fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow),
	    M_DEVBUF, M_WAITOK | M_ZERO);

#ifdef FQCODEL_DEBUG
	{
		unsigned int i;

		for (i = 0; i < fqc->nflows; i++)
			fqc->flows[i].id = i;
	}
#endif

	fqc->ifp = ifp;

	DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums "
	    "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit,
	    fqc->quantum, fqc->cparams.target / 1000000,
	    fqc->cparams.interval / 1000000);

	return (0);
}
787 
/*
 * pf glue: tear down an instance — release the (possibly private)
 * interval table, the flow array and finally the instance itself.
 */
void
fqcodel_pf_free(void *arg)
{
	struct fqcodel *fqc = arg;

	codel_freeparams(&fqc->cparams);
	free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
	free(fqc, M_DEVBUF, sizeof(struct fqcodel));
}
797 
/*
 * pf glue: copy queue statistics out to userland.  Aggregates the
 * per-flow delay of every backlogged flow into sum and sum-of-squares
 * (microseconds, to avoid 64-bit overflow) so userland can derive
 * mean and variance.  Returns 0, EBADF or EINVAL.
 */
int
fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel_stats stats;
	struct fqcodel *fqc;
	int64_t delay;
	unsigned int i;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));

	/* XXX: multi-q? */
	fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops);
	if (fqc == NULL)
		return (EBADF);

	stats.xmit_cnt = fqc->xmit_cnt;
	stats.drop_cnt = fqc->drop_cnt;

	stats.qlength = ifq_len(&ifp->if_snd);
	stats.qlimit = fqc->qlimit;

	stats.flows = 0;
	stats.delaysum = stats.delaysumsq = 0;

	for (i = 0; i < fqc->nflows; i++) {
		if (codel_qlength(&fqc->flows[i].cd) == 0)
			continue;
		/* Scale down to microseconds to avoid overflows */
		delay = codel_delay(&fqc->flows[i].cd) / 1000;
		stats.delaysum += delay;
		stats.delaysumsq += delay * delay;
		stats.flows++;
	}

	ifq_q_leave(&ifp->if_snd, fqc);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}
848 
849 unsigned int
850 fqcodel_pf_qlength(void *fqc)
851 {
852 	return ((struct fqcodel *)fqc)->qlength;
853 }
854 
/* pf glue: enqueue a packet on the instance. */
struct mbuf *
fqcodel_pf_enqueue(void *arg, struct mbuf *m)
{
	return (fqcodel_enq(arg, m));
}
860 
/* pf glue: begin a dequeue; dropped packets land on free_ml. */
struct mbuf *
fqcodel_pf_deq_begin(void *arg, void **cookiep, struct mbuf_list *free_ml)
{
	return (fqcodel_deq_begin(arg, cookiep, free_ml));
}
866 
/*
 * pf glue: commit the dequeue.  (A void function must not return
 * an expression — C11 6.8.6.4 — so call, don't "return".)
 */
void
fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie)
{
	fqcodel_deq_commit(fqc, m, cookie);
}
872 
/*
 * pf glue: purge all queues.  (A void function must not return an
 * expression — C11 6.8.6.4 — so call, don't "return".)
 */
void
fqcodel_pf_purge(void *fqc, struct mbuf_list *ml)
{
	fqcodel_purge(fqc, ml);
}
878 
/*
 * ifq_ops glue: fq-codel does its own flow hashing, so every packet
 * maps to the single ifqueue instance 0.
 */
unsigned int
fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
{
	return (0);
}
884 
/*
 * ifq_ops glue: hand back the instance created by fqcodel_pf_alloc();
 * no per-ifqueue allocation of our own.
 */
void *
fqcodel_alloc(unsigned int idx, void *arg)
{
	/* Allocation is done in fqcodel_pf_alloc */
	return (arg);
}
891 
/*
 * ifq_ops glue: freeing is handled by fqcodel_pf_free(), which owns
 * the instance.
 */
void
fqcodel_free(unsigned int idx, void *arg)
{
	/* nothing to do here */
}
897