xref: /openbsd/sys/net/fq_codel.c (revision e5dd7070)
1 /* $OpenBSD: fq_codel.c,v 1.13 2020/06/18 23:29:59 dlg Exp $ */
2 
3 /*
4  * Copyright (c) 2017 Mike Belopuhov
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * Codel - The Controlled-Delay Active Queue Management algorithm
21  * IETF draft-ietf-aqm-codel-07
22  *
23  * Based on the algorithm by Kathleen Nichols and Van Jacobson with
24  * improvements from Dave Taht and Eric Dumazet.
25  */
26 
27 /*
28  * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
29  * IETF draft-ietf-aqm-fq-codel-06
30  *
31  * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
32  * Internet Architectures, Swinburne University of Technology, Melbourne,
33  * Australia.
34  */
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/socket.h>
39 #include <sys/mbuf.h>
40 #include <sys/queue.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/pfvar.h>
45 #include <net/fq_codel.h>
46 
/* #define FQCODEL_DEBUG 1 */

#ifdef FQCODEL_DEBUG
/* Debug build: DPRINTF() traces to the console. */
#define DPRINTF(x...)		printf(x)
#else
/* Normal build: DPRINTF() compiles away entirely. */
#define DPRINTF(x...)
#endif
54 
/*
 * Per-flow CoDel queue state (one instance embedded in each struct flow).
 */
struct codel {
	struct mbuf_list q;		/* FIFO of queued packets */

	unsigned int	 dropping:1;	/* Dropping state */
	unsigned int	 backlog:31;	/* Number of bytes in the queue */

	unsigned short	 drops;		/* Free running counter of drops */
	unsigned short	 ldrops;	/* Value from the previous run */

	int64_t		 start;		/* The moment queue was above target */
	int64_t		 next;		/* Next interval */
	int64_t		 delay;		/* Delay incurred by the last packet */
};
68 
/*
 * CoDel tuning parameters, shared by all flows of one fq_codel instance.
 * Time values are in nanoseconds (see codel_target/codel_intervals).
 */
struct codel_params {
	int64_t		 target;	/* acceptable queueing delay */
	int64_t		 interval;	/* initial observation interval */
	int		 quantum;	/* backlog floor, bytes (~1 MTU) */

	uint32_t	*intervals;	/* drop-interval table, ns */
};
76 
77 void		 codel_initparams(struct codel_params *, unsigned int,
78 		    unsigned int, int);
79 void		 codel_freeparams(struct codel_params *);
80 void		 codel_enqueue(struct codel *, int64_t, struct mbuf *);
81 struct mbuf	*codel_dequeue(struct codel *, struct codel_params *, int64_t,
82 		    struct mbuf_list *, uint64_t *, uint64_t *);
83 struct mbuf	*codel_commit(struct codel *, struct mbuf *);
84 void		 codel_purge(struct codel *, struct mbuf_list *ml);
85 
/*
 * One flow queue: CoDel state plus deficit round-robin bookkeeping.
 */
struct flow {
	struct codel		 cd;		/* per-flow CoDel queue */
	int			 active:1;	/* linked on newq or oldq */
	int			 deficit:31;	/* DRR byte credit */
#ifdef FQCODEL_DEBUG
	uint16_t		 id;		/* flow index, debug only */
#endif
	SIMPLEQ_ENTRY(flow)	 flowentry;	/* newq/oldq linkage */
};
SIMPLEQ_HEAD(flowq, flow);
96 
/*
 * fq_codel instance state: the flow array, the two DRR lists and the
 * shared CoDel parameters for one interface queue.
 */
struct fqcodel {
	struct flowq		 newq;	/* flows with new-flow priority */
	struct flowq		 oldq;	/* previously served flows */

	struct flow		*flows;	/* array of nflows flow queues */
	unsigned int		 qlength;	/* total packets queued */

	struct ifnet		*ifp;	/* used to track the interface MTU */

	struct codel_params	 cparams;	/* shared CoDel parameters */

	unsigned int		 nflows;	/* number of flow queues */
	unsigned int		 qlimit;	/* aggregate packet limit */
	int			 quantum;	/* DRR quantum, bytes */

	unsigned int		 flags;
#define FQCF_FIXED_QUANTUM	  0x1	/* quantum set by pf, don't track MTU */

	/* stats */
	struct fqcodel_pktcntr   xmit_cnt;
	struct fqcodel_pktcntr 	 drop_cnt;
};
119 
120 unsigned int	 fqcodel_idx(unsigned int, const struct mbuf *);
121 void		*fqcodel_alloc(unsigned int, void *);
122 void		 fqcodel_free(unsigned int, void *);
123 struct mbuf	*fqcodel_if_enq(struct ifqueue *, struct mbuf *);
124 struct mbuf	*fqcodel_if_deq_begin(struct ifqueue *, void **);
125 void		 fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *);
126 void		 fqcodel_if_purge(struct ifqueue *, struct mbuf_list *);
127 
128 struct mbuf	*fqcodel_enq(struct fqcodel *, struct mbuf *);
129 struct mbuf	*fqcodel_deq_begin(struct fqcodel *, void **,
130 		    struct mbuf_list *);
131 void		 fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *);
132 void		 fqcodel_purge(struct fqcodel *, struct mbuf_list *);
133 
134 /*
135  * ifqueue glue.
136  */
137 
static const struct ifq_ops fqcodel_ops = {
	fqcodel_idx,		/* queue index (single queue, always 0) */
	fqcodel_if_enq,		/* enqueue */
	fqcodel_if_deq_begin,	/* start dequeue */
	fqcodel_if_deq_commit,	/* commit dequeue */
	fqcodel_if_purge,	/* purge all packets */
	fqcodel_alloc,		/* per-queue alloc (no-op, pf allocates) */
	fqcodel_free		/* per-queue free (no-op) */
};

const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;
149 
150 void		*fqcodel_pf_alloc(struct ifnet *);
151 int		 fqcodel_pf_addqueue(void *, struct pf_queuespec *);
152 void		 fqcodel_pf_free(void *);
153 int		 fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
154 unsigned int	 fqcodel_pf_qlength(void *);
155 struct mbuf *	 fqcodel_pf_enqueue(void *, struct mbuf *);
156 struct mbuf *	 fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
157 void		 fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
158 void		 fqcodel_pf_purge(void *, struct mbuf_list *);
159 
160 /*
161  * pf queue glue.
162  */
163 
static const struct pfq_ops fqcodel_pf_ops = {
	fqcodel_pf_alloc,	/* allocate instance state */
	fqcodel_pf_addqueue,	/* configure from a pf queue spec */
	fqcodel_pf_free,	/* release instance state */
	fqcodel_pf_qstats,	/* copy stats out to pfctl */
	fqcodel_pf_qlength,	/* current aggregate queue length */
	fqcodel_pf_enqueue,	/* enqueue */
	fqcodel_pf_deq_begin,	/* start dequeue */
	fqcodel_pf_deq_commit,	/* commit dequeue */
	fqcodel_pf_purge	/* purge all packets */
};

const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops;
177 
/* Default aggregate queue depth, packets */
static const unsigned int fqcodel_qlimit = 1024;

/*
 * CoDel implementation
 */

/* Delay target, 5ms (in nanoseconds) */
static const int64_t codel_target = 5000000;

/* Grace period after last drop, 16 100ms intervals (in nanoseconds) */
static const int64_t codel_grace = 1600000000;
190 
/* First 399 "100 / sqrt(x)" intervals, ns precision */
192 static const uint32_t codel_intervals[] = {
193 	100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
194 	35355339,  33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
195 	25819889,  25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
196 	21320072,  20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
197 	18569534,  18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
198 	16666667,  16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
199 	15249857,  15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
200 	14142136,  14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
201 	13245324,  13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
202 	12500000,  12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
203 	11867817,  11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
204 	11322770,  11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
205 	10846523,  10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
206 	10425721,  10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
207 	10050378,  10000000, 9950372,  9901475,  9853293,  9805807,  9759001,
208 	9712859,   9667365,  9622504,  9578263,  9534626,  9491580,  9449112,
209 	9407209,   9365858,  9325048,  9284767,  9245003,  9205746,  9166985,
210 	9128709,   9090909,  9053575,  9016696,  8980265,  8944272,  8908708,
211 	8873565,   8838835,  8804509,  8770580,  8737041,  8703883,  8671100,
212 	8638684,   8606630,  8574929,  8543577,  8512565,  8481889,  8451543,
213 	8421519,   8391814,  8362420,  8333333,  8304548,  8276059,  8247861,
214 	8219949,   8192319,  8164966,  8137885,  8111071,  8084521,  8058230,
215 	8032193,   8006408,  7980869,  7955573,  7930516,  7905694,  7881104,
216 	7856742,   7832604,  7808688,  7784989,  7761505,  7738232,  7715167,
217 	7692308,   7669650,  7647191,  7624929,  7602859,  7580980,  7559289,
218 	7537784,   7516460,  7495317,  7474351,  7453560,  7432941,  7412493,
219 	7392213,   7372098,  7352146,  7332356,  7312724,  7293250,  7273930,
220 	7254763,   7235746,  7216878,  7198158,  7179582,  7161149,  7142857,
221 	7124705,   7106691,  7088812,  7071068,  7053456,  7035975,  7018624,
222 	7001400,   6984303,  6967330,  6950480,  6933752,  6917145,  6900656,
223 	6884284,   6868028,  6851887,  6835859,  6819943,  6804138,  6788442,
224 	6772855,   6757374,  6741999,  6726728,  6711561,  6696495,  6681531,
225 	6666667,   6651901,  6637233,  6622662,  6608186,  6593805,  6579517,
226 	6565322,   6551218,  6537205,  6523281,  6509446,  6495698,  6482037,
227 	6468462,   6454972,  6441566,  6428243,  6415003,  6401844,  6388766,
228 	6375767,   6362848,  6350006,  6337243,  6324555,  6311944,  6299408,
229 	6286946,   6274558,  6262243,  6250000,  6237829,  6225728,  6213698,
230 	6201737,   6189845,  6178021,  6166264,  6154575,  6142951,  6131393,
231 	6119901,   6108472,  6097108,  6085806,  6074567,  6063391,  6052275,
232 	6041221,   6030227,  6019293,  6008418,  5997601,  5986843,  5976143,
233 	5965500,   5954913,  5944383,  5933908,  5923489,  5913124,  5902813,
234 	5892557,   5882353,  5872202,  5862104,  5852057,  5842062,  5832118,
235 	5822225,   5812382,  5802589,  5792844,  5783149,  5773503,  5763904,
236 	5754353,   5744850,  5735393,  5725983,  5716620,  5707301,  5698029,
237 	5688801,   5679618,  5670480,  5661385,  5652334,  5643326,  5634362,
238 	5625440,   5616560,  5607722,  5598925,  5590170,  5581456,  5572782,
239 	5564149,   5555556,  5547002,  5538488,  5530013,  5521576,  5513178,
240 	5504819,   5496497,  5488213,  5479966,  5471757,  5463584,  5455447,
241 	5447347,   5439283,  5431254,  5423261,  5415304,  5407381,  5399492,
242 	5391639,   5383819,  5376033,  5368281,  5360563,  5352877,  5345225,
243 	5337605,   5330018,  5322463,  5314940,  5307449,  5299989,  5292561,
244 	5285164,   5277798,  5270463,  5263158,  5255883,  5248639,  5241424,
245 	5234239,   5227084,  5219958,  5212860,  5205792,  5198752,  5191741,
246 	5184758,   5177804,  5170877,  5163978,  5157106,  5150262,  5143445,
247 	5136655,   5129892,  5123155,  5116445,  5109761,  5103104,  5096472,
248 	5089866,   5083286,  5076731,  5070201,  5063697,  5057217,  5050763,
249 	5044333,   5037927,  5031546,  5025189,  5018856,  5012547,  5006262
250 };
251 
252 void
253 codel_initparams(struct codel_params *cp, unsigned int target,
254     unsigned int interval, int quantum)
255 {
256 	uint64_t mult;
257 	unsigned int i;
258 
259 	/*
260 	 * Update observation intervals table according to the configured
261 	 * initial interval value.
262 	 */
263 	if (interval > codel_intervals[0]) {
264 		/* Select either specified target or 5% of an interval */
265 		cp->target = MAX(target, interval / 5);
266 		cp->interval = interval;
267 
268 		/* The coefficient is scaled up by a 1000 */
269 		mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];
270 
271 		/* Prepare table of intervals */
272 		cp->intervals = mallocarray(nitems(codel_intervals),
273 		    sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
274 		for (i = 0; i < nitems(codel_intervals); i++)
275 			cp->intervals[i] = ((uint64_t)codel_intervals[i] *
276 			    mult) / 1000;
277 	} else {
278 		cp->target = MAX(target, codel_target);
279 		cp->interval = codel_intervals[0];
280 		cp->intervals = (uint32_t *)codel_intervals;
281 	}
282 
283 	cp->quantum = quantum;
284 }
285 
286 void
287 codel_freeparams(struct codel_params *cp)
288 {
289 	if (cp->intervals != codel_intervals)
290 		free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
291 		    sizeof(codel_intervals[0]));
292 	cp->intervals = NULL;
293 }
294 
295 static inline void
296 codel_gettime(int64_t *now)
297 {
298 	struct timespec tv;
299 
300 	nanouptime(&tv);
301 	*now = tv.tv_sec * 1000000000LL + tv.tv_nsec;
302 }
303 
/* Queue depth in bytes. */
static inline unsigned int
codel_backlog(struct codel *cd)
{
	return (cd->backlog);
}

/* Queue depth in packets. */
static inline unsigned int
codel_qlength(struct codel *cd)
{
	return (ml_len(&cd->q));
}

/* Queueing delay incurred by the most recently dequeued packet, ns. */
static inline int64_t
codel_delay(struct codel *cd)
{
	return (cd->delay);
}
321 
322 void
323 codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
324 {
325 	m->m_pkthdr.ph_timestamp = now;
326 
327 	ml_enqueue(&cd->q, m);
328 	cd->backlog += m->m_pkthdr.len;
329 }
330 
331 /*
332  * Select the next interval according to the number of drops
333  * in the current one relative to the provided timestamp.
334  */
335 static inline void
336 control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
337 {
338 	unsigned int idx;
339 
340 	idx = min(cd->drops, nitems(codel_intervals) - 1);
341 	cd->next = rts + cp->intervals[idx];
342 }
343 
/*
 * Pick the next enqueued packet and determine the queueing delay
 * as well as whether or not it's a good candidate for dropping
 * from the queue.
 *
 * The decision whether to drop the packet or not is made based
 * on the queueing delay target of 5ms and on the current queue
 * length in bytes which shouldn't be less than the amount of data
 * that arrives in a typical interarrival time (MTU-sized packets
 * arriving spaced by the amount of time it takes to send such a
 * packet on the bottleneck).
 *
 * Returns the head packet (still enqueued) or NULL when the queue is
 * empty; *drop is set when the packet has stayed above the target for
 * a whole observation interval.
 */
static inline struct mbuf *
codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
    int *drop)
{
	struct mbuf *m;

	*drop = 0;

	m = MBUF_LIST_FIRST(&cd->q);
	if (m == NULL) {
		KASSERT(cd->backlog == 0);
		/* Empty queue, reset interval */
		cd->start = 0;
		return (NULL);
	}

	/*
	 * Sojourn time below target, or too little backlog (less than
	 * one quantum) to be worth controlling.
	 */
	if (now - m->m_pkthdr.ph_timestamp < cp->target ||
	    cd->backlog <= cp->quantum) {
		/*
		 * The minimum delay decreased below the target, reset
		 * the current observation interval.
		 */
		cd->start = 0;
		return (m);
	}

	if (cd->start == 0) {
		/*
		 * This is the first packet to be delayed for more than
		 * the target, start the first observation interval after
		 * a single RTT and see if the minimum delay goes below
		 * the target within the interval, otherwise punish the
		 * next packet.
		 */
		cd->start = now + cp->interval;
	} else if (now > cd->start) {
		*drop = 1;
	}
	return (m);
}
396 
/* States of the codel_dequeue() state machine. */
enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };

/*
 * Compute the next state from the current one, the candidate packet
 * and its drop verdict.  FIRSTDROP and DROPPING are transient: the
 * caller drops the packet and re-evaluates with the next one.
 */
static inline int
codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
    int state)
{
	/* After the first drop the next packet is accepted as-is. */
	if (state == FIRSTDROP)
		return (ACCEPTING);

	if (cd->dropping) {
		if (!drop)
			return (RECOVERY);
		else if (now >= cd->next)
			return (state == DROPPING ? CONTROL : DROPPING);
		/* else: still dropping but before the scheduled time;
		 * fall through and keep accepting. */
	} else if (drop)
		return (FIRSTDROP);

	if (m == NULL)
		return (RECOVERY);

	return (ACCEPTING);
}
419 
/*
 * Dequeue one packet, dropping head packets onto free_ml for as long
 * as the state machine decides they have overstayed the delay target.
 * dpkts/dbytes accumulate drop statistics.  Returns the packet to
 * transmit (still enqueued until codel_commit()) or NULL.
 */
struct mbuf *
codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
    struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
{
	struct mbuf *m;
	unsigned short delta;
	int drop, state, done = 0;

	state = INITIAL;

	while (!done) {
		m = codel_next_packet(cd, cp, now, &drop);
		state = codel_state_change(cd, now, m, drop, state);

		switch (state) {
		case FIRSTDROP:
			/* Enter the dropping state with this packet. */
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			cd->dropping = 1;

			/*
			 * If we're still within the grace period and not
			 * meeting our minimal delay target we treat this
			 * as a continuation of the previous observation
			 * interval and shrink it further.  Otherwise we
			 * start from the initial one.
			 */
			delta = cd->drops - cd->ldrops;
			if (delta > 1) {
				if (now < cd->next ||
				    now - cd->next < codel_grace)
					cd->drops = delta;
				else
					cd->drops = 1;
			} else
				cd->drops = 1;
			control_law(cd, cp, now);
			cd->ldrops = cd->drops;

			/* fetches the next packet and goes to ACCEPTING */
			break;
		case DROPPING:
			/* Scheduled drop inside the dropping state. */
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);
			cd->drops++;

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			/* fetches the next packet and goes to CONTROL */
			break;
		case CONTROL:
			if (drop) {
				/* Shrink the interval from the last
				 * scheduled drop time, not from now. */
				control_law(cd, cp, cd->next);
				continue;
			}
			/* FALLTHROUGH */
		case RECOVERY:
			cd->dropping = 0;
			/* FALLTHROUGH */
		case ACCEPTING:
			done = 1;
			break;
		}
	}

	/* Record the sojourn time of the packet being handed out. */
	if (m != NULL)
		cd->delay = now - m->m_pkthdr.ph_timestamp;

	return (m);
}
495 
/*
 * Remove the head packet from the queue and account for it.  If m is
 * non-NULL it must be the packet previously peeked at (sanity checked).
 * The queue must not be empty.  Returns the dequeued packet.
 */
struct mbuf *
codel_commit(struct codel *cd, struct mbuf *m)
{
	struct mbuf *n;

	n = ml_dequeue(&cd->q);
	if (m)
		KASSERT(n == m);
	KASSERT(n != NULL);
	KASSERT(cd->backlog >= n->m_pkthdr.len);
	cd->backlog -= n->m_pkthdr.len;
	return (n);
}
509 
510 void
511 codel_purge(struct codel *cd, struct mbuf_list *ml)
512 {
513 	ml_enlist(ml, &cd->q);
514 	cd->backlog = 0;
515 }
516 
517 /*
518  * FQ-CoDel implementation
519  */
520 
521 static inline struct flow *
522 classify_flow(struct fqcodel *fqc, struct mbuf *m)
523 {
524 	unsigned int index;
525 
526 	if (m->m_pkthdr.csum_flags & M_FLOWID)
527 		index = m->m_pkthdr.ph_flowid % fqc->nflows;
528 	else
529 		index = arc4random_uniform(fqc->nflows);
530 
531 	DPRINTF("%s: %u\n", __func__, index);
532 
533 	return (&fqc->flows[index]);
534 }
535 
536 struct mbuf *
537 fqcodel_enq(struct fqcodel *fqc, struct mbuf *m)
538 {
539 	struct flow *flow;
540 	unsigned int backlog = 0;
541 	int64_t now;
542 	int i;
543 
544 	flow = classify_flow(fqc, m);
545 	if (flow == NULL)
546 		return (m);
547 
548 	codel_gettime(&now);
549 	codel_enqueue(&flow->cd, now, m);
550 	fqc->qlength++;
551 
552 	if (!flow->active) {
553 		SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
554 		flow->deficit = fqc->quantum;
555 		flow->active = 1;
556 		DPRINTF("%s: flow %u active deficit %d\n", __func__,
557 		    flow->id, flow->deficit);
558 	}
559 
560 	/*
561 	 * Check the limit for all queues and remove a packet
562 	 * from the longest one.
563 	 */
564 	if (fqc->qlength >= fqcodel_qlimit) {
565 		for (i = 0; i < fqc->nflows; i++) {
566 			if (codel_backlog(&fqc->flows[i].cd) > backlog) {
567 				flow = &fqc->flows[i];
568 				backlog = codel_backlog(&flow->cd);
569 			}
570 		}
571 
572 		KASSERT(flow != NULL);
573 		m = codel_commit(&flow->cd, NULL);
574 
575 		fqc->drop_cnt.packets++;
576 		fqc->drop_cnt.bytes += m->m_pkthdr.len;
577 
578 		fqc->qlength--;
579 
580 		DPRINTF("%s: dropping from flow %u\n", __func__,
581 		    flow->id);
582 		return (m);
583 	}
584 
585 	return (NULL);
586 }
587 
588 static inline struct flowq *
589 select_queue(struct fqcodel *fqc)
590 {
591 	struct flowq *fq = NULL;
592 
593 	if (!SIMPLEQ_EMPTY(&fqc->newq))
594 		fq = &fqc->newq;
595 	else if (!SIMPLEQ_EMPTY(&fqc->oldq))
596 		fq = &fqc->oldq;
597 	return (fq);
598 }
599 
/*
 * Deficit round-robin scan: return the first flow with a positive
 * deficit, replenishing exhausted flows and rotating them onto the
 * old list on the way.  *fq is set to the list the flow came from.
 * Returns NULL when both lists are empty.
 */
static inline struct flow *
first_flow(struct fqcodel *fqc, struct flowq **fq)
{
	struct flow *flow;

	while ((*fq = select_queue(fqc)) != NULL) {
		while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
			if (flow->deficit <= 0) {
				/* Out of credit: top up, move to oldq. */
				flow->deficit += fqc->quantum;
				SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
				SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
				    flowentry);
				DPRINTF("%s: flow %u deficit %d\n", __func__,
				    flow->id, flow->deficit);
			} else
				return (flow);
		}
	}

	return (NULL);
}
621 
/*
 * The current flow yielded no packet this round (codel_dequeue()
 * returned NULL in fqcodel_deq_begin()).  Rotate it out and resume
 * the scan with the next eligible flow.
 */
static inline struct flow *
next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
{
	SIMPLEQ_REMOVE_HEAD(*fq, flowentry);

	if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
		/* An empty new flow is demoted to the old list so it
		 * cannot keep its new-flow priority. */
		SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
		DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
		    flow->id, flow->deficit);
	} else {
		/* An empty old flow (or a new one with nothing old to
		 * serve) is deactivated until its next packet arrives. */
		flow->active = 0;
		DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	return (first_flow(fqc, fq));
}
641 
/*
 * Dequeue the next packet across all flows.  Packets dropped by CoDel
 * are collected on free_ml for the caller to dispose of; on success
 * *cookiep records the flow for fqcodel_deq_commit().  Returns NULL
 * when every flow is empty.
 */
struct mbuf *
fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
    struct mbuf_list *free_ml)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct flowq *fq;
	struct flow *flow;
	struct mbuf *m;
	int64_t now;

	/* Unless pinned by pf, let the quantum track the interface MTU. */
	if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
		fqc->quantum = fqc->ifp->if_mtu + max_linkhdr;

	codel_gettime(&now);

	for (flow = first_flow(fqc, &fq); flow != NULL;
	     flow = next_flow(fqc, flow, &fq)) {
		m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
		    &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);

		/* Everything on ml was dropped by CoDel; account for it. */
		KASSERT(fqc->qlength >= ml_len(&ml));
		fqc->qlength -= ml_len(&ml);

		ml_enlist(free_ml, &ml);

		if (m != NULL) {
			/* Charge the flow's DRR credit for this packet. */
			flow->deficit -= m->m_pkthdr.len;
			DPRINTF("%s: flow %u deficit %d\n", __func__,
			    flow->id, flow->deficit);
			*cookiep = flow;
			return (m);
		}
	}

	return (NULL);
}
678 
679 void
680 fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie)
681 {
682 	struct flow *flow = cookie;
683 
684 	KASSERT(fqc->qlength > 0);
685 	fqc->qlength--;
686 
687 	fqc->xmit_cnt.packets++;
688 	fqc->xmit_cnt.bytes += m->m_pkthdr.len;
689 
690 	(void)codel_commit(&flow->cd, m);
691 }
692 
693 void
694 fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml)
695 {
696 	unsigned int i;
697 
698 	for (i = 0; i < fqc->nflows; i++)
699 		codel_purge(&fqc->flows[i].cd, ml);
700 	fqc->qlength = 0;
701 }
702 
703 struct mbuf *
704 fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m)
705 {
706 	return fqcodel_enq(ifq->ifq_q, m);
707 }
708 
709 struct mbuf *
710 fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep)
711 {
712 	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
713 	struct mbuf *m;
714 
715 	m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml);
716 	ifq_mfreeml(ifq, &free_ml);
717 	return (m);
718 }
719 
720 void
721 fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
722 {
723 	return fqcodel_deq_commit(ifq->ifq_q, m, cookie);
724 }
725 
726 void
727 fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml)
728 {
729 	return fqcodel_purge(ifq->ifq_q, ml);
730 }
731 
732 void *
733 fqcodel_pf_alloc(struct ifnet *ifp)
734 {
735 	struct fqcodel *fqc;
736 
737 	fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO);
738 
739 	SIMPLEQ_INIT(&fqc->newq);
740 	SIMPLEQ_INIT(&fqc->oldq);
741 
742 	return (fqc);
743 }
744 
/*
 * Configure an fq_codel instance from a pf queue specification.
 * Returns EINVAL when the flow count is zero or above 16 bits.
 */
int
fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel *fqc = arg;

	if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > 0xffff)
		return (EINVAL);

	fqc->nflows = qs->flowqueue.flows;
	fqc->quantum = qs->flowqueue.quantum;
	if (qs->qlimit > 0)
		fqc->qlimit = qs->qlimit;
	else
		fqc->qlimit = fqcodel_qlimit;
	if (fqc->quantum > 0)
		fqc->flags |= FQCF_FIXED_QUANTUM;
	else
		fqc->quantum = ifp->if_mtu + max_linkhdr; /* track the MTU */

	codel_initparams(&fqc->cparams, qs->flowqueue.target,
	    qs->flowqueue.interval, fqc->quantum);

	fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow),
	    M_DEVBUF, M_WAITOK | M_ZERO);

#ifdef FQCODEL_DEBUG
	{
		unsigned int i;

		/* Label every flow for the debug traces. */
		for (i = 0; i < fqc->nflows; i++)
			fqc->flows[i].id = i;
	}
#endif

	fqc->ifp = ifp;

	DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums "
	    "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit,
	    fqc->quantum, fqc->cparams.target / 1000000,
	    fqc->cparams.interval / 1000000);

	return (0);
}
789 
790 void
791 fqcodel_pf_free(void *arg)
792 {
793 	struct fqcodel *fqc = arg;
794 
795 	codel_freeparams(&fqc->cparams);
796 	free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
797 	free(fqc, M_DEVBUF, sizeof(struct fqcodel));
798 }
799 
/*
 * Copy fq_codel statistics out to pfctl.  *nbytes must be at least
 * sizeof(struct fqcodel_stats) on entry and is set to the amount
 * copied on success.
 */
int
fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel_stats stats;
	struct fqcodel *fqc;
	int64_t delay;
	unsigned int i;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));

	/* XXX: multi-q? */
	fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops);
	if (fqc == NULL)
		return (EBADF);

	stats.xmit_cnt = fqc->xmit_cnt;
	stats.drop_cnt = fqc->drop_cnt;

	stats.qlength = ifq_len(&ifp->if_snd);
	stats.qlimit = fqc->qlimit;

	stats.flows = 0;
	stats.delaysum = stats.delaysumsq = 0;

	/* Sum delays over the backlogged flows only. */
	for (i = 0; i < fqc->nflows; i++) {
		if (codel_qlength(&fqc->flows[i].cd) == 0)
			continue;
		/* Scale down to microseconds to avoid overflows */
		delay = codel_delay(&fqc->flows[i].cd) / 1000;
		stats.delaysum += delay;
		stats.delaysumsq += delay * delay;
		stats.flows++;
	}

	ifq_q_leave(&ifp->if_snd, fqc);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}
850 
851 unsigned int
852 fqcodel_pf_qlength(void *fqc)
853 {
854 	return ((struct fqcodel *)fqc)->qlength;
855 }
856 
857 struct mbuf *
858 fqcodel_pf_enqueue(void *fqc, struct mbuf *m)
859 {
860 	return fqcodel_enq(fqc, m);
861 }
862 
863 struct mbuf *
864 fqcodel_pf_deq_begin(void *fqc, void **cookiep, struct mbuf_list *free_ml)
865 {
866 	return fqcodel_deq_begin(fqc, cookiep, free_ml);
867 }
868 
869 void
870 fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie)
871 {
872 	return fqcodel_deq_commit(fqc, m, cookie);
873 }
874 
875 void
876 fqcodel_pf_purge(void *fqc, struct mbuf_list *ml)
877 {
878 	return fqcodel_purge(fqc, ml);
879 }
880 
/* A single aggregate queue handles every priority. */
unsigned int
fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
{
	return (0);
}

/* Real allocation happens in fqcodel_pf_alloc(); pass the state through. */
void *
fqcodel_alloc(unsigned int idx, void *arg)
{
	return (arg);
}

/* Freeing happens in fqcodel_pf_free(); nothing to do here. */
void
fqcodel_free(unsigned int idx, void *arg)
{
}
899