/* $OpenBSD: fq_codel.c,v 1.16 2024/10/29 23:25:45 dlg Exp $ */

/*
 * Copyright (c) 2017 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Codel - The Controlled-Delay Active Queue Management algorithm
 * IETF draft-ietf-aqm-codel-07
 *
 * Based on the algorithm by Kathleen Nichols and Van Jacobson with
 * improvements from Dave Taht and Eric Dumazet.
 */
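
/*
 * CoDel's control law spaces successive drops by the inverse square
 * root of the number of drops in the current dropping episode:
 *
 *	next_drop = now + interval / sqrt(count)
 *
 * With the default 100ms interval this yields gaps of 100ms, 70.7ms,
 * 57.7ms, 50ms, ...; the codel_intervals[] table below holds exactly
 * these values in nanoseconds.
 */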

/*
 * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
 * IETF draft-ietf-aqm-fq-codel-06
 *
 * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/pfvar.h>
#include <net/fq_codel.h>

/* #define FQCODEL_DEBUG 1 */

#ifdef FQCODEL_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

struct codel {
	struct mbuf_list q;

	unsigned int	 dropping:1;	/* Dropping state */
	unsigned int	 backlog:31;	/* Number of bytes in the queue */

	unsigned short	 drops;		/* Free running counter of drops */
	unsigned short	 ldrops;	/* Value from the previous run */

	int64_t		 start;		/* The moment queue was above target */
	int64_t		 next;		/* Next interval */
	int64_t		 delay;		/* Delay incurred by the last packet */
};

struct codel_params {
	int64_t		 target;
	int64_t		 interval;
	int		 quantum;

	uint32_t	*intervals;
};

void		 codel_initparams(struct codel_params *, unsigned int,
		    unsigned int, int);
void		 codel_freeparams(struct codel_params *);
void		 codel_enqueue(struct codel *, int64_t, struct mbuf *);
struct mbuf	*codel_dequeue(struct codel *, struct codel_params *, int64_t,
		    struct mbuf_list *, uint64_t *, uint64_t *);
struct mbuf	*codel_commit(struct codel *, struct mbuf *);
void		 codel_purge(struct codel *, struct mbuf_list *ml);

struct flow {
	struct codel		 cd;
	int			 active:1;
	int			 deficit:31;
#ifdef FQCODEL_DEBUG
	uint16_t		 id;
#endif
	SIMPLEQ_ENTRY(flow)	 flowentry;
};
SIMPLEQ_HEAD(flowq, flow);

struct fqcodel {
	struct flowq		 newq;
	struct flowq		 oldq;

	struct flow		*flows;
	unsigned int		 qlength;

	struct ifnet		*ifp;

	struct codel_params	 cparams;

	unsigned int		 nflows;
	unsigned int		 qlimit;
	int			 quantum;

	unsigned int		 flags;
#define FQCF_FIXED_QUANTUM	  0x1

	/* stats */
	struct fqcodel_pktcntr	 xmit_cnt;
	struct fqcodel_pktcntr	 drop_cnt;
};

unsigned int	 fqcodel_idx(unsigned int, const struct mbuf *);
void		*fqcodel_alloc(unsigned int, void *);
void		 fqcodel_free(unsigned int, void *);
struct mbuf	*fqcodel_if_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*fqcodel_if_deq_begin(struct ifqueue *, void **);
void		 fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 fqcodel_if_purge(struct ifqueue *, struct mbuf_list *);

struct mbuf	*fqcodel_enq(struct fqcodel *, struct mbuf *);
struct mbuf	*fqcodel_deq_begin(struct fqcodel *, void **,
		    struct mbuf_list *);
void		 fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *);
void		 fqcodel_purge(struct fqcodel *, struct mbuf_list *);

/*
 * ifqueue glue.
 */

static const struct ifq_ops fqcodel_ops = {
	fqcodel_idx,
	fqcodel_if_enq,
	fqcodel_if_deq_begin,
	fqcodel_if_deq_commit,
	fqcodel_if_purge,
	fqcodel_alloc,
	fqcodel_free
};

const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;

void		*fqcodel_pf_alloc(struct ifnet *);
int		 fqcodel_pf_addqueue(void *, struct pf_queuespec *);
void		 fqcodel_pf_free(void *);
int		 fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
unsigned int	 fqcodel_pf_qlength(void *);
struct mbuf *	 fqcodel_pf_enqueue(void *, struct mbuf *);
struct mbuf *	 fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
void		 fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
void		 fqcodel_pf_purge(void *, struct mbuf_list *);

/*
 * pf queue glue.
 */

static const struct pfq_ops fqcodel_pf_ops = {
	fqcodel_pf_alloc,
	fqcodel_pf_addqueue,
	fqcodel_pf_free,
	fqcodel_pf_qstats,
	fqcodel_pf_qlength,
	fqcodel_pf_enqueue,
	fqcodel_pf_deq_begin,
	fqcodel_pf_deq_commit,
	fqcodel_pf_purge
};

const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops;

/* Default aggregate queue depth */
static const unsigned int fqcodel_qlimit = 1024;

/*
 * CoDel implementation
 */

/* Delay target, 5ms */
static const int64_t codel_target = 5000000;

/* Grace period after last drop, 16 100ms intervals */
static const int64_t codel_grace = 1600000000;

/* First 399 "100 / sqrt(x)" intervals, ns precision */
static const uint32_t codel_intervals[] = {
	100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
	35355339,  33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
	25819889,  25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
	21320072,  20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
	18569534,  18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
	16666667,  16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
	15249857,  15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
	14142136,  14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
	13245324,  13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
	12500000,  12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
	11867817,  11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
	11322770,  11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
	10846523,  10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
	10425721,  10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
	10050378,  10000000, 9950372,  9901475,  9853293,  9805807,  9759001,
	9712859,   9667365,  9622504,  9578263,  9534626,  9491580,  9449112,
	9407209,   9365858,  9325048,  9284767,  9245003,  9205746,  9166985,
	9128709,   9090909,  9053575,  9016696,  8980265,  8944272,  8908708,
	8873565,   8838835,  8804509,  8770580,  8737041,  8703883,  8671100,
	8638684,   8606630,  8574929,  8543577,  8512565,  8481889,  8451543,
	8421519,   8391814,  8362420,  8333333,  8304548,  8276059,  8247861,
	8219949,   8192319,  8164966,  8137885,  8111071,  8084521,  8058230,
	8032193,   8006408,  7980869,  7955573,  7930516,  7905694,  7881104,
	7856742,   7832604,  7808688,  7784989,  7761505,  7738232,  7715167,
	7692308,   7669650,  7647191,  7624929,  7602859,  7580980,  7559289,
	7537784,   7516460,  7495317,  7474351,  7453560,  7432941,  7412493,
	7392213,   7372098,  7352146,  7332356,  7312724,  7293250,  7273930,
	7254763,   7235746,  7216878,  7198158,  7179582,  7161149,  7142857,
	7124705,   7106691,  7088812,  7071068,  7053456,  7035975,  7018624,
	7001400,   6984303,  6967330,  6950480,  6933752,  6917145,  6900656,
	6884284,   6868028,  6851887,  6835859,  6819943,  6804138,  6788442,
	6772855,   6757374,  6741999,  6726728,  6711561,  6696495,  6681531,
	6666667,   6651901,  6637233,  6622662,  6608186,  6593805,  6579517,
	6565322,   6551218,  6537205,  6523281,  6509446,  6495698,  6482037,
	6468462,   6454972,  6441566,  6428243,  6415003,  6401844,  6388766,
	6375767,   6362848,  6350006,  6337243,  6324555,  6311944,  6299408,
	6286946,   6274558,  6262243,  6250000,  6237829,  6225728,  6213698,
	6201737,   6189845,  6178021,  6166264,  6154575,  6142951,  6131393,
	6119901,   6108472,  6097108,  6085806,  6074567,  6063391,  6052275,
	6041221,   6030227,  6019293,  6008418,  5997601,  5986843,  5976143,
	5965500,   5954913,  5944383,  5933908,  5923489,  5913124,  5902813,
	5892557,   5882353,  5872202,  5862104,  5852057,  5842062,  5832118,
	5822225,   5812382,  5802589,  5792844,  5783149,  5773503,  5763904,
	5754353,   5744850,  5735393,  5725983,  5716620,  5707301,  5698029,
	5688801,   5679618,  5670480,  5661385,  5652334,  5643326,  5634362,
	5625440,   5616560,  5607722,  5598925,  5590170,  5581456,  5572782,
	5564149,   5555556,  5547002,  5538488,  5530013,  5521576,  5513178,
	5504819,   5496497,  5488213,  5479966,  5471757,  5463584,  5455447,
	5447347,   5439283,  5431254,  5423261,  5415304,  5407381,  5399492,
	5391639,   5383819,  5376033,  5368281,  5360563,  5352877,  5345225,
	5337605,   5330018,  5322463,  5314940,  5307449,  5299989,  5292561,
	5285164,   5277798,  5270463,  5263158,  5255883,  5248639,  5241424,
	5234239,   5227084,  5219958,  5212860,  5205792,  5198752,  5191741,
	5184758,   5177804,  5170877,  5163978,  5157106,  5150262,  5143445,
	5136655,   5129892,  5123155,  5116445,  5109761,  5103104,  5096472,
	5089866,   5083286,  5076731,  5070201,  5063697,  5057217,  5050763,
	5044333,   5037927,  5031546,  5025189,  5018856,  5012547,  5006262
};
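
/*
 * A sketch of how a table like the one above can be regenerated if
 * more entries are ever needed (userland only, assumes libm and
 * round-to-nearest; not part of the kernel build):
 *
 *	#include <math.h>
 *	#include <stdio.h>
 *
 *	for (int i = 1; i <= 399; i++)
 *		printf("%u,\n", (unsigned int)llround(1e8 / sqrt(i)));
 */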

void
codel_initparams(struct codel_params *cp, unsigned int target,
    unsigned int interval, int quantum)
{
	uint64_t mult;
	unsigned int i;

	/*
	 * Update observation intervals table according to the configured
	 * initial interval value.
	 */
	if (interval > codel_intervals[0]) {
		/* Select either the specified target or 5% of an interval */
		cp->target = MAX(target, interval / 5);
		cp->interval = interval;

		/* The coefficient is scaled up by 1000 */
		mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];

		/* Prepare table of intervals */
		cp->intervals = mallocarray(nitems(codel_intervals),
		    sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < nitems(codel_intervals); i++)
			cp->intervals[i] = ((uint64_t)codel_intervals[i] *
			    mult) / 1000;
	} else {
		cp->target = MAX(target, codel_target);
		cp->interval = codel_intervals[0];
		cp->intervals = (uint32_t *)codel_intervals;
	}

	cp->quantum = quantum;
}
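
/*
 * Worked example: a configured interval of 200ms (200000000ns) exceeds
 * the 100ms default, so the scaled table is built with
 * mult = (200000000 * 1000) / 100000000 = 2000 and every entry doubles:
 * intervals[0] becomes 200ms, intervals[1] ~141.4ms, and the target
 * defaults to at least 200ms / 5 = 40ms.
 */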

void
codel_freeparams(struct codel_params *cp)
{
	if (cp->intervals != codel_intervals)
		free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
		    sizeof(codel_intervals[0]));
	cp->intervals = NULL;
}

static inline unsigned int
codel_backlog(struct codel *cd)
{
	return (cd->backlog);
}

static inline unsigned int
codel_qlength(struct codel *cd)
{
	return (ml_len(&cd->q));
}

static inline int64_t
codel_delay(struct codel *cd)
{
	return (cd->delay);
}

void
codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
{
	m->m_pkthdr.ph_timestamp = now;

	ml_enqueue(&cd->q, m);
	cd->backlog += m->m_pkthdr.len;
}
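
/*
 * The enqueue timestamp stored in ph_timestamp is what becomes the
 * per-packet sojourn time on the dequeue side:
 *
 *	sojourn = now - m->m_pkthdr.ph_timestamp
 *
 * codel_next_packet() compares it against cp->target and
 * codel_dequeue() records the last value in cd->delay for the stats
 * path.
 */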

/*
 * Select the next interval according to the number of drops
 * in the current one relative to the provided timestamp.
 */
static inline void
control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
{
	unsigned int idx;

	idx = min(cd->drops, nitems(codel_intervals) - 1);
	cd->next = rts + cp->intervals[idx];
}
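
/*
 * Worked example: after the 5th drop in an episode cd->drops is 5, so
 * idx = min(5, 398) = 5 and, with the default table, the next drop is
 * scheduled intervals[5] = 40824829ns, i.e. ~100ms / sqrt(6), after
 * the reference timestamp rts.
 */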

/*
 * Pick the next enqueued packet and determine the queueing delay
 * as well as whether or not it's a good candidate for dropping
 * from the queue.
 *
 * The decision whether to drop the packet or not is made based
 * on the queueing delay target of 5ms and on the current queue
 * length in bytes which shouldn't be less than the amount of data
 * that arrives in a typical interarrival time (MTU-sized packets
 * arriving spaced by the amount of time it takes to send such a
 * packet on the bottleneck).
 */
static inline struct mbuf *
codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
    int *drop)
{
	struct mbuf *m;

	*drop = 0;

	m = MBUF_LIST_FIRST(&cd->q);
	if (m == NULL) {
		KASSERT(cd->backlog == 0);
		/* Empty queue, reset interval */
		cd->start = 0;
		return (NULL);
	}

	if (now - m->m_pkthdr.ph_timestamp < cp->target ||
	    cd->backlog <= cp->quantum) {
		/*
		 * The minimum delay decreased below the target, reset
		 * the current observation interval.
		 */
		cd->start = 0;
		return (m);
	}

	if (cd->start == 0) {
		/*
		 * This is the first packet to be delayed for more than
		 * the target, start the first observation interval after
		 * a single RTT and see if the minimum delay goes below
		 * the target within the interval, otherwise punish the
		 * next packet.
		 */
		cd->start = now + cp->interval;
	} else if (now > cd->start) {
		*drop = 1;
	}
	return (m);
}

enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };

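/*
 * A compact map of the transitions implemented by codel_state_change()
 * below; the state is re-evaluated for every packet examined during a
 * single dequeue pass:
 *
 *	INITIAL   -> FIRSTDROP  over target for a whole interval
 *	INITIAL   -> ACCEPTING  within target, hand the packet out
 *	FIRSTDROP -> ACCEPTING  one packet dropped, deliver the next
 *	DROPPING  -> CONTROL    interval expired, consult the control law
 *	CONTROL   -> DROPPING   still over target, keep dropping
 *	*         -> RECOVERY   delay back under target or queue empty,
 *	                        leave the dropping state
 */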
static inline int
codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
    int state)
{
	if (state == FIRSTDROP)
		return (ACCEPTING);

	if (cd->dropping) {
		if (!drop)
			return (RECOVERY);
		else if (now >= cd->next)
			return (state == DROPPING ? CONTROL : DROPPING);
	} else if (drop)
		return (FIRSTDROP);

	if (m == NULL)
		return (RECOVERY);

	return (ACCEPTING);
}

struct mbuf *
codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
    struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
{
	struct mbuf *m;
	unsigned short delta;
	int drop, state, done = 0;

	state = INITIAL;

	while (!done) {
		m = codel_next_packet(cd, cp, now, &drop);
		state = codel_state_change(cd, now, m, drop, state);

		switch (state) {
		case FIRSTDROP:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			cd->dropping = 1;

			/*
			 * If we're still within the grace period and not
			 * meeting our minimal delay target we treat this
			 * as a continuation of the previous observation
			 * interval and shrink it further.  Otherwise we
			 * start from the initial one.
			 */
			delta = cd->drops - cd->ldrops;
			if (delta > 1) {
				if (now < cd->next ||
				    now - cd->next < codel_grace)
					cd->drops = delta;
				else
					cd->drops = 1;
			} else
				cd->drops = 1;
			control_law(cd, cp, now);
			cd->ldrops = cd->drops;

			/* fetches the next packet and goes to ACCEPTING */
			break;
		case DROPPING:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);
			cd->drops++;

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			/* fetches the next packet and goes to CONTROL */
			break;
		case CONTROL:
			if (drop) {
				control_law(cd, cp, cd->next);
				continue;
			}
			/* FALLTHROUGH */
		case RECOVERY:
			cd->dropping = 0;
			/* FALLTHROUGH */
		case ACCEPTING:
			done = 1;
			break;
		}
	}

	if (m != NULL)
		cd->delay = now - m->m_pkthdr.ph_timestamp;

	return (m);
}

struct mbuf *
codel_commit(struct codel *cd, struct mbuf *m)
{
	struct mbuf *n;

	n = ml_dequeue(&cd->q);
	if (m)
		KASSERT(n == m);
	KASSERT(n != NULL);
	KASSERT(cd->backlog >= n->m_pkthdr.len);
	cd->backlog -= n->m_pkthdr.len;
	return (n);
}

void
codel_purge(struct codel *cd, struct mbuf_list *ml)
{
	ml_enlist(ml, &cd->q);
	cd->backlog = 0;
}

/*
 * FQ-CoDel implementation
 */

static inline struct flow *
classify_flow(struct fqcodel *fqc, struct mbuf *m)
{
	unsigned int index = 0;

	if (m->m_pkthdr.csum_flags & M_FLOWID)
		index = m->m_pkthdr.ph_flowid % fqc->nflows;

	DPRINTF("%s: %u\n", __func__, index);

	return (&fqc->flows[index]);
}
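
/*
 * Example: with M_FLOWID set by the stack (e.g. from a hardware RSS
 * hash or the protocol layer) and nflows = 1024, ph_flowid 0x12345
 * selects bucket 0x12345 % 1024 = 0x345.  Distinct flows that collide
 * in the modulo share one codel instance, the usual fq-codel
 * trade-off.
 */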

struct mbuf *
fqcodel_enq(struct fqcodel *fqc, struct mbuf *m)
{
	struct flow *flow;
	unsigned int backlog = 0;
	int64_t now;
	int i;

	flow = classify_flow(fqc, m);
	if (flow == NULL)
		return (m);

	now = nsecuptime();
	codel_enqueue(&flow->cd, now, m);
	fqc->qlength++;

	if (!flow->active) {
		SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
		flow->deficit = fqc->quantum;
		flow->active = 1;
		DPRINTF("%s: flow %u active deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	/*
	 * Check the limit for all queues and remove a packet
	 * from the longest one.
	 */
	if (fqc->qlength >= fqcodel_qlimit) {
		for (i = 0; i < fqc->nflows; i++) {
			if (codel_backlog(&fqc->flows[i].cd) > backlog) {
				flow = &fqc->flows[i];
				backlog = codel_backlog(&flow->cd);
			}
		}

		KASSERT(flow != NULL);
		m = codel_commit(&flow->cd, NULL);

		fqc->drop_cnt.packets++;
		fqc->drop_cnt.bytes += m->m_pkthdr.len;

		fqc->qlength--;

		DPRINTF("%s: dropping from flow %u\n", __func__,
		    flow->id);
		return (m);
	}

	return (NULL);
}

static inline struct flowq *
select_queue(struct fqcodel *fqc)
{
	struct flowq *fq = NULL;

	if (!SIMPLEQ_EMPTY(&fqc->newq))
		fq = &fqc->newq;
	else if (!SIMPLEQ_EMPTY(&fqc->oldq))
		fq = &fqc->oldq;
	return (fq);
}

static inline struct flow *
first_flow(struct fqcodel *fqc, struct flowq **fq)
{
	struct flow *flow;

	while ((*fq = select_queue(fqc)) != NULL) {
		while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
			if (flow->deficit <= 0) {
				flow->deficit += fqc->quantum;
				SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
				SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
				    flowentry);
				DPRINTF("%s: flow %u deficit %d\n", __func__,
				    flow->id, flow->deficit);
			} else
				return (flow);
		}
	}

	return (NULL);
}
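
/*
 * Deficit round-robin in practice: with a quantum of one MTU plus link
 * header (~1514 bytes on ethernet), an activated flow starts with a
 * deficit of 1514 and may dequeue packets until the deficit is
 * exhausted, roughly one full-sized packet per round; once it goes
 * non-positive, first_flow() tops it up by one quantum and rotates the
 * flow to the tail of the old queue.
 */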

static inline struct flow *
next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
{
	SIMPLEQ_REMOVE_HEAD(*fq, flowentry);

	if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
		/* A packet was dropped, starve the queue */
		SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
		DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
		    flow->id, flow->deficit);
	} else {
		/* A packet was dropped on a starved queue, disable it */
		flow->active = 0;
		DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	return (first_flow(fqc, fq));
}

struct mbuf *
fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
    struct mbuf_list *free_ml)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct flowq *fq;
	struct flow *flow;
	struct mbuf *m;
	int64_t now;

	if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
		fqc->quantum = fqc->ifp->if_mtu + max_linkhdr;

	now = nsecuptime();

	for (flow = first_flow(fqc, &fq); flow != NULL;
	     flow = next_flow(fqc, flow, &fq)) {
		m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
		    &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);

		KASSERT(fqc->qlength >= ml_len(&ml));
		fqc->qlength -= ml_len(&ml);

		ml_enlist(free_ml, &ml);

		if (m != NULL) {
			flow->deficit -= m->m_pkthdr.len;
			DPRINTF("%s: flow %u deficit %d\n", __func__,
			    flow->id, flow->deficit);
			*cookiep = flow;
			return (m);
		}
	}

	return (NULL);
}

void
fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie)
{
	struct flow *flow = cookie;

	KASSERT(fqc->qlength > 0);
	fqc->qlength--;

	fqc->xmit_cnt.packets++;
	fqc->xmit_cnt.bytes += m->m_pkthdr.len;

	(void)codel_commit(&flow->cd, m);
}

void
fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml)
{
	unsigned int i;

	for (i = 0; i < fqc->nflows; i++)
		codel_purge(&fqc->flows[i].cd, ml);
	fqc->qlength = 0;
}

struct mbuf *
fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m)
{
	return fqcodel_enq(ifq->ifq_q, m);
}

struct mbuf *
fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;

	m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml);
	ifq_mfreeml(ifq, &free_ml);
	return (m);
}

void
fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	return fqcodel_deq_commit(ifq->ifq_q, m, cookie);
}

void
fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	return fqcodel_purge(ifq->ifq_q, ml);
}

void *
fqcodel_pf_alloc(struct ifnet *ifp)
{
	struct fqcodel *fqc;

	fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO);

	SIMPLEQ_INIT(&fqc->newq);
	SIMPLEQ_INIT(&fqc->oldq);

	return (fqc);
}

int
fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel *fqc = arg;

	if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > 0xffff)
		return (EINVAL);

	fqc->nflows = qs->flowqueue.flows;
	fqc->quantum = qs->flowqueue.quantum;
	if (qs->qlimit > 0)
		fqc->qlimit = qs->qlimit;
	else
		fqc->qlimit = fqcodel_qlimit;
	if (fqc->quantum > 0)
		fqc->flags |= FQCF_FIXED_QUANTUM;
	else
		fqc->quantum = ifp->if_mtu + max_linkhdr;

	codel_initparams(&fqc->cparams, qs->flowqueue.target,
	    qs->flowqueue.interval, fqc->quantum);

	fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow),
	    M_DEVBUF, M_WAITOK | M_ZERO);

#ifdef FQCODEL_DEBUG
	{
		unsigned int i;

		for (i = 0; i < fqc->nflows; i++)
			fqc->flows[i].id = i;
	}
#endif

	fqc->ifp = ifp;

	DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums "
	    "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit,
	    fqc->quantum, fqc->cparams.target / 1000000,
	    fqc->cparams.interval / 1000000);

	return (0);
}
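
/*
 * A pf.conf(5) queue definition along these lines (names and numbers
 * purely illustrative) is what lands here, with flows, quantum and
 * qlimit carried in via qs->flowqueue and qs->qlimit:
 *
 *	queue outq on em0 flows 1024 bandwidth 100M qlimit 1024 default
 */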

void
fqcodel_pf_free(void *arg)
{
	struct fqcodel *fqc = arg;

	codel_freeparams(&fqc->cparams);
	free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
	free(fqc, M_DEVBUF, sizeof(struct fqcodel));
}

int
fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel_stats stats;
	struct fqcodel *fqc;
	int64_t delay;
	unsigned int i;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));

	/* XXX: multi-q? */
	fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops);
	if (fqc == NULL)
		return (EBADF);

	stats.xmit_cnt = fqc->xmit_cnt;
	stats.drop_cnt = fqc->drop_cnt;

	stats.qlength = ifq_len(&ifp->if_snd);
	stats.qlimit = fqc->qlimit;

	stats.flows = 0;
	stats.delaysum = stats.delaysumsq = 0;

	for (i = 0; i < fqc->nflows; i++) {
		if (codel_qlength(&fqc->flows[i].cd) == 0)
			continue;
		/* Scale down to microseconds to avoid overflows */
		delay = codel_delay(&fqc->flows[i].cd) / 1000;
		stats.delaysum += delay;
		stats.delaysumsq += delay * delay;
		stats.flows++;
	}

	ifq_q_leave(&ifp->if_snd, fqc);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}
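
/*
 * Userland can derive delay statistics from the sums exported above;
 * with n = stats.flows backlogged flows, in microseconds as scaled in
 * the loop:
 *
 *	mean     = delaysum / n
 *	variance = delaysumsq / n - mean * mean
 */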

unsigned int
fqcodel_pf_qlength(void *fqc)
{
	return ((struct fqcodel *)fqc)->qlength;
}

struct mbuf *
fqcodel_pf_enqueue(void *fqc, struct mbuf *m)
{
	return fqcodel_enq(fqc, m);
}

struct mbuf *
fqcodel_pf_deq_begin(void *fqc, void **cookiep, struct mbuf_list *free_ml)
{
	return fqcodel_deq_begin(fqc, cookiep, free_ml);
}

void
fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie)
{
	return fqcodel_deq_commit(fqc, m, cookie);
}

void
fqcodel_pf_purge(void *fqc, struct mbuf_list *ml)
{
	return fqcodel_purge(fqc, ml);
}

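/*
 * fq-codel does its own per-flow scheduling inside a single ifqueue,
 * so every packet maps to ifqueue index 0 regardless of nqueues.
 */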
unsigned int
fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
{
	return (0);
}

void *
fqcodel_alloc(unsigned int idx, void *arg)
{
	/* Allocation is done in fqcodel_pf_alloc */
	return (arg);
}

void
fqcodel_free(unsigned int idx, void *arg)
{
	/* nothing to do here */
}