/*	$OpenBSD: fq_codel.c,v 1.16 2024/10/29 23:25:45 dlg Exp $ */

/*
 * Copyright (c) 2017 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Codel - The Controlled-Delay Active Queue Management algorithm
 * IETF draft-ietf-aqm-codel-07
 *
 * Based on the algorithm by Kathleen Nichols and Van Jacobson with
 * improvements from Dave Taht and Eric Dumazet.
 */

/*
 * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
 * IETF draft-ietf-aqm-fq-codel-06
 *
 * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/pfvar.h>
#include <net/fq_codel.h>

/* #define FQCODEL_DEBUG 1 */

#ifdef FQCODEL_DEBUG
#define DPRINTF(x...)	printf(x)
#else
#define DPRINTF(x...)
#endif

struct codel {
	struct mbuf_list q;

	unsigned int	 dropping:1;	/* Dropping state */
	unsigned int	 backlog:31;	/* Number of bytes in the queue */

	unsigned short	 drops;		/* Free running counter of drops */
	unsigned short	 ldrops;	/* Value from the previous run */

	int64_t		 start;		/* The moment queue was above target */
	int64_t		 next;		/* Next interval */
	int64_t		 delay;		/* Delay incurred by the last packet */
};

struct codel_params {
	int64_t		 target;
	int64_t		 interval;
	int		 quantum;

	uint32_t	*intervals;
};

void		 codel_initparams(struct codel_params *, unsigned int,
		    unsigned int, int);
void		 codel_freeparams(struct codel_params *);
void		 codel_enqueue(struct codel *, int64_t, struct mbuf *);
struct mbuf	*codel_dequeue(struct codel *, struct codel_params *, int64_t,
		    struct mbuf_list *, uint64_t *, uint64_t *);
struct mbuf	*codel_commit(struct codel *, struct mbuf *);
void		 codel_purge(struct codel *, struct mbuf_list *);

struct flow {
	struct codel	 cd;
	int		 active:1;
	int		 deficit:31;
#ifdef FQCODEL_DEBUG
	uint16_t	 id;
#endif
	SIMPLEQ_ENTRY(flow) flowentry;
};
SIMPLEQ_HEAD(flowq, flow);

struct fqcodel {
	struct flowq	 newq;
	struct flowq	 oldq;

	struct flow	*flows;
	unsigned int	 qlength;

	struct ifnet	*ifp;

	struct codel_params cparams;

	unsigned int	 nflows;
	unsigned int	 qlimit;
	int		 quantum;

	unsigned int	 flags;
#define FQCF_FIXED_QUANTUM	0x1

	/* stats */
	struct fqcodel_pktcntr xmit_cnt;
	struct fqcodel_pktcntr drop_cnt;
};

unsigned int	 fqcodel_idx(unsigned int, const struct mbuf *);
void		*fqcodel_alloc(unsigned int, void *);
void		 fqcodel_free(unsigned int, void *);
struct mbuf	*fqcodel_if_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*fqcodel_if_deq_begin(struct ifqueue *, void **);
void		 fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 fqcodel_if_purge(struct ifqueue *, struct mbuf_list *);

struct mbuf	*fqcodel_enq(struct fqcodel *, struct mbuf *);
struct mbuf	*fqcodel_deq_begin(struct fqcodel *, void **,
		    struct mbuf_list *);
void		 fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *);
void		 fqcodel_purge(struct fqcodel *, struct mbuf_list *);

/*
 * ifqueue glue.
 */

static const struct ifq_ops fqcodel_ops = {
	fqcodel_idx,
	fqcodel_if_enq,
	fqcodel_if_deq_begin,
	fqcodel_if_deq_commit,
	fqcodel_if_purge,
	fqcodel_alloc,
	fqcodel_free
};

const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;

void		*fqcodel_pf_alloc(struct ifnet *);
int		 fqcodel_pf_addqueue(void *, struct pf_queuespec *);
void		 fqcodel_pf_free(void *);
int		 fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
unsigned int	 fqcodel_pf_qlength(void *);
struct mbuf	*fqcodel_pf_enqueue(void *, struct mbuf *);
struct mbuf	*fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
void		 fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
void		 fqcodel_pf_purge(void *, struct mbuf_list *);

/*
 * pf queue glue.
 */

static const struct pfq_ops fqcodel_pf_ops = {
	fqcodel_pf_alloc,
	fqcodel_pf_addqueue,
	fqcodel_pf_free,
	fqcodel_pf_qstats,
	fqcodel_pf_qlength,
	fqcodel_pf_enqueue,
	fqcodel_pf_deq_begin,
	fqcodel_pf_deq_commit,
	fqcodel_pf_purge
};

const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops;

/* Default aggregate queue depth */
static const unsigned int fqcodel_qlimit = 1024;

/*
 * CoDel implementation
 */

/* Delay target, 5ms */
static const int64_t codel_target = 5000000;

/* Grace period after last drop, 16 100ms intervals */
static const int64_t codel_grace = 1600000000;

/* First 399 "100 / sqrt(x)" intervals, ns precision */
static const uint32_t codel_intervals[] = {
	100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
	35355339, 33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
	25819889, 25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
	21320072, 20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
	18569534, 18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
	16666667, 16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
	15249857, 15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
	14142136, 14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
	13245324, 13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
	12500000, 12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
	11867817, 11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
	11322770, 11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
	10846523, 10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
	10425721, 10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
	10050378, 10000000, 9950372, 9901475, 9853293, 9805807, 9759001,
	9712859, 9667365, 9622504, 9578263, 9534626, 9491580, 9449112,
	9407209, 9365858, 9325048, 9284767, 9245003, 9205746, 9166985,
	9128709, 9090909, 9053575, 9016696, 8980265, 8944272, 8908708,
	8873565, 8838835, 8804509, 8770580, 8737041, 8703883, 8671100,
	8638684, 8606630, 8574929, 8543577, 8512565, 8481889, 8451543,
	8421519, 8391814, 8362420, 8333333, 8304548, 8276059, 8247861,
	8219949, 8192319, 8164966, 8137885, 8111071, 8084521, 8058230,
	8032193, 8006408, 7980869, 7955573, 7930516, 7905694, 7881104,
	7856742, 7832604, 7808688, 7784989, 7761505, 7738232, 7715167,
	7692308, 7669650, 7647191, 7624929, 7602859, 7580980, 7559289,
	7537784, 7516460, 7495317, 7474351, 7453560, 7432941, 7412493,
	7392213, 7372098, 7352146, 7332356, 7312724, 7293250, 7273930,
	7254763, 7235746, 7216878, 7198158, 7179582, 7161149, 7142857,
	7124705, 7106691, 7088812, 7071068, 7053456, 7035975, 7018624,
	7001400, 6984303, 6967330, 6950480, 6933752, 6917145, 6900656,
	6884284, 6868028, 6851887, 6835859, 6819943, 6804138, 6788442,
	6772855, 6757374, 6741999, 6726728, 6711561, 6696495, 6681531,
	6666667, 6651901, 6637233, 6622662, 6608186, 6593805, 6579517,
	6565322, 6551218, 6537205, 6523281, 6509446, 6495698, 6482037,
	6468462, 6454972, 6441566, 6428243, 6415003, 6401844, 6388766,
	6375767, 6362848, 6350006, 6337243, 6324555, 6311944, 6299408,
	6286946, 6274558, 6262243, 6250000, 6237829, 6225728, 6213698,
	6201737, 6189845, 6178021, 6166264, 6154575, 6142951, 6131393,
	6119901, 6108472, 6097108, 6085806, 6074567, 6063391, 6052275,
	6041221, 6030227, 6019293, 6008418, 5997601, 5986843, 5976143,
	5965500, 5954913, 5944383, 5933908, 5923489, 5913124, 5902813,
	5892557, 5882353, 5872202, 5862104, 5852057, 5842062, 5832118,
	5822225, 5812382, 5802589, 5792844, 5783149, 5773503, 5763904,
	5754353, 5744850, 5735393, 5725983, 5716620, 5707301, 5698029,
	5688801, 5679618, 5670480, 5661385, 5652334, 5643326, 5634362,
	5625440, 5616560, 5607722, 5598925, 5590170, 5581456, 5572782,
	5564149, 5555556, 5547002, 5538488, 5530013, 5521576, 5513178,
	5504819, 5496497, 5488213, 5479966, 5471757, 5463584, 5455447,
	5447347, 5439283, 5431254, 5423261, 5415304, 5407381, 5399492,
	5391639, 5383819, 5376033, 5368281, 5360563, 5352877, 5345225,
	5337605, 5330018, 5322463, 5314940, 5307449, 5299989, 5292561,
	5285164, 5277798, 5270463, 5263158, 5255883, 5248639, 5241424,
	5234239, 5227084, 5219958, 5212860, 5205792, 5198752, 5191741,
	5184758, 5177804, 5170877, 5163978, 5157106, 5150262, 5143445,
	5136655, 5129892, 5123155, 5116445, 5109761, 5103104, 5096472,
	5089866, 5083286, 5076731, 5070201, 5063697, 5057217, 5050763,
	5044333, 5037927, 5031546, 5025189, 5018856, 5012547, 5006262
};
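
/*
 * Entry n of the table is 100ms / sqrt(n + 1) in nanoseconds:
 * codel_intervals[0] is 100000000 and codel_intervals[3] is 50000000.
 * control_law() below indexes it by drop count (rescaled by
 * codel_initparams() when a non-default interval is configured)
 * instead of computing a square root on every drop.
 */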

void
codel_initparams(struct codel_params *cp, unsigned int target,
    unsigned int interval, int quantum)
{
	uint64_t mult;
	unsigned int i;

	/*
	 * Update observation intervals table according to the configured
	 * initial interval value.
	 */
	if (interval > codel_intervals[0]) {
		/* Select either specified target or 5% of an interval */
		cp->target = MAX(target, interval / 20);
		cp->interval = interval;

		/* The coefficient is scaled up by 1000 */
		mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];

		/* Prepare table of intervals */
		cp->intervals = mallocarray(nitems(codel_intervals),
		    sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < nitems(codel_intervals); i++)
			cp->intervals[i] = ((uint64_t)codel_intervals[i] *
			    mult) / 1000;
	} else {
		cp->target = MAX(target, codel_target);
		cp->interval = codel_intervals[0];
		cp->intervals = (uint32_t *)codel_intervals;
	}

	cp->quantum = quantum;
}
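
/*
 * A worked example, assuming a configured interval of 200ms
 * (200000000ns): mult becomes 2000, so every table entry is doubled
 * (e.g. 50000000 -> 100000000ns), and the delay target is raised to
 * at least 10000000ns, 5% of the interval.
 */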

void
codel_freeparams(struct codel_params *cp)
{
	if (cp->intervals != codel_intervals)
		free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
		    sizeof(codel_intervals[0]));
	cp->intervals = NULL;
}

static inline unsigned int
codel_backlog(struct codel *cd)
{
	return (cd->backlog);
}

static inline unsigned int
codel_qlength(struct codel *cd)
{
	return (ml_len(&cd->q));
}

static inline int64_t
codel_delay(struct codel *cd)
{
	return (cd->delay);
}

void
codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
{
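	/* Stamp the packet so dequeue can compute its sojourn time */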
	m->m_pkthdr.ph_timestamp = now;

	ml_enqueue(&cd->q, m);
	cd->backlog += m->m_pkthdr.len;
}

/*
 * Select the next interval according to the number of drops
 * in the current one relative to the provided timestamp.
 */
static inline void
control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
{
	unsigned int idx;

	idx = min(cd->drops, nitems(codel_intervals) - 1);
	cd->next = rts + cp->intervals[idx];
}
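
/*
 * Worked example, assuming the stock 100ms table: after four drops
 * (cd->drops == 4) the index selects cp->intervals[4] == 44721360ns,
 * i.e. 100ms / sqrt(5), so successive drops are scheduled closer and
 * closer together as the drop count grows, until the table is
 * exhausted at index 398.
 */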

/*
 * Pick the next enqueued packet and determine the queueing delay
 * as well as whether or not it's a good candidate for dropping
 * from the queue.
 *
 * The decision whether to drop the packet or not is made based
 * on the queueing delay target of 5ms and on the current queue
 * length in bytes which shouldn't be less than the amount of data
 * that arrives in a typical interarrival time (MTU-sized packets
 * arriving spaced by the amount of time it takes to send such a
 * packet on the bottleneck).
 */
static inline struct mbuf *
codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
    int *drop)
{
	struct mbuf *m;

	*drop = 0;

	m = MBUF_LIST_FIRST(&cd->q);
	if (m == NULL) {
		KASSERT(cd->backlog == 0);
		/* Empty queue, reset interval */
		cd->start = 0;
		return (NULL);
	}

	if (now - m->m_pkthdr.ph_timestamp < cp->target ||
	    cd->backlog <= cp->quantum) {
		/*
		 * The minimum delay decreased below the target, reset
		 * the current observation interval.
		 */
		cd->start = 0;
		return (m);
	}

	if (cd->start == 0) {
		/*
		 * This is the first packet to be delayed for more than
		 * the target, start the first observation interval after
		 * a single RTT and see if the minimum delay goes below
		 * the target within the interval, otherwise punish the
		 * next packet.
		 */
		cd->start = now + cp->interval;
	} else if (now > cd->start) {
		*drop = 1;
	}
	return (m);
}

enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };

static inline int
codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
    int state)
{
	if (state == FIRSTDROP)
		return (ACCEPTING);

	if (cd->dropping) {
		if (!drop)
			return (RECOVERY);
		else if (now >= cd->next)
			return (state == DROPPING ? CONTROL : DROPPING);
	} else if (drop)
		return (FIRSTDROP);

	if (m == NULL)
		return (RECOVERY);

	return (ACCEPTING);
}
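
/*
 * A rough transition map for the loop in codel_dequeue() below:
 * FIRSTDROP always settles into ACCEPTING after one drop; while in
 * the dropping state a non-drop verdict leads to RECOVERY, and a
 * drop verdict past cd->next alternates DROPPING and CONTROL, one
 * drop per control law interval; outside of the dropping state the
 * first drop verdict enters FIRSTDROP, and an empty queue ends in
 * RECOVERY.
 */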

struct mbuf *
codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
    struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
{
	struct mbuf *m;
	unsigned short delta;
	int drop, state, done = 0;

	state = INITIAL;

	while (!done) {
		m = codel_next_packet(cd, cp, now, &drop);
		state = codel_state_change(cd, now, m, drop, state);

		switch (state) {
		case FIRSTDROP:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			cd->dropping = 1;

			/*
			 * If we're still within the grace period and not
			 * meeting our minimal delay target we treat this
			 * as a continuation of the previous observation
			 * interval and shrink it further.  Otherwise we
			 * start from the initial one.
			 */
			delta = cd->drops - cd->ldrops;
			if (delta > 1) {
				if (now < cd->next ||
				    now - cd->next < codel_grace)
					cd->drops = delta;
				else
					cd->drops = 1;
			} else
				cd->drops = 1;
			control_law(cd, cp, now);
			cd->ldrops = cd->drops;

			/* fetches the next packet and goes to ACCEPTING */
			break;
		case DROPPING:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);
			cd->drops++;

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			/* fetches the next packet and goes to CONTROL */
			break;
		case CONTROL:
			if (drop) {
				control_law(cd, cp, cd->next);
				continue;
			}
			/* FALLTHROUGH */
		case RECOVERY:
			cd->dropping = 0;
			/* FALLTHROUGH */
		case ACCEPTING:
			done = 1;
			break;
		}
	}

	if (m != NULL)
		cd->delay = now - m->m_pkthdr.ph_timestamp;

	return (m);
}

struct mbuf *
codel_commit(struct codel *cd, struct mbuf *m)
{
	struct mbuf *n;

	n = ml_dequeue(&cd->q);
	if (m)
		KASSERT(n == m);
	KASSERT(n != NULL);
	KASSERT(cd->backlog >= n->m_pkthdr.len);
	cd->backlog -= n->m_pkthdr.len;
	return (n);
}

void
codel_purge(struct codel *cd, struct mbuf_list *ml)
{
	ml_enlist(ml, &cd->q);
	cd->backlog = 0;
}

/*
 * FQ-CoDel implementation
 */

static inline struct flow *
classify_flow(struct fqcodel *fqc, struct mbuf *m)
{
	unsigned int index = 0;

	if (m->m_pkthdr.csum_flags & M_FLOWID)
		index = m->m_pkthdr.ph_flowid % fqc->nflows;

	DPRINTF("%s: %u\n", __func__, index);

	return (&fqc->flows[index]);
}
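
/*
 * For instance, assuming a queue configured with 1024 flows, a
 * packet stamped with flow ID 0x12345 lands in bucket 0x345, so all
 * packets of one connection share a sub-queue; packets without
 * M_FLOWID all collapse into bucket 0.
 */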

struct mbuf *
fqcodel_enq(struct fqcodel *fqc, struct mbuf *m)
{
	struct flow *flow;
	unsigned int backlog = 0;
	int64_t now;
	int i;

	flow = classify_flow(fqc, m);
	if (flow == NULL)
		return (m);

	now = nsecuptime();
	codel_enqueue(&flow->cd, now, m);
	fqc->qlength++;

	if (!flow->active) {
		SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
		flow->deficit = fqc->quantum;
		flow->active = 1;
		DPRINTF("%s: flow %u active deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	/*
	 * Check the limit for all queues and remove a packet
	 * from the longest one.
	 */
	if (fqc->qlength >= fqcodel_qlimit) {
		for (i = 0; i < fqc->nflows; i++) {
			if (codel_backlog(&fqc->flows[i].cd) > backlog) {
				flow = &fqc->flows[i];
				backlog = codel_backlog(&flow->cd);
			}
		}

		KASSERT(flow != NULL);
		m = codel_commit(&flow->cd, NULL);

		fqc->drop_cnt.packets++;
		fqc->drop_cnt.bytes += m->m_pkthdr.len;

		fqc->qlength--;

		DPRINTF("%s: dropping from flow %u\n", __func__,
		    flow->id);
		return (m);
	}

	return (NULL);
}
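
/*
 * Note the convention on this path: a non-NULL return hands the
 * caller a packet to free, i.e. one evicted from the longest
 * sub-queue once the aggregate limit is hit; NULL means the packet
 * was accepted and queued.
 */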

static inline struct flowq *
select_queue(struct fqcodel *fqc)
{
	struct flowq *fq = NULL;

	if (!SIMPLEQ_EMPTY(&fqc->newq))
		fq = &fqc->newq;
	else if (!SIMPLEQ_EMPTY(&fqc->oldq))
		fq = &fqc->oldq;
	return (fq);
}

static inline struct flow *
first_flow(struct fqcodel *fqc, struct flowq **fq)
{
	struct flow *flow;

	while ((*fq = select_queue(fqc)) != NULL) {
		while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
			if (flow->deficit <= 0) {
				flow->deficit += fqc->quantum;
				SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
				SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
				    flowentry);
				DPRINTF("%s: flow %u deficit %d\n", __func__,
				    flow->id, flow->deficit);
			} else
				return (flow);
		}
	}

	return (NULL);
}
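
/*
 * This is the deficit round-robin half of FQ-CoDel: new flows are
 * always served before old ones, and a flow that has used up its
 * byte allowance is topped up with one quantum and rotated to the
 * tail of the old queue before another flow is picked.
 */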

static inline struct flow *
next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
{
	SIMPLEQ_REMOVE_HEAD(*fq, flowentry);

	if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
		/* A packet was dropped, starve the queue */
		SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
		DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
		    flow->id, flow->deficit);
	} else {
		/* A packet was dropped on a starved queue, disable it */
		flow->active = 0;
		DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	return (first_flow(fqc, fq));
}

struct mbuf *
fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
    struct mbuf_list *free_ml)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct flowq *fq;
	struct flow *flow;
	struct mbuf *m;
	int64_t now;

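	/* Refresh the quantum: the interface MTU may have changed */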
	if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
		fqc->quantum = fqc->ifp->if_mtu + max_linkhdr;

	now = nsecuptime();

	for (flow = first_flow(fqc, &fq); flow != NULL;
	    flow = next_flow(fqc, flow, &fq)) {
		m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
		    &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);

		KASSERT(fqc->qlength >= ml_len(&ml));
		fqc->qlength -= ml_len(&ml);

		ml_enlist(free_ml, &ml);

		if (m != NULL) {
			flow->deficit -= m->m_pkthdr.len;
			DPRINTF("%s: flow %u deficit %d\n", __func__,
			    flow->id, flow->deficit);
			*cookiep = flow;
			return (m);
		}
	}

	return (NULL);
}

void
fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie)
{
	struct flow *flow = cookie;

	KASSERT(fqc->qlength > 0);
	fqc->qlength--;

	fqc->xmit_cnt.packets++;
	fqc->xmit_cnt.bytes += m->m_pkthdr.len;

	(void)codel_commit(&flow->cd, m);
}

void
fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml)
{
	unsigned int i;

	for (i = 0; i < fqc->nflows; i++)
		codel_purge(&fqc->flows[i].cd, ml);
	fqc->qlength = 0;
}

struct mbuf *
fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m)
{
	return fqcodel_enq(ifq->ifq_q, m);
}

struct mbuf *
fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;

	m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml);
	ifq_mfreeml(ifq, &free_ml);
	return (m);
}

void
fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	return fqcodel_deq_commit(ifq->ifq_q, m, cookie);
}

void
fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	return fqcodel_purge(ifq->ifq_q, ml);
}

void *
fqcodel_pf_alloc(struct ifnet *ifp)
{
	struct fqcodel *fqc;

	fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO);

	SIMPLEQ_INIT(&fqc->newq);
	SIMPLEQ_INIT(&fqc->oldq);

	return (fqc);
}

int
fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel *fqc = arg;

	if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > 0xffff)
		return (EINVAL);

	fqc->nflows = qs->flowqueue.flows;
	fqc->quantum = qs->flowqueue.quantum;
	if (qs->qlimit > 0)
		fqc->qlimit = qs->qlimit;
	else
		fqc->qlimit = fqcodel_qlimit;
	if (fqc->quantum > 0)
		fqc->flags |= FQCF_FIXED_QUANTUM;
	else
		fqc->quantum = ifp->if_mtu + max_linkhdr;

	codel_initparams(&fqc->cparams, qs->flowqueue.target,
	    qs->flowqueue.interval, fqc->quantum);

	fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow),
	    M_DEVBUF, M_WAITOK | M_ZERO);

#ifdef FQCODEL_DEBUG
	{
		unsigned int i;

		for (i = 0; i < fqc->nflows; i++)
			fqc->flows[i].id = i;
	}
#endif

	fqc->ifp = ifp;

	DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums "
	    "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit,
	    fqc->quantum, fqc->cparams.target / 1000000,
	    fqc->cparams.interval / 1000000);

	return (0);
}

void
fqcodel_pf_free(void *arg)
{
	struct fqcodel *fqc = arg;

	codel_freeparams(&fqc->cparams);
	free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
	free(fqc, M_DEVBUF, sizeof(struct fqcodel));
}

int
fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel_stats stats;
	struct fqcodel *fqc;
	int64_t delay;
	unsigned int i;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));

	/* XXX: multi-q? */
	fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops);
	if (fqc == NULL)
		return (EBADF);

	stats.xmit_cnt = fqc->xmit_cnt;
	stats.drop_cnt = fqc->drop_cnt;

	stats.qlength = ifq_len(&ifp->if_snd);
	stats.qlimit = fqc->qlimit;

	stats.flows = 0;
	stats.delaysum = stats.delaysumsq = 0;

	for (i = 0; i < fqc->nflows; i++) {
		if (codel_qlength(&fqc->flows[i].cd) == 0)
			continue;
		/* Scale down to microseconds to avoid overflows */
		delay = codel_delay(&fqc->flows[i].cd) / 1000;
		stats.delaysum += delay;
		stats.delaysumsq += delay * delay;
		stats.flows++;
	}

	ifq_q_leave(&ifp->if_snd, fqc);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}
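
/*
 * Exporting the delay sum and sum of squares lets userland recover
 * both moments in one pass: mean = delaysum / flows and variance =
 * delaysumsq / flows - mean * mean, with everything in microseconds.
 */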

unsigned int
fqcodel_pf_qlength(void *fqc)
{
	return ((struct fqcodel *)fqc)->qlength;
}

struct mbuf *
fqcodel_pf_enqueue(void *fqc, struct mbuf *m)
{
	return fqcodel_enq(fqc, m);
}

struct mbuf *
fqcodel_pf_deq_begin(void *fqc, void **cookiep, struct mbuf_list *free_ml)
{
	return fqcodel_deq_begin(fqc, cookiep, free_ml);
}

void
fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie)
{
	return fqcodel_deq_commit(fqc, m, cookie);
}

void
fqcodel_pf_purge(void *fqc, struct mbuf_list *ml)
{
	return fqcodel_purge(fqc, ml);
}

unsigned int
fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
{
	return (0);
}

void *
fqcodel_alloc(unsigned int idx, void *arg)
{
	/* Allocation is done in fqcodel_pf_alloc */
	return (arg);
}

void
fqcodel_free(unsigned int idx, void *arg)
{
	/* nothing to do here */
}