1 /* $OpenBSD: hfsc.c,v 1.50 2024/10/29 23:57:54 dlg Exp $ */
2
3 /*
4 * Copyright (c) 2012-2013 Henning Brauer <henning@openbsd.org>
5 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
6 *
7 * Permission to use, copy, modify, and distribute this software and
8 * its documentation is hereby granted (including for commercial or
9 * for-profit use), provided that both the copyright notice and this
10 * permission notice appear in all copies of the software, derivative
11 * works, or modified versions, and any portions thereof.
12 *
13 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
14 * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
15 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
16 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 * DAMAGE.
27 *
28 * Carnegie Mellon encourages (but does not require) users of this
29 * software to return any improvements or extensions that they make,
30 * and to grant Carnegie Mellon the rights to redistribute these
31 * changes without encumbrance.
32 */
33 /*
34 * H-FSC is described in Proceedings of SIGCOMM'97,
35 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
36 * Real-Time and Priority Service"
37 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
38 *
39 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
40 * when a class has an upperlimit, the fit-time is computed from the
41 * upperlimit service curve. the link-sharing scheduler does not schedule
42 * a class whose fit-time exceeds the current time.
43 */
44
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/mbuf.h>
49 #include <sys/socket.h>
50 #include <sys/systm.h>
51 #include <sys/errno.h>
52 #include <sys/queue.h>
53 #include <sys/kernel.h>
54 #include <sys/timeout.h>
55
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <netinet/in.h>
59
60 #include <net/pfvar.h>
61 #include <net/hfsc.h>
62
63 /*
64 * kernel internal service curve representation
65 * coordinates are given by 64 bit unsigned integers.
66  *	x-axis: unit is clock count.  in this implementation the clock is
67  *		the system nanosecond uptime (see hfsc_uptime() below) rather
68  *		than the raw CPU TSC.  virtual time is also calculated in this time scale.
69 * y-axis: unit is byte.
70 *
71 * the service curve parameters are converted to the internal
72 * representation.
73 * the slope values are scaled to avoid overflow.
74 * the inverse slope values as well as the y-projection of the 1st
75 * segment are kept in order to avoid 64-bit divide operations
76 * that are expensive on 32-bit architectures.
77 *
78 * note: Intel Pentium TSC never wraps around in several thousands of years.
79 * x-axis doesn't wrap around for 1089 years with 1GHz clock.
80 * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
81 */
82
83 /* kernel internal representation of a service curve */
84 struct hfsc_internal_sc {
85 u_int64_t sm1; /* scaled slope of the 1st segment */
86 u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
87 u_int64_t dx; /* the x-projection of the 1st segment */
88 u_int64_t dy; /* the y-projection of the 1st segment */
89 u_int64_t sm2; /* scaled slope of the 2nd segment */
90 u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
91 };
92
93 /* runtime service curve */
94 struct hfsc_runtime_sc {
95 u_int64_t x; /* current starting position on x-axis */
96 	u_int64_t	y;	/* current starting position on y-axis */
97 u_int64_t sm1; /* scaled slope of the 1st segment */
98 u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
99 u_int64_t dx; /* the x-projection of the 1st segment */
100 u_int64_t dy; /* the y-projection of the 1st segment */
101 u_int64_t sm2; /* scaled slope of the 2nd segment */
102 u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
103 };
104
105 struct hfsc_classq {
106 struct mbuf_list q; /* Queue of packets */
107 int qlimit; /* Queue limit */
108 };
109
110 /* for TAILQ based ellist and actlist implementation */
111 struct hfsc_class;
112 TAILQ_HEAD(hfsc_eligible, hfsc_class);
113 TAILQ_HEAD(hfsc_active, hfsc_class);
114 #define hfsc_actlist_last(s) TAILQ_LAST(s, hfsc_active)
115
116 struct hfsc_class {
117 u_int cl_id; /* class id (just for debug) */
118 u_int32_t cl_handle; /* class handle */
119 int cl_flags; /* misc flags */
120
121 struct hfsc_class *cl_parent; /* parent class */
122 struct hfsc_class *cl_siblings; /* sibling classes */
123 struct hfsc_class *cl_children; /* child classes */
124
125 struct hfsc_classq cl_q; /* class queue structure */
126
127 const struct pfq_ops *cl_qops; /* queue manager */
128 void *cl_qdata; /* queue manager data */
129 void *cl_cookie; /* queue manager cookie */
130
131 u_int64_t cl_total; /* total work in bytes */
132 u_int64_t cl_cumul; /* cumulative work in bytes
133 done by real-time criteria */
134 u_int64_t cl_d; /* deadline */
135 u_int64_t cl_e; /* eligible time */
136 u_int64_t cl_vt; /* virtual time */
137 u_int64_t cl_f; /* time when this class will fit for
138 link-sharing, max(myf, cfmin) */
139 u_int64_t cl_myf; /* my fit-time (as calculated from this
140 class's own upperlimit curve) */
141 u_int64_t cl_myfadj; /* my fit-time adjustment
142 (to cancel history dependence) */
143 u_int64_t cl_cfmin; /* earliest children's fit-time (used
144 with cl_myf to obtain cl_f) */
145 u_int64_t cl_cvtmin; /* minimal virtual time among the
146 children fit for link-sharing
147 (monotonic within a period) */
148 u_int64_t cl_vtadj; /* intra-period cumulative vt
149 adjustment */
150 u_int64_t cl_vtoff; /* inter-period cumulative vt offset */
151 u_int64_t cl_cvtmax; /* max child's vt in the last period */
152
153 u_int64_t cl_initvt; /* init virtual time (for debugging) */
154
155 struct hfsc_internal_sc *cl_rsc; /* internal real-time service curve */
156 struct hfsc_internal_sc *cl_fsc; /* internal fair service curve */
157 struct hfsc_internal_sc *cl_usc; /* internal upperlimit service curve */
158 struct hfsc_runtime_sc cl_deadline; /* deadline curve */
159 struct hfsc_runtime_sc cl_eligible; /* eligible curve */
160 struct hfsc_runtime_sc cl_virtual; /* virtual curve */
161 struct hfsc_runtime_sc cl_ulimit; /* upperlimit curve */
162
163 u_int cl_vtperiod; /* vt period sequence no */
164 u_int cl_parentperiod; /* parent's vt period seqno */
165 int cl_nactive; /* number of active children */
166 struct hfsc_active cl_actc; /* active children list */
167
168 TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */
169 TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */
170
171 struct {
172 struct hfsc_pktcntr xmit_cnt;
173 struct hfsc_pktcntr drop_cnt;
174 u_int period;
175 } cl_stats;
176 };
177
178 /*
179 * hfsc interface state
180 */
181 struct hfsc_if {
182 struct hfsc_if *hif_next; /* interface state list */
183 struct hfsc_class *hif_rootclass; /* root class */
184 struct hfsc_class *hif_defaultclass; /* default class */
185 struct hfsc_class **hif_class_tbl;
186
187 u_int64_t hif_microtime; /* time at deq_begin */
188
189 u_int hif_allocated; /* # of slots in hif_class_tbl */
190 u_int hif_classes; /* # of classes in the tree */
191 u_int hif_classid; /* class id sequence number */
192
193 struct hfsc_eligible hif_eligible; /* eligible list */
194 struct timeout hif_defer; /* for queues that weren't ready */
195 };
196
197 /*
198 * function prototypes
199 */
200 struct hfsc_class *hfsc_class_create(struct hfsc_if *,
201 struct hfsc_sc *, struct hfsc_sc *,
202 struct hfsc_sc *, struct hfsc_class *, int,
203 int, int);
204 int hfsc_class_destroy(struct hfsc_if *,
205 struct hfsc_class *);
206 struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
207
208 void hfsc_cl_purge(struct hfsc_if *, struct hfsc_class *,
209 struct mbuf_list *);
210
211 void hfsc_update_sc(struct hfsc_if *, struct hfsc_class *, int);
212 void hfsc_deferred(void *);
213 void hfsc_update_cfmin(struct hfsc_class *);
214 void hfsc_set_active(struct hfsc_if *, struct hfsc_class *, int);
215 void hfsc_set_passive(struct hfsc_if *, struct hfsc_class *);
216 void hfsc_init_ed(struct hfsc_if *, struct hfsc_class *, int);
217 void hfsc_update_ed(struct hfsc_if *, struct hfsc_class *, int);
218 void hfsc_update_d(struct hfsc_class *, int);
219 void hfsc_init_vf(struct hfsc_class *, int);
220 void hfsc_update_vf(struct hfsc_class *, int, u_int64_t);
221 void hfsc_ellist_insert(struct hfsc_if *, struct hfsc_class *);
222 void hfsc_ellist_remove(struct hfsc_if *, struct hfsc_class *);
223 void hfsc_ellist_update(struct hfsc_if *, struct hfsc_class *);
224 struct hfsc_class *hfsc_ellist_get_mindl(struct hfsc_if *, u_int64_t);
225 void hfsc_actlist_insert(struct hfsc_class *);
226 void hfsc_actlist_remove(struct hfsc_class *);
227 void hfsc_actlist_update(struct hfsc_class *);
228
229 struct hfsc_class *hfsc_actlist_firstfit(struct hfsc_class *,
230 u_int64_t);
231
232 static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
233 static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
234 static __inline u_int64_t m2sm(u_int);
235 static __inline u_int64_t m2ism(u_int);
236 static __inline u_int64_t d2dx(u_int);
237 static __inline u_int sm2m(u_int64_t);
238 static __inline u_int dx2d(u_int64_t);
239
240 void hfsc_sc2isc(struct hfsc_sc *, struct hfsc_internal_sc *);
241 void hfsc_rtsc_init(struct hfsc_runtime_sc *,
242 struct hfsc_internal_sc *, u_int64_t, u_int64_t);
243 u_int64_t hfsc_rtsc_y2x(struct hfsc_runtime_sc *, u_int64_t);
244 u_int64_t hfsc_rtsc_x2y(struct hfsc_runtime_sc *, u_int64_t);
245 void hfsc_rtsc_min(struct hfsc_runtime_sc *,
246 struct hfsc_internal_sc *, u_int64_t, u_int64_t);
247
248 void hfsc_getclstats(struct hfsc_class_stats *, struct hfsc_class *);
249 struct hfsc_class *hfsc_clh2cph(struct hfsc_if *, u_int32_t);
250
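/*
 * the scheduler clock is the system nanosecond uptime (hfsc_uptime()
 * below), so HFSC_FREQ is the number of scheduler-clock ticks per second
 * and HFSC_CLK_PER_TICK is the length of one hardclock(9) tick in that scale.
 */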
251 #define HFSC_FREQ 1000000000LL
252 #define HFSC_CLK_PER_TICK tick_nsec
253 #define HFSC_HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
254
255 #define hfsc_uptime() nsecuptime()
256
257 struct pool hfsc_class_pl, hfsc_internal_sc_pl;
258
259 /*
260 * ifqueue glue.
261 */
262
263 unsigned int hfsc_idx(unsigned int, const struct mbuf *);
264 struct mbuf *hfsc_enq(struct ifqueue *, struct mbuf *);
265 struct mbuf *hfsc_deq_begin(struct ifqueue *, void **);
266 void hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
267 void hfsc_purge(struct ifqueue *, struct mbuf_list *);
268 void *hfsc_alloc(unsigned int, void *);
269 void hfsc_free(unsigned int, void *);
270
271 const struct ifq_ops hfsc_ops = {
272 hfsc_idx,
273 hfsc_enq,
274 hfsc_deq_begin,
275 hfsc_deq_commit,
276 hfsc_purge,
277 hfsc_alloc,
278 hfsc_free,
279 };
280
281 const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops;
282
283 /*
284 * pf queue glue.
285 */
286
287 void *hfsc_pf_alloc(struct ifnet *);
288 int hfsc_pf_addqueue(void *, struct pf_queuespec *);
289 void hfsc_pf_free(void *);
290 int hfsc_pf_qstats(struct pf_queuespec *, void *, int *);
291 unsigned int hfsc_pf_qlength(void *);
292 struct mbuf * hfsc_pf_enqueue(void *, struct mbuf *);
293 struct mbuf * hfsc_pf_deq_begin(void *, void **, struct mbuf_list *);
294 void hfsc_pf_deq_commit(void *, struct mbuf *, void *);
295 void hfsc_pf_purge(void *, struct mbuf_list *);
296
297 const struct pfq_ops hfsc_pf_ops = {
298 hfsc_pf_alloc,
299 hfsc_pf_addqueue,
300 hfsc_pf_free,
301 hfsc_pf_qstats,
302 hfsc_pf_qlength,
303 hfsc_pf_enqueue,
304 hfsc_pf_deq_begin,
305 hfsc_pf_deq_commit,
306 hfsc_pf_purge
307 };
308
309 const struct pfq_ops * const pfq_hfsc_ops = &hfsc_pf_ops;
310
311 /*
312 * shortcuts for repeated use
313 */
314 static inline unsigned int
315 hfsc_class_qlength(struct hfsc_class *cl)
316 {
317 /* Only leaf classes have a queue */
318 if (cl->cl_qops != NULL)
319 return cl->cl_qops->pfq_qlength(cl->cl_qdata);
320 return 0;
321 }
322
323 static inline struct mbuf *
324 hfsc_class_enqueue(struct hfsc_class *cl, struct mbuf *m)
325 {
326 return cl->cl_qops->pfq_enqueue(cl->cl_qdata, m);
327 }
328
329 static inline struct mbuf *
330 hfsc_class_deq_begin(struct hfsc_class *cl, struct mbuf_list *ml)
331 {
332 return cl->cl_qops->pfq_deq_begin(cl->cl_qdata, &cl->cl_cookie, ml);
333 }
334
335 static inline void
336 hfsc_class_deq_commit(struct hfsc_class *cl, struct mbuf *m)
337 {
338 return cl->cl_qops->pfq_deq_commit(cl->cl_qdata, m, cl->cl_cookie);
339 }
340
341 static inline void
342 hfsc_class_purge(struct hfsc_class *cl, struct mbuf_list *ml)
343 {
344 /* Only leaf classes have a queue */
345 if (cl->cl_qops != NULL)
346 return cl->cl_qops->pfq_purge(cl->cl_qdata, ml);
347 }
348
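/* pick the next class table size: double the current one, capped at HFSC_MAX_CLASSES */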
349 static inline u_int
350 hfsc_more_slots(u_int current)
351 {
352 u_int want = current * 2;
353
354 return (want > HFSC_MAX_CLASSES ? HFSC_MAX_CLASSES : want);
355 }
356
357 static void
358 hfsc_grow_class_tbl(struct hfsc_if *hif, u_int howmany)
359 {
360 struct hfsc_class **newtbl, **old;
361 size_t oldlen = sizeof(void *) * hif->hif_allocated;
362
363 newtbl = mallocarray(howmany, sizeof(void *), M_DEVBUF,
364 M_WAITOK | M_ZERO);
365 old = hif->hif_class_tbl;
366
367 memcpy(newtbl, old, oldlen);
368 hif->hif_class_tbl = newtbl;
369 hif->hif_allocated = howmany;
370
371 free(old, M_DEVBUF, oldlen);
372 }
373
374 void
375 hfsc_initialize(void)
376 {
377 pool_init(&hfsc_class_pl, sizeof(struct hfsc_class), 0,
378 IPL_NONE, PR_WAITOK, "hfscclass", NULL);
379 pool_init(&hfsc_internal_sc_pl, sizeof(struct hfsc_internal_sc), 0,
380 IPL_NONE, PR_WAITOK, "hfscintsc", NULL);
381 }
382
383 void *
384 hfsc_pf_alloc(struct ifnet *ifp)
385 {
386 struct hfsc_if *hif;
387
388 KASSERT(ifp != NULL);
389
390 hif = malloc(sizeof(*hif), M_DEVBUF, M_WAITOK | M_ZERO);
391 TAILQ_INIT(&hif->hif_eligible);
392 hif->hif_class_tbl = mallocarray(HFSC_DEFAULT_CLASSES, sizeof(void *),
393 M_DEVBUF, M_WAITOK | M_ZERO);
394 hif->hif_allocated = HFSC_DEFAULT_CLASSES;
395
396 timeout_set(&hif->hif_defer, hfsc_deferred, ifp);
397
398 return (hif);
399 }
400
401 int
402 hfsc_pf_addqueue(void *arg, struct pf_queuespec *q)
403 {
404 struct hfsc_if *hif = arg;
405 struct hfsc_class *cl, *parent, *np = NULL;
406 struct hfsc_sc rtsc, lssc, ulsc;
407 int error = 0;
408
409 KASSERT(hif != NULL);
410 KASSERT(q->qid != 0);
411
412 /* Root queue must have non-zero linksharing parameters */
413 if (q->linkshare.m1.absolute == 0 && q->linkshare.m2.absolute == 0 &&
414 q->parent_qid == 0)
415 return (EINVAL);
416
417 if (q->parent_qid == 0 && hif->hif_rootclass == NULL) {
418 np = hfsc_class_create(hif, NULL, NULL, NULL, NULL,
419 0, 0, HFSC_ROOT_CLASS | q->qid);
420 if (np == NULL)
421 return (EINVAL);
422 parent = np;
423 } else if ((parent = hfsc_clh2cph(hif, q->parent_qid)) == NULL)
424 return (EINVAL);
425
426 if (hfsc_clh2cph(hif, q->qid) != NULL) {
427 hfsc_class_destroy(hif, np);
428 return (EBUSY);
429 }
430
431 rtsc.m1 = q->realtime.m1.absolute;
432 rtsc.d = q->realtime.d;
433 rtsc.m2 = q->realtime.m2.absolute;
434 lssc.m1 = q->linkshare.m1.absolute;
435 lssc.d = q->linkshare.d;
436 lssc.m2 = q->linkshare.m2.absolute;
437 ulsc.m1 = q->upperlimit.m1.absolute;
438 ulsc.d = q->upperlimit.d;
439 ulsc.m2 = q->upperlimit.m2.absolute;
440
441 if ((cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
442 parent, q->qlimit, q->flags, q->qid)) == NULL) {
443 hfsc_class_destroy(hif, np);
444 return (ENOMEM);
445 }
446
447 /* Attach a queue manager if specified */
448 cl->cl_qops = pf_queue_manager(q);
449 /* Realtime class cannot be used with an external queue manager */
450 if (cl->cl_qops == NULL || cl->cl_rsc != NULL) {
451 cl->cl_qops = pfq_hfsc_ops;
452 cl->cl_qdata = &cl->cl_q;
453 } else {
454 cl->cl_qdata = cl->cl_qops->pfq_alloc(q->kif->pfik_ifp);
455 if (cl->cl_qdata == NULL) {
456 cl->cl_qops = NULL;
457 hfsc_class_destroy(hif, cl);
458 hfsc_class_destroy(hif, np);
459 return (ENOMEM);
460 }
461 error = cl->cl_qops->pfq_addqueue(cl->cl_qdata, q);
462 if (error) {
463 cl->cl_qops->pfq_free(cl->cl_qdata);
464 cl->cl_qops = NULL;
465 hfsc_class_destroy(hif, cl);
466 hfsc_class_destroy(hif, np);
467 return (error);
468 }
469 }
470
471 KASSERT(cl->cl_qops != NULL);
472 KASSERT(cl->cl_qdata != NULL);
473
474 return (0);
475 }
476
477 int
478 hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes)
479 {
480 struct ifnet *ifp = q->kif->pfik_ifp;
481 struct hfsc_if *hif;
482 struct hfsc_class *cl;
483 struct hfsc_class_stats stats;
484 int error = 0;
485
486 if (ifp == NULL)
487 return (EBADF);
488
489 if (*nbytes < sizeof(stats))
490 return (EINVAL);
491
492 hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
493 if (hif == NULL)
494 return (EBADF);
495
496 if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL) {
497 ifq_q_leave(&ifp->if_snd, hif);
498 return (EINVAL);
499 }
500
501 hfsc_getclstats(&stats, cl);
502 ifq_q_leave(&ifp->if_snd, hif);
503
504 if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
505 return (error);
506
507 *nbytes = sizeof(stats);
508 return (0);
509 }
510
511 void
512 hfsc_pf_free(void *arg)
513 {
514 hfsc_free(0, arg);
515 }
516
517 unsigned int
518 hfsc_pf_qlength(void *arg)
519 {
520 struct hfsc_classq *cq = arg;
521
522 return ml_len(&cq->q);
523 }
524
525 struct mbuf *
526 hfsc_pf_enqueue(void *arg, struct mbuf *m)
527 {
528 struct hfsc_classq *cq = arg;
529
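	/* tail drop: hand the mbuf back to the caller when the queue is full */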
530 if (ml_len(&cq->q) >= cq->qlimit)
531 return (m);
532
533 ml_enqueue(&cq->q, m);
534 return (NULL);
535 }
536
537 struct mbuf *
538 hfsc_pf_deq_begin(void *arg, void **cookiep, struct mbuf_list *free_ml)
539 {
540 struct hfsc_classq *cq = arg;
541
542 return MBUF_LIST_FIRST(&cq->q);
543 }
544
545 void
546 hfsc_pf_deq_commit(void *arg, struct mbuf *m, void *cookie)
547 {
548 struct hfsc_classq *cq = arg;
549
550 ml_dequeue(&cq->q);
551 }
552
553 void
554 hfsc_pf_purge(void *arg, struct mbuf_list *ml)
555 {
556 struct hfsc_classq *cq = arg;
557
558 ml_enlist(ml, &cq->q);
559 }
560
561 unsigned int
562 hfsc_idx(unsigned int nqueues, const struct mbuf *m)
563 {
564 /*
565 * hfsc can only function on a single ifq and the stack understands
566 * this. when the first ifq on an interface is switched to hfsc,
567 * this gets used to map all mbufs to the first and only ifq that
568 * is set up for hfsc.
569 */
570 return (0);
571 }
572
573 void *
574 hfsc_alloc(unsigned int idx, void *q)
575 {
576 struct hfsc_if *hif = q;
577
578 KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */
579 KASSERT(hif != NULL);
580 return (hif);
581 }
582
583 void
584 hfsc_free(unsigned int idx, void *q)
585 {
586 struct hfsc_if *hif = q;
587 struct hfsc_class *cl;
588 int i, restart;
589
590 KERNEL_ASSERT_LOCKED();
591 KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */
592
593 timeout_del(&hif->hif_defer);
594
595 do {
596 restart = 0;
597 for (i = 0; i < hif->hif_allocated; i++) {
598 cl = hif->hif_class_tbl[i];
599 if (hfsc_class_destroy(hif, cl) == EBUSY)
600 restart++;
601 }
602 } while (restart > 0);
603
604 free(hif->hif_class_tbl, M_DEVBUF, hif->hif_allocated * sizeof(void *));
605 free(hif, M_DEVBUF, sizeof(*hif));
606 }
607
608 void
609 hfsc_purge(struct ifqueue *ifq, struct mbuf_list *ml)
610 {
611 struct hfsc_if *hif = ifq->ifq_q;
612 struct hfsc_class *cl;
613
614 for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
615 hfsc_cl_purge(hif, cl, ml);
616 }
617
618 struct hfsc_class *
619 hfsc_class_create(struct hfsc_if *hif, struct hfsc_sc *rsc,
620 struct hfsc_sc *fsc, struct hfsc_sc *usc, struct hfsc_class *parent,
621 int qlimit, int flags, int qid)
622 {
623 struct hfsc_class *cl, *p;
624 int i, s;
625
626 if (qlimit == 0)
627 qlimit = HFSC_DEFAULT_QLIMIT;
628
629 if (hif->hif_classes >= hif->hif_allocated) {
630 u_int newslots = hfsc_more_slots(hif->hif_allocated);
631
632 if (newslots == hif->hif_allocated)
633 return (NULL);
634 hfsc_grow_class_tbl(hif, newslots);
635 }
636
637 cl = pool_get(&hfsc_class_pl, PR_WAITOK | PR_ZERO);
638 TAILQ_INIT(&cl->cl_actc);
639
640 ml_init(&cl->cl_q.q);
641 cl->cl_q.qlimit = qlimit;
642 cl->cl_flags = flags;
643
644 if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
645 cl->cl_rsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
646 hfsc_sc2isc(rsc, cl->cl_rsc);
647 hfsc_rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
648 hfsc_rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
649 }
650 if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
651 cl->cl_fsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
652 hfsc_sc2isc(fsc, cl->cl_fsc);
653 hfsc_rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
654 }
655 if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
656 cl->cl_usc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
657 hfsc_sc2isc(usc, cl->cl_usc);
658 hfsc_rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
659 }
660
661 cl->cl_id = hif->hif_classid++;
662 cl->cl_handle = qid;
663 cl->cl_parent = parent;
664
665 s = splnet();
666 hif->hif_classes++;
667
668 /*
669 * find a free slot in the class table. if the slot matching
670 * the lower bits of qid is free, use this slot. otherwise,
671 * use the first free slot.
672 */
673 i = qid % hif->hif_allocated;
674 if (hif->hif_class_tbl[i] == NULL)
675 hif->hif_class_tbl[i] = cl;
676 else {
677 for (i = 0; i < hif->hif_allocated; i++)
678 if (hif->hif_class_tbl[i] == NULL) {
679 hif->hif_class_tbl[i] = cl;
680 break;
681 }
682 if (i == hif->hif_allocated) {
683 splx(s);
684 goto err_ret;
685 }
686 }
687
688 if (flags & HFSC_DEFAULTCLASS)
689 hif->hif_defaultclass = cl;
690
691 if (parent == NULL)
692 hif->hif_rootclass = cl;
693 else {
694 /* add this class to the children list of the parent */
695 if ((p = parent->cl_children) == NULL)
696 parent->cl_children = cl;
697 else {
698 while (p->cl_siblings != NULL)
699 p = p->cl_siblings;
700 p->cl_siblings = cl;
701 }
702 }
703 splx(s);
704
705 return (cl);
706
707 err_ret:
708 if (cl->cl_fsc != NULL)
709 pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
710 if (cl->cl_rsc != NULL)
711 pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
712 if (cl->cl_usc != NULL)
713 pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
714 pool_put(&hfsc_class_pl, cl);
715 return (NULL);
716 }
717
718 int
719 hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
720 {
721 int i, s;
722
723 if (cl == NULL)
724 return (0);
725
726 if (cl->cl_children != NULL)
727 return (EBUSY);
728
729 s = splnet();
730 KASSERT(hfsc_class_qlength(cl) == 0);
731
732 if (cl->cl_parent != NULL) {
733 struct hfsc_class *p = cl->cl_parent->cl_children;
734
735 if (p == cl)
736 cl->cl_parent->cl_children = cl->cl_siblings;
737 else do {
738 if (p->cl_siblings == cl) {
739 p->cl_siblings = cl->cl_siblings;
740 break;
741 }
742 } while ((p = p->cl_siblings) != NULL);
743 }
744
745 for (i = 0; i < hif->hif_allocated; i++)
746 if (hif->hif_class_tbl[i] == cl) {
747 hif->hif_class_tbl[i] = NULL;
748 break;
749 }
750
751 hif->hif_classes--;
752 splx(s);
753
754 KASSERT(TAILQ_EMPTY(&cl->cl_actc));
755
756 if (cl == hif->hif_rootclass)
757 hif->hif_rootclass = NULL;
758 if (cl == hif->hif_defaultclass)
759 hif->hif_defaultclass = NULL;
760
761 /* Free external queue manager resources */
762 if (cl->cl_qops && cl->cl_qops != pfq_hfsc_ops)
763 cl->cl_qops->pfq_free(cl->cl_qdata);
764
765 if (cl->cl_usc != NULL)
766 pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
767 if (cl->cl_fsc != NULL)
768 pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
769 if (cl->cl_rsc != NULL)
770 pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
771 pool_put(&hfsc_class_pl, cl);
772
773 return (0);
774 }
775
776 /*
777 * hfsc_nextclass returns the next class in the tree.
778 * usage:
779 * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
780 * do_something;
781 */
782 struct hfsc_class *
783 hfsc_nextclass(struct hfsc_class *cl)
784 {
785 if (cl->cl_children != NULL)
786 cl = cl->cl_children;
787 else if (cl->cl_siblings != NULL)
788 cl = cl->cl_siblings;
789 else {
790 while ((cl = cl->cl_parent) != NULL)
791 if (cl->cl_siblings) {
792 cl = cl->cl_siblings;
793 break;
794 }
795 }
796
797 return (cl);
798 }
799
800 struct mbuf *
801 hfsc_enq(struct ifqueue *ifq, struct mbuf *m)
802 {
803 struct hfsc_if *hif = ifq->ifq_q;
804 struct hfsc_class *cl;
805 struct mbuf *dm;
806
807 if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL ||
808 cl->cl_children != NULL) {
809 cl = hif->hif_defaultclass;
810 if (cl == NULL)
811 return (m);
812 }
813
814 dm = hfsc_class_enqueue(cl, m);
815
816 /* successfully queued. */
817 if (dm != m && hfsc_class_qlength(cl) == 1) {
818 hfsc_set_active(hif, cl, m->m_pkthdr.len);
819 if (!timeout_pending(&hif->hif_defer))
820 timeout_add(&hif->hif_defer, 1);
821 }
822
823 /* drop occurred. */
824 if (dm != NULL)
825 PKTCNTR_INC(&cl->cl_stats.drop_cnt, dm->m_pkthdr.len);
826
827 return (dm);
828 }
829
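/*
 * deq_begin picks the next class (real-time criteria first, then
 * link-sharing) and only peeks at its head packet; the packet is not
 * removed and the service curves are not charged until the driver has
 * accepted it and deq_commit runs.
 */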
830 struct mbuf *
831 hfsc_deq_begin(struct ifqueue *ifq, void **cookiep)
832 {
833 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
834 struct hfsc_if *hif = ifq->ifq_q;
835 struct hfsc_class *cl, *tcl;
836 struct mbuf *m;
837 u_int64_t cur_time;
838
839 cur_time = hfsc_uptime();
840
841 /*
842 * if there are eligible classes, use real-time criteria.
843 * find the class with the minimum deadline among
844 * the eligible classes.
845 */
846 cl = hfsc_ellist_get_mindl(hif, cur_time);
847 if (cl == NULL) {
848 /*
849 * use link-sharing criteria
850 * get the class with the minimum vt in the hierarchy
851 */
852 cl = NULL;
853 tcl = hif->hif_rootclass;
854
855 while (tcl != NULL && tcl->cl_children != NULL) {
856 tcl = hfsc_actlist_firstfit(tcl, cur_time);
857 if (tcl == NULL)
858 continue;
859
860 /*
861 * update parent's cl_cvtmin.
862 * don't update if the new vt is smaller.
863 */
864 if (tcl->cl_parent->cl_cvtmin < tcl->cl_vt)
865 tcl->cl_parent->cl_cvtmin = tcl->cl_vt;
866
867 cl = tcl;
868 }
869 /* XXX HRTIMER plan hfsc_deferred precisely here. */
870 if (cl == NULL)
871 return (NULL);
872 }
873
874 m = hfsc_class_deq_begin(cl, &free_ml);
875 ifq_mfreeml(ifq, &free_ml);
876 if (m == NULL) {
877 hfsc_update_sc(hif, cl, 0);
878 return (NULL);
879 }
880
881 hif->hif_microtime = cur_time;
882 *cookiep = cl;
883 return (m);
884 }
885
886 void
887 hfsc_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
888 {
889 struct hfsc_if *hif = ifq->ifq_q;
890 struct hfsc_class *cl = cookie;
891
892 hfsc_class_deq_commit(cl, m);
893 hfsc_update_sc(hif, cl, m->m_pkthdr.len);
894
895 PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len);
896 }
897
898 void
899 hfsc_update_sc(struct hfsc_if *hif, struct hfsc_class *cl, int len)
900 {
901 int next_len, realtime = 0;
902 u_int64_t cur_time = hif->hif_microtime;
903
904 /* check if the class was scheduled by real-time criteria */
905 if (cl->cl_rsc != NULL)
906 realtime = (cl->cl_e <= cur_time);
907
908 hfsc_update_vf(cl, len, cur_time);
909 if (realtime)
910 cl->cl_cumul += len;
911
912 if (hfsc_class_qlength(cl) > 0) {
913 /*
914 * Realtime queue needs to look into the future and make
915 * calculations based on that. This is the reason it can't
916 * be used with an external queue manager.
917 */
918 if (cl->cl_rsc != NULL) {
919 struct mbuf *m0;
920
921 /* update ed */
922 KASSERT(cl->cl_qops == pfq_hfsc_ops);
923 m0 = MBUF_LIST_FIRST(&cl->cl_q.q);
924 next_len = m0->m_pkthdr.len;
925
926 if (realtime)
927 hfsc_update_ed(hif, cl, next_len);
928 else
929 hfsc_update_d(cl, next_len);
930 }
931 } else {
932 /* the class becomes passive */
933 hfsc_set_passive(hif, cl);
934 }
935 }
936
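/*
 * timeout handler scheduled from hfsc_enq(): restart the interface queue
 * in case packets became eligible while the hardware was idle, then
 * re-arm the timeout for the next tick while hfsc is still attached.
 */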
937 void
938 hfsc_deferred(void *arg)
939 {
940 struct ifnet *ifp = arg;
941 struct ifqueue *ifq = &ifp->if_snd;
942 struct hfsc_if *hif;
943
944 if (!HFSC_ENABLED(ifq))
945 return;
946
947 if (!ifq_empty(ifq))
948 ifq_start(ifq);
949
950 hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
951 if (hif == NULL)
952 return;
953 /* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
954 timeout_add(&hif->hif_defer, 1);
955 ifq_q_leave(&ifp->if_snd, hif);
956 }
957
958 void
959 hfsc_cl_purge(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf_list *ml)
960 {
961 struct mbuf_list ml2 = MBUF_LIST_INITIALIZER();
962
963 hfsc_class_purge(cl, &ml2);
964 if (ml_empty(&ml2))
965 return;
966
967 ml_enlist(ml, &ml2);
968
969 hfsc_update_vf(cl, 0, 0); /* remove cl from the actlist */
970 hfsc_set_passive(hif, cl);
971 }
972
973 void
974 hfsc_set_active(struct hfsc_if *hif, struct hfsc_class *cl, int len)
975 {
976 if (cl->cl_rsc != NULL)
977 hfsc_init_ed(hif, cl, len);
978 if (cl->cl_fsc != NULL)
979 hfsc_init_vf(cl, len);
980
981 cl->cl_stats.period++;
982 }
983
984 void
985 hfsc_set_passive(struct hfsc_if *hif, struct hfsc_class *cl)
986 {
987 if (cl->cl_rsc != NULL)
988 hfsc_ellist_remove(hif, cl);
989
990 /*
991 	 * the actlist is handled in hfsc_update_vf(), so hfsc_update_vf(cl,
992 	 * 0, 0) must be called explicitly to remove a class from the actlist
993 */
994 }
995
996 void
997 hfsc_init_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
998 {
999 u_int64_t cur_time;
1000
1001 cur_time = hfsc_uptime();
1002
1003 /* update the deadline curve */
1004 hfsc_rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
1005
1006 /*
1007 * update the eligible curve.
1008 * for concave, it is equal to the deadline curve.
1009 * for convex, it is a linear curve with slope m2.
1010 */
1011 cl->cl_eligible = cl->cl_deadline;
1012 if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
1013 cl->cl_eligible.dx = 0;
1014 cl->cl_eligible.dy = 0;
1015 }
1016
1017 /* compute e and d */
1018 cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
1019 cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
1020
1021 hfsc_ellist_insert(hif, cl);
1022 }
1023
1024 void
1025 hfsc_update_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
1026 {
1027 cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
1028 cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
1029
1030 hfsc_ellist_update(hif, cl);
1031 }
1032
1033 void
1034 hfsc_update_d(struct hfsc_class *cl, int next_len)
1035 {
1036 cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
1037 }
1038
1039 void
1040 hfsc_init_vf(struct hfsc_class *cl, int len)
1041 {
1042 struct hfsc_class *max_cl, *p;
1043 u_int64_t vt, f, cur_time;
1044 int go_active;
1045
1046 cur_time = 0;
1047 go_active = 1;
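	/*
	 * walk up from the newly backlogged class towards the root;
	 * go_active stays set only while each ancestor becomes newly
	 * backlogged, so the per-period initialisation below runs at
	 * most once per backlog period.
	 */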
1048 for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
1049 if (go_active && cl->cl_nactive++ == 0)
1050 go_active = 1;
1051 else
1052 go_active = 0;
1053
1054 if (go_active) {
1055 max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc,
1056 hfsc_active);
1057 if (max_cl != NULL) {
1058 /*
1059 * set vt to the average of the min and max
1060 * classes. if the parent's period didn't
1061 * change, don't decrease vt of the class.
1062 */
1063 vt = max_cl->cl_vt;
1064 if (cl->cl_parent->cl_cvtmin != 0)
1065 vt = (cl->cl_parent->cl_cvtmin + vt)/2;
1066
1067 if (cl->cl_parent->cl_vtperiod !=
1068 cl->cl_parentperiod || vt > cl->cl_vt)
1069 cl->cl_vt = vt;
1070 } else {
1071 /*
1072 * first child for a new parent backlog period.
1073 * add parent's cvtmax to vtoff of children
1074 * to make a new vt (vtoff + vt) larger than
1075 * the vt in the last period for all children.
1076 */
1077 vt = cl->cl_parent->cl_cvtmax;
1078 for (p = cl->cl_parent->cl_children; p != NULL;
1079 p = p->cl_siblings)
1080 p->cl_vtoff += vt;
1081 cl->cl_vt = 0;
1082 cl->cl_parent->cl_cvtmax = 0;
1083 cl->cl_parent->cl_cvtmin = 0;
1084 }
1085 cl->cl_initvt = cl->cl_vt;
1086
1087 /* update the virtual curve */
1088 vt = cl->cl_vt + cl->cl_vtoff;
1089 hfsc_rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt,
1090 cl->cl_total);
1091 if (cl->cl_virtual.x == vt) {
1092 cl->cl_virtual.x -= cl->cl_vtoff;
1093 cl->cl_vtoff = 0;
1094 }
1095 cl->cl_vtadj = 0;
1096
1097 cl->cl_vtperiod++; /* increment vt period */
1098 cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
1099 if (cl->cl_parent->cl_nactive == 0)
1100 cl->cl_parentperiod++;
1101 cl->cl_f = 0;
1102
1103 hfsc_actlist_insert(cl);
1104
1105 if (cl->cl_usc != NULL) {
1106 /* class has upper limit curve */
1107 if (cur_time == 0)
1108 cur_time = hfsc_uptime();
1109
1110 /* update the ulimit curve */
1111 hfsc_rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
1112 cl->cl_total);
1113 /* compute myf */
1114 cl->cl_myf = hfsc_rtsc_y2x(&cl->cl_ulimit,
1115 cl->cl_total);
1116 cl->cl_myfadj = 0;
1117 }
1118 }
1119
1120 if (cl->cl_myf > cl->cl_cfmin)
1121 f = cl->cl_myf;
1122 else
1123 f = cl->cl_cfmin;
1124 if (f != cl->cl_f) {
1125 cl->cl_f = f;
1126 hfsc_update_cfmin(cl->cl_parent);
1127 }
1128 }
1129 }
1130
1131 void
1132 hfsc_update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
1133 {
1134 u_int64_t f, myf_bound, delta;
1135 int go_passive = 0;
1136
1137 if (hfsc_class_qlength(cl) == 0)
1138 go_passive = 1;
1139
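	/*
	 * walk up towards the root, charging len to each class on the way;
	 * classes with a fair service curve get their vt and fit time f
	 * recomputed, or are dropped from the parent's actlist once they
	 * have no active children left.
	 */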
1140 for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
1141 cl->cl_total += len;
1142
1143 if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
1144 continue;
1145
1146 if (go_passive && --cl->cl_nactive == 0)
1147 go_passive = 1;
1148 else
1149 go_passive = 0;
1150
1151 if (go_passive) {
1152 /* no more active child, going passive */
1153
1154 /* update cvtmax of the parent class */
1155 if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
1156 cl->cl_parent->cl_cvtmax = cl->cl_vt;
1157
1158 /* remove this class from the vt list */
1159 hfsc_actlist_remove(cl);
1160
1161 hfsc_update_cfmin(cl->cl_parent);
1162
1163 continue;
1164 }
1165
1166 /*
1167 * update vt and f
1168 */
1169 cl->cl_vt = hfsc_rtsc_y2x(&cl->cl_virtual, cl->cl_total)
1170 - cl->cl_vtoff + cl->cl_vtadj;
1171
1172 /*
1173 * if vt of the class is smaller than cvtmin,
1174 * the class was skipped in the past due to non-fit.
1175 * if so, we need to adjust vtadj.
1176 */
1177 if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
1178 cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
1179 cl->cl_vt = cl->cl_parent->cl_cvtmin;
1180 }
1181
1182 /* update the vt list */
1183 hfsc_actlist_update(cl);
1184
1185 if (cl->cl_usc != NULL) {
1186 cl->cl_myf = cl->cl_myfadj +
1187 hfsc_rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
1188
1189 /*
1190 * if myf lags behind by more than one clock tick
1191 * from the current time, adjust myfadj to prevent
1192 * a rate-limited class from going greedy.
1193 * in a steady state under rate-limiting, myf
1194 * fluctuates within one clock tick.
1195 */
1196 myf_bound = cur_time - HFSC_CLK_PER_TICK;
1197 if (cl->cl_myf < myf_bound) {
1198 delta = cur_time - cl->cl_myf;
1199 cl->cl_myfadj += delta;
1200 cl->cl_myf += delta;
1201 }
1202 }
1203
1204 /* cl_f is max(cl_myf, cl_cfmin) */
1205 if (cl->cl_myf > cl->cl_cfmin)
1206 f = cl->cl_myf;
1207 else
1208 f = cl->cl_cfmin;
1209 if (f != cl->cl_f) {
1210 cl->cl_f = f;
1211 hfsc_update_cfmin(cl->cl_parent);
1212 }
1213 }
1214 }
1215
1216 void
1217 hfsc_update_cfmin(struct hfsc_class *cl)
1218 {
1219 struct hfsc_class *p;
1220 u_int64_t cfmin;
1221
1222 if (TAILQ_EMPTY(&cl->cl_actc)) {
1223 cl->cl_cfmin = 0;
1224 return;
1225 }
1226 cfmin = HFSC_HT_INFINITY;
1227 TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
1228 if (p->cl_f == 0) {
1229 cl->cl_cfmin = 0;
1230 return;
1231 }
1232 if (p->cl_f < cfmin)
1233 cfmin = p->cl_f;
1234 }
1235 cl->cl_cfmin = cfmin;
1236 }
1237
1238 /*
1239 * eligible list holds backlogged classes being sorted by their eligible times.
1240 * there is one eligible list per interface.
1241 */
1242 void
1243 hfsc_ellist_insert(struct hfsc_if *hif, struct hfsc_class *cl)
1244 {
1245 struct hfsc_class *p;
1246
1247 /* check the last entry first */
1248 if ((p = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible)) == NULL ||
1249 p->cl_e <= cl->cl_e) {
1250 TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
1251 return;
1252 }
1253
1254 TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
1255 if (cl->cl_e < p->cl_e) {
1256 TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
1257 return;
1258 }
1259 }
1260 }
1261
1262 void
1263 hfsc_ellist_remove(struct hfsc_if *hif, struct hfsc_class *cl)
1264 {
1265 TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
1266 }
1267
1268 void
1269 hfsc_ellist_update(struct hfsc_if *hif, struct hfsc_class *cl)
1270 {
1271 struct hfsc_class *p, *last;
1272
1273 /*
1274 * the eligible time of a class increases monotonically.
1275 * if the next entry has a larger eligible time, nothing to do.
1276 */
1277 p = TAILQ_NEXT(cl, cl_ellist);
1278 if (p == NULL || cl->cl_e <= p->cl_e)
1279 return;
1280
1281 /* check the last entry */
1282 last = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible);
1283 if (last->cl_e <= cl->cl_e) {
1284 TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
1285 TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
1286 return;
1287 }
1288
1289 /*
1290 * the new position must be between the next entry
1291 * and the last entry
1292 */
1293 while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
1294 if (cl->cl_e < p->cl_e) {
1295 TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
1296 TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
1297 return;
1298 }
1299 }
1300 }
1301
1302 /* find the class with the minimum deadline among the eligible classes */
1303 struct hfsc_class *
1304 hfsc_ellist_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
1305 {
1306 struct hfsc_class *p, *cl = NULL;
1307
1308 TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
1309 if (p->cl_e > cur_time)
1310 break;
1311 if (cl == NULL || p->cl_d < cl->cl_d)
1312 cl = p;
1313 }
1314 return (cl);
1315 }
1316
1317 /*
1318 * active children list holds backlogged child classes being sorted
1319 * by their virtual time.
1320 * each intermediate class has one active children list.
1321 */
1322 void
1323 hfsc_actlist_insert(struct hfsc_class *cl)
1324 {
1325 struct hfsc_class *p;
1326
1327 /* check the last entry first */
1328 if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active)) == NULL
1329 || p->cl_vt <= cl->cl_vt) {
1330 TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
1331 return;
1332 }
1333
1334 TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
1335 if (cl->cl_vt < p->cl_vt) {
1336 TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
1337 return;
1338 }
1339 }
1340 }
1341
1342 void
1343 hfsc_actlist_remove(struct hfsc_class *cl)
1344 {
1345 TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
1346 }
1347
1348 void
1349 hfsc_actlist_update(struct hfsc_class *cl)
1350 {
1351 struct hfsc_class *p, *last;
1352
1353 /*
1354 * the virtual time of a class increases monotonically during its
1355 * backlogged period.
1356 * if the next entry has a larger virtual time, nothing to do.
1357 */
1358 p = TAILQ_NEXT(cl, cl_actlist);
1359 if (p == NULL || cl->cl_vt < p->cl_vt)
1360 return;
1361
1362 /* check the last entry */
1363 last = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active);
1364 if (last->cl_vt <= cl->cl_vt) {
1365 TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
1366 TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
1367 return;
1368 }
1369
1370 /*
1371 * the new position must be between the next entry
1372 * and the last entry
1373 */
1374 while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
1375 if (cl->cl_vt < p->cl_vt) {
1376 TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
1377 TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
1378 return;
1379 }
1380 }
1381 }
1382
1383 struct hfsc_class *
1384 hfsc_actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
1385 {
1386 struct hfsc_class *p;
1387
1388 TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist)
1389 if (p->cl_f <= cur_time)
1390 return (p);
1391
1392 return (NULL);
1393 }
1394
1395 /*
1396 * service curve support functions
1397 *
1398 * external service curve parameters
1399 * m: bits/sec
1400 * d: msec
1401 * internal service curve parameters
1402 * sm: (bytes/tsc_interval) << SM_SHIFT
1403 * ism: (tsc_count/byte) << ISM_SHIFT
1404 * dx: tsc_count
1405 *
1406 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
1407 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
1408 * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
1409 * digits in decimal using the following table.
1410 *
1411 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
1412 * ----------+-------------------------------------------------------
1413 * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
1414 * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
1415 * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
1416 *
1417 * nsec/byte 80000 8000 800 80 8
1418 * ism(500MHz) 40000 4000 400 40 4
1419 * ism(200MHz) 16000 1600 160 16 1.6
1420 */
1421 #define SM_SHIFT 24
1422 #define ISM_SHIFT 10
1423
1424 #define SM_MASK ((1LL << SM_SHIFT) - 1)
1425 #define ISM_MASK ((1LL << ISM_SHIFT) - 1)
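/*
 * worked example (illustrative, not part of the original comment):
 * for m = 100Mbps, m2sm() below gives
 *	sm = (100000000 << 24) / 8 / 10^9 ~= 209715
 * i.e. 12500e-6 bytes/nsec scaled by 2^SM_SHIFT, matching the
 * "bytes/nsec" row of the table above.
 */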
1426
1427 static __inline u_int64_t
1428 seg_x2y(u_int64_t x, u_int64_t sm)
1429 {
1430 u_int64_t y;
1431
1432 /*
1433 * compute
1434 * y = x * sm >> SM_SHIFT
1435 * but divide it for the upper and lower bits to avoid overflow
1436 */
1437 y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
1438 return (y);
1439 }
1440
1441 static __inline u_int64_t
1442 seg_y2x(u_int64_t y, u_int64_t ism)
1443 {
1444 u_int64_t x;
1445
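	/*
	 * inverse of seg_x2y(): compute x = y * ism >> ISM_SHIFT, again
	 * split into upper and lower bits to avoid 64-bit overflow.
	 */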
1446 if (y == 0)
1447 x = 0;
1448 else if (ism == HFSC_HT_INFINITY)
1449 x = HFSC_HT_INFINITY;
1450 else {
1451 x = (y >> ISM_SHIFT) * ism
1452 + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
1453 }
1454 return (x);
1455 }
1456
1457 static __inline u_int64_t
1458 m2sm(u_int m)
1459 {
1460 u_int64_t sm;
1461
1462 sm = ((u_int64_t)m << SM_SHIFT) / 8 / HFSC_FREQ;
1463 return (sm);
1464 }
1465
1466 static __inline u_int64_t
1467 m2ism(u_int m)
1468 {
1469 u_int64_t ism;
1470
1471 if (m == 0)
1472 ism = HFSC_HT_INFINITY;
1473 else
1474 ism = ((u_int64_t)HFSC_FREQ << ISM_SHIFT) * 8 / m;
1475 return (ism);
1476 }
1477
1478 static __inline u_int64_t
1479 d2dx(u_int d)
1480 {
1481 u_int64_t dx;
1482
1483 dx = ((u_int64_t)d * HFSC_FREQ) / 1000;
1484 return (dx);
1485 }
1486
1487 static __inline u_int
1488 sm2m(u_int64_t sm)
1489 {
1490 u_int64_t m;
1491
1492 m = (sm * 8 * HFSC_FREQ) >> SM_SHIFT;
1493 return ((u_int)m);
1494 }
1495
1496 static __inline u_int
1497 dx2d(u_int64_t dx)
1498 {
1499 u_int64_t d;
1500
1501 d = dx * 1000 / HFSC_FREQ;
1502 return ((u_int)d);
1503 }
1504
1505 void
1506 hfsc_sc2isc(struct hfsc_sc *sc, struct hfsc_internal_sc *isc)
1507 {
1508 isc->sm1 = m2sm(sc->m1);
1509 isc->ism1 = m2ism(sc->m1);
1510 isc->dx = d2dx(sc->d);
1511 isc->dy = seg_x2y(isc->dx, isc->sm1);
1512 isc->sm2 = m2sm(sc->m2);
1513 isc->ism2 = m2ism(sc->m2);
1514 }
1515
1516 /*
1517 * initialize the runtime service curve with the given internal
1518 * service curve starting at (x, y).
1519 */
1520 void
1521 hfsc_rtsc_init(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
1522 u_int64_t x, u_int64_t y)
1523 {
1524 rtsc->x = x;
1525 rtsc->y = y;
1526 rtsc->sm1 = isc->sm1;
1527 rtsc->ism1 = isc->ism1;
1528 rtsc->dx = isc->dx;
1529 rtsc->dy = isc->dy;
1530 rtsc->sm2 = isc->sm2;
1531 rtsc->ism2 = isc->ism2;
1532 }
1533
1534 /*
1535  * calculate the x-projection of the runtime service curve for the
1536  * given y-value (the inverse of hfsc_rtsc_x2y() below)
1537 */
1538 u_int64_t
1539 hfsc_rtsc_y2x(struct hfsc_runtime_sc *rtsc, u_int64_t y)
1540 {
1541 u_int64_t x;
1542
1543 if (y < rtsc->y)
1544 x = rtsc->x;
1545 else if (y <= rtsc->y + rtsc->dy) {
1546 /* x belongs to the 1st segment */
1547 if (rtsc->dy == 0)
1548 x = rtsc->x + rtsc->dx;
1549 else
1550 x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
1551 } else {
1552 /* x belongs to the 2nd segment */
1553 x = rtsc->x + rtsc->dx
1554 + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
1555 }
1556 return (x);
1557 }
1558
1559 u_int64_t
1560 hfsc_rtsc_x2y(struct hfsc_runtime_sc *rtsc, u_int64_t x)
1561 {
1562 u_int64_t y;
1563
1564 if (x <= rtsc->x)
1565 y = rtsc->y;
1566 else if (x <= rtsc->x + rtsc->dx)
1567 /* y belongs to the 1st segment */
1568 y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
1569 else
1570 /* y belongs to the 2nd segment */
1571 y = rtsc->y + rtsc->dy
1572 + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
1573 return (y);
1574 }
1575
1576 /*
1577 * update the runtime service curve by taking the minimum of the current
1578 * runtime service curve and the service curve starting at (x, y).
1579 */
1580 void
1581 hfsc_rtsc_min(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
1582 u_int64_t x, u_int64_t y)
1583 {
1584 u_int64_t y1, y2, dx, dy;
1585
1586 if (isc->sm1 <= isc->sm2) {
1587 /* service curve is convex */
1588 y1 = hfsc_rtsc_x2y(rtsc, x);
1589 if (y1 < y)
1590 /* the current rtsc is smaller */
1591 return;
1592 rtsc->x = x;
1593 rtsc->y = y;
1594 return;
1595 }
1596
1597 /*
1598 * service curve is concave
1599 * compute the two y values of the current rtsc
1600 * y1: at x
1601 * y2: at (x + dx)
1602 */
1603 y1 = hfsc_rtsc_x2y(rtsc, x);
1604 if (y1 <= y) {
1605 /* rtsc is below isc, no change to rtsc */
1606 return;
1607 }
1608
1609 y2 = hfsc_rtsc_x2y(rtsc, x + isc->dx);
1610 if (y2 >= y + isc->dy) {
1611 /* rtsc is above isc, replace rtsc by isc */
1612 rtsc->x = x;
1613 rtsc->y = y;
1614 rtsc->dx = isc->dx;
1615 rtsc->dy = isc->dy;
1616 return;
1617 }
1618
1619 /*
1620 * the two curves intersect
1621 * compute the offsets (dx, dy) using the reverse
1622 * function of seg_x2y()
1623 * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
1624 */
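	/*
	 * i.e. dx*sm1 >> SM_SHIFT == (dx*sm2 >> SM_SHIFT) + (y1 - y),
	 * which solves to dx = ((y1 - y) << SM_SHIFT) / (sm1 - sm2).
	 */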
1625 dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
1626 /*
1627 * check if (x, y1) belongs to the 1st segment of rtsc.
1628 * if so, add the offset.
1629 */
1630 if (rtsc->x + rtsc->dx > x)
1631 dx += rtsc->x + rtsc->dx - x;
1632 dy = seg_x2y(dx, isc->sm1);
1633
1634 rtsc->x = x;
1635 rtsc->y = y;
1636 rtsc->dx = dx;
1637 rtsc->dy = dy;
1638 return;
1639 }
1640
1641 void
1642 hfsc_getclstats(struct hfsc_class_stats *sp, struct hfsc_class *cl)
1643 {
1644 sp->class_id = cl->cl_id;
1645 sp->class_handle = cl->cl_handle;
1646
1647 if (cl->cl_rsc != NULL) {
1648 sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
1649 sp->rsc.d = dx2d(cl->cl_rsc->dx);
1650 sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
1651 } else {
1652 sp->rsc.m1 = 0;
1653 sp->rsc.d = 0;
1654 sp->rsc.m2 = 0;
1655 }
1656 if (cl->cl_fsc != NULL) {
1657 sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
1658 sp->fsc.d = dx2d(cl->cl_fsc->dx);
1659 sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
1660 } else {
1661 sp->fsc.m1 = 0;
1662 sp->fsc.d = 0;
1663 sp->fsc.m2 = 0;
1664 }
1665 if (cl->cl_usc != NULL) {
1666 sp->usc.m1 = sm2m(cl->cl_usc->sm1);
1667 sp->usc.d = dx2d(cl->cl_usc->dx);
1668 sp->usc.m2 = sm2m(cl->cl_usc->sm2);
1669 } else {
1670 sp->usc.m1 = 0;
1671 sp->usc.d = 0;
1672 sp->usc.m2 = 0;
1673 }
1674
1675 sp->total = cl->cl_total;
1676 sp->cumul = cl->cl_cumul;
1677
1678 sp->d = cl->cl_d;
1679 sp->e = cl->cl_e;
1680 sp->vt = cl->cl_vt;
1681 sp->f = cl->cl_f;
1682
1683 sp->initvt = cl->cl_initvt;
1684 sp->vtperiod = cl->cl_vtperiod;
1685 sp->parentperiod = cl->cl_parentperiod;
1686 sp->nactive = cl->cl_nactive;
1687 sp->vtoff = cl->cl_vtoff;
1688 sp->cvtmax = cl->cl_cvtmax;
1689 sp->myf = cl->cl_myf;
1690 sp->cfmin = cl->cl_cfmin;
1691 sp->cvtmin = cl->cl_cvtmin;
1692 sp->myfadj = cl->cl_myfadj;
1693 sp->vtadj = cl->cl_vtadj;
1694
1695 sp->cur_time = hfsc_uptime();
1696 sp->machclk_freq = HFSC_FREQ;
1697
1698 sp->qlength = hfsc_class_qlength(cl);
1699 sp->qlimit = cl->cl_q.qlimit;
1700 sp->xmit_cnt = cl->cl_stats.xmit_cnt;
1701 sp->drop_cnt = cl->cl_stats.drop_cnt;
1702 sp->period = cl->cl_stats.period;
1703
1704 sp->qtype = 0;
1705 }
1706
1707 /* convert a class handle to the corresponding class pointer */
1708 struct hfsc_class *
1709 hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle)
1710 {
1711 int i;
1712 struct hfsc_class *cl;
1713
1714 if (chandle == 0)
1715 return (NULL);
1716 /*
1717 * first, try the slot corresponding to the lower bits of the handle.
1718 * if it does not match, do the linear table search.
1719 */
1720 i = chandle % hif->hif_allocated;
1721 if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
1722 return (cl);
1723 for (i = 0; i < hif->hif_allocated; i++)
1724 if ((cl = hif->hif_class_tbl[i]) != NULL &&
1725 cl->cl_handle == chandle)
1726 return (cl);
1727 return (NULL);
1728 }
1729