/*	$OpenBSD: hfsc.c,v 1.48 2018/10/22 23:44:53 dlg Exp $	*/

/*
 * Copyright (c) 2012-2013 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>

#include <net/pfvar.h>
#include <net/hfsc.h>

/*
 * kernel internal service curve representation
 *	coordinates are given by 64 bit unsigned integers.
 *	x-axis: unit is clock count.  for the intel x86 architecture,
 *		the raw Pentium TSC (Timestamp Counter) value is used.
 *		virtual time is also calculated in this time scale.
 *	y-axis: unit is byte.
 *
 *	the service curve parameters are converted to the internal
 *	representation.
 *	the slope values are scaled to avoid overflow.
 *	the inverse slope values as well as the y-projection of the 1st
 *	segment are kept in order to avoid 64-bit divide operations
 *	that are expensive on 32-bit architectures.
 *
 *  note: Intel Pentium TSC never wraps around in several thousands of years.
 *	x-axis doesn't wrap around for 1089 years with 1GHz clock.
 *      y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
 */
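/*
 * note for this OpenBSD port: the x-axis clock is not the raw TSC here;
 * hfsc_microuptime() below returns microuptime() scaled left by
 * HFSC_CLK_SHIFT, so one clock count is 2^-HFSC_CLK_SHIFT microseconds.
 */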

/* kernel internal representation of a service curve */
struct hfsc_internal_sc {
	u_int64_t	sm1;	/* scaled slope of the 1st segment */
	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
	u_int64_t	dx;	/* the x-projection of the 1st segment */
	u_int64_t	dy;	/* the y-projection of the 1st segment */
	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
};

/* runtime service curve */
struct hfsc_runtime_sc {
	u_int64_t	x;	/* current starting position on x-axis */
	u_int64_t	y;	/* current starting position on y-axis */
	u_int64_t	sm1;	/* scaled slope of the 1st segment */
	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
	u_int64_t	dx;	/* the x-projection of the 1st segment */
	u_int64_t	dy;	/* the y-projection of the 1st segment */
	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
};

struct hfsc_classq {
	struct mbuf_list q;	 /* Queue of packets */
	int		 qlimit; /* Queue limit */
};

/* for TAILQ based ellist and actlist implementation */
struct hfsc_class;
TAILQ_HEAD(hfsc_eligible, hfsc_class);
TAILQ_HEAD(hfsc_active, hfsc_class);
#define	hfsc_actlist_last(s)		TAILQ_LAST(s, hfsc_active)

struct hfsc_class {
	u_int		cl_id;		/* class id (just for debug) */
	u_int32_t	cl_handle;	/* class handle */
	int		cl_flags;	/* misc flags */

	struct hfsc_class *cl_parent;	/* parent class */
	struct hfsc_class *cl_siblings;	/* sibling classes */
	struct hfsc_class *cl_children;	/* child classes */

	struct hfsc_classq cl_q;	/* class queue structure */

	const struct pfq_ops *cl_qops;	/* queue manager */
	void		*cl_qdata;	/* queue manager data */
	void		*cl_cookie;	/* queue manager cookie */

	u_int64_t	cl_total;	/* total work in bytes */
	u_int64_t	cl_cumul;	/* cumulative work in bytes
					   done by real-time criteria */
	u_int64_t	cl_d;		/* deadline */
	u_int64_t	cl_e;		/* eligible time */
	u_int64_t	cl_vt;		/* virtual time */
	u_int64_t	cl_f;		/* time when this class will fit for
					   link-sharing, max(myf, cfmin) */
	u_int64_t	cl_myf;		/* my fit-time (as calculated from this
					   class's own upperlimit curve) */
	u_int64_t	cl_myfadj;	/* my fit-time adjustment
					   (to cancel history dependence) */
	u_int64_t	cl_cfmin;	/* earliest children's fit-time (used
					   with cl_myf to obtain cl_f) */
	u_int64_t	cl_cvtmin;	/* minimal virtual time among the
					   children fit for link-sharing
					   (monotonic within a period) */
	u_int64_t	cl_vtadj;	/* intra-period cumulative vt
					   adjustment */
	u_int64_t	cl_vtoff;	/* inter-period cumulative vt offset */
	u_int64_t	cl_cvtmax;	/* max child's vt in the last period */

	u_int64_t	cl_initvt;	/* init virtual time (for debugging) */

	struct hfsc_internal_sc *cl_rsc; /* internal real-time service curve */
	struct hfsc_internal_sc *cl_fsc; /* internal fair service curve */
	struct hfsc_internal_sc *cl_usc; /* internal upperlimit service curve */
	struct hfsc_runtime_sc   cl_deadline; /* deadline curve */
	struct hfsc_runtime_sc   cl_eligible; /* eligible curve */
	struct hfsc_runtime_sc   cl_virtual;  /* virtual curve */
	struct hfsc_runtime_sc   cl_ulimit;   /* upperlimit curve */

	u_int		cl_vtperiod;	/* vt period sequence no */
	u_int		cl_parentperiod;  /* parent's vt period seqno */
	int		cl_nactive;	/* number of active children */
	struct hfsc_active	cl_actc; /* active children list */

	TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */
	TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */

	struct {
		struct hfsc_pktcntr xmit_cnt;
		struct hfsc_pktcntr drop_cnt;
		u_int period;
	} cl_stats;
};

/*
 * hfsc interface state
 */
struct hfsc_if {
	struct hfsc_if		*hif_next;	/* interface state list */
	struct hfsc_class	*hif_rootclass;		/* root class */
	struct hfsc_class	*hif_defaultclass;	/* default class */
	struct hfsc_class	**hif_class_tbl;

	u_int64_t		hif_microtime;	/* time at deq_begin */

	u_int	hif_allocated;			/* # of slots in hif_class_tbl */
	u_int	hif_classes;			/* # of classes in the tree */
	u_int	hif_classid;			/* class id sequence number */

	struct hfsc_eligible hif_eligible;	/* eligible list */
	struct timeout hif_defer;	/* for queues that weren't ready */
};

/*
 * function prototypes
 */
struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
			    struct hfsc_sc *, struct hfsc_sc *,
			    struct hfsc_sc *, struct hfsc_class *, int,
			    int, int);
int			 hfsc_class_destroy(struct hfsc_if *,
			    struct hfsc_class *);
struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);

void		 hfsc_cl_purge(struct hfsc_if *, struct hfsc_class *,
		     struct mbuf_list *);

void		 hfsc_update_sc(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_deferred(void *);
void		 hfsc_update_cfmin(struct hfsc_class *);
void		 hfsc_set_active(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_set_passive(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_init_ed(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_update_ed(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_update_d(struct hfsc_class *, int);
void		 hfsc_init_vf(struct hfsc_class *, int);
void		 hfsc_update_vf(struct hfsc_class *, int, u_int64_t);
void		 hfsc_ellist_insert(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_ellist_remove(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_ellist_update(struct hfsc_if *, struct hfsc_class *);
struct hfsc_class	*hfsc_ellist_get_mindl(struct hfsc_if *, u_int64_t);
void		 hfsc_actlist_insert(struct hfsc_class *);
void		 hfsc_actlist_remove(struct hfsc_class *);
void		 hfsc_actlist_update(struct hfsc_class *);

struct hfsc_class	*hfsc_actlist_firstfit(struct hfsc_class *,
				    u_int64_t);

static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
static __inline u_int64_t	m2sm(u_int);
static __inline u_int64_t	m2ism(u_int);
static __inline u_int64_t	d2dx(u_int);
static __inline u_int		sm2m(u_int64_t);
static __inline u_int		dx2d(u_int64_t);

void		hfsc_sc2isc(struct hfsc_sc *, struct hfsc_internal_sc *);
void		hfsc_rtsc_init(struct hfsc_runtime_sc *,
		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);
u_int64_t	hfsc_rtsc_y2x(struct hfsc_runtime_sc *, u_int64_t);
u_int64_t	hfsc_rtsc_x2y(struct hfsc_runtime_sc *, u_int64_t);
void		hfsc_rtsc_min(struct hfsc_runtime_sc *,
		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);

void		hfsc_getclstats(struct hfsc_class_stats *, struct hfsc_class *);
struct hfsc_class	*hfsc_clh2cph(struct hfsc_if *, u_int32_t);

#define	HFSC_CLK_SHIFT		8
#define	HFSC_FREQ		(1000000 << HFSC_CLK_SHIFT)
#define	HFSC_CLK_PER_TICK	(HFSC_FREQ / hz)
#define	HFSC_HT_INFINITY	0xffffffffffffffffLL /* infinite time value */
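/*
 * illustrative arithmetic (added, not part of the original comments):
 * with HFSC_CLK_SHIFT == 8 a microsecond maps to 256 clock counts, so
 * HFSC_FREQ is 256,000,000 counts per second and, assuming hz == 100 as
 * an example, HFSC_CLK_PER_TICK works out to 2,560,000 counts per tick.
 */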

struct pool	hfsc_class_pl, hfsc_internal_sc_pl;

/*
 * ifqueue glue.
 */

unsigned int	 hfsc_idx(unsigned int, const struct mbuf *);
struct mbuf	*hfsc_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*hfsc_deq_begin(struct ifqueue *, void **);
void		 hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 hfsc_purge(struct ifqueue *, struct mbuf_list *);
void		*hfsc_alloc(unsigned int, void *);
void		 hfsc_free(unsigned int, void *);

const struct ifq_ops hfsc_ops = {
	hfsc_idx,
	hfsc_enq,
	hfsc_deq_begin,
	hfsc_deq_commit,
	hfsc_purge,
	hfsc_alloc,
	hfsc_free,
};

const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops;

/*
 * pf queue glue.
 */

void		*hfsc_pf_alloc(struct ifnet *);
int		 hfsc_pf_addqueue(void *, struct pf_queuespec *);
void		 hfsc_pf_free(void *);
int		 hfsc_pf_qstats(struct pf_queuespec *, void *, int *);
unsigned int	 hfsc_pf_qlength(void *);
struct mbuf *	 hfsc_pf_enqueue(void *, struct mbuf *);
struct mbuf *	 hfsc_pf_deq_begin(void *, void **, struct mbuf_list *);
void		 hfsc_pf_deq_commit(void *, struct mbuf *, void *);
void		 hfsc_pf_purge(void *, struct mbuf_list *);

const struct pfq_ops hfsc_pf_ops = {
	hfsc_pf_alloc,
	hfsc_pf_addqueue,
	hfsc_pf_free,
	hfsc_pf_qstats,
	hfsc_pf_qlength,
	hfsc_pf_enqueue,
	hfsc_pf_deq_begin,
	hfsc_pf_deq_commit,
	hfsc_pf_purge
};

const struct pfq_ops * const pfq_hfsc_ops = &hfsc_pf_ops;

/*
 * shortcuts for repeated use
 */
static inline unsigned int
hfsc_class_qlength(struct hfsc_class *cl)
{
	/* Only leaf classes have a queue */
	if (cl->cl_qops != NULL)
		return cl->cl_qops->pfq_qlength(cl->cl_qdata);
	return 0;
}

static inline struct mbuf *
hfsc_class_enqueue(struct hfsc_class *cl, struct mbuf *m)
{
	return cl->cl_qops->pfq_enqueue(cl->cl_qdata, m);
}

static inline struct mbuf *
hfsc_class_deq_begin(struct hfsc_class *cl, struct mbuf_list *ml)
{
	return cl->cl_qops->pfq_deq_begin(cl->cl_qdata, &cl->cl_cookie, ml);
}

static inline void
hfsc_class_deq_commit(struct hfsc_class *cl, struct mbuf *m)
{
	return cl->cl_qops->pfq_deq_commit(cl->cl_qdata, m, cl->cl_cookie);
}

static inline void
hfsc_class_purge(struct hfsc_class *cl, struct mbuf_list *ml)
{
	/* Only leaf classes have a queue */
	if (cl->cl_qops != NULL)
		return cl->cl_qops->pfq_purge(cl->cl_qdata, ml);
}

u_int64_t
hfsc_microuptime(void)
{
	struct timeval tv;

	microuptime(&tv);
	return (((u_int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec) <<
	    HFSC_CLK_SHIFT);
}

static inline u_int
hfsc_more_slots(u_int current)
{
	u_int want = current * 2;

	return (want > HFSC_MAX_CLASSES ? HFSC_MAX_CLASSES : want);
}

static void
hfsc_grow_class_tbl(struct hfsc_if *hif, u_int howmany)
{
	struct hfsc_class **newtbl, **old;
	size_t oldlen = sizeof(void *) * hif->hif_allocated;

	newtbl = mallocarray(howmany, sizeof(void *), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	old = hif->hif_class_tbl;

	memcpy(newtbl, old, oldlen);
	hif->hif_class_tbl = newtbl;
	hif->hif_allocated = howmany;

	free(old, M_DEVBUF, oldlen);
}

void
hfsc_initialize(void)
{
	pool_init(&hfsc_class_pl, sizeof(struct hfsc_class), 0,
	    IPL_NONE, PR_WAITOK, "hfscclass", NULL);
	pool_init(&hfsc_internal_sc_pl, sizeof(struct hfsc_internal_sc), 0,
	    IPL_NONE, PR_WAITOK, "hfscintsc", NULL);
}

void *
hfsc_pf_alloc(struct ifnet *ifp)
{
	struct hfsc_if *hif;

	KASSERT(ifp != NULL);

	hif = malloc(sizeof(*hif), M_DEVBUF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&hif->hif_eligible);
	hif->hif_class_tbl = mallocarray(HFSC_DEFAULT_CLASSES, sizeof(void *),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	hif->hif_allocated = HFSC_DEFAULT_CLASSES;

	timeout_set(&hif->hif_defer, hfsc_deferred, ifp);

	return (hif);
}

int
hfsc_pf_addqueue(void *arg, struct pf_queuespec *q)
{
	struct hfsc_if *hif = arg;
	struct hfsc_class *cl, *parent, *np = NULL;
	struct hfsc_sc rtsc, lssc, ulsc;
	int error = 0;

	KASSERT(hif != NULL);
	KASSERT(q->qid != 0);

	/* Root queue must have non-zero linksharing parameters */
	if (q->linkshare.m1.absolute == 0 && q->linkshare.m2.absolute == 0 &&
	    q->parent_qid == 0)
		return (EINVAL);

	if (q->parent_qid == 0 && hif->hif_rootclass == NULL) {
		np = hfsc_class_create(hif, NULL, NULL, NULL, NULL,
		    0, 0, HFSC_ROOT_CLASS | q->qid);
		if (np == NULL)
			return (EINVAL);
		parent = np;
	} else if ((parent = hfsc_clh2cph(hif, q->parent_qid)) == NULL)
		return (EINVAL);

	if (hfsc_clh2cph(hif, q->qid) != NULL) {
		hfsc_class_destroy(hif, np);
		return (EBUSY);
	}

	rtsc.m1 = q->realtime.m1.absolute;
	rtsc.d  = q->realtime.d;
	rtsc.m2 = q->realtime.m2.absolute;
	lssc.m1 = q->linkshare.m1.absolute;
	lssc.d  = q->linkshare.d;
	lssc.m2 = q->linkshare.m2.absolute;
	ulsc.m1 = q->upperlimit.m1.absolute;
	ulsc.d  = q->upperlimit.d;
	ulsc.m2 = q->upperlimit.m2.absolute;

	if ((cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
	    parent, q->qlimit, q->flags, q->qid)) == NULL) {
		hfsc_class_destroy(hif, np);
		return (ENOMEM);
	}

	/* Attach a queue manager if specified */
	cl->cl_qops = pf_queue_manager(q);
	/* Realtime class cannot be used with an external queue manager */
	if (cl->cl_qops == NULL || cl->cl_rsc != NULL) {
		cl->cl_qops = pfq_hfsc_ops;
		cl->cl_qdata = &cl->cl_q;
	} else {
		cl->cl_qdata = cl->cl_qops->pfq_alloc(q->kif->pfik_ifp);
		if (cl->cl_qdata == NULL) {
			cl->cl_qops = NULL;
			hfsc_class_destroy(hif, cl);
			hfsc_class_destroy(hif, np);
			return (ENOMEM);
		}
		error = cl->cl_qops->pfq_addqueue(cl->cl_qdata, q);
		if (error) {
			cl->cl_qops->pfq_free(cl->cl_qdata);
			cl->cl_qops = NULL;
			hfsc_class_destroy(hif, cl);
			hfsc_class_destroy(hif, np);
			return (error);
		}
	}

	KASSERT(cl->cl_qops != NULL);
	KASSERT(cl->cl_qdata != NULL);

	return (0);
}

int
hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = q->kif->pfik_ifp;
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	struct hfsc_class_stats stats;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
	if (hif == NULL)
		return (EBADF);

	if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL) {
		ifq_q_leave(&ifp->if_snd, hif);
		return (EINVAL);
	}

	hfsc_getclstats(&stats, cl);
	ifq_q_leave(&ifp->if_snd, hif);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}

void
hfsc_pf_free(void *arg)
{
	hfsc_free(0, arg);
}

unsigned int
hfsc_pf_qlength(void *arg)
{
	struct hfsc_classq *cq = arg;

	return ml_len(&cq->q);
}

struct mbuf *
hfsc_pf_enqueue(void *arg, struct mbuf *m)
{
	struct hfsc_classq *cq = arg;

	if (ml_len(&cq->q) >= cq->qlimit)
		return (m);

	ml_enqueue(&cq->q, m);
	return (NULL);
}

struct mbuf *
hfsc_pf_deq_begin(void *arg, void **cookiep, struct mbuf_list *free_ml)
{
	struct hfsc_classq *cq = arg;

	return MBUF_LIST_FIRST(&cq->q);
}

void
hfsc_pf_deq_commit(void *arg, struct mbuf *m, void *cookie)
{
	struct hfsc_classq *cq = arg;

	ml_dequeue(&cq->q);
}

void
hfsc_pf_purge(void *arg, struct mbuf_list *ml)
{
	struct hfsc_classq *cq = arg;

	ml_enlist(ml, &cq->q);
}

unsigned int
hfsc_idx(unsigned int nqueues, const struct mbuf *m)
{
	/*
	 * hfsc can only function on a single ifq and the stack understands
	 * this. when the first ifq on an interface is switched to hfsc,
	 * this gets used to map all mbufs to the first and only ifq that
	 * is set up for hfsc.
	 */
	return (0);
}

void *
hfsc_alloc(unsigned int idx, void *q)
{
	struct hfsc_if *hif = q;

	KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */
	KASSERT(hif != NULL);
	return (hif);
}

void
hfsc_free(unsigned int idx, void *q)
{
	struct hfsc_if *hif = q;
	struct hfsc_class *cl;
	int i, restart;

	KERNEL_ASSERT_LOCKED();
	KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */

	timeout_del(&hif->hif_defer);

	do {
		restart = 0;
		for (i = 0; i < hif->hif_allocated; i++) {
			cl = hif->hif_class_tbl[i];
			if (hfsc_class_destroy(hif, cl) == EBUSY)
				restart++;
		}
	} while (restart > 0);

	free(hif->hif_class_tbl, M_DEVBUF, hif->hif_allocated * sizeof(void *));
	free(hif, M_DEVBUF, sizeof(*hif));
}

void
hfsc_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct hfsc_if		*hif = ifq->ifq_q;
	struct hfsc_class	*cl;

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
		hfsc_cl_purge(hif, cl, ml);
}

struct hfsc_class *
hfsc_class_create(struct hfsc_if *hif, struct hfsc_sc *rsc,
    struct hfsc_sc *fsc, struct hfsc_sc *usc, struct hfsc_class *parent,
    int qlimit, int flags, int qid)
{
	struct hfsc_class *cl, *p;
	int i, s;

	if (qlimit == 0)
		qlimit = HFSC_DEFAULT_QLIMIT;

	if (hif->hif_classes >= hif->hif_allocated) {
		u_int newslots = hfsc_more_slots(hif->hif_allocated);

		if (newslots == hif->hif_allocated)
			return (NULL);
		hfsc_grow_class_tbl(hif, newslots);
	}

	cl = pool_get(&hfsc_class_pl, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&cl->cl_actc);

	ml_init(&cl->cl_q.q);
	cl->cl_q.qlimit = qlimit;
	cl->cl_flags = flags;

	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
		cl->cl_rsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(rsc, cl->cl_rsc);
		hfsc_rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
		hfsc_rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
	}
	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
		cl->cl_fsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(fsc, cl->cl_fsc);
		hfsc_rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
	}
	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
		cl->cl_usc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(usc, cl->cl_usc);
		hfsc_rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
	}

	cl->cl_id = hif->hif_classid++;
	cl->cl_handle = qid;
	cl->cl_parent = parent;

	s = splnet();
	hif->hif_classes++;

	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
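	/*
	 * illustrative example (numbers are hypothetical): with 64 slots
	 * allocated, qid 70 hashes to slot 70 % 64 == 6; if that slot is
	 * taken, the linear scan below settles for the first empty one.
	 */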
	i = qid % hif->hif_allocated;
	if (hif->hif_class_tbl[i] == NULL)
		hif->hif_class_tbl[i] = cl;
	else {
		for (i = 0; i < hif->hif_allocated; i++)
			if (hif->hif_class_tbl[i] == NULL) {
				hif->hif_class_tbl[i] = cl;
				break;
			}
		if (i == hif->hif_allocated) {
			splx(s);
			goto err_ret;
		}
	}

	if (flags & HFSC_DEFAULTCLASS)
		hif->hif_defaultclass = cl;

	if (parent == NULL)
		hif->hif_rootclass = cl;
	else {
		/* add this class to the children list of the parent */
		if ((p = parent->cl_children) == NULL)
			parent->cl_children = cl;
		else {
			while (p->cl_siblings != NULL)
				p = p->cl_siblings;
			p->cl_siblings = cl;
		}
	}
	splx(s);

	return (cl);

err_ret:
	if (cl->cl_fsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
	if (cl->cl_rsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
	if (cl->cl_usc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
	pool_put(&hfsc_class_pl, cl);
	return (NULL);
}

int
hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
{
	int i, s;

	if (cl == NULL)
		return (0);

	if (cl->cl_children != NULL)
		return (EBUSY);

	s = splnet();
	KASSERT(hfsc_class_qlength(cl) == 0);

	if (cl->cl_parent != NULL) {
		struct hfsc_class *p = cl->cl_parent->cl_children;

		if (p == cl)
			cl->cl_parent->cl_children = cl->cl_siblings;
		else do {
			if (p->cl_siblings == cl) {
				p->cl_siblings = cl->cl_siblings;
				break;
			}
		} while ((p = p->cl_siblings) != NULL);
	}

	for (i = 0; i < hif->hif_allocated; i++)
		if (hif->hif_class_tbl[i] == cl) {
			hif->hif_class_tbl[i] = NULL;
			break;
		}

	hif->hif_classes--;
	splx(s);

	KASSERT(TAILQ_EMPTY(&cl->cl_actc));

	if (cl == hif->hif_rootclass)
		hif->hif_rootclass = NULL;
	if (cl == hif->hif_defaultclass)
		hif->hif_defaultclass = NULL;

	/* Free external queue manager resources */
	if (cl->cl_qops && cl->cl_qops != pfq_hfsc_ops)
		cl->cl_qops->pfq_free(cl->cl_qdata);

	if (cl->cl_usc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
	if (cl->cl_fsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
	if (cl->cl_rsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
	pool_put(&hfsc_class_pl, cl);

	return (0);
}

/*
 * hfsc_nextclass returns the next class in the tree.
 *   usage:
 *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *		do_something;
 */
struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
	if (cl->cl_children != NULL)
		cl = cl->cl_children;
	else if (cl->cl_siblings != NULL)
		cl = cl->cl_siblings;
	else {
		while ((cl = cl->cl_parent) != NULL)
			if (cl->cl_siblings) {
				cl = cl->cl_siblings;
				break;
			}
	}

	return (cl);
}

struct mbuf *
hfsc_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl;
	struct mbuf *dm;

	if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL ||
	    cl->cl_children != NULL) {
		cl = hif->hif_defaultclass;
		if (cl == NULL)
			return (m);
	}

	dm = hfsc_class_enqueue(cl, m);

	/* successfully queued. */
	if (dm != m && hfsc_class_qlength(cl) == 1) {
		hfsc_set_active(hif, cl, m->m_pkthdr.len);
		if (!timeout_pending(&hif->hif_defer))
			timeout_add(&hif->hif_defer, 1);
	}

	/* drop occurred. */
	if (dm != NULL)
		PKTCNTR_INC(&cl->cl_stats.drop_cnt, dm->m_pkthdr.len);

	return (dm);
}

struct mbuf *
hfsc_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl, *tcl;
	struct mbuf *m;
	u_int64_t cur_time;

	cur_time = hfsc_microuptime();

	/*
	 * if there are eligible classes, use real-time criteria.
	 * find the class with the minimum deadline among
	 * the eligible classes.
	 */
	cl = hfsc_ellist_get_mindl(hif, cur_time);
	if (cl == NULL) {
		/*
		 * use link-sharing criteria
		 * get the class with the minimum vt in the hierarchy
		 */
		cl = NULL;
		tcl = hif->hif_rootclass;

		while (tcl != NULL && tcl->cl_children != NULL) {
			tcl = hfsc_actlist_firstfit(tcl, cur_time);
			if (tcl == NULL)
				continue;

			/*
			 * update parent's cl_cvtmin.
			 * don't update if the new vt is smaller.
			 */
			if (tcl->cl_parent->cl_cvtmin < tcl->cl_vt)
				tcl->cl_parent->cl_cvtmin = tcl->cl_vt;

			cl = tcl;
		}
		/* XXX HRTIMER plan hfsc_deferred precisely here. */
		if (cl == NULL)
			return (NULL);
	}

	m = hfsc_class_deq_begin(cl, &free_ml);
	ifq_mfreeml(ifq, &free_ml);
	if (m == NULL) {
		hfsc_update_sc(hif, cl, 0);
		return (NULL);
	}

	hif->hif_microtime = cur_time;
	*cookiep = cl;
	return (m);
}

void
hfsc_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl = cookie;

	hfsc_class_deq_commit(cl, m);
	hfsc_update_sc(hif, cl, m->m_pkthdr.len);

	PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len);
}

void
hfsc_update_sc(struct hfsc_if *hif, struct hfsc_class *cl, int len)
{
	int next_len, realtime = 0;
	u_int64_t cur_time = hif->hif_microtime;

	/* check if the class was scheduled by real-time criteria */
	if (cl->cl_rsc != NULL)
		realtime = (cl->cl_e <= cur_time);

	hfsc_update_vf(cl, len, cur_time);
	if (realtime)
		cl->cl_cumul += len;

	if (hfsc_class_qlength(cl) > 0) {
		/*
		 * Realtime queue needs to look into the future and make
		 * calculations based on that. This is the reason it can't
		 * be used with an external queue manager.
		 */
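		/*
		 * "looking into the future" means peeking at the length of
		 * the next packet still queued (MBUF_LIST_FIRST below) so
		 * the next deadline/eligible times can be computed before
		 * that packet is dequeued.
		 */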
		if (cl->cl_rsc != NULL) {
			struct mbuf *m0;

			/* update ed */
			KASSERT(cl->cl_qops == pfq_hfsc_ops);
			m0 = MBUF_LIST_FIRST(&cl->cl_q.q);
			next_len = m0->m_pkthdr.len;

			if (realtime)
				hfsc_update_ed(hif, cl, next_len);
			else
				hfsc_update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		hfsc_set_passive(hif, cl);
	}
}

void
hfsc_deferred(void *arg)
{
	struct ifnet *ifp = arg;
	struct ifqueue *ifq = &ifp->if_snd;
	struct hfsc_if *hif;

	if (!HFSC_ENABLED(ifq))
		return;

	if (!ifq_empty(ifq))
		ifq_start(ifq);

	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
	if (hif == NULL)
		return;
	/* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
	timeout_add(&hif->hif_defer, 1);
	ifq_q_leave(&ifp->if_snd, hif);
}

void
hfsc_cl_purge(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf_list *ml)
{
	struct mbuf_list ml2 = MBUF_LIST_INITIALIZER();

	hfsc_class_purge(cl, &ml2);
	if (ml_empty(&ml2))
		return;

	ml_enlist(ml, &ml2);

	hfsc_update_vf(cl, 0, 0);	/* remove cl from the actlist */
	hfsc_set_passive(hif, cl);
}

void
hfsc_set_active(struct hfsc_if *hif, struct hfsc_class *cl, int len)
{
	if (cl->cl_rsc != NULL)
		hfsc_init_ed(hif, cl, len);
	if (cl->cl_fsc != NULL)
		hfsc_init_vf(cl, len);

	cl->cl_stats.period++;
}

void
hfsc_set_passive(struct hfsc_if *hif, struct hfsc_class *cl)
{
	if (cl->cl_rsc != NULL)
		hfsc_ellist_remove(hif, cl);

	/*
	 * actlist is handled in hfsc_update_vf() so that hfsc_update_vf(cl, 0,
	 * 0) needs to be called explicitly to remove a class from actlist
	 */
}

void
hfsc_init_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
{
	u_int64_t cur_time;

	cur_time = hfsc_microuptime();

	/* update the deadline curve */
	hfsc_rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
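	/*
	 * added note: a convex curve has sm1 <= sm2 (the slope increases),
	 * and zeroing dx/dy below drops the first segment, leaving only
	 * the slope-m2 tail of the copied deadline curve.
	 */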
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	hfsc_ellist_insert(hif, cl);
}

void
hfsc_update_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
{
	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	hfsc_ellist_update(hif, cl);
}

void
hfsc_update_d(struct hfsc_class *cl, int next_len)
{
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

void
hfsc_init_vf(struct hfsc_class *cl, int len)
{
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc,
			    hfsc_active);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				     p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			hfsc_rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt,
			    cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			hfsc_actlist_insert(cl);

			if (cl->cl_usc != NULL) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = hfsc_microuptime();

				/* update the ulimit curve */
				hfsc_rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
				    cl->cl_total);
				/* compute myf */
				cl->cl_myf = hfsc_rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			hfsc_update_cfmin(cl->cl_parent);
		}
	}
}

void
hfsc_update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
{
	u_int64_t f, myf_bound, delta;
	int go_passive = 0;

	if (hfsc_class_qlength(cl) == 0)
		go_passive = 1;

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		cl->cl_total += len;

		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			hfsc_actlist_remove(cl);

			hfsc_update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = hfsc_rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		hfsc_actlist_update(cl);

		if (cl->cl_usc != NULL) {
			cl->cl_myf = cl->cl_myfadj +
			    hfsc_rtsc_y2x(&cl->cl_ulimit, cl->cl_total);

			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - HFSC_CLK_PER_TICK;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			hfsc_update_cfmin(cl->cl_parent);
		}
	}
}

void
hfsc_update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HFSC_HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}

/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
 */
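/*
 * added note: insertion and update keep the list ordered by cl_e, which
 * lets hfsc_ellist_get_mindl() stop at the first entry whose eligible
 * time is still in the future.
 */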
void
hfsc_ellist_insert(struct hfsc_if *hif, struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible)) == NULL ||
	    p->cl_e <= cl->cl_e) {
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
}

void
hfsc_ellist_remove(struct hfsc_if *hif, struct hfsc_class *cl)
{
	TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}

void
hfsc_ellist_update(struct hfsc_if *hif, struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the eligible time of a class increases monotonically.
	 * if the next entry has a larger eligible time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_ellist);
	if (p == NULL || cl->cl_e <= p->cl_e)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible);
	if (last->cl_e <= cl->cl_e) {
		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
}

/* find the class with the minimum deadline among the eligible classes */
struct hfsc_class *
hfsc_ellist_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
{
	struct hfsc_class *p, *cl = NULL;

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (p->cl_e > cur_time)
			break;
		if (cl == NULL || p->cl_d < cl->cl_d)
			cl = p;
	}
	return (cl);
}

/*
 * active children list holds backlogged child classes being sorted
 * by their virtual time.
 * each intermediate class has one active children list.
 */
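/*
 * added note: the list is kept ordered by cl_vt, so
 * hfsc_actlist_firstfit() yields the smallest-vt child whose fit-time
 * has already passed.
 */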
void
hfsc_actlist_insert(struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active)) == NULL
	    || p->cl_vt <= cl->cl_vt) {
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
}

void
hfsc_actlist_remove(struct hfsc_class *cl)
{
	TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}

void
hfsc_actlist_update(struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the virtual time of a class increases monotonically during its
	 * backlogged period.
	 * if the next entry has a larger virtual time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_actlist);
	if (p == NULL || cl->cl_vt < p->cl_vt)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active);
	if (last->cl_vt <= cl->cl_vt) {
		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
}

struct hfsc_class *
hfsc_actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
	struct hfsc_class *p;

	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist)
		if (p->cl_f <= cur_time)
			return (p);

	return (NULL);
}

/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bits/sec
 *	d: msec
 *  internal service curve parameters
 *	sm: (bytes/tsc_interval) << SM_SHIFT
 *	ism: (tsc_count/byte) << ISM_SHIFT
 *	dx: tsc_count
 *
 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
 * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
 * digits in decimal using the following table.
 *
 *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
 *  ----------+-------------------------------------------------------
 *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
 *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
 *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
 *
 *  nsec/byte   80000      8000       800        80         8
 *  ism(500MHz) 40000      4000       400        40         4
 *  ism(200MHz) 16000      1600       160        16         1.6
 */
#define	SM_SHIFT	24
#define	ISM_SHIFT	10

#define	SM_MASK		((1LL << SM_SHIFT) - 1)
#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
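/*
 * worked example (added, using the helpers below): at m = 10 Mbit/s the
 * internal rate is 10e6 / 8 / HFSC_FREQ, roughly 0.00488 bytes per clock
 * count, so m2sm(10000000) == 81920 (that rate << SM_SHIFT) and
 * m2ism(10000000) == 209715 (204.8 clock counts per byte << ISM_SHIFT).
 */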

static __inline u_int64_t
seg_x2y(u_int64_t x, u_int64_t sm)
{
	u_int64_t y;

	/*
	 * compute
	 *	y = x * sm >> SM_SHIFT
	 * but divide it for the upper and lower bits to avoid overflow
	 */
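	/*
	 * added note: x is split into (x >> SM_SHIFT) and (x & SM_MASK);
	 * the high part is multiplied after the shift and the low part
	 * before it, so neither intermediate product overflows as long as
	 * the final result fits in 64 bits.
	 */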
	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
	return (y);
}

static __inline u_int64_t
seg_y2x(u_int64_t y, u_int64_t ism)
{
	u_int64_t x;

	if (y == 0)
		x = 0;
	else if (ism == HFSC_HT_INFINITY)
		x = HFSC_HT_INFINITY;
	else {
		x = (y >> ISM_SHIFT) * ism
		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
	}
	return (x);
}

static __inline u_int64_t
m2sm(u_int m)
{
	u_int64_t sm;

	sm = ((u_int64_t)m << SM_SHIFT) / 8 / HFSC_FREQ;
	return (sm);
}

static __inline u_int64_t
m2ism(u_int m)
{
	u_int64_t ism;

	if (m == 0)
		ism = HFSC_HT_INFINITY;
	else
		ism = ((u_int64_t)HFSC_FREQ << ISM_SHIFT) * 8 / m;
	return (ism);
}

static __inline u_int64_t
d2dx(u_int d)
{
	u_int64_t dx;

	dx = ((u_int64_t)d * HFSC_FREQ) / 1000;
	return (dx);
}

static __inline u_int
sm2m(u_int64_t sm)
{
	u_int64_t m;

	m = (sm * 8 * HFSC_FREQ) >> SM_SHIFT;
	return ((u_int)m);
}

static __inline u_int
dx2d(u_int64_t dx)
{
	u_int64_t d;

	d = dx * 1000 / HFSC_FREQ;
	return ((u_int)d);
}

void
hfsc_sc2isc(struct hfsc_sc *sc, struct hfsc_internal_sc *isc)
{
	isc->sm1 = m2sm(sc->m1);
	isc->ism1 = m2ism(sc->m1);
	isc->dx = d2dx(sc->d);
	isc->dy = seg_x2y(isc->dx, isc->sm1);
	isc->sm2 = m2sm(sc->m2);
	isc->ism2 = m2ism(sc->m2);
}

/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
void
hfsc_rtsc_init(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc * isc,
    u_int64_t x, u_int64_t y)
{
	rtsc->x =	x;
	rtsc->y =	y;
	rtsc->sm1 =	isc->sm1;
	rtsc->ism1 =	isc->ism1;
	rtsc->dx =	isc->dx;
	rtsc->dy =	isc->dy;
	rtsc->sm2 =	isc->sm2;
	rtsc->ism2 =	isc->ism2;
}

/*
 * calculate the x-projection of the runtime service curve for the
 * given y-projection value
 */
u_int64_t
hfsc_rtsc_y2x(struct hfsc_runtime_sc *rtsc, u_int64_t y)
{
	u_int64_t x;

	if (y < rtsc->y)
		x = rtsc->x;
	else if (y <= rtsc->y + rtsc->dy) {
		/* x belongs to the 1st segment */
		if (rtsc->dy == 0)
			x = rtsc->x + rtsc->dx;
		else
			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
	} else {
		/* x belongs to the 2nd segment */
		x = rtsc->x + rtsc->dx
		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
	}
	return (x);
}

u_int64_t
hfsc_rtsc_x2y(struct hfsc_runtime_sc *rtsc, u_int64_t x)
{
	u_int64_t y;

	if (x <= rtsc->x)
		y = rtsc->y;
	else if (x <= rtsc->x + rtsc->dx)
		/* y belongs to the 1st segment */
		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
	else
		/* y belongs to the 2nd segment */
		y = rtsc->y + rtsc->dy
		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
	return (y);
}

/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
void
hfsc_rtsc_min(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
    u_int64_t x, u_int64_t y)
{
	u_int64_t y1, y2, dx, dy;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = hfsc_rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = hfsc_rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = hfsc_rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
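	/*
	 * added derivation: dx * sm1 >> SM_SHIFT equals
	 * (dx * sm2 >> SM_SHIFT) + (y1 - y), hence
	 * dx * (sm1 - sm2) == (y1 - y) << SM_SHIFT, which is the division
	 * below; sm1 > sm2 holds here because the curve is concave.
	 */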
	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
	return;
}

void
hfsc_getclstats(struct hfsc_class_stats *sp, struct hfsc_class *cl)
{
	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

	if (cl->cl_rsc != NULL) {
		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
		sp->rsc.d = dx2d(cl->cl_rsc->dx);
		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	if (cl->cl_fsc != NULL) {
		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
		sp->fsc.d = dx2d(cl->cl_fsc->dx);
		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	if (cl->cl_usc != NULL) {
		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
		sp->usc.d = dx2d(cl->cl_usc->dx);
		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
	sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	sp->cur_time = hfsc_microuptime();
	sp->machclk_freq = HFSC_FREQ;

	sp->qlength = hfsc_class_qlength(cl);
	sp->qlimit = cl->cl_q.qlimit;
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = 0;
}

/* convert a class handle to the corresponding class pointer */
struct hfsc_class *
hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle)
{
	int i;
	struct hfsc_class *cl;

	if (chandle == 0)
		return (NULL);
	/*
	 * first, try the slot corresponding to the lower bits of the handle.
	 * if it does not match, do the linear table search.
	 */
	i = chandle % hif->hif_allocated;
	if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
		return (cl);
	for (i = 0; i < hif->hif_allocated; i++)
		if ((cl = hif->hif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);
	return (NULL);
}
1738