/*	$OpenBSD: hfsc.c,v 1.49 2023/04/11 00:45:09 jsg Exp $	*/

/*
 * Copyright (c) 2012-2013 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>

#include <net/pfvar.h>
#include <net/hfsc.h>

/*
 * kernel internal service curve representation
 *	coordinates are given by 64 bit unsigned integers.
 *	x-axis: unit is clock count.  for the intel x86 architecture,
 *		the raw Pentium TSC (Timestamp Counter) value is used.
 *		virtual time is also calculated in this time scale.
 *	y-axis: unit is byte.
 *
 *	the service curve parameters are converted to the internal
 *	representation.
 *	the slope values are scaled to avoid overflow.
 *	the inverse slope values as well as the y-projection of the 1st
 *	segment are kept in order to avoid 64-bit divide operations
 *	that are expensive on 32-bit architectures.
 *
 *  note: Intel Pentium TSC never wraps around in several thousands of years.
 *	x-axis doesn't wrap around for 1089 years with 1GHz clock.
 *	y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
 */
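
/*
 * An illustrative calculation (example figures only; it assumes the
 * HFSC_CLK_SHIFT/HFSC_FREQ values and the hfsc_microuptime() clock used
 * further below rather than the raw TSC): one second corresponds to
 * 1000000 << 8 = 256,000,000 x-axis units, so a 10Mbps slope advances the
 * y-axis by 1,250,000 bytes per 256,000,000 units, i.e. roughly 0.005
 * bytes per clock unit.  this is why the slopes are kept in the scaled
 * fixed-point form described by SM_SHIFT/ISM_SHIFT below rather than as
 * plain integers.
 */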

/* kernel internal representation of a service curve */
struct hfsc_internal_sc {
	u_int64_t	sm1;	/* scaled slope of the 1st segment */
	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
	u_int64_t	dx;	/* the x-projection of the 1st segment */
	u_int64_t	dy;	/* the y-projection of the 1st segment */
	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
};

/* runtime service curve */
struct hfsc_runtime_sc {
	u_int64_t	x;	/* current starting position on x-axis */
	u_int64_t	y;	/* current starting position on y-axis */
	u_int64_t	sm1;	/* scaled slope of the 1st segment */
	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
	u_int64_t	dx;	/* the x-projection of the 1st segment */
	u_int64_t	dy;	/* the y-projection of the 1st segment */
	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
};

struct hfsc_classq {
	struct mbuf_list q;	/* Queue of packets */
	int		 qlimit;	/* Queue limit */
};

/* for TAILQ based ellist and actlist implementation */
struct hfsc_class;
TAILQ_HEAD(hfsc_eligible, hfsc_class);
TAILQ_HEAD(hfsc_active, hfsc_class);
#define	hfsc_actlist_last(s)	TAILQ_LAST(s, hfsc_active)

struct hfsc_class {
	u_int		cl_id;		/* class id (just for debug) */
	u_int32_t	cl_handle;	/* class handle */
	int		cl_flags;	/* misc flags */

	struct hfsc_class *cl_parent;	/* parent class */
	struct hfsc_class *cl_siblings;	/* sibling classes */
	struct hfsc_class *cl_children;	/* child classes */

	struct hfsc_classq cl_q;	/* class queue structure */

	const struct pfq_ops *cl_qops;	/* queue manager */
	void		*cl_qdata;	/* queue manager data */
	void		*cl_cookie;	/* queue manager cookie */

	u_int64_t	cl_total;	/* total work in bytes */
	u_int64_t	cl_cumul;	/* cumulative work in bytes
					   done by real-time criteria */
	u_int64_t	cl_d;		/* deadline */
	u_int64_t	cl_e;		/* eligible time */
	u_int64_t	cl_vt;		/* virtual time */
	u_int64_t	cl_f;		/* time when this class will fit for
					   link-sharing, max(myf, cfmin) */
	u_int64_t	cl_myf;		/* my fit-time (as calculated from this
					   class's own upperlimit curve) */
	u_int64_t	cl_myfadj;	/* my fit-time adjustment
					   (to cancel history dependence) */
	u_int64_t	cl_cfmin;	/* earliest children's fit-time (used
					   with cl_myf to obtain cl_f) */
	u_int64_t	cl_cvtmin;	/* minimal virtual time among the
					   children fit for link-sharing
					   (monotonic within a period) */
	u_int64_t	cl_vtadj;	/* intra-period cumulative vt
					   adjustment */
	u_int64_t	cl_vtoff;	/* inter-period cumulative vt offset */
	u_int64_t	cl_cvtmax;	/* max child's vt in the last period */

	u_int64_t	cl_initvt;	/* init virtual time (for debugging) */

	struct hfsc_internal_sc *cl_rsc;	/* internal real-time service curve */
	struct hfsc_internal_sc *cl_fsc;	/* internal fair service curve */
	struct hfsc_internal_sc *cl_usc;	/* internal upperlimit service curve */
	struct hfsc_runtime_sc	cl_deadline;	/* deadline curve */
	struct hfsc_runtime_sc	cl_eligible;	/* eligible curve */
	struct hfsc_runtime_sc	cl_virtual;	/* virtual curve */
	struct hfsc_runtime_sc	cl_ulimit;	/* upperlimit curve */

	u_int		cl_vtperiod;	/* vt period sequence no */
	u_int		cl_parentperiod; /* parent's vt period seqno */
	int		cl_nactive;	/* number of active children */
	struct hfsc_active cl_actc;	/* active children list */

	TAILQ_ENTRY(hfsc_class) cl_actlist;	/* active children list entry */
	TAILQ_ENTRY(hfsc_class) cl_ellist;	/* eligible list entry */

	struct {
		struct hfsc_pktcntr	xmit_cnt;
		struct hfsc_pktcntr	drop_cnt;
		u_int			period;
	} cl_stats;
};

/*
 * hfsc interface state
 */
struct hfsc_if {
	struct hfsc_if		*hif_next;	/* interface state list */
	struct hfsc_class	*hif_rootclass;		/* root class */
	struct hfsc_class	*hif_defaultclass;	/* default class */
	struct hfsc_class	**hif_class_tbl;

	u_int64_t		hif_microtime;	/* time at deq_begin */

	u_int	hif_allocated;		/* # of slots in hif_class_tbl */
	u_int	hif_classes;		/* # of classes in the tree */
	u_int	hif_classid;		/* class id sequence number */

	struct hfsc_eligible	hif_eligible;	/* eligible list */
	struct timeout		hif_defer;	/* for queues that weren't ready */
};

/*
 * function prototypes
 */
struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
			    struct hfsc_sc *, struct hfsc_sc *,
			    struct hfsc_sc *, struct hfsc_class *, int,
			    int, int);
int			 hfsc_class_destroy(struct hfsc_if *,
			    struct hfsc_class *);
struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);

void		 hfsc_cl_purge(struct hfsc_if *, struct hfsc_class *,
		    struct mbuf_list *);

void		 hfsc_update_sc(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_deferred(void *);
void		 hfsc_update_cfmin(struct hfsc_class *);
void		 hfsc_set_active(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_set_passive(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_init_ed(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_update_ed(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_update_d(struct hfsc_class *, int);
void		 hfsc_init_vf(struct hfsc_class *, int);
void		 hfsc_update_vf(struct hfsc_class *, int, u_int64_t);
void		 hfsc_ellist_insert(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_ellist_remove(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_ellist_update(struct hfsc_if *, struct hfsc_class *);
struct hfsc_class	*hfsc_ellist_get_mindl(struct hfsc_if *, u_int64_t);
void		 hfsc_actlist_insert(struct hfsc_class *);
void		 hfsc_actlist_remove(struct hfsc_class *);
void		 hfsc_actlist_update(struct hfsc_class *);

struct hfsc_class	*hfsc_actlist_firstfit(struct hfsc_class *,
			    u_int64_t);

static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
static __inline u_int64_t	m2sm(u_int);
static __inline u_int64_t	m2ism(u_int);
static __inline u_int64_t	d2dx(u_int);
static __inline u_int		sm2m(u_int64_t);
static __inline u_int		dx2d(u_int64_t);

void		hfsc_sc2isc(struct hfsc_sc *, struct hfsc_internal_sc *);
void		hfsc_rtsc_init(struct hfsc_runtime_sc *,
		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);
u_int64_t	hfsc_rtsc_y2x(struct hfsc_runtime_sc *, u_int64_t);
u_int64_t	hfsc_rtsc_x2y(struct hfsc_runtime_sc *, u_int64_t);
void		hfsc_rtsc_min(struct hfsc_runtime_sc *,
		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);

void		hfsc_getclstats(struct hfsc_class_stats *, struct hfsc_class *);
struct hfsc_class	*hfsc_clh2cph(struct hfsc_if *, u_int32_t);

#define	HFSC_CLK_SHIFT		8
#define	HFSC_FREQ		(1000000 << HFSC_CLK_SHIFT)
#define	HFSC_CLK_PER_TICK	(HFSC_FREQ / hz)
#define	HFSC_HT_INFINITY	0xffffffffffffffffLL	/* infinite time value */

struct pool	hfsc_class_pl, hfsc_internal_sc_pl;

/*
 * ifqueue glue.
 */

unsigned int	 hfsc_idx(unsigned int, const struct mbuf *);
struct mbuf	*hfsc_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*hfsc_deq_begin(struct ifqueue *, void **);
void		 hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 hfsc_purge(struct ifqueue *, struct mbuf_list *);
void		*hfsc_alloc(unsigned int, void *);
void		 hfsc_free(unsigned int, void *);

const struct ifq_ops hfsc_ops = {
	hfsc_idx,
	hfsc_enq,
	hfsc_deq_begin,
	hfsc_deq_commit,
	hfsc_purge,
	hfsc_alloc,
	hfsc_free,
};

const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops;

/*
 * pf queue glue.
 */

void		*hfsc_pf_alloc(struct ifnet *);
int		 hfsc_pf_addqueue(void *, struct pf_queuespec *);
void		 hfsc_pf_free(void *);
int		 hfsc_pf_qstats(struct pf_queuespec *, void *, int *);
unsigned int	 hfsc_pf_qlength(void *);
struct mbuf *	 hfsc_pf_enqueue(void *, struct mbuf *);
struct mbuf *	 hfsc_pf_deq_begin(void *, void **, struct mbuf_list *);
void		 hfsc_pf_deq_commit(void *, struct mbuf *, void *);
void		 hfsc_pf_purge(void *, struct mbuf_list *);

const struct pfq_ops hfsc_pf_ops = {
	hfsc_pf_alloc,
	hfsc_pf_addqueue,
	hfsc_pf_free,
	hfsc_pf_qstats,
	hfsc_pf_qlength,
	hfsc_pf_enqueue,
	hfsc_pf_deq_begin,
	hfsc_pf_deq_commit,
	hfsc_pf_purge
};

const struct pfq_ops * const pfq_hfsc_ops = &hfsc_pf_ops;

/*
 * shortcuts for repeated use
 */
static inline unsigned int
hfsc_class_qlength(struct hfsc_class *cl)
{
	/* Only leaf classes have a queue */
	if (cl->cl_qops != NULL)
		return cl->cl_qops->pfq_qlength(cl->cl_qdata);
	return 0;
}

static inline struct mbuf *
hfsc_class_enqueue(struct hfsc_class *cl, struct mbuf *m)
{
	return cl->cl_qops->pfq_enqueue(cl->cl_qdata, m);
}

static inline struct mbuf *
hfsc_class_deq_begin(struct hfsc_class *cl, struct mbuf_list *ml)
{
	return cl->cl_qops->pfq_deq_begin(cl->cl_qdata, &cl->cl_cookie, ml);
}

static inline void
hfsc_class_deq_commit(struct hfsc_class *cl, struct mbuf *m)
{
	return cl->cl_qops->pfq_deq_commit(cl->cl_qdata, m, cl->cl_cookie);
}

static inline void
hfsc_class_purge(struct hfsc_class *cl, struct mbuf_list *ml)
{
	/* Only leaf classes have a queue */
	if (cl->cl_qops != NULL)
		return cl->cl_qops->pfq_purge(cl->cl_qdata, ml);
}

u_int64_t
hfsc_microuptime(void)
{
	struct timeval tv;

	microuptime(&tv);
	return (((u_int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec) <<
	    HFSC_CLK_SHIFT);
}

static inline u_int
hfsc_more_slots(u_int current)
{
	u_int want = current * 2;

	return (want > HFSC_MAX_CLASSES ? HFSC_MAX_CLASSES : want);
}

static void
hfsc_grow_class_tbl(struct hfsc_if *hif, u_int howmany)
{
	struct hfsc_class **newtbl, **old;
	size_t oldlen = sizeof(void *) * hif->hif_allocated;

	newtbl = mallocarray(howmany, sizeof(void *), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	old = hif->hif_class_tbl;

	memcpy(newtbl, old, oldlen);
	hif->hif_class_tbl = newtbl;
	hif->hif_allocated = howmany;

	free(old, M_DEVBUF, oldlen);
}

void
hfsc_initialize(void)
{
	pool_init(&hfsc_class_pl, sizeof(struct hfsc_class), 0,
	    IPL_NONE, PR_WAITOK, "hfscclass", NULL);
	pool_init(&hfsc_internal_sc_pl, sizeof(struct hfsc_internal_sc), 0,
	    IPL_NONE, PR_WAITOK, "hfscintsc", NULL);
}

void *
hfsc_pf_alloc(struct ifnet *ifp)
{
	struct hfsc_if *hif;

	KASSERT(ifp != NULL);

	hif = malloc(sizeof(*hif), M_DEVBUF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&hif->hif_eligible);
	hif->hif_class_tbl = mallocarray(HFSC_DEFAULT_CLASSES, sizeof(void *),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	hif->hif_allocated = HFSC_DEFAULT_CLASSES;

	timeout_set(&hif->hif_defer, hfsc_deferred, ifp);

	return (hif);
}

int
hfsc_pf_addqueue(void *arg, struct pf_queuespec *q)
{
	struct hfsc_if *hif = arg;
	struct hfsc_class *cl, *parent, *np = NULL;
	struct hfsc_sc rtsc, lssc, ulsc;
	int error = 0;

	KASSERT(hif != NULL);
	KASSERT(q->qid != 0);

	/* Root queue must have non-zero linksharing parameters */
	if (q->linkshare.m1.absolute == 0 && q->linkshare.m2.absolute == 0 &&
	    q->parent_qid == 0)
		return (EINVAL);

	if (q->parent_qid == 0 && hif->hif_rootclass == NULL) {
		np = hfsc_class_create(hif, NULL, NULL, NULL, NULL,
		    0, 0, HFSC_ROOT_CLASS | q->qid);
		if (np == NULL)
			return (EINVAL);
		parent = np;
	} else if ((parent = hfsc_clh2cph(hif, q->parent_qid)) == NULL)
		return (EINVAL);

	if (hfsc_clh2cph(hif, q->qid) != NULL) {
		hfsc_class_destroy(hif, np);
		return (EBUSY);
	}

	rtsc.m1 = q->realtime.m1.absolute;
	rtsc.d = q->realtime.d;
	rtsc.m2 = q->realtime.m2.absolute;
	lssc.m1 = q->linkshare.m1.absolute;
	lssc.d = q->linkshare.d;
	lssc.m2 = q->linkshare.m2.absolute;
	ulsc.m1 = q->upperlimit.m1.absolute;
	ulsc.d = q->upperlimit.d;
	ulsc.m2 = q->upperlimit.m2.absolute;

	if ((cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
	    parent, q->qlimit, q->flags, q->qid)) == NULL) {
		hfsc_class_destroy(hif, np);
		return (ENOMEM);
	}

	/* Attach a queue manager if specified */
	cl->cl_qops = pf_queue_manager(q);
	/* Realtime class cannot be used with an external queue manager */
	if (cl->cl_qops == NULL || cl->cl_rsc != NULL) {
		cl->cl_qops = pfq_hfsc_ops;
		cl->cl_qdata = &cl->cl_q;
	} else {
		cl->cl_qdata = cl->cl_qops->pfq_alloc(q->kif->pfik_ifp);
		if (cl->cl_qdata == NULL) {
			cl->cl_qops = NULL;
			hfsc_class_destroy(hif, cl);
			hfsc_class_destroy(hif, np);
			return (ENOMEM);
		}
		error = cl->cl_qops->pfq_addqueue(cl->cl_qdata, q);
		if (error) {
			cl->cl_qops->pfq_free(cl->cl_qdata);
			cl->cl_qops = NULL;
			hfsc_class_destroy(hif, cl);
			hfsc_class_destroy(hif, np);
			return (error);
		}
	}

	KASSERT(cl->cl_qops != NULL);
	KASSERT(cl->cl_qdata != NULL);

	return (0);
}

int
hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = q->kif->pfik_ifp;
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	struct hfsc_class_stats stats;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
	if (hif == NULL)
		return (EBADF);

	if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL) {
		ifq_q_leave(&ifp->if_snd, hif);
		return (EINVAL);
	}

	hfsc_getclstats(&stats, cl);
	ifq_q_leave(&ifp->if_snd, hif);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}

void
hfsc_pf_free(void *arg)
{
	hfsc_free(0, arg);
}

unsigned int
hfsc_pf_qlength(void *arg)
{
	struct hfsc_classq *cq = arg;

	return ml_len(&cq->q);
}

struct mbuf *
hfsc_pf_enqueue(void *arg, struct mbuf *m)
{
	struct hfsc_classq *cq = arg;

	if (ml_len(&cq->q) >= cq->qlimit)
		return (m);

	ml_enqueue(&cq->q, m);
	return (NULL);
}

struct mbuf *
hfsc_pf_deq_begin(void *arg, void **cookiep, struct mbuf_list *free_ml)
{
	struct hfsc_classq *cq = arg;

	return MBUF_LIST_FIRST(&cq->q);
}

void
hfsc_pf_deq_commit(void *arg, struct mbuf *m, void *cookie)
{
	struct hfsc_classq *cq = arg;

	ml_dequeue(&cq->q);
}

void
hfsc_pf_purge(void *arg, struct mbuf_list *ml)
{
	struct hfsc_classq *cq = arg;

	ml_enlist(ml, &cq->q);
}

unsigned int
hfsc_idx(unsigned int nqueues, const struct mbuf *m)
{
	/*
	 * hfsc can only function on a single ifq and the stack understands
	 * this.  when the first ifq on an interface is switched to hfsc,
	 * this gets used to map all mbufs to the first and only ifq that
	 * is set up for hfsc.
	 */
	return (0);
}

void *
hfsc_alloc(unsigned int idx, void *q)
{
	struct hfsc_if *hif = q;

	KASSERT(idx == 0);	/* when hfsc is enabled we only use the first ifq */
	KASSERT(hif != NULL);
	return (hif);
}

void
hfsc_free(unsigned int idx, void *q)
{
	struct hfsc_if *hif = q;
	struct hfsc_class *cl;
	int i, restart;

	KERNEL_ASSERT_LOCKED();
	KASSERT(idx == 0);	/* when hfsc is enabled we only use the first ifq */

	timeout_del(&hif->hif_defer);

	do {
		restart = 0;
		for (i = 0; i < hif->hif_allocated; i++) {
			cl = hif->hif_class_tbl[i];
			if (hfsc_class_destroy(hif, cl) == EBUSY)
				restart++;
		}
	} while (restart > 0);

	free(hif->hif_class_tbl, M_DEVBUF, hif->hif_allocated * sizeof(void *));
	free(hif, M_DEVBUF, sizeof(*hif));
}

void
hfsc_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl;

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
		hfsc_cl_purge(hif, cl, ml);
}

struct hfsc_class *
hfsc_class_create(struct hfsc_if *hif, struct hfsc_sc *rsc,
    struct hfsc_sc *fsc, struct hfsc_sc *usc, struct hfsc_class *parent,
    int qlimit, int flags, int qid)
{
	struct hfsc_class *cl, *p;
	int i, s;

	if (qlimit == 0)
		qlimit = HFSC_DEFAULT_QLIMIT;

	if (hif->hif_classes >= hif->hif_allocated) {
		u_int newslots = hfsc_more_slots(hif->hif_allocated);

		if (newslots == hif->hif_allocated)
			return (NULL);
		hfsc_grow_class_tbl(hif, newslots);
	}

	cl = pool_get(&hfsc_class_pl, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&cl->cl_actc);

	ml_init(&cl->cl_q.q);
	cl->cl_q.qlimit = qlimit;
	cl->cl_flags = flags;

	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
		cl->cl_rsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(rsc, cl->cl_rsc);
		hfsc_rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
		hfsc_rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
	}
	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
		cl->cl_fsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(fsc, cl->cl_fsc);
		hfsc_rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
	}
	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
		cl->cl_usc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(usc, cl->cl_usc);
		hfsc_rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
	}

	cl->cl_id = hif->hif_classid++;
	cl->cl_handle = qid;
	cl->cl_parent = parent;

	s = splnet();
	hif->hif_classes++;

	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
	i = qid % hif->hif_allocated;
	if (hif->hif_class_tbl[i] == NULL)
		hif->hif_class_tbl[i] = cl;
	else {
		for (i = 0; i < hif->hif_allocated; i++)
			if (hif->hif_class_tbl[i] == NULL) {
				hif->hif_class_tbl[i] = cl;
				break;
			}
		if (i == hif->hif_allocated) {
			splx(s);
			goto err_ret;
		}
	}

	if (flags & HFSC_DEFAULTCLASS)
		hif->hif_defaultclass = cl;

	if (parent == NULL)
		hif->hif_rootclass = cl;
	else {
		/* add this class to the children list of the parent */
		if ((p = parent->cl_children) == NULL)
			parent->cl_children = cl;
		else {
			while (p->cl_siblings != NULL)
				p = p->cl_siblings;
			p->cl_siblings = cl;
		}
	}
	splx(s);

	return (cl);

err_ret:
	if (cl->cl_fsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
	if (cl->cl_rsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
	if (cl->cl_usc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
	pool_put(&hfsc_class_pl, cl);
	return (NULL);
}

int
hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
{
	int i, s;

	if (cl == NULL)
		return (0);

	if (cl->cl_children != NULL)
		return (EBUSY);

	s = splnet();
	KASSERT(hfsc_class_qlength(cl) == 0);

	if (cl->cl_parent != NULL) {
		struct hfsc_class *p = cl->cl_parent->cl_children;

		if (p == cl)
			cl->cl_parent->cl_children = cl->cl_siblings;
		else do {
			if (p->cl_siblings == cl) {
				p->cl_siblings = cl->cl_siblings;
				break;
			}
		} while ((p = p->cl_siblings) != NULL);
	}

	for (i = 0; i < hif->hif_allocated; i++)
		if (hif->hif_class_tbl[i] == cl) {
			hif->hif_class_tbl[i] = NULL;
			break;
		}

	hif->hif_classes--;
	splx(s);

	KASSERT(TAILQ_EMPTY(&cl->cl_actc));

	if (cl == hif->hif_rootclass)
		hif->hif_rootclass = NULL;
	if (cl == hif->hif_defaultclass)
		hif->hif_defaultclass = NULL;

	/* Free external queue manager resources */
	if (cl->cl_qops && cl->cl_qops != pfq_hfsc_ops)
		cl->cl_qops->pfq_free(cl->cl_qdata);

	if (cl->cl_usc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
	if (cl->cl_fsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
	if (cl->cl_rsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
	pool_put(&hfsc_class_pl, cl);

	return (0);
}

/*
 * hfsc_nextclass returns the next class in the tree.
 *	usage:
 *		for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *			do_something;
 */
struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
	if (cl->cl_children != NULL)
		cl = cl->cl_children;
	else if (cl->cl_siblings != NULL)
		cl = cl->cl_siblings;
	else {
		while ((cl = cl->cl_parent) != NULL)
			if (cl->cl_siblings) {
				cl = cl->cl_siblings;
				break;
			}
	}

	return (cl);
}

struct mbuf *
hfsc_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl;
	struct mbuf *dm;

	if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL ||
	    cl->cl_children != NULL) {
		cl = hif->hif_defaultclass;
		if (cl == NULL)
			return (m);
	}

	dm = hfsc_class_enqueue(cl, m);

	/* successfully queued. */
	if (dm != m && hfsc_class_qlength(cl) == 1) {
		hfsc_set_active(hif, cl, m->m_pkthdr.len);
		if (!timeout_pending(&hif->hif_defer))
			timeout_add(&hif->hif_defer, 1);
	}

	/* drop occurred. */
	if (dm != NULL)
		PKTCNTR_INC(&cl->cl_stats.drop_cnt, dm->m_pkthdr.len);

	return (dm);
}

struct mbuf *
hfsc_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl, *tcl;
	struct mbuf *m;
	u_int64_t cur_time;

	cur_time = hfsc_microuptime();

	/*
	 * if there are eligible classes, use real-time criteria.
	 * find the class with the minimum deadline among
	 * the eligible classes.
	 */
	cl = hfsc_ellist_get_mindl(hif, cur_time);
	if (cl == NULL) {
		/*
		 * use link-sharing criteria
		 * get the class with the minimum vt in the hierarchy
		 */
		cl = NULL;
		tcl = hif->hif_rootclass;

		while (tcl != NULL && tcl->cl_children != NULL) {
			tcl = hfsc_actlist_firstfit(tcl, cur_time);
			if (tcl == NULL)
				continue;

			/*
			 * update parent's cl_cvtmin.
			 * don't update if the new vt is smaller.
			 */
			if (tcl->cl_parent->cl_cvtmin < tcl->cl_vt)
				tcl->cl_parent->cl_cvtmin = tcl->cl_vt;

			cl = tcl;
		}
		/* XXX HRTIMER plan hfsc_deferred precisely here. */
		if (cl == NULL)
			return (NULL);
	}

	m = hfsc_class_deq_begin(cl, &free_ml);
	ifq_mfreeml(ifq, &free_ml);
	if (m == NULL) {
		hfsc_update_sc(hif, cl, 0);
		return (NULL);
	}

	hif->hif_microtime = cur_time;
	*cookiep = cl;
	return (m);
}

void
hfsc_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl = cookie;

	hfsc_class_deq_commit(cl, m);
	hfsc_update_sc(hif, cl, m->m_pkthdr.len);

	PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len);
}

void
hfsc_update_sc(struct hfsc_if *hif, struct hfsc_class *cl, int len)
{
	int next_len, realtime = 0;
	u_int64_t cur_time = hif->hif_microtime;

	/* check if the class was scheduled by real-time criteria */
	if (cl->cl_rsc != NULL)
		realtime = (cl->cl_e <= cur_time);

	hfsc_update_vf(cl, len, cur_time);
	if (realtime)
		cl->cl_cumul += len;

	if (hfsc_class_qlength(cl) > 0) {
		/*
		 * Realtime queue needs to look into the future and make
		 * calculations based on that. This is the reason it can't
		 * be used with an external queue manager.
		 */
		if (cl->cl_rsc != NULL) {
			struct mbuf *m0;

			/* update ed */
			KASSERT(cl->cl_qops == pfq_hfsc_ops);
			m0 = MBUF_LIST_FIRST(&cl->cl_q.q);
			next_len = m0->m_pkthdr.len;

			if (realtime)
				hfsc_update_ed(hif, cl, next_len);
			else
				hfsc_update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		hfsc_set_passive(hif, cl);
	}
}

void
hfsc_deferred(void *arg)
{
	struct ifnet *ifp = arg;
	struct ifqueue *ifq = &ifp->if_snd;
	struct hfsc_if *hif;

	if (!HFSC_ENABLED(ifq))
		return;

	if (!ifq_empty(ifq))
		ifq_start(ifq);

	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
	if (hif == NULL)
		return;
	/* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
	timeout_add(&hif->hif_defer, 1);
	ifq_q_leave(&ifp->if_snd, hif);
}

void
hfsc_cl_purge(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf_list *ml)
{
	struct mbuf_list ml2 = MBUF_LIST_INITIALIZER();

	hfsc_class_purge(cl, &ml2);
	if (ml_empty(&ml2))
		return;

	ml_enlist(ml, &ml2);

	hfsc_update_vf(cl, 0, 0);	/* remove cl from the actlist */
	hfsc_set_passive(hif, cl);
}

void
hfsc_set_active(struct hfsc_if *hif, struct hfsc_class *cl, int len)
{
	if (cl->cl_rsc != NULL)
		hfsc_init_ed(hif, cl, len);
	if (cl->cl_fsc != NULL)
		hfsc_init_vf(cl, len);

	cl->cl_stats.period++;
}

void
hfsc_set_passive(struct hfsc_if *hif, struct hfsc_class *cl)
{
	if (cl->cl_rsc != NULL)
		hfsc_ellist_remove(hif, cl);

	/*
	 * actlist is handled in hfsc_update_vf() so that hfsc_update_vf(cl, 0,
	 * 0) needs to be called explicitly to remove a class from actlist
	 */
}

void
hfsc_init_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
{
	u_int64_t cur_time;

	cur_time = hfsc_microuptime();

	/* update the deadline curve */
	hfsc_rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	hfsc_ellist_insert(hif, cl);
}

void
hfsc_update_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
{
	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	hfsc_ellist_update(hif, cl);
}

void
hfsc_update_d(struct hfsc_class *cl, int next_len)
{
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

void
hfsc_init_vf(struct hfsc_class *cl, int len)
{
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc,
			    hfsc_active);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				    p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			hfsc_rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt,
			    cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			hfsc_actlist_insert(cl);

			if (cl->cl_usc != NULL) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = hfsc_microuptime();

				/* update the ulimit curve */
				hfsc_rtsc_min(&cl->cl_ulimit, cl->cl_usc,
				    cur_time, cl->cl_total);
				/* compute myf */
				cl->cl_myf = hfsc_rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			hfsc_update_cfmin(cl->cl_parent);
		}
	}
}

void
hfsc_update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
{
	u_int64_t f, myf_bound, delta;
	int go_passive = 0;

	if (hfsc_class_qlength(cl) == 0)
		go_passive = 1;

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		cl->cl_total += len;

		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			hfsc_actlist_remove(cl);

			hfsc_update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = hfsc_rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		hfsc_actlist_update(cl);

		if (cl->cl_usc != NULL) {
			cl->cl_myf = cl->cl_myfadj +
			    hfsc_rtsc_y2x(&cl->cl_ulimit, cl->cl_total);

			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - HFSC_CLK_PER_TICK;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			hfsc_update_cfmin(cl->cl_parent);
		}
	}
}

void
hfsc_update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HFSC_HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}

/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
 */
void
hfsc_ellist_insert(struct hfsc_if *hif, struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible)) == NULL ||
	    p->cl_e <= cl->cl_e) {
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
}

void
hfsc_ellist_remove(struct hfsc_if *hif, struct hfsc_class *cl)
{
	TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}

void
hfsc_ellist_update(struct hfsc_if *hif, struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the eligible time of a class increases monotonically.
	 * if the next entry has a larger eligible time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_ellist);
	if (p == NULL || cl->cl_e <= p->cl_e)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible);
	if (last->cl_e <= cl->cl_e) {
		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
}

/* find the class with the minimum deadline among the eligible classes */
struct hfsc_class *
hfsc_ellist_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
{
	struct hfsc_class *p, *cl = NULL;

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (p->cl_e > cur_time)
			break;
		if (cl == NULL || p->cl_d < cl->cl_d)
			cl = p;
	}
	return (cl);
}

/*
 * active children list holds backlogged child classes being sorted
 * by their virtual time.
 * each intermediate class has one active children list.
 */
void
hfsc_actlist_insert(struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active)) == NULL
	    || p->cl_vt <= cl->cl_vt) {
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
}

void
hfsc_actlist_remove(struct hfsc_class *cl)
{
	TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}

void
hfsc_actlist_update(struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the virtual time of a class increases monotonically during its
	 * backlogged period.
	 * if the next entry has a larger virtual time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_actlist);
	if (p == NULL || cl->cl_vt < p->cl_vt)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active);
	if (last->cl_vt <= cl->cl_vt) {
		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
}

struct hfsc_class *
hfsc_actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
	struct hfsc_class *p;

	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist)
		if (p->cl_f <= cur_time)
			return (p);

	return (NULL);
}

/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bits/sec
 *	d: msec
 *  internal service curve parameters
 *	sm: (bytes/tsc_interval) << SM_SHIFT
 *	ism: (tsc_count/byte) << ISM_SHIFT
 *	dx: tsc_count
 *
 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
 * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
 * digits in decimal using the following table.
 *
 *  bits/sec    100Kbps     1Mbps      10Mbps     100Mbps    1Gbps
 *  ----------+-------------------------------------------------------
 *  bytes/nsec  12.5e-6     125e-6     1250e-6    12500e-6   125000e-6
 *  sm(500MHz)  25.0e-6     250e-6     2500e-6    25000e-6   250000e-6
 *  sm(200MHz)  62.5e-6     625e-6     6250e-6    62500e-6   625000e-6
 *
 *  nsec/byte   80000       8000       800        80         8
 *  ism(500MHz) 40000       4000       400        40         4
 *  ism(200MHz) 16000       1600       160        16         1.6
 */
#define	SM_SHIFT	24
#define	ISM_SHIFT	10

#define	SM_MASK		((1LL << SM_SHIFT) - 1)
#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
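
/*
 * For example (figures are illustrative only and follow directly from the
 * definitions above): with HFSC_FREQ = 256,000,000 clock units per second,
 * a 1Gbps rate moves 125,000,000 bytes per second, i.e. about 0.49 bytes
 * per clock unit.  m2sm() below keeps this as a fixed-point value,
 * sm = (10^9 << 24) / 8 / 256,000,000 = 8,192,000, and m2ism() yields
 * ism = (256,000,000 << 10) * 8 / 10^9 = 2097, roughly 2.05 clock units
 * per byte once the ISM_SHIFT scaling is removed.
 */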

static __inline u_int64_t
seg_x2y(u_int64_t x, u_int64_t sm)
{
	u_int64_t y;

	/*
	 * compute
	 *	y = x * sm >> SM_SHIFT
	 * but divide it for the upper and lower bits to avoid overflow
	 */
	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
	return (y);
}

static __inline u_int64_t
seg_y2x(u_int64_t y, u_int64_t ism)
{
	u_int64_t x;

	if (y == 0)
		x = 0;
	else if (ism == HFSC_HT_INFINITY)
		x = HFSC_HT_INFINITY;
	else {
		x = (y >> ISM_SHIFT) * ism
		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
	}
	return (x);
}

static __inline u_int64_t
m2sm(u_int m)
{
	u_int64_t sm;

	sm = ((u_int64_t)m << SM_SHIFT) / 8 / HFSC_FREQ;
	return (sm);
}

static __inline u_int64_t
m2ism(u_int m)
{
	u_int64_t ism;

	if (m == 0)
		ism = HFSC_HT_INFINITY;
	else
		ism = ((u_int64_t)HFSC_FREQ << ISM_SHIFT) * 8 / m;
	return (ism);
}

static __inline u_int64_t
d2dx(u_int d)
{
	u_int64_t dx;

	dx = ((u_int64_t)d * HFSC_FREQ) / 1000;
	return (dx);
}

static __inline u_int
sm2m(u_int64_t sm)
{
	u_int64_t m;

	m = (sm * 8 * HFSC_FREQ) >> SM_SHIFT;
	return ((u_int)m);
}

static __inline u_int
dx2d(u_int64_t dx)
{
	u_int64_t d;

	d = dx * 1000 / HFSC_FREQ;
	return ((u_int)d);
}

void
hfsc_sc2isc(struct hfsc_sc *sc, struct hfsc_internal_sc *isc)
{
	isc->sm1 = m2sm(sc->m1);
	isc->ism1 = m2ism(sc->m1);
	isc->dx = d2dx(sc->d);
	isc->dy = seg_x2y(isc->dx, isc->sm1);
	isc->sm2 = m2sm(sc->m2);
	isc->ism2 = m2ism(sc->m2);
}

/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
void
hfsc_rtsc_init(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc * isc,
    u_int64_t x, u_int64_t y)
{
	rtsc->x = x;
	rtsc->y = y;
	rtsc->sm1 = isc->sm1;
	rtsc->ism1 = isc->ism1;
	rtsc->dx = isc->dx;
	rtsc->dy = isc->dy;
	rtsc->sm2 = isc->sm2;
	rtsc->ism2 = isc->ism2;
}

/*
 * calculate the x-projection (time) of the runtime service curve
 * for the given y value (amount of service)
 */
u_int64_t
hfsc_rtsc_y2x(struct hfsc_runtime_sc *rtsc, u_int64_t y)
{
	u_int64_t x;

	if (y < rtsc->y)
		x = rtsc->x;
	else if (y <= rtsc->y + rtsc->dy) {
		/* x belongs to the 1st segment */
		if (rtsc->dy == 0)
			x = rtsc->x + rtsc->dx;
		else
			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
	} else {
		/* x belongs to the 2nd segment */
		x = rtsc->x + rtsc->dx
		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
	}
	return (x);
}

u_int64_t
hfsc_rtsc_x2y(struct hfsc_runtime_sc *rtsc, u_int64_t x)
{
	u_int64_t y;

	if (x <= rtsc->x)
		y = rtsc->y;
	else if (x <= rtsc->x + rtsc->dx)
		/* y belongs to the 1st segment */
		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
	else
		/* y belongs to the 2nd segment */
		y = rtsc->y + rtsc->dy
		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
	return (y);
}

/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
void
hfsc_rtsc_min(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
    u_int64_t x, u_int64_t y)
{
	u_int64_t y1, y2, dx, dy;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = hfsc_rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = hfsc_rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = hfsc_rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
	return;
}
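
/*
 * A note on the intersection step above (spelling out the arithmetic):
 * with seg_x2y(dx, sm) ~ (dx * sm) >> SM_SHIFT, the condition
 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
 * reduces to dx * (sm1 - sm2) == (y1 - y) << SM_SHIFT, which gives the
 * dx = ((y1 - y) << SM_SHIFT) / (sm1 - sm2) used by hfsc_rtsc_min().
 */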

void
hfsc_getclstats(struct hfsc_class_stats *sp, struct hfsc_class *cl)
{
	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

	if (cl->cl_rsc != NULL) {
		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
		sp->rsc.d = dx2d(cl->cl_rsc->dx);
		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	if (cl->cl_fsc != NULL) {
		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
		sp->fsc.d = dx2d(cl->cl_fsc->dx);
		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	if (cl->cl_usc != NULL) {
		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
		sp->usc.d = dx2d(cl->cl_usc->dx);
		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
	sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	sp->cur_time = hfsc_microuptime();
	sp->machclk_freq = HFSC_FREQ;

	sp->qlength = hfsc_class_qlength(cl);
	sp->qlimit = cl->cl_q.qlimit;
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = 0;
}

/* convert a class handle to the corresponding class pointer */
struct hfsc_class *
hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle)
{
	int i;
	struct hfsc_class *cl;

	if (chandle == 0)
		return (NULL);
	/*
	 * first, try the slot corresponding to the lower bits of the handle.
	 * if it does not match, do the linear table search.
	 */
	i = chandle % hif->hif_allocated;
	if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
		return (cl);
	for (i = 0; i < hif->hif_allocated; i++)
		if ((cl = hif->hif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);
	return (NULL);
}