1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/cred.h> 33 #include <sys/proc.h> 34 #include <sys/session.h> 35 #include <sys/strsubr.h> 36 #include <sys/user.h> 37 #include <sys/priocntl.h> 38 #include <sys/class.h> 39 #include <sys/disp.h> 40 #include <sys/procset.h> 41 #include <sys/debug.h> 42 #include <sys/kmem.h> 43 #include <sys/errno.h> 44 #include <sys/fx.h> 45 #include <sys/fxpriocntl.h> 46 #include <sys/cpuvar.h> 47 #include <sys/systm.h> 48 #include <sys/vtrace.h> 49 #include <sys/schedctl.h> 50 #include <sys/tnf_probe.h> 51 #include <sys/sunddi.h> 52 #include <sys/spl.h> 53 #include <sys/modctl.h> 54 #include <sys/policy.h> 55 #include <sys/sdt.h> 56 #include <sys/cpupart.h> 57 #include <sys/cpucaps.h> 58 59 static pri_t fx_init(id_t, int, classfuncs_t **); 60 61 static struct sclass csw = { 62 "FX", 63 fx_init, 64 0 65 }; 66 67 static struct modlsched modlsched = { 68 &mod_schedops, "Fixed priority sched class", &csw 69 }; 70 71 static struct modlinkage modlinkage = { 72 MODREV_1, (void *)&modlsched, NULL 73 }; 74 75 76 /* 77 * control flags (kparms->fx_cflags). 78 */ 79 #define FX_DOUPRILIM 0x01 /* change user priority limit */ 80 #define FX_DOUPRI 0x02 /* change user priority */ 81 #define FX_DOTQ 0x04 /* change FX time quantum */ 82 83 84 #define FXMAXUPRI 60 /* maximum user priority setting */ 85 86 #define FX_MAX_UNPRIV_PRI 0 /* maximum unpriviledge priority */ 87 88 /* 89 * The fxproc_t structures that have a registered callback vector, 90 * are also kept in an array of circular doubly linked lists. A hash on 91 * the thread id (from ddi_get_kt_did()) is used to determine which list 92 * each of such fxproc structures should be placed. Each list has a dummy 93 * "head" which is never removed, so the list is never empty. 94 */ 95 96 #define FX_CB_LISTS 16 /* number of lists, must be power of 2 */ 97 #define FX_CB_LIST_HASH(ktid) ((uint_t)ktid & (FX_CB_LISTS - 1)) 98 99 /* Insert fxproc into callback list */ 100 #define FX_CB_LIST_INSERT(fxpp) \ 101 { \ 102 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 103 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 104 fxproc_t *headp = &fx_cb_plisthead[index]; \ 105 mutex_enter(lockp); \ 106 fxpp->fx_cb_next = headp->fx_cb_next; \ 107 fxpp->fx_cb_prev = headp; \ 108 headp->fx_cb_next->fx_cb_prev = fxpp; \ 109 headp->fx_cb_next = fxpp; \ 110 mutex_exit(lockp); \ 111 } 112 113 /* 114 * Remove thread from callback list. 115 */ 116 #define FX_CB_LIST_DELETE(fxpp) \ 117 { \ 118 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 119 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 120 mutex_enter(lockp); \ 121 fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next; \ 122 fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev; \ 123 mutex_exit(lockp); \ 124 } 125 126 #define FX_HAS_CB(fxpp) (fxpp->fx_callback != NULL) 127 128 /* adjust x to be between 0 and fx_maxumdpri */ 129 130 #define FX_ADJUST_PRI(pri) \ 131 { \ 132 if (pri < 0) \ 133 pri = 0; \ 134 else if (pri > fx_maxumdpri) \ 135 pri = fx_maxumdpri; \ 136 } 137 138 #define FX_ADJUST_QUANTUM(q) \ 139 { \ 140 if (q > INT_MAX) \ 141 q = INT_MAX; \ 142 else if (q <= 0) \ 143 q = FX_TQINF; \ 144 } 145 146 #define FX_ISVALID(pri, quantum) \ 147 (((pri >= 0) || (pri == FX_CB_NOCHANGE)) && \ 148 ((quantum >= 0) || (quantum == FX_NOCHANGE) || \ 149 (quantum == FX_TQDEF) || (quantum == FX_TQINF))) 150 151 152 static id_t fx_cid; /* fixed priority class ID */ 153 static fxdpent_t *fx_dptbl; /* fixed priority disp parameter table */ 154 155 static pri_t fx_maxupri = FXMAXUPRI; 156 static pri_t fx_maxumdpri; /* max user mode fixed priority */ 157 158 static pri_t fx_maxglobpri; /* maximum global priority used by fx class */ 159 static kmutex_t fx_dptblock; /* protects fixed priority dispatch table */ 160 161 162 static kmutex_t fx_cb_list_lock[FX_CB_LISTS]; /* protects list of fxprocs */ 163 /* that have callbacks */ 164 static fxproc_t fx_cb_plisthead[FX_CB_LISTS]; /* dummy fxproc at head of */ 165 /* list of fxprocs with */ 166 /* callbacks */ 167 168 static int fx_admin(caddr_t, cred_t *); 169 static int fx_getclinfo(void *); 170 static int fx_parmsin(void *); 171 static int fx_parmsout(void *, pc_vaparms_t *); 172 static int fx_vaparmsin(void *, pc_vaparms_t *); 173 static int fx_vaparmsout(void *, pc_vaparms_t *); 174 static int fx_getclpri(pcpri_t *); 175 static int fx_alloc(void **, int); 176 static void fx_free(void *); 177 static int fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *); 178 static void fx_exitclass(void *); 179 static int fx_canexit(kthread_t *, cred_t *); 180 static int fx_fork(kthread_t *, kthread_t *, void *); 181 static void fx_forkret(kthread_t *, kthread_t *); 182 static void fx_parmsget(kthread_t *, void *); 183 static int fx_parmsset(kthread_t *, void *, id_t, cred_t *); 184 static void fx_stop(kthread_t *, int, int); 185 static void fx_exit(kthread_t *); 186 static pri_t fx_swapin(kthread_t *, int); 187 static pri_t fx_swapout(kthread_t *, int); 188 static void fx_trapret(kthread_t *); 189 static void fx_preempt(kthread_t *); 190 static void fx_setrun(kthread_t *); 191 static void fx_sleep(kthread_t *); 192 static void fx_tick(kthread_t *); 193 static void fx_wakeup(kthread_t *); 194 static int fx_donice(kthread_t *, cred_t *, int, int *); 195 static int fx_doprio(kthread_t *, cred_t *, int, int *); 196 static pri_t fx_globpri(kthread_t *); 197 static void fx_yield(kthread_t *); 198 static void fx_nullsys(); 199 200 extern fxdpent_t *fx_getdptbl(void); 201 202 static void fx_change_priority(kthread_t *, fxproc_t *); 203 static fxproc_t *fx_list_lookup(kt_did_t); 204 static void fx_list_release(fxproc_t *); 205 206 207 static struct classfuncs fx_classfuncs = { 208 /* class functions */ 209 fx_admin, 210 fx_getclinfo, 211 fx_parmsin, 212 fx_parmsout, 213 fx_vaparmsin, 214 fx_vaparmsout, 215 fx_getclpri, 216 fx_alloc, 217 fx_free, 218 219 /* thread functions */ 220 fx_enterclass, 221 fx_exitclass, 222 fx_canexit, 223 fx_fork, 224 fx_forkret, 225 fx_parmsget, 226 fx_parmsset, 227 fx_stop, 228 fx_exit, 229 fx_nullsys, /* active */ 230 fx_nullsys, /* inactive */ 231 fx_swapin, 232 fx_swapout, 233 fx_trapret, 234 fx_preempt, 235 fx_setrun, 236 fx_sleep, 237 fx_tick, 238 fx_wakeup, 239 fx_donice, 240 fx_globpri, 241 fx_nullsys, /* set_process_group */ 242 fx_yield, 243 fx_doprio, 244 }; 245 246 247 int 248 _init() 249 { 250 return (mod_install(&modlinkage)); 251 } 252 253 int 254 _fini() 255 { 256 return (EBUSY); 257 } 258 259 int 260 _info(struct modinfo *modinfop) 261 { 262 return (mod_info(&modlinkage, modinfop)); 263 } 264 265 /* 266 * Fixed priority class initialization. Called by dispinit() at boot time. 267 * We can ignore the clparmsz argument since we know that the smallest 268 * possible parameter buffer is big enough for us. 269 */ 270 /* ARGSUSED */ 271 static pri_t 272 fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp) 273 { 274 int i; 275 extern pri_t fx_getmaxumdpri(void); 276 277 fx_dptbl = fx_getdptbl(); 278 fx_maxumdpri = fx_getmaxumdpri(); 279 fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri; 280 281 fx_cid = cid; /* Record our class ID */ 282 283 /* 284 * Initialize the hash table for fxprocs with callbacks 285 */ 286 for (i = 0; i < FX_CB_LISTS; i++) { 287 fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev = 288 &fx_cb_plisthead[i]; 289 } 290 291 /* 292 * We're required to return a pointer to our classfuncs 293 * structure and the highest global priority value we use. 294 */ 295 *clfuncspp = &fx_classfuncs; 296 return (fx_maxglobpri); 297 } 298 299 /* 300 * Get or reset the fx_dptbl values per the user's request. 301 */ 302 static int 303 fx_admin(caddr_t uaddr, cred_t *reqpcredp) 304 { 305 fxadmin_t fxadmin; 306 fxdpent_t *tmpdpp; 307 int userdpsz; 308 int i; 309 size_t fxdpsz; 310 311 if (get_udatamodel() == DATAMODEL_NATIVE) { 312 if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t))) 313 return (EFAULT); 314 } 315 #ifdef _SYSCALL32_IMPL 316 else { 317 /* get fxadmin struct from ILP32 caller */ 318 fxadmin32_t fxadmin32; 319 if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t))) 320 return (EFAULT); 321 fxadmin.fx_dpents = 322 (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents; 323 fxadmin.fx_ndpents = fxadmin32.fx_ndpents; 324 fxadmin.fx_cmd = fxadmin32.fx_cmd; 325 } 326 #endif /* _SYSCALL32_IMPL */ 327 328 fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t); 329 330 switch (fxadmin.fx_cmd) { 331 case FX_GETDPSIZE: 332 fxadmin.fx_ndpents = fx_maxumdpri + 1; 333 334 if (get_udatamodel() == DATAMODEL_NATIVE) { 335 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 336 return (EFAULT); 337 } 338 #ifdef _SYSCALL32_IMPL 339 else { 340 /* return fxadmin struct to ILP32 caller */ 341 fxadmin32_t fxadmin32; 342 fxadmin32.fx_dpents = 343 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 344 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 345 fxadmin32.fx_cmd = fxadmin.fx_cmd; 346 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 347 return (EFAULT); 348 } 349 #endif /* _SYSCALL32_IMPL */ 350 break; 351 352 case FX_GETDPTBL: 353 userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t), 354 fxdpsz); 355 if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz)) 356 return (EFAULT); 357 358 fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t); 359 360 if (get_udatamodel() == DATAMODEL_NATIVE) { 361 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 362 return (EFAULT); 363 } 364 #ifdef _SYSCALL32_IMPL 365 else { 366 /* return fxadmin struct to ILP32 callers */ 367 fxadmin32_t fxadmin32; 368 fxadmin32.fx_dpents = 369 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 370 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 371 fxadmin32.fx_cmd = fxadmin.fx_cmd; 372 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 373 return (EFAULT); 374 } 375 #endif /* _SYSCALL32_IMPL */ 376 break; 377 378 case FX_SETDPTBL: 379 /* 380 * We require that the requesting process has sufficient 381 * privileges. We also require that the table supplied by 382 * the user exactly match the current fx_dptbl in size. 383 */ 384 if (secpolicy_dispadm(reqpcredp) != 0) { 385 return (EPERM); 386 } 387 if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) { 388 return (EINVAL); 389 } 390 391 /* 392 * We read the user supplied table into a temporary buffer 393 * where it is validated before being copied over the 394 * fx_dptbl. 395 */ 396 tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP); 397 if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) { 398 kmem_free(tmpdpp, fxdpsz); 399 return (EFAULT); 400 } 401 for (i = 0; i < fxadmin.fx_ndpents; i++) { 402 403 /* 404 * Validate the user supplied values. All we are doing 405 * here is verifying that the values are within their 406 * allowable ranges and will not panic the system. We 407 * make no attempt to ensure that the resulting 408 * configuration makes sense or results in reasonable 409 * performance. 410 */ 411 if (tmpdpp[i].fx_quantum <= 0 && 412 tmpdpp[i].fx_quantum != FX_TQINF) { 413 kmem_free(tmpdpp, fxdpsz); 414 return (EINVAL); 415 } 416 } 417 418 /* 419 * Copy the user supplied values over the current fx_dptbl 420 * values. The fx_globpri member is read-only so we don't 421 * overwrite it. 422 */ 423 mutex_enter(&fx_dptblock); 424 for (i = 0; i < fxadmin.fx_ndpents; i++) { 425 fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum; 426 } 427 mutex_exit(&fx_dptblock); 428 kmem_free(tmpdpp, fxdpsz); 429 break; 430 431 default: 432 return (EINVAL); 433 } 434 return (0); 435 } 436 437 /* 438 * Allocate a fixed priority class specific thread structure and 439 * initialize it with the parameters supplied. Also move the thread 440 * to specified priority. 441 */ 442 static int 443 fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp, 444 void *bufp) 445 { 446 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 447 fxproc_t *fxpp; 448 pri_t reqfxupri; 449 pri_t reqfxuprilim; 450 451 fxpp = (fxproc_t *)bufp; 452 ASSERT(fxpp != NULL); 453 454 /* 455 * Initialize the fxproc structure. 456 */ 457 fxpp->fx_flags = 0; 458 fxpp->fx_callback = NULL; 459 fxpp->fx_cookie = NULL; 460 461 if (fxkparmsp == NULL) { 462 /* 463 * Use default values. 464 */ 465 fxpp->fx_pri = fxpp->fx_uprilim = 0; 466 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 467 fxpp->fx_nice = NZERO; 468 } else { 469 /* 470 * Use supplied values. 471 */ 472 473 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) { 474 reqfxuprilim = 0; 475 } else { 476 if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI && 477 secpolicy_setpriority(reqpcredp) != 0) 478 return (EPERM); 479 reqfxuprilim = fxkparmsp->fx_uprilim; 480 FX_ADJUST_PRI(reqfxuprilim); 481 } 482 483 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) { 484 reqfxupri = reqfxuprilim; 485 } else { 486 if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI && 487 secpolicy_setpriority(reqpcredp) != 0) 488 return (EPERM); 489 /* 490 * Set the user priority to the requested value 491 * or the upri limit, whichever is lower. 492 */ 493 reqfxupri = fxkparmsp->fx_upri; 494 FX_ADJUST_PRI(reqfxupri); 495 496 if (reqfxupri > reqfxuprilim) 497 reqfxupri = reqfxuprilim; 498 } 499 500 501 fxpp->fx_uprilim = reqfxuprilim; 502 fxpp->fx_pri = reqfxupri; 503 504 fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri; 505 506 if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) || 507 (fxkparmsp->fx_tqntm == FX_TQDEF)) { 508 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 509 } else { 510 if (secpolicy_setpriority(reqpcredp) != 0) 511 return (EPERM); 512 513 if (fxkparmsp->fx_tqntm == FX_TQINF) 514 fxpp->fx_pquantum = FX_TQINF; 515 else { 516 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 517 } 518 } 519 520 } 521 522 fxpp->fx_timeleft = fxpp->fx_pquantum; 523 cpucaps_sc_init(&fxpp->fx_caps); 524 fxpp->fx_tp = t; 525 526 thread_lock(t); /* get dispatcher lock on thread */ 527 t->t_clfuncs = &(sclass[cid].cl_funcs->thread); 528 t->t_cid = cid; 529 t->t_cldata = (void *)fxpp; 530 t->t_schedflag &= ~TS_RUNQMATCH; 531 fx_change_priority(t, fxpp); 532 thread_unlock(t); 533 534 return (0); 535 } 536 537 /* 538 * The thread is exiting. 539 */ 540 static void 541 fx_exit(kthread_t *t) 542 { 543 fxproc_t *fxpp; 544 545 thread_lock(t); 546 fxpp = (fxproc_t *)(t->t_cldata); 547 548 /* 549 * A thread could be exiting in between clock ticks, so we need to 550 * calculate how much CPU time it used since it was charged last time. 551 * 552 * CPU caps are not enforced on exiting processes - it is usually 553 * desirable to exit as soon as possible to free resources. 554 */ 555 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY); 556 557 if (FX_HAS_CB(fxpp)) { 558 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 559 fxpp->fx_callback = NULL; 560 fxpp->fx_cookie = NULL; 561 thread_unlock(t); 562 FX_CB_LIST_DELETE(fxpp); 563 return; 564 } 565 566 thread_unlock(t); 567 } 568 569 /* 570 * Exiting the class. Free fxproc structure of thread. 571 */ 572 static void 573 fx_exitclass(void *procp) 574 { 575 fxproc_t *fxpp = (fxproc_t *)procp; 576 577 thread_lock(fxpp->fx_tp); 578 if (FX_HAS_CB(fxpp)) { 579 580 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 581 582 fxpp->fx_callback = NULL; 583 fxpp->fx_cookie = NULL; 584 thread_unlock(fxpp->fx_tp); 585 FX_CB_LIST_DELETE(fxpp); 586 } else 587 thread_unlock(fxpp->fx_tp); 588 589 kmem_free(fxpp, sizeof (fxproc_t)); 590 } 591 592 /* ARGSUSED */ 593 static int 594 fx_canexit(kthread_t *t, cred_t *cred) 595 { 596 /* 597 * A thread can always leave the FX class 598 */ 599 return (0); 600 } 601 602 /* 603 * Initialize fixed-priority class specific proc structure for a child. 604 * callbacks are not inherited upon fork. 605 */ 606 static int 607 fx_fork(kthread_t *t, kthread_t *ct, void *bufp) 608 { 609 fxproc_t *pfxpp; /* ptr to parent's fxproc structure */ 610 fxproc_t *cfxpp; /* ptr to child's fxproc structure */ 611 612 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 613 614 cfxpp = (fxproc_t *)bufp; 615 ASSERT(cfxpp != NULL); 616 thread_lock(t); 617 pfxpp = (fxproc_t *)t->t_cldata; 618 /* 619 * Initialize child's fxproc structure. 620 */ 621 cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum; 622 cfxpp->fx_pri = pfxpp->fx_pri; 623 cfxpp->fx_uprilim = pfxpp->fx_uprilim; 624 cfxpp->fx_nice = pfxpp->fx_nice; 625 cfxpp->fx_callback = NULL; 626 cfxpp->fx_cookie = NULL; 627 cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ); 628 cpucaps_sc_init(&cfxpp->fx_caps); 629 630 cfxpp->fx_tp = ct; 631 ct->t_cldata = (void *)cfxpp; 632 thread_unlock(t); 633 634 /* 635 * Link new structure into fxproc list. 636 */ 637 return (0); 638 } 639 640 641 /* 642 * Child is placed at back of dispatcher queue and parent gives 643 * up processor so that the child runs first after the fork. 644 * This allows the child immediately execing to break the multiple 645 * use of copy on write pages with no disk home. The parent will 646 * get to steal them back rather than uselessly copying them. 647 */ 648 static void 649 fx_forkret(kthread_t *t, kthread_t *ct) 650 { 651 proc_t *pp = ttoproc(t); 652 proc_t *cp = ttoproc(ct); 653 fxproc_t *fxpp; 654 655 ASSERT(t == curthread); 656 ASSERT(MUTEX_HELD(&pidlock)); 657 658 /* 659 * Grab the child's p_lock before dropping pidlock to ensure 660 * the process does not disappear before we set it running. 661 */ 662 mutex_enter(&cp->p_lock); 663 mutex_exit(&pidlock); 664 continuelwps(cp); 665 mutex_exit(&cp->p_lock); 666 667 mutex_enter(&pp->p_lock); 668 continuelwps(pp); 669 mutex_exit(&pp->p_lock); 670 671 thread_lock(t); 672 fxpp = (fxproc_t *)(t->t_cldata); 673 t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 674 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 675 THREAD_TRANSITION(t); 676 fx_setrun(t); 677 thread_unlock(t); 678 679 swtch(); 680 } 681 682 683 /* 684 * Get information about the fixed-priority class into the buffer 685 * pointed to by fxinfop. The maximum configured user priority 686 * is the only information we supply. 687 */ 688 static int 689 fx_getclinfo(void *infop) 690 { 691 fxinfo_t *fxinfop = (fxinfo_t *)infop; 692 fxinfop->fx_maxupri = fx_maxupri; 693 return (0); 694 } 695 696 697 698 /* 699 * Return the user mode scheduling priority range. 700 */ 701 static int 702 fx_getclpri(pcpri_t *pcprip) 703 { 704 pcprip->pc_clpmax = fx_maxupri; 705 pcprip->pc_clpmin = 0; 706 return (0); 707 } 708 709 710 static void 711 fx_nullsys() 712 {} 713 714 715 /* 716 * Get the fixed-priority parameters of the thread pointed to by 717 * fxprocp into the buffer pointed to by fxparmsp. 718 */ 719 static void 720 fx_parmsget(kthread_t *t, void *parmsp) 721 { 722 fxproc_t *fxpp = (fxproc_t *)t->t_cldata; 723 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 724 725 fxkparmsp->fx_upri = fxpp->fx_pri; 726 fxkparmsp->fx_uprilim = fxpp->fx_uprilim; 727 fxkparmsp->fx_tqntm = fxpp->fx_pquantum; 728 } 729 730 731 732 /* 733 * Check the validity of the fixed-priority parameters in the buffer 734 * pointed to by fxparmsp. 735 */ 736 static int 737 fx_parmsin(void *parmsp) 738 { 739 fxparms_t *fxparmsp = (fxparms_t *)parmsp; 740 uint_t cflags; 741 longlong_t ticks; 742 /* 743 * Check validity of parameters. 744 */ 745 746 if ((fxparmsp->fx_uprilim > fx_maxupri || 747 fxparmsp->fx_uprilim < 0) && 748 fxparmsp->fx_uprilim != FX_NOCHANGE) 749 return (EINVAL); 750 751 if ((fxparmsp->fx_upri > fx_maxupri || 752 fxparmsp->fx_upri < 0) && 753 fxparmsp->fx_upri != FX_NOCHANGE) 754 return (EINVAL); 755 756 if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) || 757 fxparmsp->fx_tqnsecs >= NANOSEC) 758 return (EINVAL); 759 760 cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0); 761 762 if (fxparmsp->fx_uprilim != FX_NOCHANGE) { 763 cflags |= FX_DOUPRILIM; 764 } 765 766 if (fxparmsp->fx_tqnsecs != FX_NOCHANGE) 767 cflags |= FX_DOTQ; 768 769 /* 770 * convert the buffer to kernel format. 771 */ 772 773 if (fxparmsp->fx_tqnsecs >= 0) { 774 if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) + 775 NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX) 776 return (ERANGE); 777 778 ((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks; 779 } else { 780 if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) && 781 (fxparmsp->fx_tqnsecs != FX_TQINF) && 782 (fxparmsp->fx_tqnsecs != FX_TQDEF)) 783 return (EINVAL); 784 ((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs; 785 } 786 787 ((fxkparms_t *)fxparmsp)->fx_cflags = cflags; 788 789 return (0); 790 } 791 792 793 /* 794 * Check the validity of the fixed-priority parameters in the pc_vaparms_t 795 * structure vaparmsp and put them in the buffer pointed to by fxprmsp. 796 * pc_vaparms_t contains (key, value) pairs of parameter. 797 */ 798 static int 799 fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp) 800 { 801 uint_t secs = 0; 802 uint_t cnt; 803 int nsecs = 0; 804 int priflag, secflag, nsecflag, limflag; 805 longlong_t ticks; 806 fxkparms_t *fxprmsp = (fxkparms_t *)prmsp; 807 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 808 809 810 /* 811 * First check the validity of parameters and convert them 812 * from the user supplied format to the internal format. 813 */ 814 priflag = secflag = nsecflag = limflag = 0; 815 816 fxprmsp->fx_cflags = 0; 817 818 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 819 return (EINVAL); 820 821 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 822 823 switch (vpp->pc_key) { 824 case FX_KY_UPRILIM: 825 if (limflag++) 826 return (EINVAL); 827 fxprmsp->fx_cflags |= FX_DOUPRILIM; 828 fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm; 829 if (fxprmsp->fx_uprilim > fx_maxupri || 830 fxprmsp->fx_uprilim < 0) 831 return (EINVAL); 832 break; 833 834 case FX_KY_UPRI: 835 if (priflag++) 836 return (EINVAL); 837 fxprmsp->fx_cflags |= FX_DOUPRI; 838 fxprmsp->fx_upri = (pri_t)vpp->pc_parm; 839 if (fxprmsp->fx_upri > fx_maxupri || 840 fxprmsp->fx_upri < 0) 841 return (EINVAL); 842 break; 843 844 case FX_KY_TQSECS: 845 if (secflag++) 846 return (EINVAL); 847 fxprmsp->fx_cflags |= FX_DOTQ; 848 secs = (uint_t)vpp->pc_parm; 849 break; 850 851 case FX_KY_TQNSECS: 852 if (nsecflag++) 853 return (EINVAL); 854 fxprmsp->fx_cflags |= FX_DOTQ; 855 nsecs = (int)vpp->pc_parm; 856 break; 857 858 default: 859 return (EINVAL); 860 } 861 } 862 863 if (vaparmsp->pc_vaparmscnt == 0) { 864 /* 865 * Use default parameters. 866 */ 867 fxprmsp->fx_upri = 0; 868 fxprmsp->fx_uprilim = 0; 869 fxprmsp->fx_tqntm = FX_TQDEF; 870 fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ; 871 } else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) { 872 if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC) 873 return (EINVAL); 874 875 if (nsecs >= 0) { 876 if ((ticks = SEC_TO_TICK((longlong_t)secs) + 877 NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX) 878 return (ERANGE); 879 880 fxprmsp->fx_tqntm = (int)ticks; 881 } else { 882 if (nsecs != FX_TQINF && nsecs != FX_TQDEF) 883 return (EINVAL); 884 fxprmsp->fx_tqntm = nsecs; 885 } 886 } 887 888 return (0); 889 } 890 891 892 /* 893 * Nothing to do here but return success. 894 */ 895 /* ARGSUSED */ 896 static int 897 fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp) 898 { 899 register fxkparms_t *fxkprmsp = (fxkparms_t *)parmsp; 900 901 if (vaparmsp != NULL) 902 return (0); 903 904 if (fxkprmsp->fx_tqntm < 0) { 905 /* 906 * Quantum field set to special value (e.g. FX_TQINF) 907 */ 908 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm; 909 ((fxparms_t *)fxkprmsp)->fx_tqsecs = 0; 910 911 } else { 912 /* Convert quantum from ticks to seconds-nanoseconds */ 913 914 timestruc_t ts; 915 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 916 ((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec; 917 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec; 918 } 919 920 return (0); 921 } 922 923 924 /* 925 * Copy all selected fixed-priority class parameters to the user. 926 * The parameters are specified by a key. 927 */ 928 static int 929 fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp) 930 { 931 fxkparms_t *fxkprmsp = (fxkparms_t *)prmsp; 932 timestruc_t ts; 933 uint_t cnt; 934 uint_t secs; 935 int nsecs; 936 int priflag, secflag, nsecflag, limflag; 937 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 938 939 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 940 941 priflag = secflag = nsecflag = limflag = 0; 942 943 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 944 return (EINVAL); 945 946 if (fxkprmsp->fx_tqntm < 0) { 947 /* 948 * Quantum field set to special value (e.g. FX_TQINF). 949 */ 950 secs = 0; 951 nsecs = fxkprmsp->fx_tqntm; 952 } else { 953 /* 954 * Convert quantum from ticks to seconds-nanoseconds. 955 */ 956 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 957 secs = ts.tv_sec; 958 nsecs = ts.tv_nsec; 959 } 960 961 962 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 963 964 switch (vpp->pc_key) { 965 case FX_KY_UPRILIM: 966 if (limflag++) 967 return (EINVAL); 968 if (copyout(&fxkprmsp->fx_uprilim, 969 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 970 return (EFAULT); 971 break; 972 973 case FX_KY_UPRI: 974 if (priflag++) 975 return (EINVAL); 976 if (copyout(&fxkprmsp->fx_upri, 977 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 978 return (EFAULT); 979 break; 980 981 case FX_KY_TQSECS: 982 if (secflag++) 983 return (EINVAL); 984 if (copyout(&secs, 985 (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t))) 986 return (EFAULT); 987 break; 988 989 case FX_KY_TQNSECS: 990 if (nsecflag++) 991 return (EINVAL); 992 if (copyout(&nsecs, 993 (void *)(uintptr_t)vpp->pc_parm, sizeof (int))) 994 return (EFAULT); 995 break; 996 997 default: 998 return (EINVAL); 999 } 1000 } 1001 1002 return (0); 1003 } 1004 1005 /* 1006 * Set the scheduling parameters of the thread pointed to by fxprocp 1007 * to those specified in the buffer pointed to by fxparmsp. 1008 */ 1009 /* ARGSUSED */ 1010 static int 1011 fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp) 1012 { 1013 char nice; 1014 pri_t reqfxuprilim; 1015 pri_t reqfxupri; 1016 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 1017 fxproc_t *fxpp; 1018 1019 1020 ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock)); 1021 1022 thread_lock(tx); 1023 fxpp = (fxproc_t *)tx->t_cldata; 1024 1025 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) 1026 reqfxuprilim = fxpp->fx_uprilim; 1027 else 1028 reqfxuprilim = fxkparmsp->fx_uprilim; 1029 1030 /* 1031 * Basic permissions enforced by generic kernel code 1032 * for all classes require that a thread attempting 1033 * to change the scheduling parameters of a target 1034 * thread be privileged or have a real or effective 1035 * UID matching that of the target thread. We are not 1036 * called unless these basic permission checks have 1037 * already passed. The fixed priority class requires in 1038 * addition that the calling thread be privileged if it 1039 * is attempting to raise the pri above its current 1040 * value This may have been checked previously but if our 1041 * caller passed us a non-NULL credential pointer we assume 1042 * it hasn't and we check it here. 1043 */ 1044 1045 if ((reqpcredp != NULL) && 1046 (reqfxuprilim > fxpp->fx_uprilim || 1047 ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) && 1048 secpolicy_setpriority(reqpcredp) != 0) { 1049 thread_unlock(tx); 1050 return (EPERM); 1051 } 1052 1053 FX_ADJUST_PRI(reqfxuprilim); 1054 1055 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) 1056 reqfxupri = fxpp->fx_pri; 1057 else 1058 reqfxupri = fxkparmsp->fx_upri; 1059 1060 1061 /* 1062 * Make sure the user priority doesn't exceed the upri limit. 1063 */ 1064 if (reqfxupri > reqfxuprilim) 1065 reqfxupri = reqfxuprilim; 1066 1067 /* 1068 * Set fx_nice to the nice value corresponding to the user 1069 * priority we are setting. Note that setting the nice field 1070 * of the parameter struct won't affect upri or nice. 1071 */ 1072 1073 nice = NZERO - (reqfxupri * NZERO) / fx_maxupri; 1074 1075 if (nice > NZERO) 1076 nice = NZERO; 1077 1078 fxpp->fx_uprilim = reqfxuprilim; 1079 fxpp->fx_pri = reqfxupri; 1080 1081 if (fxkparmsp->fx_tqntm == FX_TQINF) 1082 fxpp->fx_pquantum = FX_TQINF; 1083 else if (fxkparmsp->fx_tqntm == FX_TQDEF) 1084 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1085 else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0) 1086 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 1087 1088 fxpp->fx_nice = nice; 1089 1090 fx_change_priority(tx, fxpp); 1091 thread_unlock(tx); 1092 return (0); 1093 } 1094 1095 1096 /* 1097 * Return the global scheduling priority that would be assigned 1098 * to a thread entering the fixed-priority class with the fx_upri. 1099 */ 1100 static pri_t 1101 fx_globpri(kthread_t *t) 1102 { 1103 fxproc_t *fxpp; 1104 1105 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 1106 1107 fxpp = (fxproc_t *)t->t_cldata; 1108 return (fx_dptbl[fxpp->fx_pri].fx_globpri); 1109 1110 } 1111 1112 /* 1113 * Arrange for thread to be placed in appropriate location 1114 * on dispatcher queue. 1115 * 1116 * This is called with the current thread in TS_ONPROC and locked. 1117 */ 1118 static void 1119 fx_preempt(kthread_t *t) 1120 { 1121 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1122 1123 ASSERT(t == curthread); 1124 ASSERT(THREAD_LOCK_HELD(curthread)); 1125 1126 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1127 1128 /* 1129 * Check to see if we're doing "preemption control" here. If 1130 * we are, and if the user has requested that this thread not 1131 * be preempted, and if preemptions haven't been put off for 1132 * too long, let the preemption happen here but try to make 1133 * sure the thread is rescheduled as soon as possible. We do 1134 * this by putting it on the front of the highest priority run 1135 * queue in the FX class. If the preemption has been put off 1136 * for too long, clear the "nopreempt" bit and let the thread 1137 * be preempted. 1138 */ 1139 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1140 if (fxpp->fx_pquantum == FX_TQINF || 1141 fxpp->fx_timeleft > -SC_MAX_TICKS) { 1142 DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t); 1143 schedctl_set_yield(t, 1); 1144 setfrontdq(t); 1145 return; 1146 } else { 1147 schedctl_set_nopreempt(t, 0); 1148 DTRACE_SCHED1(schedctl__preempt, kthread_t *, t); 1149 TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt", 1150 /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid, 1151 tnf_lwpid, lwpid, t->t_tid); 1152 /* 1153 * Fall through and be preempted below. 1154 */ 1155 } 1156 } 1157 1158 if (FX_HAS_CB(fxpp)) { 1159 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1160 pri_t newpri = fxpp->fx_pri; 1161 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1162 &new_quantum, &newpri); 1163 FX_ADJUST_QUANTUM(new_quantum); 1164 if ((int)new_quantum != fxpp->fx_pquantum) { 1165 fxpp->fx_pquantum = (int)new_quantum; 1166 fxpp->fx_timeleft = fxpp->fx_pquantum; 1167 } 1168 FX_ADJUST_PRI(newpri); 1169 fxpp->fx_pri = newpri; 1170 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1171 } 1172 1173 /* 1174 * This thread may be placed on wait queue by CPU Caps. In this case we 1175 * do not need to do anything until it is removed from the wait queue. 1176 */ 1177 if (CPUCAPS_ENFORCE(t)) { 1178 return; 1179 } 1180 1181 if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) { 1182 fxpp->fx_timeleft = fxpp->fx_pquantum; 1183 fxpp->fx_flags &= ~FXBACKQ; 1184 setbackdq(t); 1185 } else { 1186 setfrontdq(t); 1187 } 1188 } 1189 1190 static void 1191 fx_setrun(kthread_t *t) 1192 { 1193 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1194 1195 ASSERT(THREAD_LOCK_HELD(t)); /* t should be in transition */ 1196 fxpp->fx_flags &= ~FXBACKQ; 1197 1198 if (t->t_disp_time != lbolt) 1199 setbackdq(t); 1200 else 1201 setfrontdq(t); 1202 } 1203 1204 1205 /* 1206 * Prepare thread for sleep. We reset the thread priority so it will 1207 * run at the kernel priority level when it wakes up. 1208 */ 1209 static void 1210 fx_sleep(kthread_t *t) 1211 { 1212 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1213 1214 ASSERT(t == curthread); 1215 ASSERT(THREAD_LOCK_HELD(t)); 1216 1217 /* 1218 * Account for time spent on CPU before going to sleep. 1219 */ 1220 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1221 1222 if (FX_HAS_CB(fxpp)) { 1223 FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie); 1224 } 1225 t->t_stime = lbolt; /* time stamp for the swapper */ 1226 } 1227 1228 1229 /* 1230 * Return Values: 1231 * 1232 * -1 if the thread is loaded or is not eligible to be swapped in. 1233 * 1234 * FX and RT threads are designed so that they don't swapout; however, 1235 * it is possible that while the thread is swapped out and in another class, it 1236 * can be changed to FX or RT. Since these threads should be swapped in 1237 * as soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin 1238 * returns SHRT_MAX - 1, so that it gives deference to any swapped out 1239 * RT threads. 1240 */ 1241 /* ARGSUSED */ 1242 static pri_t 1243 fx_swapin(kthread_t *t, int flags) 1244 { 1245 pri_t tpri = -1; 1246 1247 ASSERT(THREAD_LOCK_HELD(t)); 1248 1249 if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) { 1250 tpri = (pri_t)SHRT_MAX - 1; 1251 } 1252 1253 return (tpri); 1254 } 1255 1256 /* 1257 * Return Values 1258 * -1 if the thread isn't loaded or is not eligible to be swapped out. 1259 */ 1260 /* ARGSUSED */ 1261 static pri_t 1262 fx_swapout(kthread_t *t, int flags) 1263 { 1264 ASSERT(THREAD_LOCK_HELD(t)); 1265 1266 return (-1); 1267 1268 } 1269 1270 /* ARGSUSED */ 1271 static void 1272 fx_stop(kthread_t *t, int why, int what) 1273 { 1274 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1275 1276 ASSERT(THREAD_LOCK_HELD(t)); 1277 1278 if (FX_HAS_CB(fxpp)) { 1279 FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie); 1280 } 1281 } 1282 1283 /* 1284 * Check for time slice expiration. If time slice has expired 1285 * set runrun to cause preemption. 1286 */ 1287 static void 1288 fx_tick(kthread_t *t) 1289 { 1290 boolean_t call_cpu_surrender = B_FALSE; 1291 fxproc_t *fxpp; 1292 1293 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1294 1295 thread_lock(t); 1296 1297 fxpp = (fxproc_t *)(t->t_cldata); 1298 1299 if (FX_HAS_CB(fxpp)) { 1300 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1301 pri_t newpri = fxpp->fx_pri; 1302 FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie, 1303 &new_quantum, &newpri); 1304 FX_ADJUST_QUANTUM(new_quantum); 1305 if ((int)new_quantum != fxpp->fx_pquantum) { 1306 fxpp->fx_pquantum = (int)new_quantum; 1307 fxpp->fx_timeleft = fxpp->fx_pquantum; 1308 } 1309 FX_ADJUST_PRI(newpri); 1310 if (newpri != fxpp->fx_pri) { 1311 fxpp->fx_pri = newpri; 1312 fx_change_priority(t, fxpp); 1313 } 1314 } 1315 1316 /* 1317 * Keep track of thread's project CPU usage. Note that projects 1318 * get charged even when threads are running in the kernel. 1319 */ 1320 call_cpu_surrender = CPUCAPS_CHARGE(t, &fxpp->fx_caps, 1321 CPUCAPS_CHARGE_ENFORCE); 1322 1323 if ((fxpp->fx_pquantum != FX_TQINF) && 1324 (--fxpp->fx_timeleft <= 0)) { 1325 pri_t new_pri; 1326 1327 /* 1328 * If we're doing preemption control and trying to 1329 * avoid preempting this thread, just note that 1330 * the thread should yield soon and let it keep 1331 * running (unless it's been a while). 1332 */ 1333 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1334 if (fxpp->fx_timeleft > -SC_MAX_TICKS) { 1335 DTRACE_SCHED1(schedctl__nopreempt, 1336 kthread_t *, t); 1337 schedctl_set_yield(t, 1); 1338 thread_unlock_nopreempt(t); 1339 return; 1340 } 1341 TNF_PROBE_2(schedctl_failsafe, 1342 "schedctl FX fx_tick", /* CSTYLED */, 1343 tnf_pid, pid, ttoproc(t)->p_pid, 1344 tnf_lwpid, lwpid, t->t_tid); 1345 } 1346 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1347 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1348 /* 1349 * When the priority of a thread is changed, 1350 * it may be necessary to adjust its position 1351 * on a sleep queue or dispatch queue. Even 1352 * when the priority is not changed, we need 1353 * to preserve round robin on dispatch queue. 1354 * The function thread_change_pri accomplishes 1355 * this. 1356 */ 1357 if (thread_change_pri(t, new_pri, 0)) { 1358 fxpp->fx_timeleft = fxpp->fx_pquantum; 1359 } else { 1360 call_cpu_surrender = B_TRUE; 1361 } 1362 } else if (t->t_state == TS_ONPROC && 1363 t->t_pri < t->t_disp_queue->disp_maxrunpri) { 1364 call_cpu_surrender = B_TRUE; 1365 } 1366 1367 if (call_cpu_surrender) { 1368 fxpp->fx_flags |= FXBACKQ; 1369 cpu_surrender(t); 1370 } 1371 thread_unlock_nopreempt(t); /* clock thread can't be preempted */ 1372 } 1373 1374 1375 static void 1376 fx_trapret(kthread_t *t) 1377 { 1378 cpu_t *cp = CPU; 1379 1380 ASSERT(THREAD_LOCK_HELD(t)); 1381 ASSERT(t == curthread); 1382 ASSERT(cp->cpu_dispthread == t); 1383 ASSERT(t->t_state == TS_ONPROC); 1384 } 1385 1386 1387 /* 1388 * Processes waking up go to the back of their queue. 1389 */ 1390 static void 1391 fx_wakeup(kthread_t *t) 1392 { 1393 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1394 1395 ASSERT(THREAD_LOCK_HELD(t)); 1396 1397 t->t_stime = lbolt; /* time stamp for the swapper */ 1398 if (FX_HAS_CB(fxpp)) { 1399 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1400 pri_t newpri = fxpp->fx_pri; 1401 FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie, 1402 &new_quantum, &newpri); 1403 FX_ADJUST_QUANTUM(new_quantum); 1404 if ((int)new_quantum != fxpp->fx_pquantum) { 1405 fxpp->fx_pquantum = (int)new_quantum; 1406 fxpp->fx_timeleft = fxpp->fx_pquantum; 1407 } 1408 1409 FX_ADJUST_PRI(newpri); 1410 if (newpri != fxpp->fx_pri) { 1411 fxpp->fx_pri = newpri; 1412 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1413 } 1414 } 1415 1416 fxpp->fx_flags &= ~FXBACKQ; 1417 1418 if (t->t_disp_time != lbolt) 1419 setbackdq(t); 1420 else 1421 setfrontdq(t); 1422 } 1423 1424 1425 /* 1426 * When a thread yields, put it on the back of the run queue. 1427 */ 1428 static void 1429 fx_yield(kthread_t *t) 1430 { 1431 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1432 1433 ASSERT(t == curthread); 1434 ASSERT(THREAD_LOCK_HELD(t)); 1435 1436 /* 1437 * Collect CPU usage spent before yielding CPU. 1438 */ 1439 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1440 1441 if (FX_HAS_CB(fxpp)) { 1442 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1443 pri_t newpri = fxpp->fx_pri; 1444 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1445 &new_quantum, &newpri); 1446 FX_ADJUST_QUANTUM(new_quantum); 1447 if ((int)new_quantum != fxpp->fx_pquantum) { 1448 fxpp->fx_pquantum = (int)new_quantum; 1449 fxpp->fx_timeleft = fxpp->fx_pquantum; 1450 } 1451 FX_ADJUST_PRI(newpri); 1452 fxpp->fx_pri = newpri; 1453 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1454 } 1455 1456 /* 1457 * Clear the preemption control "yield" bit since the user is 1458 * doing a yield. 1459 */ 1460 if (t->t_schedctl) 1461 schedctl_set_yield(t, 0); 1462 1463 if (fxpp->fx_timeleft <= 0) { 1464 /* 1465 * Time slice was artificially extended to avoid 1466 * preemption, so pretend we're preempting it now. 1467 */ 1468 DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft); 1469 fxpp->fx_timeleft = fxpp->fx_pquantum; 1470 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1471 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 1472 } 1473 1474 fxpp->fx_flags &= ~FXBACKQ; 1475 setbackdq(t); 1476 } 1477 1478 /* 1479 * Increment the nice value of the specified thread by incr and 1480 * return the new value in *retvalp. 1481 */ 1482 static int 1483 fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) 1484 { 1485 int newnice; 1486 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1487 fxkparms_t fxkparms; 1488 1489 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1490 1491 /* If there's no change to priority, just return current setting */ 1492 if (incr == 0) { 1493 if (retvalp) { 1494 *retvalp = fxpp->fx_nice - NZERO; 1495 } 1496 return (0); 1497 } 1498 1499 if ((incr < 0 || incr > 2 * NZERO) && 1500 secpolicy_setpriority(cr) != 0) 1501 return (EPERM); 1502 1503 /* 1504 * Specifying a nice increment greater than the upper limit of 1505 * 2 * NZERO - 1 will result in the thread's nice value being 1506 * set to the upper limit. We check for this before computing 1507 * the new value because otherwise we could get overflow 1508 * if a privileged user specified some ridiculous increment. 1509 */ 1510 if (incr > 2 * NZERO - 1) 1511 incr = 2 * NZERO - 1; 1512 1513 newnice = fxpp->fx_nice + incr; 1514 if (newnice > NZERO) 1515 newnice = NZERO; 1516 else if (newnice < 0) 1517 newnice = 0; 1518 1519 fxkparms.fx_uprilim = fxkparms.fx_upri = 1520 -((newnice - NZERO) * fx_maxupri) / NZERO; 1521 1522 fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; 1523 1524 fxkparms.fx_tqntm = FX_TQDEF; 1525 1526 /* 1527 * Reset the uprilim and upri values of the thread. Adjust 1528 * time quantum accordingly. 1529 */ 1530 1531 (void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL); 1532 1533 /* 1534 * Although fx_parmsset already reset fx_nice it may 1535 * not have been set to precisely the value calculated above 1536 * because fx_parmsset determines the nice value from the 1537 * user priority and we may have truncated during the integer 1538 * conversion from nice value to user priority and back. 1539 * We reset fx_nice to the value we calculated above. 1540 */ 1541 fxpp->fx_nice = (char)newnice; 1542 1543 if (retvalp) 1544 *retvalp = newnice - NZERO; 1545 1546 return (0); 1547 } 1548 1549 /* 1550 * Increment the priority of the specified thread by incr and 1551 * return the new value in *retvalp. 1552 */ 1553 static int 1554 fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) 1555 { 1556 int newpri; 1557 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1558 fxkparms_t fxkparms; 1559 1560 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1561 1562 /* If there's no change to priority, just return current setting */ 1563 if (incr == 0) { 1564 *retvalp = fxpp->fx_pri; 1565 return (0); 1566 } 1567 1568 newpri = fxpp->fx_pri + incr; 1569 if (newpri > fx_maxupri || newpri < 0) 1570 return (EINVAL); 1571 1572 *retvalp = newpri; 1573 fxkparms.fx_uprilim = fxkparms.fx_upri = newpri; 1574 fxkparms.fx_tqntm = FX_NOCHANGE; 1575 fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; 1576 1577 /* 1578 * Reset the uprilim and upri values of the thread. 1579 */ 1580 return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr)); 1581 } 1582 1583 static void 1584 fx_change_priority(kthread_t *t, fxproc_t *fxpp) 1585 { 1586 pri_t new_pri; 1587 1588 ASSERT(THREAD_LOCK_HELD(t)); 1589 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1590 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1591 t->t_cpri = fxpp->fx_pri; 1592 if (t == curthread || t->t_state == TS_ONPROC) { 1593 /* curthread is always onproc */ 1594 cpu_t *cp = t->t_disp_queue->disp_cpu; 1595 THREAD_CHANGE_PRI(t, new_pri); 1596 if (t == cp->cpu_dispthread) 1597 cp->cpu_dispatch_pri = DISP_PRIO(t); 1598 if (DISP_MUST_SURRENDER(t)) { 1599 fxpp->fx_flags |= FXBACKQ; 1600 cpu_surrender(t); 1601 } else { 1602 fxpp->fx_timeleft = fxpp->fx_pquantum; 1603 } 1604 } else { 1605 /* 1606 * When the priority of a thread is changed, 1607 * it may be necessary to adjust its position 1608 * on a sleep queue or dispatch queue. 1609 * The function thread_change_pri accomplishes 1610 * this. 1611 */ 1612 if (thread_change_pri(t, new_pri, 0)) { 1613 /* 1614 * The thread was on a run queue. Reset 1615 * its CPU timeleft from the quantum 1616 * associated with the new priority. 1617 */ 1618 fxpp->fx_timeleft = fxpp->fx_pquantum; 1619 } else { 1620 fxpp->fx_flags |= FXBACKQ; 1621 } 1622 } 1623 } 1624 1625 static int 1626 fx_alloc(void **p, int flag) 1627 { 1628 void *bufp; 1629 1630 bufp = kmem_alloc(sizeof (fxproc_t), flag); 1631 if (bufp == NULL) { 1632 return (ENOMEM); 1633 } else { 1634 *p = bufp; 1635 return (0); 1636 } 1637 } 1638 1639 static void 1640 fx_free(void *bufp) 1641 { 1642 if (bufp) 1643 kmem_free(bufp, sizeof (fxproc_t)); 1644 } 1645 1646 /* 1647 * Release the callback list mutex after successful lookup 1648 */ 1649 void 1650 fx_list_release(fxproc_t *fxpp) 1651 { 1652 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); 1653 kmutex_t *lockp = &fx_cb_list_lock[index]; 1654 mutex_exit(lockp); 1655 } 1656 1657 fxproc_t * 1658 fx_list_lookup(kt_did_t ktid) 1659 { 1660 int index = FX_CB_LIST_HASH(ktid); 1661 kmutex_t *lockp = &fx_cb_list_lock[index]; 1662 fxproc_t *fxpp; 1663 1664 mutex_enter(lockp); 1665 1666 for (fxpp = fx_cb_plisthead[index].fx_cb_next; 1667 fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) { 1668 if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid && 1669 fxpp->fx_callback != NULL) { 1670 /* 1671 * The caller is responsible for calling 1672 * fx_list_release to drop the lock upon 1673 * successful lookup 1674 */ 1675 return (fxpp); 1676 } 1677 } 1678 mutex_exit(lockp); 1679 return ((fxproc_t *)NULL); 1680 } 1681 1682 1683 /* 1684 * register a callback set of routines for current thread 1685 * thread should already be in FX class 1686 */ 1687 int 1688 fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie, 1689 pri_t pri, clock_t quantum) 1690 { 1691 1692 fxproc_t *fxpp; 1693 1694 if (fx_callback == NULL) 1695 return (EINVAL); 1696 1697 if (secpolicy_dispadm(CRED()) != 0) 1698 return (EPERM); 1699 1700 if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV) 1701 return (EINVAL); 1702 1703 if (!FX_ISVALID(pri, quantum)) 1704 return (EINVAL); 1705 1706 thread_lock(curthread); /* get dispatcher lock on thread */ 1707 1708 if (curthread->t_cid != fx_cid) { 1709 thread_unlock(curthread); 1710 return (EINVAL); 1711 } 1712 1713 fxpp = (fxproc_t *)(curthread->t_cldata); 1714 ASSERT(fxpp != NULL); 1715 if (FX_HAS_CB(fxpp)) { 1716 thread_unlock(curthread); 1717 return (EINVAL); 1718 } 1719 1720 fxpp->fx_callback = fx_callback; 1721 fxpp->fx_cookie = cookie; 1722 1723 if (pri != FX_CB_NOCHANGE) { 1724 fxpp->fx_pri = pri; 1725 FX_ADJUST_PRI(fxpp->fx_pri); 1726 if (quantum == FX_TQDEF) { 1727 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1728 } else if (quantum == FX_TQINF) { 1729 fxpp->fx_pquantum = FX_TQINF; 1730 } else if (quantum != FX_NOCHANGE) { 1731 FX_ADJUST_QUANTUM(quantum); 1732 fxpp->fx_pquantum = quantum; 1733 } 1734 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1735 if (quantum == FX_TQINF) 1736 fxpp->fx_pquantum = FX_TQINF; 1737 else { 1738 FX_ADJUST_QUANTUM(quantum); 1739 fxpp->fx_pquantum = quantum; 1740 } 1741 } 1742 1743 fxpp->fx_ktid = ddi_get_kt_did(); 1744 1745 fx_change_priority(curthread, fxpp); 1746 1747 thread_unlock(curthread); 1748 1749 /* 1750 * Link new structure into fxproc list. 1751 */ 1752 FX_CB_LIST_INSERT(fxpp); 1753 return (0); 1754 } 1755 1756 /* unregister a callback set of routines for current thread */ 1757 int 1758 fx_unregister_callbacks() 1759 { 1760 fxproc_t *fxpp; 1761 1762 if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) { 1763 /* 1764 * did not have a registered callback; 1765 */ 1766 return (EINVAL); 1767 } 1768 1769 thread_lock(fxpp->fx_tp); 1770 fxpp->fx_callback = NULL; 1771 fxpp->fx_cookie = NULL; 1772 thread_unlock(fxpp->fx_tp); 1773 fx_list_release(fxpp); 1774 1775 FX_CB_LIST_DELETE(fxpp); 1776 return (0); 1777 } 1778 1779 /* 1780 * modify priority and/or quantum value of a thread with callback 1781 */ 1782 int 1783 fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri) 1784 { 1785 fxproc_t *fxpp; 1786 1787 if (!FX_ISVALID(pri, quantum)) 1788 return (EINVAL); 1789 1790 if ((fxpp = fx_list_lookup(ktid)) == NULL) { 1791 /* 1792 * either thread had exited or did not have a registered 1793 * callback; 1794 */ 1795 return (ESRCH); 1796 } 1797 1798 thread_lock(fxpp->fx_tp); 1799 1800 if (pri != FX_CB_NOCHANGE) { 1801 fxpp->fx_pri = pri; 1802 FX_ADJUST_PRI(fxpp->fx_pri); 1803 if (quantum == FX_TQDEF) { 1804 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1805 } else if (quantum == FX_TQINF) { 1806 fxpp->fx_pquantum = FX_TQINF; 1807 } else if (quantum != FX_NOCHANGE) { 1808 FX_ADJUST_QUANTUM(quantum); 1809 fxpp->fx_pquantum = quantum; 1810 } 1811 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1812 if (quantum == FX_TQINF) { 1813 fxpp->fx_pquantum = FX_TQINF; 1814 } else { 1815 FX_ADJUST_QUANTUM(quantum); 1816 fxpp->fx_pquantum = quantum; 1817 } 1818 } 1819 1820 fx_change_priority(fxpp->fx_tp, fxpp); 1821 1822 thread_unlock(fxpp->fx_tp); 1823 fx_list_release(fxpp); 1824 return (0); 1825 } 1826 1827 1828 /* 1829 * return an iblock cookie for mutex initialization to be used in callbacks 1830 */ 1831 void * 1832 fx_get_mutex_cookie() 1833 { 1834 return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL)); 1835 } 1836 1837 /* 1838 * return maximum relative priority 1839 */ 1840 pri_t 1841 fx_get_maxpri() 1842 { 1843 return (fx_maxumdpri); 1844 } 1845