/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysinfo.h>
#include <sys/var.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/inline.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/bitmap.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/vtrace.h>
#include <sys/tnf.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/pg.h>
#include <sys/cmt.h>
#include <sys/bitset.h>
#include <sys/schedctl.h>
#include <sys/atomic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/smt.h>

#include <vm/as.h>
#define	BOUND_CPU	0x1
#define	BOUND_PARTITION	0x2
#define	BOUND_INTR	0x4

/* Dispatch queue allocation structure and functions */
struct disp_queue_info {
	disp_t	*dp;
	dispq_t	*olddispq;
	dispq_t	*newdispq;
	ulong_t	*olddqactmap;
	ulong_t	*newdqactmap;
	int	oldnglobpris;
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void		(*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void		(*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;		/* priority where kernel preemption applies */
pri_t	upreemptpri = 0;	/* priority where normal preemption applies */
pri_t	intr_pri;		/* interrupt thread priority base level */

#define	KPQPRI	-1		/* pri where cpu affinity is dropped for kpq */
pri_t	kpqpri = KPQPRI;	/* can be set in /etc/system */
disp_t	cpu0_disp;		/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;		/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri or the min interrupt pri.
 */
int	only_intr_kpreempt;

extern void	set_idle_cpu(int cpun);
extern void	unset_idle_cpu(int cpun);
static void	setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0
#define	SETKP_FRONT	1

/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is in hertz.
 */
#define	RECHOOSE_INTERVAL	3
int	rechoose_interval = RECHOOSE_INTERVAL;

/*
 * Parameter that determines how long (in nanoseconds) a thread must
 * be sitting on a run queue before it can be stolen by another CPU
 * to reduce migrations.
 *
 * nosteal_nsec should be set by platform code via
 * cmp_set_nosteal_interval() to an appropriate value.  It is set to
 * NOSTEAL_UNINITIALIZED here to indicate that it is uninitialized.
 * Setting nosteal_nsec to 0 effectively disables the nosteal
 * 'protection'.
 */
#define	NOSTEAL_UNINITIALIZED	(-1)
hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED;
extern void cmp_set_nosteal_interval(void);
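/*
 * Illustrative note (not in the original source): kpqpri above is
 * documented as settable from /etc/system, and nosteal_nsec is an
 * ordinary kernel global, so a sketch of boot-time tuning might look
 * like:
 *
 *	set kpqpri = 100
 *	set nosteal_nsec = 0
 *
 * The kpqpri value here is a hypothetical example; the nosteal_nsec
 * line disables the nosteal 'protection' as described above.
 */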
id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */

static void	cpu_dispqalloc(int numpris);

/*
 * This gets returned by disp_getwork/disp_getbest if we couldn't steal
 * a thread because it was sitting on its run queue for a very short
 * period of time.
 */
#define	T_DONTSTEAL	(kthread_t *)(-1) /* returned by disp_getwork/getbest */

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *	variables and structures based on the maximum priority.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		v.v_nglobpris = newnglobpris;
	}
}
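/*
 * Worked example (illustrative; the exact numbers depend on which
 * classes are loaded): if the highest class-supplied global priority
 * is maxglobpri == 99 (e.g., TS/IA spanning 0-59 and SYS spanning
 * 60-99) and LOCK_LEVEL == 10, then disp_setup() computes
 *
 *	newnglobpris = 99 + 1 + 10 = 110
 *
 * covering priorities 0-99 for threads plus ten interrupt-thread
 * levels above them.  Loading a class that later reports a larger
 * cl_maxglobpri grows the queues again via disp_add() -> disp_setup().
 */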
/*
 * dispinit - Called to initialize all loaded classes and the
 *	dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);

	mutex_enter(&cpu_lock);
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;

	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();

	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news.  Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */
	for (cid = 0; cid < nclass; cid++) {
		sclass_t	*sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}

	/*
	 * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is
	 * to say, maxclsyspri + 1.  However, over time, the system has used
	 * more and more asynchronous kernel threads, with an increasing number
	 * of these doing work on direct behalf of higher-level software (e.g.,
	 * network processing).  This has led to potential priority inversions:
	 * threads doing low-priority lengthy kernel work can effectively
	 * delay kernel-level processing of higher-priority data.  To minimize
	 * such inversions, we set kpreemptpri to be v_maxsyspri; anything in
	 * the kernel that runs at maxclsyspri will therefore induce kernel
	 * preemption, and this priority should be used if/when an asynchronous
	 * thread (or, as is often the case, task queue) is performing a task
	 * on behalf of higher-level software (or any task that is otherwise
	 * latency-sensitive).
	 */
	kpreemptpri = (pri_t)v.v_maxsyspri;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;
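	/*
	 * Sketch of the usage the comment above prescribes (hypothetical
	 * code, not from this file): an asynchronous worker doing work on
	 * behalf of higher-level software would be given maxclsyspri so
	 * that its execution induces kernel preemption, e.g.:
	 *
	 *	tq = taskq_create("frob_taskq", 1, maxclsyspri, 1,
	 *	    INT_MAX, TASKQ_PREPOPULATE);
	 *
	 * The taskq name and sizing parameters are invented for
	 * illustration only.
	 */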
	ASSERT(maxglobpri >= 0);
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Platform specific sticky scheduler setup.
	 */
	if (nosteal_nsec == NOSTEAL_UNINITIALIZED)
		cmp_set_nosteal_interval();

	/*
	 * Get the default class ID; this may be later modified via
	 * dispadmin(1M).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}

/*
 * disp_add - Called with class pointer to initialize the dispatcher
 *	for a newly loaded class.
 */
void
disp_add(sclass_t *clp)
{
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	mutex_enter(&cpu_lock);

	/*
	 * Initialize the scheduler class.
	 */
	maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
	cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
	if (cl_maxglobpri > maxglobpri)
		maxglobpri = cl_maxglobpri;

	/*
	 * Save old queue information.  Since we're initializing a
	 * new scheduling class which has just been loaded, the size
	 * of the dispq may have changed.  We need to handle that here.
	 */
	disp_setup(maxglobpri, v.v_nglobpris);

	mutex_exit(&cpu_lock);
}

/*
 * For each CPU, allocate new dispatch queues
 * with the stated number of priorities.
 */
static void
cpu_dispqalloc(int numpris)
{
	cpu_t	*cpup;
	struct disp_queue_info	*disp_mem;
	int i, num;

	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_mem = kmem_zalloc(NCPU *
	    sizeof (struct disp_queue_info), KM_SLEEP);

	/*
	 * This routine must allocate all of the memory before stopping
	 * the cpus because it must not sleep in kmem_alloc while the
	 * CPUs are stopped.  Locks they hold will not be freed until they
	 * are restarted.
	 */
	i = 0;
	cpup = cpu_list;
	do {
		disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
		i++;
		cpup = cpup->cpu_next;
	} while (cpup != cpu_list);
	num = i;

	pause_cpus(NULL, NULL);
	for (i = 0; i < num; i++)
		disp_dq_assign(&disp_mem[i], numpris);
	start_cpus();

	/*
	 * I must free all of the memory after starting the cpus because
	 * I can not risk sleeping in kmem_free while the cpus are stopped.
	 */
	for (i = 0; i < num; i++)
		disp_dq_free(&disp_mem[i]);

	kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
}
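/*
 * Descriptive note (added for clarity): cpu_dispqalloc() above follows a
 * three-phase resize pattern that recurs whenever per-CPU state must be
 * swapped out from under running CPUs:
 *
 *	1. allocate all new memory up front (kmem_zalloc may sleep);
 *	2. pause_cpus(), swap the pointers in disp_dq_assign() -- which
 *	   must not sleep -- then start_cpus();
 *	3. free the old memory only after the CPUs are running again
 *	   (kmem_free may also sleep).
 */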
static void
disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
{
	dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
	dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dptr->dp = dp;
}

static void
disp_dq_assign(struct disp_queue_info *dptr, int numpris)
{
	disp_t	*dp;

	dp = dptr->dp;
	dptr->olddispq = dp->disp_q;
	dptr->olddqactmap = dp->disp_qactmap;
	dptr->oldnglobpris = dp->disp_npri;

	ASSERT(dptr->oldnglobpris < numpris);

	if (dptr->olddispq != NULL) {
		/*
		 * Use kcopy because bcopy is platform-specific
		 * and could block while we might have paused the cpus.
		 */
		(void) kcopy(dptr->olddispq, dptr->newdispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
		(void) kcopy(dptr->olddqactmap, dptr->newdqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) *
		    sizeof (long));
	}
	dp->disp_q = dptr->newdispq;
	dp->disp_qactmap = dptr->newdqactmap;
	dp->disp_q_limit = &dptr->newdispq[numpris];
	dp->disp_npri = numpris;
}

static void
disp_dq_free(struct disp_queue_info *dptr)
{
	if (dptr->olddispq != NULL)
		kmem_free(dptr->olddispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
	if (dptr->olddqactmap != NULL)
		kmem_free(dptr->olddqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long));
}

/*
 * For a newly created CPU, initialize the dispatch queue.
 * This is called before the CPU is known through cpu[] or on any lists.
 */
void
disp_cpu_init(cpu_t *cp)
{
	disp_t	*dp;
	dispq_t	*newdispq;
	ulong_t	*newdqactmap;

	ASSERT(MUTEX_HELD(&cpu_lock));	/* protect dispatcher queue sizes */

	if (cp == cpu0_disp.disp_cpu)
		dp = &cpu0_disp;
	else
		dp = kmem_alloc(sizeof (disp_t), KM_SLEEP);
	bzero(dp, sizeof (disp_t));
	cp->cpu_disp = dp;
	dp->disp_cpu = cp;
	dp->disp_maxrunpri = -1;
	dp->disp_max_unbound_pri = -1;
	DISP_LOCK_INIT(&cp->cpu_thread_lock);

	/*
	 * Allocate memory for the dispatcher queue headers
	 * and the active queue bitmap.
	 */
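	/*
	 * Illustrative sizing (added comment): with v.v_nglobpris == 110
	 * and 64-bit words (BT_NBIPUL == 64), the active-queue bitmap
	 * below occupies (110 / 64) + 1 == 2 longs, one bit per priority
	 * level.
	 */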
	newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
	newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dp->disp_q = newdispq;
	dp->disp_qactmap = newdqactmap;
	dp->disp_q_limit = &newdispq[v.v_nglobpris];
	dp->disp_npri = v.v_nglobpris;
}

void
disp_cpu_fini(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_kp_free(cp->cpu_disp);
	if (cp->cpu_disp != &cpu0_disp)
		kmem_free(cp->cpu_disp, sizeof (disp_t));
}

/*
 * Allocate new, larger kpreempt dispatch queue to replace the old one.
 */
void
disp_kp_alloc(disp_t *dq, pri_t npri)
{
	struct disp_queue_info	mem_info;

	if (npri > dq->disp_npri) {
		/*
		 * Allocate memory for the new array.
		 */
		disp_dq_alloc(&mem_info, npri, dq);

		/*
		 * We need to copy the old structures to the new
		 * and free the old.
		 */
		disp_dq_assign(&mem_info, npri);
		disp_dq_free(&mem_info);
	}
}

/*
 * Free dispatch queue.
 * Used for the kpreempt queues for a removed CPU partition and
 * for the per-CPU queues of deleted CPUs.
 */
void
disp_kp_free(disp_t *dq)
{
	struct disp_queue_info	mem_info;

	mem_info.olddispq = dq->disp_q;
	mem_info.olddqactmap = dq->disp_qactmap;
	mem_info.oldnglobpris = dq->disp_npri;
	disp_dq_free(&mem_info);
}

/*
 * End dispatcher and scheduler initialization.
 */

/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 *
 * This is essentially a simpler version of disp_getwork()
 * to be called by CPUs preparing to "halt".
 */
int
disp_anywork(void)
{
	cpu_t		*cp = CPU;
	cpu_t		*ocp;
	volatile int	*local_nrunnable = &cp->cpu_disp->disp_nrunnable;

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);

		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			/*
			 * Something has appeared on the local run queue.
			 */
			if (*local_nrunnable > 0)
				return (1);

			/*
			 * If we encounter another idle CPU that will
			 * soon be trolling around through disp_anywork(),
			 * terminate our walk here and let this other CPU
			 * patrol the next part of the list.
			 */
			if (ocp->cpu_dispatch_pri == -1 &&
			    (ocp->cpu_disp_flags & CPU_DISP_HALTED) == 0)
				return (0);

			/*
			 * Work can be taken from another CPU if:
			 *	- There is unbound work on the run queue
			 *	- That work isn't a thread undergoing a
			 *	  context switch on an otherwise empty queue.
			 *	- The CPU isn't running the idle loop.
			 */
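			/*
			 * (Descriptive note added for clarity: the three
			 * bullets above correspond to the three conjuncts
			 * below -- disp_max_unbound_pri != -1 for unbound
			 * work, the CPU_DISP_DONTSTEAL/nrunnable == 1 test
			 * for the context-switch case, and
			 * cpu_dispatch_pri != -1 for the idle loop.)
			 */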
			if (ocp->cpu_disp->disp_max_unbound_pri != -1 &&
			    !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
			    ocp->cpu_disp->disp_nrunnable == 1) &&
			    ocp->cpu_dispatch_pri != -1)
				return (1);
		}
	}
	return (0);
}

/*
 * Called when CPU enters the idle loop
 */
static void
idle_enter()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(CMS_IDLE, gethrtime_unscaled());
	CPU_STATS_ADDQ(cp, sys, idlethread, 1);
	set_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Called when CPU exits the idle loop
 */
static void
idle_exit()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled());
	unset_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Idle loop.
 */
void
idle()
{
	struct cpu	*cp = CPU;		/* pointer to this CPU */
	kthread_t	*t;			/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			idle_exit();
			swtch();
		} else {
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;

			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}

			/*
			 * If there was a thread but we couldn't steal
			 * it, then keep trying.
			 */
			if (t == T_DONTSTEAL)
				continue;

			idle_exit();
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}

/*
 * Preempt the currently running thread in favor of the highest
 * priority thread.  The class of the current thread controls
 * where it goes on the dispatcher queues.  If panicking, turn
 * preemption off.
 */
void
preempt()
{
	kthread_t	*t = curthread;
	klwp_t		*lwp = ttolwp(curthread);

	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * This thread has already been chosen to be run on
		 * another CPU.  Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		THREAD_TRANSITION(t);
		CL_PREEMPT(t);
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}

extern kthread_t *thread_unpin();

/*
 * disp() - find the highest priority thread for this processor to run, and
 * set it in TS_ONPROC state so that resume() can be called to run it.
 */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;

	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL ||
			    tp == T_DONTSTEAL) {
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
		    "disp_end:tid %p", tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it, so remove it from the queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.  If nrunnable != 0, we just took
		 * the last runnable thread off the highest queue, so
		 * recompute disp_maxrunpri.
		 */
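		/*
		 * Worked example (illustrative): for pri == 75 on a 64-bit
		 * kernel (BT_ULSHIFT == 6), maxrunword is 75 >> 6 == 1 and
		 * BT_BIW(75) selects bit 75 % 64 == 11 of that word, so the
		 * clear below turns off bit 11 of dqactmap[1].
		 */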
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
	    "disp_end:tid %p", tp);

	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	return (tp);
}

/*
 * swtch()
 *	Find best runnable thread and run it.
 *	Called with the current thread already switched to a new state,
 *	on a sleep queue, run queue, stopped, and not zombied.
 *	May be called at any spl level less than or equal to LOCK_LEVEL.
 *	Always drops spl to the base level (spl0()).
 */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			hrtime_t now;

			now = gethrtime_unscaled();
			pg_ev_thread_swtch(cp, now, t, next);

			/*
			 * If t was previously in the TS_ONPROC state,
			 * setfrontdq and setbackdq won't have set its t_waitrq.
			 * Since we now finally know that we're switching away
			 * from this thread, set its t_waitrq if it is on a run
			 * queue.
			 */
			if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) {
				t->t_waitrq = now;
			}

			/*
			 * Restore the mstate of the thread that we are
			 * switching to.
			 */
			restore_mstate(next);

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = ddi_get_lbolt();
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here.
			 */
		} else {
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);

			/*
			 * Threads that enqueue themselves on a run queue defer
			 * setting t_waitrq.  It is then either set in swtch()
			 * when the CPU is actually yielded, or not at all if
			 * it is remaining on the CPU.
			 * There is however a window between where the thread
			 * placed itself on a run queue, and where it selects
			 * itself in disp(), where a third party (e.g. clock()
			 * doing tick processing) may have re-enqueued this
			 * thread, setting t_waitrq in the process.  We detect
			 * this race by noticing that despite switching to
			 * ourself, our t_waitrq has been set, and should be
			 * cleared.
			 */
			if (t->t_waitrq != 0)
				t->t_waitrq = 0;

			pg_ev_thread_remain(cp, t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}

/*
 * swtch_from_zombie()
 *	Special case of swtch(), which allows checks for TS_ZOMB to be
 *	eliminated from normal resume.
 *	Find best runnable thread and run it.
 *	Called with the current thread zombied.
 *	Zombies cannot migrate, so CPU references are safe.
 */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	pg_ev_thread_swtch(cpu, gethrtime_unscaled(), curthread, next);

	restore_mstate(next);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we certainly will not
	 * return here.
	 */
}

#if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint))

/*
 * search_disp_queues()
 *	Search the given dispatch queues for thread tp.
 *	Return 1 if tp is found, otherwise return 0.
 */
static int
search_disp_queues(disp_t *dp, kthread_t *tp)
{
	dispq_t	*dq;
	dispq_t	*eq;

	disp_lock_enter_high(&dp->disp_lock);

	for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) {
		kthread_t	*rp;

		ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL);

		for (rp = dq->dq_first; rp; rp = rp->t_link)
			if (tp == rp) {
				disp_lock_exit_high(&dp->disp_lock);
				return (1);
			}
	}
	disp_lock_exit_high(&dp->disp_lock);

	return (0);
}

/*
 * thread_on_queue()
 *	Search all per-CPU dispatch queues and all partition-wide kpreempt
 *	queues for thread tp.  Return 1 if tp is found, otherwise return 0.
 */
static int
thread_on_queue(kthread_t *tp)
{
	cpu_t		*cp;
	struct cpupart	*part;

	ASSERT(getpil() >= DISP_LEVEL);

	/*
	 * Search the per-CPU dispatch queues for tp.
	 */
	cp = CPU;
	do {
		if (search_disp_queues(cp->cpu_disp, tp))
			return (1);
	} while ((cp = cp->cpu_next_onln) != CPU);

	/*
	 * Search the partition-wide kpreempt queues for tp.
	 */
	part = CPU->cpu_part;
	do {
		if (search_disp_queues(&part->cp_kp_queue, tp))
			return (1);
	} while ((part = part->cp_next) != CPU->cpu_part);

	return (0);
}

#else

#define	thread_on_queue(tp)	0	/* ASSERT must be !thread_on_queue */

#endif	/* DEBUG */

/*
 * Like swtch(), but switch to a specified thread taken from another CPU.
 * Called with spl high.
 */
void
swtch_to(kthread_t *next)
{
	cpu_t		*cp = CPU;
	hrtime_t	now;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	/*
	 * Update context switch statistics.
	 */
	CPU_STATS_ADDQ(cp, sys, pswitch, 1);

	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	now = gethrtime_unscaled();
	pg_ev_thread_swtch(cp, now, curthread, next);

	/* OK to steal anything left on run queue */
	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

	/* record last execution time */
	cp->cpu_last_swtch = curthread->t_disp_time = ddi_get_lbolt();

	/*
	 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq
	 * won't have set its t_waitrq.  Since we now finally know that we're
	 * switching away from this thread, set its t_waitrq if it is on a run
	 * queue.
1120f2bd4627Sjohansen */ 1121f2bd4627Sjohansen if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) { 11220e751525SEric Saxe curthread->t_waitrq = now; 1123f2bd4627Sjohansen } 1124f2bd4627Sjohansen 1125f2bd4627Sjohansen /* restore next thread to previously running microstate */ 1126f2bd4627Sjohansen restore_mstate(next); 1127f2bd4627Sjohansen 11287c478bd9Sstevel@tonic-gate if (dtrace_vtime_active) 11297c478bd9Sstevel@tonic-gate dtrace_vtime_switch(next); 11307c478bd9Sstevel@tonic-gate 11317c478bd9Sstevel@tonic-gate resume(next); 11327c478bd9Sstevel@tonic-gate /* 11337c478bd9Sstevel@tonic-gate * The TR_RESUME_END and TR_SWTCH_END trace points 11347c478bd9Sstevel@tonic-gate * appear at the end of resume(), because we may not 11357c478bd9Sstevel@tonic-gate * return here 11367c478bd9Sstevel@tonic-gate */ 11377c478bd9Sstevel@tonic-gate } 11387c478bd9Sstevel@tonic-gate 11397c478bd9Sstevel@tonic-gate static void 11407c478bd9Sstevel@tonic-gate cpu_resched(cpu_t *cp, pri_t tpri) 11417c478bd9Sstevel@tonic-gate { 11427c478bd9Sstevel@tonic-gate int call_poke_cpu = 0; 11437c478bd9Sstevel@tonic-gate pri_t cpupri = cp->cpu_dispatch_pri; 11447c478bd9Sstevel@tonic-gate 1145455e370cSJohn Levon if (cpupri != CPU_IDLE_PRI && cpupri < tpri) { 11467c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED, 11477c478bd9Sstevel@tonic-gate "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri); 11487c478bd9Sstevel@tonic-gate if (tpri >= upreemptpri && cp->cpu_runrun == 0) { 11497c478bd9Sstevel@tonic-gate cp->cpu_runrun = 1; 11507c478bd9Sstevel@tonic-gate aston(cp->cpu_dispthread); 11517c478bd9Sstevel@tonic-gate if (tpri < kpreemptpri && cp != CPU) 11527c478bd9Sstevel@tonic-gate call_poke_cpu = 1; 11537c478bd9Sstevel@tonic-gate } 11547c478bd9Sstevel@tonic-gate if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) { 11557c478bd9Sstevel@tonic-gate cp->cpu_kprunrun = 1; 11567c478bd9Sstevel@tonic-gate if (cp != CPU) 11577c478bd9Sstevel@tonic-gate call_poke_cpu = 1; 11587c478bd9Sstevel@tonic-gate } 11597c478bd9Sstevel@tonic-gate } 11607c478bd9Sstevel@tonic-gate 11617c478bd9Sstevel@tonic-gate /* 11627c478bd9Sstevel@tonic-gate * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 11637c478bd9Sstevel@tonic-gate */ 11647c478bd9Sstevel@tonic-gate membar_enter(); 11657c478bd9Sstevel@tonic-gate 11667c478bd9Sstevel@tonic-gate if (call_poke_cpu) 11677c478bd9Sstevel@tonic-gate poke_cpu(cp->cpu_id); 11687c478bd9Sstevel@tonic-gate } 11697c478bd9Sstevel@tonic-gate 11707c478bd9Sstevel@tonic-gate /* 11717c478bd9Sstevel@tonic-gate * setbackdq() keeps runqs balanced such that the difference in length 11727c478bd9Sstevel@tonic-gate * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF. 11737c478bd9Sstevel@tonic-gate * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths 11747c478bd9Sstevel@tonic-gate * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will 11757c478bd9Sstevel@tonic-gate * try to keep runqs perfectly balanced regardless of the thread priority. 
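 *
 * A worked example with the values below (illustrative only): a pri-40
 * thread whose chosen queue already holds 3 threads at that level has
 * qlen reduced to 3 - RUNQ_MAX_DIFF = 1, so it migrates only to a
 * neighbour holding 0; a pri-10 thread (below RUNQ_MATCH_PRI), or any
 * thread with TS_RUNQMATCH set, keeps qlen at 3 and migrates to any
 * neighbour holding fewer, which pushes the queue lengths to match.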
11767c478bd9Sstevel@tonic-gate */ 11777c478bd9Sstevel@tonic-gate #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */ 11787c478bd9Sstevel@tonic-gate #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */ 11797c478bd9Sstevel@tonic-gate #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt) 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate /* 11826890d023SEric Saxe * Macro that evaluates to true if it is likely that the thread has cache 11836890d023SEric Saxe * warmth. This is based on the amount of time that has elapsed since the 11846890d023SEric Saxe * thread last ran. If that amount of time is less than "rechoose_interval" 11856890d023SEric Saxe * ticks, then we decide that the thread has enough cache warmth to warrant 11866890d023SEric Saxe * some affinity for t->t_cpu. 11876890d023SEric Saxe */ 11886890d023SEric Saxe #define THREAD_HAS_CACHE_WARMTH(thread) \ 11896890d023SEric Saxe ((thread == curthread) || \ 1190d3d50737SRafael Vanoni ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval)) 11916890d023SEric Saxe /* 11927c478bd9Sstevel@tonic-gate * Put the specified thread on the back of the dispatcher 11937c478bd9Sstevel@tonic-gate * queue corresponding to its current priority. 11947c478bd9Sstevel@tonic-gate * 11957c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state 11967c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl. 11977c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked. 11987c478bd9Sstevel@tonic-gate */ 11997c478bd9Sstevel@tonic-gate void 12007c478bd9Sstevel@tonic-gate setbackdq(kthread_t *tp) 12017c478bd9Sstevel@tonic-gate { 12027c478bd9Sstevel@tonic-gate dispq_t *dq; 12037c478bd9Sstevel@tonic-gate disp_t *dp; 12047c478bd9Sstevel@tonic-gate cpu_t *cp; 12057c478bd9Sstevel@tonic-gate pri_t tpri; 12067c478bd9Sstevel@tonic-gate int bound; 12076890d023SEric Saxe boolean_t self; 12087c478bd9Sstevel@tonic-gate 12097c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 12107c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 12117c478bd9Sstevel@tonic-gate ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 12127c478bd9Sstevel@tonic-gate 12137c478bd9Sstevel@tonic-gate /* 12147c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't 12157c478bd9Sstevel@tonic-gate * queue it, but wake sched. 
12167c478bd9Sstevel@tonic-gate */ 12177c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 12187c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp); 12197c478bd9Sstevel@tonic-gate return; 12207c478bd9Sstevel@tonic-gate } 12217c478bd9Sstevel@tonic-gate 12226890d023SEric Saxe self = (tp == curthread); 12236890d023SEric Saxe 1224abd41583Sgd209917 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1225abd41583Sgd209917 bound = 1; 1226abd41583Sgd209917 else 1227abd41583Sgd209917 bound = 0; 1228abd41583Sgd209917 12297c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 12307c478bd9Sstevel@tonic-gate if (ncpus == 1) 12317c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 1232abd41583Sgd209917 else if (!bound) { 12337c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) { 12347c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_BACK); 12357c478bd9Sstevel@tonic-gate return; 12367c478bd9Sstevel@tonic-gate } 12376890d023SEric Saxe 12387c478bd9Sstevel@tonic-gate /* 12396890d023SEric Saxe * We'll generally let this thread continue to run where 12406890d023SEric Saxe * it last ran...but will consider migration if: 1241455e370cSJohn Levon * - The thread probably doesn't have much cache warmth. 1242c3377ee9SJohn Levon * - SMT exclusion would prefer us to run elsewhere 12436890d023SEric Saxe * - The CPU where it last ran is the target of an offline 12446890d023SEric Saxe * request. 1245455e370cSJohn Levon * - The thread last ran outside its home lgroup. 12467c478bd9Sstevel@tonic-gate */ 12476890d023SEric Saxe if ((!THREAD_HAS_CACHE_WARMTH(tp)) || 1248c3377ee9SJohn Levon !smt_should_run(tp, tp->t_cpu) || 1249455e370cSJohn Levon (tp->t_cpu == cpu_inmotion) || 1250455e370cSJohn Levon !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) { 1251455e370cSJohn Levon cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); 12526890d023SEric Saxe } else { 12536890d023SEric Saxe cp = tp->t_cpu; 12546890d023SEric Saxe } 12557c478bd9Sstevel@tonic-gate 12567c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) { 12577c478bd9Sstevel@tonic-gate int qlen; 12587c478bd9Sstevel@tonic-gate 12597c478bd9Sstevel@tonic-gate /* 1260fb2f18f8Sesaxe * Perform any CMT load balancing 12617c478bd9Sstevel@tonic-gate */ 1262fb2f18f8Sesaxe cp = cmt_balance(tp, cp); 12637c478bd9Sstevel@tonic-gate 12647c478bd9Sstevel@tonic-gate /* 12657c478bd9Sstevel@tonic-gate * Balance across the run queues 12667c478bd9Sstevel@tonic-gate */ 12677c478bd9Sstevel@tonic-gate qlen = RUNQ_LEN(cp, tpri); 12687c478bd9Sstevel@tonic-gate if (tpri >= RUNQ_MATCH_PRI && 12697c478bd9Sstevel@tonic-gate !(tp->t_schedflag & TS_RUNQMATCH)) 12707c478bd9Sstevel@tonic-gate qlen -= RUNQ_MAX_DIFF; 12717c478bd9Sstevel@tonic-gate if (qlen > 0) { 1272685679f7Sakolb cpu_t *newcp; 12737c478bd9Sstevel@tonic-gate 1274685679f7Sakolb if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) { 1275685679f7Sakolb newcp = cp->cpu_next_part; 1276685679f7Sakolb } else if ((newcp = cp->cpu_next_lpl) == cp) { 1277685679f7Sakolb newcp = cp->cpu_next_part; 12787c478bd9Sstevel@tonic-gate } 1279685679f7Sakolb 1280c3377ee9SJohn Levon if (smt_should_run(tp, newcp) && 1281455e370cSJohn Levon RUNQ_LEN(newcp, tpri) < qlen) { 1282685679f7Sakolb DTRACE_PROBE3(runq__balance, 1283685679f7Sakolb kthread_t *, tp, 1284685679f7Sakolb cpu_t *, cp, cpu_t *, newcp); 1285685679f7Sakolb cp = newcp; 1286685679f7Sakolb } 12877c478bd9Sstevel@tonic-gate } 12887c478bd9Sstevel@tonic-gate } else { 12897c478bd9Sstevel@tonic-gate /* 12907c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition. 
12917c478bd9Sstevel@tonic-gate */ 1292455e370cSJohn Levon cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp, 1293455e370cSJohn Levon tp->t_pri); 12947c478bd9Sstevel@tonic-gate } 12957c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 12967c478bd9Sstevel@tonic-gate } else { 12977c478bd9Sstevel@tonic-gate /* 12987c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for 12997c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the 13007c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must 13017c478bd9Sstevel@tonic-gate * favour weak binding over strong. 13027c478bd9Sstevel@tonic-gate */ 13037c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ? 13047c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu; 13057c478bd9Sstevel@tonic-gate } 1306f2bd4627Sjohansen /* 1307f2bd4627Sjohansen * A thread that is ONPROC may be temporarily placed on the run queue 1308f2bd4627Sjohansen * but then chosen to run again by disp. If the thread we're placing on 1309f2bd4627Sjohansen * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1310f2bd4627Sjohansen * replacement process is actually scheduled in swtch(). In this 1311f2bd4627Sjohansen * situation, curthread is the only thread that could be in the ONPROC 1312f2bd4627Sjohansen * state. 1313f2bd4627Sjohansen */ 13146890d023SEric Saxe if ((!self) && (tp->t_waitrq == 0)) { 1315f2bd4627Sjohansen hrtime_t curtime; 1316f2bd4627Sjohansen 1317f2bd4627Sjohansen curtime = gethrtime_unscaled(); 1318f2bd4627Sjohansen (void) cpu_update_pct(tp, curtime); 1319f2bd4627Sjohansen tp->t_waitrq = curtime; 1320f2bd4627Sjohansen } else { 1321f2bd4627Sjohansen (void) cpu_update_pct(tp, gethrtime_unscaled()); 1322f2bd4627Sjohansen } 1323f2bd4627Sjohansen 13247c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 13257c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 13267c478bd9Sstevel@tonic-gate 13277c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 13287c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 13297c478bd9Sstevel@tonic-gate tpri, cp, tp); 13307c478bd9Sstevel@tonic-gate 13317c478bd9Sstevel@tonic-gate #ifndef NPROBE 13327c478bd9Sstevel@tonic-gate /* Kernel probe */ 13337c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 13347c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 13357c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 13367c478bd9Sstevel@tonic-gate 13377c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 13387c478bd9Sstevel@tonic-gate 13397c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 13407c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 13417c478bd9Sstevel@tonic-gate tp->t_link = NULL; 13427c478bd9Sstevel@tonic-gate 13437c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 13447c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 1345685679f7Sakolb if (!bound) 1346685679f7Sakolb dp->disp_steal = 0; 13477c478bd9Sstevel@tonic-gate membar_enter(); 13487c478bd9Sstevel@tonic-gate 13497c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 13507c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL); 13517c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp; 13527c478bd9Sstevel@tonic-gate dq->dq_last = tp; 13537c478bd9Sstevel@tonic-gate } else { 13547c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 13557c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 13567c478bd9Sstevel@tonic-gate dq->dq_first = 
dq->dq_last = tp; 13577c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 13587c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 13597c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 13607c478bd9Sstevel@tonic-gate membar_enter(); 13617c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 13627c478bd9Sstevel@tonic-gate } 13637c478bd9Sstevel@tonic-gate } 13647c478bd9Sstevel@tonic-gate 13657c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) { 13666890d023SEric Saxe if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) { 13677c478bd9Sstevel@tonic-gate /* 13687c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the 13697c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal 13707c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a 13717c478bd9Sstevel@tonic-gate * context switch. We may just switch to it 13727c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared 13737c478bd9Sstevel@tonic-gate * in swtch and swtch_to. 13747c478bd9Sstevel@tonic-gate */ 13757c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 13767c478bd9Sstevel@tonic-gate } 13777c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 13787c478bd9Sstevel@tonic-gate } 13797c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound); 13807c478bd9Sstevel@tonic-gate } 13817c478bd9Sstevel@tonic-gate 13827c478bd9Sstevel@tonic-gate /* 13837c478bd9Sstevel@tonic-gate * Put the specified thread on the front of the dispatcher 13847c478bd9Sstevel@tonic-gate * queue corresponding to its current priority. 13857c478bd9Sstevel@tonic-gate * 13867c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state 13877c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl. 13887c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked. 13897c478bd9Sstevel@tonic-gate */ 13907c478bd9Sstevel@tonic-gate void 13917c478bd9Sstevel@tonic-gate setfrontdq(kthread_t *tp) 13927c478bd9Sstevel@tonic-gate { 13937c478bd9Sstevel@tonic-gate disp_t *dp; 13947c478bd9Sstevel@tonic-gate dispq_t *dq; 13957c478bd9Sstevel@tonic-gate cpu_t *cp; 13967c478bd9Sstevel@tonic-gate pri_t tpri; 13977c478bd9Sstevel@tonic-gate int bound; 13987c478bd9Sstevel@tonic-gate 13997c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 14007c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 14017c478bd9Sstevel@tonic-gate ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 14027c478bd9Sstevel@tonic-gate 14037c478bd9Sstevel@tonic-gate /* 14047c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't 14057c478bd9Sstevel@tonic-gate * queue it, but wake sched. 
14067c478bd9Sstevel@tonic-gate */ 14077c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 14087c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp); 14097c478bd9Sstevel@tonic-gate return; 14107c478bd9Sstevel@tonic-gate } 14117c478bd9Sstevel@tonic-gate 1412abd41583Sgd209917 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1413abd41583Sgd209917 bound = 1; 1414abd41583Sgd209917 else 1415abd41583Sgd209917 bound = 0; 1416abd41583Sgd209917 14177c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 14187c478bd9Sstevel@tonic-gate if (ncpus == 1) 14197c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 1420abd41583Sgd209917 else if (!bound) { 14217c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) { 14227c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_FRONT); 14237c478bd9Sstevel@tonic-gate return; 14247c478bd9Sstevel@tonic-gate } 14257c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 14267c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) { 14277c478bd9Sstevel@tonic-gate /* 14286890d023SEric Saxe * We'll generally let this thread continue to run 14296890d023SEric Saxe * where it last ran, but will consider migration if: 1430455e370cSJohn Levon * - The thread last ran outside its home lgroup. 14316890d023SEric Saxe * - The CPU where it last ran is the target of an 14326890d023SEric Saxe * offline request (a thread_nomigrate() on the in 14336890d023SEric Saxe * motion CPU relies on this when forcing a preempt). 14346890d023SEric Saxe * - The thread isn't the highest priority thread where 14356890d023SEric Saxe * it last ran, and it is considered not likely to 14366890d023SEric Saxe * have significant cache warmth. 14377c478bd9Sstevel@tonic-gate */ 1438455e370cSJohn Levon if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) || 1439455e370cSJohn Levon cp == cpu_inmotion || 1440455e370cSJohn Levon (tpri < cp->cpu_disp->disp_maxrunpri && 1441455e370cSJohn Levon !THREAD_HAS_CACHE_WARMTH(tp))) { 1442455e370cSJohn Levon cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); 14436890d023SEric Saxe } 14447c478bd9Sstevel@tonic-gate } else { 14457c478bd9Sstevel@tonic-gate /* 14467c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition. 14477c478bd9Sstevel@tonic-gate */ 14487c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1449455e370cSJohn Levon tp, tp->t_pri); 14507c478bd9Sstevel@tonic-gate } 14517c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 14527c478bd9Sstevel@tonic-gate } else { 14537c478bd9Sstevel@tonic-gate /* 14547c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for 14557c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the 14567c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must 14577c478bd9Sstevel@tonic-gate * favour weak binding over strong. 14587c478bd9Sstevel@tonic-gate */ 14597c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ? 14607c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu; 14617c478bd9Sstevel@tonic-gate } 1462f2bd4627Sjohansen 1463f2bd4627Sjohansen /* 1464f2bd4627Sjohansen * A thread that is ONPROC may be temporarily placed on the run queue 1465f2bd4627Sjohansen * but then chosen to run again by disp. If the thread we're placing on 1466f2bd4627Sjohansen * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1467f2bd4627Sjohansen * replacement process is actually scheduled in swtch(). In this 1468f2bd4627Sjohansen * situation, curthread is the only thread that could be in the ONPROC 1469f2bd4627Sjohansen * state. 
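	 *
	 * (Sketch, not a contract: cpu_update_pct() is expected to fold the
	 * time since the last microstate update into the thread's %CPU
	 * figure, so t_waitrq starts from a freshly settled baseline.)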
1470f2bd4627Sjohansen */ 1471f2bd4627Sjohansen if ((tp != curthread) && (tp->t_waitrq == 0)) { 1472f2bd4627Sjohansen hrtime_t curtime; 1473f2bd4627Sjohansen 1474f2bd4627Sjohansen curtime = gethrtime_unscaled(); 1475f2bd4627Sjohansen (void) cpu_update_pct(tp, curtime); 1476f2bd4627Sjohansen tp->t_waitrq = curtime; 1477f2bd4627Sjohansen } else { 1478f2bd4627Sjohansen (void) cpu_update_pct(tp, gethrtime_unscaled()); 1479f2bd4627Sjohansen } 1480f2bd4627Sjohansen 14817c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 14827c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 14837c478bd9Sstevel@tonic-gate 14847c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 14857c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 14867c478bd9Sstevel@tonic-gate 14877c478bd9Sstevel@tonic-gate #ifndef NPROBE 14887c478bd9Sstevel@tonic-gate /* Kernel probe */ 14897c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 14907c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 14917c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 14927c478bd9Sstevel@tonic-gate 14937c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 14967c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 14977c478bd9Sstevel@tonic-gate 14987c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 14997c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 1500685679f7Sakolb if (!bound) 1501685679f7Sakolb dp->disp_steal = 0; 15027c478bd9Sstevel@tonic-gate membar_enter(); 15037c478bd9Sstevel@tonic-gate 15047c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 15057c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL); 15067c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first; 15077c478bd9Sstevel@tonic-gate dq->dq_first = tp; 15087c478bd9Sstevel@tonic-gate } else { 15097c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 15107c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 15117c478bd9Sstevel@tonic-gate tp->t_link = NULL; 15127c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 15137c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 15147c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 15157c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 15167c478bd9Sstevel@tonic-gate membar_enter(); 15177c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 15187c478bd9Sstevel@tonic-gate } 15197c478bd9Sstevel@tonic-gate } 15207c478bd9Sstevel@tonic-gate 15217c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) { 15227c478bd9Sstevel@tonic-gate if (tp == curthread && dp->disp_max_unbound_pri == -1 && 15237c478bd9Sstevel@tonic-gate cp == CPU) { 15247c478bd9Sstevel@tonic-gate /* 15257c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the 15267c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal 15277c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a 15287c478bd9Sstevel@tonic-gate * context switch. We may just switch to it 15297c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared 15307c478bd9Sstevel@tonic-gate * in swtch and swtch_to. 
15317c478bd9Sstevel@tonic-gate */ 15327c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 15337c478bd9Sstevel@tonic-gate } 15347c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 15357c478bd9Sstevel@tonic-gate } 15367c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound); 15377c478bd9Sstevel@tonic-gate } 15387c478bd9Sstevel@tonic-gate 15397c478bd9Sstevel@tonic-gate /* 15407c478bd9Sstevel@tonic-gate * Put a high-priority unbound thread on the kp queue 15417c478bd9Sstevel@tonic-gate */ 15427c478bd9Sstevel@tonic-gate static void 15437c478bd9Sstevel@tonic-gate setkpdq(kthread_t *tp, int borf) 15447c478bd9Sstevel@tonic-gate { 15457c478bd9Sstevel@tonic-gate dispq_t *dq; 15467c478bd9Sstevel@tonic-gate disp_t *dp; 15477c478bd9Sstevel@tonic-gate cpu_t *cp; 15487c478bd9Sstevel@tonic-gate pri_t tpri; 15497c478bd9Sstevel@tonic-gate 15507c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 15517c478bd9Sstevel@tonic-gate 15527c478bd9Sstevel@tonic-gate dp = &tp->t_cpupart->cp_kp_queue; 15537c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 15547c478bd9Sstevel@tonic-gate 15557c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 15567c478bd9Sstevel@tonic-gate 15577c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 15587c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 15597c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 15607c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 15617c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 15627c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 15637c478bd9Sstevel@tonic-gate 15647c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 15657c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) { 15667c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL); 15677c478bd9Sstevel@tonic-gate tp->t_link = NULL; 15687c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp; 15697c478bd9Sstevel@tonic-gate dq->dq_last = tp; 15707c478bd9Sstevel@tonic-gate } else { 15717c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL); 15727c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first; 15737c478bd9Sstevel@tonic-gate dq->dq_first = tp; 15747c478bd9Sstevel@tonic-gate } 15757c478bd9Sstevel@tonic-gate } else { 15767c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) { 15777c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 15787c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 15797c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 15807c478bd9Sstevel@tonic-gate } else { 15817c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 15827c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 15837c478bd9Sstevel@tonic-gate tp->t_link = NULL; 15847c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 15857c478bd9Sstevel@tonic-gate } 15867c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 15877c478bd9Sstevel@tonic-gate if (tpri > dp->disp_max_unbound_pri) 15887c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 15897c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 15907c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 15917c478bd9Sstevel@tonic-gate membar_enter(); 15927c478bd9Sstevel@tonic-gate } 15937c478bd9Sstevel@tonic-gate } 15947c478bd9Sstevel@tonic-gate 15957c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 15967c478bd9Sstevel@tonic-gate if (tp->t_cpupart != cp->cpu_part) { 15977c478bd9Sstevel@tonic-gate /* migrate to a cpu in the new partition */ 15987c478bd9Sstevel@tonic-gate cp = 
tp->t_cpupart->cp_cpulist; 15997c478bd9Sstevel@tonic-gate } 1600455e370cSJohn Levon cp = disp_lowpri_cpu(cp, tp, tp->t_pri); 16017c478bd9Sstevel@tonic-gate disp_lock_enter_high(&cp->cpu_disp->disp_lock); 16027c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 16037c478bd9Sstevel@tonic-gate 16047c478bd9Sstevel@tonic-gate #ifndef NPROBE 16057c478bd9Sstevel@tonic-gate /* Kernel probe */ 16067c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 16077c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 16087c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 16097c478bd9Sstevel@tonic-gate 16107c478bd9Sstevel@tonic-gate if (cp->cpu_chosen_level < tpri) 16117c478bd9Sstevel@tonic-gate cp->cpu_chosen_level = tpri; 16127c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 16137c478bd9Sstevel@tonic-gate disp_lock_exit_high(&cp->cpu_disp->disp_lock); 16147c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, 0); 16157c478bd9Sstevel@tonic-gate } 16167c478bd9Sstevel@tonic-gate 16177c478bd9Sstevel@tonic-gate /* 16187c478bd9Sstevel@tonic-gate * Remove a thread from the dispatcher queue if it is on it. 16197c478bd9Sstevel@tonic-gate * It is not an error if it is not found but we return whether 16207c478bd9Sstevel@tonic-gate * or not it was found in case the caller wants to check. 16217c478bd9Sstevel@tonic-gate */ 16227c478bd9Sstevel@tonic-gate int 16237c478bd9Sstevel@tonic-gate dispdeq(kthread_t *tp) 16247c478bd9Sstevel@tonic-gate { 16257c478bd9Sstevel@tonic-gate disp_t *dp; 16267c478bd9Sstevel@tonic-gate dispq_t *dq; 16277c478bd9Sstevel@tonic-gate kthread_t *rp; 16287c478bd9Sstevel@tonic-gate kthread_t *trp; 16297c478bd9Sstevel@tonic-gate kthread_t **ptp; 16307c478bd9Sstevel@tonic-gate int tpri; 16317c478bd9Sstevel@tonic-gate 16327c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 16337c478bd9Sstevel@tonic-gate 16347c478bd9Sstevel@tonic-gate if (tp->t_state != TS_RUN) 16357c478bd9Sstevel@tonic-gate return (0); 16367c478bd9Sstevel@tonic-gate 16377c478bd9Sstevel@tonic-gate /* 16387c478bd9Sstevel@tonic-gate * The thread is "swapped" or is on the swap queue and 16397c478bd9Sstevel@tonic-gate * hence no longer on the run queue, so return true. 16407c478bd9Sstevel@tonic-gate */ 16417c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 16427c478bd9Sstevel@tonic-gate return (1); 16437c478bd9Sstevel@tonic-gate 16447c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 16457c478bd9Sstevel@tonic-gate dp = tp->t_disp_queue; 16467c478bd9Sstevel@tonic-gate ASSERT(tpri < dp->disp_npri); 16477c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 16487c478bd9Sstevel@tonic-gate ptp = &dq->dq_first; 16497c478bd9Sstevel@tonic-gate rp = *ptp; 16507c478bd9Sstevel@tonic-gate trp = NULL; 16517c478bd9Sstevel@tonic-gate 16527c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 16537c478bd9Sstevel@tonic-gate 16547c478bd9Sstevel@tonic-gate /* 16557c478bd9Sstevel@tonic-gate * Search for thread in queue. 16567c478bd9Sstevel@tonic-gate * Double links would simplify this at the expense of disp/setrun. 
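	 *
	 * Sketch of the walk below: rp scans the list while trp/ptp trail
	 * one node behind, so unlinking is simply *ptp = rp->t_link, and if
	 * the removed thread was the tail, dq_last falls back to trp.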
16577c478bd9Sstevel@tonic-gate */ 16587c478bd9Sstevel@tonic-gate while (rp != tp && rp != NULL) { 16597c478bd9Sstevel@tonic-gate trp = rp; 16607c478bd9Sstevel@tonic-gate ptp = &trp->t_link; 16617c478bd9Sstevel@tonic-gate rp = trp->t_link; 16627c478bd9Sstevel@tonic-gate } 16637c478bd9Sstevel@tonic-gate 16647c478bd9Sstevel@tonic-gate if (rp == NULL) { 16657c478bd9Sstevel@tonic-gate panic("dispdeq: thread not on queue"); 16667c478bd9Sstevel@tonic-gate } 16677c478bd9Sstevel@tonic-gate 16687c478bd9Sstevel@tonic-gate DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 16697c478bd9Sstevel@tonic-gate 16707c478bd9Sstevel@tonic-gate /* 16717c478bd9Sstevel@tonic-gate * Found it so remove it from queue. 16727c478bd9Sstevel@tonic-gate */ 16737c478bd9Sstevel@tonic-gate if ((*ptp = rp->t_link) == NULL) 16747c478bd9Sstevel@tonic-gate dq->dq_last = trp; 16757c478bd9Sstevel@tonic-gate 16767c478bd9Sstevel@tonic-gate dp->disp_nrunnable--; 16777c478bd9Sstevel@tonic-gate if (--dq->dq_sruncnt == 0) { 16787c478bd9Sstevel@tonic-gate dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 16797c478bd9Sstevel@tonic-gate if (dp->disp_nrunnable == 0) { 16807c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = -1; 16817c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = -1; 16827c478bd9Sstevel@tonic-gate } else if (tpri == dp->disp_maxrunpri) { 16837c478bd9Sstevel@tonic-gate int ipri; 16847c478bd9Sstevel@tonic-gate 16857c478bd9Sstevel@tonic-gate ipri = bt_gethighbit(dp->disp_qactmap, 16867c478bd9Sstevel@tonic-gate dp->disp_maxrunpri >> BT_ULSHIFT); 16877c478bd9Sstevel@tonic-gate if (ipri < dp->disp_max_unbound_pri) 16887c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = ipri; 16897c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = ipri; 16907c478bd9Sstevel@tonic-gate } 16917c478bd9Sstevel@tonic-gate } 16927c478bd9Sstevel@tonic-gate tp->t_link = NULL; 16937c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); /* put in intermediate state */ 16947c478bd9Sstevel@tonic-gate return (1); 16957c478bd9Sstevel@tonic-gate } 16967c478bd9Sstevel@tonic-gate 16977c478bd9Sstevel@tonic-gate 16987c478bd9Sstevel@tonic-gate /* 16997c478bd9Sstevel@tonic-gate * dq_sruninc and dq_srundec are public functions for 17007c478bd9Sstevel@tonic-gate * incrementing/decrementing the sruncnts when a thread on 17017c478bd9Sstevel@tonic-gate * a dispatcher queue is made schedulable/unschedulable by 17027c478bd9Sstevel@tonic-gate * resetting the TS_LOAD flag. 17037c478bd9Sstevel@tonic-gate * 17047c478bd9Sstevel@tonic-gate * The caller MUST have the thread lock and therefore the dispatcher 17057c478bd9Sstevel@tonic-gate * queue lock so that the operation which changes 17067c478bd9Sstevel@tonic-gate * the flag, the operation that checks the status of the thread to 17077c478bd9Sstevel@tonic-gate * determine if it's on a disp queue AND the call to this function 17087c478bd9Sstevel@tonic-gate * are one atomic operation with respect to interrupts. 17097c478bd9Sstevel@tonic-gate */ 17107c478bd9Sstevel@tonic-gate 17117c478bd9Sstevel@tonic-gate /* 17127c478bd9Sstevel@tonic-gate * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 
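 *
 * The two routines are deliberate inverses: dq_sruninc() re-queues the
 * thread via THREAD_TRANSITION() and setfrontdq(), while dq_srundec()
 * dequeues it and parks it on the swap queue via disp_swapped_enq().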
17137c478bd9Sstevel@tonic-gate  */
17147c478bd9Sstevel@tonic-gate void
17157c478bd9Sstevel@tonic-gate dq_sruninc(kthread_t *t)
17167c478bd9Sstevel@tonic-gate {
17177c478bd9Sstevel@tonic-gate 	ASSERT(t->t_state == TS_RUN);
17187c478bd9Sstevel@tonic-gate 	ASSERT(t->t_schedflag & TS_LOAD);
17197c478bd9Sstevel@tonic-gate 
17207c478bd9Sstevel@tonic-gate 	THREAD_TRANSITION(t);
17217c478bd9Sstevel@tonic-gate 	setfrontdq(t);
17227c478bd9Sstevel@tonic-gate }
17237c478bd9Sstevel@tonic-gate 
17247c478bd9Sstevel@tonic-gate /*
17257c478bd9Sstevel@tonic-gate  * See comment on calling conventions above.
17267c478bd9Sstevel@tonic-gate  * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread.
17277c478bd9Sstevel@tonic-gate  */
17287c478bd9Sstevel@tonic-gate void
17297c478bd9Sstevel@tonic-gate dq_srundec(kthread_t *t)
17307c478bd9Sstevel@tonic-gate {
17317c478bd9Sstevel@tonic-gate 	ASSERT(t->t_schedflag & TS_LOAD);
17327c478bd9Sstevel@tonic-gate 
17337c478bd9Sstevel@tonic-gate 	(void) dispdeq(t);
17347c478bd9Sstevel@tonic-gate 	disp_swapped_enq(t);
17357c478bd9Sstevel@tonic-gate }
17367c478bd9Sstevel@tonic-gate 
17377c478bd9Sstevel@tonic-gate /*
17387c478bd9Sstevel@tonic-gate  * Change the dispatcher lock of thread to the "swapped_lock"
17397c478bd9Sstevel@tonic-gate  * and return with thread lock still held.
17407c478bd9Sstevel@tonic-gate  *
17417c478bd9Sstevel@tonic-gate  * Called with thread_lock held, in transition state, and at high spl.
17427c478bd9Sstevel@tonic-gate  */
17437c478bd9Sstevel@tonic-gate void
17447c478bd9Sstevel@tonic-gate disp_swapped_enq(kthread_t *tp)
17457c478bd9Sstevel@tonic-gate {
17467c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
17477c478bd9Sstevel@tonic-gate 	ASSERT(tp->t_schedflag & TS_LOAD);
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate 	switch (tp->t_state) {
17507c478bd9Sstevel@tonic-gate 	case TS_RUN:
17517c478bd9Sstevel@tonic-gate 		disp_lock_enter_high(&swapped_lock);
17527c478bd9Sstevel@tonic-gate 		THREAD_SWAP(tp, &swapped_lock);	/* set TS_RUN state and lock */
17537c478bd9Sstevel@tonic-gate 		break;
17547c478bd9Sstevel@tonic-gate 	case TS_ONPROC:
17557c478bd9Sstevel@tonic-gate 		disp_lock_enter_high(&swapped_lock);
17567c478bd9Sstevel@tonic-gate 		THREAD_TRANSITION(tp);
17577c478bd9Sstevel@tonic-gate 		wake_sched_sec = 1;		/* tell clock to wake sched */
17587c478bd9Sstevel@tonic-gate 		THREAD_SWAP(tp, &swapped_lock);	/* set TS_RUN state and lock */
17597c478bd9Sstevel@tonic-gate 		break;
17607c478bd9Sstevel@tonic-gate 	default:
17617c478bd9Sstevel@tonic-gate 		panic("disp_swapped: tp: %p bad t_state", (void *)tp);
17627c478bd9Sstevel@tonic-gate 	}
17637c478bd9Sstevel@tonic-gate }
17647c478bd9Sstevel@tonic-gate 
17657c478bd9Sstevel@tonic-gate /*
17667c478bd9Sstevel@tonic-gate  * This routine is called by setbackdq/setfrontdq if the thread is
17677c478bd9Sstevel@tonic-gate  * not loaded or loaded and on the swap queue.
17687c478bd9Sstevel@tonic-gate  *
17697c478bd9Sstevel@tonic-gate  * Thread state TS_SLEEP implies that a swapped thread
17707c478bd9Sstevel@tonic-gate  * has been woken up and needs to be swapped in by the swapper.
17717c478bd9Sstevel@tonic-gate  *
17727c478bd9Sstevel@tonic-gate  * Thread state TS_RUN implies that the priority of a swapped
17737c478bd9Sstevel@tonic-gate  * thread is being increased by its scheduling class (e.g. ts_update).
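 *
 * As the code below shows, the urgency of the wakeup scales with
 * priority: above maxclsyspri the clock wakes sched on the next tick
 * (wake_sched); otherwise it is woken within a second (wake_sched_sec).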
17747c478bd9Sstevel@tonic-gate */ 17757c478bd9Sstevel@tonic-gate static void 17767c478bd9Sstevel@tonic-gate disp_swapped_setrun(kthread_t *tp) 17777c478bd9Sstevel@tonic-gate { 17787c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 17797c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 17807c478bd9Sstevel@tonic-gate 17817c478bd9Sstevel@tonic-gate switch (tp->t_state) { 17827c478bd9Sstevel@tonic-gate case TS_SLEEP: 17837c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 17847c478bd9Sstevel@tonic-gate /* 17857c478bd9Sstevel@tonic-gate * Wakeup sched immediately (i.e., next tick) if the 17867c478bd9Sstevel@tonic-gate * thread priority is above maxclsyspri. 17877c478bd9Sstevel@tonic-gate */ 17887c478bd9Sstevel@tonic-gate if (DISP_PRIO(tp) > maxclsyspri) 17897c478bd9Sstevel@tonic-gate wake_sched = 1; 17907c478bd9Sstevel@tonic-gate else 17917c478bd9Sstevel@tonic-gate wake_sched_sec = 1; 17927c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 17937c478bd9Sstevel@tonic-gate break; 17947c478bd9Sstevel@tonic-gate case TS_RUN: /* called from ts_update */ 17957c478bd9Sstevel@tonic-gate break; 17967c478bd9Sstevel@tonic-gate default: 17978793b36bSNick Todd panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp); 17987c478bd9Sstevel@tonic-gate } 17997c478bd9Sstevel@tonic-gate } 18007c478bd9Sstevel@tonic-gate 18017c478bd9Sstevel@tonic-gate /* 18027c478bd9Sstevel@tonic-gate * Make a thread give up its processor. Find the processor on 18037c478bd9Sstevel@tonic-gate * which this thread is executing, and have that processor 18047c478bd9Sstevel@tonic-gate * preempt. 180535a5a358SJonathan Adams * 180635a5a358SJonathan Adams * We allow System Duty Cycle (SDC) threads to be preempted even if 180735a5a358SJonathan Adams * they are running at kernel priorities. To implement this, we always 180835a5a358SJonathan Adams * set cpu_kprunrun; this ensures preempt() will be called. Since SDC 180935a5a358SJonathan Adams * calls cpu_surrender() very often, we only preempt if there is anyone 181035a5a358SJonathan Adams * competing with us. 
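 *
 * Concretely (a sketch of the check below): if an SDC thread's own
 * priority still exceeds both its CPU's disp_maxrunpri and the
 * partition-wide CP_MAXRUNPRI, nobody is competing, so we return without
 * setting cpu_runrun/cpu_kprunrun at all.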
18117c478bd9Sstevel@tonic-gate */ 18127c478bd9Sstevel@tonic-gate void 18137c478bd9Sstevel@tonic-gate cpu_surrender(kthread_t *tp) 18147c478bd9Sstevel@tonic-gate { 18157c478bd9Sstevel@tonic-gate cpu_t *cpup; 18167c478bd9Sstevel@tonic-gate int max_pri; 18177c478bd9Sstevel@tonic-gate int max_run_pri; 18187c478bd9Sstevel@tonic-gate klwp_t *lwp; 18197c478bd9Sstevel@tonic-gate 18207c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 18217c478bd9Sstevel@tonic-gate 18227c478bd9Sstevel@tonic-gate if (tp->t_state != TS_ONPROC) 18237c478bd9Sstevel@tonic-gate return; 18247c478bd9Sstevel@tonic-gate cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 18257c478bd9Sstevel@tonic-gate max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 18267c478bd9Sstevel@tonic-gate max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 18277c478bd9Sstevel@tonic-gate if (max_pri < max_run_pri) 18287c478bd9Sstevel@tonic-gate max_pri = max_run_pri; 18297c478bd9Sstevel@tonic-gate 183035a5a358SJonathan Adams if (tp->t_cid == sysdccid) { 183135a5a358SJonathan Adams uint_t t_pri = DISP_PRIO(tp); 183235a5a358SJonathan Adams if (t_pri > max_pri) 183335a5a358SJonathan Adams return; /* we are not competing w/ anyone */ 183435a5a358SJonathan Adams cpup->cpu_runrun = cpup->cpu_kprunrun = 1; 183535a5a358SJonathan Adams } else { 18367c478bd9Sstevel@tonic-gate cpup->cpu_runrun = 1; 18377c478bd9Sstevel@tonic-gate if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 18387c478bd9Sstevel@tonic-gate cpup->cpu_kprunrun = 1; 18397c478bd9Sstevel@tonic-gate } 184035a5a358SJonathan Adams } 18417c478bd9Sstevel@tonic-gate 18427c478bd9Sstevel@tonic-gate /* 18437c478bd9Sstevel@tonic-gate * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 18447c478bd9Sstevel@tonic-gate */ 18457c478bd9Sstevel@tonic-gate membar_enter(); 18467c478bd9Sstevel@tonic-gate 18477c478bd9Sstevel@tonic-gate DTRACE_SCHED1(surrender, kthread_t *, tp); 18487c478bd9Sstevel@tonic-gate 18497c478bd9Sstevel@tonic-gate /* 18507c478bd9Sstevel@tonic-gate * Make the target thread take an excursion through trap() 18517c478bd9Sstevel@tonic-gate * to do preempt() (unless we're already in trap or post_syscall, 18527c478bd9Sstevel@tonic-gate * calling cpu_surrender via CL_TRAPRET). 
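 *
 * The mechanism below: aston() posts an AST against the target thread,
 * and when the target is on another CPU, poke_cpu() interrupts it so the
 * AST is noticed promptly rather than at the next natural trap.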
18537c478bd9Sstevel@tonic-gate */ 18547c478bd9Sstevel@tonic-gate if (tp != curthread || (lwp = tp->t_lwp) == NULL || 18557c478bd9Sstevel@tonic-gate lwp->lwp_state != LWP_USER) { 18567c478bd9Sstevel@tonic-gate aston(tp); 18577c478bd9Sstevel@tonic-gate if (cpup != CPU) 18587c478bd9Sstevel@tonic-gate poke_cpu(cpup->cpu_id); 18597c478bd9Sstevel@tonic-gate } 18607c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 18617c478bd9Sstevel@tonic-gate "cpu_surrender:tid %p cpu %p", tp, cpup); 18627c478bd9Sstevel@tonic-gate } 18637c478bd9Sstevel@tonic-gate 18647c478bd9Sstevel@tonic-gate /* 18657c478bd9Sstevel@tonic-gate * Commit to and ratify a scheduling decision 18667c478bd9Sstevel@tonic-gate */ 18677c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 18687c478bd9Sstevel@tonic-gate static kthread_t * 18697c478bd9Sstevel@tonic-gate disp_ratify(kthread_t *tp, disp_t *kpq) 18707c478bd9Sstevel@tonic-gate { 18717c478bd9Sstevel@tonic-gate pri_t tpri, maxpri; 18727c478bd9Sstevel@tonic-gate pri_t maxkpri; 18737c478bd9Sstevel@tonic-gate cpu_t *cpup; 18747c478bd9Sstevel@tonic-gate 18757c478bd9Sstevel@tonic-gate ASSERT(tp != NULL); 18767c478bd9Sstevel@tonic-gate /* 18777c478bd9Sstevel@tonic-gate * Commit to, then ratify scheduling decision 18787c478bd9Sstevel@tonic-gate */ 18797c478bd9Sstevel@tonic-gate cpup = CPU; 18807c478bd9Sstevel@tonic-gate if (cpup->cpu_runrun != 0) 18817c478bd9Sstevel@tonic-gate cpup->cpu_runrun = 0; 18827c478bd9Sstevel@tonic-gate if (cpup->cpu_kprunrun != 0) 18837c478bd9Sstevel@tonic-gate cpup->cpu_kprunrun = 0; 18847c478bd9Sstevel@tonic-gate if (cpup->cpu_chosen_level != -1) 18857c478bd9Sstevel@tonic-gate cpup->cpu_chosen_level = -1; 18867c478bd9Sstevel@tonic-gate membar_enter(); 18877c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 18887c478bd9Sstevel@tonic-gate maxpri = cpup->cpu_disp->disp_maxrunpri; 18897c478bd9Sstevel@tonic-gate maxkpri = kpq->disp_maxrunpri; 18907c478bd9Sstevel@tonic-gate if (maxpri < maxkpri) 18917c478bd9Sstevel@tonic-gate maxpri = maxkpri; 18927c478bd9Sstevel@tonic-gate if (tpri < maxpri) { 18937c478bd9Sstevel@tonic-gate /* 18947c478bd9Sstevel@tonic-gate * should have done better 18957c478bd9Sstevel@tonic-gate * put this one back and indicate to try again 18967c478bd9Sstevel@tonic-gate */ 18977c478bd9Sstevel@tonic-gate cpup->cpu_dispthread = curthread; /* fixup dispthread */ 18987c478bd9Sstevel@tonic-gate cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 18997c478bd9Sstevel@tonic-gate thread_lock_high(tp); 19007c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); 19017c478bd9Sstevel@tonic-gate setfrontdq(tp); 19027c478bd9Sstevel@tonic-gate thread_unlock_nopreempt(tp); 19037c478bd9Sstevel@tonic-gate 19047c478bd9Sstevel@tonic-gate tp = NULL; 19057c478bd9Sstevel@tonic-gate } 19067c478bd9Sstevel@tonic-gate return (tp); 19077c478bd9Sstevel@tonic-gate } 19087c478bd9Sstevel@tonic-gate 19097c478bd9Sstevel@tonic-gate /* 19107c478bd9Sstevel@tonic-gate * See if there is any work on the dispatcher queue for other CPUs. 19117c478bd9Sstevel@tonic-gate * If there is, dequeue the best thread and return. 
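 *
 * Return contract, as implemented below: a stolen thread on success,
 * NULL when nothing suitable was found, or T_DONTSTEAL when unbound work
 * exists but is still inside its nosteal window and the idler should
 * simply try again later.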
19127c478bd9Sstevel@tonic-gate */ 19137c478bd9Sstevel@tonic-gate static kthread_t * 19147c478bd9Sstevel@tonic-gate disp_getwork(cpu_t *cp) 19157c478bd9Sstevel@tonic-gate { 19167c478bd9Sstevel@tonic-gate cpu_t *ocp; /* other CPU */ 19177c478bd9Sstevel@tonic-gate cpu_t *ocp_start; 19187c478bd9Sstevel@tonic-gate cpu_t *tcp; /* target local CPU */ 19197c478bd9Sstevel@tonic-gate kthread_t *tp; 1920685679f7Sakolb kthread_t *retval = NULL; 19217c478bd9Sstevel@tonic-gate pri_t maxpri; 19227c478bd9Sstevel@tonic-gate disp_t *kpq; /* kp queue for this partition */ 19237c478bd9Sstevel@tonic-gate lpl_t *lpl, *lpl_leaf; 19246890d023SEric Saxe int leafidx, startidx; 1925685679f7Sakolb hrtime_t stealtime; 19266890d023SEric Saxe lgrp_id_t local_id; 19277c478bd9Sstevel@tonic-gate 19287c478bd9Sstevel@tonic-gate maxpri = -1; 19297c478bd9Sstevel@tonic-gate tcp = NULL; 19307c478bd9Sstevel@tonic-gate 19317c478bd9Sstevel@tonic-gate kpq = &cp->cpu_part->cp_kp_queue; 19327c478bd9Sstevel@tonic-gate while (kpq->disp_maxrunpri >= 0) { 19337c478bd9Sstevel@tonic-gate /* 19347c478bd9Sstevel@tonic-gate * Try to take a thread from the kp_queue. 19357c478bd9Sstevel@tonic-gate */ 19367c478bd9Sstevel@tonic-gate tp = (disp_getbest(kpq)); 19377c478bd9Sstevel@tonic-gate if (tp) 19387c478bd9Sstevel@tonic-gate return (disp_ratify(tp, kpq)); 19397c478bd9Sstevel@tonic-gate } 19407c478bd9Sstevel@tonic-gate 1941ab761399Sesaxe kpreempt_disable(); /* protect the cpu_active list */ 19427c478bd9Sstevel@tonic-gate 19437c478bd9Sstevel@tonic-gate /* 19447c478bd9Sstevel@tonic-gate * Try to find something to do on another CPU's run queue. 19457c478bd9Sstevel@tonic-gate * Loop through all other CPUs looking for the one with the highest 19467c478bd9Sstevel@tonic-gate * priority unbound thread. 19477c478bd9Sstevel@tonic-gate * 19487c478bd9Sstevel@tonic-gate * On NUMA machines, the partition's CPUs are consulted in order of 19497c478bd9Sstevel@tonic-gate * distance from the current CPU. This way, the first available 19507c478bd9Sstevel@tonic-gate * work found is also the closest, and will suffer the least 19517c478bd9Sstevel@tonic-gate * from being migrated. 19527c478bd9Sstevel@tonic-gate */ 19537c478bd9Sstevel@tonic-gate lpl = lpl_leaf = cp->cpu_lpl; 19546890d023SEric Saxe local_id = lpl_leaf->lpl_lgrpid; 19556890d023SEric Saxe leafidx = startidx = 0; 19567c478bd9Sstevel@tonic-gate 19577c478bd9Sstevel@tonic-gate /* 19587c478bd9Sstevel@tonic-gate * This loop traverses the lpl hierarchy. 
Higher level lpls represent
19597c478bd9Sstevel@tonic-gate  * broader levels of locality
19607c478bd9Sstevel@tonic-gate  */
19617c478bd9Sstevel@tonic-gate 	do {
19627c478bd9Sstevel@tonic-gate 		/* This loop iterates over the lpl's leaves */
19637c478bd9Sstevel@tonic-gate 		do {
19647c478bd9Sstevel@tonic-gate 			if (lpl_leaf != cp->cpu_lpl)
19657c478bd9Sstevel@tonic-gate 				ocp = lpl_leaf->lpl_cpus;
19667c478bd9Sstevel@tonic-gate 			else
19677c478bd9Sstevel@tonic-gate 				ocp = cp->cpu_next_lpl;
19687c478bd9Sstevel@tonic-gate 
19697c478bd9Sstevel@tonic-gate 			/* This loop iterates over the CPUs in the leaf */
19707c478bd9Sstevel@tonic-gate 			ocp_start = ocp;
19717c478bd9Sstevel@tonic-gate 			do {
19727c478bd9Sstevel@tonic-gate 				pri_t pri;
19737c478bd9Sstevel@tonic-gate 
19747c478bd9Sstevel@tonic-gate 				ASSERT(CPU_ACTIVE(ocp));
19757c478bd9Sstevel@tonic-gate 
19767c478bd9Sstevel@tonic-gate 				/*
197739bac370Sesaxe 				 * End our stroll around this lpl if:
19787c478bd9Sstevel@tonic-gate 				 *
19797c478bd9Sstevel@tonic-gate 				 * - Something became runnable on the local
198039bac370Sesaxe 				 *   queue...which also ends our stroll around
198139bac370Sesaxe 				 *   the partition.
19827c478bd9Sstevel@tonic-gate 				 *
198339bac370Sesaxe 				 * - We happen across another idle CPU.
198439bac370Sesaxe 				 *   Since it is patrolling the next portion
198539bac370Sesaxe 				 *   of the lpl's list (assuming it's not
19866890d023SEric Saxe 				 *   halted, or busy servicing an interrupt),
19876890d023SEric Saxe 				 *   move to the next higher level of locality.
19887c478bd9Sstevel@tonic-gate 				 */
198939bac370Sesaxe 				if (cp->cpu_disp->disp_nrunnable != 0) {
199039bac370Sesaxe 					kpreempt_enable();
199139bac370Sesaxe 					return (NULL);
199239bac370Sesaxe 				}
19937c478bd9Sstevel@tonic-gate 				if (ocp->cpu_dispatch_pri == -1) {
19947c478bd9Sstevel@tonic-gate 					if (ocp->cpu_disp_flags &
19956890d023SEric Saxe 					    CPU_DISP_HALTED ||
19966890d023SEric Saxe 					    ocp->cpu_intr_actv != 0)
19977c478bd9Sstevel@tonic-gate 						continue;
199839bac370Sesaxe 					else
19996890d023SEric Saxe 						goto next_level;
20007c478bd9Sstevel@tonic-gate 				}
20017c478bd9Sstevel@tonic-gate 
20027c478bd9Sstevel@tonic-gate 				/*
20037c478bd9Sstevel@tonic-gate 				 * If there's only one thread and the CPU
20047c478bd9Sstevel@tonic-gate 				 * is in the middle of a context switch,
20057c478bd9Sstevel@tonic-gate 				 * or it's currently running the idle thread,
20067c478bd9Sstevel@tonic-gate 				 * don't steal it.
20077c478bd9Sstevel@tonic-gate 				 */
20087c478bd9Sstevel@tonic-gate 				if ((ocp->cpu_disp_flags &
20097c478bd9Sstevel@tonic-gate 				    CPU_DISP_DONTSTEAL) &&
20107c478bd9Sstevel@tonic-gate 				    ocp->cpu_disp->disp_nrunnable == 1)
20117c478bd9Sstevel@tonic-gate 					continue;
20127c478bd9Sstevel@tonic-gate 
20137c478bd9Sstevel@tonic-gate 				pri = ocp->cpu_disp->disp_max_unbound_pri;
20147c478bd9Sstevel@tonic-gate 				if (pri > maxpri) {
2015685679f7Sakolb 					/*
2016685679f7Sakolb 					 * Don't steal threads that we attempted
2017fb2f18f8Sesaxe 					 * to steal recently until they're ready
2018fb2f18f8Sesaxe 					 * to be stolen again.
2019685679f7Sakolb 					 */
2020685679f7Sakolb 					stealtime = ocp->cpu_disp->disp_steal;
2021685679f7Sakolb 					if (stealtime == 0 ||
2022685679f7Sakolb 					    stealtime - gethrtime() <= 0) {
20237c478bd9Sstevel@tonic-gate 						maxpri = pri;
20247c478bd9Sstevel@tonic-gate 						tcp = ocp;
2025685679f7Sakolb 					} else {
2026685679f7Sakolb 						/*
2027685679f7Sakolb 						 * Don't update tcp, just set
2028685679f7Sakolb 						 * the retval to T_DONTSTEAL, so
2029685679f7Sakolb 						 * that if no acceptable CPUs
2030685679f7Sakolb 						 * are found the return value
2031685679f7Sakolb 						 * will be T_DONTSTEAL rather
2032685679f7Sakolb 						 * than NULL.
2033685679f7Sakolb 						 */
2034685679f7Sakolb 						retval = T_DONTSTEAL;
2035685679f7Sakolb 					}
20367c478bd9Sstevel@tonic-gate 				}
20377c478bd9Sstevel@tonic-gate 			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);
20387c478bd9Sstevel@tonic-gate 
20396890d023SEric Saxe 			/*
20406890d023SEric Saxe 			 * Iterate to the next leaf lpl in the resource set
20416890d023SEric Saxe 			 * at this level of locality. If we hit the end of
20426890d023SEric Saxe 			 * the set, wrap back around to the beginning.
20436890d023SEric Saxe 			 *
20446890d023SEric Saxe 			 * Note: This iteration is NULL terminated for a reason;
20456890d023SEric Saxe 			 * see lpl_topo_bootstrap() in lgrp.c for details.
20466890d023SEric Saxe 			 */
20477c478bd9Sstevel@tonic-gate 			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
20487c478bd9Sstevel@tonic-gate 				leafidx = 0;
20497c478bd9Sstevel@tonic-gate 				lpl_leaf = lpl->lpl_rset[leafidx];
20507c478bd9Sstevel@tonic-gate 			}
20516890d023SEric Saxe 		} while (leafidx != startidx);
20527c478bd9Sstevel@tonic-gate 
20536890d023SEric Saxe next_level:
20546890d023SEric Saxe 		/*
20556890d023SEric Saxe 		 * Expand the search to include farther away CPUs (next
20566890d023SEric Saxe 		 * locality level). The closer CPUs that have already been
20576890d023SEric Saxe 		 * checked will be checked again. In doing so, idle CPUs
20586890d023SEric Saxe 		 * will tend to be more aggressive about stealing from CPUs
20596890d023SEric Saxe 		 * that are closer (since the closer CPUs will be considered
20606890d023SEric Saxe 		 * more often).
20616890d023SEric Saxe 		 * Begin at this level with the CPU's local leaf lpl.
20626890d023SEric Saxe 		 */
20636890d023SEric Saxe 		if ((lpl = lpl->lpl_parent) != NULL) {
20646890d023SEric Saxe 			leafidx = startidx = lpl->lpl_id2rset[local_id];
20656890d023SEric Saxe 			lpl_leaf = lpl->lpl_rset[leafidx];
20666890d023SEric Saxe 		}
20677c478bd9Sstevel@tonic-gate 	} while (!tcp && lpl);
20687c478bd9Sstevel@tonic-gate 
2069ab761399Sesaxe 	kpreempt_enable();
20707c478bd9Sstevel@tonic-gate 
20717c478bd9Sstevel@tonic-gate 	/*
20727c478bd9Sstevel@tonic-gate 	 * If another queue looks good, and there is still nothing on
20737c478bd9Sstevel@tonic-gate 	 * the local queue, try to transfer one or more threads
20747c478bd9Sstevel@tonic-gate 	 * from it to our queue.
20757c478bd9Sstevel@tonic-gate 	 */
20767c478bd9Sstevel@tonic-gate 	if (tcp && cp->cpu_disp->disp_nrunnable == 0) {
2077685679f7Sakolb 		tp = disp_getbest(tcp->cpu_disp);
2078685679f7Sakolb 		if (tp == NULL || tp == T_DONTSTEAL)
2079685679f7Sakolb 			return (tp);
20807c478bd9Sstevel@tonic-gate 		return (disp_ratify(tp, kpq));
20817c478bd9Sstevel@tonic-gate 	}
2082685679f7Sakolb 	return (retval);
20837c478bd9Sstevel@tonic-gate }
20847c478bd9Sstevel@tonic-gate 
20857c478bd9Sstevel@tonic-gate 
20867c478bd9Sstevel@tonic-gate /*
20877c478bd9Sstevel@tonic-gate  * disp_fix_unbound_pri()
20887c478bd9Sstevel@tonic-gate  *	Determines the maximum priority of unbound threads on the queue.
20897c478bd9Sstevel@tonic-gate  *	The priority is kept for the queue, but is only increased, never
20907c478bd9Sstevel@tonic-gate  *	reduced unless some CPU is looking for something on that queue.
20917c478bd9Sstevel@tonic-gate  *
20927c478bd9Sstevel@tonic-gate  *	The priority argument is the known upper limit.
20937c478bd9Sstevel@tonic-gate  *
20947c478bd9Sstevel@tonic-gate  *	Perhaps this should be kept accurately, but that probably means
20957c478bd9Sstevel@tonic-gate  *	separate bitmaps for bound and unbound threads.  Since only idled
20967c478bd9Sstevel@tonic-gate  *	CPUs will have to do this recalculation, it seems better this way.
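 *
 *	A worked example of the bitmap arithmetic below, assuming a 64-bit
 *	kernel where BT_ULSHIFT is 6: for pri == 70, wx == 1 and
 *	(BT_BIW(70) - 1) masks in only the bits for priorities 64..69; if
 *	65 and 67 are active there, highbit() returns 4 and the candidate
 *	priority becomes (1 << 6) + 4 - 1 == 67.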
20977c478bd9Sstevel@tonic-gate  */
20987c478bd9Sstevel@tonic-gate static void
20997c478bd9Sstevel@tonic-gate disp_fix_unbound_pri(disp_t *dp, pri_t pri)
21007c478bd9Sstevel@tonic-gate {
21017c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
21027c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
21037c478bd9Sstevel@tonic-gate 	ulong_t		*dqactmap = dp->disp_qactmap;
21047c478bd9Sstevel@tonic-gate 	ulong_t		mapword;
21057c478bd9Sstevel@tonic-gate 	int		wx;
21067c478bd9Sstevel@tonic-gate 
21077c478bd9Sstevel@tonic-gate 	ASSERT(DISP_LOCK_HELD(&dp->disp_lock));
21087c478bd9Sstevel@tonic-gate 
21097c478bd9Sstevel@tonic-gate 	ASSERT(pri >= 0);			/* checked by caller */
21107c478bd9Sstevel@tonic-gate 
21117c478bd9Sstevel@tonic-gate 	/*
21127c478bd9Sstevel@tonic-gate 	 * Start the search at the next lowest priority below the supplied
21137c478bd9Sstevel@tonic-gate 	 * priority.  This depends on the bitmap implementation.
21147c478bd9Sstevel@tonic-gate 	 */
21157c478bd9Sstevel@tonic-gate 	do {
21167c478bd9Sstevel@tonic-gate 		wx = pri >> BT_ULSHIFT;		/* index of word in map */
21177c478bd9Sstevel@tonic-gate 
21187c478bd9Sstevel@tonic-gate 		/*
21197c478bd9Sstevel@tonic-gate 		 * Form mask for all lower priorities in the word.
21207c478bd9Sstevel@tonic-gate 		 */
21217c478bd9Sstevel@tonic-gate 		mapword = dqactmap[wx] & (BT_BIW(pri) - 1);
21227c478bd9Sstevel@tonic-gate 
21237c478bd9Sstevel@tonic-gate 		/*
21247c478bd9Sstevel@tonic-gate 		 * Get next lower active priority.
21257c478bd9Sstevel@tonic-gate 		 */
21267c478bd9Sstevel@tonic-gate 		if (mapword != 0) {
21277c478bd9Sstevel@tonic-gate 			pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
21287c478bd9Sstevel@tonic-gate 		} else if (wx > 0) {
21297c478bd9Sstevel@tonic-gate 			pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
21307c478bd9Sstevel@tonic-gate 			if (pri < 0)
21317c478bd9Sstevel@tonic-gate 				break;
21327c478bd9Sstevel@tonic-gate 		} else {
21337c478bd9Sstevel@tonic-gate 			pri = -1;
21347c478bd9Sstevel@tonic-gate 			break;
21357c478bd9Sstevel@tonic-gate 		}
21367c478bd9Sstevel@tonic-gate 
21377c478bd9Sstevel@tonic-gate 		/*
21387c478bd9Sstevel@tonic-gate 		 * Search the queue for unbound, runnable threads.
21397c478bd9Sstevel@tonic-gate 		 */
21407c478bd9Sstevel@tonic-gate 		dq = &dp->disp_q[pri];
21417c478bd9Sstevel@tonic-gate 		tp = dq->dq_first;
21427c478bd9Sstevel@tonic-gate 
21437c478bd9Sstevel@tonic-gate 		while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
21447c478bd9Sstevel@tonic-gate 			tp = tp->t_link;
21457c478bd9Sstevel@tonic-gate 		}
21467c478bd9Sstevel@tonic-gate 
21477c478bd9Sstevel@tonic-gate 		/*
21487c478bd9Sstevel@tonic-gate 		 * If a thread was found, set the priority and return.
21497c478bd9Sstevel@tonic-gate 		 */
21507c478bd9Sstevel@tonic-gate 	} while (tp == NULL);
21517c478bd9Sstevel@tonic-gate 
21527c478bd9Sstevel@tonic-gate 	/*
21537c478bd9Sstevel@tonic-gate 	 * pri holds the maximum unbound thread priority or -1.
21547c478bd9Sstevel@tonic-gate 	 */
21557c478bd9Sstevel@tonic-gate 	if (dp->disp_max_unbound_pri != pri)
21567c478bd9Sstevel@tonic-gate 		dp->disp_max_unbound_pri = pri;
21577c478bd9Sstevel@tonic-gate }
21587c478bd9Sstevel@tonic-gate 
21597c478bd9Sstevel@tonic-gate /*
21607c478bd9Sstevel@tonic-gate  * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
21617c478bd9Sstevel@tonic-gate  * check if the CPU to which it was previously bound should have
21627c478bd9Sstevel@tonic-gate  * its disp_max_unbound_pri increased.
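 *
 * For instance: a pri-60 thread bound to a CPU never raises that CPU's
 * disp_max_unbound_pri while bound; once the binding drops it becomes
 * eligible for stealing, so the queue must now advertise priority 60.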
21637c478bd9Sstevel@tonic-gate  */
21647c478bd9Sstevel@tonic-gate void
21657c478bd9Sstevel@tonic-gate disp_adjust_unbound_pri(kthread_t *tp)
21667c478bd9Sstevel@tonic-gate {
21677c478bd9Sstevel@tonic-gate 	disp_t *dp;
21687c478bd9Sstevel@tonic-gate 	pri_t tpri;
21697c478bd9Sstevel@tonic-gate 
21707c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
21717c478bd9Sstevel@tonic-gate 
21727c478bd9Sstevel@tonic-gate 	/*
21737c478bd9Sstevel@tonic-gate 	 * Don't do anything if the thread is not bound, or
21747c478bd9Sstevel@tonic-gate 	 * currently not runnable or swapped out.
21757c478bd9Sstevel@tonic-gate 	 */
21767c478bd9Sstevel@tonic-gate 	if (tp->t_bound_cpu == NULL ||
21777c478bd9Sstevel@tonic-gate 	    tp->t_state != TS_RUN ||
21787c478bd9Sstevel@tonic-gate 	    tp->t_schedflag & TS_ON_SWAPQ)
21797c478bd9Sstevel@tonic-gate 		return;
21807c478bd9Sstevel@tonic-gate 
21817c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
21827c478bd9Sstevel@tonic-gate 	dp = tp->t_bound_cpu->cpu_disp;
21837c478bd9Sstevel@tonic-gate 	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
21847c478bd9Sstevel@tonic-gate 	if (tpri > dp->disp_max_unbound_pri)
21857c478bd9Sstevel@tonic-gate 		dp->disp_max_unbound_pri = tpri;
21867c478bd9Sstevel@tonic-gate }
21877c478bd9Sstevel@tonic-gate 
21887c478bd9Sstevel@tonic-gate /*
2189685679f7Sakolb  * disp_getbest()
2190685679f7Sakolb  *   De-queue the highest priority unbound runnable thread.
2191685679f7Sakolb  *   Returns with the thread unlocked and onproc but at splhigh (like disp()).
2192685679f7Sakolb  *   Returns NULL if nothing found.
2193685679f7Sakolb  *   Returns T_DONTSTEAL if the thread was not stealable,
2194685679f7Sakolb  *   so that the caller will try again later.
21957c478bd9Sstevel@tonic-gate  *
2196685679f7Sakolb  *   Passed a pointer to a dispatch queue not associated with this CPU.
21987c478bd9Sstevel@tonic-gate  */
21997c478bd9Sstevel@tonic-gate static kthread_t *
22007c478bd9Sstevel@tonic-gate disp_getbest(disp_t *dp)
22017c478bd9Sstevel@tonic-gate {
22027c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
22037c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
22047c478bd9Sstevel@tonic-gate 	pri_t		pri;
2205685679f7Sakolb 	cpu_t		*cp, *tcp;
2206685679f7Sakolb 	boolean_t	allbound;
22077c478bd9Sstevel@tonic-gate 
22087c478bd9Sstevel@tonic-gate 	disp_lock_enter(&dp->disp_lock);
22097c478bd9Sstevel@tonic-gate 
22107c478bd9Sstevel@tonic-gate 	/*
22117c478bd9Sstevel@tonic-gate 	 * If there is nothing to run, or the CPU is in the middle of a
22127c478bd9Sstevel@tonic-gate 	 * context switch of the only thread, return NULL.
22137c478bd9Sstevel@tonic-gate 	 */
2214685679f7Sakolb 	tcp = dp->disp_cpu;
2215685679f7Sakolb 	cp = CPU;
22167c478bd9Sstevel@tonic-gate 	pri = dp->disp_max_unbound_pri;
22177c478bd9Sstevel@tonic-gate 	if (pri == -1 ||
2218685679f7Sakolb 	    (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
2219685679f7Sakolb 	    tcp->cpu_disp->disp_nrunnable == 1)) {
22207c478bd9Sstevel@tonic-gate 		disp_lock_exit_nopreempt(&dp->disp_lock);
22217c478bd9Sstevel@tonic-gate 		return (NULL);
22227c478bd9Sstevel@tonic-gate 	}
22237c478bd9Sstevel@tonic-gate 
22247c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[pri];
2225685679f7Sakolb 
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate 	/*
2228685679f7Sakolb 	 * Assume that all threads are bound on this queue, and change it
2229685679f7Sakolb 	 * later when we find out that it is not the case.

/*
 * disp_getbest()
 *   De-queue the highest priority unbound runnable thread.
 *   Returns with the thread unlocked and onproc but at splhigh (like disp()).
 *   Returns NULL if nothing found.
 *   Returns T_DONTSTEAL if the thread was not stealable,
 *   so that the caller will try again later.
 *
 *   Passed a pointer to a dispatch queue not associated with this CPU, and
 *   its type.
 */
static kthread_t *
disp_getbest(disp_t *dp)
{
        kthread_t       *tp;
        dispq_t         *dq;
        pri_t           pri;
        cpu_t           *cp, *tcp;
        boolean_t       allbound;

        disp_lock_enter(&dp->disp_lock);

        /*
         * If there is nothing to run, or the CPU is in the middle of a
         * context switch of the only thread, return NULL.
         */
        tcp = dp->disp_cpu;
        cp = CPU;
        pri = dp->disp_max_unbound_pri;
        if (pri == -1 ||
            (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
            tcp->cpu_disp->disp_nrunnable == 1)) {
                disp_lock_exit_nopreempt(&dp->disp_lock);
                return (NULL);
        }

        dq = &dp->disp_q[pri];

        /*
         * Assume that all threads are bound on this queue, and change it
         * later when we find out that it is not the case.
         */
        allbound = B_TRUE;
        for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) {
                hrtime_t now, nosteal, rqtime;

                /*
                 * Skip over bound threads which could be here even
                 * though disp_max_unbound_pri indicated this level.
                 */
                if (tp->t_bound_cpu || tp->t_weakbound_cpu)
                        continue;

                /*
                 * We've got some unbound threads on this queue, so turn
                 * the allbound flag off now.
                 */
                allbound = B_FALSE;

                /*
                 * The thread is a candidate for stealing from its run queue.
                 * We don't want to steal threads that became runnable just a
                 * moment ago.  This improves CPU affinity for threads that get
                 * preempted for short periods of time and go back on the run
                 * queue.
                 *
                 * We want to let it stay on its run queue if it was only
                 * placed there recently and it was running on the same CPU
                 * before that, to preserve its cache investment.  For the
                 * thread to remain on its run queue, ALL of the following
                 * conditions must be satisfied:
                 *
                 * - the disp queue should not be the kernel preemption queue
                 * - delayed idle stealing should not be disabled
                 * - nosteal_nsec should be non-zero
                 * - it should run with user priority
                 * - it should be on the run queue of the CPU where it was
                 *   running before being placed on the run queue
                 * - it should be the only thread on the run queue (to prevent
                 *   extra scheduling latency for other threads)
                 * - it should sit on the run queue for less than per-chip
                 *   nosteal interval or global nosteal interval
                 * - in case of CPUs with shared cache it should sit in a run
                 *   queue of a CPU from a different chip
                 *
                 * The checks are arranged so that the ones that are faster are
                 * placed earlier.
                 */
                if (tcp == NULL ||
                    pri >= minclsyspri ||
                    tp->t_cpu != tcp)
                        break;

                /*
                 * Steal immediately if, due to the CMT processor architecture,
                 * migration between cp and tcp would incur no performance
                 * penalty.
                 */
                if (pg_cmt_can_migrate(cp, tcp))
                        break;

                nosteal = nosteal_nsec;
                if (nosteal == 0)
                        break;

                /*
                 * Calculate time spent sitting on run queue
                 */
                now = gethrtime_unscaled();
                rqtime = now - tp->t_waitrq;
                scalehrtime(&rqtime);

                /*
                 * Steal immediately if the time spent on this run queue is
                 * more than the allowed nosteal delay.
                 *
                 * Negative rqtime check is needed here to avoid infinite
                 * stealing delays caused by unlikely but not impossible
                 * drifts between CPU times on different CPUs.
                 */
                if (rqtime > nosteal || rqtime < 0)
                        break;

                DTRACE_PROBE4(nosteal, kthread_t *, tp,
                    cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime);
                scalehrtime(&now);

                /*
                 * Calculate when this thread becomes stealable
                 */
                now += (nosteal - rqtime);

                /*
                 * Calculate time when some thread becomes stealable
                 */
                if (now < dp->disp_steal)
                        dp->disp_steal = now;
        }
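
        /*
         * Worked example (illustrative assumption, not from the original
         * source): with nosteal_nsec set to 100000 (100us), a thread that
         * went back on its own CPU's queue 40us ago has rqtime = 40us, so it
         * is skipped; it becomes stealable 60us from now, and dp->disp_steal
         * is pulled down to that time so an idle CPU knows when it is worth
         * looking at this queue again.
         */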

        /*
         * If there were no unbound threads on this queue, find the queue
         * where they are and then return later.  The value of
         * disp_max_unbound_pri is not always accurate because it isn't
         * reduced until another idle CPU looks for work.
         */
        if (allbound)
                disp_fix_unbound_pri(dp, pri);

        /*
         * If we reached the end of the queue and found no unbound threads
         * then return NULL so that other CPUs will be considered.  If there
         * are unbound threads but they cannot yet be stolen, then
         * return T_DONTSTEAL and try again later.
         */
        if (tp == NULL) {
                disp_lock_exit_nopreempt(&dp->disp_lock);
                return (allbound ? NULL : T_DONTSTEAL);
        }

        /*
         * Found a runnable, unbound thread, so remove it from the queue.
         * dispdeq() requires that we have the thread locked, and we do,
         * by virtue of holding the dispatch queue lock.  dispdeq() will
         * put the thread in transition state, thereby dropping the dispq
         * lock.
         */

#ifdef DEBUG
        {
                int     thread_was_on_queue;

                thread_was_on_queue = dispdeq(tp);      /* drops disp_lock */
                ASSERT(thread_was_on_queue);
        }

#else /* DEBUG */
        (void) dispdeq(tp);                     /* drops disp_lock */
#endif /* DEBUG */

        /*
         * Reset the disp_queue steal time - we do not know what the smallest
         * value across the queue is.
         */
        dp->disp_steal = 0;

        tp->t_schedflag |= TS_DONT_SWAP;

        /*
         * Set up the thread to run on the current CPU.
         */
        tp->t_disp_queue = cp->cpu_disp;

        cp->cpu_dispthread = tp;                /* protected by spl only */
        cp->cpu_dispatch_pri = pri;

        /*
         * There can be a memory synchronization race between disp_getbest()
         * and disp_ratify() vs cpu_resched() where cpu_resched() is trying
         * to preempt the current thread to run the enqueued thread while
         * disp_getbest() and disp_ratify() are changing the current thread
         * to the stolen thread.  This may lead to a situation where
         * cpu_resched() tries to preempt the wrong thread and the
         * stolen thread continues to run on the CPU which has been tagged
         * for preemption.
         * Later the clock thread gets enqueued but doesn't get to run on the
         * CPU, causing the system to hang.
         *
         * To avoid this, grabbing and dropping the disp_lock (which does
         * a memory barrier) is needed to synchronize the execution of
         * cpu_resched() with disp_getbest() and disp_ratify() and
         * synchronize the memory read and written by cpu_resched(),
         * disp_getbest(), and disp_ratify() with each other.
         * (see CR#6482861 for more details).
         */
        disp_lock_enter_high(&cp->cpu_disp->disp_lock);
        disp_lock_exit_high(&cp->cpu_disp->disp_lock);

        ASSERT(pri == DISP_PRIO(tp));

        DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp);

        thread_onproc(tp, cp);                  /* set t_state to TS_ONPROC */

        /*
         * Return with spl high so that swtch() won't need to raise it.
         * The disp_lock was dropped by dispdeq().
         */

        return (tp);
}
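
/*
 * Illustrative sketch, an assumption using C11 atomics rather than the
 * kernel's primitives: the disp_lock enter/exit pair above acts like a full
 * fence in a Dekker-style exchange with cpu_resched().  Each side publishes
 * its write, fences, then reads the other side's state, so at least one of
 * the two is guaranteed to observe the other.  The names below are
 * hypothetical stand-ins.
 */
#include <stdatomic.h>

static atomic_int stolen_installed;     /* stands in for cpu_dispthread */
static atomic_int resched_pending;      /* stands in for the preempt flag */

static int
steal_side_sketch(void)
{
        atomic_store_explicit(&stolen_installed, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);      /* disp_lock pair */
        /* cpu_resched()'s side would store resched_pending, fence, then
         * load stolen_installed, mirroring this sequence. */
        return (atomic_load_explicit(&resched_pending, memory_order_relaxed));
}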

/*
 * disp_bound_common() - common routine for higher level functions
 *      that check for bound threads under certain conditions.
 *      If 'threadlistsafe' is set then there is no need to acquire
 *      pidlock to stop the thread list from changing (eg, if
 *      disp_bound_* is called with cpus paused).
 */
static int
disp_bound_common(cpu_t *cp, int threadlistsafe, int flag)
{
        int             found = 0;
        kthread_t       *tp;

        ASSERT(flag);

        if (!threadlistsafe)
                mutex_enter(&pidlock);
        tp = curthread;         /* faster than allthreads */
        do {
                if (tp->t_state != TS_FREE) {
                        /*
                         * If an interrupt thread is busy, but the
                         * caller doesn't care (i.e. BOUND_INTR is off),
                         * then just ignore it and continue through.
                         */
                        if ((tp->t_flag & T_INTR_THREAD) &&
                            !(flag & BOUND_INTR))
                                continue;

                        /*
                         * Skip the idle thread for the CPU
                         * we're about to set offline.
                         */
                        if (tp == cp->cpu_idle_thread)
                                continue;

                        /*
                         * Skip the pause thread for the CPU
                         * we're about to set offline.
                         */
                        if (tp == cp->cpu_pause_thread)
                                continue;

                        if ((flag & BOUND_CPU) &&
                            (tp->t_bound_cpu == cp ||
                            tp->t_bind_cpu == cp->cpu_id ||
                            tp->t_weakbound_cpu == cp)) {
                                found = 1;
                                break;
                        }

                        if ((flag & BOUND_PARTITION) &&
                            (tp->t_cpupart == cp->cpu_part)) {
                                found = 1;
                                break;
                        }
                }
        } while ((tp = tp->t_next) != curthread && found == 0);
        if (!threadlistsafe)
                mutex_exit(&pidlock);
        return (found);
}
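
/*
 * Illustrative sketch, not from the original source: the do/while above is
 * the standard walk of a circular singly linked ring starting from an
 * arbitrary element (here, curthread).  The same shape in isolation, with
 * hypothetical names:
 */
struct ring {
        struct ring     *r_next;
        int             r_match;
};

static int
ring_any_match(struct ring *start)
{
        struct ring *r = start;

        do {
                if (r->r_match)
                        return (1);     /* found before completing the lap */
        } while ((r = r->r_next) != start);
        return (0);                     /* completed the full circle */
}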

/*
 * disp_bound_threads - return nonzero if threads are bound to the processor.
 *      Called infrequently.  Keep this simple.
 *      Includes threads that are asleep or stopped but not onproc.
 */
int
disp_bound_threads(cpu_t *cp, int threadlistsafe)
{
        return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
}

/*
 * disp_bound_anythreads - return nonzero if _any_ threads are bound
 * to the given processor, including interrupt threads.
 */
int
disp_bound_anythreads(cpu_t *cp, int threadlistsafe)
{
        return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR));
}

/*
 * disp_bound_partition - return nonzero if threads are bound to the same
 * partition as the processor.
 *      Called infrequently.  Keep this simple.
 *      Includes threads that are asleep or stopped but not onproc.
 */
int
disp_bound_partition(cpu_t *cp, int threadlistsafe)
{
        return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION));
}
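
/*
 * Illustrative usage sketch (an assumption; the real callers live in the
 * cpu_offline/cpupart code): a caller preparing to take cp down might refuse
 * while any thread at all, including interrupt threads, remains bound to it.
 * example_can_offline() is a hypothetical name.
 */
static int
example_can_offline(cpu_t *cp)
{
        if (disp_bound_anythreads(cp, 0))
                return (EBUSY);         /* hypothetical policy choice */
        return (0);
}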

/*
 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound
 * threads to other CPUs.
 */
void
disp_cpu_inactive(cpu_t *cp)
{
        kthread_t       *tp;
        disp_t          *dp = cp->cpu_disp;
        dispq_t         *dq;
        pri_t           pri;
        int             wasonq;

        disp_lock_enter(&dp->disp_lock);
        while ((pri = dp->disp_max_unbound_pri) != -1) {
                dq = &dp->disp_q[pri];
                tp = dq->dq_first;

                /*
                 * Skip over bound threads.
                 */
                while (tp != NULL && tp->t_bound_cpu != NULL) {
                        tp = tp->t_link;
                }

                if (tp == NULL) {
                        /* disp_max_unbound_pri must be inaccurate, so fix it */
                        disp_fix_unbound_pri(dp, pri);
                        continue;
                }

                wasonq = dispdeq(tp);           /* drops disp_lock */
                ASSERT(wasonq);
                ASSERT(tp->t_weakbound_cpu == NULL);

                setbackdq(tp);
                /*
                 * Called from cpu_offline:
                 *
                 * cp has already been removed from the list of active cpus
                 * and tp->t_cpu has been changed so there is no risk of
                 * tp ending up back on cp.
                 *
                 * Called from cpupart_move_cpu:
                 *
                 * The cpu has moved to a new cpupart.  Any threads that
                 * were on its dispatch queues before the move remain
                 * in the old partition and can't run in the new partition.
                 */
                ASSERT(tp->t_cpu != cp);
                thread_unlock(tp);

                disp_lock_enter(&dp->disp_lock);
        }
        disp_lock_exit(&dp->disp_lock);
}
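
/*
 * Illustrative sketch, an assumption rather than kernel code: the loop above
 * follows a common drain idiom -- hold the lock while picking a migratable
 * item, let the dequeue operation drop the lock, requeue the item elsewhere,
 * then re-take the lock and repeat.  In outline, with hypothetical pick()
 * and requeue() callbacks:
 */
typedef struct item item_t;

static void
drain_sketch(kmutex_t *lp, item_t *(*pick)(void), void (*requeue)(item_t *))
{
        item_t *it;

        mutex_enter(lp);
        while ((it = pick()) != NULL) {
                mutex_exit(lp);         /* dispdeq() drops the lock here */
                requeue(it);            /* setbackdq() in the real code */
                mutex_enter(lp);
        }
        mutex_exit(lp);
}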

/*
 * Return a score rating this CPU for running this thread: lower is better.
 *
 * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for
 * curcpu (as that's our own priority).
 *
 * If a cpu is the target of an offline request, then try to avoid it.
 *
 * Otherwise we'll use double the effective dispatcher priority for the CPU.
 *
 * We do this so smt_adjust_cpu_score() can increment the score if needed,
 * without ending up overriding a dispatcher priority.
 */
static pri_t
cpu_score(cpu_t *cp, kthread_t *tp)
{
        pri_t score;

        if (tp == curthread && cp == curthread->t_cpu)
                score = 2 * CPU_IDLE_PRI;
        else if (cp == cpu_inmotion)
                score = SHRT_MAX;
        else
                score = 2 * cp->cpu_dispatch_pri;

        if (2 * cp->cpu_disp->disp_maxrunpri > score)
                score = 2 * cp->cpu_disp->disp_maxrunpri;
        if (2 * cp->cpu_chosen_level > score)
                score = 2 * cp->cpu_chosen_level;

        return (smt_adjust_cpu_score(tp, cp, score));
}
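
/*
 * Worked example (illustrative, not from the original source): with
 * priorities doubled, two CPUs at dispatch priority 59 both score 118; an
 * SMT penalty of +1 applied to one of them yields 119, which is still
 * better (lower) than any CPU at priority 60 (score 120).  Doubling
 * therefore leaves room for a tie-break that can never outweigh a real
 * one-level priority difference, since 2a + 1 < 2b whenever a < b.
 */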

/*
 * disp_lowpri_cpu - find a suitable CPU to run the given thread.
 *
 * We are looking for a CPU with an effective dispatch priority lower than the
 * thread's, so that the thread will run immediately rather than be enqueued.
 * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group.
 * If we don't find an available CPU there, we will expand our search to
 * include wider locality levels.  (Note these groups are already divided by
 * CPU partition.)
 *
 * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves
 * on the best home CPU we found.
 *
 * The hint passed in is used as a starting point so we don't favor CPU 0 or
 * any other CPU.  The caller should pass in the most recently used CPU for the
 * thread; it's of course possible that this CPU isn't in the home lgroup.
 *
 * This function must be called at either high SPL, or with preemption
 * disabled, so that the "hint" CPU cannot be removed from the online CPU list
 * while we are traversing it.
 */
cpu_t *
disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri)
{
        cpu_t   *bestcpu;
        cpu_t   *besthomecpu;
        cpu_t   *cp, *cpstart;

        klgrpset_t      done;

        lpl_t           *lpl_iter, *lpl_leaf;

        ASSERT(hint != NULL);
        ASSERT(tp->t_lpl->lpl_ncpu > 0);

        bestcpu = besthomecpu = NULL;
        klgrpset_clear(done);

        lpl_iter = tp->t_lpl;

        do {
                pri_t best = SHRT_MAX;
                klgrpset_t cur_set;

                klgrpset_clear(cur_set);

                for (int i = 0; i < lpl_iter->lpl_nrset; i++) {
                        lpl_leaf = lpl_iter->lpl_rset[i];
                        if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
                                continue;

                        klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);

                        if (hint->cpu_lpl == lpl_leaf)
                                cp = cpstart = hint;
                        else
                                cp = cpstart = lpl_leaf->lpl_cpus;

                        do {
                                pri_t score = cpu_score(cp, tp);

                                if (score < best) {
                                        best = score;
                                        bestcpu = cp;

                                        /* An idle CPU: we're done. */
                                        if (score / 2 == CPU_IDLE_PRI)
                                                goto out;
                                }
                        } while ((cp = cp->cpu_next_lpl) != cpstart);
                }

                if (bestcpu != NULL && tpri > (best / 2))
                        goto out;

                if (besthomecpu == NULL)
                        besthomecpu = bestcpu;

                /*
                 * Add the lgrps we just considered to the "done" set
                 */
                klgrpset_or(done, cur_set);

        } while ((lpl_iter = lpl_iter->lpl_parent) != NULL);

        /*
         * The specified priority isn't high enough to run immediately
         * anywhere, so just return the best CPU from the home lgroup.
         */
        bestcpu = besthomecpu;

out:
        ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
        return (bestcpu);
}
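
/*
 * Illustrative sketch, an assumption rather than kernel code: the search
 * above widens level by level, using a set of already-visited leaves so a
 * leaf reachable from several levels is scanned only once.  A miniature
 * version with a 64-bit mask for the "done" set (leaf ids < 64; score() is
 * a hypothetical stand-in for cpu_score(); the kernel version additionally
 * cuts the search short and falls back to the home level's best, omitted
 * here).  Standalone use would need <stdint.h> and <limits.h>.
 */
struct lvl {
        struct lvl      *l_parent;      /* NULL at the root */
        int             l_nleaf;
        const int       *l_leaf_ids;
};

static int
widening_search(const struct lvl *lvl, int (*score)(int))
{
        uint64_t done = 0;
        int best = INT_MAX, best_id = -1;

        do {
                for (int i = 0; i < lvl->l_nleaf; i++) {
                        int id = lvl->l_leaf_ids[i];
                        int s;

                        if (done & (1ULL << id))
                                continue;       /* seen at a narrower level */
                        done |= 1ULL << id;
                        s = score(id);
                        if (s < best) {
                                best = s;
                                best_id = id;
                        }
                }
        } while ((lvl = lvl->l_parent) != NULL);

        return (best_id);       /* lowest score across all levels */
}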

/*
 * This routine provides the generic idle cpu function for all processors.
 * If a processor has some specific code to execute when idle (say, to stop
 * the pipeline and save power) then that routine should be defined in the
 * processor-specific code (module_xx.c) and the global variable idle_cpu
 * set to that function.
 */
static void
generic_idle_cpu(void)
{
}

/*ARGSUSED*/
static void
generic_enq_thread(cpu_t *cpu, int bound)
{
}

cpu_t *
disp_choose_best_cpu(void)
{
        kthread_t *t = curthread;
        cpu_t *curcpu = CPU;

        ASSERT(t->t_preempt > 0);
        ASSERT(t->t_state == TS_ONPROC);
        ASSERT(t->t_schedflag & TS_VCPU);

        if (smt_should_run(t, curcpu))
                return (curcpu);

        return (disp_lowpri_cpu(curcpu, t, t->t_pri));
}