/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.64 2004/07/04 22:44:27 eirikn Exp $
 */

/*
 * Each cpu in a system has its own self-contained light weight kernel
 * thread scheduler, which means that generally speaking we only need
 * to use a critical section to avoid problems.  Foreign thread
 * scheduling is queued via (async) IPIs.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>

#define THREAD_STACK    (UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>

#endif

static int untimely_switch = 0;
#ifdef INVARIANTS
static int panic_on_cscount = 0;
#endif
static __int64_t switch_count = 0;
static __int64_t preempt_hit = 0;
static __int64_t preempt_miss = 0;
static __int64_t preempt_weird = 0;

#ifdef _KERNEL

SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
#ifdef INVARIANTS
SYSCTL_INT(_lwkt, OID_AUTO, panic_on_cscount, CTLFLAG_RW, &panic_on_cscount, 0, "");
#endif
SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");

#endif

/*
 * These helper procedures handle the runq, they can only be called from
 * within a critical section.
 *
 * WARNING!  Prior to SMP being brought up it is possible to enqueue and
 * dequeue threads belonging to other cpus, so be sure to use td->td_gd
 * instead of 'mycpu' when referencing the globaldata structure.  Once
 * SMP is live, enqueueing and dequeueing only occur on the current cpu.
 */
static __inline
void
_lwkt_dequeue(thread_t td)
{
    if (td->td_flags & TDF_RUNQ) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags &= ~TDF_RUNQ;
        TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
        /* runqmask is passively cleaned up by the switcher */
    }
}

static __inline
void
_lwkt_enqueue(thread_t td)
{
    if ((td->td_flags & (TDF_RUNQ|TDF_MIGRATING)) == 0) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags |= TDF_RUNQ;
        TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
        gd->gd_runqmask |= 1 << nq;
    }
}

/*
 * Schedule a thread to run.  As the current thread we can always safely
 * schedule ourselves, and a shortcut procedure is provided for that
 * function.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_schedule_self(thread_t td)
{
    crit_enter_quick(td);
    KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
    KASSERT(td != &td->td_gd->gd_idlethread,
        ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
    _lwkt_enqueue(td);
#ifdef _KERNEL
    if (td->td_proc && td->td_proc->p_stat == SSLEEP)
        panic("SCHED SELF PANIC");
#endif
    crit_exit_quick(td);
}

/*
 * Deschedule a thread.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_deschedule_self(thread_t td)
{
    crit_enter_quick(td);
    KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
    _lwkt_dequeue(td);
    crit_exit_quick(td);
}

#ifdef _KERNEL

/*
 * LWKTs operate on a per-cpu basis
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
lwkt_gdinit(struct globaldata *gd)
{
    int i;

    for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
        TAILQ_INIT(&gd->gd_tdrunq[i]);
    gd->gd_runqmask = 0;
    TAILQ_INIT(&gd->gd_tdallq);
}

#endif /* _KERNEL */

/*
 * Initialize a thread wait structure prior to first use.
 *
 * NOTE!  Called from low level boot code, we cannot do anything fancy!
 */
void
lwkt_wait_init(lwkt_wait_t w)
{
    lwkt_token_init(&w->wa_token);
    TAILQ_INIT(&w->wa_waitq);
    w->wa_gen = 0;
    w->wa_count = 0;
}
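/*
 * Illustrative sketch (not part of the original file): a subsystem typically
 * embeds a wait structure in one of its own objects and initializes it once,
 * before any lwkt_block()/lwkt_signal() (defined later in this file) can
 * reference it.  The 'my_obj' object and its 'obj_wait' member below are
 * hypothetical names used only for illustration.
 *
 *      lwkt_wait_init(&my_obj->obj_wait);
 *      ...
 *      lwkt_signal(&my_obj->obj_wait, 1);      (wake one waiter, later on)
 */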
/*
 * Create a new thread.  The thread must be associated with a process context
 * or LWKT start address before it can be scheduled.  If the target cpu is
 * -1 the thread will be created on the current cpu.
 *
 * If you intend to create a thread without a process context this function
 * does everything except load the startup and switcher function.
 */
thread_t
lwkt_alloc_thread(struct thread *td, int cpu)
{
    void *stack;
    int flags = 0;
    globaldata_t gd = mycpu;

    if (td == NULL) {
        crit_enter_gd(gd);
        if (gd->gd_tdfreecount > 0) {
            --gd->gd_tdfreecount;
            td = TAILQ_FIRST(&gd->gd_tdfreeq);
            KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0,
                ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
            TAILQ_REMOVE(&gd->gd_tdfreeq, td, td_threadq);
            crit_exit_gd(gd);
            stack = td->td_kstack;
            flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
        } else {
            crit_exit_gd(gd);
#ifdef _KERNEL
            td = zalloc(thread_zone);
#else
            td = malloc(sizeof(struct thread));
#endif
            td->td_kstack = NULL;
            flags |= TDF_ALLOCATED_THREAD;
        }
    }
    if ((stack = td->td_kstack) == NULL) {
#ifdef _KERNEL
        stack = (void *)kmem_alloc(kernel_map, THREAD_STACK);
#else
        stack = libcaps_alloc_stack(THREAD_STACK);
#endif
        flags |= TDF_ALLOCATED_STACK;
    }
    if (cpu < 0)
        lwkt_init_thread(td, stack, flags, mycpu);
    else
        lwkt_init_thread(td, stack, flags, globaldata_find(cpu));
    return(td);
}

#ifdef _KERNEL

/*
 * Initialize a preexisting thread structure.  This function is used by
 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
 *
 * All threads start out in a critical section at a priority of
 * TDPRI_KERN_DAEMON.  Higher level code will modify the priority as
 * appropriate.  This function may send an IPI message when the
 * requested cpu is not the current cpu and consequently gd_tdallq may
 * not be initialized synchronously from the point of view of the originating
 * cpu.
 *
 * NOTE!  We have to be careful with regard to creating threads for other cpus
 * if SMP has not yet been activated.
 */
#ifdef SMP

static void
lwkt_init_thread_remote(void *arg)
{
    thread_t td = arg;

    TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq);
}

#endif

void
lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
{
    globaldata_t mygd = mycpu;

    bzero(td, sizeof(struct thread));
    td->td_kstack = stack;
    td->td_flags |= flags;
    td->td_gd = gd;
    td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
    lwkt_initport(&td->td_msgport, td);
    pmap_init_thread(td);
#ifdef SMP
    /*
     * Normally initializing a thread for a remote cpu requires sending an
     * IPI.  However, the idlethread is setup before the other cpus are
     * activated so we have to treat it as a special case.  XXX manipulation
     * of gd_tdallq requires the BGL.
     */
    if (gd == mygd || td == &gd->gd_idlethread) {
        crit_enter_gd(mygd);
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
        crit_exit_gd(mygd);
    } else {
        lwkt_send_ipiq(gd, lwkt_init_thread_remote, td);
    }
#else
    crit_enter_gd(mygd);
    TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
    crit_exit_gd(mygd);
#endif
}

#endif /* _KERNEL */

void
lwkt_set_comm(thread_t td, const char *ctl, ...)
{
    __va_list va;

    __va_start(va, ctl);
    vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
    __va_end(va);
}

void
lwkt_hold(thread_t td)
{
    ++td->td_refs;
}

void
lwkt_rele(thread_t td)
{
    KKASSERT(td->td_refs > 0);
    --td->td_refs;
}

#ifdef _KERNEL

void
lwkt_wait_free(thread_t td)
{
    while (td->td_refs)
        tsleep(td, 0, "tdreap", hz);
}

#endif

void
lwkt_free_thread(thread_t td)
{
    struct globaldata *gd = mycpu;

    KASSERT((td->td_flags & TDF_RUNNING) == 0,
        ("lwkt_free_thread: did not exit! %p", td));

    crit_enter_gd(gd);
    TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
    if (gd->gd_tdfreecount < CACHE_NTHREADS &&
        (td->td_flags & TDF_ALLOCATED_THREAD)
    ) {
        ++gd->gd_tdfreecount;
        TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
        crit_exit_gd(gd);
    } else {
        crit_exit_gd(gd);
        if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
#ifdef _KERNEL
            kmem_free(kernel_map, (vm_offset_t)td->td_kstack, THREAD_STACK);
#else
            libcaps_free_stack(td->td_kstack, THREAD_STACK);
#endif
            /* gd invalid */
            td->td_kstack = NULL;
        }
        if (td->td_flags & TDF_ALLOCATED_THREAD) {
#ifdef _KERNEL
            zfree(thread_zone, td);
#else
            free(td);
#endif
        }
    }
}
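/*
 * Illustrative sketch (not part of the original file): code that hands a
 * thread_t to another subsystem can use lwkt_hold()/lwkt_rele() to keep the
 * structure from being reused, and a reaper then waits for the references to
 * drain before freeing.  The 'reap_one_thread' wrapper below is hypothetical.
 *
 *      static void
 *      reap_one_thread(thread_t td)
 *      {
 *          lwkt_wait_free(td);         (sleep until td_refs drops to 0)
 *          lwkt_free_thread(td);       (cache on gd_tdfreeq or free for real)
 *      }
 */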
/*
 * Switch to the next runnable lwkt.  If no LWKTs are runnable then
 * switch to the idlethread.  Switching must occur within a critical
 * section to avoid races with the scheduling queue.
 *
 * We always have full control over our cpu's run queue.  Other cpus
 * that wish to manipulate our queue must use the cpu_*msg() calls to
 * talk to our cpu, so a critical section is all that is needed and
 * the result is very, very fast thread switching.
 *
 * The LWKT scheduler uses a fixed priority model and round-robins at
 * each priority level.  User process scheduling is a totally
 * different beast and LWKT priorities should not be confused with
 * user process priorities.
 *
 * The MP lock may be out of sync with the thread's td_mpcount.  lwkt_switch()
 * cleans it up.  Note that the td_switch() function cannot do anything that
 * requires the MP lock since the MP lock will have already been setup for
 * the target thread (not the current thread).  It's nice to have a scheduler
 * that does not need the MP lock to work because it allows us to do some
 * really cool high-performance MP lock optimizations.
 */

void
lwkt_switch(void)
{
    globaldata_t gd = mycpu;
    thread_t td = gd->gd_curthread;
    thread_t ntd;
#ifdef SMP
    int mpheld;
#endif

    /*
     * Switching from within a 'fast' (non thread switched) interrupt is
     * illegal.
     */
    if (gd->gd_intr_nesting_level && panicstr == NULL) {
        panic("lwkt_switch: cannot switch from within a fast interrupt, yet");
    }

    /*
     * Passive release (used to transition from user to kernel mode
     * when we block or switch rather than when we enter the kernel).
     * This function is NOT called if we are switching into a preemption
     * or returning from a preemption.  Typically this causes us to lose
     * our current process designation (if we have one) and become a true
     * LWKT thread, and may also hand the current process designation to
     * another process and schedule its thread.
     */
    if (td->td_release)
        td->td_release(td);

    crit_enter_gd(gd);
    ++switch_count;

#ifdef SMP
    /*
     * td_mpcount cannot be used to determine if we currently hold the
     * MP lock because get_mplock() will increment it prior to attempting
     * to get the lock, and switch out if it can't.  Our ownership of
     * the actual lock will remain stable while we are in a critical section
     * (but, of course, another cpu may own or release the lock so the
     * actual value of mp_lock is not stable).
     */
    mpheld = MP_LOCK_HELD();
#ifdef INVARIANTS
    if (td->td_cscount) {
        printf("Diagnostic: attempt to switch while mastering cpusync: %p\n",
            td);
        if (panic_on_cscount)
            panic("switching while mastering cpusync");
    }
#endif
#endif
    if ((ntd = td->td_preempted) != NULL) {
        /*
         * We had preempted another thread on this cpu, resume the preempted
         * thread.  This occurs transparently, whether the preempted thread
         * was scheduled or not (it may have been preempted after descheduling
         * itself).
         *
         * We have to setup the MP lock for the original thread after backing
         * out the adjustment that was made to curthread when the original
         * was preempted.
         */
        KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
#ifdef SMP
        if (ntd->td_mpcount && mpheld == 0) {
            panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d",
                td, ntd, td->td_mpcount, ntd->td_mpcount);
        }
        if (ntd->td_mpcount) {
            td->td_mpcount -= ntd->td_mpcount;
            KKASSERT(td->td_mpcount >= 0);
        }
#endif
        ntd->td_flags |= TDF_PREEMPT_DONE;
        /* YYY release mp lock on switchback if original doesn't need it */
    } else {
        /*
         * Priority queue / round-robin at each priority.  Note that user
         * processes run at a fixed, low priority and the user process
         * scheduler deals with interactions between user processes
         * by scheduling and descheduling them from the LWKT queue as
         * necessary.
         *
         * We have to adjust the MP lock for the target thread.  If we
         * need the MP lock and cannot obtain it we try to locate a
         * thread that does not need the MP lock.  If we cannot, we spin
         * instead of HLT.
         *
         * A similar issue exists for the tokens held by the target thread.
         * If we cannot obtain ownership of the tokens we cannot immediately
         * schedule the thread.
         */

        /*
         * We are switching threads.  If there are any pending requests for
         * tokens we can satisfy all of them here.
         */
#ifdef SMP
        if (gd->gd_tokreqbase)
            lwkt_drain_token_requests();
#endif

again:
        if (gd->gd_runqmask) {
            int nq = bsrl(gd->gd_runqmask);
            if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
                gd->gd_runqmask &= ~(1 << nq);
                goto again;
            }
#ifdef SMP
            /*
             * If the target needs the MP lock and we couldn't get it,
             * or if the target is holding tokens and we could not
             * gain ownership of the tokens, continue looking for a
             * thread to schedule and spin instead of HLT if we can't.
             */
            if ((ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) ||
                (ntd->td_toks && lwkt_chktokens(ntd) == 0)
            ) {
                u_int32_t rqmask = gd->gd_runqmask;
                while (rqmask) {
                    TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
                        if (ntd->td_mpcount && !mpheld && !cpu_try_mplock())
                            continue;
                        mpheld = MP_LOCK_HELD();
                        if (ntd->td_toks && !lwkt_chktokens(ntd))
                            continue;
                        break;
                    }
                    if (ntd)
                        break;
                    rqmask &= ~(1 << nq);
                    nq = bsrl(rqmask);
                }
                if (ntd == NULL) {
                    ntd = &gd->gd_idlethread;
                    ntd->td_flags |= TDF_IDLE_NOHLT;
                } else {
                    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
                }
            } else {
                TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
            }
#else
            TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
            TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
#endif
        } else {
            /*
             * We have nothing to run; only let the idle loop halt
             * the cpu if there are no pending interrupts.
             */
            ntd = &gd->gd_idlethread;
            if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
                ntd->td_flags |= TDF_IDLE_NOHLT;
        }
    }
    KASSERT(ntd->td_pri >= TDPRI_CRIT,
        ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));

    /*
     * Do the actual switch.  If the new target does not need the MP lock
     * and we are holding it, release the MP lock.  If the new target requires
     * the MP lock we have already acquired it for the target.
     */
#ifdef SMP
    if (ntd->td_mpcount == 0) {
        if (MP_LOCK_HELD())
            cpu_rel_mplock();
    } else {
        ASSERT_MP_LOCK_HELD();
    }
#endif
    if (td != ntd)
        td->td_switch(ntd);
    /* NOTE: current cpu may have changed after switch */
    crit_exit_quick(td);
}
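/*
 * Illustrative note (not in the original source): the run queue selection
 * above is a simple bitmask scan.  gd_runqmask has bit N set whenever
 * gd_tdrunq[N] is non-empty, and bsrl() returns the index of the highest
 * set bit, i.e. the highest-priority non-empty queue.  For example, with
 * runnable threads only at priority levels 2 and 10:
 *
 *      gd_runqmask == (1 << 10) | (1 << 2) == 0x00000404
 *      nq = bsrl(gd_runqmask);                 (nq is 10)
 *      ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq]);
 *
 * Requeueing the selected thread at the tail of its queue is what produces
 * the round-robin behavior within a priority level.
 */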
/*
 * Request that the target thread preempt the current thread.  Preemption
 * only works under a specific set of conditions:
 *
 *	- We are not preempting ourselves
 *	- The target thread is owned by the current cpu
 *	- We are not currently being preempted
 *	- The target is not currently being preempted
 *	- We are able to satisfy the target's MP lock requirements (if any).
 *
 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION.  Typically
 * this is called via lwkt_schedule() through the td_preemptable callback.
 * critpri is the managed critical priority that we should ignore in order
 * to determine whether preemption is possible (aka usually just the crit
 * priority of lwkt_schedule() itself).
 *
 * XXX at the moment we run the target thread in a critical section during
 * the preemption in order to prevent the target from taking interrupts
 * that *WE* can't.  Preemption is strictly limited to interrupt threads
 * and interrupt-like threads, outside of a critical section, and the
 * preempted source thread will be resumed the instant the target blocks
 * whether or not the source is scheduled (i.e. preemption is supposed to
 * be as transparent as possible).
 *
 * The target thread inherits our MP count (added to its own) for the
 * duration of the preemption in order to preserve the atomicity of the
 * MP lock during the preemption.  Therefore, any preempting targets must be
 * careful with regard to MP assertions.  Note that the MP count may be
 * out of sync with the physical mp_lock, but we do not have to preserve
 * the original ownership of the lock if it was out of sync (that is, we
 * can leave it synchronized on return).
 */
void
lwkt_preempt(thread_t ntd, int critpri)
{
    struct globaldata *gd = mycpu;
    thread_t td;
#ifdef SMP
    int mpheld;
    int savecnt;
#endif

    /*
     * The caller has put us in a critical section.  We can only preempt
     * if the caller of the caller was not in a critical section (basically
     * a local interrupt), as determined by the 'critpri' parameter.
     *
     * YYY The target thread must be in a critical section (else it must
     * inherit our critical section?  I dunno yet).
     *
     * Any tokens held by the target may not be held by thread(s) being
     * preempted.  We take the easy way out and do not preempt if
     * the target is holding tokens.
     *
     * Set need_lwkt_resched() unconditionally for now YYY.
     */
    KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));

    td = gd->gd_curthread;
    need_lwkt_resched();
    if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) {
        ++preempt_miss;
        return;
    }
    if ((td->td_pri & ~TDPRI_MASK) > critpri) {
        ++preempt_miss;
        return;
    }
#ifdef SMP
    if (ntd->td_gd != gd) {
        ++preempt_miss;
        return;
    }
#endif
    /*
     * Take the easy way out and do not preempt if the target is holding
     * one or more tokens.  We could test whether the thread(s) being
     * preempted interlock against the target thread's tokens and whether
     * we can get all the target thread's tokens, but this situation
     * should not occur very often so it's easier to simply not preempt.
     */
    if (ntd->td_toks != NULL) {
        ++preempt_miss;
        return;
    }
    if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
        ++preempt_weird;
        return;
    }
    if (ntd->td_preempted) {
        ++preempt_hit;
        return;
    }
#ifdef SMP
    /*
     * note: an interrupt might have occurred just as we were transitioning
     * to or from the MP lock.  In this case td_mpcount will be pre-disposed
     * (non-zero) but not actually synchronized with the actual state of the
     * lock.  We can use it to imply an MP lock requirement for the
     * preemption but we cannot use it to test whether we hold the MP lock
     * or not.
     */
    savecnt = td->td_mpcount;
    mpheld = MP_LOCK_HELD();
    ntd->td_mpcount += td->td_mpcount;
    if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
        ntd->td_mpcount -= td->td_mpcount;
        ++preempt_miss;
        return;
    }
#endif

    ++preempt_hit;
    ntd->td_preempted = td;
    td->td_flags |= TDF_PREEMPT_LOCK;
    td->td_switch(ntd);
    KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
#ifdef SMP
    KKASSERT(savecnt == td->td_mpcount);
    mpheld = MP_LOCK_HELD();
    if (mpheld && td->td_mpcount == 0)
        cpu_rel_mplock();
    else if (mpheld == 0 && td->td_mpcount)
        panic("lwkt_preempt(): MP lock was not held through");
#endif
    ntd->td_preempted = NULL;
    td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
}

/*
 * Yield our thread while higher priority threads are pending.  This is
 * typically called when we leave a critical section but it can be safely
 * called while we are in a critical section.
 *
 * This function will not generally yield to equal priority threads but it
 * can occur as a side effect.  Note that lwkt_switch() is called from
 * inside the critical section to prevent its own crit_exit() from reentering
 * lwkt_yield_quick().
 *
 * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint
 * came along but was blocked and made pending.
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield_quick(void)
{
    globaldata_t gd = mycpu;
    thread_t td = gd->gd_curthread;

    /*
     * gd_reqflags is cleared in splz if the cpl is 0.  If we were to clear
     * it with a non-zero cpl then we might not wind up calling splz after
     * a task switch when the critical section is exited even though the
     * new task could accept the interrupt.
     *
     * XXX from crit_exit() only called after last crit section is released.
     * If called directly will run splz() even if in a critical section.
     *
     * td_nest_count prevents deep nesting via splz() or doreti().  Note that
     * except for this special case, we MUST call splz() here to handle any
     * pending ints, particularly after we switch, or we might accidentally
     * halt the cpu with interrupts pending.
     */
    if (gd->gd_reqflags && td->td_nest_count < 2)
        splz();

    /*
     * YYY enabling will cause wakeup() to task-switch, which really
     * confused the old 4.x code.  This is a good way to simulate
     * preemption and MP without actually doing preemption or MP, because a
     * lot of code assumes that wakeup() does not block.
     */
    if (untimely_switch && td->td_nest_count == 0 &&
        gd->gd_intr_nesting_level == 0
    ) {
        crit_enter_quick(td);
        /*
         * YYY temporary hacks until we disassociate the userland scheduler
         * from the LWKT scheduler.
         */
        if (td->td_flags & TDF_RUNQ) {
            lwkt_switch();              /* will not reenter yield function */
        } else {
            lwkt_schedule_self(td);     /* make sure we are scheduled */
            lwkt_switch();              /* will not reenter yield function */
            lwkt_deschedule_self(td);   /* make sure we are descheduled */
        }
        crit_exit_noyield(td);
    }
}

/*
 * This implements a normal yield which, unlike _quick, will yield to equal
 * priority threads as well.  Note that gd_reqflags tests will be handled by
 * the crit_exit() call in lwkt_switch().
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield(void)
{
    lwkt_schedule_self(curthread);
    lwkt_switch();
}
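/*
 * Illustrative sketch (not part of the original file): a CPU-bound kernel
 * loop can call lwkt_yield() periodically to let equal and higher priority
 * LWKTs run.  The 'fetch_next_work'/'process_work' helpers below are
 * hypothetical.
 *
 *      while ((item = fetch_next_work()) != NULL) {
 *          process_work(item);
 *          lwkt_yield();               (give other runnable LWKTs a chance)
 *      }
 */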
/*
 * Generic schedule.  Possibly schedule threads belonging to other cpus and
 * deal with threads that might be blocked on a wait queue.
 *
 * We have a little helper inline function which does additional work after
 * the thread has been enqueued, including dealing with preemption and
 * setting need_lwkt_resched() (which prevents the kernel from returning
 * to userland until it has processed higher priority threads).
 */
static __inline
void
_lwkt_schedule_post(thread_t ntd, int cpri)
{
    if (ntd->td_preemptable) {
        ntd->td_preemptable(ntd, cpri);         /* YYY +token */
    } else {
        if ((ntd->td_flags & TDF_NORESCHED) == 0) {
            if ((ntd->td_pri & TDPRI_MASK) >= TDPRI_KERN_USER)
                need_lwkt_resched();
        }
    }
}

void
lwkt_schedule(thread_t td)
{
    globaldata_t mygd = mycpu;

#ifdef INVARIANTS
    KASSERT(td != &td->td_gd->gd_idlethread,
        ("lwkt_schedule(): scheduling gd_idlethread is illegal!"));
    if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
        && td->td_proc->p_stat == SSLEEP
    ) {
        printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
            curthread,
            curthread->td_proc ? curthread->td_proc->p_pid : -1,
            curthread->td_proc ? curthread->td_proc->p_stat : -1,
            td,
            td->td_proc ? td->td_proc->p_pid : -1,
            td->td_proc ? td->td_proc->p_stat : -1
        );
        panic("SCHED PANIC");
    }
#endif
    crit_enter_gd(mygd);
    if (td == mygd->gd_curthread) {
        _lwkt_enqueue(td);
    } else {
        lwkt_wait_t w;

        /*
         * If the thread is on a wait list we have to send our scheduling
         * request to the owner of the wait structure.  Otherwise we send
         * the scheduling request to the cpu owning the thread.  Races
         * are ok, the target will forward the message as necessary (the
         * message may chase the thread around before it finally gets
         * acted upon).
         *
         * (remember, wait structures use stable storage)
         *
         * NOTE: tokens no longer enter a critical section, so we only need
         * to account for the crit_enter() above when calling
         * _lwkt_schedule_post().
         */
        if ((w = td->td_wait) != NULL) {
            lwkt_tokref wref;

            if (lwkt_trytoken(&wref, &w->wa_token)) {
                TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
                --w->wa_count;
                td->td_wait = NULL;
#ifdef SMP
                if (td->td_gd == mycpu) {
                    _lwkt_enqueue(td);
                    _lwkt_schedule_post(td, TDPRI_CRIT);
                } else {
                    lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td);
                }
#else
                _lwkt_enqueue(td);
                _lwkt_schedule_post(td, TDPRI_CRIT);
#endif
                lwkt_reltoken(&wref);
            } else {
                lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
            }
        } else {
            /*
             * If the wait structure is NULL and we own the thread, there
             * is no race (since we are in a critical section).  If we
             * do not own the thread there might be a race but the
             * target cpu will deal with it.
             */
#ifdef SMP
            if (td->td_gd == mygd) {
                _lwkt_enqueue(td);
                _lwkt_schedule_post(td, TDPRI_CRIT);
            } else {
                lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td);
            }
#else
            _lwkt_enqueue(td);
            _lwkt_schedule_post(td, TDPRI_CRIT);
#endif
        }
    }
    crit_exit_gd(mygd);
}
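/*
 * Illustrative sketch (not part of the original file): callers do not need
 * to know which cpu owns a thread; lwkt_schedule() handles any cross-cpu
 * forwarding itself via lwkt_send_ipiq().  A typical wakeup-side producer,
 * with the 'queue_work' helper and 'my_worker_td' name hypothetical, is
 * simply:
 *
 *      queue_work(item);
 *      lwkt_schedule(my_worker_td);    (may forward via an async IPI)
 */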
/*
 * Managed acquisition.  This code assumes that the MP lock is held for
 * the tdallq operation and that the thread has been descheduled from its
 * original cpu.  We also have to wait for the thread to be entirely switched
 * out on its original cpu (this is usually fast enough that we never loop)
 * since the LWKT system does not have to hold the MP lock while switching
 * and the target may have released it before switching.
 */
void
lwkt_acquire(thread_t td)
{
    globaldata_t gd;
    globaldata_t mygd;

    gd = td->td_gd;
    mygd = mycpu;
    KKASSERT((td->td_flags & TDF_RUNQ) == 0);
    while (td->td_flags & TDF_RUNNING)  /* XXX spin */
        cpu_mb1();
    if (gd != mygd) {
        crit_enter_gd(mygd);
        TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);      /* protected by BGL */
        td->td_gd = mygd;
        TAILQ_INSERT_TAIL(&mygd->gd_tdallq, td, td_allq); /* protected by BGL */
        crit_exit_gd(mygd);
    }
}

/*
 * Generic deschedule.  Descheduling threads other than your own should be
 * done only in carefully controlled circumstances.  Descheduling is
 * asynchronous.
 *
 * This function may block if the cpu has run out of messages.
 */
void
lwkt_deschedule(thread_t td)
{
    crit_enter();
    if (td == curthread) {
        _lwkt_dequeue(td);
    } else {
        if (td->td_gd == mycpu) {
            _lwkt_dequeue(td);
        } else {
            lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_deschedule, td);
        }
    }
    crit_exit();
}

/*
 * Set the target thread's priority.  This routine does not automatically
 * switch to a higher priority thread, LWKT threads are not designed for
 * continuous priority changes.  Yield if you want to switch.
 *
 * We have to retain the critical section count which uses the high bits
 * of the td_pri field.  The specified priority may also indicate zero or
 * more critical sections by adding TDPRI_CRIT*N.
 *
 * Note that we requeue the thread whether it winds up on a different runq
 * or not.  uio_yield() depends on this and the routine is not normally
 * called with the same priority otherwise.
 */
void
lwkt_setpri(thread_t td, int pri)
{
    KKASSERT(pri >= 0);
    KKASSERT(td->td_gd == mycpu);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

void
lwkt_setpri_self(int pri)
{
    thread_t td = curthread;

    KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}
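/*
 * Illustrative note (not in the original source): td_pri packs two things.
 * The low bits (masked by TDPRI_MASK) select the run queue, while each
 * nested critical section adds TDPRI_CRIT.  For example, a thread created
 * at TDPRI_KERN_DAEMON inside one critical section has
 *
 *      td_pri == TDPRI_KERN_DAEMON + TDPRI_CRIT
 *
 * and lwkt_setpri()/lwkt_setpri_self() above replace only the
 * (td_pri & TDPRI_MASK) portion, preserving the critical section count
 * carried in the high bits.
 */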
/*
 * Migrate the current thread to the specified cpu.  The BGL must be held
 * (for the gd_tdallq manipulation XXX).  This is accomplished by
 * descheduling ourselves from the current cpu, moving our thread to the
 * tdallq of the target cpu, IPI messaging the target cpu, and switching out.
 * TDF_MIGRATING prevents scheduling races while the thread is being migrated.
 */
#ifdef SMP
static void lwkt_setcpu_remote(void *arg);
#endif

void
lwkt_setcpu_self(globaldata_t rgd)
{
#ifdef SMP
    thread_t td = curthread;

    if (td->td_gd != rgd) {
        crit_enter_quick(td);
        td->td_flags |= TDF_MIGRATING;
        lwkt_deschedule_self(td);
        TAILQ_REMOVE(&td->td_gd->gd_tdallq, td, td_allq); /* protected by BGL */
        TAILQ_INSERT_TAIL(&rgd->gd_tdallq, td, td_allq);  /* protected by BGL */
        lwkt_send_ipiq(rgd, (ipifunc_t)lwkt_setcpu_remote, td);
        lwkt_switch();
        /* we are now on the target cpu */
        crit_exit_quick(td);
    }
#endif
}

/*
 * Remote IPI for cpu migration (called while in a critical section so we
 * do not have to enter another one).  The thread has already been moved to
 * our cpu's allq, but we must wait for the thread to be completely switched
 * out on the originating cpu before we schedule it on ours or the stack
 * state may be corrupt.  We clear TDF_MIGRATING after flushing the GD
 * change to main memory.
 *
 * XXX The use of TDF_MIGRATING might not be sufficient to avoid races
 * against wakeups.  It is best if this interface is used only when there
 * are no pending events that might try to schedule the thread.
 */
#ifdef SMP
static void
lwkt_setcpu_remote(void *arg)
{
    thread_t td = arg;
    globaldata_t gd = mycpu;

    while (td->td_flags & TDF_RUNNING)
        cpu_mb1();
    td->td_gd = gd;
    cpu_mb2();
    td->td_flags &= ~TDF_MIGRATING;
    _lwkt_enqueue(td);
}
#endif

struct proc *
lwkt_preempted_proc(void)
{
    thread_t td = curthread;
    while (td->td_preempted)
        td = td->td_preempted;
    return(td->td_proc);
}

/*
 * Block on the specified wait queue until signaled.  A generation number
 * must be supplied to interlock the wait queue.  The function will
 * return immediately if the generation number does not match the wait
 * structure's generation number.
 */
void
lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
{
    thread_t td = curthread;
    lwkt_tokref ilock;

    lwkt_gettoken(&ilock, &w->wa_token);
    crit_enter();
    if (w->wa_gen == *gen) {
        _lwkt_dequeue(td);
        TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
        ++w->wa_count;
        td->td_wait = w;
        td->td_wmesg = wmesg;
    again:
        lwkt_switch();
        if (td->td_wmesg != NULL) {
            _lwkt_dequeue(td);
            goto again;
        }
    }
    crit_exit();
    *gen = w->wa_gen;
    lwkt_reltoken(&ilock);
}
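/*
 * Illustrative sketch (not part of the original file): the generation number
 * interlocks the condition check against lwkt_signal().  The consumer samples
 * wa_gen before testing its condition; if a signal arrives in between, the
 * generation mismatch makes lwkt_block() return immediately instead of
 * sleeping through the wakeup.  The wait structure 'w' and the
 * 'resource_available()' predicate below are hypothetical.
 *
 *      int gen = w->wa_gen;
 *      while (!resource_available()) {
 *          lwkt_block(w, "reswait", &gen);
 *      }
 *
 * The producer side makes the resource available and then calls
 * lwkt_signal(w, 1) (defined below), which bumps wa_gen and wakes one waiter.
 */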
/*
 * Signal a wait queue.  We gain ownership of the wait queue in order to
 * signal it.  Once a thread is removed from the wait queue we have to
 * deal with the cpu owning the thread.
 *
 * Note: alternatively we could message the target cpu owning the wait
 * queue.  YYY implement as sysctl.
 */
void
lwkt_signal(lwkt_wait_t w, int count)
{
    thread_t td;
    lwkt_tokref ilock;

    lwkt_gettoken(&ilock, &w->wa_token);
    ++w->wa_gen;
    crit_enter();
    if (count < 0)
        count = w->wa_count;
    while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
        --count;
        --w->wa_count;
        TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
        td->td_wait = NULL;
        td->td_wmesg = NULL;
        if (td->td_gd == mycpu) {
            _lwkt_enqueue(td);
        } else {
            lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td);
        }
    }
    crit_exit();
    lwkt_reltoken(&ilock);
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.
 *
 * NOTE!  By default new threads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
lwkt_create(void (*func)(void *), void *arg,
    struct thread **tdp, thread_t template, int tdflags, int cpu,
    const char *fmt, ...)
{
    thread_t td;
    __va_list ap;

    td = lwkt_alloc_thread(template, cpu);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, lwkt_exit, func, arg);
    td->td_flags |= TDF_VERBOSE | tdflags;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    __va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    __va_end(ap);

    /*
     * Schedule the thread to run
     */
    if ((td->td_flags & TDF_STOPREQ) == 0)
        lwkt_schedule(td);
    else
        td->td_flags &= ~TDF_STOPREQ;
    return 0;
}
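/*
 * Illustrative sketch (not part of the original file): a typical caller
 * creates a self-contained service thread on the current cpu (cpu == -1) and
 * lets it drop the MP lock immediately since it does not need it.  The
 * 'my_service_loop' and 'my_service_td' names are hypothetical.
 *
 *      static thread_t my_service_td;
 *
 *      static void
 *      my_service_loop(void *arg)
 *      {
 *          rel_mplock();               (this thread does not need the MP lock)
 *          for (;;) {
 *              ... do work, block via lwkt_block()/tsleep() ...
 *          }
 *      }
 *
 *      lwkt_create(my_service_loop, NULL, &my_service_td, NULL, 0, -1,
 *          "myservice");
 */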
/*
 * kthread_* is specific to the kernel and is not needed by userland.
 */
#ifdef _KERNEL

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 */
void
lwkt_exit(void)
{
    thread_t td = curthread;
    globaldata_t gd;

    if (td->td_flags & TDF_VERBOSE)
        printf("kthread %p %s has exited\n", td, td->td_comm);
    caps_exit(td);
    crit_enter_quick(td);
    lwkt_deschedule_self(td);
    gd = mycpu;
    KKASSERT(gd == td->td_gd);
    TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
    if (td->td_flags & TDF_ALLOCATED_THREAD) {
        ++gd->gd_tdfreecount;
        TAILQ_INSERT_TAIL(&gd->gd_tdfreeq, td, td_threadq);
    }
    cpu_thread_exit();
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.  5.x compatible.
 *
 * NOTE!  By default kthreads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
kthread_create(void (*func)(void *), void *arg,
    struct thread **tdp, const char *fmt, ...)
{
    thread_t td;
    __va_list ap;

    td = lwkt_alloc_thread(NULL, -1);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, kthread_exit, func, arg);
    td->td_flags |= TDF_VERBOSE;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    __va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    __va_end(ap);

    /*
     * Schedule the thread to run
     */
    lwkt_schedule(td);
    return 0;
}

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 *
 * XXX duplicates lwkt_exit()
 */
void
kthread_exit(void)
{
    lwkt_exit();
}

#endif /* _KERNEL */

void
crit_panic(void)
{
    thread_t td = curthread;
    int lpri = td->td_pri;

    td->td_pri = 0;
    panic("td_pri is/would-go negative! %p %d", td, lpri);
}