/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.82 2005/07/21 06:28:50 dillon Exp $
 */

/*
 * Each cpu in a system has its own self-contained light weight kernel
 * thread scheduler, which means that generally speaking we only need
 * to use a critical section to avoid problems.  Foreign thread
 * scheduling is queued via (async) IPIs.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/atomic.h>
#include <machine/cpu.h>

#endif

static int untimely_switch = 0;
#ifdef INVARIANTS
static int panic_on_cscount = 0;
#endif
static __int64_t switch_count = 0;
static __int64_t preempt_hit = 0;
static __int64_t preempt_miss = 0;
static __int64_t preempt_weird = 0;
static __int64_t token_contention_count = 0;
static __int64_t mplock_contention_count = 0;

#ifdef _KERNEL

SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
#ifdef INVARIANTS
SYSCTL_INT(_lwkt, OID_AUTO, panic_on_cscount, CTLFLAG_RW, &panic_on_cscount, 0, "");
#endif
SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
#ifdef INVARIANTS
SYSCTL_QUAD(_lwkt, OID_AUTO, token_contention_count, CTLFLAG_RW,
	&token_contention_count, 0, "spinning due to token contention");
SYSCTL_QUAD(_lwkt, OID_AUTO, mplock_contention_count, CTLFLAG_RW,
	&mplock_contention_count, 0, "spinning due to MPLOCK contention");
#endif
#endif

/*
 * These helper procedures handle the runq, they can only be called from
 * within a critical section.
 *
 * WARNING!  Prior to SMP being brought up it is possible to enqueue and
 * dequeue threads belonging to other cpus, so be sure to use td->td_gd
 * instead of 'mycpu' when referencing the globaldata structure.  Once
 * SMP is live, enqueueing and dequeueing only occur on the current cpu.
 */
static __inline
void
_lwkt_dequeue(thread_t td)
{
	if (td->td_flags & TDF_RUNQ) {
		int nq = td->td_pri & TDPRI_MASK;
		struct globaldata *gd = td->td_gd;

		td->td_flags &= ~TDF_RUNQ;
		TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
		/* runqmask is passively cleaned up by the switcher */
	}
}

static __inline
void
_lwkt_enqueue(thread_t td)
{
	if ((td->td_flags & (TDF_RUNQ|TDF_MIGRATING)) == 0) {
		int nq = td->td_pri & TDPRI_MASK;
		struct globaldata *gd = td->td_gd;

		td->td_flags |= TDF_RUNQ;
		TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
		gd->gd_runqmask |= 1 << nq;
	}
}
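
/*
 * Illustrative note on the queue encoding used by the helpers above: the
 * critical section count lives in the bits of td_pri above TDPRI_MASK, so
 * a thread initialized to (TDPRI_KERN_DAEMON + TDPRI_CRIT), as
 * lwkt_init_thread() does below, lands on gd_tdrunq[TDPRI_KERN_DAEMON].
 * Each non-empty queue has its bit set in gd_runqmask, which is how
 * lwkt_switch() finds the highest priority runnable thread with a single
 * bsrl().
 */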

/*
 * Schedule a thread to run.  As the current thread we can always safely
 * schedule ourselves, and a shortcut procedure is provided for that
 * function.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_schedule_self(thread_t td)
{
	crit_enter_quick(td);
	KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
	KASSERT(td != &td->td_gd->gd_idlethread, ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
	_lwkt_enqueue(td);
#ifdef _KERNEL
	if (td->td_proc && td->td_proc->p_stat == SSLEEP)
		panic("SCHED SELF PANIC");
#endif
	crit_exit_quick(td);
}

/*
 * Deschedule a thread.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_deschedule_self(thread_t td)
{
	crit_enter_quick(td);
	KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
	_lwkt_dequeue(td);
	crit_exit_quick(td);
}

#ifdef _KERNEL

/*
 * LWKTs operate on a per-cpu basis
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
lwkt_gdinit(struct globaldata *gd)
{
	int i;

	for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
		TAILQ_INIT(&gd->gd_tdrunq[i]);
	gd->gd_runqmask = 0;
	TAILQ_INIT(&gd->gd_tdallq);
}

#endif /* _KERNEL */

/*
 * Initialize a thread wait structure prior to first use.
 *
 * NOTE!  Called from low level boot code, we cannot do anything fancy!
 */
void
lwkt_wait_init(lwkt_wait_t w)
{
	lwkt_token_init(&w->wa_token);
	TAILQ_INIT(&w->wa_waitq);
	w->wa_gen = 0;
	w->wa_count = 0;
}
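
/*
 * Note on thread allocation (summarizing the code below): thread structures
 * are cached on a small per-cpu free list (gd_tdfreeq / gd_tdfreecount) so
 * that lwkt_alloc_thread() can usually avoid the zone allocator;
 * lwkt_free_thread() and lwkt_exit() return reusable threads to that cache.
 */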

/*
 * Create a new thread.  The thread must be associated with a process context
 * or LWKT start address before it can be scheduled.  If the target cpu is
 * -1 the thread will be created on the current cpu.
 *
 * If you intend to create a thread without a process context this function
 * does everything except load the startup and switcher function.
 */
thread_t
lwkt_alloc_thread(struct thread *td, int stksize, int cpu)
{
	void *stack;
	int flags = 0;
	globaldata_t gd = mycpu;

	if (td == NULL) {
		crit_enter_gd(gd);
		if (gd->gd_tdfreecount > 0) {
			--gd->gd_tdfreecount;
			td = TAILQ_FIRST(&gd->gd_tdfreeq);
			KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0,
				("lwkt_alloc_thread: unexpected NULL or corrupted td"));
			TAILQ_REMOVE(&gd->gd_tdfreeq, td, td_threadq);
			crit_exit_gd(gd);
			flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
		} else {
			crit_exit_gd(gd);
#ifdef _KERNEL
			td = zalloc(thread_zone);
#else
			td = malloc(sizeof(struct thread));
#endif
			td->td_kstack = NULL;
			td->td_kstack_size = 0;
			flags |= TDF_ALLOCATED_THREAD;
		}
	}
	if ((stack = td->td_kstack) != NULL && td->td_kstack_size != stksize) {
		if (flags & TDF_ALLOCATED_STACK) {
#ifdef _KERNEL
			kmem_free(kernel_map, (vm_offset_t)stack, td->td_kstack_size);
#else
			libcaps_free_stack(stack, td->td_kstack_size);
#endif
			stack = NULL;
		}
	}
	if (stack == NULL) {
#ifdef _KERNEL
		stack = (void *)kmem_alloc(kernel_map, stksize);
#else
		stack = libcaps_alloc_stack(stksize);
#endif
		flags |= TDF_ALLOCATED_STACK;
	}
	if (cpu < 0)
		lwkt_init_thread(td, stack, stksize, flags, mycpu);
	else
		lwkt_init_thread(td, stack, stksize, flags, globaldata_find(cpu));
	return(td);
}

#ifdef _KERNEL

/*
 * Initialize a preexisting thread structure.  This function is used by
 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
 *
 * All threads start out in a critical section at a priority of
 * TDPRI_KERN_DAEMON.  Higher level code will modify the priority as
 * appropriate.  This function may send an IPI message when the
 * requested cpu is not the current cpu and consequently gd_tdallq may
 * not be initialized synchronously from the point of view of the originating
 * cpu.
 *
 * NOTE!  We have to be careful with regard to creating threads for other
 * cpus if SMP has not yet been activated.
 */
#ifdef SMP

static void
lwkt_init_thread_remote(void *arg)
{
	thread_t td = arg;

	TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq);
}

#endif

void
lwkt_init_thread(thread_t td, void *stack, int stksize, int flags,
	struct globaldata *gd)
{
	globaldata_t mygd = mycpu;

	bzero(td, sizeof(struct thread));
	td->td_kstack = stack;
	td->td_kstack_size = stksize;
	td->td_flags |= flags;
	td->td_gd = gd;
	td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
	lwkt_initport(&td->td_msgport, td);
	pmap_init_thread(td);
#ifdef SMP
	/*
	 * Normally initializing a thread for a remote cpu requires sending an
	 * IPI.  However, the idlethread is set up before the other cpus are
	 * activated so we have to treat it as a special case.  XXX manipulation
	 * of gd_tdallq requires the BGL.
	 */
	if (gd == mygd || td == &gd->gd_idlethread) {
		crit_enter_gd(mygd);
		TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
		crit_exit_gd(mygd);
	} else {
		lwkt_send_ipiq(gd, lwkt_init_thread_remote, td);
	}
#else
	crit_enter_gd(mygd);
	TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
	crit_exit_gd(mygd);
#endif
}

#endif /* _KERNEL */

void
lwkt_set_comm(thread_t td, const char *ctl, ...)
{
	__va_list va;

	__va_start(va, ctl);
	vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
	__va_end(va);
}

void
lwkt_hold(thread_t td)
{
	++td->td_refs;
}

void
lwkt_rele(thread_t td)
{
	KKASSERT(td->td_refs > 0);
	--td->td_refs;
}

#ifdef _KERNEL

void
lwkt_wait_free(thread_t td)
{
	while (td->td_refs)
		tsleep(td, 0, "tdreap", hz);
}

#endif

void
lwkt_free_thread(thread_t td)
{
	struct globaldata *gd = mycpu;

	KASSERT((td->td_flags & TDF_RUNNING) == 0,
		("lwkt_free_thread: did not exit! %p", td));

	crit_enter_gd(gd);
	TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
	if (gd->gd_tdfreecount < CACHE_NTHREADS &&
	    (td->td_flags & TDF_ALLOCATED_THREAD)
	) {
		++gd->gd_tdfreecount;
		TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
		crit_exit_gd(gd);
	} else {
		crit_exit_gd(gd);
		if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
#ifdef _KERNEL
			kmem_free(kernel_map, (vm_offset_t)td->td_kstack, td->td_kstack_size);
#else
			libcaps_free_stack(td->td_kstack, td->td_kstack_size);
#endif
			/* gd invalid */
			td->td_kstack = NULL;
			td->td_kstack_size = 0;
		}
		if (td->td_flags & TDF_ALLOCATED_THREAD) {
#ifdef _KERNEL
			zfree(thread_zone, td);
#else
			free(td);
#endif
		}
	}
}

/*
 * Switch to the next runnable lwkt.  If no LWKTs are runnable then
 * switch to the idlethread.  Switching must occur within a critical
 * section to avoid races with the scheduling queue.
 *
 * We always have full control over our cpu's run queue.  Other cpus
 * that wish to manipulate our queue must use the cpu_*msg() calls to
 * talk to our cpu, so a critical section is all that is needed and
 * the result is very, very fast thread switching.
 *
 * The LWKT scheduler uses a fixed priority model and round-robins at
 * each priority level.  User process scheduling is a totally
 * different beast and LWKT priorities should not be confused with
 * user process priorities.
 *
 * The MP lock may be out of sync with the thread's td_mpcount.  lwkt_switch()
 * cleans it up.  Note that the td_switch() function cannot do anything that
 * requires the MP lock since the MP lock will have already been set up for
 * the target thread (not the current thread).  It's nice to have a scheduler
 * that does not need the MP lock to work because it allows us to do some
 * really cool high-performance MP lock optimizations.
 */

void
lwkt_switch(void)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;
	thread_t ntd;
#ifdef SMP
	int mpheld;
#endif

	/*
	 * Switching from within a 'fast' (non thread switched) interrupt or IPI
	 * is illegal.  However, we may have to do it anyway if we hit a fatal
	 * kernel trap or we have panicked.
	 *
	 * If this case occurs save and restore the interrupt nesting level.
	 */
	if (gd->gd_intr_nesting_level) {
		int savegdnest;
		int savegdtrap;

		if (gd->gd_trap_nesting_level == 0 && panicstr == NULL) {
			panic("lwkt_switch: cannot switch from within "
			      "a fast interrupt, yet, td %p\n", td);
		} else {
			savegdnest = gd->gd_intr_nesting_level;
			savegdtrap = gd->gd_trap_nesting_level;
			gd->gd_intr_nesting_level = 0;
			gd->gd_trap_nesting_level = 0;
			if ((td->td_flags & TDF_PANICWARN) == 0) {
				td->td_flags |= TDF_PANICWARN;
				printf("Warning: thread switch from interrupt or IPI, "
				       "thread %p (%s)\n", td, td->td_comm);
#ifdef DDB
				db_print_backtrace();
#endif
			}
			lwkt_switch();
			gd->gd_intr_nesting_level = savegdnest;
			gd->gd_trap_nesting_level = savegdtrap;
			return;
		}
	}

	/*
	 * Passive release (used to transition from user to kernel mode
	 * when we block or switch rather than when we enter the kernel).
	 * This function is NOT called if we are switching into a preemption
	 * or returning from a preemption.  Typically this causes us to lose
	 * our current process designation (if we have one) and become a true
	 * LWKT thread, and may also hand the current process designation to
	 * another process and schedule its thread.
	 */
	if (td->td_release)
		td->td_release(td);

	crit_enter_gd(gd);

#ifdef SMP
	/*
	 * td_mpcount cannot be used to determine if we currently hold the
	 * MP lock because get_mplock() will increment it prior to attempting
	 * to get the lock, and switch out if it can't.  Our ownership of
	 * the actual lock will remain stable while we are in a critical section
	 * (but, of course, another cpu may own or release the lock so the
	 * actual value of mp_lock is not stable).
	 */
	mpheld = MP_LOCK_HELD();
#ifdef INVARIANTS
	if (td->td_cscount) {
		printf("Diagnostic: attempt to switch while mastering cpusync: %p\n",
		    td);
		if (panic_on_cscount)
			panic("switching while mastering cpusync");
	}
#endif
#endif
	if ((ntd = td->td_preempted) != NULL) {
		/*
		 * We had preempted another thread on this cpu, resume the preempted
		 * thread.  This occurs transparently, whether the preempted thread
		 * was scheduled or not (it may have been preempted after descheduling
		 * itself).
		 *
		 * We have to set up the MP lock for the original thread after backing
		 * out the adjustment that was made to curthread when the original
		 * was preempted.
		 */
		KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
#ifdef SMP
		if (ntd->td_mpcount && mpheld == 0) {
			panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d",
			    td, ntd, td->td_mpcount, ntd->td_mpcount);
		}
		if (ntd->td_mpcount) {
			td->td_mpcount -= ntd->td_mpcount;
			KKASSERT(td->td_mpcount >= 0);
		}
#endif
		ntd->td_flags |= TDF_PREEMPT_DONE;

		/*
		 * XXX.  The interrupt may have woken a thread up, we need to properly
		 * set the reschedule flag if the originally interrupted thread is at
		 * a lower priority.
		 */
		if (gd->gd_runqmask > (2 << (ntd->td_pri & TDPRI_MASK)) - 1)
			need_lwkt_resched();
		/* YYY release mp lock on switchback if original doesn't need it */
	} else {
		/*
		 * Priority queue / round-robin at each priority.  Note that user
		 * processes run at a fixed, low priority and the user process
		 * scheduler deals with interactions between user processes
		 * by scheduling and descheduling them from the LWKT queue as
		 * necessary.
		 *
		 * We have to adjust the MP lock for the target thread.  If we
		 * need the MP lock and cannot obtain it we try to locate a
		 * thread that does not need the MP lock.  If we cannot, we spin
		 * instead of HLT.
		 *
		 * A similar issue exists for the tokens held by the target thread.
		 * If we cannot obtain ownership of the tokens we cannot immediately
		 * schedule the thread.
		 */

		/*
		 * We are switching threads.  If there are any pending requests for
		 * tokens we can satisfy all of them here.
		 */
#ifdef SMP
		if (gd->gd_tokreqbase)
			lwkt_drain_token_requests();
#endif

		/*
		 * If an LWKT reschedule was requested, well that is what we are
		 * doing now so clear it.
		 */
		clear_lwkt_resched();
again:
		if (gd->gd_runqmask) {
			int nq = bsrl(gd->gd_runqmask);
			if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
				gd->gd_runqmask &= ~(1 << nq);
				goto again;
			}
#ifdef SMP
			/*
			 * THREAD SELECTION FOR AN SMP MACHINE BUILD
			 *
			 * If the target needs the MP lock and we couldn't get it,
			 * or if the target is holding tokens and we could not
			 * gain ownership of the tokens, continue looking for a
			 * thread to schedule and spin instead of HLT if we can't.
			 *
			 * NOTE: the mpheld variable is invalid after this conditional, it
			 * can change due to both cpu_try_mplock() returning success
			 * AND interactions in lwkt_chktokens() due to the fact that
			 * we are trying to check the mpcount of a thread other than
			 * the current thread.  Because of this, if the current thread
			 * is not holding td_mpcount, an IPI indirectly run via
			 * lwkt_chktokens() can obtain and release the MP lock and
			 * cause the core MP lock to be released.
			 */
			if ((ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) ||
			    (ntd->td_toks && lwkt_chktokens(ntd) == 0)
			) {
				u_int32_t rqmask = gd->gd_runqmask;

				mpheld = MP_LOCK_HELD();
				ntd = NULL;
				while (rqmask) {
					TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
						if (ntd->td_mpcount && !mpheld && !cpu_try_mplock()) {
							/* spinning due to MP lock being held */
#ifdef INVARIANTS
							++mplock_contention_count;
#endif
							/* mplock still not held, 'mpheld' still valid */
							continue;
						}

						/*
						 * mpheld state invalid after chktokens call returns
						 * failure, but the variable is only needed for
						 * the loop.
						 */
						if (ntd->td_toks && !lwkt_chktokens(ntd)) {
							/* spinning due to token contention */
#ifdef INVARIANTS
							++token_contention_count;
#endif
							mpheld = MP_LOCK_HELD();
							continue;
						}
						break;
					}
					if (ntd)
						break;
					rqmask &= ~(1 << nq);
					nq = bsrl(rqmask);
				}
				if (ntd == NULL) {
					ntd = &gd->gd_idlethread;
					ntd->td_flags |= TDF_IDLE_NOHLT;
					goto using_idle_thread;
				} else {
					TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
					TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
				}
			} else {
				TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
				TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
			}
#else
			/*
			 * THREAD SELECTION FOR A UP MACHINE BUILD.  We don't have to
			 * worry about tokens or the BGL.
			 */
			TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
			TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
#endif
		} else {
			/*
			 * We have nothing to run, but only let the idle loop halt
			 * the cpu if there are no pending interrupts.
			 */
			ntd = &gd->gd_idlethread;
			if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
				ntd->td_flags |= TDF_IDLE_NOHLT;
#ifdef SMP
using_idle_thread:
			/*
			 * The idle thread should not be holding the MP lock unless we
			 * are trapping in the kernel or in a panic.  Since we select the
			 * idle thread unconditionally when no other thread is available,
			 * if the MP lock is desired during a panic or kernel trap, we
			 * have to loop in the scheduler until we get it.
			 */
			if (ntd->td_mpcount) {
				mpheld = MP_LOCK_HELD();
				if (gd->gd_trap_nesting_level == 0 && panicstr == NULL)
					panic("Idle thread %p was holding the BGL!", ntd);
				else if (mpheld == 0)
					goto again;
			}
#endif
		}
	}
	KASSERT(ntd->td_pri >= TDPRI_CRIT,
		("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));

	/*
	 * Do the actual switch.  If the new target does not need the MP lock
	 * and we are holding it, release the MP lock.  If the new target requires
	 * the MP lock we have already acquired it for the target.
	 */
#ifdef SMP
	if (ntd->td_mpcount == 0) {
		if (MP_LOCK_HELD())
			cpu_rel_mplock();
	} else {
		ASSERT_MP_LOCK_HELD(ntd);
	}
#endif
	if (td != ntd) {
		++switch_count;
		td->td_switch(ntd);
	}
	/* NOTE: current cpu may have changed after switch */
	crit_exit_quick(td);
}

/*
 * Request that the target thread preempt the current thread.  Preemption
 * only works under a specific set of conditions:
 *
 *	- We are not preempting ourselves
 *	- The target thread is owned by the current cpu
 *	- We are not currently being preempted
 *	- The target is not currently being preempted
 *	- We are able to satisfy the target's MP lock requirements (if any).
 *
 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION.  Typically
 * this is called via lwkt_schedule() through the td_preemptable callback.
 * critpri is the managed critical priority that we should ignore in order
 * to determine whether preemption is possible (aka usually just the crit
 * priority of lwkt_schedule() itself).
 *
 * XXX at the moment we run the target thread in a critical section during
 * the preemption in order to prevent the target from taking interrupts
 * that *WE* can't.  Preemption is strictly limited to interrupt threads
 * and interrupt-like threads, outside of a critical section, and the
 * preempted source thread will be resumed the instant the target blocks
 * whether or not the source is scheduled (i.e. preemption is supposed to
 * be as transparent as possible).
 *
 * The target thread inherits our MP count (added to its own) for the
 * duration of the preemption in order to preserve the atomicity of the
 * MP lock during the preemption.  Therefore, any preempting targets must be
 * careful with regard to MP assertions.  Note that the MP count may be
 * out of sync with the physical mp_lock, but we do not have to preserve
 * the original ownership of the lock if it was out of sync (that is, we
 * can leave it synchronized on return).
 */
void
lwkt_preempt(thread_t ntd, int critpri)
{
	struct globaldata *gd = mycpu;
	thread_t td;
#ifdef SMP
	int mpheld;
	int savecnt;
#endif

	/*
	 * The caller has put us in a critical section.  We can only preempt
	 * if the caller of the caller was not in a critical section (basically
	 * a local interrupt), as determined by the 'critpri' parameter.
	 *
	 * YYY The target thread must be in a critical section (else it must
	 * inherit our critical section?  I dunno yet).
	 *
	 * Any tokens held by the target may not be held by thread(s) being
	 * preempted.  We take the easy way out and do not preempt if
	 * the target is holding tokens.
	 *
	 * Set need_lwkt_resched() unconditionally for now YYY.
	 */
	KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));

	td = gd->gd_curthread;
	if ((ntd->td_pri & TDPRI_MASK) <= (td->td_pri & TDPRI_MASK)) {
		++preempt_miss;
		return;
	}
	if ((td->td_pri & ~TDPRI_MASK) > critpri) {
		++preempt_miss;
		need_lwkt_resched();
		return;
	}
#ifdef SMP
	if (ntd->td_gd != gd) {
		++preempt_miss;
		need_lwkt_resched();
		return;
	}
#endif
	/*
	 * Take the easy way out and do not preempt if the target is holding
	 * one or more tokens.  We could test whether the thread(s) being
	 * preempted interlock against the target thread's tokens and whether
	 * we can get all the target thread's tokens, but this situation
	 * should not occur very often so it's easier to simply not preempt.
	 */
	if (ntd->td_toks != NULL) {
		++preempt_miss;
		need_lwkt_resched();
		return;
	}
	if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
		++preempt_weird;
		need_lwkt_resched();
		return;
	}
	if (ntd->td_preempted) {
		++preempt_hit;
		need_lwkt_resched();
		return;
	}
#ifdef SMP
	/*
	 * NOTE: an interrupt might have occurred just as we were transitioning
	 * to or from the MP lock.  In this case td_mpcount will be pre-disposed
	 * (non-zero) but not actually synchronized with the actual state of the
	 * lock.  We can use it to imply an MP lock requirement for the
	 * preemption but we cannot use it to test whether we hold the MP lock
	 * or not.
	 */
	savecnt = td->td_mpcount;
	mpheld = MP_LOCK_HELD();
	ntd->td_mpcount += td->td_mpcount;
	if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
		ntd->td_mpcount -= td->td_mpcount;
		++preempt_miss;
		need_lwkt_resched();
		return;
	}
#endif

	/*
	 * Since we are able to preempt the current thread, there is no need to
	 * call need_lwkt_resched().
	 */
	++preempt_hit;
	ntd->td_preempted = td;
	td->td_flags |= TDF_PREEMPT_LOCK;
	td->td_switch(ntd);
	KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
#ifdef SMP
	KKASSERT(savecnt == td->td_mpcount);
	mpheld = MP_LOCK_HELD();
	if (mpheld && td->td_mpcount == 0)
		cpu_rel_mplock();
	else if (mpheld == 0 && td->td_mpcount)
		panic("lwkt_preempt(): MP lock was not held through");
#endif
	ntd->td_preempted = NULL;
	td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
}
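
/*
 * A note on the counters used above: preempt_hit, preempt_miss and
 * preempt_weird are exported through the lwkt sysctl tree near the top of
 * this file, so the frequency of successful, refused, and unexpected
 * preemption requests can be observed when tuning interrupt thread
 * behavior.
 */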

/*
 * Yield our thread while higher priority threads are pending.  This is
 * typically called when we leave a critical section but it can be safely
 * called while we are in a critical section.
 *
 * This function will not generally yield to equal priority threads but it
 * can occur as a side effect.  Note that lwkt_switch() is called from
 * inside the critical section to prevent its own crit_exit() from reentering
 * lwkt_yield_quick().
 *
 * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint
 * came along but was blocked and made pending.
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield_quick(void)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;

	/*
	 * gd_reqflags is cleared in splz if the cpl is 0.  If we were to clear
	 * it with a non-zero cpl then we might not wind up calling splz after
	 * a task switch when the critical section is exited even though the
	 * new task could accept the interrupt.
	 *
	 * XXX from crit_exit() only called after last crit section is released.
	 * If called directly will run splz() even if in a critical section.
	 *
	 * td_nest_count prevents deep nesting via splz() or doreti().  Note that
	 * except for this special case, we MUST call splz() here to handle any
	 * pending ints, particularly after we switch, or we might accidentally
	 * halt the cpu with interrupts pending.
	 */
	if (gd->gd_reqflags && td->td_nest_count < 2)
		splz();

	/*
	 * YYY enabling will cause wakeup() to task-switch, which really
	 * confused the old 4.x code.  This is a good way to simulate
	 * preemption and MP without actually doing preemption or MP, because a
	 * lot of code assumes that wakeup() does not block.
	 */
	if (untimely_switch && td->td_nest_count == 0 &&
	    gd->gd_intr_nesting_level == 0
	) {
		crit_enter_quick(td);
		/*
		 * YYY temporary hacks until we disassociate the userland scheduler
		 * from the LWKT scheduler.
		 */
		if (td->td_flags & TDF_RUNQ) {
			lwkt_switch();		/* will not reenter yield function */
		} else {
			lwkt_schedule_self(td);	/* make sure we are scheduled */
			lwkt_switch();		/* will not reenter yield function */
			lwkt_deschedule_self(td); /* make sure we are descheduled */
		}
		crit_exit_noyield(td);
	}
}

/*
 * This implements a normal yield which, unlike _quick, will yield to equal
 * priority threads as well.  Note that gd_reqflags tests will be handled by
 * the crit_exit() call in lwkt_switch().
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield(void)
{
	lwkt_schedule_self(curthread);
	lwkt_switch();
}

/*
 * Generic schedule.  Possibly schedule threads belonging to other cpus and
 * deal with threads that might be blocked on a wait queue.
 *
 * We have a little helper inline function which does additional work after
 * the thread has been enqueued, including dealing with preemption and
 * setting need_lwkt_resched() (which prevents the kernel from returning
 * to userland until it has processed higher priority threads).
 */
static __inline
void
_lwkt_schedule_post(globaldata_t gd, thread_t ntd, int cpri)
{
	if (ntd->td_preemptable) {
		ntd->td_preemptable(ntd, cpri);	/* YYY +token */
	} else if ((ntd->td_flags & TDF_NORESCHED) == 0 &&
	    (ntd->td_pri & TDPRI_MASK) > (gd->gd_curthread->td_pri & TDPRI_MASK)
	) {
		need_lwkt_resched();
	}
}

void
lwkt_schedule(thread_t td)
{
	globaldata_t mygd = mycpu;

#ifdef INVARIANTS
	KASSERT(td != &td->td_gd->gd_idlethread, ("lwkt_schedule(): scheduling gd_idlethread is illegal!"));
	if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
	    && td->td_proc->p_stat == SSLEEP
	) {
		printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
		    curthread,
		    curthread->td_proc ? curthread->td_proc->p_pid : -1,
		    curthread->td_proc ? curthread->td_proc->p_stat : -1,
		    td,
		    td->td_proc ? td->td_proc->p_pid : -1,
		    td->td_proc ? td->td_proc->p_stat : -1
		);
		panic("SCHED PANIC");
	}
#endif
	crit_enter_gd(mygd);
	if (td == mygd->gd_curthread) {
		_lwkt_enqueue(td);
	} else {
		lwkt_wait_t w;

		/*
		 * If the thread is on a wait list we have to send our scheduling
		 * request to the owner of the wait structure.  Otherwise we send
		 * the scheduling request to the cpu owning the thread.  Races
		 * are ok, the target will forward the message as necessary (the
		 * message may chase the thread around before it finally gets
		 * acted upon).
		 *
		 * (remember, wait structures use stable storage)
		 *
		 * NOTE: we have to account for the number of critical sections
		 * under our control when calling _lwkt_schedule_post() so it
		 * can figure out whether preemption is allowed.
		 *
		 * NOTE: The wait structure algorithms are a mess and need to be
		 * rewritten.
		 *
		 * NOTE: We cannot safely acquire or release a token, even
		 * non-blocking, because this routine may be called in the context
		 * of a thread already holding the token and thus not provide any
		 * interlock protection.  We cannot safely manipulate the td_toks
		 * list for the same reason.  Instead we depend on our critical
		 * section if the token is owned by our cpu.
		 */
		if ((w = td->td_wait) != NULL) {
			if (w->wa_token.t_cpu == mygd) {
				TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
				--w->wa_count;
				td->td_wait = NULL;
#ifdef SMP
				if (td->td_gd == mygd) {
					_lwkt_enqueue(td);
					_lwkt_schedule_post(mygd, td, TDPRI_CRIT);
				} else {
					lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td);
				}
#else
				_lwkt_enqueue(td);
				_lwkt_schedule_post(mygd, td, TDPRI_CRIT);
#endif
			} else {
				lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
			}
		} else {
			/*
			 * If the wait structure is NULL and we own the thread, there
			 * is no race (since we are in a critical section).  If we
			 * do not own the thread there might be a race but the
			 * target cpu will deal with it.
			 */
#ifdef SMP
			if (td->td_gd == mygd) {
				_lwkt_enqueue(td);
				_lwkt_schedule_post(mygd, td, TDPRI_CRIT);
			} else {
				lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td);
			}
#else
			_lwkt_enqueue(td);
			_lwkt_schedule_post(mygd, td, TDPRI_CRIT);
#endif
		}
	}
	crit_exit_gd(mygd);
}

/*
 * Managed acquisition.  This code assumes that the MP lock is held for
 * the tdallq operation and that the thread has been descheduled from its
 * original cpu.  We also have to wait for the thread to be entirely switched
 * out on its original cpu (this is usually fast enough that we never loop)
 * since the LWKT system does not have to hold the MP lock while switching
 * and the target may have released it before switching.
 */
void
lwkt_acquire(thread_t td)
{
	globaldata_t gd;
	globaldata_t mygd;

	gd = td->td_gd;
	mygd = mycpu;
	cpu_lfence();
	KKASSERT((td->td_flags & TDF_RUNQ) == 0);
	while (td->td_flags & TDF_RUNNING)	/* XXX spin */
		cpu_lfence();
	if (gd != mygd) {
		crit_enter_gd(mygd);
		TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);	/* protected by BGL */
		td->td_gd = mygd;
		TAILQ_INSERT_TAIL(&mygd->gd_tdallq, td, td_allq); /* protected by BGL */
		crit_exit_gd(mygd);
	}
}

/*
 * Generic deschedule.  Descheduling threads other than your own should be
 * done only in carefully controlled circumstances.  Descheduling is
 * asynchronous.
 *
 * This function may block if the cpu has run out of messages.
 */
void
lwkt_deschedule(thread_t td)
{
	crit_enter();
	if (td == curthread) {
		_lwkt_dequeue(td);
	} else {
		if (td->td_gd == mycpu) {
			_lwkt_dequeue(td);
		} else {
			lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_deschedule, td);
		}
	}
	crit_exit();
}

/*
 * Set the target thread's priority.  This routine does not automatically
 * switch to a higher priority thread, LWKT threads are not designed for
 * continuous priority changes.  Yield if you want to switch.
 *
 * We have to retain the critical section count which uses the high bits
 * of the td_pri field.  The specified priority may also indicate zero or
 * more critical sections by adding TDPRI_CRIT*N.
 *
 * Note that we requeue the thread whether it winds up on a different runq
 * or not.  uio_yield() depends on this and the routine is not normally
 * called with the same priority otherwise.
 */
void
lwkt_setpri(thread_t td, int pri)
{
	KKASSERT(pri >= 0);
	KKASSERT(td->td_gd == mycpu);
	crit_enter();
	if (td->td_flags & TDF_RUNQ) {
		_lwkt_dequeue(td);
		td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
		_lwkt_enqueue(td);
	} else {
		td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
	}
	crit_exit();
}

void
lwkt_setpri_self(int pri)
{
	thread_t td = curthread;

	KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
	crit_enter();
	if (td->td_flags & TDF_RUNQ) {
		_lwkt_dequeue(td);
		td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
		_lwkt_enqueue(td);
	} else {
		td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
	}
	crit_exit();
}

/*
 * Determine if there is a runnable thread at a higher priority than
 * the current thread.  lwkt_setpri() does not check this automatically.
 * Return 1 if there is, 0 if there isn't.
 *
 * Example: if bit 31 of runqmask is set and the current thread is priority
 * 30, then we wind up checking the mask: 0x80000000 against 0x7fffffff.
 *
 * If nq reaches 31 the shift operation will overflow to 0 and we will wind
 * up comparing against 0xffffffff, a comparison that will always be false.
 */
int
lwkt_checkpri_self(void)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;
	int nq = td->td_pri & TDPRI_MASK;

	while (gd->gd_runqmask > (__uint32_t)(2 << nq) - 1) {
		if (TAILQ_FIRST(&gd->gd_tdrunq[nq + 1]))
			return(1);
		++nq;
	}
	return(0);
}

/*
 * Migrate the current thread to the specified cpu.  The BGL must be held
 * (for the gd_tdallq manipulation XXX).  This is accomplished by
 * descheduling ourselves from the current cpu, moving our thread to the
 * tdallq of the target cpu, IPI messaging the target cpu, and switching out.
 * TDF_MIGRATING prevents scheduling races while the thread is being migrated.
 */
#ifdef SMP
static void lwkt_setcpu_remote(void *arg);
#endif

void
lwkt_setcpu_self(globaldata_t rgd)
{
#ifdef SMP
	thread_t td = curthread;

	if (td->td_gd != rgd) {
		crit_enter_quick(td);
		td->td_flags |= TDF_MIGRATING;
		lwkt_deschedule_self(td);
		TAILQ_REMOVE(&td->td_gd->gd_tdallq, td, td_allq);	/* protected by BGL */
		TAILQ_INSERT_TAIL(&rgd->gd_tdallq, td, td_allq);	/* protected by BGL */
		lwkt_send_ipiq(rgd, (ipifunc_t)lwkt_setcpu_remote, td);
		lwkt_switch();
		/* we are now on the target cpu */
		crit_exit_quick(td);
	}
#endif
}

/*
 * Remote IPI for cpu migration (called while in a critical section so we
 * do not have to enter another one).  The thread has already been moved to
 * our cpu's allq, but we must wait for the thread to be completely switched
 * out on the originating cpu before we schedule it on ours or the stack
 * state may be corrupt.  We clear TDF_MIGRATING after flushing the GD
 * change to main memory.
 *
 * XXX The use of TDF_MIGRATING might not be sufficient to avoid races
 * against wakeups.  It is best if this interface is used only when there
 * are no pending events that might try to schedule the thread.
 */
#ifdef SMP
static void
lwkt_setcpu_remote(void *arg)
{
	thread_t td = arg;
	globaldata_t gd = mycpu;

	while (td->td_flags & TDF_RUNNING)
		cpu_lfence();
	td->td_gd = gd;
	cpu_sfence();
	td->td_flags &= ~TDF_MIGRATING;
	_lwkt_enqueue(td);
}
#endif

struct proc *
lwkt_preempted_proc(void)
{
	thread_t td = curthread;
	while (td->td_preempted)
		td = td->td_preempted;
	return(td->td_proc);
}

/*
 * Block on the specified wait queue until signaled.  A generation number
 * must be supplied to interlock the wait queue.  The function will
 * return immediately if the generation number does not match the wait
 * structure's generation number.
 */
void
lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
{
	thread_t td = curthread;
	lwkt_tokref ilock;

	lwkt_gettoken(&ilock, &w->wa_token);
	crit_enter();
	if (w->wa_gen == *gen) {
		_lwkt_dequeue(td);
		TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
		++w->wa_count;
		td->td_wait = w;
		td->td_wmesg = wmesg;
again:
		lwkt_switch();
		if (td->td_wmesg != NULL) {
			_lwkt_dequeue(td);
			goto again;
		}
	}
	crit_exit();
	*gen = w->wa_gen;
	lwkt_reltoken(&ilock);
}
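
/*
 * Illustrative usage of the generation interlock above (the helper name is
 * hypothetical):
 *
 *	int gen = w->wa_gen;
 *	while (resource_not_ready())
 *		lwkt_block(w, "reswait", &gen);
 *
 * Because lwkt_block() returns immediately when the generation numbers no
 * longer match, a wakeup that races the condition check is not lost.
 */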

/*
 * Signal a wait queue.  We gain ownership of the wait queue in order to
 * signal it.  Once a thread is removed from the wait queue we have to
 * deal with the cpu owning the thread.
 *
 * Note: alternatively we could message the target cpu owning the wait
 * queue.  YYY implement as sysctl.
 */
void
lwkt_signal(lwkt_wait_t w, int count)
{
	thread_t td;
	lwkt_tokref ilock;

	lwkt_gettoken(&ilock, &w->wa_token);
	++w->wa_gen;
	crit_enter();
	if (count < 0)
		count = w->wa_count;
	while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
		--count;
		--w->wa_count;
		TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
		td->td_wait = NULL;
		td->td_wmesg = NULL;
		if (td->td_gd == mycpu) {
			_lwkt_enqueue(td);
		} else {
			lwkt_send_ipiq(td->td_gd, (ipifunc_t)lwkt_schedule, td);
		}
	}
	crit_exit();
	lwkt_reltoken(&ilock);
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.
 *
 * NOTE!  By default new threads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
lwkt_create(void (*func)(void *), void *arg,
	struct thread **tdp, thread_t template, int tdflags, int cpu,
	const char *fmt, ...)
{
	thread_t td;
	__va_list ap;

	td = lwkt_alloc_thread(template, LWKT_THREAD_STACK, cpu);
	if (tdp)
		*tdp = td;
	cpu_set_thread_handler(td, lwkt_exit, func, arg);
	td->td_flags |= TDF_VERBOSE | tdflags;
#ifdef SMP
	td->td_mpcount = 1;
#endif

	/*
	 * Set up arg0 for 'ps' etc
	 */
	__va_start(ap, fmt);
	vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
	__va_end(ap);

	/*
	 * Schedule the thread to run
	 */
	if ((td->td_flags & TDF_STOPREQ) == 0)
		lwkt_schedule(td);
	else
		td->td_flags &= ~TDF_STOPREQ;
	return 0;
}

/*
 * kthread_* is specific to the kernel and is not needed by userland.
 */
#ifdef _KERNEL

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 */
void
lwkt_exit(void)
{
	thread_t td = curthread;
	globaldata_t gd;

	if (td->td_flags & TDF_VERBOSE)
		printf("kthread %p %s has exited\n", td, td->td_comm);
	caps_exit(td);
	crit_enter_quick(td);
	lwkt_deschedule_self(td);
	gd = mycpu;
	KKASSERT(gd == td->td_gd);
	TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
	if (td->td_flags & TDF_ALLOCATED_THREAD) {
		++gd->gd_tdfreecount;
		TAILQ_INSERT_TAIL(&gd->gd_tdfreeq, td, td_threadq);
	}
	cpu_thread_exit();
}

#endif /* _KERNEL */

void
crit_panic(void)
{
	thread_t td = curthread;
	int lpri = td->td_pri;

	td->td_pri = 0;
	panic("td_pri is/would-go negative! %p %d", td, lpri);
}

#ifdef SMP

/*
 * Called from debugger/panic on cpus which have been stopped.  We must still
 * process the IPIQ while stopped, even if we were stopped while in a critical
 * section (XXX).
 *
 * If we are dumping also try to process any pending interrupts.  This may
 * or may not work depending on the state of the cpu at the point it was
 * stopped.
 */
void
lwkt_smp_stopped(void)
{
	globaldata_t gd = mycpu;

	crit_enter_gd(gd);
	if (dumping) {
		lwkt_process_ipiq();
		splz();
	} else {
		lwkt_process_ipiq();
	}
	crit_exit_gd(gd);
}

#endif