/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.48 2004/01/18 12:29:49 dillon Exp $
 */

/*
 * Each cpu in a system has its own self-contained light weight kernel
 * thread scheduler, which means that generally speaking we only need
 * to use a critical section to avoid problems.  Foreign thread
 * scheduling is queued via (async) IPIs.
 *
 * NOTE: on UP machines smp_active is defined to be 0.  On SMP machines
 * smp_active is 0 prior to SMP activation, then it is 1.  The LWKT module
 * uses smp_active to optimize UP builds and to avoid sending IPIs during
 * early boot (primarily interrupt and network thread initialization).
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>

#define THREAD_STACK    (UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/cpufunc.h>
#include <machine/lock.h>

#endif

static int untimely_switch = 0;
#ifdef INVARIANTS
static int token_debug = 0;
#endif
static __int64_t switch_count = 0;
static __int64_t preempt_hit = 0;
static __int64_t preempt_miss = 0;
static __int64_t preempt_weird = 0;
#ifdef SMP
static __int64_t ipiq_count = 0;
static __int64_t ipiq_fifofull = 0;
#endif

#ifdef _KERNEL

SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
#ifdef INVARIANTS
SYSCTL_INT(_lwkt, OID_AUTO, token_debug, CTLFLAG_RW, &token_debug, 0, "");
#endif
SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
#endif

#endif

/*
 * These helper procedures handle the runq.  They can only be called from
 * within a critical section.
 *
 * WARNING!  Prior to SMP being brought up it is possible to enqueue and
 * dequeue threads belonging to other cpus, so be sure to use td->td_gd
 * instead of 'mycpu' when referencing the globaldata structure.  Once
 * SMP is live, enqueueing and dequeueing only occurs on the current cpu.
 */
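
/*
 * Illustrative sketch (not part of the scheduler proper): each cpu keeps
 * one TAILQ of runnable threads per priority level in gd_tdrunq[], and
 * gd_runqmask has one bit set for every non-empty queue.  Given those
 * invariants, selecting the next runnable thread reduces to a
 * find-highest-set-bit plus a queue head lookup, which is exactly what
 * lwkt_switch() does further below:
 *
 *      int nq = bsrl(gd->gd_runqmask);                 // highest non-empty level
 *      thread_t ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq]); // next thread to run
 *
 * _lwkt_enqueue() sets the corresponding bit when it inserts a thread;
 * stale bits left behind by _lwkt_dequeue() are cleaned up lazily by the
 * switcher when it finds an empty queue.
 */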
static __inline
void
_lwkt_dequeue(thread_t td)
{
    if (td->td_flags & TDF_RUNQ) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags &= ~TDF_RUNQ;
        TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
        /* runqmask is passively cleaned up by the switcher */
    }
}

static __inline
void
_lwkt_enqueue(thread_t td)
{
    if ((td->td_flags & TDF_RUNQ) == 0) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags |= TDF_RUNQ;
        TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
        gd->gd_runqmask |= 1 << nq;
    }
}

static __inline
int
_lwkt_wantresched(thread_t ntd, thread_t cur)
{
    return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK));
}

#ifdef _KERNEL

/*
 * LWKTs operate on a per-cpu basis
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
lwkt_gdinit(struct globaldata *gd)
{
    int i;

    for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
        TAILQ_INIT(&gd->gd_tdrunq[i]);
    gd->gd_runqmask = 0;
    TAILQ_INIT(&gd->gd_tdallq);
}

#endif /* _KERNEL */

/*
 * Initialize a thread wait structure prior to first use.
 *
 * NOTE!  Called from low level boot code, we cannot do anything fancy!
 */
void
lwkt_init_wait(lwkt_wait_t w)
{
    TAILQ_INIT(&w->wa_waitq);
}

/*
 * Create a new thread.  The thread must be associated with a process context
 * or LWKT start address before it can be scheduled.  If the target cpu is
 * -1 the thread will be created on the current cpu.
 *
 * If you intend to create a thread without a process context this function
 * does everything except load the startup and switcher function.
 */
thread_t
lwkt_alloc_thread(struct thread *td, int cpu)
{
    void *stack;
    int flags = 0;

    if (td == NULL) {
        crit_enter();
        if (mycpu->gd_tdfreecount > 0) {
            --mycpu->gd_tdfreecount;
            td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
            KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0,
                ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
            TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
            crit_exit();
            stack = td->td_kstack;
            flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
        } else {
            crit_exit();
#ifdef _KERNEL
            td = zalloc(thread_zone);
#else
            td = malloc(sizeof(struct thread));
#endif
            td->td_kstack = NULL;
            flags |= TDF_ALLOCATED_THREAD;
        }
    }
    if ((stack = td->td_kstack) == NULL) {
#ifdef _KERNEL
        stack = (void *)kmem_alloc(kernel_map, THREAD_STACK);
#else
        stack = libcaps_alloc_stack(THREAD_STACK);
#endif
        flags |= TDF_ALLOCATED_STACK;
    }
    if (cpu < 0)
        lwkt_init_thread(td, stack, flags, mycpu);
    else
        lwkt_init_thread(td, stack, flags, globaldata_find(cpu));
    return(td);
}
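
/*
 * A note on td_pri (an explanatory sketch, not an authoritative spec):
 * the low bits of td_pri (td_pri & TDPRI_MASK) hold the thread's scheduling
 * priority and select which gd_tdrunq[] queue the thread lives on, while
 * critical section nesting is carried in the same field in units of
 * TDPRI_CRIT.  For example, lwkt_init_thread() below starts every thread
 * with:
 *
 *      td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
 *
 * i.e. priority TDPRI_KERN_DAEMON with one critical section held, and
 * lwkt_preempt() compares (td_pri & ~TDPRI_MASK) against its 'critpri'
 * argument to decide whether the current critical section depth permits
 * preemption.
 */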

#ifdef _KERNEL

/*
 * Initialize a preexisting thread structure.  This function is used by
 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
 *
 * All threads start out in a critical section at a priority of
 * TDPRI_KERN_DAEMON.  Higher level code will modify the priority as
 * appropriate.  This function may send an IPI message when the
 * requested cpu is not the current cpu and consequently gd_tdallq may
 * not be initialized synchronously from the point of view of the originating
 * cpu.
 *
 * NOTE!  We have to be careful when creating threads for other cpus
 * if SMP has not yet been activated.
 */
static void
lwkt_init_thread_remote(void *arg)
{
    thread_t td = arg;

    TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq);
}

void
lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
{
    bzero(td, sizeof(struct thread));
    td->td_kstack = stack;
    td->td_flags |= flags;
    td->td_gd = gd;
    td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
    lwkt_initport(&td->td_msgport, td);
    pmap_init_thread(td);
    if (smp_active == 0 || gd == mycpu) {
        crit_enter();
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
        crit_exit();
    } else {
        lwkt_send_ipiq(gd->gd_cpuid, lwkt_init_thread_remote, td);
    }
}

#endif /* _KERNEL */

void
lwkt_set_comm(thread_t td, const char *ctl, ...)
{
    __va_list va;

    __va_start(va, ctl);
    vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
    __va_end(va);
}

void
lwkt_hold(thread_t td)
{
    ++td->td_refs;
}

void
lwkt_rele(thread_t td)
{
    KKASSERT(td->td_refs > 0);
    --td->td_refs;
}

#ifdef _KERNEL

void
lwkt_wait_free(thread_t td)
{
    while (td->td_refs)
        tsleep(td, 0, "tdreap", hz);
}

#endif

void
lwkt_free_thread(thread_t td)
{
    struct globaldata *gd = mycpu;

    KASSERT((td->td_flags & TDF_RUNNING) == 0,
        ("lwkt_free_thread: did not exit! %p", td));

    crit_enter();
    TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
    if (gd->gd_tdfreecount < CACHE_NTHREADS &&
        (td->td_flags & TDF_ALLOCATED_THREAD)
    ) {
        ++gd->gd_tdfreecount;
        TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
        crit_exit();
    } else {
        crit_exit();
        if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
#ifdef _KERNEL
            kmem_free(kernel_map, (vm_offset_t)td->td_kstack, THREAD_STACK);
#else
            libcaps_free_stack(td->td_kstack, THREAD_STACK);
#endif
            /* gd invalid */
            td->td_kstack = NULL;
        }
        if (td->td_flags & TDF_ALLOCATED_THREAD) {
#ifdef _KERNEL
            zfree(thread_zone, td);
#else
            free(td);
#endif
        }
    }
}

/*
 * Switch to the next runnable lwkt.  If no LWKTs are runnable then
 * switch to the idlethread.  Switching must occur within a critical
 * section to avoid races with the scheduling queue.
 *
 * We always have full control over our cpu's run queue.  Other cpus
 * that wish to manipulate our queue must use the cpu_*msg() calls to
 * talk to our cpu, so a critical section is all that is needed and
 * the result is very, very fast thread switching.
 *
 * The LWKT scheduler uses a fixed priority model and round-robins at
 * each priority level.  User process scheduling is a totally
 * different beast and LWKT priorities should not be confused with
 * user process priorities.
 *
 * The MP lock may be out of sync with the thread's td_mpcount.  lwkt_switch()
 * cleans it up.  Note that the td_switch() function cannot do anything that
 * requires the MP lock since the MP lock will have already been setup for
 * the target thread (not the current thread).  It's nice to have a scheduler
 * that does not need the MP lock to work because it allows us to do some
 * really cool high-performance MP lock optimizations.
 */

void
lwkt_switch(void)
{
    struct globaldata *gd;
    thread_t td = curthread;
    thread_t ntd;
#ifdef SMP
    int mpheld;
#endif

    /*
     * Switching from within a 'fast' (non thread switched) interrupt is
     * illegal.
     */
    if (mycpu->gd_intr_nesting_level && panicstr == NULL) {
        panic("lwkt_switch: cannot switch from within a fast interrupt, yet\n");
    }

    /*
     * Passive release (used to transition from user to kernel mode
     * when we block or switch rather than when we enter the kernel).
     * This function is NOT called if we are switching into a preemption
     * or returning from a preemption.  Typically this causes us to lose
     * our P_CURPROC designation (if we have one) and become a true LWKT
     * thread, and may also hand P_CURPROC to another process and schedule
     * its thread.
     */
    if (td->td_release)
        td->td_release(td);

    crit_enter();
    ++switch_count;

#ifdef SMP
    /*
     * td_mpcount cannot be used to determine if we currently hold the
     * MP lock because get_mplock() will increment it prior to attempting
     * to get the lock, and switch out if it can't.  Our ownership of
     * the actual lock will remain stable while we are in a critical section
     * (but, of course, another cpu may own or release the lock so the
     * actual value of mp_lock is not stable).
     */
    mpheld = MP_LOCK_HELD();
#endif
    if ((ntd = td->td_preempted) != NULL) {
        /*
         * We had preempted another thread on this cpu, resume the preempted
         * thread.  This occurs transparently, whether the preempted thread
         * was scheduled or not (it may have been preempted after descheduling
         * itself).
         *
         * We have to setup the MP lock for the original thread after backing
         * out the adjustment that was made to curthread when the original
         * was preempted.
         */
        KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
#ifdef SMP
        if (ntd->td_mpcount && mpheld == 0) {
            panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d\n",
               td, ntd, td->td_mpcount, ntd->td_mpcount);
        }
        if (ntd->td_mpcount) {
            td->td_mpcount -= ntd->td_mpcount;
            KKASSERT(td->td_mpcount >= 0);
        }
#endif
        ntd->td_flags |= TDF_PREEMPT_DONE;
        /* YYY release mp lock on switchback if original doesn't need it */
    } else {
        /*
         * Priority queue / round-robin at each priority.  Note that user
         * processes run at a fixed, low priority and the user process
         * scheduler deals with interactions between user processes
         * by scheduling and descheduling them from the LWKT queue as
         * necessary.
         *
         * We have to adjust the MP lock for the target thread.  If we
         * need the MP lock and cannot obtain it we try to locate a
         * thread that does not need the MP lock.
         */
        gd = mycpu;
again:
        if (gd->gd_runqmask) {
            int nq = bsrl(gd->gd_runqmask);
            if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
                gd->gd_runqmask &= ~(1 << nq);
                goto again;
            }
#ifdef SMP
            if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
                /*
                 * Target needs MP lock and we couldn't get it, try
                 * to locate a thread which does not need the MP lock
                 * to run.  If we cannot locate a thread spin in idle.
                 */
                u_int32_t rqmask = gd->gd_runqmask;
                while (rqmask) {
                    TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
                        if (ntd->td_mpcount == 0)
                            break;
                    }
                    if (ntd)
                        break;
                    rqmask &= ~(1 << nq);
                    nq = bsrl(rqmask);
                }
                if (ntd == NULL) {
                    ntd = &gd->gd_idlethread;
                    ntd->td_flags |= TDF_IDLE_NOHLT;
                } else {
                    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
                }
            } else {
                TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
            }
#else
            TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
            TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
#endif
        } else {
            /*
             * We have nothing to run, but only let the idle loop halt
             * the cpu if there are no pending interrupts.
             */
            ntd = &gd->gd_idlethread;
            if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
                ntd->td_flags |= TDF_IDLE_NOHLT;
        }
    }
    KASSERT(ntd->td_pri >= TDPRI_CRIT,
        ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));

    /*
     * Do the actual switch.  If the new target does not need the MP lock
     * and we are holding it, release the MP lock.  If the new target requires
     * the MP lock we have already acquired it for the target.
     */
#ifdef SMP
    if (ntd->td_mpcount == 0) {
        if (MP_LOCK_HELD())
            cpu_rel_mplock();
    } else {
        ASSERT_MP_LOCK_HELD();
    }
#endif
    if (td != ntd) {
        td->td_switch(ntd);
    }

    crit_exit();
}

/*
 * Switch if another thread has a higher priority.  Do not switch to other
 * threads at the same priority.
 */
void
lwkt_maybe_switch(void)
{
    struct globaldata *gd = mycpu;
    struct thread *td = gd->gd_curthread;

    if ((td->td_pri & TDPRI_MASK) < bsrl(gd->gd_runqmask)) {
        lwkt_switch();
    }
}

/*
 * Request that the target thread preempt the current thread.  Preemption
 * only works under a specific set of conditions:
 *
 *      - We are not preempting ourselves
 *      - The target thread is owned by the current cpu
 *      - We are not currently being preempted
 *      - The target is not currently being preempted
 *      - We are able to satisfy the target's MP lock requirements (if any).
 *
 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION.  Typically
 * this is called via lwkt_schedule() through the td_preemptable callback.
 * critpri is the managed critical priority that we should ignore in order
 * to determine whether preemption is possible (aka usually just the crit
 * priority of lwkt_schedule() itself).
 *
 * XXX at the moment we run the target thread in a critical section during
 * the preemption in order to prevent the target from taking interrupts
 * that *WE* can't.  Preemption is strictly limited to interrupt threads
 * and interrupt-like threads, outside of a critical section, and the
 * preempted source thread will be resumed the instant the target blocks
 * whether or not the source is scheduled (i.e. preemption is supposed to
 * be as transparent as possible).
 *
 * The target thread inherits our MP count (added to its own) for the
 * duration of the preemption in order to preserve the atomicity of the
 * MP lock during the preemption.  Therefore, any preempting targets must be
 * careful with regard to MP assertions.  Note that the MP count may be
 * out of sync with the physical mp_lock, but we do not have to preserve
 * the original ownership of the lock if it was out of sync (that is, we
 * can leave it synchronized on return).
 */
void
lwkt_preempt(thread_t ntd, int critpri)
{
    struct globaldata *gd = mycpu;
    thread_t td = gd->gd_curthread;
#ifdef SMP
    int mpheld;
    int savecnt;
#endif

    /*
     * The caller has put us in a critical section.  We can only preempt
     * if the caller of the caller was not in a critical section (basically
     * a local interrupt), as determined by the 'critpri' parameter.  If
     * we are unable to preempt the target is simply left on the run queue
     * and will be switched to normally.
     *
     * YYY The target thread must be in a critical section (else it must
     * inherit our critical section?  I dunno yet).
     */
    KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));

    need_resched();
    if (!_lwkt_wantresched(ntd, td)) {
        ++preempt_miss;
        return;
    }
    if ((td->td_pri & ~TDPRI_MASK) > critpri) {
        ++preempt_miss;
        return;
    }
#ifdef SMP
    if (ntd->td_gd != gd) {
        ++preempt_miss;
        return;
    }
#endif
    if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
        ++preempt_weird;
        return;
    }
    if (ntd->td_preempted) {
        ++preempt_hit;
        return;
    }
#ifdef SMP
    /*
     * note: an interrupt might have occurred just as we were transitioning
     * to or from the MP lock.  In this case td_mpcount will be pre-disposed
     * (non-zero) but not actually synchronized with the actual state of the
     * lock.  We can use it to imply an MP lock requirement for the
     * preemption but we cannot use it to test whether we hold the MP lock
     * or not.
     */
    savecnt = td->td_mpcount;
    mpheld = MP_LOCK_HELD();
    ntd->td_mpcount += td->td_mpcount;
    if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
        ntd->td_mpcount -= td->td_mpcount;
        ++preempt_miss;
        return;
    }
#endif

    ++preempt_hit;
    ntd->td_preempted = td;
    td->td_flags |= TDF_PREEMPT_LOCK;
    td->td_switch(ntd);
    KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
#ifdef SMP
    KKASSERT(savecnt == td->td_mpcount);
    mpheld = MP_LOCK_HELD();
    if (mpheld && td->td_mpcount == 0)
        cpu_rel_mplock();
    else if (mpheld == 0 && td->td_mpcount)
        panic("lwkt_preempt(): MP lock was not held through");
#endif
    ntd->td_preempted = NULL;
    td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
}

/*
 * Yield our thread while higher priority threads are pending.  This is
 * typically called when we leave a critical section but it can be safely
 * called while we are in a critical section.
 *
 * This function will not generally yield to equal priority threads but it
 * can occur as a side effect.  Note that lwkt_switch() is called from
 * inside the critical section to prevent its own crit_exit() from reentering
 * lwkt_yield_quick().
 *
 * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint
 * came along but was blocked and made pending.
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield_quick(void)
{
    globaldata_t gd = mycpu;
    thread_t td = gd->gd_curthread;

    /*
     * gd_reqflags is cleared in splz if the cpl is 0.  If we were to clear
     * it with a non-zero cpl then we might not wind up calling splz after
     * a task switch when the critical section is exited even though the
     * new task could accept the interrupt.
     *
     * XXX from crit_exit() only called after last crit section is released.
     * If called directly will run splz() even if in a critical section.
     *
     * td_nest_count prevents deep nesting via splz() or doreti().  Note that
     * except for this special case, we MUST call splz() here to handle any
     * pending ints, particularly after we switch, or we might accidentally
     * halt the cpu with interrupts pending.
     */
    if (gd->gd_reqflags && td->td_nest_count < 2)
        splz();

    /*
     * YYY enabling will cause wakeup() to task-switch, which really
     * confused the old 4.x code.  This is a good way to simulate
     * preemption and MP without actually doing preemption or MP, because a
     * lot of code assumes that wakeup() does not block.
     */
    if (untimely_switch && td->td_nest_count == 0 &&
        gd->gd_intr_nesting_level == 0
    ) {
        crit_enter();
        /*
         * YYY temporary hacks until we disassociate the userland scheduler
         * from the LWKT scheduler.
         */
        if (td->td_flags & TDF_RUNQ) {
            lwkt_switch();              /* will not reenter yield function */
        } else {
            lwkt_schedule_self();       /* make sure we are scheduled */
            lwkt_switch();              /* will not reenter yield function */
            lwkt_deschedule_self();     /* make sure we are descheduled */
        }
        crit_exit_noyield(td);
    }
}

/*
 * This implements a normal yield which, unlike _quick, will yield to equal
 * priority threads as well.  Note that gd_reqflags tests will be handled by
 * the crit_exit() call in lwkt_switch().
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield(void)
{
    lwkt_schedule_self();
    lwkt_switch();
}

/*
 * Schedule a thread to run.  As the current thread we can always safely
 * schedule ourselves, and a shortcut procedure is provided for that
 * function.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_schedule_self(void)
{
    thread_t td = curthread;

    crit_enter();
    KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
    _lwkt_enqueue(td);
#ifdef _KERNEL
    if (td->td_proc && td->td_proc->p_stat == SSLEEP)
        panic("SCHED SELF PANIC");
#endif
    crit_exit();
}

/*
 * Generic schedule.  Possibly schedule threads belonging to other cpus and
 * deal with threads that might be blocked on a wait queue.
 *
 * YYY this is one of the best places to implement load balancing code.
 * Load balancing can be accomplished by requesting other sorts of actions
 * for the thread in question.
 */
void
lwkt_schedule(thread_t td)
{
#ifdef INVARIANTS
    if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
        && td->td_proc->p_stat == SSLEEP
    ) {
        printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
            curthread,
            curthread->td_proc ? curthread->td_proc->p_pid : -1,
            curthread->td_proc ? curthread->td_proc->p_stat : -1,
            td,
            td->td_proc ? td->td_proc->p_pid : -1,
            td->td_proc ? td->td_proc->p_stat : -1
        );
        panic("SCHED PANIC");
    }
#endif
    crit_enter();
    if (td == curthread) {
        _lwkt_enqueue(td);
    } else {
        lwkt_wait_t w;

        /*
         * If the thread is on a wait list we have to send our scheduling
         * request to the owner of the wait structure.  Otherwise we send
         * the scheduling request to the cpu owning the thread.  Races
         * are ok, the target will forward the message as necessary (the
         * message may chase the thread around before it finally gets
         * acted upon).
         *
         * (remember, wait structures use stable storage)
         */
        if ((w = td->td_wait) != NULL) {
            if (lwkt_trytoken(&w->wa_token)) {
                TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
                --w->wa_count;
                td->td_wait = NULL;
                if (smp_active == 0 || td->td_gd == mycpu) {
                    _lwkt_enqueue(td);
                    if (td->td_preemptable) {
                        td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */
                    } else if (_lwkt_wantresched(td, curthread)) {
                        need_resched();
                    }
                } else {
                    lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
                }
                lwkt_reltoken(&w->wa_token);
            } else {
                lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
            }
        } else {
            /*
             * If the wait structure is NULL and we own the thread, there
             * is no race (since we are in a critical section).  If we
             * do not own the thread there might be a race but the
             * target cpu will deal with it.
             */
            if (smp_active == 0 || td->td_gd == mycpu) {
                _lwkt_enqueue(td);
                if (td->td_preemptable) {
                    td->td_preemptable(td, TDPRI_CRIT);
                } else if (_lwkt_wantresched(td, curthread)) {
                    need_resched();
                }
            } else {
                lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
            }
        }
    }
    crit_exit();
}

/*
 * Managed acquisition.  This code assumes that the MP lock is held for
 * the tdallq operation and that the thread has been descheduled from its
 * original cpu.  We also have to wait for the thread to be entirely switched
 * out on its original cpu (this is usually fast enough that we never loop)
 * since the LWKT system does not have to hold the MP lock while switching
 * and the target may have released it before switching.
 */
void
lwkt_acquire(thread_t td)
{
    struct globaldata *gd;

    gd = td->td_gd;
    KKASSERT((td->td_flags & TDF_RUNQ) == 0);
    while (td->td_flags & TDF_RUNNING)  /* XXX spin */
        ;
    if (gd != mycpu) {
        crit_enter();
        TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);      /* protected by BGL */
        gd = mycpu;
        td->td_gd = gd;
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); /* protected by BGL */
        crit_exit();
    }
}

/*
 * Deschedule a thread.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_deschedule_self(void)
{
    thread_t td = curthread;

    crit_enter();
    KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
    _lwkt_dequeue(td);
    crit_exit();
}
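
/*
 * Typical blocking pattern (an illustrative sketch only, not an API defined
 * in this file): a thread that wants to sleep dequeues itself and switches
 * away inside a critical section; whoever wakes it up simply calls
 * lwkt_schedule(), which forwards the request via an IPI if the thread
 * belongs to another cpu:
 *
 *      crit_enter();
 *      lwkt_deschedule_self();
 *      ... record this thread somewhere the waker can find it ...
 *      lwkt_switch();          // resumes here after lwkt_schedule() runs
 *      crit_exit();
 *
 * The wait queue code (lwkt_block()/lwkt_signal(), currently under #if 0
 * below) wraps this same pattern behind a token-protected wait structure
 * with a generation count to close the wakeup race.
 */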

/*
 * Generic deschedule.  Descheduling threads other than your own should be
 * done only in carefully controlled circumstances.  Descheduling is
 * asynchronous.
 *
 * This function may block if the cpu has run out of messages.
 */
void
lwkt_deschedule(thread_t td)
{
    crit_enter();
    if (td == curthread) {
        _lwkt_dequeue(td);
    } else {
        if (td->td_gd == mycpu) {
            _lwkt_dequeue(td);
        } else {
            lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_deschedule, td);
        }
    }
    crit_exit();
}

/*
 * Set the target thread's priority.  This routine does not automatically
 * switch to a higher priority thread; LWKT threads are not designed for
 * continuous priority changes.  Yield if you want to switch.
 *
 * We have to retain the critical section count which uses the high bits
 * of the td_pri field.  The specified priority may also indicate zero or
 * more critical sections by adding TDPRI_CRIT*N.
 */
void
lwkt_setpri(thread_t td, int pri)
{
    KKASSERT(pri >= 0);
    KKASSERT(td->td_gd == mycpu);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

void
lwkt_setpri_self(int pri)
{
    thread_t td = curthread;

    KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

struct proc *
lwkt_preempted_proc(void)
{
    thread_t td = curthread;
    while (td->td_preempted)
        td = td->td_preempted;
    return(td->td_proc);
}

typedef struct lwkt_gettoken_req {
    lwkt_token_t tok;
    int cpu;
} lwkt_gettoken_req;

#if 0

/*
 * This function deschedules the current thread and blocks on the specified
 * wait queue.  We obtain ownership of the wait queue in order to block
 * on it.  A generation number is used to interlock the wait queue in case
 * it gets signalled while we are blocked waiting on the token.
 *
 * Note: alternatively we could dequeue our thread and then message the
 * target cpu owning the wait queue.  YYY implement as sysctl.
 *
 * Note: wait queue signals normally ping-pong the cpu as an optimization.
 */

void
lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
{
    thread_t td = curthread;

    lwkt_gettoken(&w->wa_token);
    if (w->wa_gen == *gen) {
        _lwkt_dequeue(td);
        TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
        ++w->wa_count;
        td->td_wait = w;
        td->td_wmesg = wmesg;
again:
        lwkt_switch();
        lwkt_regettoken(&w->wa_token);
        if (td->td_wmesg != NULL) {
            _lwkt_dequeue(td);
            goto again;
        }
    }
    /* token might be lost, doesn't matter for gen update */
    *gen = w->wa_gen;
    lwkt_reltoken(&w->wa_token);
}

/*
 * Signal a wait queue.  We gain ownership of the wait queue in order to
 * signal it.  Once a thread is removed from the wait queue we have to
 * deal with the cpu owning the thread.
 *
 * Note: alternatively we could message the target cpu owning the wait
 * queue.  YYY implement as sysctl.
 */
void
lwkt_signal(lwkt_wait_t w, int count)
{
    thread_t td;

    lwkt_gettoken(&w->wa_token);
    ++w->wa_gen;
    if (count < 0)
        count = w->wa_count;
    while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
        --count;
        --w->wa_count;
        TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
        td->td_wait = NULL;
        td->td_wmesg = NULL;
        if (td->td_gd == mycpu) {
            _lwkt_enqueue(td);
        } else {
            lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
        }
        lwkt_regettoken(&w->wa_token);
    }
    lwkt_reltoken(&w->wa_token);
}

#endif

/*
 * Acquire ownership of a token.  The token may have spl and/or critical
 * section side effects, depending on its purpose.  These side effects
 * guarantee that you will maintain ownership of the token as long as you
 * do not block.  If you block you may lose access to the token (but you
 * must still release it even if you lose your access to it).
 *
 * YYY for now we use a critical section to prevent IPIs from taking away
 * a token, but do we really only need to disable IPIs ?
 *
 * YYY certain tokens could be made to act like mutexes when performance
 * would be better (e.g. t_cpu == -1).  This is not yet implemented.
 *
 * YYY the tokens replace 4.x's simplelocks for the most part, but this
 * means that 4.x does not expect a switch so for now we cannot switch
 * when waiting for an IPI to be returned.
 *
 * YYY If the token is owned by another cpu we may have to send an IPI to
 * it and then block.  The IPI causes the token to be given away to the
 * requesting cpu, unless it has already changed hands.  Since only the
 * current cpu can give away a token it owns we do not need a memory barrier.
 * This needs serious optimization.
 */

#ifdef SMP

static
void
lwkt_gettoken_remote(void *arg)
{
    lwkt_gettoken_req *req = arg;
    if (req->tok->t_cpu == mycpu->gd_cpuid) {
#ifdef INVARIANTS
        if (token_debug)
            printf("GT(%d,%d) ", req->tok->t_cpu, req->cpu);
#endif
        req->tok->t_cpu = req->cpu;
        req->tok->t_reqcpu = req->cpu;  /* YYY leave owned by target cpu */
        /* else set reqcpu to point to current cpu for release */
    }
}

#endif

int
lwkt_gettoken(lwkt_token_t tok)
{
    /*
     * Prevent preemption so the token can't be taken away from us once
     * we gain ownership of it.  Use a synchronous request which might
     * block.  The request will be forwarded as necessary playing catchup
     * to the token.
     */

    crit_enter();
#ifdef INVARIANTS
    if (curthread->td_pri > 1800) {
        printf("lwkt_gettoken: %p called from %p: crit sect nesting warning\n",
            tok, ((int **)&tok)[-1]);
    }
    if (curthread->td_pri > 2000) {
        curthread->td_pri = 1000;
        panic("too HIGH!");
    }
#endif
#ifdef SMP
    while (tok->t_cpu != mycpu->gd_cpuid) {
        struct lwkt_gettoken_req req;
        int seq;
        int dcpu;

        req.cpu = mycpu->gd_cpuid;
        req.tok = tok;
        dcpu = (volatile int)tok->t_cpu;
        KKASSERT(dcpu >= 0 && dcpu < ncpus);
#ifdef INVARIANTS
        if (token_debug)
            printf("REQT%d ", dcpu);
#endif
        seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
        lwkt_wait_ipiq(dcpu, seq);
#ifdef INVARIANTS
        if (token_debug)
            printf("REQR%d ", tok->t_cpu);
#endif
    }
#endif
    /*
     * Leave us in a critical section on return.  This will be undone
     * by lwkt_reltoken().  Bump the generation number.
     */
    return(++tok->t_gen);
}

/*
 * Attempt to acquire ownership of a token.  Returns 1 on success, 0 on
 * failure.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
    crit_enter();
#ifdef SMP
    if (tok->t_cpu != mycpu->gd_cpuid) {
        crit_exit();
        return(0);
    }
#endif
    /* leave us in the critical section */
    ++tok->t_gen;
    return(1);
}

/*
 * Release your ownership of a token.  Releases must occur in reverse
 * order to acquisitions, eventually so priorities can be unwound properly
 * like SPLs.  At the moment the actual implementation doesn't care.
 *
 * We can safely hand a token that we own to another cpu without notifying
 * it, but once we do we can't get it back without requesting it (unless
 * the other cpu hands it back to us before we check).
 *
 * We might have lost the token, so check that.
 *
 * Return the token's generation number.  The number is useful to callers
 * who may want to know if the token was stolen during potential blockages.
 */
int
lwkt_reltoken(lwkt_token_t tok)
{
    int gen;

    if (tok->t_cpu == mycpu->gd_cpuid) {
        tok->t_cpu = tok->t_reqcpu;
    }
    gen = tok->t_gen;
    crit_exit();
    return(gen);
}

/*
 * Reacquire a token that might have been lost.  0 is returned if the
 * generation has not changed (nobody stole the token from us), -1 is
 * returned otherwise.  The token is reacquired regardless but the
 * generation number is not bumped further if we already own the token.
 *
 * For efficiency we inline the best-case situation for lwkt_regettoken()
 * (i.e. we still own the token).
 */
int
lwkt_gentoken(lwkt_token_t tok, int *gen)
{
    if (tok->t_cpu == mycpu->gd_cpuid && tok->t_gen == *gen)
        return(0);
    *gen = lwkt_regettoken(tok);
    return(-1);
}
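
/*
 * Typical usage pattern (an illustrative sketch only; 'foo_token' and
 * 'foo_busy' are hypothetical): a caller that may block while holding a
 * token captures the generation number and re-validates its state after
 * waking up, since blocking may hand the token to another cpu:
 *
 *      int gen;
 *
 *      gen = lwkt_gettoken(&foo_token);        // enters a critical section
 *      while (foo_busy) {
 *              tsleep(&foo_token, 0, "foowt", 0); // blocking may lose the token
 *              lwkt_gentoken(&foo_token, &gen);   // get it back, -1 if stolen
 *      }
 *      lwkt_reltoken(&foo_token);              // leaves the critical section
 *
 * The point is only the gettoken/gentoken/reltoken bracketing; the state
 * being protected must be rechecked whenever lwkt_gentoken() reports that
 * the token changed hands.
 */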

/*
 * Re-acquire a token that might have been lost.  The generation number
 * is bumped and returned regardless of whether the token had been lost
 * or not (because we only have cpu granularity we have to bump the token
 * either way).
 */
int
lwkt_regettoken(lwkt_token_t tok)
{
    /* assert we are in a critical section */
    if (tok->t_cpu != mycpu->gd_cpuid) {
#ifdef SMP
        while (tok->t_cpu != mycpu->gd_cpuid) {
            struct lwkt_gettoken_req req;
            int seq;
            int dcpu;

            req.cpu = mycpu->gd_cpuid;
            req.tok = tok;
            dcpu = (volatile int)tok->t_cpu;
            KKASSERT(dcpu >= 0 && dcpu < ncpus);
#ifdef INVARIANTS
            if (token_debug)
                printf("REQT%d ", dcpu);
#endif
            seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
            lwkt_wait_ipiq(dcpu, seq);
#ifdef INVARIANTS
            if (token_debug)
                printf("REQR%d ", tok->t_cpu);
#endif
        }
#endif
    }
    ++tok->t_gen;
    return(tok->t_gen);
}

void
lwkt_inittoken(lwkt_token_t tok)
{
    /*
     * Zero the structure, which also sets the cpu owner and reqcpu to cpu 0.
     */
    bzero(tok, sizeof(*tok));
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.
 *
 * NOTE!  By default new threads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
lwkt_create(void (*func)(void *), void *arg,
    struct thread **tdp, thread_t template, int tdflags, int cpu,
    const char *fmt, ...)
{
    thread_t td;
    __va_list ap;

    td = lwkt_alloc_thread(template, cpu);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, lwkt_exit, func, arg);
    td->td_flags |= TDF_VERBOSE | tdflags;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    __va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    __va_end(ap);

    /*
     * Schedule the thread to run
     */
    if ((td->td_flags & TDF_STOPREQ) == 0)
        lwkt_schedule(td);
    else
        td->td_flags &= ~TDF_STOPREQ;
    return 0;
}
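
/*
 * Example use (an illustrative sketch only; 'mythread_main' and
 * 'mythread_td' are hypothetical): create a kernel thread on the current
 * cpu (cpu == -1), name it for ps(1), and let lwkt_create() schedule it
 * immediately.  Per the note above, the new thread starts with the MP lock
 * held and should drop it if it does not need it:
 *
 *      static struct thread *mythread_td;
 *
 *      static void
 *      mythread_main(void *arg)
 *      {
 *              rel_mplock();           // we do not need the MP lock
 *              for (;;) {
 *                      ... do the work, block as needed ...
 *              }
 *      }
 *
 *      error = lwkt_create(mythread_main, NULL, &mythread_td, NULL,
 *                          0, -1, "mythread");
 */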

/*
 * kthread_* is specific to the kernel and is not needed by userland.
 */
#ifdef _KERNEL

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 */
void
lwkt_exit(void)
{
    thread_t td = curthread;

    if (td->td_flags & TDF_VERBOSE)
        printf("kthread %p %s has exited\n", td, td->td_comm);
    caps_exit(td);
    crit_enter();
    lwkt_deschedule_self();
    ++mycpu->gd_tdfreecount;
    TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
    cpu_thread_exit();
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.  5.x compatible.
 *
 * NOTE!  By default kthreads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
kthread_create(void (*func)(void *), void *arg,
    struct thread **tdp, const char *fmt, ...)
{
    thread_t td;
    __va_list ap;

    td = lwkt_alloc_thread(NULL, -1);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, kthread_exit, func, arg);
    td->td_flags |= TDF_VERBOSE;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    __va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    __va_end(ap);

    /*
     * Schedule the thread to run
     */
    lwkt_schedule(td);
    return 0;
}

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 *
 * XXX duplicates lwkt_exit()
 */
void
kthread_exit(void)
{
    lwkt_exit();
}

#endif /* _KERNEL */

void
crit_panic(void)
{
    thread_t td = curthread;
    int lpri = td->td_pri;

    td->td_pri = 0;
    panic("td_pri is/would-go negative! %p %d", td, lpri);
}

#ifdef SMP

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * YYY If the FIFO fills up we have to enable interrupts and process the
 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
 * Create a CPU_*() function to do this!
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * Must be called from a critical section.
 */
int
lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (dcpu == gd->gd_cpuid) {
        func(arg);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    KKASSERT(dcpu >= 0 && dcpu < ncpus);
    ++ipiq_count;
    ip = &gd->gd_ipiq[dcpu];

    /*
     * We always drain before the FIFO becomes full so it should never
     * become full.  We need to leave enough entries to deal with
     * reentrancy.
     */
    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg[windex] = arg;
    /* YYY memory barrier */
    ++ip->ip_windex;
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }
    --gd->gd_intr_nesting_level;
    cpu_send_ipiq(dcpu);        /* issues memory barrier if appropriate */
    crit_exit();
    return(ip->ip_windex);
}
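
/*
 * Illustrative use of the IPIQ (a sketch only; 'remote_poke', 'dcpu' and
 * 'some_arg' are hypothetical): queue a function to run on another cpu
 * and, if completion matters, spin on the returned sequence number with
 * lwkt_wait_ipiq().  This is the same pattern lwkt_gettoken() uses to
 * chase a token to its owning cpu:
 *
 *      static void
 *      remote_poke(void *arg)          // runs on the target cpu
 *      {
 *              ...
 *      }
 *
 *      int seq;
 *
 *      crit_enter();
 *      seq = lwkt_send_ipiq(dcpu, remote_poke, some_arg);
 *      lwkt_wait_ipiq(dcpu, seq);      // processes our own IPIQ while spinning
 *      crit_exit();
 */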

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
void
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq(cpuid, func, arg);
        mask &= ~(1 << cpuid);
    }
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * Must be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(int dcpu, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (dcpu != mycpu->gd_cpuid) {
        KKASSERT(dcpu >= 0 && dcpu < ncpus);
        ip = &mycpu->gd_ipiq[dcpu];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                lwkt_process_ipiq();
                if (--maxc == 0)
                    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
                        mycpu->gd_cpuid, dcpu, ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
            }
            write_eflags(eflags);
        }
    }
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 */
void
lwkt_process_ipiq(void)
{
    int n;
    int cpuid = mycpu->gd_cpuid;

    for (n = 0; n < ncpus; ++n) {
        lwkt_ipiq_t ip;
        int ri;

        if (n == cpuid)
            continue;
        ip = globaldata_find(n)->gd_ipiq;
        if (ip == NULL)
            continue;
        ip = &ip[cpuid];

        /*
         * Note: xindex is only updated after we are sure the function has
         * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
         * function may send an IPI which may block/drain.
         */
        while (ip->ip_rindex != ip->ip_windex) {
            ri = ip->ip_rindex & MAXCPUFIFO_MASK;
            ++ip->ip_rindex;
            ip->ip_func[ri](ip->ip_arg[ri]);
            /* YYY memory barrier */
            ip->ip_xindex = ip->ip_rindex;
        }
    }
}

#else

int
lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", dcpu, func, arg);
    return(0);  /* NOT REACHED */
}

void
lwkt_wait_ipiq(int dcpu, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", dcpu, seq);
}

#endif