/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.40 2003/11/03 02:08:35 dillon Exp $
 */

/*
 * Each cpu in a system has its own self-contained light weight kernel
 * thread scheduler, which means that generally speaking we only need
 * to use a critical section to avoid problems.  Foreign thread
 * scheduling is queued via (async) IPIs.
 *
 * NOTE: on UP machines smp_active is defined to be 0.  On SMP machines
 * smp_active is 0 prior to SMP activation, then it is 1.  The LWKT module
 * uses smp_active to optimize UP builds and to avoid sending IPIs during
 * early boot (primarily interrupt and network thread initialization).
 */
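
/*
 * For example, cross-cpu scheduling reduces to queueing an asynchronous
 * IPI on the owning cpu, which then performs the operation locally;
 * lwkt_schedule() does the following when the target thread belongs to
 * another cpu:
 *
 *	lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
 */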

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>

static int untimely_switch = 0;
SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
#ifdef INVARIANTS
static int token_debug = 0;
SYSCTL_INT(_lwkt, OID_AUTO, token_debug, CTLFLAG_RW, &token_debug, 0, "");
#endif
static quad_t switch_count = 0;
SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
static quad_t preempt_hit = 0;
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
static quad_t preempt_miss = 0;
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
static quad_t preempt_weird = 0;
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
static quad_t ipiq_count = 0;
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
static quad_t ipiq_fifofull = 0;
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");

/*
 * These helper procedures handle the runq, they can only be called from
 * within a critical section.
 *
 * WARNING! Prior to SMP being brought up it is possible to enqueue and
 * dequeue threads belonging to other cpus, so be sure to use td->td_gd
 * instead of 'mycpu' when referencing the globaldata structure.  Once
 * SMP is live, enqueueing and dequeueing only occurs on the current cpu.
 */
static __inline
void
_lwkt_dequeue(thread_t td)
{
    if (td->td_flags & TDF_RUNQ) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags &= ~TDF_RUNQ;
        TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
        /* runqmask is passively cleaned up by the switcher */
    }
}

static __inline
void
_lwkt_enqueue(thread_t td)
{
    if ((td->td_flags & TDF_RUNQ) == 0) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags |= TDF_RUNQ;
        TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
        gd->gd_runqmask |= 1 << nq;
    }
}

static __inline
int
_lwkt_wantresched(thread_t ntd, thread_t cur)
{
    return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK));
}

/*
 * LWKTs operate on a per-cpu basis
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
lwkt_gdinit(struct globaldata *gd)
{
    int i;

    for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
        TAILQ_INIT(&gd->gd_tdrunq[i]);
    gd->gd_runqmask = 0;
    TAILQ_INIT(&gd->gd_tdallq);
}

/*
 * Initialize a thread wait structure prior to first use.
 *
 * NOTE!  Called from low level boot code, we cannot do anything fancy!
 */
void
lwkt_init_wait(lwkt_wait_t w)
{
    TAILQ_INIT(&w->wa_waitq);
}

/*
 * Create a new thread.  The thread must be associated with a process context
 * or LWKT start address before it can be scheduled.  If the target cpu is
 * -1 the thread will be created on the current cpu.
 *
 * If you intend to create a thread without a process context this function
 * does everything except load the startup and switcher function.
 */
thread_t
lwkt_alloc_thread(struct thread *td, int cpu)
{
    void *stack;
    int flags = 0;

    if (td == NULL) {
        crit_enter();
        if (mycpu->gd_tdfreecount > 0) {
            --mycpu->gd_tdfreecount;
            td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
            KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0,
                ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
            TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
            crit_exit();
            stack = td->td_kstack;
            flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
        } else {
            crit_exit();
            td = zalloc(thread_zone);
            td->td_kstack = NULL;
            flags |= TDF_ALLOCATED_THREAD;
        }
    }
    if ((stack = td->td_kstack) == NULL) {
        stack = (void *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
        flags |= TDF_ALLOCATED_STACK;
    }
    if (cpu < 0)
        lwkt_init_thread(td, stack, flags, mycpu);
    else
        lwkt_init_thread(td, stack, flags, globaldata_find(cpu));
    return(td);
}

/*
 * Initialize a preexisting thread structure.  This function is used by
 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
 *
 * All threads start out in a critical section at a priority of
 * TDPRI_KERN_DAEMON.  Higher level code will modify the priority as
 * appropriate.  This function may send an IPI message when the
 * requested cpu is not the current cpu and consequently gd_tdallq may
 * not be initialized synchronously from the point of view of the originating
 * cpu.
 *
 * NOTE!  We have to be careful in regards to creating threads for other cpus
 * if SMP has not yet been activated.
 */
static void
lwkt_init_thread_remote(void *arg)
{
    thread_t td = arg;

    TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq);
}

void
lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
{
    bzero(td, sizeof(struct thread));
    td->td_kstack = stack;
    td->td_flags |= flags;
    td->td_gd = gd;
    td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
    lwkt_init_port(&td->td_msgport, td);
    pmap_init_thread(td);
    if (smp_active == 0 || gd == mycpu) {
        crit_enter();
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
        crit_exit();
    } else {
        lwkt_send_ipiq(gd->gd_cpuid, lwkt_init_thread_remote, td);
    }
}

/*
 * Set the thread's command name, as displayed by ps and debugging output.
 */
void
lwkt_set_comm(thread_t td, const char *ctl, ...)
{
    va_list va;

    va_start(va, ctl);
    vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
    va_end(va);
}

/*
 * Reference counting.  lwkt_wait_free() sleeps until all holders have
 * released the thread via lwkt_rele().
 */
void
lwkt_hold(thread_t td)
{
    ++td->td_refs;
}

void
lwkt_rele(thread_t td)
{
    KKASSERT(td->td_refs > 0);
    --td->td_refs;
}

void
lwkt_wait_free(thread_t td)
{
    while (td->td_refs)
        tsleep(td, 0, "tdreap", hz);
}

void
lwkt_free_thread(thread_t td)
{
    struct globaldata *gd = mycpu;

    KASSERT((td->td_flags & TDF_RUNNING) == 0,
        ("lwkt_free_thread: did not exit! %p", td));

    crit_enter();
    TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
    if (gd->gd_tdfreecount < CACHE_NTHREADS &&
        (td->td_flags & TDF_ALLOCATED_THREAD)
    ) {
        ++gd->gd_tdfreecount;
        TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
        crit_exit();
    } else {
        crit_exit();
        if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
            kmem_free(kernel_map,
                (vm_offset_t)td->td_kstack, UPAGES * PAGE_SIZE);
            /* gd invalid */
            td->td_kstack = NULL;
        }
        if (td->td_flags & TDF_ALLOCATED_THREAD)
            zfree(thread_zone, td);
    }
}

/*
 * Switch to the next runnable lwkt.  If no LWKTs are runnable then
 * switch to the idlethread.  Switching must occur within a critical
 * section to avoid races with the scheduling queue.
 *
 * We always have full control over our cpu's run queue.  Other cpus
 * that wish to manipulate our queue must use the cpu_*msg() calls to
 * talk to our cpu, so a critical section is all that is needed and
 * the result is very, very fast thread switching.
 *
 * The LWKT scheduler uses a fixed priority model and round-robins at
 * each priority level.  User process scheduling is a totally
 * different beast and LWKT priorities should not be confused with
 * user process priorities.
 *
 * The MP lock may be out of sync with the thread's td_mpcount.  lwkt_switch()
 * cleans it up.  Note that the td_switch() function cannot do anything that
 * requires the MP lock since the MP lock will have already been setup for
 * the target thread (not the current thread).  It's nice to have a scheduler
 * that does not need the MP lock to work because it allows us to do some
 * really cool high-performance MP lock optimizations.
 */
void
lwkt_switch(void)
{
    struct globaldata *gd;
    thread_t td = curthread;
    thread_t ntd;
#ifdef SMP
    int mpheld;
#endif

    /*
     * Switching from within a 'fast' (non thread switched) interrupt is
     * illegal.
     */
    if (mycpu->gd_intr_nesting_level && panicstr == NULL) {
        panic("lwkt_switch: cannot switch from within a fast interrupt, yet\n");
    }

    /*
     * Passive release (used to transition from user to kernel mode
     * when we block or switch rather than when we enter the kernel).
     * This function is NOT called if we are switching into a preemption
     * or returning from a preemption.  Typically this causes us to lose
     * our P_CURPROC designation (if we have one) and become a true LWKT
     * thread, and may also hand P_CURPROC to another process and schedule
     * its thread.
     */
    if (td->td_release)
        td->td_release(td);

    crit_enter();
    ++switch_count;

#ifdef SMP
    /*
     * td_mpcount cannot be used to determine if we currently hold the
     * MP lock because get_mplock() will increment it prior to attempting
     * to get the lock, and switch out if it can't.  Our ownership of
     * the actual lock will remain stable while we are in a critical section
     * (but, of course, another cpu may own or release the lock so the
     * actual value of mp_lock is not stable).
     */
    mpheld = MP_LOCK_HELD();
#endif
    if ((ntd = td->td_preempted) != NULL) {
        /*
         * We had preempted another thread on this cpu, resume the preempted
         * thread.  This occurs transparently, whether the preempted thread
         * was scheduled or not (it may have been preempted after descheduling
         * itself).
         *
         * We have to setup the MP lock for the original thread after backing
         * out the adjustment that was made to curthread when the original
         * was preempted.
         */
        KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
#ifdef SMP
        if (ntd->td_mpcount && mpheld == 0) {
            panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d\n",
                td, ntd, td->td_mpcount, ntd->td_mpcount);
        }
        if (ntd->td_mpcount) {
            td->td_mpcount -= ntd->td_mpcount;
            KKASSERT(td->td_mpcount >= 0);
        }
#endif
        ntd->td_flags |= TDF_PREEMPT_DONE;
        /* YYY release mp lock on switchback if original doesn't need it */
    } else {
        /*
         * Priority queue / round-robin at each priority.  Note that user
         * processes run at a fixed, low priority and the user process
         * scheduler deals with interactions between user processes
         * by scheduling and descheduling them from the LWKT queue as
         * necessary.
         *
         * We have to adjust the MP lock for the target thread.  If we
         * need the MP lock and cannot obtain it we try to locate a
         * thread that does not need the MP lock.
         */
        gd = mycpu;
again:
        if (gd->gd_runqmask) {
            int nq = bsrl(gd->gd_runqmask);
            if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
                gd->gd_runqmask &= ~(1 << nq);
                goto again;
            }
#ifdef SMP
            if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
                /*
                 * Target needs MP lock and we couldn't get it, try
                 * to locate a thread which does not need the MP lock
                 * to run.  If we cannot locate a thread spin in idle.
                 */
                u_int32_t rqmask = gd->gd_runqmask;
                while (rqmask) {
                    TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
                        if (ntd->td_mpcount == 0)
                            break;
                    }
                    if (ntd)
                        break;
                    rqmask &= ~(1 << nq);
                    nq = bsrl(rqmask);
                }
                if (ntd == NULL) {
                    ntd = &gd->gd_idlethread;
                    ntd->td_flags |= TDF_IDLE_NOHLT;
                } else {
                    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
                }
            } else {
                TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
            }
#else
            TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
            TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
#endif
        } else {
            /*
             * Nothing to run but we may still need the BGL to deal with
             * pending interrupts, spin in idle if so.
             */
            ntd = &gd->gd_idlethread;
            if (gd->gd_reqflags)
                ntd->td_flags |= TDF_IDLE_NOHLT;
        }
    }
    KASSERT(ntd->td_pri >= TDPRI_CRIT,
        ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));

    /*
     * Do the actual switch.  If the new target does not need the MP lock
     * and we are holding it, release the MP lock.  If the new target requires
     * the MP lock we have already acquired it for the target.
     */
#ifdef SMP
    if (ntd->td_mpcount == 0) {
        if (MP_LOCK_HELD())
            cpu_rel_mplock();
    } else {
        ASSERT_MP_LOCK_HELD();
    }
#endif
    if (td != ntd) {
        td->td_switch(ntd);
    }

    crit_exit();
}

/*
 * Switch if another thread has a higher priority.  Do not switch to other
 * threads at the same priority.
 */
void
lwkt_maybe_switch()
{
    struct globaldata *gd = mycpu;
    struct thread *td = gd->gd_curthread;

    if ((td->td_pri & TDPRI_MASK) < bsrl(gd->gd_runqmask)) {
        lwkt_switch();
    }
}
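
/*
 * Worked example (illustrative): lwkt_switch() and lwkt_maybe_switch()
 * select the highest-priority non-empty run queue via the gd_runqmask
 * bitmask, where bit N corresponds to gd_tdrunq[N].  For example, if only
 * queues 4 and 9 hold runnable threads then:
 *
 *	gd_runqmask == (1 << 4) | (1 << 9)
 *	bsrl(gd_runqmask) == 9
 *
 * so queue 9 is serviced first; the bit is cleared by the switcher when
 * it finds the queue empty.
 */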

/*
 * Request that the target thread preempt the current thread.  Preemption
 * only works under a specific set of conditions:
 *
 *	- We are not preempting ourselves
 *	- The target thread is owned by the current cpu
 *	- We are not currently being preempted
 *	- The target is not currently being preempted
 *	- We are able to satisfy the target's MP lock requirements (if any).
 *
 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION.  Typically
 * this is called via lwkt_schedule() through the td_preemptable callback.
 * critpri is the managed critical priority that we should ignore in order
 * to determine whether preemption is possible (aka usually just the crit
 * priority of lwkt_schedule() itself).
 *
 * XXX at the moment we run the target thread in a critical section during
 * the preemption in order to prevent the target from taking interrupts
 * that *WE* can't.  Preemption is strictly limited to interrupt threads
 * and interrupt-like threads, outside of a critical section, and the
 * preempted source thread will be resumed the instant the target blocks
 * whether or not the source is scheduled (i.e. preemption is supposed to
 * be as transparent as possible).
 *
 * The target thread inherits our MP count (added to its own) for the
 * duration of the preemption in order to preserve the atomicity of the
 * MP lock during the preemption.  Therefore, any preempting targets must be
 * careful in regards to MP assertions.  Note that the MP count may be
 * out of sync with the physical mp_lock, but we do not have to preserve
 * the original ownership of the lock if it was out of sync (that is, we
 * can leave it synchronized on return).
 */
void
lwkt_preempt(thread_t ntd, int critpri)
{
    struct globaldata *gd = mycpu;
    thread_t td = gd->gd_curthread;
#ifdef SMP
    int mpheld;
    int savecnt;
#endif

    /*
     * The caller has put us in a critical section.  We can only preempt
     * if the caller of the caller was not in a critical section (basically
     * a local interrupt), as determined by the 'critpri' parameter.  If
     * we are unable to preempt the request is counted as a miss and we
     * simply return.
     *
     * YYY The target thread must be in a critical section (else it must
     * inherit our critical section?  I dunno yet).
     */
    KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));

    need_resched();
    if (!_lwkt_wantresched(ntd, td)) {
        ++preempt_miss;
        return;
    }
    if ((td->td_pri & ~TDPRI_MASK) > critpri) {
        ++preempt_miss;
        return;
    }
#ifdef SMP
    if (ntd->td_gd != gd) {
        ++preempt_miss;
        return;
    }
#endif
    if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
        ++preempt_weird;
        return;
    }
    if (ntd->td_preempted) {
        ++preempt_hit;
        return;
    }
#ifdef SMP
    /*
     * note: an interrupt might have occurred just as we were transitioning
     * to or from the MP lock.  In this case td_mpcount will be pre-disposed
     * (non-zero) but not actually synchronized with the actual state of the
     * lock.  We can use it to imply an MP lock requirement for the
     * preemption but we cannot use it to test whether we hold the MP lock
     * or not.
     */
    savecnt = td->td_mpcount;
    mpheld = MP_LOCK_HELD();
    ntd->td_mpcount += td->td_mpcount;
    if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
        ntd->td_mpcount -= td->td_mpcount;
        ++preempt_miss;
        return;
    }
#endif

    ++preempt_hit;
    ntd->td_preempted = td;
    td->td_flags |= TDF_PREEMPT_LOCK;
    td->td_switch(ntd);
    KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
#ifdef SMP
    KKASSERT(savecnt == td->td_mpcount);
    mpheld = MP_LOCK_HELD();
    if (mpheld && td->td_mpcount == 0)
        cpu_rel_mplock();
    else if (mpheld == 0 && td->td_mpcount)
        panic("lwkt_preempt(): MP lock was not held through");
#endif
    ntd->td_preempted = NULL;
    td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
}

/*
 * Yield our thread while higher priority threads are pending.  This is
 * typically called when we leave a critical section but it can be safely
 * called while we are in a critical section.
 *
 * This function will not generally yield to equal priority threads but it
 * can occur as a side effect.  Note that lwkt_switch() is called from
 * inside the critical section to prevent its own crit_exit() from reentering
 * lwkt_yield_quick().
 *
 * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint
 * came along but was blocked and made pending.
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield_quick(void)
{
    globaldata_t gd = mycpu;
    thread_t td = gd->gd_curthread;

    /*
     * gd_reqflags is cleared in splz if the cpl is 0.  If we were to clear
     * it with a non-zero cpl then we might not wind up calling splz after
     * a task switch when the critical section is exited even though the
     * new task could accept the interrupt.
     *
     * XXX from crit_exit() only called after last crit section is released.
     * If called directly will run splz() even if in a critical section.
     *
     * td_nest_count prevents deep nesting via splz() or doreti().  Note that
     * except for this special case, we MUST call splz() here to handle any
     * pending ints, particularly after we switch, or we might accidentally
     * halt the cpu with interrupts pending.
     */
    if (gd->gd_reqflags && td->td_nest_count < 2)
        splz();

    /*
     * YYY enabling will cause wakeup() to task-switch, which really
     * confused the old 4.x code.  This is a good way to simulate
     * preemption and MP without actually doing preemption or MP, because a
     * lot of code assumes that wakeup() does not block.
     */
    if (untimely_switch && td->td_nest_count == 0 &&
        gd->gd_intr_nesting_level == 0
    ) {
        crit_enter();
        /*
         * YYY temporary hacks until we disassociate the userland scheduler
         * from the LWKT scheduler.
         */
        if (td->td_flags & TDF_RUNQ) {
            lwkt_switch();              /* will not reenter yield function */
        } else {
            lwkt_schedule_self();       /* make sure we are scheduled */
            lwkt_switch();              /* will not reenter yield function */
            lwkt_deschedule_self();     /* make sure we are descheduled */
        }
        crit_exit_noyield(td);
    }
}

/*
 * This implements a normal yield which, unlike _quick, will yield to equal
 * priority threads as well.  Note that gd_reqflags tests will be handled by
 * the crit_exit() call in lwkt_switch().
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield(void)
{
    lwkt_schedule_self();
    lwkt_switch();
}

/*
 * Schedule a thread to run.  As the current thread we can always safely
 * schedule ourselves, and a shortcut procedure is provided for that
 * function.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_schedule_self(void)
{
    thread_t td = curthread;

    crit_enter();
    KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
    _lwkt_enqueue(td);
    if (td->td_proc && td->td_proc->p_stat == SSLEEP)
        panic("SCHED SELF PANIC");
    crit_exit();
}

/*
 * Generic schedule.  Possibly schedule threads belonging to other cpus and
 * deal with threads that might be blocked on a wait queue.
 *
 * YYY this is one of the best places to implement load balancing code.
 * Load balancing can be accomplished by requesting other sorts of actions
 * for the thread in question.
 */
void
lwkt_schedule(thread_t td)
{
#ifdef INVARIANTS
    if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
        && td->td_proc->p_stat == SSLEEP
    ) {
        printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
            curthread,
            curthread->td_proc ? curthread->td_proc->p_pid : -1,
            curthread->td_proc ? curthread->td_proc->p_stat : -1,
            td,
            td->td_proc ? td->td_proc->p_pid : -1,
            td->td_proc ? td->td_proc->p_stat : -1
        );
        panic("SCHED PANIC");
    }
#endif
    crit_enter();
    if (td == curthread) {
        _lwkt_enqueue(td);
    } else {
        lwkt_wait_t w;

        /*
         * If the thread is on a wait list we have to send our scheduling
         * request to the owner of the wait structure.  Otherwise we send
         * the scheduling request to the cpu owning the thread.  Races
         * are ok, the target will forward the message as necessary (the
         * message may chase the thread around before it finally gets
         * acted upon).
         *
         * (remember, wait structures use stable storage)
         */
        if ((w = td->td_wait) != NULL) {
            if (lwkt_trytoken(&w->wa_token)) {
                TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
                --w->wa_count;
                td->td_wait = NULL;
                if (smp_active == 0 || td->td_gd == mycpu) {
                    _lwkt_enqueue(td);
                    if (td->td_preemptable) {
                        td->td_preemptable(td, TDPRI_CRIT*2);   /* YYY +token */
                    } else if (_lwkt_wantresched(td, curthread)) {
                        need_resched();
                    }
                } else {
                    lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
                }
                lwkt_reltoken(&w->wa_token);
            } else {
                lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
            }
        } else {
            /*
             * If the wait structure is NULL and we own the thread, there
             * is no race (since we are in a critical section).  If we
             * do not own the thread there might be a race but the
             * target cpu will deal with it.
             */
            if (smp_active == 0 || td->td_gd == mycpu) {
                _lwkt_enqueue(td);
                if (td->td_preemptable) {
                    td->td_preemptable(td, TDPRI_CRIT);
                } else if (_lwkt_wantresched(td, curthread)) {
                    need_resched();
                }
            } else {
                lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
            }
        }
    }
    crit_exit();
}
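
/*
 * Usage sketch (illustrative, not part of the original file): the typical
 * LWKT blocking pattern is to deschedule ourselves inside a critical
 * section, publish the thread pointer somewhere a waker can find it, and
 * switch away.  Another thread (or interrupt thread) later calls
 * lwkt_schedule() on the saved pointer to wake us up.
 *
 *	crit_enter();
 *	lwkt_deschedule_self();
 *	record_waiter(curthread);	(hypothetical bookkeeping)
 *	lwkt_switch();
 *	crit_exit();
 *
 *	... elsewhere ...
 *	lwkt_schedule(td);
 */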

/*
 * Managed acquisition.  This code assumes that the MP lock is held for
 * the tdallq operation and that the thread has been descheduled from its
 * original cpu.  We also have to wait for the thread to be entirely switched
 * out on its original cpu (this is usually fast enough that we never loop)
 * since the LWKT system does not have to hold the MP lock while switching
 * and the target may have released it before switching.
 */
void
lwkt_acquire(thread_t td)
{
    struct globaldata *gd;

    gd = td->td_gd;
    KKASSERT((td->td_flags & TDF_RUNQ) == 0);
    while (td->td_flags & TDF_RUNNING)      /* XXX spin */
        ;
    if (gd != mycpu) {
        crit_enter();
        TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);      /* protected by BGL */
        gd = mycpu;
        td->td_gd = gd;
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); /* protected by BGL */
        crit_exit();
    }
}

/*
 * Deschedule a thread.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_deschedule_self(void)
{
    thread_t td = curthread;

    crit_enter();
    KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
    _lwkt_dequeue(td);
    crit_exit();
}

/*
 * Generic deschedule.  Descheduling threads other than your own should be
 * done only in carefully controlled circumstances.  Descheduling is
 * asynchronous.
 *
 * This function may block if the cpu has run out of messages.
 */
void
lwkt_deschedule(thread_t td)
{
    crit_enter();
    if (td == curthread) {
        _lwkt_dequeue(td);
    } else {
        if (td->td_gd == mycpu) {
            _lwkt_dequeue(td);
        } else {
            lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_deschedule, td);
        }
    }
    crit_exit();
}

/*
 * Set the target thread's priority.  This routine does not automatically
 * switch to a higher priority thread, LWKT threads are not designed for
 * continuous priority changes.  Yield if you want to switch.
 *
 * We have to retain the critical section count which uses the high bits
 * of the td_pri field.  The specified priority may also indicate zero or
 * more critical sections by adding TDPRI_CRIT*N.
 */
void
lwkt_setpri(thread_t td, int pri)
{
    KKASSERT(pri >= 0);
    KKASSERT(td->td_gd == mycpu);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

void
lwkt_setpri_self(int pri)
{
    thread_t td = curthread;

    KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

/*
 * Return the proc associated with the innermost preempted thread, or the
 * current thread's proc if no preemption is in progress.
 */
struct proc *
lwkt_preempted_proc(void)
{
    thread_t td = curthread;
    while (td->td_preempted)
        td = td->td_preempted;
    return(td->td_proc);
}

typedef struct lwkt_gettoken_req {
    lwkt_token_t tok;
    int cpu;
} lwkt_gettoken_req;

#if 0

/*
 * This function deschedules the current thread and blocks on the specified
 * wait queue.  We obtain ownership of the wait queue in order to block
 * on it.  A generation number is used to interlock the wait queue in case
 * it gets signalled while we are blocked waiting on the token.
 *
 * Note: alternatively we could dequeue our thread and then message the
 * target cpu owning the wait queue.  YYY implement as sysctl.
 *
 * Note: wait queue signals normally ping-pong the cpu as an optimization.
 */
void
lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
{
    thread_t td = curthread;

    lwkt_gettoken(&w->wa_token);
    if (w->wa_gen == *gen) {
        _lwkt_dequeue(td);
        TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
        ++w->wa_count;
        td->td_wait = w;
        td->td_wmesg = wmesg;
again:
        lwkt_switch();
        lwkt_regettoken(&w->wa_token);
        if (td->td_wmesg != NULL) {
            _lwkt_dequeue(td);
            goto again;
        }
    }
    /* token might be lost, doesn't matter for gen update */
    *gen = w->wa_gen;
    lwkt_reltoken(&w->wa_token);
}

/*
 * Signal a wait queue.  We gain ownership of the wait queue in order to
 * signal it.  Once a thread is removed from the wait queue we have to
 * deal with the cpu owning the thread.
 *
 * Note: alternatively we could message the target cpu owning the wait
 * queue.  YYY implement as sysctl.
 */
void
lwkt_signal(lwkt_wait_t w, int count)
{
    thread_t td;

    lwkt_gettoken(&w->wa_token);
    ++w->wa_gen;
    if (count < 0)
        count = w->wa_count;
    while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
        --count;
        --w->wa_count;
        TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
        td->td_wait = NULL;
        td->td_wmesg = NULL;
        if (td->td_gd == mycpu) {
            _lwkt_enqueue(td);
        } else {
            lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
        }
        lwkt_regettoken(&w->wa_token);
    }
    lwkt_reltoken(&w->wa_token);
}

#endif

/*
 * Acquire ownership of a token
 *
 * Acquire ownership of a token.  The token may have spl and/or critical
 * section side effects, depending on its purpose.  These side effects
 * guarantee that you will maintain ownership of the token as long as you
 * do not block.  If you block you may lose access to the token (but you
 * must still release it even if you lose your access to it).
 *
 * YYY for now we use a critical section to prevent IPIs from taking away
 * a token, but do we really only need to disable IPIs ?
 *
 * YYY certain tokens could be made to act like mutexes when performance
 * would be better (e.g. t_cpu == -1).  This is not yet implemented.
 *
 * YYY the tokens replace 4.x's simplelocks for the most part, but this
 * means that 4.x does not expect a switch so for now we cannot switch
 * when waiting for an IPI to be returned.
 *
 * YYY If the token is owned by another cpu we may have to send an IPI to
 * it and then block.  The IPI causes the token to be given away to the
 * requesting cpu, unless it has already changed hands.  Since only the
 * current cpu can give away a token it owns we do not need a memory barrier.
 * This needs serious optimization.
 */

#ifdef SMP

static
void
lwkt_gettoken_remote(void *arg)
{
    lwkt_gettoken_req *req = arg;
    if (req->tok->t_cpu == mycpu->gd_cpuid) {
#ifdef INVARIANTS
        if (token_debug)
            printf("GT(%d,%d) ", req->tok->t_cpu, req->cpu);
#endif
        req->tok->t_cpu = req->cpu;
        req->tok->t_reqcpu = req->cpu;  /* YYY leave owned by target cpu */
        /* else set reqcpu to point to current cpu for release */
    }
}

#endif
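
/*
 * Usage sketch (illustrative only, not part of the original code): the
 * typical pattern for the token functions defined below.  The generation
 * number returned by lwkt_gettoken() lets a caller detect whether the
 * token was stolen while it was blocked.
 *
 *	int gen;
 *
 *	gen = lwkt_gettoken(&tok);
 *	...					(may block)
 *	if (lwkt_gentoken(&tok, &gen) != 0) {
 *		... token was lost while blocked, re-validate state ...
 *	}
 *	lwkt_reltoken(&tok);
 */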

int
lwkt_gettoken(lwkt_token_t tok)
{
    /*
     * Prevent preemption so the token can't be taken away from us once
     * we gain ownership of it.  Use a synchronous request which might
     * block.  The request will be forwarded as necessary playing catchup
     * to the token.
     */

    crit_enter();
#ifdef INVARIANTS
    if (curthread->td_pri > 1800) {
        printf("lwkt_gettoken: %p called from %p: crit sect nesting warning\n",
            tok, ((int **)&tok)[-1]);
    }
    if (curthread->td_pri > 2000) {
        curthread->td_pri = 1000;
        panic("too HIGH!");
    }
#endif
#ifdef SMP
    while (tok->t_cpu != mycpu->gd_cpuid) {
        struct lwkt_gettoken_req req;
        int seq;
        int dcpu;

        req.cpu = mycpu->gd_cpuid;
        req.tok = tok;
        dcpu = (volatile int)tok->t_cpu;
        KKASSERT(dcpu >= 0 && dcpu < ncpus);
#ifdef INVARIANTS
        if (token_debug)
            printf("REQT%d ", dcpu);
#endif
        seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
        lwkt_wait_ipiq(dcpu, seq);
#ifdef INVARIANTS
        if (token_debug)
            printf("REQR%d ", tok->t_cpu);
#endif
    }
#endif
    /*
     * leave us in a critical section on return.  This will be undone
     * by lwkt_reltoken().  Bump the generation number.
     */
    return(++tok->t_gen);
}

/*
 * Attempt to acquire ownership of a token.  Returns 1 on success, 0 on
 * failure.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
    crit_enter();
#ifdef SMP
    if (tok->t_cpu != mycpu->gd_cpuid) {
        crit_exit();
        return(0);
    }
#endif
    /* leave us in the critical section */
    ++tok->t_gen;
    return(1);
}

/*
 * Release your ownership of a token.  Releases must occur in reverse
 * order to acquisitions, eventually so priorities can be unwound properly
 * like SPLs.  At the moment the actual implementation doesn't care.
 *
 * We can safely hand a token that we own to another cpu without notifying
 * it, but once we do we can't get it back without requesting it (unless
 * the other cpu hands it back to us before we check).
 *
 * We might have lost the token, so check that.
 *
 * Return the token's generation number.  The number is useful to callers
 * who may want to know if the token was stolen during potential blockages.
 */
int
lwkt_reltoken(lwkt_token_t tok)
{
    int gen;

    if (tok->t_cpu == mycpu->gd_cpuid) {
        tok->t_cpu = tok->t_reqcpu;
    }
    gen = tok->t_gen;
    crit_exit();
    return(gen);
}

/*
 * Reacquire a token that might have been lost.  0 is returned if the
 * generation has not changed (nobody stole the token from us), -1 is
 * returned otherwise.  The token is reacquired regardless but the
 * generation number is not bumped further if we already own the token.
 *
 * For efficiency we inline the best-case situation for lwkt_regettoken()
 * (i.e. we still own the token).
 */
int
lwkt_gentoken(lwkt_token_t tok, int *gen)
{
    if (tok->t_cpu == mycpu->gd_cpuid && tok->t_gen == *gen)
        return(0);
    *gen = lwkt_regettoken(tok);
    return(-1);
}

/*
 * Re-acquire a token that might have been lost.  The generation number
 * is bumped and returned regardless of whether the token had been lost
 * or not (because we only have cpu granularity we have to bump the token
 * either way).
 */
int
lwkt_regettoken(lwkt_token_t tok)
{
    /* assert we are in a critical section */
    if (tok->t_cpu != mycpu->gd_cpuid) {
#ifdef SMP
        while (tok->t_cpu != mycpu->gd_cpuid) {
            struct lwkt_gettoken_req req;
            int seq;
            int dcpu;

            req.cpu = mycpu->gd_cpuid;
            req.tok = tok;
            dcpu = (volatile int)tok->t_cpu;
            KKASSERT(dcpu >= 0 && dcpu < ncpus);
#ifdef INVARIANTS
            if (token_debug)
                printf("REQT%d ", dcpu);
#endif
            seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
            lwkt_wait_ipiq(dcpu, seq);
#ifdef INVARIANTS
            if (token_debug)
                printf("REQR%d ", tok->t_cpu);
#endif
        }
#endif
    }
    ++tok->t_gen;
    return(tok->t_gen);
}

void
lwkt_inittoken(lwkt_token_t tok)
{
    /*
     * Zero structure and set cpu owner and reqcpu to cpu 0.
     */
    bzero(tok, sizeof(*tok));
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.
 *
 * NOTE!  By default new threads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
lwkt_create(void (*func)(void *), void *arg,
    struct thread **tdp, thread_t template, int tdflags, int cpu,
    const char *fmt, ...)
{
    thread_t td;
    va_list ap;

    td = lwkt_alloc_thread(template, cpu);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, kthread_exit, func, arg);
    td->td_flags |= TDF_VERBOSE | tdflags;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    va_end(ap);

    /*
     * Schedule the thread to run
     */
    if ((td->td_flags & TDF_STOPREQ) == 0)
        lwkt_schedule(td);
    else
        td->td_flags &= ~TDF_STOPREQ;
    return 0;
}
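
/*
 * Usage sketch (illustrative; the names mydev_worker, sc and unit are
 * hypothetical): a typical caller creates a worker thread on the current
 * cpu and lets lwkt_create() schedule it.  The new thread should call
 * rel_mplock() first if it does not need the MP lock, and exit with
 * lwkt_exit() / kthread_exit() when done.
 *
 *	thread_t td;
 *
 *	lwkt_create(mydev_worker, sc, &td, NULL, 0, -1, "mydev%d", unit);
 */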

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 */
void
lwkt_exit(void)
{
    thread_t td = curthread;

    if (td->td_flags & TDF_VERBOSE)
        printf("kthread %p %s has exited\n", td, td->td_comm);
    crit_enter();
    lwkt_deschedule_self();
    ++mycpu->gd_tdfreecount;
    TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
    cpu_thread_exit();
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.  5.x compatible.
 *
 * NOTE!  By default kthreads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
kthread_create(void (*func)(void *), void *arg,
    struct thread **tdp, const char *fmt, ...)
{
    thread_t td;
    va_list ap;

    td = lwkt_alloc_thread(NULL, -1);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, kthread_exit, func, arg);
    td->td_flags |= TDF_VERBOSE;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    va_end(ap);

    /*
     * Schedule the thread to run
     */
    lwkt_schedule(td);
    return 0;
}

void
crit_panic(void)
{
    thread_t td = curthread;
    int lpri = td->td_pri;

    td->td_pri = 0;
    panic("td_pri is/would-go negative! %p %d", td, lpri);
}

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 *
 * XXX duplicates lwkt_exit()
 */
void
kthread_exit(void)
{
    lwkt_exit();
}

#ifdef SMP

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * YYY If the FIFO fills up we have to enable interrupts and process the
 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
 * Create a CPU_*() function to do this!
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * Must be called from a critical section.
 */
int
lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (dcpu == gd->gd_cpuid) {
        func(arg);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    KKASSERT(dcpu >= 0 && dcpu < ncpus);
    ++ipiq_count;
    ip = &gd->gd_ipiq[dcpu];

    /*
     * We always drain before the FIFO becomes full so it should never
     * become full.  We need to leave enough entries to deal with
     * reentrancy.
     */
    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg[windex] = arg;
    /* YYY memory barrier */
    ++ip->ip_windex;
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }
    --gd->gd_intr_nesting_level;
    cpu_send_ipiq(dcpu);        /* issues memory barrier if appropriate */
    crit_exit();
    return(ip->ip_windex);
}
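
/*
 * Usage sketch (illustrative; remote_func is a hypothetical handler):
 * queue a function for execution on another cpu and, when synchronous
 * behavior is needed, wait for it to finish using the sequence number
 * returned by lwkt_send_ipiq().  This is the same pattern lwkt_gettoken()
 * uses above.
 *
 *	seq = lwkt_send_ipiq(dcpu, remote_func, arg);
 *	lwkt_wait_ipiq(dcpu, seq);
 */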

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
void
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq(cpuid, func, arg);
        mask &= ~(1 << cpuid);
    }
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * Must be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(int dcpu, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (dcpu != mycpu->gd_cpuid) {
        KKASSERT(dcpu >= 0 && dcpu < ncpus);
        ip = &mycpu->gd_ipiq[dcpu];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                lwkt_process_ipiq();
                if (--maxc == 0)
                    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
                        mycpu->gd_cpuid, dcpu, ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
            }
            write_eflags(eflags);
        }
    }
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 */
void
lwkt_process_ipiq(void)
{
    int n;
    int cpuid = mycpu->gd_cpuid;

    for (n = 0; n < ncpus; ++n) {
        lwkt_ipiq_t ip;
        int ri;

        if (n == cpuid)
            continue;
        ip = globaldata_find(n)->gd_ipiq;
        if (ip == NULL)
            continue;
        ip = &ip[cpuid];

        /*
         * Note: xindex is only updated after we are sure the function has
         * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
         * function may send an IPI which may block/drain.
         */
        while (ip->ip_rindex != ip->ip_windex) {
            ri = ip->ip_rindex & MAXCPUFIFO_MASK;
            ++ip->ip_rindex;
            ip->ip_func[ri](ip->ip_arg[ri]);
            /* YYY memory barrier */
            ip->ip_xindex = ip->ip_rindex;
        }
    }
}

#else

int
lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", dcpu, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(int dcpu, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", dcpu, seq);
}

#endif