/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.15 2005/07/23 07:17:42 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */
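
/*
 * Overview (a descriptive sketch inferred from the code below, not part
 * of the original commentary): each cpu owns an array of lwkt_ipiq
 * structures, one per possible target cpu (the cpu<->cpu "matrix").
 * Each ipiq is a fixed-size FIFO of MAXCPUFIFO (function, argument)
 * pairs, tracked by free-running indices:
 *
 *	ip_windex	write index, advanced only by the sending cpu
 *	ip_rindex	read index, advanced only by the receiving cpu
 *	ip_xindex	execution index, updated by the receiver only
 *			after a queued function has finished executing
 *	ip_npoll	interlock used to avoid redundant hardware IPIs
 *			while the target is already draining the queue
 */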

#ifdef _KERNEL

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK	(UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg=%p scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 2)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg, sgd->gd_cpuid, dgd->gd_cpuid)

#endif	/* SMP */
#endif	/* KERNEL */

#ifdef SMP

static int lwkt_process_ipiq1(globaldata_t sgd, lwkt_ipiq_t ip,
			      struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);
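
/*
 * Note (hedged): the ipiq_* statistics declared earlier are exported via
 * sysctl, so on an SMP kernel build `sysctl lwkt` should list the
 * lwkt.ipiq_count, lwkt.ipiq_fifofull, lwkt.ipiq_avoided, lwkt.ipiq_passive
 * and lwkt.ipiq_cscount counters for inspection from userland.
 */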

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu; since only the owning cpu advances ip_windex, the
 * FIFO can be written without interlocks.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg, gd, target);

    if (target == gd) {
	func(arg);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}
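
/*
 * Usage sketch (illustrative only; remote_add_one and bump_remote are
 * hypothetical names, not part of this file): queue a function so that
 * it runs on the cpu that owns the data, avoiding locks entirely.
 *
 *	static void
 *	remote_add_one(void *arg)
 *	{
 *	    ++*(long *)arg;		-- executes on the target cpu
 *	}
 *
 *	void
 *	bump_remote(globaldata_t target, long *counterp)
 *	{
 *	    lwkt_send_ipiq(target, remote_add_one, counterp);
 *	}
 */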

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq_passive(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq_passive: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking; returns 0 on success or ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs, but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq_nowait(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg, gd, target);
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
	func(arg);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    return(0);
}

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}
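
/*
 * Example (illustrative sketch; wakeup_func is a hypothetical callback):
 * callers of lwkt_send_ipiq_nowait() must be prepared for an ENOENT
 * failure when the target FIFO is nearly full, and fall back to a path
 * that can afford to spin.
 *
 *	if (lwkt_send_ipiq_nowait(target, wakeup_func, arg) == ENOENT)
 *	    lwkt_send_ipiq(target, wakeup_func, arg);	-- blocking fallback
 */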

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			   mycpu->gd_cpuid, target->gd_cpuid,
			   ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu; use a load fence
		 * to ensure that the loop does not use a speculative value
		 * (which may improve performance).
		 */
		cpu_lfence();
	    }
	    write_eflags(eflags);
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
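
/*
 * Example (illustrative sketch; flush_func is a hypothetical callback):
 * lwkt_send_ipiq() returns the queue's write index, which can be handed
 * to lwkt_wait_ipiq() to wait for the message to finish executing on the
 * target cpu (ip_xindex catches up to the returned sequence number).
 *
 *	int seq;
 *
 *	crit_enter();			-- lwkt_wait_ipiq() requires this
 *	seq = lwkt_send_ipiq(target, flush_func, arg);
 *	lwkt_wait_ipiq(target, seq);	-- returns once flush_func has run
 *	crit_exit();
 */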

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions: one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(sgd, &ip[gd->gd_cpuid], &frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(gd, &gd->gd_cpusyncq, &frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}
#endif

static int
lwkt_process_ipiq1(globaldata_t sgd, lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi;
    void (*copy_func)(void *data, struct intrframe *frame);
    void *copy_arg;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     *
     * Note: due to additional IPI operations that the callback function
     * may make, it is possible for both rindex and windex to advance and
     * thus for rindex to advance past our cached windex.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
	ri &= MAXCPUFIFO_MASK;
	copy_func = ip->ip_func[ri];
	copy_arg = ip->ip_arg[ri];
	cpu_mfence();
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
		 ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg, sgd, mycpu);
	copy_func(copy_arg, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
	/*
	 * Simulate panics during the processing of an IPI
	 */
	if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
	    if (--panic_ipiq_count == 0) {
#ifdef DDB
		Debugger("PANIC_DEBUG");
#else
		panic("PANIC_DEBUG");
#endif
	    }
	}
#endif
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}
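
/*
 * Ordering note (a sketch of the invariant inferred from the code above,
 * not original commentary): ip_windex and ip_rindex are free-running
 * counters; only their difference and their low MAXCPUFIFO_MASK bits are
 * meaningful.  The sender's cpu_sfence() before ++ip_windex pairs with
 * the consumer's cpu_lfence() after reading ip_windex, so a consumer that
 * observes the new windex should also observe the ip_func[]/ip_arg[]
 * slots written before it:
 *
 *	sender				consumer (lwkt_process_ipiq1)
 *	------------------------	------------------------------
 *	ip_func[w] = func;		wi = ip->ip_windex;
 *	ip_arg[w] = arg;		cpu_lfence();
 *	cpu_sfence();			... ip_func[ri]/ip_arg[ri] for
 *	++ip->ip_windex;		    all ri < wi are now stable ...
 */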

#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *	The function is executed synchronously before return on remote
 *	cpus.  A lwkt_cpusync_t pointer is passed as an argument.  The
 *	data can be accessed via arg->cs_data.
 *
 *	XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 *	The function is executed in tandem with return on remote cpus.
 *	The data is directly passed as an argument.  Do not pass pointers
 *	to temporary storage as the storage might have gone poof by the
 *	time the target cpu executes the function.
 *
 *	At the moment lwkt_cpusync is declared on the stack and we must
 *	wait for all remote cpus to ack in lwkt_cpusync_finish(), but as
 *	a future optimization we should be able to put a counter in the
 *	globaldata structure (if it is not otherwise being used) and just
 *	poke it and return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;		/* remote cpus receive this via cs_data */
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}
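
/*
 * Usage sketch (illustrative; update_shared_state and my_state are
 * hypothetical names): run a function synchronously on the local cpu and
 * every other active cpu, recovering the data via arg->cs_data.
 *
 *	static void
 *	update_shared_state(lwkt_cpusync_t info)
 *	{
 *	    struct my_state *st = info->cs_data;
 *	    ...
 *	}
 *
 *	lwkt_cpusync_simple((cpumask_t)-1, update_shared_state, &my_state);
 */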

/*
 * lwkt_cpusync_start()
 *
 *	Start synchronization with a set of target cpus; return once they
 *	are known to be in a synchronization loop.  The target cpus will
 *	execute poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *	XXX future: add lwkt_cpusync_start_quick() and require a call to
 *	lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *	potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}
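
/*
 * Example (illustrative sketch; patch_shared_structure is a hypothetical
 * helper): a master cpu that must hold the other cpus in their cpusync
 * processing loop while it modifies a shared structure could use the
 * start/finish protocol directly, with no callbacks at all:
 *
 *	struct lwkt_cpusync cmd;
 *
 *	cmd.cs_run_func = NULL;		-- remotes just enter the sync loop
 *	cmd.cs_fin1_func = NULL;
 *	cmd.cs_fin2_func = NULL;
 *	cmd.cs_data = NULL;
 *	lwkt_cpusync_start(mycpu->gd_other_cpus, &cmd);
 *	patch_shared_structure();	-- remotes spin in remote2 below
 *	lwkt_cpusync_finish(&cmd);	-- releases the remote cpus
 */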

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
	ip->ip_arg[wi] = poll;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif