/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.21 2007/01/22 19:37:04 corecode Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
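 *
 * In brief: each cpu owns an array of single-producer/single-consumer
 * FIFOs, one per possible target cpu; the queue carrying messages from
 * cpu S to cpu T is S's gd_ipiq[T].  A queued message is a function
 * pointer plus two arguments (a void * and an int).  The sender advances
 * ip_windex, the target advances ip_rindex as it runs the queued
 * functions, and ip_xindex trails to mark functions that have completely
 * finished executing.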
 */

#ifdef _KERNEL

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;    /* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull; /* number of fifo full conditions detected */
static __int64_t ipiq_avoided;  /* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;  /* passive IPI messages */
static __int64_t ipiq_cscount;  /* number of cpu synchronizations */
static int ipiq_optimized = 1;  /* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING    "func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE  (sizeof(void *) * 2 + sizeof(int) * 2)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ       KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg1, arg2, sgd, dgd)    \
    KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)

#endif  /* SMP */
#endif  /* KERNEL */

#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
                                  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
        func(arg1, arg2, NULL);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        ++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
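 *
 * As with lwkt_send_ipiq3(), the return value is the updated write index
 * of the target queue, i.e. the sequence number of the queued message.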
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
                        void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
                       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
        func(arg1, arg2, NULL);
        return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
        logipiq(send_fail, func, arg1, arg2, gd, target);
        return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        else
            ++ipiq_avoided;
    }
    return(0);
}

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
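 *
 * Returns the number of cpus the message was queued to.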
 */
int
lwkt_send_ipiq3_mask(u_int32_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
        mask &= ~(1 << cpuid);
        ++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
                lwkt_process_ipiq();
                crit_exit();
                if (--maxc == 0)
                    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
                            mycpu->gd_cpuid, target->gd_cpuid,
                            ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
                /*
                 * xindex may be modified by another cpu, use a load fence
                 * to ensure that the loop does not use a speculative value
                 * (which may improve performance).
                 */
                cpu_lfence();
            }
            write_eflags(eflags);
        }
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
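 *
 * lwkt_process_ipiq() passes a NULL frame to the queued functions while
 * lwkt_process_ipiq_frame() passes the interrupt frame through.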
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            sgd = globaldata_find(n);
            ip = sgd->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            sgd = globaldata_find(n);
            ip = sgd->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}
#endif

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
                       struct intrframe *frame)
{
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     *
     * Note: due to additional IPI operations that the callback function
     * may make, it is possible for both rindex and windex to advance and
     * thus for rindex to advance past our cached windex.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
        ri &= MAXCPUFIFO_MASK;
        copy_func = ip->ip_func[ri];
        copy_arg1 = ip->ip_arg1[ri];
        copy_arg2 = ip->ip_arg2[ri];
        cpu_mfence();
        ++ip->ip_rindex;
        KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) == ((ri + 1) & MAXCPUFIFO_MASK));
        logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
        copy_func(copy_arg1, copy_arg2, frame);
        cpu_sfence();
        ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
        /*
         * Simulate panics during the processing of an IPI
         */
        if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
            if (--panic_ipiq_count == 0) {
#ifdef DDB
                Debugger("PANIC_DEBUG");
#else
                panic("PANIC_DEBUG");
#endif
            }
        }
#endif
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 * The function is executed synchronously before return on remote cpus.
 * A lwkt_cpusync_t pointer is passed as an argument.  The data can
 * be accessed via arg->cs_data.
 *
 * XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 * The function is executed in tandem with return on remote cpus.
 * The data is directly passed as an argument.  Do not pass pointers to
 * temporary storage as the storage might have gone poof by the time the
 * target cpu executes the function.
 *
 * At the moment lwkt_cpusync is declared on the stack and we must wait
 * for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 * optimization we should be able to put a counter in the globaldata
 * structure (if it is not otherwise being used) and just poke it and
 * return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 * Start synchronization with a set of target cpus, return once they are
 * known to be in a synchronization loop.  The target cpus will execute
 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 * XXX future: add lwkt_cpusync_start_quick() and require a call to
 * lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 * potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
                mask & gd->gd_other_cpus & smp_active_mask,
                (ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        ++ipiq_cscount;
        ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
                mask & gd->gd_other_cpus & smp_active_mask,
                (ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
        if (poll->cs_maxcount == count)
            ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func)
            poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        while (poll->cs_count != -(poll->cs_maxcount + 1)) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
        --gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
        poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
        cpusync_func2_t savef;
        void *saved;

        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func) {
            savef = poll->cs_fin2_func;
            saved = poll->cs_data;
            atomic_add_int(&poll->cs_count, -1);
            savef(saved);
        } else {
            atomic_add_int(&poll->cs_count, -1);
        }
    } else {
        globaldata_t gd = mycpu;
        lwkt_ipiq_t ip;
        int wi;

        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
        ip->ip_arg1[wi] = poll;
        ip->ip_arg2[wi] = 0;
        cpu_sfence();
        ++ip->ip_windex;
    }
}

#endif