/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.22 2007/06/07 20:35:54 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */
#ifdef _KERNEL

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 2)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, sync_add, 6, "cpumask=%08x", sizeof(cpumask_t));

#define logipiq(name, func, arg1, arg2, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
#define logipiq2(name, arg)	\
	KTR_LOG(ipiq_ ## name, arg)

#endif	/* SMP */
#endif	/* KERNEL */
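/*
 * Usage note (editorial, not part of the original file): the SYSCTL_*
 * entries above hang the counters off the "lwkt" tree, so on a live
 * system they can be inspected from userland with sysctl(8), e.g.
 *
 *	$ sysctl lwkt.ipiq_count lwkt.ipiq_fifofull lwkt.ipiq_avoided
 */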
#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
				  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu; entries are written only by the owning (source) cpu
 * and drained by the target cpu.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
	func(arg1, arg2, NULL);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}
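/*
 * Illustrative sketch (editorial, not part of the original file): queueing
 * a remote function call with lwkt_send_ipiq3().  The callback runs on the
 * target cpu from within a critical section.  "remote_hello" and
 * "example_notify_cpu" are hypothetical names invented for this example.
 *
 *	static void
 *	remote_hello(void *arg1, int arg2, struct intrframe *frame)
 *	{
 *	    kprintf("cpu%d: got arg2=%d arg1=%p\n",
 *		    mycpu->gd_cpuid, arg2, arg1);
 *	}
 *
 *	static void
 *	example_notify_cpu(int cpuid)
 *	{
 *	    lwkt_send_ipiq3(globaldata_find(cpuid), remote_hello, NULL, 42);
 *	}
 */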
/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
			void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking; return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
		       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
	func(arg1, arg2, NULL);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg1, arg2, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    return(0);
}

/*
 * Deprecated, used only by fast interrupt forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}
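/*
 * Illustrative sketch (editorial, not part of the original file): a
 * non-blocking send that falls back to the blocking path when the target
 * FIFO is too full.  The nowait variant asserts a critical section, hence
 * the crit_enter()/crit_exit() pair.  "example_try_notify" and
 * "remote_hello" are hypothetical names (remote_hello is defined in the
 * sketch after lwkt_send_ipiq3() above).
 *
 *	static void
 *	example_try_notify(globaldata_t target)
 *	{
 *	    crit_enter();
 *	    if (lwkt_send_ipiq3_nowait(target, remote_hello, NULL, 0) == ENOENT)
 *		lwkt_send_ipiq3(target, remote_hello, NULL, 0);
 *	    crit_exit();
 *	}
 */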
/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(u_int32_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			    mycpu->gd_cpuid, target->gd_cpuid,
			    ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu, use a load fence
		 * to ensure that the loop does not use a speculative value
		 * (which may improve performance).
		 */
		cpu_lfence();
	    }
	    write_eflags(eflags);
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
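/*
 * Illustrative sketch (editorial, not part of the original file): pairing
 * a send with lwkt_wait_ipiq() to synchronize on remote completion.  The
 * value returned by lwkt_send_ipiq3() (the post-increment windex) is the
 * sequence number lwkt_wait_ipiq() compares against xindex, which only
 * advances after the target has finished executing the function.
 * "example_call_sync" and "remote_hello" are hypothetical names.
 *
 *	static void
 *	example_call_sync(globaldata_t target)
 *	{
 *	    int seq;
 *
 *	    crit_enter();
 *	    seq = lwkt_send_ipiq3(target, remote_hello, NULL, 0);
 *	    lwkt_wait_ipiq(target, seq);
 *	    crit_exit();
 *	}
 */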
/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}
#endif

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
		       struct intrframe *frame)
{
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     *
     * Note: due to additional IPI operations that the callback function
     * may make, it is possible for both rindex and windex to advance and
     * thus for rindex to advance past our cached windex.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
	ri &= MAXCPUFIFO_MASK;
	copy_func = ip->ip_func[ri];
	copy_arg1 = ip->ip_arg1[ri];
	copy_arg2 = ip->ip_arg2[ri];
	cpu_mfence();
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
		 ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
	copy_func(copy_arg1, copy_arg2, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
	/*
	 * Simulate panics during the processing of an IPI
	 */
	if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
	    if (--panic_ipiq_count == 0) {
#ifdef DDB
		Debugger("PANIC_DEBUG");
#else
		panic("PANIC_DEBUG");
#endif
	    }
	}
#endif
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

#endif
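/*
 * Illustrative sketch (editorial, not part of the original file): the
 * rindex/windex pair above are free-running counters, masked with
 * MAXCPUFIFO_MASK only when indexing the arrays, so "windex - rindex"
 * yields the queue depth even after the counters wrap.  A minimal
 * user-space analogue of the producer side, with hypothetical names and
 * the memory fences omitted (the kernel code uses cpu_sfence()/
 * cpu_lfence() around the index updates):
 *
 *	#define EXAMPLE_FIFO_SIZE	32
 *	#define EXAMPLE_FIFO_MASK	(EXAMPLE_FIFO_SIZE - 1)
 *
 *	struct example_fifo {
 *	    volatile unsigned int windex;
 *	    volatile unsigned int rindex;
 *	    int slot[EXAMPLE_FIFO_SIZE];
 *	};
 *
 *	static int
 *	example_push(struct example_fifo *f, int v)
 *	{
 *	    if (f->windex - f->rindex >= EXAMPLE_FIFO_SIZE)
 *		return(-1);
 *	    f->slot[f->windex & EXAMPLE_FIFO_MASK] = v;
 *	    ++f->windex;
 *	    return(0);
 *	}
 */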
/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *	The function is executed synchronously before return on remote
 *	cpus.  A lwkt_cpusync_t pointer is passed as an argument.  The
 *	data can be accessed via arg->cs_data.
 *
 *	XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 *	The function is executed in tandem with return on remote cpus.
 *	The data is directly passed as an argument.  Do not pass pointers
 *	to temporary storage as the storage might have gone poof by the
 *	time the target cpu executes the function.
 *
 *	At the moment lwkt_cpusync is declared on the stack and we must
 *	wait for all remote cpus to ack in lwkt_cpusync_finish(), but as
 *	a future optimization we should be able to put a counter in the
 *	globaldata structure (if it is not otherwise being used) and just
 *	poke it and return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = NULL;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}
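/*
 * Illustrative sketch (editorial, not part of the original file): running
 * a function on every cpu in a mask and waiting for completion via
 * lwkt_cpusync_simple().  Because the call waits for all cpus, passing a
 * pointer to stack storage through cs_data is safe here (unlike the
 * fastdata case warned about above).  "example_bump" and
 * "example_sync_counter" are hypothetical names.
 *
 *	static void
 *	example_bump(lwkt_cpusync_t info)
 *	{
 *	    atomic_add_int((int *)info->cs_data, 1);
 *	}
 *
 *	static void
 *	example_sync_counter(void)
 *	{
 *	    int counter = 0;
 *
 *	    lwkt_cpusync_simple(smp_active_mask, example_bump, &counter);
 *	}
 */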
/*
 * lwkt_cpusync_start()
 *
 *	Start synchronization with a set of target cpus, return once they
 *	are known to be in a synchronization loop.  The target cpus will
 *	execute poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *	XXX future: add lwkt_cpusync_start_quick() and require a call to
 *	lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *	potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    logipiq2(sync_start, mask & gd->gd_other_cpus);
    poll->cs_maxcount = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    logipiq2(sync_add, mask & gd->gd_other_cpus);
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}
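/*
 * Illustrative sketch (editorial, not part of the original file): the raw
 * start/finish pattern used by lwkt_cpusync_simple() above, spelled out.
 * Remote cpus in the mask spin in their cpusync loops between the two
 * calls, so that window is a point where they are all quiesced.
 * "example_quiesce" is a hypothetical name.
 *
 *	static void
 *	example_quiesce(cpumask_t mask, cpusync_func_t fin1)
 *	{
 *	    struct lwkt_cpusync cmd;
 *
 *	    cmd.cs_run_func = NULL;
 *	    cmd.cs_fin1_func = fin1;
 *	    cmd.cs_fin2_func = NULL;
 *	    cmd.cs_data = NULL;
 *	    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
 *	    lwkt_cpusync_finish(&cmd);
 *	}
 */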
/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
	ip->ip_arg1[wi] = poll;
	ip->ip_arg2[wi] = 0;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif