/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.27 2008/05/18 20:57:56 nth Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0,
    "Number of IPI's sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0,
    "Number of fifo full conditions detected");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0,
    "Number of IPI's avoided by interlock with target cpu");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0,
    "Number of passive IPI messages sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0,
    "Number of cpu synchronizations");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0,
    "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 3)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, sync_add, 6, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, cpu_send, 7, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_end, 8, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg1, arg2, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
#define logipiq2(name, arg)	\
	KTR_LOG(ipiq_ ## name, arg)

#endif	/* SMP */

#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
				  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);
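
/*
 * Informal sketch of the ipiq FIFO indexing used throughout this file:
 * ip_windex and ip_rindex are free-running counters, so the number of
 * entries in flight is (ip_windex - ip_rindex) and a slot is addressed
 * as (index & MAXCPUFIFO_MASK), letting the counters wrap harmlessly.
 * Only the sending cpu advances ip_windex and only the receiving cpu
 * advances ip_rindex, which is what makes the lockless single-producer/
 * single-consumer access pattern below safe.
 */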

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu, so the FIFO can be written without locking: this
 * cpu is the only producer and the target cpu is the only consumer.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
	func(arg1, arg2, NULL);
	logipiq(send_end, func, arg1, arg2, gd, target);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
	unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	unsigned long rflags = read_rflags();
#endif

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	}
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
#if defined(__i386__)
	write_eflags(eflags);
#elif defined(__x86_64__)
	write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	logipiq(cpu_send, func, arg1, arg2, gd, target);
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	} else {
	    ++ipiq_avoided;
	}
    }
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}
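
/*
 * Example usage (sketch; 'remote_hello' is a hypothetical handler, not
 * part of this file).  The handler runs on the target cpu with the
 * ipifunc3_t signature; the frame pointer may be NULL when the queue is
 * drained outside of an interrupt:
 *
 *	static void
 *	remote_hello(void *arg1, int arg2, struct intrframe *frame)
 *	{
 *		kprintf("cpu%d: got %d\n", mycpu->gd_cpuid, arg2);
 *	}
 *
 *	lwkt_send_ipiq3(globaldata_find(1), remote_hello, NULL, 42);
 */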

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
			void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
	unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	unsigned long rflags = read_rflags();
#endif

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	}
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
#if defined(__i386__)
	write_eflags(eflags);
#elif defined(__x86_64__)
	write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking; return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs, but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
		       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_critcount);
    if (target == gd) {
	func(arg1, arg2, NULL);
	logipiq(send_end, func, arg1, arg2, gd, target);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg1, arg2, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	logipiq(cpu_send, func, arg1, arg2, gd, target);
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	} else {
	    ++ipiq_avoided;
	}
    }

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(0);
}
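
/*
 * Example usage (sketch; 'remote_wakeup' and 'td' are hypothetical).
 * Because the nowait variant can fail when the target FIFO is nearly
 * full, callers are expected to provide a fallback path:
 *
 *	if (lwkt_send_ipiq3_nowait(target, remote_wakeup, td, 0) == ENOENT) {
 *		... defer the operation or handle it locally ...
 *	}
 */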

/*
 * Deprecated, used only by fast interrupt forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(cpumask_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = BSFCPUMASK(mask);
	lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
	mask &= ~CPUMASK(cpuid);
	++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
#if defined(__i386__)
	    unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	    unsigned long rflags = read_rflags();
#endif
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			    mycpu->gd_cpuid, target->gd_cpuid,
			    ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu, use a load fence
		 * to ensure that the loop does not spin on a stale
		 * speculative value.
		 */
		cpu_lfence();
	    }
#if defined(__i386__)
	    write_eflags(eflags);
#elif defined(__x86_64__)
	    write_rflags(rflags);
#endif
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
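
/*
 * Example usage (sketch; 'remote_func' and 'arg' are hypothetical).  The
 * sequence number returned by the send functions can be handed to
 * lwkt_wait_ipiq() to rendezvous with the remote execution; per the
 * comment above, the wait must be done from a critical section:
 *
 *	int seq;
 *
 *	seq = lwkt_send_ipiq3(target, remote_func, arg, 0);
 *	lwkt_wait_ipiq(target, seq);
 */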

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}
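
/*
 * Example (sketch): code that spins waiting on a remote cpu is expected
 * to drain its own incoming queues to avoid soft-deadlock, which is the
 * pattern the wait and cpusync loops in this file use:
 *
 *	crit_enter();
 *	lwkt_process_ipiq();
 *	crit_exit();
 */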

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
		       struct intrframe *frame)
{
    globaldata_t mygd = mycpu;
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_critcount);
    wi = ip->ip_windex;
    cpu_lfence();
    ++mygd->gd_intr_nesting_level;

    /*
     * NOTE: xindex is only updated after we are sure the function has
     *	     finished execution.  Beware lwkt_process_ipiq() reentrancy!
     *	     The function may send an IPI which may block/drain.
     *
     * NOTE: Due to additional IPI operations that the callback function
     *	     may make, it is possible for both rindex and windex to advance
     *	     and thus for rindex to advance past our cached windex.
     *
     * NOTE: A memory fence is required to prevent speculative loads prior
     *	     to the loading of ip_rindex.  Even though stores might be
     *	     ordered, loads are probably not.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
	ri &= MAXCPUFIFO_MASK;
	cpu_mfence();
	copy_func = ip->ip_func[ri];
	copy_arg1 = ip->ip_arg1[ri];
	copy_arg2 = ip->ip_arg2[ri];
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
		 ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
	copy_func(copy_arg1, copy_arg2, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
	/*
	 * Simulate panics during the processing of an IPI
	 */
	if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
	    if (--panic_ipiq_count == 0) {
#ifdef DDB
		Debugger("PANIC_DEBUG");
#else
		panic("PANIC_DEBUG");
#endif
	    }
	}
#endif
    }
    --mygd->gd_intr_nesting_level;

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

static void
lwkt_sync_ipiq(void *arg)
{
    cpumask_t *cpumask = arg;

    atomic_clear_cpumask(cpumask, mycpu->gd_cpumask);
    if (*cpumask == 0)
	wakeup(cpumask);
}

void
lwkt_synchronize_ipiqs(const char *wmesg)
{
    cpumask_t other_cpumask;

    other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
    lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq, &other_cpumask);

    while (other_cpumask != 0) {
	tsleep_interlock(&other_cpumask, 0);
	if (other_cpumask != 0)
	    tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
    }
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *	The function is executed synchronously on the remote cpus before
 *	this function returns.  A lwkt_cpusync_t pointer is passed as an
 *	argument; the data can be accessed via arg->cs_data.
 *
 *	XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & CPUMASK(mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 *	The function is executed on the remote cpus in tandem with this
 *	function's return.  The data is passed directly as an argument; do
 *	not pass pointers to temporary storage, as the storage might have
 *	gone poof by the time the target cpu executes the function.
 *
 *	At the moment lwkt_cpusync is declared on the stack and we must wait
 *	for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 *	optimization we should be able to put a counter in the globaldata
 *	structure (if it is not otherwise being used) and just poke it and
 *	return without waiting.  XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & CPUMASK(mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}
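
/*
 * Example usage (sketch; 'inval_handler', 'do_invalidation' and 'info'
 * are hypothetical).  The handler receives the lwkt_cpusync_t and can
 * reach the shared argument via cs_data:
 *
 *	static void
 *	inval_handler(lwkt_cpusync_t poll)
 *	{
 *		do_invalidation(poll->cs_data);
 *	}
 *
 *	lwkt_cpusync_simple(smp_active_mask, inval_handler, &info);
 */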

/*
 * lwkt_cpusync_start()
 *
 *	Start synchronization with a set of target cpus, return once they are
 *	known to be in a synchronization loop.  The target cpus will execute
 *	poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *	XXX future: add lwkt_cpusync_start_quick() and require a call to
 *	lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *	potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    logipiq2(sync_start, mask & gd->gd_other_cpus);
    poll->cs_maxcount = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    logipiq2(sync_add, mask & gd->gd_other_cpus);
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}
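
/*
 * Note on the requeue mechanism used by lwkt_cpusync_remote2() below:
 * rather than busy-waiting, a remote cpu that has not yet been told to
 * finish re-queues the poll on its own gd_cpusyncq, so each pass of
 * lwkt_process_ipiq() re-polls the originator's cs_count.
 */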

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    cpu_ccfence();	/* required ordering for MP operation */
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
	ip->ip_arg1[wi] = poll;
	ip->ip_arg2[wi] = 0;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif