/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_debug;		/* set to 1 for debug */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif

SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0,
    "Number of IPI's sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0,
    "Number of fifo full conditions detected");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0,
    "Number of IPI's avoided by interlock with target cpu");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0,
    "Number of passive IPI messages sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0,
    "Number of cpu synchronizations");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_debug, CTLFLAG_RW, &ipiq_debug, 0,
    "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARGS	void *func, void *arg1, int arg2, int scpu, int dcpu

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARGS);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARGS);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARGS);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARGS);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARGS);
KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08lx", unsigned long mask);
KTR_INFO(KTR_IPIQ, ipiq, sync_end, 6, "cpumask=%08lx", unsigned long mask);
KTR_INFO(KTR_IPIQ, ipiq, cpu_send, 7, IPIQ_STRING, IPIQ_ARGS);
KTR_INFO(KTR_IPIQ, ipiq, send_end, 8, IPIQ_STRING, IPIQ_ARGS);

#define logipiq(name, func, arg1, arg2, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
#define logipiq2(name, arg)	\
	KTR_LOG(ipiq_ ## name, arg)

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
				  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t cs);
static void lwkt_cpusync_remote2(lwkt_cpusync_t cs);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.
 * Each cpu owns a unique ipiq FIFO for every possible target cpu.  The
 * FIFO can be written.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
        func(arg1, arg2, NULL);
        logipiq(send_end, func, arg1, arg2, gd, target);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     *
     * The target ipiq may have gotten filled up due to passive IPIs and thus
     * not be aware that its queue is too full, so be sure to issue an
     * ipiq interrupt to the target cpu.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
        unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
        unsigned long rflags = read_rflags();
#endif

        cpu_enable_intr();
        ++ipiq_fifofull;
        DEBUG_PUSH_INFO("send_ipiq3");
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            if (atomic_poll_acquire_int(&target->gd_npoll)) {
                logipiq(cpu_send, func, arg1, arg2, gd, target);
                cpu_send_ipiq(target->gd_cpuid);
            }
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
            cpu_pause();
        }
        DEBUG_POP_INFO();
#if defined(__i386__)
        write_eflags(eflags);
#elif defined(__x86_64__)
        write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_info[windex].func = func;
    ip->ip_info[windex].arg1 = arg1;
    ip->ip_info[windex].arg2 = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    atomic_set_cpumask(&target->gd_ipimask, gd->gd_cpumask);

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&target->gd_npoll)) {
        logipiq(cpu_send, func, arg1, arg2, gd, target);
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        ++ipiq_avoided;
    }
    --gd->gd_intr_nesting_level;
    crit_exit();
    logipiq(send_end, func, arg1, arg2, gd, target);

    return(ip->ip_windex);
}
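
/*
 * Usage sketch (illustrative only, not compiled): how a caller might run a
 * function on another cpu with lwkt_send_ipiq3().  The callback, its
 * arguments and the counter below are hypothetical.  The callback executes
 * on the target cpu in a critical section from its IPI/doreti/splz
 * processing path, or synchronously if the target is the current cpu.
 */
#if 0
static void
example_remote_incr(void *arg1, int arg2, struct intrframe *frame)
{
    atomic_add_int((int *)arg1, arg2);	/* runs on the target cpu */
}

static void
example_send(globaldata_t target, int *counter)
{
    /* queue the request; the hardware IPI is elided if the target is busy */
    lwkt_send_ipiq3(target, example_remote_incr, counter, 1);
}
#endif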

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
			void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    ++gd->gd_intr_nesting_level;
    logipiq(send_pasv, func, arg1, arg2, gd, target);
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
        unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
        unsigned long rflags = read_rflags();
#endif

        cpu_enable_intr();
        ++ipiq_fifofull;
        DEBUG_PUSH_INFO("send_ipiq3_passive");
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            if (atomic_poll_acquire_int(&target->gd_npoll)) {
                logipiq(cpu_send, func, arg1, arg2, gd, target);
                cpu_send_ipiq(target->gd_cpuid);
            }
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
            cpu_pause();
        }
        DEBUG_POP_INFO();
#if defined(__i386__)
        write_eflags(eflags);
#elif defined(__x86_64__)
        write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_info[windex].func = func;
    ip->ip_info[windex].arg1 = arg1;
    ip->ip_info[windex].arg2 = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    atomic_set_cpumask(&target->gd_ipimask, gd->gd_cpumask);
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    logipiq(send_end, func, arg1, arg2, gd, target);

    return(ip->ip_windex);
}
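
/*
 * Usage sketch (illustrative only, not compiled): a hypothetical deferred
 * free using the passive variant.  No hardware IPI is normally generated;
 * the owning cpu drains its ipiq on its own, typically at the next clock
 * interrupt, so this is only suitable for work that tolerates that latency.
 * The M_TEMP malloc type is used purely as an example.
 */
#if 0
static void
example_lazy_free(void *arg1, int arg2, struct intrframe *frame)
{
    kfree(arg1, M_TEMP);		/* runs later, on the owning cpu */
}

static void
example_defer_free(globaldata_t owner, void *ptr)
{
    lwkt_send_ipiq3_passive(owner, example_lazy_free, ptr, 0);
}
#endif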

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
		       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_critcount);
    if (target == gd) {
        func(arg1, arg2, NULL);
        logipiq(send_end, func, arg1, arg2, gd, target);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
        logipiq(send_fail, func, arg1, arg2, gd, target);
        --gd->gd_intr_nesting_level;
        crit_exit();
        return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_info[windex].func = func;
    ip->ip_info[windex].arg1 = arg1;
    ip->ip_info[windex].arg2 = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    atomic_set_cpumask(&target->gd_ipimask, gd->gd_cpumask);

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&target->gd_npoll)) {
        logipiq(cpu_send, func, arg1, arg2, gd, target);
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        ++ipiq_avoided;
    }
    --gd->gd_intr_nesting_level;
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(0);
}

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(cpumask_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = BSFCPUMASK(mask);
        lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
        mask &= ~CPUMASK(cpuid);
        ++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
#if defined(__i386__)
            unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
            unsigned long rflags = read_rflags();
#endif
            cpu_enable_intr();
            DEBUG_PUSH_INFO("wait_ipiq");
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
                lwkt_process_ipiq();
                crit_exit();
                if (--maxc == 0)
                    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
                            mycpu->gd_cpuid, target->gd_cpuid,
                            ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
                /*
                 * xindex may be modified by another cpu, use a load fence
                 * to ensure that the loop does not use a speculative value
                 * (which may improve performance).
                 */
                cpu_lfence();
            }
            DEBUG_POP_INFO();
#if defined(__i386__)
            write_eflags(eflags);
#elif defined(__x86_64__)
            write_rflags(rflags);
#endif
        }
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
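
/*
 * Usage sketch (illustrative only, not compiled): pairing lwkt_send_ipiq3()
 * with lwkt_wait_ipiq() to spin until the target cpu has consumed the
 * request.  The sequence number returned by the send is the wait handle.
 * lwkt_wait_ipiq() must be called from a critical section; the wrapper
 * name below is hypothetical.
 */
#if 0
static void
example_sync_call(globaldata_t target, ipifunc3_t func, void *arg)
{
    int seq;

    crit_enter();
    seq = lwkt_send_ipiq3(target, func, arg, 0);
    lwkt_wait_ipiq(target, seq);	/* no-op if target is the current cpu */
    crit_exit();
}
#endif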

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_info[].func we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 *
 * When the current cpu is mastering a cpusync we do NOT internally loop
 * on the cpusyncq poll.  We also do not re-flag a pending ipi due to
 * the cpusyncq poll because this can cause doreti/splz to loop internally.
 * The cpusync master's own loop must be allowed to run to avoid a deadlock.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    cpumask_t mask;
    int n;

    ++gd->gd_processing_ipiq;
again:
    cpu_lfence();
    mask = gd->gd_ipimask;
    atomic_clear_cpumask(&gd->gd_ipimask, mask);
    while (mask) {
        n = BSFCPUMASK(mask);
        if (n != gd->gd_cpuid) {
            sgd = globaldata_find(n);
            ip = sgd->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
                    ;
            }
        }
        mask &= ~CPUMASK(n);
    }

    /*
     * Process pending cpusyncs.  If the current thread has an active
     * cpusync we only run the list once and do not re-flag it, as the
     * thread itself is processing its interlock.
     */
    if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
        if (gd->gd_curthread->td_cscount == 0)
            goto again;
        /* need_ipiq(); do not reflag */
    }

    /*
     * Interlock to allow more IPI interrupts.  Recheck ipimask after
     * releasing gd_npoll.
     */
    if (gd->gd_ipimask)
        goto again;
    atomic_poll_release_int(&gd->gd_npoll);
    cpu_mfence();
    if (gd->gd_ipimask)
        goto again;
    --gd->gd_processing_ipiq;
}

void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    cpumask_t mask;
    int n;

again:
    cpu_lfence();
    mask = gd->gd_ipimask;
    atomic_clear_cpumask(&gd->gd_ipimask, mask);
    while (mask) {
        n = BSFCPUMASK(mask);
        if (n != gd->gd_cpuid) {
            sgd = globaldata_find(n);
            ip = sgd->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
                    ;
            }
        }
        mask &= ~CPUMASK(n);
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            /* need_ipiq(); do not reflag */
        }
    }

    /*
     * Interlock to allow more IPI interrupts.  Recheck ipimask after
     * releasing gd_npoll.
     */
    if (gd->gd_ipimask)
        goto again;
    atomic_poll_release_int(&gd->gd_npoll);
    cpu_mfence();
    if (gd->gd_ipimask)
        goto again;
}

#if 0
static int iqticks[SMP_MAXCPU];
static int iqcount[SMP_MAXCPU];
#endif
#if 0
static int iqterm[SMP_MAXCPU];
#endif

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
		       struct intrframe *frame)
{
    globaldata_t mygd = mycpu;
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

#if 0
    if (iqticks[mygd->gd_cpuid] != ticks) {
        iqticks[mygd->gd_cpuid] = ticks;
        iqcount[mygd->gd_cpuid] = 0;
    }
    if (++iqcount[mygd->gd_cpuid] > 3000000) {
        kprintf("cpu %d ipiq maxed cscount %d spin %d\n",
                mygd->gd_cpuid,
                mygd->gd_curthread->td_cscount,
                mygd->gd_spinlocks);
        iqcount[mygd->gd_cpuid] = 0;
#if 0
        if (++iqterm[mygd->gd_cpuid] > 10)
            panic("cpu %d ipiq maxed", mygd->gd_cpuid);
#endif
        int i;
        for (i = 0; i < ncpus; ++i) {
            if (globaldata_find(i)->gd_infomsg)
                kprintf(" %s", globaldata_find(i)->gd_infomsg);
        }
        kprintf("\n");
    }
#endif

    /*
     * Clear the originating core from our ipimask; we will process all
     * incoming messages.
     *
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_critcount);
    wi = ip->ip_windex;
    cpu_lfence();
    ++mygd->gd_intr_nesting_level;

    /*
     * NOTE: xindex is only updated after we are sure the function has
     *       finished execution.  Beware lwkt_process_ipiq() reentrancy!
     *       The function may send an IPI which may block/drain.
     *
     * NOTE: Due to additional IPI operations that the callback function
     *       may make, it is possible for both rindex and windex to advance
     *       and thus for rindex to advance past our cached windex.
     *
     * NOTE: A load fence is required to prevent speculative loads prior
     *       to the loading of ip_rindex.  Even though stores might be
     *       ordered, loads are probably not.  A memory fence is required
     *       to prevent reordering of the loads after the ip_rindex update.
     *
     * NOTE: Single pass only.  Returns non-zero if the queue is not empty
     *       on return.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
        ri &= MAXCPUFIFO_MASK;
        cpu_lfence();
        copy_func = ip->ip_info[ri].func;
        copy_arg1 = ip->ip_info[ri].arg1;
        copy_arg2 = ip->ip_info[ri].arg2;
        cpu_mfence();
        ++ip->ip_rindex;
        KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
                 ((ri + 1) & MAXCPUFIFO_MASK));
        logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
#ifdef INVARIANTS
        if (ipiq_debug && (ip->ip_rindex & 0xFFFFFF) == 0) {
            kprintf("cpu %d ipifunc %p %p %d (frame %p)\n",
                    mycpu->gd_cpuid,
                    copy_func, copy_arg1, copy_arg2,
#if defined(__i386__)
                    (frame ? (void *)frame->if_eip : NULL));
#elif defined(__x86_64__)
                    (frame ? (void *)frame->if_rip : NULL));
#else
                    NULL);
#endif
        }
#endif
        copy_func(copy_arg1, copy_arg2, frame);
        cpu_sfence();
        ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
        /*
         * Simulate panics during the processing of an IPI
         */
        if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
            if (--panic_ipiq_count == 0) {
#ifdef DDB
                Debugger("PANIC_DEBUG");
#else
                panic("PANIC_DEBUG");
#endif
            }
        }
#endif
    }
    --mygd->gd_intr_nesting_level;

    /*
     * Return non-zero if there is still more in the queue.
     */
    cpu_lfence();
    return (ip->ip_rindex != ip->ip_windex);
}

static void
lwkt_sync_ipiq(void *arg)
{
    volatile cpumask_t *cpumask = arg;

    atomic_clear_cpumask(cpumask, mycpu->gd_cpumask);
    if (*cpumask == 0)
        wakeup(cpumask);
}

void
lwkt_synchronize_ipiqs(const char *wmesg)
{
    volatile cpumask_t other_cpumask;

    other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
    lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq,
                        __DEVOLATILE(void *, &other_cpumask));

    while (other_cpumask != 0) {
        tsleep_interlock(&other_cpumask, 0);
        if (other_cpumask != 0)
            tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
    }
}
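
/*
 * Usage sketch (illustrative only, not compiled): a hypothetical teardown
 * path using lwkt_synchronize_ipiqs() to sleep until every other active cpu
 * has drained the IPIs already queued to it, ensuring that callbacks sent
 * earlier (e.g. against a structure about to be freed) have completed.
 * The wait message string is arbitrary.
 */
#if 0
static void
example_quiesce(void)
{
    /* stop queueing new IPI work against the structure first, then ... */
    lwkt_synchronize_ipiqs("ipiqsyn");
    /* ... all previously sent IPI callbacks have now run on all cpus */
}
#endif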

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_interlock()	- Place specified cpus in a quiescent state.
 *				  The current cpu is placed in a hard critical
 *				  section.
 *
 * lwkt_cpusync_deinterlock()	- Execute cs_func on specified cpus, including
 *				  current cpu if specified, then return.
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *arg)
{
    struct lwkt_cpusync cs;

    lwkt_cpusync_init(&cs, mask, func, arg);
    lwkt_cpusync_interlock(&cs);
    lwkt_cpusync_deinterlock(&cs);
}


void
lwkt_cpusync_interlock(lwkt_cpusync_t cs)
{
#if 0
    const char *smsg = "SMPSYNL";
#endif
    globaldata_t gd = mycpu;
    cpumask_t mask;

    /*
     * mask acknowledge (cs_mack): 0->mask for stage 1
     *
     * mack does not include the current cpu.
     */
    mask = cs->cs_mask & gd->gd_other_cpus & smp_active_mask;
    cs->cs_mack = 0;
    crit_enter_id("cpusync");
    if (mask) {
        DEBUG_PUSH_INFO("cpusync_interlock");
        ++ipiq_cscount;
        ++gd->gd_curthread->td_cscount;
        lwkt_send_ipiq_mask(mask, (ipifunc1_t)lwkt_cpusync_remote1, cs);
        logipiq2(sync_start, (long)mask);
#if 0
        if (gd->gd_curthread->td_wmesg == NULL)
            gd->gd_curthread->td_wmesg = smsg;
#endif
        while (cs->cs_mack != mask) {
            lwkt_process_ipiq();
            cpu_pause();
        }
#if 0
        if (gd->gd_curthread->td_wmesg == smsg)
            gd->gd_curthread->td_wmesg = NULL;
#endif
        DEBUG_POP_INFO();
    }
}

/*
 * Interlocked cpus have executed remote1 and are polling in remote2.
 * To deinterlock we clear cs_mack and wait for the cpus to execute
 * the func and set their bit in cs_mack again.
 */
void
lwkt_cpusync_deinterlock(lwkt_cpusync_t cs)
{
    globaldata_t gd = mycpu;
#if 0
    const char *smsg = "SMPSYNU";
#endif
    cpumask_t mask;

    /*
     * mask acknowledge (cs_mack): mack->0->mack for stage 2
     *
     * Clearing cpu bits for polling cpus in cs_mack will cause them to
     * execute stage 2, which executes the cs_func(cs_data) and then sets
     * their bit in cs_mack again.
     *
     * mack does not include the current cpu.
     */
    mask = cs->cs_mack;
    cpu_ccfence();
    cs->cs_mack = 0;
    cpu_ccfence();
    if (cs->cs_func && (cs->cs_mask & gd->gd_cpumask))
        cs->cs_func(cs->cs_data);
    if (mask) {
        DEBUG_PUSH_INFO("cpusync_deinterlock");
#if 0
        if (gd->gd_curthread->td_wmesg == NULL)
            gd->gd_curthread->td_wmesg = smsg;
#endif
        while (cs->cs_mack != mask) {
            lwkt_process_ipiq();
            cpu_pause();
        }
#if 0
        if (gd->gd_curthread->td_wmesg == smsg)
            gd->gd_curthread->td_wmesg = NULL;
#endif
        DEBUG_POP_INFO();
        /*
         * cpusyncq ipis may be left queued without the RQF flag set due to
         * a non-zero td_cscount, so be sure to process any laggards after
         * decrementing td_cscount.
         */
        --gd->gd_curthread->td_cscount;
        lwkt_process_ipiq();
        logipiq2(sync_end, (long)mask);
    }
    crit_exit_id("cpusync");
}

/*
 * helper IPI remote messaging function.
 *
 * Called on the remote cpu when a new cpu synchronization request has been
 * sent to us.  Acknowledge the request by setting our bit in cs_mack, then
 * enter the stage-2 poll, which requeues the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t cs)
{
    globaldata_t gd = mycpu;

    atomic_set_cpumask(&cs->cs_mack, gd->gd_cpumask);
    lwkt_cpusync_remote2(cs);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t cs)
{
    globaldata_t gd = mycpu;

    if ((cs->cs_mack & gd->gd_cpumask) == 0) {
        if (cs->cs_func)
            cs->cs_func(cs->cs_data);
        atomic_set_cpumask(&cs->cs_mack, gd->gd_cpumask);
        /* cs can be ripped out at this point */
    } else {
        lwkt_ipiq_t ip;
        int wi;

        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_info[wi].func = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
        ip->ip_info[wi].arg1 = cs;
        ip->ip_info[wi].arg2 = 0;
        cpu_sfence();
        KKASSERT(ip->ip_windex - ip->ip_rindex < MAXCPUFIFO);
        ++ip->ip_windex;
        if (ipiq_debug && (ip->ip_windex & 0xFFFFFF) == 0) {
            kprintf("cpu %d cm=%016jx %016jx f=%p\n",
                    gd->gd_cpuid,
                    (intmax_t)cs->cs_mask, (intmax_t)cs->cs_mack,
                    cs->cs_func);
        }
    }
}
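
/*
 * Usage sketch (illustrative only, not compiled): a hypothetical global TLB
 * flush using the cpusync API.  lwkt_cpusync_simple() is the convenience
 * form; the explicit interlock/deinterlock pair shown here is useful when
 * the caller must do work of its own while the other cpus are held
 * quiescent.
 */
#if 0
static void
example_flush_one(void *data)
{
    cpu_invltlb();			/* runs on every cpu in the mask */
}

static void
example_flush_all(void)
{
    struct lwkt_cpusync cs;

    lwkt_cpusync_init(&cs, smp_active_mask, example_flush_one, NULL);
    lwkt_cpusync_interlock(&cs);
    /* the other cpus are now spinning in lwkt_cpusync_remote2() */
    lwkt_cpusync_deinterlock(&cs);	/* runs example_flush_one everywhere */
}
#endif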