/*
 * Copyright (c) 2003-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>
#include <machine/clock.h>

#if 1   /* DEBUGGING */
#define LOOPRECOVER                     /* enable watchdog */
#endif

/*
 * Watchdog recovery interval, in seconds.
 *
 * The watchdog value is generous for two reasons.  First, because the
 * situation is not supposed to happen at all (but does), and second,
 * because VMs could be very slow at handling IPIs.
 */
#define LOOPRECOVER_TIMEOUT1    2       /* initial recovery */
#define LOOPRECOVER_TIMEOUT2    1       /* repeated recoveries */
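
/*
 * The timeouts above are expressed in seconds.  They are converted into
 * TSC deadlines at the point of use, in the form:
 *
 *      info->tsc_target = rdtsc() + tsc_frequency * LOOPRECOVER_TIMEOUT1;
 *
 * and loopwdog() re-arms the deadline with LOOPRECOVER_TIMEOUT2 each time
 * it trips.
 */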

#define MAX_INVAL_PAGES         128

struct pmap_inval_info {
        vm_offset_t     va;
        pt_entry_t      *ptep;
        pt_entry_t      opte;
        pt_entry_t      npte;
        enum { INVDONE, INVSTORE, INVCMPSET } mode;
        int             success;
        int             npgs;
        cpumask_t       done;
        cpumask_t       mask;
#ifdef LOOPRECOVER
        cpumask_t       sigmask;
        int             failed;
        int64_t         tsc_target;
#endif
} __cachealign;

typedef struct pmap_inval_info pmap_inval_info_t;

static pmap_inval_info_t        invinfo[MAXCPU];
extern cpumask_t                smp_invmask;
#ifdef LOOPRECOVER
#ifdef LOOPMASK_IN
extern cpumask_t                smp_in_mask;
#endif
extern cpumask_t                smp_smurf_mask;
#endif
static long pmap_inval_bulk_count;
static int pmap_inval_watchdog_print;   /* must always default off */

SYSCTL_LONG(_machdep, OID_AUTO, pmap_inval_bulk_count, CTLFLAG_RW,
            &pmap_inval_bulk_count, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, pmap_inval_watchdog_print, CTLFLAG_RW,
           &pmap_inval_watchdog_print, 0, "");

static void
pmap_inval_init(pmap_t pmap)
{
        cpulock_t olock;
        cpulock_t nlock;

        crit_enter_id("inval");

        if (pmap != &kernel_pmap) {
                for (;;) {
                        olock = pmap->pm_active_lock;
                        cpu_ccfence();
                        nlock = olock | CPULOCK_EXCL;
                        if (olock != nlock &&
                            atomic_cmpset_int(&pmap->pm_active_lock,
                                              olock, nlock)) {
                                break;
                        }
                        lwkt_process_ipiq();
                        cpu_pause();
                }
                atomic_add_acq_long(&pmap->pm_invgen, 1);
        }
}

static void
pmap_inval_done(pmap_t pmap)
{
        if (pmap != &kernel_pmap) {
                atomic_add_acq_long(&pmap->pm_invgen, 1);
                atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
        }
        crit_exit_id("inval");
}

#ifdef LOOPRECOVER

/*
 * Debugging and lost IPI recovery code.
 */
static
__inline
int
loopwdog(struct pmap_inval_info *info)
{
        int64_t tsc;

        tsc = rdtsc();
        if (info->tsc_target - tsc < 0 && tsc_frequency) {
                info->tsc_target = tsc + (tsc_frequency * LOOPRECOVER_TIMEOUT2);
                return 1;
        }
        return 0;
}

static
void
loopdebug(const char *msg, pmap_inval_info_t *info)
{
        int p;
        int cpu = mycpu->gd_cpuid;

        /*
         * Don't kprintf() anything if the pmap inval watchdog gets hit.
         * DRM can cause an occasional watchdog hit (at least with a 1/16
         * second watchdog), and attempting to kprintf to the KVM frame
         * buffer from Xinvltlb, which ignores critical sections, can
         * implode the system.
         */
        if (pmap_inval_watchdog_print == 0)
                return;

        cpu_lfence();
#ifdef LOOPRECOVER
        atomic_add_long(&smp_smurf_mask.ary[0], 0);
#endif
        kprintf("ipilost-%s! %d mode=%d m=%08jx d=%08jx "
#ifdef LOOPRECOVER
                "s=%08jx "
#endif
#ifdef LOOPMASK_IN
                "in=%08jx "
#endif
#ifdef LOOPRECOVER
                "smurf=%08jx\n"
#endif
                , msg, cpu, info->mode,
                info->mask.ary[0],
                info->done.ary[0]
#ifdef LOOPRECOVER
                , info->sigmask.ary[0]
#endif
#ifdef LOOPMASK_IN
                , smp_in_mask.ary[0]
#endif
#ifdef LOOPRECOVER
                , smp_smurf_mask.ary[0]
#endif
                );
        kprintf("mdglob ");
        for (p = 0; p < ncpus; ++p)
                kprintf(" %d", CPU_prvspace[p]->mdglobaldata.gd_xinvaltlb);
        kprintf("\n");
}

#endif

#ifdef CHECKSIG

#define CHECKSIGMASK(info)      _checksigmask(info, __FILE__, __LINE__)

static
void
_checksigmask(pmap_inval_info_t *info, const char *file, int line)
{
        cpumask_t tmp;

        tmp = info->mask;
        CPUMASK_ANDMASK(tmp, info->sigmask);
        if (CPUMASK_CMPMASKNEQ(tmp, info->mask)) {
                kprintf("\"%s\" line %d: bad sig/mask %08jx %08jx\n",
                        file, line, info->sigmask.ary[0], info->mask.ary[0]);
        }
}

#else

#define CHECKSIGMASK(info)

#endif

/*
 * Invalidate the specified va across all cpus associated with the pmap.
 * If va == (vm_offset_t)-1, we invltlb() instead of invlpg().  The operation
 * will be done fully synchronously with storing npte into *ptep and returning
 * opte.
 *
 * If ptep is NULL the operation will execute semi-synchronously.
 * ptep must be NULL if npgs > 1.
 */
pt_entry_t
pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs,
               pt_entry_t *ptep, pt_entry_t npte)
{
        globaldata_t gd = mycpu;
        pmap_inval_info_t *info;
        pt_entry_t opte = 0;
        int cpu = gd->gd_cpuid;
        cpumask_t tmpmask;
        unsigned long rflags;

        /*
         * Initialize invalidation for pmap and enter critical section.
         */
        if (pmap == NULL)
                pmap = &kernel_pmap;
        pmap_inval_init(pmap);

        /*
         * Shortcut single-cpu case if possible.
         */
        if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
                /*
                 * Convert to invltlb if there are too many pages to
                 * invlpg on.
                 */
                if (npgs > MAX_INVAL_PAGES) {
                        npgs = 0;
                        va = (vm_offset_t)-1;
                }

                /*
                 * Invalidate the specified pages, handle invltlb if requested.
                 */
                while (npgs) {
                        --npgs;
                        if (ptep) {
                                opte = atomic_swap_long(ptep, npte);
                                ++ptep;
                        }
                        if (va == (vm_offset_t)-1)
                                break;
                        cpu_invlpg((void *)va);
                        va += PAGE_SIZE;
                }
                if (va == (vm_offset_t)-1)
                        cpu_invltlb();
                pmap_inval_done(pmap);

                return opte;
        }

        /*
         * We need a critical section to prevent getting preempted while
         * we set up our command.  A preemption might execute its own
         * pmap_inval*() command and create confusion below.
         *
         * tsc_target is our watchdog timeout that will attempt to recover
         * from a lost IPI.  It is armed for LOOPRECOVER_TIMEOUT1 seconds.
         */
        info = &invinfo[cpu];
        info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

        /*
         * We must wait for other cpus which may still be finishing up a
         * prior operation that we requested.
         *
         * We do not have to disable interrupts here.  An Xinvltlb can occur
         * at any time (even within a critical section), but it will not
         * act on our command until we set our done bits.
         */
        while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
                if (loopwdog(info)) {
                        info->failed = 1;
                        loopdebug("A", info);
                        /* XXX recover from possible bug */
                        CPUMASK_ASSZERO(info->done);
                }
#endif
                cpu_pause();
        }
        KKASSERT(info->mode == INVDONE);

        /*
         * Must set our cpu in the invalidation scan mask before
         * any possibility of [partial] execution (remember, XINVLTLB
         * can interrupt a critical section).
         */
        ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

        info->va = va;
        info->npgs = npgs;
        info->ptep = ptep;
        info->npte = npte;
        info->opte = 0;
#ifdef LOOPRECOVER
        info->failed = 0;
#endif
        info->mode = INVSTORE;

        tmpmask = pmap->pm_active;      /* volatile (bits may be cleared) */
        cpu_ccfence();
        CPUMASK_ANDMASK(tmpmask, smp_active_mask);

        /*
         * If ptep is NULL the operation can be semi-synchronous, which means
         * we can improve performance by flagging and removing idle cpus
         * (see the idleinvlclr function in mp_machdep.c).
         *
         * Typically kernel page table operation is semi-synchronous.
         */
        if (ptep == NULL)
                smp_smurf_idleinvlclr(&tmpmask);
        CPUMASK_ORBIT(tmpmask, cpu);
        info->mask = tmpmask;

        /*
         * The command may start executing the moment 'done' is initialized,
         * so disable interrupts on the current cpu to prevent the 'done'
         * field from changing out from under us (other cpus can't clear
         * done bits until the originating cpu clears its mask bit, but
         * other cpus CAN start clearing their mask bits).
         */
#ifdef LOOPRECOVER
        info->sigmask = tmpmask;
        CHECKSIGMASK(info);
#endif
        cpu_sfence();
        rflags = read_rflags();
        cpu_disable_intr();

        ATOMIC_CPUMASK_COPY(info->done, tmpmask);
        /* execution can begin here due to races */

        /*
         * Pass our copy of the done bits (so they don't change out from
         * under us) to generate the Xinvltlb interrupt on the targets.
         */
        smp_invlpg(&tmpmask);
        opte = info->opte;
        KKASSERT(info->mode == INVDONE);

        /*
         * Target cpus will be in their loop exiting concurrently with our
         * cleanup.  They will not lose the bitmask they obtained before so
         * we can safely clear this bit.
         */
        ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
        write_rflags(rflags);
        pmap_inval_done(pmap);

        return opte;
}
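
/*
 * Illustrative usage sketch (hypothetical caller, not taken from the
 * kernel): replace a single pte, let pmap_inval_smp() perform the
 * cross-cpu shootdown, then act on the returned old pte.
 *
 *      pt_entry_t opte;
 *
 *      opte = pmap_inval_smp(pmap, va, 1, ptep, npte);
 *      if (opte) {
 *              ... the previous mapping existed, inspect its bits ...
 *      }
 *
 * Passing a NULL ptep (with npgs possibly > 1) performs only the
 * invalidation and allows the semi-synchronous path described above.
 */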

/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically only if *ptep equals opte, across the pmap's active cpus.
 *
 * Returns 1 on success, 0 on failure (caller typically retries).
 */
int
pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
                      pt_entry_t opte, pt_entry_t npte)
{
        globaldata_t gd = mycpu;
        pmap_inval_info_t *info;
        int success;
        int cpu = gd->gd_cpuid;
        cpumask_t tmpmask;
        unsigned long rflags;

        /*
         * Initialize invalidation for pmap and enter critical section.
         */
        if (pmap == NULL)
                pmap = &kernel_pmap;
        pmap_inval_init(pmap);

        /*
         * Shortcut single-cpu case if possible.
         */
        if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
                if (atomic_cmpset_long(ptep, opte, npte)) {
                        if (va == (vm_offset_t)-1)
                                cpu_invltlb();
                        else
                                cpu_invlpg((void *)va);
                        pmap_inval_done(pmap);
                        return 1;
                } else {
                        pmap_inval_done(pmap);
                        return 0;
                }
        }

        /*
         * We need a critical section to prevent getting preempted while
         * we set up our command.  A preemption might execute its own
         * pmap_inval*() command and create confusion below.
         */
        info = &invinfo[cpu];
        info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

        /*
         * We must wait for other cpus which may still be finishing
         * up a prior operation.
         */
        while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
                if (loopwdog(info)) {
                        info->failed = 1;
                        loopdebug("B", info);
                        /* XXX recover from possible bug */
                        CPUMASK_ASSZERO(info->done);
                }
#endif
                cpu_pause();
        }
        KKASSERT(info->mode == INVDONE);

        /*
         * Must set our cpu in the invalidation scan mask before
         * any possibility of [partial] execution (remember, XINVLTLB
         * can interrupt a critical section).
         */
        ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

        info->va = va;
        info->npgs = 1;                 /* unused */
        info->ptep = ptep;
        info->npte = npte;
        info->opte = opte;
#ifdef LOOPRECOVER
        info->failed = 0;
#endif
        info->mode = INVCMPSET;
        info->success = 0;

        tmpmask = pmap->pm_active;      /* volatile */
        cpu_ccfence();
        CPUMASK_ANDMASK(tmpmask, smp_active_mask);
        CPUMASK_ORBIT(tmpmask, cpu);
        info->mask = tmpmask;

        /*
         * The command may start executing the moment 'done' is initialized,
         * so disable interrupts on the current cpu to prevent the 'done'
         * field from changing out from under us (other cpus can't clear
         * done bits until the originating cpu clears its mask bit).
         */
#ifdef LOOPRECOVER
        info->sigmask = tmpmask;
        CHECKSIGMASK(info);
#endif
        cpu_sfence();
        rflags = read_rflags();
        cpu_disable_intr();

        ATOMIC_CPUMASK_COPY(info->done, tmpmask);

        /*
         * Pass our copy of the done bits (so they don't change out from
         * under us) to generate the Xinvltlb interrupt on the targets.
         */
        smp_invlpg(&tmpmask);
        success = info->success;
        KKASSERT(info->mode == INVDONE);

        ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
        write_rflags(rflags);
        pmap_inval_done(pmap);

        return success;
}
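
/*
 * Illustrative usage sketch (hypothetical caller, for illustration only;
 * PG_RW_BIT is a placeholder name, not a real symbol): since
 * pmap_inval_smp_cmpset() fails when *ptep no longer matches opte (e.g.
 * the cpu set the modified bit concurrently), callers typically re-read
 * the pte and retry:
 *
 *      for (;;) {
 *              opte = *ptep;
 *              cpu_ccfence();
 *              npte = opte & ~(pt_entry_t)PG_RW_BIT;
 *              if (pmap_inval_smp_cmpset(pmap, va, ptep, opte, npte))
 *                      break;
 *              cpu_pause();
 *      }
 */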

void
pmap_inval_bulk_init(pmap_inval_bulk_t *bulk, struct pmap *pmap)
{
        bulk->pmap = pmap;
        bulk->va_beg = 0;
        bulk->va_end = 0;
        bulk->count = 0;
}

pt_entry_t
pmap_inval_bulk(pmap_inval_bulk_t *bulk, vm_offset_t va,
                pt_entry_t *ptep, pt_entry_t npte)
{
        pt_entry_t pte;

        /*
         * Degenerate case: localized, or we don't care (e.g. because we
         * are jacking the entire page table), or the pmap is not in use
         * by anyone.  No invalidations are done on any cpu.
         */
        if (bulk == NULL) {
                pte = atomic_swap_long(ptep, npte);
                return pte;
        }

        /*
         * If it isn't the kernel pmap we execute the operation synchronously
         * on all cpus belonging to the pmap, which avoids concurrency bugs in
         * the hw related to changing pte's out from under threads.
         *
         * Eventually I would like to implement streaming pmap invalidation
         * for user pmaps to reduce mmap/munmap overheads for heavily-loaded
         * threaded programs.
         */
        if (bulk->pmap != &kernel_pmap) {
                pte = pmap_inval_smp(bulk->pmap, va, 1, ptep, npte);
                return pte;
        }

        /*
         * This is the kernel_pmap.  All unmap operations presume that there
         * are no other cpus accessing the addresses in question.  Implement
         * the bulking algorithm: collect the required information and
         * synchronize once at the end.
         */
        pte = atomic_swap_long(ptep, npte);
        if (va == (vm_offset_t)-1) {
                bulk->va_beg = va;
        } else if (bulk->va_beg == bulk->va_end) {
                bulk->va_beg = va;
                bulk->va_end = va + PAGE_SIZE;
        } else if (va == bulk->va_end) {
                bulk->va_end = va + PAGE_SIZE;
        } else {
                bulk->va_beg = (vm_offset_t)-1;
                bulk->va_end = 0;
#if 0
                pmap_inval_bulk_flush(bulk);
                bulk->count = 1;
                if (va == (vm_offset_t)-1) {
                        bulk->va_beg = va;
                        bulk->va_end = 0;
                } else {
                        bulk->va_beg = va;
                        bulk->va_end = va + PAGE_SIZE;
                }
#endif
        }
        ++bulk->count;

        return pte;
}

void
pmap_inval_bulk_flush(pmap_inval_bulk_t *bulk)
{
        if (bulk == NULL)
                return;
        if (bulk->count > 0)
                pmap_inval_bulk_count += (bulk->count - 1);
        if (bulk->va_beg != bulk->va_end) {
                if (bulk->va_beg == (vm_offset_t)-1) {
                        pmap_inval_smp(bulk->pmap, bulk->va_beg, 1, NULL, 0);
                } else {
                        long n;

                        n = (bulk->va_end - bulk->va_beg) >> PAGE_SHIFT;
                        pmap_inval_smp(bulk->pmap, bulk->va_beg, n, NULL, 0);
                }
        }
        bulk->va_beg = 0;
        bulk->va_end = 0;
        bulk->count = 0;
}
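
/*
 * Illustrative usage sketch of the bulk API (hypothetical caller, not
 * taken from the kernel): batch pte removals over a contiguous kernel
 * range and issue a single invalidation at the end.
 *
 *      pmap_inval_bulk_t bulk;
 *
 *      pmap_inval_bulk_init(&bulk, &kernel_pmap);
 *      while (va < eva) {
 *              opte = pmap_inval_bulk(&bulk, va, ptep, 0);
 *              ++ptep;
 *              va += PAGE_SIZE;
 *      }
 *      pmap_inval_bulk_flush(&bulk);
 */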

/*
 * Called with a critical section held and interrupts enabled.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
        globaldata_t gd = mycpu;
        pmap_inval_info_t *info;
        int loopme = 0;
        int cpu;
        cpumask_t cpumask;

        /*
         * Check all cpus for invalidations we may need to service.
         */
        cpu_ccfence();
        cpu = gd->gd_cpuid;
        cpumask = *cpumaskp;

        while (CPUMASK_TESTNZERO(cpumask)) {
                int n = BSFCPUMASK(cpumask);

#ifdef LOOPRECOVER
                KKASSERT(n >= 0 && n < MAXCPU);
#endif

                CPUMASK_NANDBIT(cpumask, n);
                info = &invinfo[n];

                /*
                 * Due to interrupts/races we can catch a new operation
                 * in an older interrupt.  A fence is needed once we detect
                 * the (not) done bit.
                 */
                if (!CPUMASK_TESTBIT(info->done, cpu))
                        continue;
                cpu_lfence();
#ifdef LOOPRECOVER
                if (toolong) {
                        kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
                                cpu, n, info->done.ary[0], info->mask.ary[0],
                                info->mode);
                }
#endif

                /*
                 * info->mask and info->done always contain the originating
                 * cpu until the originator is done.  Targets may still be
                 * present in info->done after the originator is done (they
                 * will be finishing up their loops).
                 *
                 * Clear info->mask bits on other cpus to indicate that they
                 * have quiesced (entered the loop).  Once the other mask bits
                 * are clear we can execute the operation on the originator,
                 * then clear the mask and done bits on the originator.  The
                 * targets will then finish up their side and clear their
                 * done bits.
                 *
                 * The command is considered 100% done when all done bits have
                 * been cleared.
                 */
                if (n != cpu) {
                        /*
                         * Command state machine for 'other' cpus.
                         */
                        if (CPUMASK_TESTBIT(info->mask, cpu)) {
                                /*
                                 * Other cpus indicate to the originator
                                 * that they are quiesced.
                                 */
                                ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                                loopme = 1;
                        } else if (info->ptep &&
                                   CPUMASK_TESTBIT(info->mask, n)) {
                                /*
                                 * Other cpu must wait for the originator (n)
                                 * to complete its command if ptep is not
                                 * NULL.
                                 */
                                loopme = 1;
                        } else {
                                /*
                                 * Other cpu detects that the originator has
                                 * completed its command, or there was no
                                 * command.
                                 *
                                 * Now that the page table entry has changed,
                                 * we can follow up with our own invalidation.
                                 */
                                vm_offset_t va = info->va;
                                int npgs;

                                if (va == (vm_offset_t)-1 ||
                                    info->npgs > MAX_INVAL_PAGES) {
                                        cpu_invltlb();
                                } else {
                                        for (npgs = info->npgs; npgs; --npgs) {
                                                cpu_invlpg((void *)va);
                                                va += PAGE_SIZE;
                                        }
                                }
                                ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
                                /* info invalid now */
                                /* loopme left alone */
                        }
                } else if (CPUMASK_TESTBIT(info->mask, cpu)) {
                        /*
                         * Originator is waiting for other cpus.
                         */
                        if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
                                /*
                                 * Originator waits for other cpus to enter
                                 * their loop (aka quiesce).
                                 *
                                 * If this bugs out the IPI may have been
                                 * lost, so try to recover by resetting our
                                 * own reentrancy bit, clearing the smurf
                                 * mask for the cpus that did not respond,
                                 * and reissuing the IPI.
                                 */
                                loopme = 1;
#ifdef LOOPRECOVER
                                if (loopwdog(info)) {
                                        info->failed = 1;
                                        loopdebug("C", info);
                                        /* XXX recover from possible bug */
                                        mdcpu->gd_xinvaltlb = 0;
                                        ATOMIC_CPUMASK_NANDMASK(smp_smurf_mask,
                                                                info->mask);
                                        cpu_disable_intr();
                                        smp_invlpg(&smp_active_mask);

                                        /*
                                         * Force outer-loop retest of Xinvltlb
                                         * requests (see mp_machdep.c).
                                         */
                                        mdcpu->gd_xinvaltlb = 2;
                                        cpu_enable_intr();
                                }
#endif
                        } else {
                                /*
                                 * Originator executes operation and clears
                                 * mask to allow other cpus to finish.
                                 */
                                KKASSERT(info->mode != INVDONE);
                                if (info->mode == INVSTORE) {
                                        if (info->ptep)
                                                info->opte = atomic_swap_long(
                                                    info->ptep, info->npte);
                                        CHECKSIGMASK(info);
                                        ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                                        CHECKSIGMASK(info);
                                } else {
                                        if (atomic_cmpset_long(info->ptep,
                                                               info->opte,
                                                               info->npte)) {
                                                info->success = 1;
                                        } else {
                                                info->success = 0;
                                        }
                                        CHECKSIGMASK(info);
                                        ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                                        CHECKSIGMASK(info);
                                }
                                loopme = 1;
                        }
                } else {
                        /*
                         * Originator does not have to wait for the other
                         * cpus to finish.  It clears its done bit.  A new
                         * command will not be initiated by the originator
                         * until the other cpus have cleared their done bits
                         * (asynchronously).
                         */
                        vm_offset_t va = info->va;
                        int npgs;

                        if (va == (vm_offset_t)-1 ||
                            info->npgs > MAX_INVAL_PAGES) {
                                cpu_invltlb();
                        } else {
                                for (npgs = info->npgs; npgs; --npgs) {
                                        cpu_invlpg((void *)va);
                                        va += PAGE_SIZE;
                                }
                        }

                        /* leave loopme alone */
                        /* other cpus may still be finishing up */
                        /* can't race originator since that's us */
                        info->mode = INVDONE;
                        ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
                }
        }
        return loopme;
}