/*
 * Copyright (c) 2003-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 */
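
/*
 * Rough sequence of a cross-cpu invalidation as implemented below (an
 * informal reading aid; pmap_inval_smp() and pmap_inval_intr() are
 * authoritative):
 *
 *	1. The originator sets its bit in smp_invmask and stages the
 *	   command in its invinfo[] slot (va, npgs, ptep, npte, mode).
 *	2. It copies the target cpu mask into info->done and issues the
 *	   Xinvltlb IPI via smp_invlpg().
 *	3. Each target quiesces in pmap_inval_intr(), clearing its bit
 *	   in info->mask.
 *	4. Once only the originator's mask bit remains, the originator
 *	   performs the pte swap/cmpset and clears its own mask bit.
 *	5. Targets then run their local invlpg/invltlb and clear their
 *	   done bits; the command is 100% complete when info->done
 *	   drains to zero.
 */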

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#if 1	/* DEBUGGING */
#define LOOPMASK	(/* 32 * */ 16 * 128 * 1024 - 1)
#endif

#define MAX_INVAL_PAGES		128

struct pmap_inval_info {
	vm_offset_t	va;
	pt_entry_t	*ptep;
	pt_entry_t	opte;
	pt_entry_t	npte;
	enum { INVDONE, INVSTORE, INVCMPSET } mode;
	int		success;
	int		npgs;
	cpumask_t	done;
	cpumask_t	mask;
#ifdef LOOPMASK
	cpumask_t	sigmask;
	int		failed;
	int		xloops;
#endif
} __cachealign;

typedef struct pmap_inval_info pmap_inval_info_t;

static pmap_inval_info_t	invinfo[MAXCPU];
extern cpumask_t		smp_invmask;
#ifdef LOOPMASK
#ifdef LOOPMASK_IN
extern cpumask_t		smp_in_mask;
#endif
extern cpumask_t		smp_smurf_mask;
#endif
static long pmap_inval_bulk_count;

SYSCTL_LONG(_machdep, OID_AUTO, pmap_inval_bulk_count, CTLFLAG_RW,
	    &pmap_inval_bulk_count, 0, "");

static void
pmap_inval_init(pmap_t pmap)
{
	cpulock_t olock;
	cpulock_t nlock;

	crit_enter_id("inval");

	if (pmap != &kernel_pmap) {
		for (;;) {
			olock = pmap->pm_active_lock;
			cpu_ccfence();
			nlock = olock | CPULOCK_EXCL;
			if (olock != nlock &&
			    atomic_cmpset_int(&pmap->pm_active_lock,
					      olock, nlock)) {
				break;
			}
			lwkt_process_ipiq();
			cpu_pause();
		}
		atomic_add_acq_long(&pmap->pm_invgen, 1);
	}
}

static void
pmap_inval_done(pmap_t pmap)
{
	if (pmap != &kernel_pmap) {
		atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
		atomic_add_acq_long(&pmap->pm_invgen, 1);
	}
	crit_exit_id("inval");
}
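
/*
 * Usage note (informal): every entry point below brackets its work with
 * pmap_inval_init()/pmap_inval_done().  These enter/exit a critical
 * section and, for non-kernel pmaps, hold CPULOCK_EXCL in pm_active_lock
 * and bump pm_invgen on both sides of the operation:
 *
 *	pmap_inval_init(pmap);
 *	... stage and broadcast the pte operation ...
 *	pmap_inval_done(pmap);
 */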

/*
 * Debugging helper, dumps the state of a stuck invalidation for the
 * LOOPMASK watchdogs below.
 */
#ifdef LOOPMASK

static
void
loopdebug(const char *msg, pmap_inval_info_t *info)
{
	int p;
	int cpu = mycpu->gd_cpuid;

	cpu_lfence();
	atomic_add_long(&smp_smurf_mask.ary[0], 0);
	kprintf("%s %d mode=%d m=%08jx d=%08jx s=%08jx "
#ifdef LOOPMASK_IN
		"in=%08jx "
#endif
		"smurf=%08jx\n",
		msg, cpu, info->mode,
		info->mask.ary[0],
		info->done.ary[0],
		info->sigmask.ary[0],
#ifdef LOOPMASK_IN
		smp_in_mask.ary[0],
#endif
		smp_smurf_mask.ary[0]);
	kprintf("mdglob ");
	for (p = 0; p < ncpus; ++p)
		kprintf(" %d", CPU_prvspace[p]->mdglobaldata.gd_xinvaltlb);
	kprintf("\n");
}

#endif

#ifdef CHECKSIG

#define CHECKSIGMASK(info)	_checksigmask(info, __FILE__, __LINE__)

static
void
_checksigmask(pmap_inval_info_t *info, const char *file, int line)
{
	cpumask_t tmp;

	tmp = info->mask;
	CPUMASK_ANDMASK(tmp, info->sigmask);
	if (CPUMASK_CMPMASKNEQ(tmp, info->mask)) {
		kprintf("\"%s\" line %d: bad sig/mask %08jx %08jx\n",
			file, line, info->sigmask.ary[0], info->mask.ary[0]);
	}
}

#else

#define CHECKSIGMASK(info)

#endif

/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically across the pmap's active cpus.
 *
 * If va == (vm_offset_t)-1, we invltlb() instead of invlpg().  The
 * operation will be done fully synchronously with storing npte into
 * *ptep and returning opte.
 *
 * If ptep is NULL the operation will execute semi-synchronously.
 * ptep must be NULL if npgs > 1.
 *
 * This is a holy mess.
 *
 * Returns the previous contents of *ptep.
 */
pt_entry_t
pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs,
	       pt_entry_t *ptep, pt_entry_t npte)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	pt_entry_t opte = 0;
	int cpu = gd->gd_cpuid;
	cpumask_t tmpmask;
	unsigned long rflags;

	/*
	 * Initialize invalidation for pmap and enter critical section.
	 */
	if (pmap == NULL)
		pmap = &kernel_pmap;
	pmap_inval_init(pmap);

	/*
	 * Shortcut single-cpu case if possible.
	 */
	if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
		/*
		 * Convert to invltlb if there are too many pages to
		 * invlpg on.
		 */
		if (npgs > MAX_INVAL_PAGES) {
			npgs = 0;
			va = (vm_offset_t)-1;
		}

		/*
		 * Invalidate the specified pages, handle invltlb if requested.
		 */
		while (npgs) {
			--npgs;
			if (ptep) {
				opte = atomic_swap_long(ptep, npte);
				++ptep;
			}
			if (va == (vm_offset_t)-1)
				break;
			cpu_invlpg((void *)va);
			va += PAGE_SIZE;
		}
		if (va == (vm_offset_t)-1)
			cpu_invltlb();
		pmap_inval_done(pmap);

		return opte;
	}

	/*
	 * We need a critical section to prevent getting preempted while
	 * we setup our command.  A preemption might execute its own
	 * pmap_inval*() command and create confusion below.
	 */
	info = &invinfo[cpu];
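
	/*
	 * (Reading aid: each cpu owns invinfo[cpu] when acting as an
	 * originator.  The structure is __cachealign'd so that slots
	 * polled by other cpus do not false-share cache lines; targets
	 * index invinfo[] by the originating cpu in pmap_inval_intr().)
	 */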

	/*
	 * We must wait for other cpus which may still be finishing up a
	 * prior operation that we requested.
	 *
	 * We do not have to disable interrupts here.  An Xinvltlb can occur
	 * at any time (even within a critical section), but it will not
	 * act on our command until we set our done bits.
	 */
	while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPMASK
		int loops;

		loops = ++info->xloops;
		if ((loops & LOOPMASK) == 0) {
			info->failed = 1;
			loopdebug("orig_waitA", info);
			/* XXX recover from possible bug */
			CPUMASK_ASSZERO(info->done);
		}
#endif
		cpu_pause();
	}
	KKASSERT(info->mode == INVDONE);

	/*
	 * Must set our cpu in the invalidation scan mask before
	 * any possibility of [partial] execution (remember, XINVLTLB
	 * can interrupt a critical section).
	 */
	ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

	info->va = va;
	info->npgs = npgs;
	info->ptep = ptep;
	info->npte = npte;
	info->opte = 0;
#ifdef LOOPMASK
	info->failed = 0;
#endif
	info->mode = INVSTORE;

	tmpmask = pmap->pm_active;	/* volatile (bits may be cleared) */
	cpu_ccfence();
	CPUMASK_ANDMASK(tmpmask, smp_active_mask);

	/*
	 * If ptep is NULL the operation can be semi-synchronous, which means
	 * we can improve performance by flagging and removing idle cpus
	 * (see the idleinvlclr function in mp_machdep.c).
	 *
	 * Typically kernel page table operations are semi-synchronous.
	 */
	if (ptep == NULL)
		smp_smurf_idleinvlclr(&tmpmask);
	CPUMASK_ORBIT(tmpmask, cpu);
	info->mask = tmpmask;

	/*
	 * The command may start executing the moment 'done' is initialized,
	 * so disable the current cpu's interrupts to prevent the 'done'
	 * field from changing (other cpus can't clear done bits until the
	 * originating cpu clears its mask bit, but other cpus CAN start
	 * clearing their mask bits).
	 */
#ifdef LOOPMASK
	info->sigmask = tmpmask;
	CHECKSIGMASK(info);
#endif
	cpu_sfence();
	rflags = read_rflags();
	cpu_disable_intr();

	ATOMIC_CPUMASK_COPY(info->done, tmpmask);
	/* execution can begin here due to races */

	/*
	 * Pass our copy of the done bits (so they don't change out from
	 * under us) to generate the Xinvltlb interrupt on the targets.
	 */
	smp_invlpg(&tmpmask);
	opte = info->opte;
	KKASSERT(info->mode == INVDONE);

	/*
	 * Target cpus will be in their loops, exiting concurrently with our
	 * cleanup.  They will not lose the bitmask they obtained before, so
	 * we can safely clear this bit.
	 */
	ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
	write_rflags(rflags);
	pmap_inval_done(pmap);

	return opte;
}
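
/*
 * Illustrative caller pattern (hypothetical, not code from this file):
 * replacing a single pte and harvesting the previous contents in one
 * fully-synchronous step.
 *
 *	pt_entry_t opte;
 *
 *	opte = pmap_inval_smp(pmap, va, 1, ptep, npte);
 *	... inspect opte for modified/accessed bits as needed ...
 */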

/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically only if *ptep equals opte, across the pmap's active cpus.
 *
 * Returns 1 on success, 0 on failure (caller typically retries).
 */
int
pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
		      pt_entry_t opte, pt_entry_t npte)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	int success;
	int cpu = gd->gd_cpuid;
	cpumask_t tmpmask;
	unsigned long rflags;

	/*
	 * Initialize invalidation for pmap and enter critical section.
	 */
	if (pmap == NULL)
		pmap = &kernel_pmap;
	pmap_inval_init(pmap);

	/*
	 * Shortcut single-cpu case if possible.
	 */
	if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
		if (atomic_cmpset_long(ptep, opte, npte)) {
			if (va == (vm_offset_t)-1)
				cpu_invltlb();
			else
				cpu_invlpg((void *)va);
			pmap_inval_done(pmap);
			return 1;
		} else {
			pmap_inval_done(pmap);
			return 0;
		}
	}

	/*
	 * We need a critical section to prevent getting preempted while
	 * we setup our command.  A preemption might execute its own
	 * pmap_inval*() command and create confusion below.
	 */
	info = &invinfo[cpu];

	/*
	 * We must wait for other cpus which may still be finishing
	 * up a prior operation.
	 */
	while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPMASK
		int loops;

		loops = ++info->xloops;
		if ((loops & LOOPMASK) == 0) {
			info->failed = 1;
			loopdebug("orig_waitB", info);
			/* XXX recover from possible bug */
			CPUMASK_ASSZERO(info->done);
		}
#endif
		cpu_pause();
	}
	KKASSERT(info->mode == INVDONE);

	/*
	 * Must set our cpu in the invalidation scan mask before
	 * any possibility of [partial] execution (remember, XINVLTLB
	 * can interrupt a critical section).
	 */
	ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

	info->va = va;
	info->npgs = 1;			/* unused */
	info->ptep = ptep;
	info->npte = npte;
	info->opte = opte;
#ifdef LOOPMASK
	info->failed = 0;
#endif
	info->mode = INVCMPSET;
	info->success = 0;

	tmpmask = pmap->pm_active;	/* volatile */
	cpu_ccfence();
	CPUMASK_ANDMASK(tmpmask, smp_active_mask);
	CPUMASK_ORBIT(tmpmask, cpu);
	info->mask = tmpmask;

	/*
	 * The command may start executing the moment 'done' is initialized,
	 * so disable the current cpu's interrupts to prevent the 'done'
	 * field from changing (other cpus can't clear done bits until the
	 * originating cpu clears its mask bit).
	 */
#ifdef LOOPMASK
	info->sigmask = tmpmask;
	CHECKSIGMASK(info);
#endif
	cpu_sfence();
	rflags = read_rflags();
	cpu_disable_intr();

	ATOMIC_CPUMASK_COPY(info->done, tmpmask);

	/*
	 * Pass our copy of the done bits (so they don't change out from
	 * under us) to generate the Xinvltlb interrupt on the targets.
	 */
	smp_invlpg(&tmpmask);
	success = info->success;
	KKASSERT(info->mode == INVDONE);

	ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
	write_rflags(rflags);
	pmap_inval_done(pmap);

	return success;
}

void
pmap_inval_bulk_init(pmap_inval_bulk_t *bulk, struct pmap *pmap)
{
	bulk->pmap = pmap;
	bulk->va_beg = 0;
	bulk->va_end = 0;
	bulk->count = 0;
}
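
/*
 * Illustrative (hypothetical) use of the bulk API defined here, not code
 * from this file: tearing down a contiguous run of kernel mappings with
 * a single synchronizing invalidation at the end.
 *
 *	pmap_inval_bulk_t bulk;
 *	vm_offset_t scan;
 *
 *	pmap_inval_bulk_init(&bulk, &kernel_pmap);
 *	for (scan = start; scan < end; scan += PAGE_SIZE, ++ptep)
 *		(void)pmap_inval_bulk(&bulk, scan, ptep, 0);
 *	pmap_inval_bulk_flush(&bulk);
 */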

pt_entry_t
pmap_inval_bulk(pmap_inval_bulk_t *bulk, vm_offset_t va,
		pt_entry_t *ptep, pt_entry_t npte)
{
	pt_entry_t pte;

	/*
	 * Degenerate case: localized, or we don't care (e.g. because we
	 * are jacking the entire page table), or the pmap is not in use
	 * by anyone.  No invalidations are done on any cpu.
	 */
	if (bulk == NULL) {
		pte = atomic_swap_long(ptep, npte);
		return pte;
	}

	/*
	 * If it isn't the kernel pmap we execute the operation synchronously
	 * on all cpus belonging to the pmap, which avoids concurrency bugs in
	 * the hardware related to changing ptes out from under threads.
	 *
	 * Eventually I would like to implement streaming pmap invalidation
	 * for user pmaps to reduce mmap/munmap overheads for heavily-loaded
	 * threaded programs.
	 */
	if (bulk->pmap != &kernel_pmap) {
		pte = pmap_inval_smp(bulk->pmap, va, 1, ptep, npte);
		return pte;
	}

	/*
	 * This is the kernel_pmap.  All unmap operations presume that there
	 * are no other cpus accessing the addresses in question.  Implement
	 * the bulking algorithm: collect the required information and
	 * synchronize once at the end.
	 */
	pte = atomic_swap_long(ptep, npte);
	if (va == (vm_offset_t)-1) {
		bulk->va_beg = va;
	} else if (bulk->va_beg == bulk->va_end) {
		bulk->va_beg = va;
		bulk->va_end = va + PAGE_SIZE;
	} else if (va == bulk->va_end) {
		bulk->va_end = va + PAGE_SIZE;
	} else {
		/*
		 * Discontiguous va, fall back to a full invltlb on flush.
		 */
		bulk->va_beg = (vm_offset_t)-1;
		bulk->va_end = 0;
#if 0
		pmap_inval_bulk_flush(bulk);
		bulk->count = 1;
		if (va == (vm_offset_t)-1) {
			bulk->va_beg = va;
			bulk->va_end = 0;
		} else {
			bulk->va_beg = va;
			bulk->va_end = va + PAGE_SIZE;
		}
#endif
	}
	++bulk->count;

	return pte;
}

void
pmap_inval_bulk_flush(pmap_inval_bulk_t *bulk)
{
	if (bulk == NULL)
		return;
	if (bulk->count > 0)
		pmap_inval_bulk_count += (bulk->count - 1);
	if (bulk->va_beg != bulk->va_end) {
		if (bulk->va_beg == (vm_offset_t)-1) {
			pmap_inval_smp(bulk->pmap, bulk->va_beg, 1, NULL, 0);
		} else {
			long n;

			n = (bulk->va_end - bulk->va_beg) >> PAGE_SHIFT;
			pmap_inval_smp(bulk->pmap, bulk->va_beg, n, NULL, 0);
		}
	}
	bulk->va_beg = 0;
	bulk->va_end = 0;
	bulk->count = 0;
}
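
/*
 * Informal summary of the state machine in pmap_inval_intr() below (a
 * reading aid, not normative): for each pending invinfo[n] with our done
 * bit set, a target cpu first clears its bit in info->mask (quiesce),
 * then waits for the originator to perform the pte operation, then runs
 * the local invlpg/invltlb and clears its done bit.  The originator
 * executes the operation once everyone has quiesced and is the one that
 * resets info->mode to INVDONE.
 */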

/*
 * Called with a critical section held and interrupts enabled.
 *
 * Returns non-zero (loopme) if the caller should re-poll, i.e. there is
 * still work pending on some cpu.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	int loopme = 0;
	int cpu;
	cpumask_t cpumask;
#ifdef LOOPMASK
	int loops;
#endif

	/*
	 * Check all cpus for invalidations we may need to service.
	 */
	cpu_ccfence();
	cpu = gd->gd_cpuid;
	cpumask = *cpumaskp;

	while (CPUMASK_TESTNZERO(cpumask)) {
		int n = BSFCPUMASK(cpumask);

#ifdef LOOPMASK
		KKASSERT(n >= 0 && n < MAXCPU);
#endif

		CPUMASK_NANDBIT(cpumask, n);
		info = &invinfo[n];

		/*
		 * Due to interrupts/races we can catch a new operation
		 * in an older interrupt.  A fence is needed once we detect
		 * the (not) done bit.
		 */
		if (!CPUMASK_TESTBIT(info->done, cpu))
			continue;
		cpu_lfence();
#ifdef LOOPMASK
		if (toolong) {
			kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
				cpu, n, info->done.ary[0], info->mask.ary[0],
				info->mode);
		}
#endif

		/*
		 * info->mask and info->done always contain the originating
		 * cpu until the originator is done.  Targets may still be
		 * present in info->done after the originator is done (they
		 * will be finishing up their loops).
		 *
		 * Clear info->mask bits on other cpus to indicate that they
		 * have quiesced (entered the loop).  Once the other mask bits
		 * are clear we can execute the operation on the originator,
		 * then clear the mask and done bits on the originator.  The
		 * targets will then finish up their side and clear their
		 * done bits.
		 *
		 * The command is considered 100% done when all done bits have
		 * been cleared.
		 */
		if (n != cpu) {
			/*
			 * Command state machine for 'other' cpus.
			 */
			if (CPUMASK_TESTBIT(info->mask, cpu)) {
				/*
				 * Other cpus indicate to the originator that
				 * they have quiesced.
				 */
				ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
				loopme = 1;
			} else if (info->ptep &&
				   CPUMASK_TESTBIT(info->mask, n)) {
				/*
				 * Other cpus must wait for the originator (n)
				 * to complete its command if ptep is not
				 * NULL.
				 */
				loopme = 1;
			} else {
				/*
				 * Other cpu detects that the originator has
				 * completed its command, or there was no
				 * command.
				 *
				 * Now that the page table entry has changed,
				 * we can follow up with our own invalidation.
				 */
				vm_offset_t va = info->va;
				int npgs;

				if (va == (vm_offset_t)-1 ||
				    info->npgs > MAX_INVAL_PAGES) {
					cpu_invltlb();
				} else {
					for (npgs = info->npgs; npgs; --npgs) {
						cpu_invlpg((void *)va);
						va += PAGE_SIZE;
					}
				}
				ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
				/* info invalid now */
				/* loopme left alone */
			}
		} else if (CPUMASK_TESTBIT(info->mask, cpu)) {
			/*
			 * Originator is waiting for other cpus
			 */
			if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
				/*
				 * Originator waits for other cpus to enter
				 * their loop (aka quiesce).
				 */
				loopme = 1;
#ifdef LOOPMASK
				loops = ++info->xloops;
				if ((loops & LOOPMASK) == 0) {
					info->failed = 1;
					loopdebug("orig_waitC", info);
					/* XXX recover from possible bug */
					mdcpu->gd_xinvaltlb = 0;
					cpu_disable_intr();
					smp_invlpg(&smp_active_mask);
					cpu_enable_intr();
				}
#endif
			} else {
				/*
				 * Originator executes operation and clears
				 * mask to allow other cpus to finish.
				 */
				KKASSERT(info->mode != INVDONE);
				if (info->mode == INVSTORE) {
					if (info->ptep) {
						info->opte = atomic_swap_long(
						    info->ptep, info->npte);
					}
					CHECKSIGMASK(info);
					ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
					CHECKSIGMASK(info);
				} else {
					if (atomic_cmpset_long(info->ptep,
							       info->opte,
							       info->npte)) {
						info->success = 1;
					} else {
						info->success = 0;
					}
					CHECKSIGMASK(info);
					ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
					CHECKSIGMASK(info);
				}
				loopme = 1;
			}
		} else {
			/*
			 * Originator does not have to wait for the other
			 * cpus to finish.  It clears its done bit.  A new
			 * command will not be initiated by the originator
			 * until the other cpus have cleared their done bits
			 * (asynchronously).
			 */
			vm_offset_t va = info->va;
			int npgs;

			if (va == (vm_offset_t)-1 ||
			    info->npgs > MAX_INVAL_PAGES) {
				cpu_invltlb();
			} else {
				for (npgs = info->npgs; npgs; --npgs) {
					cpu_invlpg((void *)va);
					va += PAGE_SIZE;
				}
			}
#ifdef LOOPMASK
			info->xloops = 0;
#endif
			/* leave loopme alone */
			/* other cpus may still be finishing up */
			/* can't race originator since that's us */
			info->mode = INVDONE;
			ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
		}
	}
	return loopme;
}
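
/*
 * Hypothetical dispatch sketch, not code from this file (the real
 * Xinvltlb vector lives in the platform interrupt path): a handler is
 * expected to re-poll until no more work is pending, e.g.
 *
 *	while (pmap_inval_intr(&smp_invmask, toolong))
 *		cpu_pause();
 *
 * Within this file, 'toolong' only gates the stuck-command diagnostic
 * kprintf in the LOOPMASK debugging code.
 */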