/*	$NetBSD: cache.c,v 1.63 2002/07/29 06:39:41 grant Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)cache.c	8.2 (Berkeley) 10/30/93
 */

/*
 * Cache routines.
 *
 * TODO:
 *	- rework range flush
 */

#include "opt_multiprocessor.h"
#include "opt_sparc_arch.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <machine/ctlreg.h>
#include <machine/pte.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#include <sparc/sparc/cpuvar.h>

struct cachestats cachestats;

int cache_alias_dist;		/* Cache anti-aliasing constants */
int cache_alias_bits;
u_long dvma_cachealign;

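/*
 * Illustration of these constants: for a 64 KB direct-mapped virtual
 * cache with 4 KB pages, the enable routines below set cache_alias_dist
 * to 0x10000 and cache_alias_bits to 0xf000.  The pmap's alias check is
 * then essentially ((va1 ^ va2) & cache_alias_bits) != 0: two virtual
 * mappings of one physical page may only be cached simultaneously when
 * they agree in those bits.  dvma_cachealign, where set, asks for the
 * same alignment on DVMA mappings.
 */
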
/*
 * Enable the cache.
 * We need to clear out the valid bits first.
 */
void
sun4_cache_enable()
{
	u_int i, lim, ls, ts;

	cache_alias_bits = CPU_ISSUN4
				? CACHE_ALIAS_BITS_SUN4
				: CACHE_ALIAS_BITS_SUN4C;
	cache_alias_dist = CPU_ISSUN4
				? CACHE_ALIAS_DIST_SUN4
				: CACHE_ALIAS_DIST_SUN4C;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
		sta(i, ASI_CONTROL, 0);

	stba(AC_SYSENABLE, ASI_CONTROL,
	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
	CACHEINFO.c_enabled = 1;

#ifdef notyet
	if (cpuinfo.flags & SUN4_IOCACHE) {
		stba(AC_SYSENABLE, ASI_CONTROL,
		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
		printf("iocache enabled\n");
	}
#endif
}

/*
 * XXX The hammer below is a bit too big: SUN4D systems only have Viking.
 */
#if defined(SUN4M) || defined(SUN4D)

void
ms1_cache_enable()
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* We "flash-clear" the I/D caches. */
	if ((pcr & MS1_PCR_ICE) == 0)
		sta(0, ASI_ICACHECLR, 0);
	if ((pcr & MS1_PCR_DCE) == 0)
		sta(0, ASI_DCACHECLR, 0);

	/* Turn on caches */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/*
	 * When zeroing or copying pages, there might still be entries in
	 * the cache, since we don't flush pages from the cache when
	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
	 * MS1 cache is write-through and not write-allocate, so we can
	 * use cacheable access while not displacing cache lines.
	 */
	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
}

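/*
 * Note that for the set-associative modules only the way size (total
 * size divided by associativity) matters for aliasing, which is what
 * the max() computation above and in the other enable routines below
 * yields.  For example, a 16 KB 4-way data cache has 4 KB ways; with
 * 4 KB pages that makes cache_alias_bits zero, i.e. no alias
 * restrictions at all.
 */
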
void
viking_cache_enable()
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	if ((pcr & VIKING_PCR_ICE) == 0) {
		/* I-cache not on; "flash-clear" it now. */
		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
		sta(0, ASI_ICACHECLR, 0);		/* clear */
	}
	if ((pcr & VIKING_PCR_DCE) == 0) {
		/* D-cache not on: "flash-clear" it. */
		sta(0x80000000, ASI_DCACHECLR, 0);
		sta(0, ASI_DCACHECLR, 0);
	}

	/* Turn on caches via MMU */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/* Now turn on MultiCache if it exists */
	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
		/* Set external cache enable bit in MXCC control register */
		stda(MXCC_CTRLREG, ASI_CONTROL,
		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
		CACHEINFO.ec_enabled = 1;
	}
}

void
hypersparc_cache_enable()
{
	int i, ls, ts;
	u_int pcr, v;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/*
	 * Set up the anti-aliasing constants and DVMA alignment constraint.
	 */
	cache_alias_dist = CACHEINFO.c_totalsize;
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
	dvma_cachealign = cache_alias_dist;

	/* Now reset cache tag memory if cache not yet enabled */
	if ((pcr & HYPERSPARC_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
	hypersparc_cache_flush_all();

	/* Enable write-back cache */
	pcr |= HYPERSPARC_PCR_CE;
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= HYPERSPARC_PCR_CM;

	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;

	/* XXX: should add support */
	if (CACHEINFO.c_hwflush)
		panic("cache_enable: can't handle 4M with hw-flush cache");

	/*
	 * Enable instruction cache and, on single-processor machines,
	 * disable `Unimplemented Flush Traps'.
	 */
	v = HYPERSPARC_ICCR_ICE | (ncpu == 1 ? HYPERSPARC_ICCR_FTD : 0);
	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
}

void
swift_cache_enable()
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/*
	 * Now reset cache tag memory if cache not yet enabled.
	 * (This must happen before the enable bits are set below;
	 * testing pcr after OR-ing them in would never fire.)
	 */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & SWIFT_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & SWIFT_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	CACHEINFO.c_enabled = 1;
}

void
cypress_cache_enable()
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = CACHEINFO.c_totalsize;
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);
	pcr &= ~(CYPRESS_PCR_CE | CYPRESS_PCR_CM);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	if ((pcr & CYPRESS_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= CYPRESS_PCR_CE;
	/* If put in write-back mode, turn it on */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= CYPRESS_PCR_CM;
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
turbosparc_cache_enable()
{
	int i, ls, ts;
	u_int pcr, pcf;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
	if (pcf & TURBOSPARC_PCFG_SNP)
		printf(": DVMA coherent ");

	CACHEINFO.c_enabled = 1;
}

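/*
 * Note on the `reset tags only if not yet enabled' pattern above:
 * tag memory is undefined at power-up and must be cleared before the
 * first enable, but if the ROM has already turned a cache on, its
 * tags describe live (possibly dirty) lines and are left alone.
 */
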
#endif /* SUN4M || SUN4D */

/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
sun4_vcache_flush_context()
{
	char *p;
	int i, ls;

	cachestats.cs_ncxflush++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	if (CACHEINFO.c_hwflush) {
		ls = NBPG;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHCTX, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHCTX, 0);
	}
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 *
 * This function is only called on sun4's with 3-level MMUs; there's
 * no hw-flush space.
 */
void
sun4_vcache_flush_region(vreg)
	int vreg;
{
	int i, ls;
	char *p;

	cachestats.cs_nrgflush++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHREG, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
sun4_vcache_flush_segment(vreg, vseg)
	int vreg, vseg;
{
	int i, ls;
	char *p;

	cachestats.cs_nsgflush++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	if (CACHEINFO.c_hwflush) {
		ls = NBPG;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHSEG, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHSEG, 0);
	}
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
sun4_vcache_flush_page(va)
	int va;
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	cachestats.cs_npgflush++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = NBPG >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHPG, 0);
}

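/*
 * For scale: with 16-byte lines, sun4_vcache_flush_page() above costs
 * NBPG/16 = 256 ASI writes per page, while the hardware-assisted
 * version below does the same job with a single write.
 */
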
/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * This version uses hardware-assisted flush operation and just needs
 * one write into ASI_HWFLUSHPG space to flush all cache lines.
 */
void
sun4_vcache_flush_page_hw(va)
	int va;
{
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	cachestats.cs_npgflush++;
	p = (char *)va;
	sta(p, ASI_HWFLUSHPG, 0);
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)

void
sun4_cache_flush(base, len)
	caddr_t base;
	u_int len;
{
	int i, ls, baseoff;
	char *p;

	if (CACHEINFO.c_vactype == VAC_NONE)
		return;

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	cachestats.cs_nraflush++;
#ifdef notyet
	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
#endif

	if (i < CACHE_FLUSH_MAGIC) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);	/* == base & ~PGOFSET */
		if (CACHEINFO.c_hwflush) {
			for (; --i >= 0; p += NBPG)
				sta(p, ASI_HWFLUSHPG, 0);
		} else {
			ls = CACHEINFO.c_linesize;
			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
			for (; --i >= 0; p += ls)
				sta(p, ASI_FLUSHPG, 0);
		}
		return;
	}
	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (i == 1)
		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
	else {
		if (HASSUN4_MMU3L) {
			baseoff = (u_int)base & RGOFSET;
			i = (baseoff + len + RGOFSET) >> RGSHIFT;
			if (i == 1)
				sun4_vcache_flush_region(VA_VREG(base));
			else
				sun4_vcache_flush_context();
		} else
			sun4_vcache_flush_context();
	}
}

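/*
 * A concrete reading of CACHE_FLUSH_MAGIC: with a 64 KB cache and
 * 4 KB pages it evaluates to 16, the point at which flushing page by
 * page stops being cheaper than one segment flush (both then touch
 * totalsize/linesize lines).  That historical constant appears to be
 * the `magic number 16' the XXX comment above refers to.
 */
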
#if defined(SUN4M) || defined(SUN4D)
/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space.  (There is no hardware flush space on these MMUs.)
 */
void
srmmu_vcache_flush_context()
{
	char *p;
	int i, ls;

	cachestats.cs_ncxflush++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFC, 0);
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 */
void
srmmu_vcache_flush_region(vreg)
	int vreg;
{
	int i, ls;
	char *p;

	cachestats.cs_nrgflush++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFR, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 */
void
srmmu_vcache_flush_segment(vreg, vseg)
	int vreg, vseg;
{
	int i, ls;
	char *p;

	cachestats.cs_nsgflush++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFS, 0);
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
srmmu_vcache_flush_page(va)
	int va;
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	cachestats.cs_npgflush++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = NBPG >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFP, 0);
}

/*
 * Flush entire cache.
 */
void
srmmu_cache_flush_all()
{
	srmmu_vcache_flush_context();
}

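/*
 * srmmu_cache_flush() below adds a sub-page fast path.  For example,
 * flushing 8 bytes at va 0x100a with 32-byte lines rounds the start
 * down to 0x1000 and the length up to one line, so a single
 * ASI_IDCACHELFP write covers the whole range.
 */
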
/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)

void
srmmu_cache_flush(base, len)
	caddr_t base;
	u_int len;
{
	int i, ls, baseoff;
	char *p;

	if (len < NBPG) {
		/* less than a page, flush just the covered cache lines */
		ls = CACHEINFO.c_linesize;
		baseoff = (int)base & (ls - 1);
		i = (baseoff + len + ls - 1) >> CACHEINFO.c_l2linesize;
		p = (char *)((int)base & -ls);
		for (; --i >= 0; p += ls)
			sta(p, ASI_IDCACHELFP, 0);
		return;
	}

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	cachestats.cs_nraflush++;
#ifdef notyet
	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
#endif

	if (i < CACHE_FLUSH_MAGIC) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);	/* == base & ~PGOFSET */
		ls = CACHEINFO.c_linesize;
		i <<= PGSHIFT - CACHEINFO.c_l2linesize;
		for (; --i >= 0; p += ls)
			sta(p, ASI_IDCACHELFP, 0);
		return;
	}
	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (i == 1)
		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
	else {
		baseoff = (u_int)base & RGOFSET;
		i = (baseoff + len + RGOFSET) >> RGSHIFT;
		if (i == 1)
			srmmu_vcache_flush_region(VA_VREG(base));
		else
			srmmu_vcache_flush_context();
	}
}

int ms1_cacheflush_magic = 0;
/*
 * Note: in `#if' context the macro below expands to a plain (non-macro)
 * identifier, which the preprocessor evaluates as 0, so the
 * hand-invalidate path in ms1_cache_flush() is currently compiled out.
 */
#define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic

void
ms1_cache_flush(base, len)
	caddr_t base;
	u_int len;
{

	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

#if MS1_CACHEFLUSH_MAGIC
	if (len <= MS1_CACHEFLUSH_MAGIC) {
		/*
		 * If the range to be flushed is sufficiently small
		 * invalidate the covered cache lines by hand.
		 *
		 * The MicroSPARC I has a direct-mapped virtually addressed
		 * physically tagged data cache which is organised as
		 * 128 lines of 16 bytes.  Virtual address bits [4-10]
		 * select the cache line.  The cache tags are accessed
		 * through the standard DCACHE control space using the
		 * same address bits as those used to select the cache
		 * line in the virtual address.
		 *
		 * Note: we don't bother to compare the actual tags
		 * since that would require looking up physical addresses.
		 *
		 * The format of the tags we read from ASI_DCACHE control
		 * space is:
		 *
		 *	31     27 26            11 10         1 0
		 *	+--------+----------------+------------+-+
		 *	|  xxx   |   PA[26-11]    |    xxx     |V|
		 *	+--------+----------------+------------+-+
		 *
		 *	PA: bits 11-26 of the physical address
		 *	V:  line valid bit
		 */
		int tagaddr = ((u_int)base & 0x7f0);

		len = roundup(len, 16);
		while (len != 0) {
			int tag = lda(tagaddr, ASI_DCACHETAG);
			if ((tag & 1) == 1) {
				/* Mark this cache line invalid */
				sta(tagaddr, ASI_DCACHETAG, 0);
			}
			len -= 16;
			tagaddr = (tagaddr + 16) & 0x7f0;
		}
	} else
#endif
		/* Flush entire data cache */
		sta(0, ASI_DCACHECLR, 0);
}

/*
 * Flush entire cache.
 */
void
ms1_cache_flush_all()
{

	/* Flash-clear both caches */
	sta(0, ASI_ICACHECLR, 0);
	sta(0, ASI_DCACHECLR, 0);
}

void
hypersparc_cache_flush_all()
{

	srmmu_vcache_flush_context();
	/* Flush instruction cache */
	hypersparc_pure_vcache_flush();
}

void
cypress_cache_flush_all()
{
	extern char kernel_text[];
	char *p;
	int i, ls;

	/* Fill the cache with known read-only content */
	p = (char *)kernel_text;
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		(*(volatile char *)p);
}

void
viking_cache_flush(base, len)
	caddr_t base;
	u_int len;
{

	/*
	 * Although physically tagged, we would still need to flush the
	 * data cache after (for a write-through cache) or before (for a
	 * write-back cache) DMA operations.  On the Viking, however,
	 * DVMA is assumed to be handled coherently by the hardware, so
	 * nothing is done here.
	 */
}

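/*
 * Both cypress_cache_flush_all() above and the displacement branch of
 * viking_pcache_flush_page() below flush by reading known read-only
 * kernel text.  For the Viking's 16 KB on-chip data cache (128 sets x
 * 4 ways x 32 bytes), reads at a 16 KB stride index the same set, so
 * the 2*4-1 reads per set below displace all four ways of that set.
 */
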
void
viking_pcache_flush_page(pa, invalidate_only)
	paddr_t pa;
	int invalidate_only;
{
	int set, i;

	/*
	 * The viking's on-chip data cache is 4-way set associative,
	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
	 * Note that one 4096 byte page exactly covers all 128 sets
	 * in the cache.
	 */
	if (invalidate_only) {
		u_int pa_tag = (pa >> 12);
		u_int tagaddr;
		u_int64_t tag;

		/*
		 * Loop over all sets and invalidate all entries tagged
		 * with the given physical address by resetting the cache
		 * tag in ASI_DCACHETAG control space.
		 *
		 * The address format for accessing a tag is:
		 *
		 *	31 30    27 26          12 11     5 4  3 2    0
		 *	+------+-----+------+----//----+--------+----+-----+
		 *	| type | xxx | line |    xxx   |   set  | xx |  0  |
		 *	+------+-----+------+----//----+--------+----+-----+
		 *
		 *	set:  the cache set tag to be read (0-127)
		 *	line: the line within the set (0-3)
		 *	type: 1: read set tag; 2: read physical tag
		 *
		 * The (type 2) tag read from this address is a 64-bit word
		 * formatted as follows:
		 *
		 *	63   57 56 55  49 48 47  41 40 39      24 23        0
		 *	+------+-+------+-+------+-+-----------+-------------+
		 *	| xxx  |V| xxx  |D| xxx  |S|    xxx    |  PA[35-12]  |
		 *	+------+-+------+-+------+-+-----------+-------------+
		 *
		 *	PA: bits 12-35 of the physical address
		 *	S:  line shared bit
		 *	D:  line dirty bit
		 *	V:  line valid bit
		 */

#define VIKING_DCACHETAG_S	0x0000010000000000UL	/* line shared bit */
#define VIKING_DCACHETAG_D	0x0001000000000000UL	/* line dirty bit */
#define VIKING_DCACHETAG_V	0x0100000000000000UL	/* line valid bit */
#define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffUL	/* PA tag field */

		for (set = 0; set < 128; set++) {
			/* Set set number and access type */
			tagaddr = (set << 5) | (2 << 30);

			/* Examine the tag for each line in the set */
			for (i = 0 ; i < 4; i++) {
				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
				/*
				 * If this is a valid tag and the PA field
				 * matches clear the tag.
				 */
				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
				    (tag & VIKING_DCACHETAG_V) != 0)
					stda(tagaddr | (i << 26),
					     ASI_DCACHETAG, 0);
			}
		}

	} else {
		extern char kernel_text[];

		/*
		 * Force the cache to validate its backing memory
		 * by displacing all cache lines with known read-only
		 * content from the start of kernel text.
		 *
		 * Note that this thrashes the entire cache.  However,
		 * we currently only need to call upon this code
		 * once at boot time.
		 */
		for (set = 0; set < 128; set++) {
			int *v = (int *)(kernel_text + (set << 5));

			/*
			 * We need to read (2*associativity-1) different
			 * locations to be sure to displace the entire set.
			 */
			i = 2 * 4 - 1;
			while (i--) {
				(*(volatile int *)v);
				v += 4096;
			}
		}
	}
}
#endif /* SUN4M || SUN4D */


#if defined(MULTIPROCESSOR)
/*
 * Cache flushing on multi-processor systems involves sending
 * inter-processor messages to flush the cache on each module.
 *
 * The current context of the originating processor is passed in the
 * message.  This assumes the allocation of CPU contexts is a global
 * operation (remember that the actual context tables for the CPUs
 * are distinct).
 *
 * We don't issue cross calls while `cold' is set, or before this CPU
 * is itself ready to accept them (CPUFLG_READY).
 */

void
smp_vcache_flush_page(va)
	int va;
{
	int n, s;

	cpuinfo.sp_vcache_flush_page(va);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_page *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_page;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_PAGE;
		p->ctx = getcontext4m();
		p->va = va;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_vcache_flush_segment(vr, vs)
	int vr, vs;
{
	int n, s;

	cpuinfo.sp_vcache_flush_segment(vr, vs);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_segment *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_segment;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_SEGMENT;
		p->ctx = getcontext4m();
		p->vr = vr;
		p->vs = vs;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_vcache_flush_region(vr)
	int vr;
{
	int n, s;

	cpuinfo.sp_vcache_flush_region(vr);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_region *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_region;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_REGION;
		p->ctx = getcontext4m();
		p->vr = vr;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_vcache_flush_context()
{
	int n, s;

	cpuinfo.sp_vcache_flush_context();
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_context *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_context;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_CONTEXT;
		p->ctx = getcontext4m();
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

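/*
 * The range flush below is cross-called in the same way.  The sp_*
 * entry points invoked first in each of these wrappers are the
 * per-module single-processor flush routines installed in cpuinfo;
 * each wrapper thus performs the local flush before notifying the
 * other CPUs.
 */
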
void
smp_cache_flush(va, size)
	caddr_t va;
	u_int size;
{
	int n, s;

	cpuinfo.sp_cache_flush(va, size);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_range *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_range;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_RANGE;
		p->ctx = getcontext4m();
		p->va = va;
		p->size = size;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}
#endif /* MULTIPROCESSOR */