/*	$NetBSD: cache.c,v 1.61 2002/01/25 19:19:46 tsutsui Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)cache.c	8.2 (Berkeley) 10/30/93
 *
 */

/*
 * Cache routines.
 *
 * TODO:
 *	- rework range flush
 */

#include "opt_multiprocessor.h"
#include "opt_sparc_arch.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <machine/ctlreg.h>
#include <machine/pte.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#include <sparc/sparc/cpuvar.h>

struct cachestats cachestats;

int cache_alias_dist;		/* Cache anti-aliasing constants */
int cache_alias_bits;
u_long dvma_cachealign;

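/*
 * A note on the anti-aliasing constants (illustrative, derived from the
 * way the enable routines below compute them): cache_alias_dist is the
 * virtual-address distance at which two mappings of the same physical
 * page can collide in a virtually indexed cache, and cache_alias_bits
 * is the mask of virtual-address bits above the page offset that take
 * part in cache indexing, i.e. (cache_alias_dist - 1) & ~PGOFSET.  For
 * a direct-mapped cache the distance is the total cache size; for a
 * set-associative cache it is the size of one way, e.g. a 16 KB 4-way
 * data cache would give a 4 KB alias distance.  dvma_cachealign, where
 * set, records the alignment the DVMA code should apply to its mappings.
 */
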
/*
 * Enable the cache.
 * We need to clear out the valid bits first.
 */
void
sun4_cache_enable()
{
	u_int i, lim, ls, ts;

	cache_alias_bits = CPU_ISSUN4
				? CACHE_ALIAS_BITS_SUN4
				: CACHE_ALIAS_BITS_SUN4C;
	cache_alias_dist = CPU_ISSUN4
				? CACHE_ALIAS_DIST_SUN4
				: CACHE_ALIAS_DIST_SUN4C;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
		sta(i, ASI_CONTROL, 0);

	stba(AC_SYSENABLE, ASI_CONTROL,
	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
	CACHEINFO.c_enabled = 1;

#ifdef notyet
	if (cpuinfo.flags & SUN4_IOCACHE) {
		stba(AC_SYSENABLE, ASI_CONTROL,
		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
		printf("iocache enabled\n");
	}
#endif
}

#if defined(SUN4M)
void
ms1_cache_enable()
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* We "flash-clear" the I/D caches. */
	if ((pcr & MS1_PCR_ICE) == 0)
		sta(0, ASI_ICACHECLR, 0);
	if ((pcr & MS1_PCR_DCE) == 0)
		sta(0, ASI_DCACHECLR, 0);

	/* Turn on caches */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/*
	 * When zeroing or copying pages, there might still be entries in
	 * the cache, since we don't flush pages from the cache when
	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
	 * MS1 cache is write-through and not write-allocate, so we can
	 * use cacheable access while not displacing cache lines.
	 */
	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
}

void
viking_cache_enable()
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	if ((pcr & VIKING_PCR_ICE) == 0) {
		/* I-cache not on; "flash-clear" it now. */
		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
		sta(0, ASI_ICACHECLR, 0);		/* clear */
	}
	if ((pcr & VIKING_PCR_DCE) == 0) {
		/* D-cache not on: "flash-clear" it. */
		sta(0x80000000, ASI_DCACHECLR, 0);
		sta(0, ASI_DCACHECLR, 0);
	}

	/* Turn on caches via MMU */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/* Now turn on MultiCache if it exists */
	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
		/* Set external cache enable bit in MXCC control register */
		stda(MXCC_CTRLREG, ASI_CONTROL,
		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
		CACHEINFO.ec_enabled = 1;
	}
}

void
hypersparc_cache_enable()
{
	int i, ls, ts;
	u_int pcr, v;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/*
	 * Set up the anti-aliasing constants and DVMA alignment constraint.
	 */
	cache_alias_dist = CACHEINFO.c_totalsize;
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
	dvma_cachealign = cache_alias_dist;

	/* Now reset cache tag memory if cache not yet enabled */
	if ((pcr & HYPERSPARC_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
	hypersparc_cache_flush_all();

	/* Enable write-back cache */
	pcr |= HYPERSPARC_PCR_CE;
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= HYPERSPARC_PCR_CM;

	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;

	/* XXX: should add support */
	if (CACHEINFO.c_hwflush)
		panic("cache_enable: can't handle 4M with hw-flush cache");

	/*
	 * Enable instruction cache and, on single-processor machines,
	 * disable `Unimplemented Flush Traps'.
	 */
	v = HYPERSPARC_ICCR_ICE | (ncpu == 1 ? HYPERSPARC_ICCR_FTD : 0);
	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
}


void
swift_cache_enable()
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & SWIFT_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & SWIFT_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	CACHEINFO.c_enabled = 1;
}

void
cypress_cache_enable()
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = CACHEINFO.c_totalsize;
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);
	pcr &= ~(CYPRESS_PCR_CE | CYPRESS_PCR_CM);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	if ((pcr & CYPRESS_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= CYPRESS_PCR_CE;
	/* If put in write-back mode, turn it on */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= CYPRESS_PCR_CM;
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
turbosparc_cache_enable()
{
	int i, ls, ts;
	u_int pcr, pcf;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
	if (pcf & TURBOSPARC_PCFG_SNP)
		printf("DVMA coherent ");

	CACHEINFO.c_enabled = 1;
}
#endif

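/*
 * The sun4/sun4c flush routines below all follow the same pattern: one
 * store into the appropriate `flush' ASI per cache line (stride
 * c_linesize), or, when the hardware-assisted flush is available, one
 * store per page of cache.  As an illustrative example, a 64 KB cache
 * with 16-byte lines takes 4096 stores to flush in software, but only
 * 16 page-sized stores (64 KB / 4 KB) with hardware assist.
 */
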
/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
sun4_vcache_flush_context()
{
	char *p;
	int i, ls;

	cachestats.cs_ncxflush++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	if (CACHEINFO.c_hwflush) {
		ls = NBPG;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHCTX, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHCTX, 0);
	}
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 *
 * This function is only called on sun4's with 3-level MMUs; there's
 * no hw-flush space.
 */
void
sun4_vcache_flush_region(vreg)
	int vreg;
{
	int i, ls;
	char *p;

	cachestats.cs_nrgflush++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHREG, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
sun4_vcache_flush_segment(vreg, vseg)
	int vreg, vseg;
{
	int i, ls;
	char *p;

	cachestats.cs_nsgflush++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	if (CACHEINFO.c_hwflush) {
		ls = NBPG;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHSEG, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHSEG, 0);
	}
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
sun4_vcache_flush_page(va)
	int va;
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	cachestats.cs_npgflush++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = NBPG >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHPG, 0);
}

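/*
 * The page flush above issues NBPG >> c_l2linesize stores, i.e. one per
 * cache line covering the page; with 4 KB pages and 16-byte lines
 * (figures for illustration only) that is 256 stores per page flush.
 */
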
/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * This version uses hardware-assisted flush operation and just needs
 * one write into ASI_HWFLUSHPG space to flush all cache lines.
 */
void
sun4_vcache_flush_page_hw(va)
	int va;
{
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	cachestats.cs_npgflush++;
	p = (char *)va;
	sta(p, ASI_HWFLUSHPG, 0);
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)

void
sun4_cache_flush(base, len)
	caddr_t base;
	u_int len;
{
	int i, ls, baseoff;
	char *p;

	if (CACHEINFO.c_vactype == VAC_NONE)
		return;

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	cachestats.cs_nraflush++;
#ifdef notyet
	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
#endif

	if (i < CACHE_FLUSH_MAGIC) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);
		if (CACHEINFO.c_hwflush) {
			for (; --i >= 0; p += NBPG)
				sta(p, ASI_HWFLUSHPG, 0);
		} else {
			ls = CACHEINFO.c_linesize;
			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
			for (; --i >= 0; p += ls)
				sta(p, ASI_FLUSHPG, 0);
		}
		return;
	}
	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (i == 1)
		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
	else {
		if (HASSUN4_MMU3L) {
			baseoff = (u_int)base & RGOFSET;
			i = (baseoff + len + RGOFSET) >> RGSHIFT;
			if (i == 1)
				sun4_vcache_flush_region(VA_VREG(base));
			else
				sun4_vcache_flush_context();
		} else
			sun4_vcache_flush_context();
	}
}

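/*
 * Worked example of the range-flush policy above (figures illustrative):
 * with 4 KB pages, a 64 KB cache gives CACHE_FLUSH_MAGIC = 16.  A 20 KB
 * request covers at most 6 pages and is flushed page by page; a 128 KB
 * request covers more than 16 pages, so we flush the enclosing segment
 * if the range fits in a single segment, otherwise the region (on
 * 3-level MMUs) or the whole context.
 */
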
#if defined(SUN4M)
/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
srmmu_vcache_flush_context()
{
	char *p;
	int i, ls;

	cachestats.cs_ncxflush++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFC, 0);
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 */
void
srmmu_vcache_flush_region(vreg)
	int vreg;
{
	int i, ls;
	char *p;

	cachestats.cs_nrgflush++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFR, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
srmmu_vcache_flush_segment(vreg, vseg)
	int vreg, vseg;
{
	int i, ls;
	char *p;

	cachestats.cs_nsgflush++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFS, 0);
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
srmmu_vcache_flush_page(va)
	int va;
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	cachestats.cs_npgflush++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = NBPG >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFP, 0);
}

/*
 * Flush entire cache.
 */
void
srmmu_cache_flush_all()
{
	srmmu_vcache_flush_context();
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */
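/*
 * Unlike the sun4 version, srmmu_cache_flush() below also special-cases
 * ranges shorter than a page by flushing the covered lines one at a
 * time.  Illustrative example: with 32-byte lines, a 100-byte range
 * starting at offset 0x1c spans (0x1c + 100 + 31) >> 5 = 4 lines, so
 * only four ASI_IDCACHELFP stores are issued.
 */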

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)

void
srmmu_cache_flush(base, len)
	caddr_t base;
	u_int len;
{
	int i, ls, baseoff;
	char *p;

	if (len < NBPG) {
		/* less than a page, flush just the covered cache lines */
		ls = CACHEINFO.c_linesize;
		baseoff = (int)base & (ls - 1);
		i = (baseoff + len + ls - 1) >> CACHEINFO.c_l2linesize;
		p = (char *)((int)base & -ls);
		for (; --i >= 0; p += ls)
			sta(p, ASI_IDCACHELFP, 0);
		return;
	}

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	cachestats.cs_nraflush++;
#ifdef notyet
	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
#endif

	if (i < CACHE_FLUSH_MAGIC) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);
		ls = CACHEINFO.c_linesize;
		i <<= PGSHIFT - CACHEINFO.c_l2linesize;
		for (; --i >= 0; p += ls)
			sta(p, ASI_IDCACHELFP, 0);
		return;
	}
	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (i == 1)
		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
	else {
		baseoff = (u_int)base & RGOFSET;
		i = (baseoff + len + RGOFSET) >> RGSHIFT;
		if (i == 1)
			srmmu_vcache_flush_region(VA_VREG(base));
		else
			srmmu_vcache_flush_context();
	}
}

int ms1_cacheflush_magic = 0;
#define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic

void
ms1_cache_flush(base, len)
	caddr_t base;
	u_int len;
{
	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

#if MS1_CACHEFLUSH_MAGIC
	if (len <= MS1_CACHEFLUSH_MAGIC) {
		/*
		 * If the range to be flushed is sufficiently small
		 * invalidate the covered cache lines by hand.
		 *
		 * The MicroSPARC I has a direct-mapped virtually addressed
		 * physically tagged data cache which is organised as
		 * 128 lines of 16 bytes.  Virtual address bits [4-10]
		 * select the cache line.  The cache tags are accessed
		 * through the standard DCACHE control space using the
		 * same address bits as those used to select the cache
		 * line in the virtual address.
		 *
		 * Note: we don't bother to compare the actual tags
		 * since that would require looking up physical addresses.
		 *
		 * The format of the tags we read from ASI_DCACHE control
		 * space is:
		 *
		 * 31     27 26            11 10        1 0
		 * +--------+----------------+------------+-+
		 * |  xxx   |   PA[26-11]    |    xxx     |V|
		 * +--------+----------------+------------+-+
		 *
		 * PA: bits 11-26 of the physical address
		 * V:  line valid bit
		 */
		int tagaddr = ((u_int)base & 0x7f0);

		len = roundup(len, 16);
		while (len != 0) {
			int tag = lda(tagaddr, ASI_DCACHETAG);
			if ((tag & 1) == 1) {
				/* Mark this cache line invalid */
				sta(tagaddr, ASI_DCACHETAG, 0);
			}
			len -= 16;
			tagaddr = (tagaddr + 16) & 0x7f0;
		}
	} else
#endif
		/* Flush entire data cache */
		sta(0, ASI_DCACHECLR, 0);
}

/*
 * Flush entire cache.
 */
void
ms1_cache_flush_all()
{

	/* Flash-clear both caches */
	sta(0, ASI_ICACHECLR, 0);
	sta(0, ASI_DCACHECLR, 0);
}

void
hypersparc_cache_flush_all()
{

	srmmu_vcache_flush_context();
	/* Flush instruction cache */
	hypersparc_pure_vcache_flush();
}

void
cypress_cache_flush_all()
{
	extern char kernel_text[];
	char *p;
	int i, ls;

	/* Fill the cache with known read-only content */
	p = (char *)kernel_text;
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		(*(volatile char *)p);
}


void
viking_cache_flush(base, len)
	caddr_t base;
	u_int len;
{
	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

}

void
viking_pcache_flush_page(pa, invalidate_only)
	paddr_t pa;
	int invalidate_only;
{
	int set, i;

	/*
	 * The viking's on-chip data cache is 4-way set associative,
	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
	 * Note that one 4096 byte page exactly covers all 128 sets
	 * in the cache.
	 */
	if (invalidate_only) {
		u_int pa_tag = (pa >> 12);
		u_int tagaddr;
		u_int64_t tag;

		/*
		 * Loop over all sets and invalidate all entries tagged
		 * with the given physical address by resetting the cache
		 * tag in ASI_DCACHETAG control space.
		 *
		 * The address format for accessing a tag is:
		 *
		 * 31  30      27  26               11        5 4   3 2    0
		 * +------+-----+------+-------//--------+--------+----+-----+
		 * | type | xxx | line |       xxx       |  set   | xx |  0  |
		 * +------+-----+------+-------//--------+--------+----+-----+
		 *
		 * set:  the cache set tag to be read (0-127)
		 * line: the line within the set (0-3)
		 * type: 1: read set tag; 2: read physical tag
		 *
		 * The (type 2) tag read from this address is a 64-bit word
		 * formatted as follows:
		 *
		 *          5         4         4
		 * 63       6         8         0            23              0
		 * +-------+-+-------+-+-------+-+-----------+----------------+
		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |    PA[35-12]   |
		 * +-------+-+-------+-+-------+-+-----------+----------------+
		 *
		 * PA: bits 12-35 of the physical address
		 * S:  line shared bit
		 * D:  line dirty bit
		 * V:  line valid bit
		 */

#define VIKING_DCACHETAG_S	0x0000010000000000UL	/* line shared bit */
#define VIKING_DCACHETAG_D	0x0001000000000000UL	/* line dirty bit */
#define VIKING_DCACHETAG_V	0x0100000000000000UL	/* line valid bit */
#define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffUL	/* PA tag field */

		for (set = 0; set < 128; set++) {
			/* Set set number and access type */
			tagaddr = (set << 5) | (2 << 30);

			/* Examine the tag for each line in the set */
			for (i = 0 ; i < 4; i++) {
				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
				/*
				 * If this is a valid tag and the PA field
				 * matches clear the tag.
				 */
				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
				    (tag & VIKING_DCACHETAG_V) != 0)
					stda(tagaddr | (i << 26),
					     ASI_DCACHETAG, 0);
			}
		}

	} else {
		extern char kernel_text[];

		/*
		 * Force the cache to validate its backing memory
		 * by displacing all cache lines with known read-only
		 * content from the start of kernel text.
		 *
		 * Note that this thrashes the entire cache.  However,
		 * we currently only need to call upon this code
		 * once at boot time.
		 */
		for (set = 0; set < 128; set++) {
			int *v = (int *)(kernel_text + (set << 5));

			/*
			 * We need to read (2*associativity-1) different
			 * locations to be sure to displace the entire set.
			 */
			i = 2 * 4 - 1;
			while (i--) {
				(*(volatile int *)v);
				v += 4096;
			}
		}
	}
}
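/*
 * A note on the displacement loop above: `v' is an int *, so v += 4096
 * advances by 16 KB.  Since 128 sets of 32 bytes repeat every 4 KB,
 * locations 16 KB apart index the same set under different tags, which
 * is what pushes the old contents out.
 */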
#endif /* SUN4M */


#if defined(MULTIPROCESSOR)
/*
 * Cache flushing on multi-processor systems involves sending
 * inter-processor messages to flush the cache on each module.
 *
 * The current context of the originating processor is passed in the
 * message.  This assumes the allocation of CPU contexts is a global
 * operation (remember that the actual context tables for the CPUs
 * are distinct).
 *
 * We don't send cross calls while we're cold or while this CPU is not
 * yet accepting them itself (CPUFLG_READY is not set).
 */

void
smp_vcache_flush_page(va)
	int va;
{
	int n, s;

	cpuinfo.sp_vcache_flush_page(va);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_page *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_page;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_PAGE;
		p->ctx = getcontext4m();
		p->va = va;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_vcache_flush_segment(vr, vs)
	int vr, vs;
{
	int n, s;

	cpuinfo.sp_vcache_flush_segment(vr, vs);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_segment *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_segment;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_SEGMENT;
		p->ctx = getcontext4m();
		p->vr = vr;
		p->vs = vs;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_vcache_flush_region(vr)
	int vr;
{
	int n, s;

	cpuinfo.sp_vcache_flush_region(vr);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_region *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_region;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_REGION;
		p->ctx = getcontext4m();
		p->vr = vr;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_vcache_flush_context()
{
	int n, s;

	cpuinfo.sp_vcache_flush_context();
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_context *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_context;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_CONTEXT;
		p->ctx = getcontext4m();
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}

void
smp_cache_flush(va, size)
	caddr_t va;
	u_int size;
{
	int n, s;

	cpuinfo.sp_cache_flush(va, size);
	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
		return;
	LOCK_XPMSG();
	for (n = 0; n < ncpu; n++) {
		struct cpu_info *cpi = cpus[n];
		struct xpmsg_flush_range *p;

		if (CPU_READY(cpi))
			continue;
		p = &cpi->msg.u.xpmsg_flush_range;
		s = splhigh();
		simple_lock(&cpi->msg.lock);
		cpi->msg.tag = XPMSG_VCACHE_FLUSH_RANGE;
		p->ctx = getcontext4m();
		p->va = va;
		p->size = size;
		raise_ipi_wait_and_unlock(cpi);
		splx(s);
	}
	UNLOCK_XPMSG();
}
#endif /* MULTIPROCESSOR */