/*	$NetBSD: cache.c,v 1.97 2007/03/04 09:03:34 macallan Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)cache.c	8.2 (Berkeley) 10/30/93
 *
 */

/*
 * Cache routines.
 *
 * TODO:
 *	- rework range flush
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.97 2007/03/04 09:03:34 macallan Exp $");

#include "opt_multiprocessor.h"
#include "opt_sparc_arch.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <uvm/uvm_extern.h>

#include <machine/ctlreg.h>
#include <machine/pte.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#include <sparc/sparc/cpuvar.h>

struct evcnt vcache_flush_pg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","pg");
EVCNT_ATTACH_STATIC(vcache_flush_pg);
struct evcnt vcache_flush_seg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","seg");
EVCNT_ATTACH_STATIC(vcache_flush_seg);
struct evcnt vcache_flush_reg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","reg");
EVCNT_ATTACH_STATIC(vcache_flush_reg);
struct evcnt vcache_flush_ctx =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","ctx");
EVCNT_ATTACH_STATIC(vcache_flush_ctx);
struct evcnt vcache_flush_range =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","rng");
EVCNT_ATTACH_STATIC(vcache_flush_range);

int cache_alias_dist;		/* Cache anti-aliasing constants */
int cache_alias_bits;
u_long dvma_cachealign;
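
/*
 * Note: these constants are consumed by the pmap layer to detect
 * virtual-address aliases that a virtually-indexed cache cannot
 * distinguish.  As an illustrative sketch only (va1/va2 stand for two
 * hypothetical mappings of one and the same physical page), the test
 * is roughly:
 *
 *	if ((((int)va1 ^ (int)va2) & cache_alias_bits) != 0)
 *		... potential alias: flush, or map the page uncached ...
 */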

/*
 * Enable the cache.
 * We need to clear out the valid bits first.
 */
void
sun4_cache_enable(void)
{
	u_int i, lim, ls, ts;

	cache_alias_bits = CPU_ISSUN4
				? CACHE_ALIAS_BITS_SUN4
				: CACHE_ALIAS_BITS_SUN4C;
	cache_alias_dist = CPU_ISSUN4
				? CACHE_ALIAS_DIST_SUN4
				: CACHE_ALIAS_DIST_SUN4C;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
		sta(i, ASI_CONTROL, 0);

	stba(AC_SYSENABLE, ASI_CONTROL,
	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
	CACHEINFO.c_enabled = 1;

#ifdef notyet
	if (cpuinfo.flags & SUN4_IOCACHE) {
		stba(AC_SYSENABLE, ASI_CONTROL,
		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
		printf("iocache enabled\n");
	}
#endif
}

/*
 * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
 */
#if defined(SUN4M) || defined(SUN4D)
void
ms1_cache_enable(void)
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* We "flash-clear" the I/D caches. */
	if ((pcr & MS1_PCR_ICE) == 0)
		sta(0, ASI_ICACHECLR, 0);
	if ((pcr & MS1_PCR_DCE) == 0)
		sta(0, ASI_DCACHECLR, 0);

	/* Turn on caches */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/*
	 * When zeroing or copying pages, there might still be entries in
	 * the cache, since we don't flush pages from the cache when
	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
	 * MS1 cache is write-through and not write-allocate, so we can
	 * use cacheable access while not displacing cache lines.
	 */
	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
}

void
viking_cache_enable(void)
{
	u_int pcr;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	if ((pcr & VIKING_PCR_ICE) == 0) {
		/* I-cache not on; "flash-clear" it now. */
		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
		sta(0, ASI_ICACHECLR, 0);		/* clear */
	}
	if ((pcr & VIKING_PCR_DCE) == 0) {
		/* D-cache not on: "flash-clear" it. */
		sta(0x80000000, ASI_DCACHECLR, 0);
		sta(0, ASI_DCACHECLR, 0);
	}

	/* Turn on caches via MMU */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/* Now turn on MultiCache if it exists */
	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
		/* Set external cache enable bit in MXCC control register */
		stda(MXCC_CTRLREG, ASI_CONTROL,
		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
		CACHEINFO.ec_enabled = 1;
	}
}

void
hypersparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, v;
	int alias_dist;

	/*
	 * Setup the anti-aliasing constants and DVMA alignment constraint.
	 */
	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = cache_alias_dist;
	}

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	if ((pcr & HYPERSPARC_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
	hypersparc_cache_flush_all();

	/* Enable write-back cache */
	pcr |= HYPERSPARC_PCR_CE;
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= HYPERSPARC_PCR_CM;

	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;

	/* XXX: should add support */
	if (CACHEINFO.c_hwflush)
		panic("cache_enable: can't handle 4M with hw-flush cache");

	/*
	 * Enable instruction cache and, on single-processor machines,
	 * disable `Unimplemented Flush Traps'.
	 */
	v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
}


void
swift_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & SWIFT_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & SWIFT_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
cypress_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;
	int alias_dist;

	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = alias_dist;
	}

	pcr = lda(SRMMU_PCR, ASI_SRMMU);
	pcr &= ~CYPRESS_PCR_CM;

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	if ((pcr & CYPRESS_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= CYPRESS_PCR_CE;
	/* If put in write-back mode, turn it on */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= CYPRESS_PCR_CM;
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
turbosparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, pcf;
	/* External cache sizes in KB; see Turbo sparc manual */
	static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
	if (pcf & TURBOSPARC_PCFG_SE) {
		/*
		 * Record external cache info. The Turbosparc's second-
		 * level cache is physically addressed/tagged and is
		 * not exposed by the PROM.
		 */
		CACHEINFO.ec_totalsize = 1024 *
			ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
		CACHEINFO.ec_linesize = 32;
	}
	if (pcf & TURBOSPARC_PCFG_SNP)
		printf(": DVMA coherent ");

	CACHEINFO.c_enabled = 1;
}
#endif /* SUN4M || SUN4D */

/*
 * Note: on the sun4 and sun4c, the cache flush functions ignore the
 * `ctx' parameter.  This can be done since the pmap operations that
 * need to flush cache lines will already have switched to the proper
 * context to manipulate the MMU.  Hence we can avoid the overhead of
 * saving and restoring the context here.
 */

/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
sun4_vcache_flush_context(int ctx)
{
	char *p;
	int i, ls;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHCTX, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHCTX, 0);
	}
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 *
 * This function is only called on sun4's with 3-level MMUs; there's
 * no hw-flush space.
 */
void
sun4_vcache_flush_region(int vreg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHREG, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHSEG, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHSEG, 0);
	}
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
sun4_vcache_flush_page(int va, int ctx)
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHPG, 0);
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * This version uses hardware-assisted flush operation and just needs
 * one write into ASI_HWFLUSHPG space to flush all cache lines.
 */
void
sun4_vcache_flush_page_hw(int va, int ctx)
{
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	sta(p, ASI_HWFLUSHPG, 0);
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / PAGE_SIZE)
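
/*
 * Illustrative arithmetic: with, e.g., a 64 KB sun4c cache and 4 KB
 * pages, CACHE_FLUSH_MAGIC evaluates to 65536 / 4096 = 16, i.e.
 * flushing 16 pages costs about as many loop iterations as flushing
 * one whole segment; this is the "magic number 16" mentioned below.
 */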

void
sun4_cache_flush(void *base, u_int len)
{
	int i, ls, baseoff;
	char *p;

	if (CACHEINFO.c_vactype == VAC_NONE)
		return;

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region. If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context. This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	vcache_flush_range.ev_count++;

	if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);
		if (CACHEINFO.c_hwflush) {
			for (; --i >= 0; p += PAGE_SIZE)
				sta(p, ASI_HWFLUSHPG, 0);
		} else {
			ls = CACHEINFO.c_linesize;
			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
			for (; --i >= 0; p += ls)
				sta(p, ASI_FLUSHPG, 0);
		}
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
		return;
	}

	if (HASSUN4_MMU3L) {
		baseoff = (u_int)base & RGOFSET;
		i = (baseoff + len + RGOFSET) >> RGSHIFT;
		if (i == 1)
			sun4_vcache_flush_region(VA_VREG(base), 0);
		else
			sun4_vcache_flush_context(0);
	} else
		sun4_vcache_flush_context(0);
}


#if defined(SUN4M) || defined(SUN4D)
#define trapoff()	do { setpsr(getpsr() & ~PSR_ET); } while(0)
#define trapon()	do { setpsr(getpsr() | PSR_ET); } while(0)
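
/*
 * The SRMMU flush routines below temporarily switch the MMU context
 * register to the target context.  Traps are kept disabled across the
 * setcontext4m()/flush/setcontext4m() sequence (an assumption worth
 * stating explicitly: this prevents any interrupt or fault from being
 * serviced while the CPU is running in a context other than its own).
 */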
/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space.
 */
void
srmmu_vcache_flush_context(int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFC, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 */
void
srmmu_vcache_flush_region(int vreg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFR, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFS, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
srmmu_vcache_flush_page(int va, int ctx)
{
	int i, ls, octx;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 * info yet. Make up a cache line size (double-word aligned)
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;
	i = PAGE_SIZE;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);
#if defined(MULTIPROCESSOR)
	/*
	 * The page flush operation will have caused an MMU table walk
	 * on HyperSPARC because the cache is physically tagged.  Since
	 * the pmap functions will not always cross-flush it in the MP
	 * case (because the context may not be active on this CPU), we
	 * flush the TLB entry now.
	 */
	/*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
		sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);

#endif
	setcontext4m(octx);
	trapon();
}

/*
 * Flush entire cache.
 */
void
srmmu_cache_flush_all(void)
{

	srmmu_vcache_flush_context(0);
}

void
srmmu_vcache_flush_range(int va, int len, int ctx)
{
	int i, ls, offset;
	char *p;
	int octx;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 * info yet. Make up a cache line size (double-word aligned)
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;

	vcache_flush_range.ev_count++;

	/* Compute # of cache lines covered by this range */
	offset = va & (ls - 1);
	i = len + offset;
	p = (char *)(va & ~(ls - 1));

	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);

#if defined(MULTIPROCESSOR)
	if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
		/*
		 * See hypersparc comment in srmmu_vcache_flush_page().
		 */
		offset = va & PGOFSET;
		i = (offset + len + PGOFSET) >> PGSHIFT;

		va = va & ~PGOFSET;
		for (; --i >= 0; va += PAGE_SIZE)
			sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
	}
#endif
	setcontext4m(octx);
	trapon();
	return;
}

/*
 * Flush a range of virtual addresses (in the current context).
 *
 * We choose the best of (context,segment,page) here.
 */

void
srmmu_cache_flush(void *base, u_int len)
{
	int ctx = getcontext4m();
	int i, baseoff;


	/*
	 * Figure out the most efficient way to flush.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region. If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context. This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 */

	if (__predict_true(len < CACHEINFO.c_totalsize)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_range,
			cpuinfo.ft_vcache_flush_range,
			(int)base, len, ctx, CPUSET_ALL);
#else
		cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
#endif
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_segment,
			cpuinfo.ft_vcache_flush_segment,
			VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
#endif
		return;
	}

	baseoff = (u_int)base & RGOFSET;
	i = (baseoff + len + RGOFSET) >> RGSHIFT;
	while (i--) {
#if defined(MULTIPROCESSOR)
		FXCALL2(cpuinfo.sp_vcache_flush_region,
			cpuinfo.ft_vcache_flush_region,
			VA_VREG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_region(VA_VREG(base), ctx);
#endif
		base = ((char *)base + NBPRG);
	}
}

int ms1_cacheflush_magic = 0;
#define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic

void
ms1_cache_flush(void *base, u_int len)
{

	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

#if MS1_CACHEFLUSH_MAGIC
	if (len <= MS1_CACHEFLUSH_MAGIC) {
		/*
		 * If the range to be flushed is sufficiently small
		 * invalidate the covered cache lines by hand.
		 *
		 * The MicroSPARC I has a direct-mapped virtually addressed
		 * physically tagged data cache which is organised as
		 * 128 lines of 16 bytes. Virtual address bits [4-10]
		 * select the cache line. The cache tags are accessed
		 * through the standard DCACHE control space using the
		 * same address bits as those used to select the cache
		 * line in the virtual address.
		 *
		 * Note: we don't bother to compare the actual tags
		 * since that would require looking up physical addresses.
		 *
		 * The format of the tags we read from ASI_DCACHE control
		 * space is:
		 *
		 * 31      27 26            11 10           1 0
		 * +--------+----------------+--------------+-+
		 * |  xxx   |    PA[26-11]   |      xxx     |V|
		 * +--------+----------------+--------------+-+
		 *
		 * PA: bits 11-26 of the physical address
		 * V:  line valid bit
		 */
		int tagaddr = ((u_int)base & 0x7f0);

		len = roundup(len, 16);
		while (len != 0) {
			int tag = lda(tagaddr, ASI_DCACHETAG);
			if ((tag & 1) == 1) {
				/* Mark this cache line invalid */
				sta(tagaddr, ASI_DCACHETAG, 0);
			}
			len -= 16;
			tagaddr = (tagaddr + 16) & 0x7f0;
		}
	} else
#endif
		/* Flush entire data cache */
		sta(0, ASI_DCACHECLR, 0);
}


/*
 * Flush entire cache.
 */
void
ms1_cache_flush_all(void)
{

	/* Flash-clear both caches */
	sta(0, ASI_ICACHECLR, 0);
	sta(0, ASI_DCACHECLR, 0);
}

void
hypersparc_cache_flush_all(void)
{

	srmmu_vcache_flush_context(getcontext4m());
	/* Flush instruction cache */
	hypersparc_pure_vcache_flush();
}

void
cypress_cache_flush_all(void)
{
	extern char kernel_text[];

	char *p;
	int i, ls;

	/* Fill the cache with known read-only content */
	p = (char *)kernel_text;
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		(*(volatile char *)p);
}


void
viking_cache_flush(void *base, u_int len)
{
}

void
viking_pcache_flush_page(paddr_t pa, int invalidate_only)
{
	int set, i;

	/*
	 * The viking's on-chip data cache is 4-way set associative,
	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
	 * Note that one 4096 byte page exactly covers all 128 sets
	 * in the cache.
	 */
	if (invalidate_only) {
		u_int pa_tag = (pa >> 12);
		u_int tagaddr;
		uint64_t tag;

		/*
		 * Loop over all sets and invalidate all entries tagged
		 * with the given physical address by resetting the cache
		 * tag in ASI_DCACHETAG control space.
		 *
		 * The address format for accessing a tag is:
		 *
		 * 31  30     27  26           11        5 4  3 2   0
		 * +------+-----+------+-------//--------+--------+----+-----+
		 * | type | xxx | line |       xxx       |  set   | xx |  0  |
		 * +------+-----+------+-------//--------+--------+----+-----+
		 *
		 * set:  the cache set tag to be read (0-127)
		 * line: the line within the set (0-3)
		 * type: 1: read set tag; 2: read physical tag
		 *
		 * The (type 2) tag read from this address is a 64-bit word
		 * formatted as follows:
		 *
		 * 63      56        48        40          24 23           0
		 * +-------+-+-------+-+-------+-+-----------+--------------+
		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |   PA[35-12]  |
		 * +-------+-+-------+-+-------+-+-----------+--------------+
		 *
		 * PA: bits 12-35 of the physical address
		 * S:  line shared bit
		 * D:  line dirty bit
		 * V:  line valid bit
		 */
#define VIKING_DCACHETAG_S	0x0000010000000000ULL	/* line shared bit */
#define VIKING_DCACHETAG_D	0x0001000000000000ULL	/* line dirty bit */
#define VIKING_DCACHETAG_V	0x0100000000000000ULL	/* line valid bit */
#define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffULL	/* PA tag field */

		for (set = 0; set < 128; set++) {
			/* Set set number and access type */
			tagaddr = (set << 5) | (2 << 30);

			/* Examine the tag for each line in the set */
			for (i = 0 ; i < 4; i++) {
				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
				/*
				 * If this is a valid tag and the PA field
				 * matches clear the tag.
				 */
				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
				    (tag & VIKING_DCACHETAG_V) != 0)
					stda(tagaddr | (i << 26),
					     ASI_DCACHETAG, 0);
			}
		}

	} else {
		extern char kernel_text[];

		/*
		 * Force the cache to validate its backing memory
		 * by displacing all cache lines with known read-only
		 * content from the start of kernel text.
		 *
		 * Note that this thrashes the entire cache. However,
		 * we currently only need to call upon this code
		 * once at boot time.
		 */
		for (set = 0; set < 128; set++) {
			int *v = (int *)(kernel_text + (set << 5));

			/*
			 * We need to read (2*associativity-1) different
			 * locations to be sure to displace the entire set.
			 */
			i = 2 * 4 - 1;
			while (i--) {
				(*(volatile int *)v);
				v += 4096;
			}
		}
	}
}
#endif /* SUN4M || SUN4D */


#if defined(MULTIPROCESSOR)
/*
 * Cache flushing on multi-processor systems involves sending
 * inter-processor messages to flush the cache on each module.
 *
 * The current context of the originating processor is passed in the
 * message.  This assumes the allocation of CPU contexts is a global
 * operation (remember that the actual context tables for the CPUs
 * are distinct).
 */

void
smp_vcache_flush_page(int va, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
		va, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_segment(int vr, int vs, int ctx)
{

	FXCALL3(cpuinfo.sp_vcache_flush_segment, cpuinfo.ft_vcache_flush_segment,
		vr, vs, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_region(int vr, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
		vr, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_context(int ctx)
{

	FXCALL1(cpuinfo.sp_vcache_flush_context, cpuinfo.ft_vcache_flush_context,
		ctx, CPUSET_ALL);
}
#endif /* MULTIPROCESSOR */