xref: /netbsd/sys/arch/sparc/sparc/cache.c (revision bf9ec67e)
1 /*	$NetBSD: cache.c,v 1.61 2002/01/25 19:19:46 tsutsui Exp $ */
2 
3 /*
4  * Copyright (c) 1996
5  *	The President and Fellows of Harvard College. All rights reserved.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This software was developed by the Computer Systems Engineering group
10  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
11  * contributed to Berkeley.
12  *
13  * All advertising materials mentioning features or use of this software
14  * must display the following acknowledgement:
15  *	This product includes software developed by Harvard University.
16  *	This product includes software developed by the University of
17  *	California, Lawrence Berkeley Laboratory.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  *
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by Aaron Brown and
31  *	Harvard University.
32  *	This product includes software developed by the University of
33  *	California, Berkeley and its contributors.
34  * 4. Neither the name of the University nor the names of its contributors
35  *    may be used to endorse or promote products derived from this software
36  *    without specific prior written permission.
37  *
38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  *
50  *	@(#)cache.c	8.2 (Berkeley) 10/30/93
51  *
52  */
53 
54 /*
55  * Cache routines.
56  *
57  * TODO:
58  *	- rework range flush
59  */
60 
61 #include "opt_multiprocessor.h"
62 #include "opt_sparc_arch.h"
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/kernel.h>
67 
68 #include <machine/ctlreg.h>
69 #include <machine/pte.h>
70 
71 #include <sparc/sparc/asm.h>
72 #include <sparc/sparc/cache.h>
73 #include <sparc/sparc/cpuvar.h>
74 
75 struct cachestats cachestats;
76 
77 int cache_alias_dist;		/* Cache anti-aliasing constants */
78 int cache_alias_bits;
79 u_long dvma_cachealign;
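/*
 * Illustrative example (not part of the original source): on a module
 * with a 64 KB direct-mapped, virtually indexed cache and 4 KB pages,
 * the cache-enable routines below would set
 *
 *	cache_alias_dist = 65536
 *	cache_alias_bits = (65536 - 1) & ~PGOFSET = 0xf000
 *
 * i.e. two mappings of the same physical page can form an illegal
 * cache alias whenever their virtual addresses differ in the bits
 * selected by cache_alias_bits; DVMA mappings are aligned to
 * cache_alias_dist (dvma_cachealign) to avoid that.
 */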
80 
81 /*
82  * Enable the cache.
83  * We need to clear out the valid bits first.
84  */
85 void
86 sun4_cache_enable()
87 {
88 	u_int i, lim, ls, ts;
89 
90 	cache_alias_bits = CPU_ISSUN4
91 				? CACHE_ALIAS_BITS_SUN4
92 				: CACHE_ALIAS_BITS_SUN4C;
93 	cache_alias_dist = CPU_ISSUN4
94 				? CACHE_ALIAS_DIST_SUN4
95 				: CACHE_ALIAS_DIST_SUN4C;
96 
97 	ls = CACHEINFO.c_linesize;
98 	ts = CACHEINFO.c_totalsize;
99 
100 	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
101 		sta(i, ASI_CONTROL, 0);
102 
103 	stba(AC_SYSENABLE, ASI_CONTROL,
104 	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
105 	CACHEINFO.c_enabled = 1;
106 
107 #ifdef notyet
108 	if (cpuinfo.flags & SUN4_IOCACHE) {
109 		stba(AC_SYSENABLE, ASI_CONTROL,
110 		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
111 		printf("iocache enabled\n");
112 	}
113 #endif
114 }
115 
116 #if defined(SUN4M)
117 void
118 ms1_cache_enable()
119 {
120 	u_int pcr;
121 
122 	cache_alias_dist = max(
123 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
124 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
125 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
126 
127 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
128 
129 	/* We "flash-clear" the I/D caches. */
130 	if ((pcr & MS1_PCR_ICE) == 0)
131 		sta(0, ASI_ICACHECLR, 0);
132 	if ((pcr & MS1_PCR_DCE) == 0)
133 		sta(0, ASI_DCACHECLR, 0);
134 
135 	/* Turn on caches */
136 	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);
137 
138 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
139 
140 	/*
141 	 * When zeroing or copying pages, there might still be entries in
142 	 * the cache, since we don't flush pages from the cache when
143 	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
144 	 * MS1 cache is write-through and not write-allocate, so we can
145 	 * use cacheable access while not displacing cache lines.
146 	 */
147 	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
148 }
149 
150 void
151 viking_cache_enable()
152 {
153 	u_int pcr;
154 
155 	cache_alias_dist = max(
156 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
157 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
158 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
159 
160 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
161 
162 	if ((pcr & VIKING_PCR_ICE) == 0) {
163 		/* I-cache not on; "flash-clear" it now. */
164 		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
165 		sta(0, ASI_ICACHECLR, 0);		/* clear */
166 	}
167 	if ((pcr & VIKING_PCR_DCE) == 0) {
168 		/* D-cache not on: "flash-clear" it. */
169 		sta(0x80000000, ASI_DCACHECLR, 0);
170 		sta(0, ASI_DCACHECLR, 0);
171 	}
172 
173 	/* Turn on caches via MMU */
174 	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);
175 
176 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
177 
178 	/* Now turn on MultiCache if it exists */
179 	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
180 		/* Set external cache enable bit in MXCC control register */
181 		stda(MXCC_CTRLREG, ASI_CONTROL,
182 		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
183 		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
184 		CACHEINFO.ec_enabled = 1;
185 	}
186 }
187 
188 void
189 hypersparc_cache_enable()
190 {
191 	int i, ls, ts;
192 	u_int pcr, v;
193 
194 	ls = CACHEINFO.c_linesize;
195 	ts = CACHEINFO.c_totalsize;
196 
197 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
198 
199 	/*
200 	 * Setup the anti-aliasing constants and DVMA alignment constraint.
201 	 */
202 	cache_alias_dist = CACHEINFO.c_totalsize;
203 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
204 	dvma_cachealign = cache_alias_dist;
205 
206 	/* Now reset cache tag memory if cache not yet enabled */
207 	if ((pcr & HYPERSPARC_PCR_CE) == 0)
208 		for (i = 0; i < ts; i += ls)
209 			sta(i, ASI_DCACHETAG, 0);
210 
211 	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
212 	hypersparc_cache_flush_all();
213 
214 	/* Enable write-back cache */
215 	pcr |= HYPERSPARC_PCR_CE;
216 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
217 		pcr |= HYPERSPARC_PCR_CM;
218 
219 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
220 	CACHEINFO.c_enabled = 1;
221 
222 	/* XXX: should add support */
223 	if (CACHEINFO.c_hwflush)
224 		panic("cache_enable: can't handle 4M with hw-flush cache");
225 
226 	/*
227 	 * Enable instruction cache and, on single-processor machines,
228 	 * disable `Unimplemented Flush Traps'.
229 	 */
230 	v = HYPERSPARC_ICCR_ICE | (ncpu == 1 ? HYPERSPARC_ICCR_FTD : 0);
231 	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
232 }
233 
234 
235 void
236 swift_cache_enable()
237 {
238 	int i, ls, ts;
239 	u_int pcr;
240 
241 	cache_alias_dist = max(
242 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
243 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
244 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
245 
246 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
247 	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
248 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
249 
250 	/* Now reset cache tag memory if cache not yet enabled */
251 	ls = CACHEINFO.ic_linesize;
252 	ts = CACHEINFO.ic_totalsize;
253 	if ((pcr & SWIFT_PCR_ICE) == 0)
254 		for (i = 0; i < ts; i += ls)
255 			sta(i, ASI_ICACHETAG, 0);
256 
257 	ls = CACHEINFO.dc_linesize;
258 	ts = CACHEINFO.dc_totalsize;
259 	if ((pcr & SWIFT_PCR_DCE) == 0)
260 		for (i = 0; i < ts; i += ls)
261 			sta(i, ASI_DCACHETAG, 0);
262 
263 	CACHEINFO.c_enabled = 1;
264 }
265 
266 void
267 cypress_cache_enable()
268 {
269 	int i, ls, ts;
270 	u_int pcr;
271 
272 	cache_alias_dist = CACHEINFO.c_totalsize;
273 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
274 
275 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
276 	pcr &= ~(CYPRESS_PCR_CE | CYPRESS_PCR_CM);
277 
278 	/* Now reset cache tag memory if cache not yet enabled */
279 	ls = CACHEINFO.c_linesize;
280 	ts = CACHEINFO.c_totalsize;
281 	if ((pcr & CYPRESS_PCR_CE) == 0)
282 		for (i = 0; i < ts; i += ls)
283 			sta(i, ASI_DCACHETAG, 0);
284 
285 	pcr |= CYPRESS_PCR_CE;
286 	/* If put in write-back mode, turn it on */
287 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
288 		pcr |= CYPRESS_PCR_CM;
289 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
290 	CACHEINFO.c_enabled = 1;
291 }
292 
293 void
294 turbosparc_cache_enable()
295 {
296 	int i, ls, ts;
297 	u_int pcr, pcf;
298 
299 	cache_alias_dist = max(
300 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
301 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
302 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
303 
304 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
305 
306 	/* Now reset cache tag memory if cache not yet enabled */
307 	ls = CACHEINFO.ic_linesize;
308 	ts = CACHEINFO.ic_totalsize;
309 	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
310 		for (i = 0; i < ts; i += ls)
311 			sta(i, ASI_ICACHETAG, 0);
312 
313 	ls = CACHEINFO.dc_linesize;
314 	ts = CACHEINFO.dc_totalsize;
315 	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
316 		for (i = 0; i < ts; i += ls)
317 			sta(i, ASI_DCACHETAG, 0);
318 
319 	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
320 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
321 
322 	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
323 	if (pcf & TURBOSPARC_PCFG_SNP)
324 		printf("DVMA coherent ");
325 
326 	CACHEINFO.c_enabled = 1;
327 }
328 #endif
329 
330 /*
331  * Flush the current context from the cache.
332  *
333  * This is done by writing to each cache line in the `flush context'
334  * address space (or, for hardware flush, once to each page in the
335  * hardware flush space, for all cache pages).
336  */
337 void
338 sun4_vcache_flush_context()
339 {
340 	char *p;
341 	int i, ls;
342 
343 	cachestats.cs_ncxflush++;
344 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
345 	if (CACHEINFO.c_hwflush) {
346 		ls = NBPG;
347 		i = CACHEINFO.c_totalsize >> PGSHIFT;
348 		for (; --i >= 0; p += ls)
349 			sta(p, ASI_HWFLUSHCTX, 0);
350 	} else {
351 		ls = CACHEINFO.c_linesize;
352 		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
353 		for (; --i >= 0; p += ls)
354 			sta(p, ASI_FLUSHCTX, 0);
355 	}
356 }
357 
358 /*
359  * Flush the given virtual region from the cache.
360  *
361  * This is also done by writing to each cache line, except that
362  * now the addresses must include the virtual region number, and
363  * we use the `flush region' space.
364  *
365  * This function is only called on sun4's with 3-level MMUs; there's
366  * no hw-flush space.
367  */
368 void
369 sun4_vcache_flush_region(vreg)
370 	int vreg;
371 {
372 	int i, ls;
373 	char *p;
374 
375 	cachestats.cs_nrgflush++;
376 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
377 	ls = CACHEINFO.c_linesize;
378 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
379 	for (; --i >= 0; p += ls)
380 		sta(p, ASI_FLUSHREG, 0);
381 }
382 
383 /*
384  * Flush the given virtual segment from the cache.
385  *
386  * This is also done by writing to each cache line, except that
387  * now the addresses must include the virtual segment number, and
388  * we use the `flush segment' space.
389  *
390  * Again, for hardware, we just write each page (in hw-flush space).
391  */
392 void
393 sun4_vcache_flush_segment(vreg, vseg)
394 	int vreg, vseg;
395 {
396 	int i, ls;
397 	char *p;
398 
399 	cachestats.cs_nsgflush++;
400 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
401 	if (CACHEINFO.c_hwflush) {
402 		ls = NBPG;
403 		i = CACHEINFO.c_totalsize >> PGSHIFT;
404 		for (; --i >= 0; p += ls)
405 			sta(p, ASI_HWFLUSHSEG, 0);
406 	} else {
407 		ls = CACHEINFO.c_linesize;
408 		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
409 		for (; --i >= 0; p += ls)
410 			sta(p, ASI_FLUSHSEG, 0);
411 	}
412 }
413 
414 /*
415  * Flush the given virtual page from the cache.
416  * (va is the actual address, and must be aligned on a page boundary.)
417  * Again we write to each cache line.
418  */
419 void
420 sun4_vcache_flush_page(va)
421 	int va;
422 {
423 	int i, ls;
424 	char *p;
425 
426 #ifdef DEBUG
427 	if (va & PGOFSET)
428 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
429 #endif
430 
431 	cachestats.cs_npgflush++;
432 	p = (char *)va;
433 	ls = CACHEINFO.c_linesize;
434 	i = NBPG >> CACHEINFO.c_l2linesize;
435 	for (; --i >= 0; p += ls)
436 		sta(p, ASI_FLUSHPG, 0);
437 }
438 
439 /*
440  * Flush the given virtual page from the cache.
441  * (va is the actual address, and must be aligned on a page boundary.)
442  * This version uses hardware-assisted flush operation and just needs
443  * one write into ASI_HWFLUSHPG space to flush all cache lines.
444  */
445 void
446 sun4_vcache_flush_page_hw(va)
447 	int va;
448 {
449 	char *p;
450 
451 #ifdef DEBUG
452 	if (va & PGOFSET)
453 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
454 #endif
455 
456 	cachestats.cs_npgflush++;
457 	p = (char *)va;
458 	sta(p, ASI_HWFLUSHPG, 0);
459 }
460 
461 /*
462  * Flush a range of virtual addresses (in the current context).
463  * The first byte is at (base&~PGOFSET) and the last one is just
464  * before byte (base+len).
465  *
466  * We choose the best of (context,segment,page) here.
467  */
468 
469 #define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)
470 
471 void
472 sun4_cache_flush(base, len)
473 	caddr_t base;
474 	u_int len;
475 {
476 	int i, ls, baseoff;
477 	char *p;
478 
479 	if (CACHEINFO.c_vactype == VAC_NONE)
480 		return;
481 
482 	/*
483 	 * Figure out how much must be flushed.
484 	 *
485 	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
486 	 * in the same number of loop iterations.  We can also do the whole
487 	 * region. If we need to do between 2 and NSEGRG segments, do the region.
488 	 * If we need to do two or more regions, just go ahead and do the
489 	 * whole context. This might not be ideal (e.g., fsck likes to do
490 	 * 65536-byte reads, which might not necessarily be aligned).
491 	 *
492 	 * We could try to be sneaky here and use the direct mapping
493 	 * to avoid flushing things `below' the start and `above' the
494 	 * ending address (rather than rounding to whole pages and
495 	 * segments), but I did not want to debug that now and it is
496 	 * not clear it would help much.
497 	 *
498 	 * (XXX the magic number 16 is now wrong, must review policy)
499 	 */
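	/*
	 * Illustrative example (not part of the original source), assuming
	 * a 64 KB cache and 4 KB pages: CACHE_FLUSH_MAGIC evaluates to 16,
	 * so a request spanning up to 15 pages is flushed page by page
	 * below; a larger request that still fits in one segment gets a
	 * segment flush, one that fits in a single region (on a 3-level
	 * MMU) gets a region flush, and anything bigger flushes the whole
	 * context.
	 */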
500 	baseoff = (int)base & PGOFSET;
501 	i = (baseoff + len + PGOFSET) >> PGSHIFT;
502 
503 	cachestats.cs_nraflush++;
504 #ifdef notyet
505 	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
506 #endif
507 
508 	if (i < CACHE_FLUSH_MAGIC) {
509 		/* cache_flush_page, for i pages */
510 		p = (char *)((int)base & ~baseoff);
511 		if (CACHEINFO.c_hwflush) {
512 			for (; --i >= 0; p += NBPG)
513 				sta(p, ASI_HWFLUSHPG, 0);
514 		} else {
515 			ls = CACHEINFO.c_linesize;
516 			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
517 			for (; --i >= 0; p += ls)
518 				sta(p, ASI_FLUSHPG, 0);
519 		}
520 		return;
521 	}
522 	baseoff = (u_int)base & SGOFSET;
523 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
524 	if (i == 1)
525 		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
526 	else {
527 		if (HASSUN4_MMU3L) {
528 			baseoff = (u_int)base & RGOFSET;
529 			i = (baseoff + len + RGOFSET) >> RGSHIFT;
530 			if (i == 1)
531 				sun4_vcache_flush_region(VA_VREG(base));
532 			else
533 				sun4_vcache_flush_context();
534 		} else
535 			sun4_vcache_flush_context();
536 	}
537 }
538 
539 
540 #if defined(SUN4M)
541 /*
542  * Flush the current context from the cache.
543  *
544  * This is done by writing to each cache line in the `flush context'
545  * address space (or, for hardware flush, once to each page in the
546  * hardware flush space, for all cache pages).
547  */
548 void
549 srmmu_vcache_flush_context()
550 {
551 	char *p;
552 	int i, ls;
553 
554 	cachestats.cs_ncxflush++;
555 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
556 	ls = CACHEINFO.c_linesize;
557 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
558 	for (; --i >= 0; p += ls)
559 		sta(p, ASI_IDCACHELFC, 0);
560 }
561 
562 /*
563  * Flush the given virtual region from the cache.
564  *
565  * This is also done by writing to each cache line, except that
566  * now the addresses must include the virtual region number, and
567  * we use the `flush region' space.
568  */
569 void
570 srmmu_vcache_flush_region(vreg)
571 	int vreg;
572 {
573 	int i, ls;
574 	char *p;
575 
576 	cachestats.cs_nrgflush++;
577 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
578 	ls = CACHEINFO.c_linesize;
579 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
580 	for (; --i >= 0; p += ls)
581 		sta(p, ASI_IDCACHELFR, 0);
582 }
583 
584 /*
585  * Flush the given virtual segment from the cache.
586  *
587  * This is also done by writing to each cache line, except that
588  * now the addresses must include the virtual segment number, and
589  * we use the `flush segment' space.
590  *
591  * Again, for hardware, we just write each page (in hw-flush space).
592  */
593 void
594 srmmu_vcache_flush_segment(vreg, vseg)
595 	int vreg, vseg;
596 {
597 	int i, ls;
598 	char *p;
599 
600 	cachestats.cs_nsgflush++;
601 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
602 	ls = CACHEINFO.c_linesize;
603 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
604 	for (; --i >= 0; p += ls)
605 		sta(p, ASI_IDCACHELFS, 0);
606 }
607 
608 /*
609  * Flush the given virtual page from the cache.
610  * (va is the actual address, and must be aligned on a page boundary.)
611  * Again we write to each cache line.
612  */
613 void
614 srmmu_vcache_flush_page(va)
615 	int va;
616 {
617 	int i, ls;
618 	char *p;
619 
620 #ifdef DEBUG
621 	if (va & PGOFSET)
622 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
623 #endif
624 
625 	cachestats.cs_npgflush++;
626 	p = (char *)va;
627 	ls = CACHEINFO.c_linesize;
628 	i = NBPG >> CACHEINFO.c_l2linesize;
629 	for (; --i >= 0; p += ls)
630 		sta(p, ASI_IDCACHELFP, 0);
631 }
632 
633 /*
634  * Flush entire cache.
635  */
636 void
637 srmmu_cache_flush_all()
638 {
639 	srmmu_vcache_flush_context();
640 }
641 
642 /*
643  * Flush a range of virtual addresses (in the current context).
644  * The first byte is at (base&~PGOFSET) and the last one is just
645  * before byte (base+len).
646  *
647  * We choose the best of (context,segment,page) here.
648  */
649 
650 #define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)
651 
652 void
653 srmmu_cache_flush(base, len)
654 	caddr_t base;
655 	u_int len;
656 {
657 	int i, ls, baseoff;
658 	char *p;
659 
660 	if (len < NBPG) {
661 		/* less than a page, flush just the covered cache lines */
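		/*
		 * Illustrative example (not part of the original source),
		 * assuming 32-byte cache lines: for base = 0x101c and
		 * len = 40, the code below computes baseoff = 0x1c,
		 * i = (0x1c + 40 + 31) >> 5 = 3 and p = 0x1000, so the
		 * three lines at 0x1000, 0x1020 and 0x1040 are flushed,
		 * covering bytes 0x101c..0x1043.
		 */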
662 		ls = CACHEINFO.c_linesize;
663 		baseoff = (int)base & (ls - 1);
664 		i = (baseoff + len + ls - 1) >> CACHEINFO.c_l2linesize;
665 		p = (char *)((int)base & -ls);
666 		for (; --i >= 0; p += ls)
667 			sta(p, ASI_IDCACHELFP, 0);
668 		return;
669 	}
670 
671 	/*
672 	 * Figure out how much must be flushed.
673 	 *
674 	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
675 	 * in the same number of loop iterations.  We can also do the whole
676 	 * region. If we need to do between 2 and NSEGRG segments, do the region.
677 	 * If we need to do two or more regions, just go ahead and do the
678 	 * whole context. This might not be ideal (e.g., fsck likes to do
679 	 * 65536-byte reads, which might not necessarily be aligned).
680 	 *
681 	 * We could try to be sneaky here and use the direct mapping
682 	 * to avoid flushing things `below' the start and `above' the
683 	 * ending address (rather than rounding to whole pages and
684 	 * segments), but I did not want to debug that now and it is
685 	 * not clear it would help much.
686 	 *
687 	 * (XXX the magic number 16 is now wrong, must review policy)
688 	 */
689 	baseoff = (int)base & PGOFSET;
690 	i = (baseoff + len + PGOFSET) >> PGSHIFT;
691 
692 	cachestats.cs_nraflush++;
693 #ifdef notyet
694 	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
695 #endif
696 
697 	if (i < CACHE_FLUSH_MAGIC) {
698 		/* cache_flush_page, for i pages */
699 		p = (char *)((int)base & ~baseoff);
700 		ls = CACHEINFO.c_linesize;
701 		i <<= PGSHIFT - CACHEINFO.c_l2linesize;
702 		for (; --i >= 0; p += ls)
703 			sta(p, ASI_IDCACHELFP, 0);
704 		return;
705 	}
706 	baseoff = (u_int)base & SGOFSET;
707 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
708 	if (i == 1)
709 		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
710 	else {
711 		baseoff = (u_int)base & RGOFSET;
712 		i = (baseoff + len + RGOFSET) >> RGSHIFT;
713 		if (i == 1)
714 			srmmu_vcache_flush_region(VA_VREG(base));
715 		else
716 			srmmu_vcache_flush_context();
717 	}
718 }
719 
720 int ms1_cacheflush_magic = 0;
721 #define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic
722 void
723 ms1_cache_flush(base, len)
724 	caddr_t base;
725 	u_int len;
726 {
727 	/*
728 	 * Although physically tagged, we still need to flush the
729 	 * data cache after (if we have a write-through cache) or before
730 	 * (in case of write-back caches) DMA operations.
731 	 */
732 
733 #if MS1_CACHEFLUSH_MAGIC
734 	if (len <= MS1_CACHEFLUSH_MAGIC) {
735 		/*
736 		 * If the range to be flushed is sufficiently small
737 		 * invalidate the covered cache lines by hand.
738 		 *
739 		 * The MicroSPARC I has a direct-mapped virtually addressed
740 		 * physically tagged data cache which is organised as
741 		 * 128 lines of 16 bytes. Virtual address bits [4-10]
742 		 * select the cache line. The cache tags are accessed
743 		 * through the standard DCACHE control space using the
744 		 * same address bits as those used to select the cache
745 		 * line in the virtual address.
746 		 *
747 		 * Note: we don't bother to compare the actual tags
748 		 * since that would require looking up physical addresses.
749 		 *
750 		 * The format of the tags we read from ASI_DCACHE control
751 		 * space is:
752 		 *
753 		 * 31     27 26            11 10         1 0
754 		 * +--------+----------------+------------+-+
755 		 * |  xxx   |    PA[26-11]   |    xxx     |V|
756 		 * +--------+----------------+------------+-+
757 		 *
758 		 * PA: bits 11-26 of the physical address
759 		 * V:  line valid bit
760 		 */
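		/*
		 * Illustrative example (not part of the original source):
		 * for base = 0x203a4 and len = 40, tagaddr starts at
		 * 0x203a4 & 0x7f0 = 0x3a0; rounding len up to 48 makes
		 * the loop below examine (and, if valid, invalidate) the
		 * tags at 0x3a0, 0x3b0 and 0x3c0.
		 */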
761 		int tagaddr = ((u_int)base & 0x7f0);
762 
763 		len = roundup(len, 16);
764 		while (len != 0) {
765 			int tag = lda(tagaddr, ASI_DCACHETAG);
766 			if ((tag & 1) == 1) {
767 				/* Mark this cache line invalid */
768 				sta(tagaddr, ASI_DCACHETAG, 0);
769 			}
770 			len -= 16;
771 			tagaddr = (tagaddr + 16) & 0x7f0;
772 		}
773 	} else
774 #endif
775 		/* Flush entire data cache */
776 		sta(0, ASI_DCACHECLR, 0);
777 }
778 
779 /*
780  * Flush entire cache.
781  */
782 void
783 ms1_cache_flush_all()
784 {
785 
786 	/* Flash-clear both caches */
787 	sta(0, ASI_ICACHECLR, 0);
788 	sta(0, ASI_DCACHECLR, 0);
789 }
790 
791 void
792 hypersparc_cache_flush_all()
793 {
794 
795 	srmmu_vcache_flush_context();
796 	/* Flush instruction cache */
797 	hypersparc_pure_vcache_flush();
798 }
799 
800 void
801 cypress_cache_flush_all()
802 {
803 
804 	extern char kernel_text[];
805 	char *p;
806 	int i, ls;
807 
808 	/* Fill the cache with known read-only content */
809 	p = (char *)kernel_text;
810 	ls = CACHEINFO.c_linesize;
811 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
812 	for (; --i >= 0; p += ls)
813 		(*(volatile char *)p);
814 }
815 
816 
817 void
818 viking_cache_flush(base, len)
819 	caddr_t base;
820 	u_int len;
821 {
822 	/*
823 	 * Although physically tagged, we still need to flush the
824 	 * data cache after (if we have a write-through cache) or before
825 	 * (in case of write-back caches) DMA operations.
826 	 */
827 
828 }
829 
830 void
831 viking_pcache_flush_page(pa, invalidate_only)
832 	paddr_t pa;
833 	int invalidate_only;
834 {
835 	int set, i;
836 
837 	/*
838 	 * The viking's on-chip data cache is 4-way set associative,
839 	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
840 	 * Note that one 4096 byte page exactly covers all 128 sets
841 	 * in the cache.
842 	 */
843 	if (invalidate_only) {
844 		u_int pa_tag = (pa >> 12);
845 		u_int tagaddr;
846 		u_int64_t tag;
847 
848 		/*
849 		 * Loop over all sets and invalidate all entries tagged
850 		 * with the given physical address by resetting the cache
851 		 * tag in ASI_DCACHETAG control space.
852 		 *
853 		 * The address format for accessing a tag is:
854 		 *
855 		 * 31   30      27   26                  11      5 4  3 2    0
856 		 * +------+-----+------+-------//--------+--------+----+-----+
857 		 * | type | xxx | line |       xxx       |  set   | xx | 0   |
858 		 * +------+-----+------+-------//--------+--------+----+-----+
859 		 *
860 		 * set:  the cache set tag to be read (0-127)
861 		 * line: the line within the set (0-3)
862 		 * type: 1: read set tag; 2: read physical tag
863 		 *
864 		 * The (type 2) tag read from this address is a 64-bit word
865 		 * formatted as follows:
866 		 *
867 		 *          5         4         4
868 		 * 63       6         8         0            23               0
869 		 * +-------+-+-------+-+-------+-+-----------+----------------+
870 		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |    PA[35-12]   |
871 		 * +-------+-+-------+-+-------+-+-----------+----------------+
872 		 *
873 		 * PA: bits 12-35 of the physical address
874 		 * S:  line shared bit
875 		 * D:  line dirty bit
876 		 * V:  line valid bit
877 		 */
878 
879 #define VIKING_DCACHETAG_S	0x0000010000000000UL	/* line shared bit */
880 #define VIKING_DCACHETAG_D	0x0001000000000000UL	/* line dirty bit */
881 #define VIKING_DCACHETAG_V	0x0100000000000000UL	/* line valid bit */
882 #define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffUL	/* PA tag field */
883 
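		/*
		 * Illustrative example (not part of the original source):
		 * to read the physical tag (type 2) of line 2 in set 5,
		 * the loop below forms the address
		 *	(2 << 30) | (2 << 26) | (5 << 5) = 0x880000a0
		 * and issues ldda(0x880000a0, ASI_DCACHETAG).
		 */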
884 		for (set = 0; set < 128; set++) {
885 			/* Set set number and access type */
886 			tagaddr = (set << 5) | (2 << 30);
887 
888 			/* Examine the tag for each line in the set */
889 			for (i = 0 ; i < 4; i++) {
890 				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
891 				/*
892 				 * If this is a valid tag and the PA field
893 				 * matches clear the tag.
894 				 */
895 				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
896 				    (tag & VIKING_DCACHETAG_V) != 0)
897 					stda(tagaddr | (i << 26),
898 					     ASI_DCACHETAG, 0);
899 			}
900 		}
901 
902 	} else {
903 		extern char kernel_text[];
904 
905 		/*
906 		 * Force the cache to validate its backing memory
907 		 * by displacing all cache lines with known read-only
908 		 * content from the start of kernel text.
909 		 *
910 		 * Note that this thrashes the entire cache. However,
911 		 * we currently only need to call upon this code
912 		 * once at boot time.
913 		 */
914 		for (set = 0; set < 128; set++) {
915 			int *v = (int *)(kernel_text + (set << 5));
916 
917 			/*
918 			 * We need to read (2*associativity-1) different
919 			 * locations to be sure to displace the entire set.
920 			 */
921 			i = 2 * 4 - 1;
922 			while (i--) {
923 				(*(volatile int *)v);
924 				v += 4096;
925 			}
926 		}
927 	}
928 }
929 #endif /* SUN4M */
930 
931 
932 #if defined(MULTIPROCESSOR)
933 /*
934  * Cache flushing on multi-processor systems involves sending
935  * inter-processor messages to flush the cache on each module.
936  *
937  * The current context of the originating processor is passed in the
938  * message. This assumes the allocation of CPU contexts is a global
939  * operation (remember that the actual context tables for the CPUs
940  * are distinct).
941  *
942  * We don't do cross calls while cold, or before this CPU is itself
943  * ready to accept them (CPUFLG_READY not yet set).
944  */
945 
946 void
947 smp_vcache_flush_page(va)
948 	int va;
949 {
950 	int n, s;
951 
952 	cpuinfo.sp_vcache_flush_page(va);
953 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
954 		return;
955 	LOCK_XPMSG();
956 	for (n = 0; n < ncpu; n++) {
957 		struct cpu_info *cpi = cpus[n];
958 		struct xpmsg_flush_page *p;
959 
960 		if (CPU_READY(cpi))
961 			continue;
962 		p = &cpi->msg.u.xpmsg_flush_page;
963 		s = splhigh();
964 		simple_lock(&cpi->msg.lock);
965 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_PAGE;
966 		p->ctx = getcontext4m();
967 		p->va = va;
968 		raise_ipi_wait_and_unlock(cpi);
969 		splx(s);
970 	}
971 	UNLOCK_XPMSG();
972 }
973 
974 void
975 smp_vcache_flush_segment(vr, vs)
976 	int vr, vs;
977 {
978 	int n, s;
979 
980 	cpuinfo.sp_vcache_flush_segment(vr, vs);
981 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
982 		return;
983 	LOCK_XPMSG();
984 	for (n = 0; n < ncpu; n++) {
985 		struct cpu_info *cpi = cpus[n];
986 		struct xpmsg_flush_segment *p;
987 
988 		if (CPU_READY(cpi))
989 			continue;
990 		p = &cpi->msg.u.xpmsg_flush_segment;
991 		s = splhigh();
992 		simple_lock(&cpi->msg.lock);
993 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_SEGMENT;
994 		p->ctx = getcontext4m();
995 		p->vr = vr;
996 		p->vs = vs;
997 		raise_ipi_wait_and_unlock(cpi);
998 		splx(s);
999 	}
1000 	UNLOCK_XPMSG();
1001 }
1002 
1003 void
1004 smp_vcache_flush_region(vr)
1005 	int vr;
1006 {
1007 	int n, s;
1008 
1009 	cpuinfo.sp_vcache_flush_region(vr);
1010 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
1011 		return;
1012 	LOCK_XPMSG();
1013 	for (n = 0; n < ncpu; n++) {
1014 		struct cpu_info *cpi = cpus[n];
1015 		struct xpmsg_flush_region *p;
1016 
1017 		if (CPU_READY(cpi))
1018 			continue;
1019 		p = &cpi->msg.u.xpmsg_flush_region;
1020 		s = splhigh();
1021 		simple_lock(&cpi->msg.lock);
1022 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_REGION;
1023 		p->ctx = getcontext4m();
1024 		p->vr = vr;
1025 		raise_ipi_wait_and_unlock(cpi);
1026 		splx(s);
1027 	}
1028 	UNLOCK_XPMSG();
1029 }
1030 
1031 void
1032 smp_vcache_flush_context()
1033 {
1034 	int n, s;
1035 
1036 	cpuinfo.sp_vcache_flush_context();
1037 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
1038 		return;
1039 	LOCK_XPMSG();
1040 	for (n = 0; n < ncpu; n++) {
1041 		struct cpu_info *cpi = cpus[n];
1042 		struct xpmsg_flush_context *p;
1043 
1044 		if (CPU_READY(cpi))
1045 			continue;
1046 		p = &cpi->msg.u.xpmsg_flush_context;
1047 		s = splhigh();
1048 		simple_lock(&cpi->msg.lock);
1049 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_CONTEXT;
1050 		p->ctx = getcontext4m();
1051 		raise_ipi_wait_and_unlock(cpi);
1052 		splx(s);
1053 	}
1054 	UNLOCK_XPMSG();
1055 }
1056 
1057 void
1058 smp_cache_flush(va, size)
1059 	caddr_t va;
1060 	u_int size;
1061 {
1062 	int n, s;
1063 
1064 	cpuinfo.sp_cache_flush(va, size);
1065 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
1066 		return;
1067 	LOCK_XPMSG();
1068 	for (n = 0; n < ncpu; n++) {
1069 		struct cpu_info *cpi = cpus[n];
1070 		struct xpmsg_flush_range *p;
1071 
1072 		if (CPU_READY(cpi))
1073 			continue;
1074 		p = &cpi->msg.u.xpmsg_flush_range;
1075 		s = splhigh();
1076 		simple_lock(&cpi->msg.lock);
1077 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_RANGE;
1078 		p->ctx = getcontext4m();
1079 		p->va = va;
1080 		p->size = size;
1081 		raise_ipi_wait_and_unlock(cpi);
1082 		splx(s);
1083 	}
1084 	UNLOCK_XPMSG();
1085 }
1086 #endif /* MULTIPROCESSOR */
1087