1 /*	$NetBSD: cache.c,v 1.63 2002/07/29 06:39:41 grant Exp $ */
2 
3 /*
4  * Copyright (c) 1996
5  *	The President and Fellows of Harvard College. All rights reserved.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This software was developed by the Computer Systems Engineering group
10  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
11  * contributed to Berkeley.
12  *
13  * All advertising materials mentioning features or use of this software
14  * must display the following acknowledgement:
15  *	This product includes software developed by Harvard University.
16  *	This product includes software developed by the University of
17  *	California, Lawrence Berkeley Laboratory.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  *
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by Aaron Brown and
31  *	Harvard University.
32  *	This product includes software developed by the University of
33  *	California, Berkeley and its contributors.
34  * 4. Neither the name of the University nor the names of its contributors
35  *    may be used to endorse or promote products derived from this software
36  *    without specific prior written permission.
37  *
38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  *
50  *	@(#)cache.c	8.2 (Berkeley) 10/30/93
51  *
52  */
53 
54 /*
55  * Cache routines.
56  *
57  * TODO:
58  *	- rework range flush
59  */
60 
61 #include "opt_multiprocessor.h"
62 #include "opt_sparc_arch.h"
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/kernel.h>
67 
68 #include <machine/ctlreg.h>
69 #include <machine/pte.h>
70 
71 #include <sparc/sparc/asm.h>
72 #include <sparc/sparc/cache.h>
73 #include <sparc/sparc/cpuvar.h>
74 
75 struct cachestats cachestats;
76 
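/*
 * cache_alias_dist is the virtual-cache alias distance: two virtual
 * mappings of the same physical page may cause an illegal alias unless
 * they agree in the VA bits selected by cache_alias_bits (typically
 * (cache_alias_dist - 1) & ~PGOFSET, as computed by the enable routines
 * below).  For example, with a 64KB direct-mapped VAC and 4KB pages the
 * alias distance would be 0x10000 and the alias mask 0xf000, i.e. VA
 * bits [12-15] must match.  dvma_cachealign records the alignment DVMA
 * mappings must observe so that device and CPU views of a buffer land
 * on the same cache lines.
 */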
77 int cache_alias_dist;		/* Cache anti-aliasing constants */
78 int cache_alias_bits;
79 u_long dvma_cachealign;
80 
81 /*
82  * Enable the cache.
83  * We need to clear out the valid bits first.
84  */
85 void
86 sun4_cache_enable()
87 {
88 	u_int i, lim, ls, ts;
89 
90 	cache_alias_bits = CPU_ISSUN4
91 				? CACHE_ALIAS_BITS_SUN4
92 				: CACHE_ALIAS_BITS_SUN4C;
93 	cache_alias_dist = CPU_ISSUN4
94 				? CACHE_ALIAS_DIST_SUN4
95 				: CACHE_ALIAS_DIST_SUN4C;
96 
97 	ls = CACHEINFO.c_linesize;
98 	ts = CACHEINFO.c_totalsize;
99 
100 	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
101 		sta(i, ASI_CONTROL, 0);
102 
103 	stba(AC_SYSENABLE, ASI_CONTROL,
104 	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
105 	CACHEINFO.c_enabled = 1;
106 
107 #ifdef notyet
108 	if (cpuinfo.flags & SUN4_IOCACHE) {
109 		stba(AC_SYSENABLE, ASI_CONTROL,
110 		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
111 		printf("iocache enabled\n");
112 	}
113 #endif
114 }
115 
116 /*
117  * XXX The hammer is a bit too big here; SUN4D systems only have Viking.
118  */
119 #if defined(SUN4M) || defined(SUN4D)
120 void
121 ms1_cache_enable()
122 {
123 	u_int pcr;
124 
125 	cache_alias_dist = max(
126 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
127 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
128 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
129 
130 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
131 
132 	/* We "flash-clear" the I/D caches. */
133 	if ((pcr & MS1_PCR_ICE) == 0)
134 		sta(0, ASI_ICACHECLR, 0);
135 	if ((pcr & MS1_PCR_DCE) == 0)
136 		sta(0, ASI_DCACHECLR, 0);
137 
138 	/* Turn on caches */
139 	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);
140 
141 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
142 
143 	/*
144 	 * When zeroing or copying pages, there might still be entries in
145 	 * the cache, since we don't flush pages from the cache when
146 	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
147 	 * MS1 cache is write-through and not write-allocate, so we can
148 	 * use cacheable access while not displacing cache lines.
149 	 */
150 	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
151 }
152 
153 void
154 viking_cache_enable()
155 {
156 	u_int pcr;
157 
158 	cache_alias_dist = max(
159 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
160 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
161 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
162 
163 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
164 
165 	if ((pcr & VIKING_PCR_ICE) == 0) {
166 		/* I-cache not on; "flash-clear" it now. */
167 		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
168 		sta(0, ASI_ICACHECLR, 0);		/* clear */
169 	}
170 	if ((pcr & VIKING_PCR_DCE) == 0) {
171 		/* D-cache not on: "flash-clear" it. */
172 		sta(0x80000000, ASI_DCACHECLR, 0);
173 		sta(0, ASI_DCACHECLR, 0);
174 	}
175 
176 	/* Turn on caches via MMU */
177 	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);
178 
179 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
180 
181 	/* Now turn on MultiCache if it exists */
182 	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
183 		/* Set external cache enable bit in MXCC control register */
184 		stda(MXCC_CTRLREG, ASI_CONTROL,
185 		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
186 		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
187 		CACHEINFO.ec_enabled = 1;
188 	}
189 }
190 
191 void
192 hypersparc_cache_enable()
193 {
194 	int i, ls, ts;
195 	u_int pcr, v;
196 
197 	ls = CACHEINFO.c_linesize;
198 	ts = CACHEINFO.c_totalsize;
199 
200 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
201 
202 	/*
203 	 * Setup the anti-aliasing constants and DVMA alignment constraint.
204 	 */
205 	cache_alias_dist = CACHEINFO.c_totalsize;
206 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
207 	dvma_cachealign = cache_alias_dist;
208 
209 	/* Now reset cache tag memory if cache not yet enabled */
210 	if ((pcr & HYPERSPARC_PCR_CE) == 0)
211 		for (i = 0; i < ts; i += ls)
212 			sta(i, ASI_DCACHETAG, 0);
213 
214 	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
215 	hypersparc_cache_flush_all();
216 
217 	/* Enable write-back cache */
218 	pcr |= HYPERSPARC_PCR_CE;
219 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
220 		pcr |= HYPERSPARC_PCR_CM;
221 
222 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
223 	CACHEINFO.c_enabled = 1;
224 
225 	/* XXX: should add support for caches with hardware-assisted flush */
226 	if (CACHEINFO.c_hwflush)
227 		panic("cache_enable: can't handle 4M with hw-flush cache");
228 
229 	/*
230 	 * Enable instruction cache and, on single-processor machines,
231 	 * disable `Unimplemented Flush Traps'.
232 	 */
233 	v = HYPERSPARC_ICCR_ICE | (ncpu == 1 ? HYPERSPARC_ICCR_FTD : 0);
234 	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
235 }
236 
237 
238 void
239 swift_cache_enable()
240 {
241 	int i, ls, ts;
242 	u_int pcr;
243 
244 	cache_alias_dist = max(
245 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
246 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
247 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
248 
249 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
250 	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
251 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
252 
253 	/* Now reset cache tag memory if cache not yet enabled */
254 	ls = CACHEINFO.ic_linesize;
255 	ts = CACHEINFO.ic_totalsize;
256 	if ((pcr & SWIFT_PCR_ICE) == 0)
257 		for (i = 0; i < ts; i += ls)
258 			sta(i, ASI_ICACHETAG, 0);
259 
260 	ls = CACHEINFO.dc_linesize;
261 	ts = CACHEINFO.dc_totalsize;
262 	if ((pcr & SWIFT_PCR_DCE) == 0)
263 		for (i = 0; i < ts; i += ls)
264 			sta(i, ASI_DCACHETAG, 0);
265 
266 	CACHEINFO.c_enabled = 1;
267 }
268 
269 void
270 cypress_cache_enable()
271 {
272 	int i, ls, ts;
273 	u_int pcr;
274 
275 	cache_alias_dist = CACHEINFO.c_totalsize;
276 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
277 
278 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
279 	pcr &= ~(CYPRESS_PCR_CE | CYPRESS_PCR_CM);
280 
281 	/* Now reset cache tag memory if cache not yet enabled */
282 	ls = CACHEINFO.c_linesize;
283 	ts = CACHEINFO.c_totalsize;
284 	if ((pcr & CYPRESS_PCR_CE) == 0)
285 		for (i = 0; i < ts; i += ls)
286 			sta(i, ASI_DCACHETAG, 0);
287 
288 	pcr |= CYPRESS_PCR_CE;
289 	/* If the cache is to be used in write-back mode, turn that on */
290 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
291 		pcr |= CYPRESS_PCR_CM;
292 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
293 	CACHEINFO.c_enabled = 1;
294 }
295 
296 void
297 turbosparc_cache_enable()
298 {
299 	int i, ls, ts;
300 	u_int pcr, pcf;
301 
302 	cache_alias_dist = max(
303 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
304 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
305 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
306 
307 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
308 
309 	/* Now reset cache tag memory if cache not yet enabled */
310 	ls = CACHEINFO.ic_linesize;
311 	ts = CACHEINFO.ic_totalsize;
312 	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
313 		for (i = 0; i < ts; i += ls)
314 			sta(i, ASI_ICACHETAG, 0);
315 
316 	ls = CACHEINFO.dc_linesize;
317 	ts = CACHEINFO.dc_totalsize;
318 	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
319 		for (i = 0; i < ts; i += ls)
320 			sta(i, ASI_DCACHETAG, 0);
321 
322 	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
323 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
324 
325 	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
326 	if (pcf & TURBOSPARC_PCFG_SNP)
327 		printf(": DVMA coherent ");
328 
329 	CACHEINFO.c_enabled = 1;
330 }
331 #endif /* SUN4M || SUN4D */
332 
333 /*
334  * Flush the current context from the cache.
335  *
336  * This is done by writing to each cache line in the `flush context'
337  * address space (or, for hardware flush, once to each page in the
338  * hardware flush space, for all cache pages).
339  */
340 void
341 sun4_vcache_flush_context()
342 {
343 	char *p;
344 	int i, ls;
345 
346 	cachestats.cs_ncxflush++;
347 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
348 	if (CACHEINFO.c_hwflush) {
349 		ls = NBPG;
350 		i = CACHEINFO.c_totalsize >> PGSHIFT;
351 		for (; --i >= 0; p += ls)
352 			sta(p, ASI_HWFLUSHCTX, 0);
353 	} else {
354 		ls = CACHEINFO.c_linesize;
355 		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
356 		for (; --i >= 0; p += ls)
357 			sta(p, ASI_FLUSHCTX, 0);
358 	}
359 }
360 
361 /*
362  * Flush the given virtual region from the cache.
363  *
364  * This is also done by writing to each cache line, except that
365  * now the addresses must include the virtual region number, and
366  * we use the `flush region' space.
367  *
368  * This function is only called on sun4's with 3-level MMUs; there's
369  * no hw-flush space.
370  */
371 void
372 sun4_vcache_flush_region(vreg)
373 	int vreg;
374 {
375 	int i, ls;
376 	char *p;
377 
378 	cachestats.cs_nrgflush++;
379 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
380 	ls = CACHEINFO.c_linesize;
381 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
382 	for (; --i >= 0; p += ls)
383 		sta(p, ASI_FLUSHREG, 0);
384 }
385 
386 /*
387  * Flush the given virtual segment from the cache.
388  *
389  * This is also done by writing to each cache line, except that
390  * now the addresses must include the virtual segment number, and
391  * we use the `flush segment' space.
392  *
393  * Again, for hardware, we just write each page (in hw-flush space).
394  */
395 void
396 sun4_vcache_flush_segment(vreg, vseg)
397 	int vreg, vseg;
398 {
399 	int i, ls;
400 	char *p;
401 
402 	cachestats.cs_nsgflush++;
403 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
404 	if (CACHEINFO.c_hwflush) {
405 		ls = NBPG;
406 		i = CACHEINFO.c_totalsize >> PGSHIFT;
407 		for (; --i >= 0; p += ls)
408 			sta(p, ASI_HWFLUSHSEG, 0);
409 	} else {
410 		ls = CACHEINFO.c_linesize;
411 		i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
412 		for (; --i >= 0; p += ls)
413 			sta(p, ASI_FLUSHSEG, 0);
414 	}
415 }
416 
417 /*
418  * Flush the given virtual page from the cache.
419  * (va is the actual address, and must be aligned on a page boundary.)
420  * Again we write to each cache line.
421  */
422 void
423 sun4_vcache_flush_page(va)
424 	int va;
425 {
426 	int i, ls;
427 	char *p;
428 
429 #ifdef DEBUG
430 	if (va & PGOFSET)
431 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
432 #endif
433 
434 	cachestats.cs_npgflush++;
435 	p = (char *)va;
436 	ls = CACHEINFO.c_linesize;
437 	i = NBPG >> CACHEINFO.c_l2linesize;
438 	for (; --i >= 0; p += ls)
439 		sta(p, ASI_FLUSHPG, 0);
440 }
441 
442 /*
443  * Flush the given virtual page from the cache.
444  * (va is the actual address, and must be aligned on a page boundary.)
445  * This version uses the hardware-assisted flush operation and just needs
446  * one write into ASI_HWFLUSHPG space to flush all cache lines.
447  */
448 void
449 sun4_vcache_flush_page_hw(va)
450 	int va;
451 {
452 	char *p;
453 
454 #ifdef DEBUG
455 	if (va & PGOFSET)
456 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
457 #endif
458 
459 	cachestats.cs_npgflush++;
460 	p = (char *)va;
461 	sta(p, ASI_HWFLUSHPG, 0);
462 }
463 
464 /*
465  * Flush a range of virtual addresses (in the current context).
466  * The first byte is at (base&~PGOFSET) and the last one is just
467  * before byte (base+len).
468  *
469  * We choose the best of (context,segment,page) here.
470  */
471 
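/*
 * CACHE_FLUSH_MAGIC is the break-even point, in pages: flushing this many
 * pages line by line touches c_totalsize / c_linesize lines, which is the
 * same number of iterations as a single segment (or context) flush, so
 * beyond this threshold a coarser flush is never more work.
 */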
472 #define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)
473 
474 void
475 sun4_cache_flush(base, len)
476 	caddr_t base;
477 	u_int len;
478 {
479 	int i, ls, baseoff;
480 	char *p;
481 
482 	if (CACHEINFO.c_vactype == VAC_NONE)
483 		return;
484 
485 	/*
486 	 * Figure out how much must be flushed.
487 	 *
488 	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
489 	 * in the same number of loop iterations.  We can also do the whole
490 	 * region. If we need between 2 and NSEGRG segments, do the region.
491 	 * If we need to do two or more regions, just go ahead and do the
492 	 * whole context. This might not be ideal (e.g., fsck likes to do
493 	 * 65536-byte reads, which might not necessarily be aligned).
494 	 *
495 	 * We could try to be sneaky here and use the direct mapping
496 	 * to avoid flushing things `below' the start and `above' the
497 	 * ending address (rather than rounding to whole pages and
498 	 * segments), but I did not want to debug that now and it is
499 	 * not clear it would help much.
500 	 *
501 	 * (XXX the magic number 16 is now wrong, must review policy)
502 	 */
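	/* i = number of pages spanned by [base, base+len) */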
503 	baseoff = (int)base & PGOFSET;
504 	i = (baseoff + len + PGOFSET) >> PGSHIFT;
505 
506 	cachestats.cs_nraflush++;
507 #ifdef notyet
508 	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
509 #endif
510 
511 	if (i < CACHE_FLUSH_MAGIC) {
512 		/* cache_flush_page, for i pages */
513 		p = (char *)((int)base & ~baseoff);
514 		if (CACHEINFO.c_hwflush) {
515 			for (; --i >= 0; p += NBPG)
516 				sta(p, ASI_HWFLUSHPG, 0);
517 		} else {
518 			ls = CACHEINFO.c_linesize;
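			/* convert the page count into a cache-line count */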
519 			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
520 			for (; --i >= 0; p += ls)
521 				sta(p, ASI_FLUSHPG, 0);
522 		}
523 		return;
524 	}
525 	baseoff = (u_int)base & SGOFSET;
526 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
527 	if (i == 1)
528 		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
529 	else {
530 		if (HASSUN4_MMU3L) {
531 			baseoff = (u_int)base & RGOFSET;
532 			i = (baseoff + len + RGOFSET) >> RGSHIFT;
533 			if (i == 1)
534 				sun4_vcache_flush_region(VA_VREG(base));
535 			else
536 				sun4_vcache_flush_context();
537 		} else
538 			sun4_vcache_flush_context();
539 	}
540 }
541 
542 
543 #if defined(SUN4M) || defined(SUN4D)
544 /*
545  * Flush the current context from the cache.
546  *
547  * This is done by writing to each cache line in the `flush context'
548  * address space.  Unlike the sun4 version, there is no per-page
549  * hardware flush assist here; every line is flushed explicitly.
550  */
551 void
552 srmmu_vcache_flush_context()
553 {
554 	char *p;
555 	int i, ls;
556 
557 	cachestats.cs_ncxflush++;
558 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
559 	ls = CACHEINFO.c_linesize;
560 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
561 	for (; --i >= 0; p += ls)
562 		sta(p, ASI_IDCACHELFC, 0);
563 }
564 
565 /*
566  * Flush the given virtual region from the cache.
567  *
568  * This is also done by writing to each cache line, except that
569  * now the addresses must include the virtual region number, and
570  * we use the `flush region' space.
571  */
572 void
573 srmmu_vcache_flush_region(vreg)
574 	int vreg;
575 {
576 	int i, ls;
577 	char *p;
578 
579 	cachestats.cs_nrgflush++;
580 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
581 	ls = CACHEINFO.c_linesize;
582 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
583 	for (; --i >= 0; p += ls)
584 		sta(p, ASI_IDCACHELFR, 0);
585 }
586 
587 /*
588  * Flush the given virtual segment from the cache.
589  *
590  * This is also done by writing to each cache line, except that
591  * now the addresses must include the virtual segment number, and
592  * we use the `flush segment' space.
593  * Unlike the sun4 version, there is no hardware flush assist here.
594  * Again, for hardware, we just write each page (in hw-flush space).
595  */
596 void
597 srmmu_vcache_flush_segment(vreg, vseg)
598 	int vreg, vseg;
599 {
600 	int i, ls;
601 	char *p;
602 
603 	cachestats.cs_nsgflush++;
604 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
605 	ls = CACHEINFO.c_linesize;
606 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
607 	for (; --i >= 0; p += ls)
608 		sta(p, ASI_IDCACHELFS, 0);
609 }
610 
611 /*
612  * Flush the given virtual page from the cache.
613  * (va is the actual address, and must be aligned on a page boundary.)
614  * Again we write to each cache line.
615  */
616 void
617 srmmu_vcache_flush_page(va)
618 	int va;
619 {
620 	int i, ls;
621 	char *p;
622 
623 #ifdef DEBUG
624 	if (va & PGOFSET)
625 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
626 #endif
627 
628 	cachestats.cs_npgflush++;
629 	p = (char *)va;
630 	ls = CACHEINFO.c_linesize;
631 	i = NBPG >> CACHEINFO.c_l2linesize;
632 	for (; --i >= 0; p += ls)
633 		sta(p, ASI_IDCACHELFP, 0);
634 }
635 
636 /*
637  * Flush entire cache.
638  */
639 void
640 srmmu_cache_flush_all()
641 {
642 	srmmu_vcache_flush_context();
643 }
644 
645 /*
646  * Flush a range of virtual addresses (in the current context).
647  * The first byte is at (base&~PGOFSET) and the last one is just
648  * before byte (base+len).
649  *
650  * We choose the best of (context,segment,page) here.
651  */
652 
653 #define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / NBPG)
654 
655 void
656 srmmu_cache_flush(base, len)
657 	caddr_t base;
658 	u_int len;
659 {
660 	int i, ls, baseoff;
661 	char *p;
662 
663 	if (len < NBPG) {
664 		/* less than a page, flush just the covered cache lines */
665 		ls = CACHEINFO.c_linesize;
666 		baseoff = (int)base & (ls - 1);
667 		i = (baseoff + len + ls - 1) >> CACHEINFO.c_l2linesize;
668 		p = (char *)((int)base & -ls);
669 		for (; --i >= 0; p += ls)
670 			sta(p, ASI_IDCACHELFP, 0);
671 		return;
672 	}
673 
674 	/*
675 	 * Figure out how much must be flushed.
676 	 *
677 	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
678 	 * in the same number of loop iterations.  We can also do the whole
679 	 * region. If we need between 2 and NSEGRG segments, do the region.
680 	 * If we need to do two or more regions, just go ahead and do the
681 	 * whole context. This might not be ideal (e.g., fsck likes to do
682 	 * 65536-byte reads, which might not necessarily be aligned).
683 	 *
684 	 * We could try to be sneaky here and use the direct mapping
685 	 * to avoid flushing things `below' the start and `above' the
686 	 * ending address (rather than rounding to whole pages and
687 	 * segments), but I did not want to debug that now and it is
688 	 * not clear it would help much.
689 	 *
690 	 * (XXX the magic number 16 is now wrong, must review policy)
691 	 */
692 	baseoff = (int)base & PGOFSET;
693 	i = (baseoff + len + PGOFSET) >> PGSHIFT;
694 
695 	cachestats.cs_nraflush++;
696 #ifdef notyet
697 	cachestats.cs_ra[min(i, MAXCACHERANGE)]++;
698 #endif
699 
700 	if (i < CACHE_FLUSH_MAGIC) {
701 		/* cache_flush_page, for i pages */
702 		p = (char *)((int)base & ~baseoff);
703 		ls = CACHEINFO.c_linesize;
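		/* convert the page count into a cache-line count */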
704 		i <<= PGSHIFT - CACHEINFO.c_l2linesize;
705 		for (; --i >= 0; p += ls)
706 			sta(p, ASI_IDCACHELFP, 0);
707 		return;
708 	}
709 	baseoff = (u_int)base & SGOFSET;
710 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
711 	if (i == 1)
712 		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base));
713 	else {
714 		baseoff = (u_int)base & RGOFSET;
715 		i = (baseoff + len + RGOFSET) >> RGSHIFT;
716 		if (i == 1)
717 			srmmu_vcache_flush_region(VA_VREG(base));
718 		else
719 			srmmu_vcache_flush_context();
720 	}
721 }
722 
723 int ms1_cacheflush_magic = 0;
724 #define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic
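/*
 * Note: since MS1_CACHEFLUSH_MAGIC expands to a C variable rather than a
 * preprocessor constant, the `#if MS1_CACHEFLUSH_MAGIC' below evaluates
 * to 0 at compile time and the per-line invalidation path is compiled
 * out; presumably it is retained as a tunable for experimentation.
 */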
725 void
726 ms1_cache_flush(base, len)
727 	caddr_t base;
728 	u_int len;
729 {
730 	/*
731 	 * Although physically tagged, we still need to flush the
732 	 * data cache after (if we have a write-through cache) or before
733 	 * (in case of write-back caches) DMA operations.
734 	 */
735 
736 #if MS1_CACHEFLUSH_MAGIC
737 	if (len <= MS1_CACHEFLUSH_MAGIC) {
738 		/*
739 		 * If the range to be flushed is sufficiently small
740 		 * invalidate the covered cache lines by hand.
741 		 *
742 		 * The MicroSPARC I has a direct-mapped virtually addressed
743 		 * physically tagged data cache which is organised as
744 		 * 128 lines of 16 bytes. Virtual address bits [4-10]
745 		 * select the cache line. The cache tags are accessed
746 		 * through the standard DCACHE control space using the
747 		 * same address bits as those used to select the cache
748 		 * line in the virtual address.
749 		 *
750 		 * Note: we don't bother to compare the actual tags
751 		 * since that would require looking up physical addresses.
752 		 *
753 		 * The format of the tags we read from ASI_DCACHE control
754 		 * space is:
755 		 *
756 		 * 31     27 26            11 10         1 0
757 		 * +--------+----------------+------------+-+
758 		 * |  xxx   |    PA[26-11]   |    xxx     |V|
759 		 * +--------+----------------+------------+-+
760 		 *
761 		 * PA: bits 11-26 of the physical address
762 		 * V:  line valid bit
763 		 */
764 		int tagaddr = ((u_int)base & 0x7f0);
765 
766 		len = roundup(len, 16);
767 		while (len != 0) {
768 			int tag = lda(tagaddr, ASI_DCACHETAG);
769 			if ((tag & 1) == 1) {
770 				/* Mark this cache line invalid */
771 				sta(tagaddr, ASI_DCACHETAG, 0);
772 			}
773 			len -= 16;
774 			tagaddr = (tagaddr + 16) & 0x7f0;
775 		}
776 	} else
777 #endif
778 		/* Flush entire data cache */
779 		sta(0, ASI_DCACHECLR, 0);
780 }
781 
782 /*
783  * Flush entire cache.
784  */
785 void
786 ms1_cache_flush_all()
787 {
788 
789 	/* Flash-clear both caches */
790 	sta(0, ASI_ICACHECLR, 0);
791 	sta(0, ASI_DCACHECLR, 0);
792 }
793 
794 void
795 hypersparc_cache_flush_all()
796 {
797 
798 	srmmu_vcache_flush_context();
799 	/* Flush instruction cache */
800 	hypersparc_pure_vcache_flush();
801 }
802 
803 void
804 cypress_cache_flush_all()
805 {
806 
807 	extern char kernel_text[];
808 	char *p;
809 	int i, ls;
810 
811 	/* Fill the cache with known read-only content */
812 	p = (char *)kernel_text;
813 	ls = CACHEINFO.c_linesize;
814 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
815 	for (; --i >= 0; p += ls)
816 		(*(volatile char *)p);
817 }
818 
819 
820 void
821 viking_cache_flush(base, len)
822 	caddr_t base;
823 	u_int len;
824 {
825 	/*
826 	 * Although physically tagged, we still need to flush the
827 	 * data cache after (if we have a write-through cache) or before
828 	 * (in case of write-back caches) DMA operations.
829 	 */
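	/*
	 * XXX: this is currently a no-op; presumably this module keeps its
	 * physically tagged cache consistent with DMA traffic by snooping,
	 * so no per-range flush is needed here.
	 */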
830 
831 }
832 
833 void
834 viking_pcache_flush_page(pa, invalidate_only)
835 	paddr_t pa;
836 	int invalidate_only;
837 {
838 	int set, i;
839 
840 	/*
841 	 * The viking's on-chip data cache is 4-way set associative,
842 	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
843 	 * Note that one 4096 byte page exactly covers all 128 sets
844 	 * in the cache.
845 	 */
846 	if (invalidate_only) {
847 		u_int pa_tag = (pa >> 12);
848 		u_int tagaddr;
849 		u_int64_t tag;
850 
851 		/*
852 		 * Loop over all sets and invalidate all entries tagged
853 		 * with the given physical address by resetting the cache
854 		 * tag in ASI_DCACHETAG control space.
855 		 *
856 		 * The address format for accessing a tag is:
857 		 *
858 		 * 31   30      27   26                  11      5 4  3 2    0
859 		 * +------+-----+------+-------//--------+--------+----+-----+
860 		 * | type | xxx | line |       xxx       |  set   | xx | 0   |
861 		 * +------+-----+------+-------//--------+--------+----+-----+
862 		 *
863 		 * set:  the cache set tag to be read (0-127)
864 		 * line: the line within the set (0-3)
865 		 * type: 1: read set tag; 2: read physical tag
866 		 *
867 		 * The (type 2) tag read from this address is a 64-bit word
868 		 * formatted as follows:
869 		 *
870 		 *          5         4         4
871 		 * 63       6         8         0            23               0
872 		 * +-------+-+-------+-+-------+-+-----------+----------------+
873 		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |    PA[35-12]   |
874 		 * +-------+-+-------+-+-------+-+-----------+----------------+
875 		 *
876 		 * PA: bits 12-35 of the physical address
877 		 * S:  line shared bit
878 		 * D:  line dirty bit
879 		 * V:  line valid bit
880 		 */
881 
882 #define VIKING_DCACHETAG_S	0x0000010000000000UL	/* line shared bit */
883 #define VIKING_DCACHETAG_D	0x0001000000000000UL	/* line dirty bit */
884 #define VIKING_DCACHETAG_V	0x0100000000000000UL	/* line valid bit */
885 #define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffUL	/* PA tag field */
886 
887 		for (set = 0; set < 128; set++) {
888 			/* Set set number and access type */
889 			tagaddr = (set << 5) | (2 << 30);
890 
891 			/* Examine the tag for each line in the set */
892 			for (i = 0 ; i < 4; i++) {
893 				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
894 				/*
895 				 * If this is a valid tag and the PA field
896 				 * matches clear the tag.
897 				 */
898 				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
899 				    (tag & VIKING_DCACHETAG_V) != 0)
900 					stda(tagaddr | (i << 26),
901 					     ASI_DCACHETAG, 0);
902 			}
903 		}
904 
905 	} else {
906 		extern char kernel_text[];
907 
908 		/*
909 		 * Force the cache to validate its backing memory
910 		 * by displacing all cache lines with known read-only
911 		 * content from the start of kernel text.
912 		 *
913 		 * Note that this thrashes the entire cache. However,
914 		 * we currently only need to call upon this code
915 		 * once at boot time.
916 		 */
917 		for (set = 0; set < 128; set++) {
918 			int *v = (int *)(kernel_text + (set << 5));
919 
920 			/*
921 			 * We need to read (2*associativity-1) different
922 			 * locations to be sure to displace the entire set.
923 			 */
924 			i = 2 * 4 - 1;
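			/*
			 * Each step of 4096 ints (16KB, the size of the
			 * on-chip data cache) maps to the same set.
			 */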
925 			while (i--) {
926 				(*(volatile int *)v);
927 				v += 4096;
928 			}
929 		}
930 	}
931 }
932 #endif /* SUN4M || SUN4D */
933 
934 
935 #if defined(MULTIPROCESSOR)
936 /*
937  * Cache flushing on multi-processor systems involves sending
938  * inter-processor messages to flush the cache on each module.
939  *
940  * The current context of the originating processor is passed in the
941  * message. This assumes the allocation of CPU contexts is a global
942  * operation (remember that the actual context tables for the CPUs
943  * are distinct).
944  *
945  * We don't do cross calls if we're cold or if we're not yet accepting
946  * them ourselves (i.e. CPUFLG_READY is not set).
947  */
948 
949 void
950 smp_vcache_flush_page(va)
951 	int va;
952 {
953 	int n, s;
954 
955 	cpuinfo.sp_vcache_flush_page(va);
956 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
957 		return;
958 	LOCK_XPMSG();
959 	for (n = 0; n < ncpu; n++) {
960 		struct cpu_info *cpi = cpus[n];
961 		struct xpmsg_flush_page *p;
962 
963 		if (CPU_READY(cpi))
964 			continue;
965 		p = &cpi->msg.u.xpmsg_flush_page;
966 		s = splhigh();
967 		simple_lock(&cpi->msg.lock);
968 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_PAGE;
969 		p->ctx = getcontext4m();
970 		p->va = va;
971 		raise_ipi_wait_and_unlock(cpi);
972 		splx(s);
973 	}
974 	UNLOCK_XPMSG();
975 }
976 
977 void
978 smp_vcache_flush_segment(vr, vs)
979 	int vr, vs;
980 {
981 	int n, s;
982 
983 	cpuinfo.sp_vcache_flush_segment(vr, vs);
984 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
985 		return;
986 	LOCK_XPMSG();
987 	for (n = 0; n < ncpu; n++) {
988 		struct cpu_info *cpi = cpus[n];
989 		struct xpmsg_flush_segment *p;
990 
991 		if (CPU_READY(cpi))
992 			continue;
993 		p = &cpi->msg.u.xpmsg_flush_segment;
994 		s = splhigh();
995 		simple_lock(&cpi->msg.lock);
996 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_SEGMENT;
997 		p->ctx = getcontext4m();
998 		p->vr = vr;
999 		p->vs = vs;
1000 		raise_ipi_wait_and_unlock(cpi);
1001 		splx(s);
1002 	}
1003 	UNLOCK_XPMSG();
1004 }
1005 
1006 void
1007 smp_vcache_flush_region(vr)
1008 	int vr;
1009 {
1010 	int n, s;
1011 
1012 	cpuinfo.sp_vcache_flush_region(vr);
1013 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
1014 		return;
1015 	LOCK_XPMSG();
1016 	for (n = 0; n < ncpu; n++) {
1017 		struct cpu_info *cpi = cpus[n];
1018 		struct xpmsg_flush_region *p;
1019 
1020 		if (CPU_READY(cpi))
1021 			continue;
1022 		p = &cpi->msg.u.xpmsg_flush_region;
1023 		s = splhigh();
1024 		simple_lock(&cpi->msg.lock);
1025 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_REGION;
1026 		p->ctx = getcontext4m();
1027 		p->vr = vr;
1028 		raise_ipi_wait_and_unlock(cpi);
1029 		splx(s);
1030 	}
1031 	UNLOCK_XPMSG();
1032 }
1033 
1034 void
1035 smp_vcache_flush_context()
1036 {
1037 	int n, s;
1038 
1039 	cpuinfo.sp_vcache_flush_context();
1040 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
1041 		return;
1042 	LOCK_XPMSG();
1043 	for (n = 0; n < ncpu; n++) {
1044 		struct cpu_info *cpi = cpus[n];
1045 		struct xpmsg_flush_context *p;
1046 
1047 		if (CPU_READY(cpi))
1048 			continue;
1049 		p = &cpi->msg.u.xpmsg_flush_context;
1050 		s = splhigh();
1051 		simple_lock(&cpi->msg.lock);
1052 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_CONTEXT;
1053 		p->ctx = getcontext4m();
1054 		raise_ipi_wait_and_unlock(cpi);
1055 		splx(s);
1056 	}
1057 	UNLOCK_XPMSG();
1058 }
1059 
1060 void
1061 smp_cache_flush(va, size)
1062 	caddr_t va;
1063 	u_int size;
1064 {
1065 	int n, s;
1066 
1067 	cpuinfo.sp_cache_flush(va, size);
1068 	if (cold || (cpuinfo.flags & CPUFLG_READY) == 0)
1069 		return;
1070 	LOCK_XPMSG();
1071 	for (n = 0; n < ncpu; n++) {
1072 		struct cpu_info *cpi = cpus[n];
1073 		struct xpmsg_flush_range *p;
1074 
1075 		if (CPU_READY(cpi))
1076 			continue;
1077 		p = &cpi->msg.u.xpmsg_flush_range;
1078 		s = splhigh();
1079 		simple_lock(&cpi->msg.lock);
1080 		cpi->msg.tag = XPMSG_VCACHE_FLUSH_RANGE;
1081 		p->ctx = getcontext4m();
1082 		p->va = va;
1083 		p->size = size;
1084 		raise_ipi_wait_and_unlock(cpi);
1085 		splx(s);
1086 	}
1087 	UNLOCK_XPMSG();
1088 }
1089 #endif /* MULTIPROCESSOR */
1090