1 /*	$NetBSD: cache.c,v 1.97 2007/03/04 09:03:34 macallan Exp $ */
2 
3 /*
4  * Copyright (c) 1996
5  *	The President and Fellows of Harvard College. All rights reserved.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This software was developed by the Computer Systems Engineering group
10  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
11  * contributed to Berkeley.
12  *
13  * All advertising materials mentioning features or use of this software
14  * must display the following acknowledgement:
15  *	This product includes software developed by Harvard University.
16  *	This product includes software developed by the University of
17  *	California, Lawrence Berkeley Laboratory.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  *
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by Aaron Brown and
31  *	Harvard University.
32  *	This product includes software developed by the University of
33  *	California, Berkeley and its contributors.
34  * 4. Neither the name of the University nor the names of its contributors
35  *    may be used to endorse or promote products derived from this software
36  *    without specific prior written permission.
37  *
38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  *
50  *	@(#)cache.c	8.2 (Berkeley) 10/30/93
51  *
52  */
53 
54 /*
55  * Cache routines.
56  *
57  * TODO:
58  *	- rework range flush
59  */
60 
61 #include <sys/cdefs.h>
62 __KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.97 2007/03/04 09:03:34 macallan Exp $");
63 
64 #include "opt_multiprocessor.h"
65 #include "opt_sparc_arch.h"
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/kernel.h>
70 
71 #include <uvm/uvm_extern.h>
72 
73 #include <machine/ctlreg.h>
74 #include <machine/pte.h>
75 
76 #include <sparc/sparc/asm.h>
77 #include <sparc/sparc/cache.h>
78 #include <sparc/sparc/cpuvar.h>
79 
80 struct evcnt vcache_flush_pg =
81 	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","pg");
82 EVCNT_ATTACH_STATIC(vcache_flush_pg);
83 struct evcnt vcache_flush_seg =
84 	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","seg");
85 EVCNT_ATTACH_STATIC(vcache_flush_seg);
86 struct evcnt vcache_flush_reg =
87 	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","reg");
88 EVCNT_ATTACH_STATIC(vcache_flush_reg);
89 struct evcnt vcache_flush_ctx =
90 	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","ctx");
91 EVCNT_ATTACH_STATIC(vcache_flush_ctx);
92 struct evcnt vcache_flush_range =
93 	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","rng");
94 EVCNT_ATTACH_STATIC(vcache_flush_range);
95 
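/*
 * Two virtual mappings of the same physical page can end up in different
 * lines of a virtually indexed cache only if they differ in the VA bits
 * selected by cache_alias_bits, i.e. only if they are not congruent
 * modulo cache_alias_dist.  dvma_cachealign records the alignment DVMA
 * mappings use to keep them congruent.
 */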
96 int cache_alias_dist;		/* Cache anti-aliasing constants */
97 int cache_alias_bits;
98 u_long dvma_cachealign;
99 
100 /*
101  * Enable the cache.
102  * We need to clear out the valid bits first.
103  */
104 void
105 sun4_cache_enable(void)
106 {
107 	u_int i, lim, ls, ts;
108 
109 	cache_alias_bits = CPU_ISSUN4
110 				? CACHE_ALIAS_BITS_SUN4
111 				: CACHE_ALIAS_BITS_SUN4C;
112 	cache_alias_dist = CPU_ISSUN4
113 				? CACHE_ALIAS_DIST_SUN4
114 				: CACHE_ALIAS_DIST_SUN4C;
115 
116 	ls = CACHEINFO.c_linesize;
117 	ts = CACHEINFO.c_totalsize;
118 
119 	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
120 		sta(i, ASI_CONTROL, 0);
121 
122 	stba(AC_SYSENABLE, ASI_CONTROL,
123 	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
124 	CACHEINFO.c_enabled = 1;
125 
126 #ifdef notyet
127 	if (cpuinfo.flags & SUN4_IOCACHE) {
128 		stba(AC_SYSENABLE, ASI_CONTROL,
129 		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
130 		printf("iocache enabled\n");
131 	}
132 #endif
133 }
134 
135 /*
136  * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
137  */
138 #if defined(SUN4M) || defined(SUN4D)
139 void
140 ms1_cache_enable(void)
141 {
142 	u_int pcr;
143 
144 	cache_alias_dist = max(
145 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
146 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
147 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
148 
149 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
150 
151 	/* We "flash-clear" the I/D caches. */
152 	if ((pcr & MS1_PCR_ICE) == 0)
153 		sta(0, ASI_ICACHECLR, 0);
154 	if ((pcr & MS1_PCR_DCE) == 0)
155 		sta(0, ASI_DCACHECLR, 0);
156 
157 	/* Turn on caches */
158 	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);
159 
160 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
161 
162 	/*
163 	 * When zeroing or copying pages, there might still be entries in
164 	 * the cache, since we don't flush pages from the cache when
165 	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
166 	 * MS1 cache is write-through and not write-allocate, so we can
167 	 * use cacheable access while not displacing cache lines.
168 	 */
169 	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
170 }
171 
172 void
173 viking_cache_enable(void)
174 {
175 	u_int pcr;
176 
177 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
178 
179 	if ((pcr & VIKING_PCR_ICE) == 0) {
180 		/* I-cache not on; "flash-clear" it now. */
181 		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
182 		sta(0, ASI_ICACHECLR, 0);		/* clear */
183 	}
184 	if ((pcr & VIKING_PCR_DCE) == 0) {
185 		/* D-cache not on: "flash-clear" it. */
186 		sta(0x80000000, ASI_DCACHECLR, 0);
187 		sta(0, ASI_DCACHECLR, 0);
188 	}
189 
190 	/* Turn on caches via MMU */
191 	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);
192 
193 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
194 
195 	/* Now turn on MultiCache if it exists */
196 	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
197 		/* Set external cache enable bit in MXCC control register */
198 		stda(MXCC_CTRLREG, ASI_CONTROL,
199 		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
200 		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
201 		CACHEINFO.ec_enabled = 1;
202 	}
203 }
204 
205 void
206 hypersparc_cache_enable(void)
207 {
208 	int i, ls, ts;
209 	u_int pcr, v;
210 	int alias_dist;
211 
212 	/*
213 	 * Setup the anti-aliasing constants and DVMA alignment constraint.
214 	 */
215 	alias_dist = CACHEINFO.c_totalsize;
216 	if (alias_dist > cache_alias_dist) {
217 		cache_alias_dist = alias_dist;
218 		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
219 		dvma_cachealign = cache_alias_dist;
220 	}
221 
222 	ls = CACHEINFO.c_linesize;
223 	ts = CACHEINFO.c_totalsize;
224 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
225 
226 	/* Now reset cache tag memory if cache not yet enabled */
227 	if ((pcr & HYPERSPARC_PCR_CE) == 0)
228 		for (i = 0; i < ts; i += ls)
229 			sta(i, ASI_DCACHETAG, 0);
230 
231 	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
232 	hypersparc_cache_flush_all();
233 
234 	/* Enable write-back cache */
235 	pcr |= HYPERSPARC_PCR_CE;
236 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
237 		pcr |= HYPERSPARC_PCR_CM;
238 
239 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
240 	CACHEINFO.c_enabled = 1;
241 
242 	/* XXX: should add support */
243 	if (CACHEINFO.c_hwflush)
244 		panic("cache_enable: can't handle 4M with hw-flush cache");
245 
246 	/*
247 	 * Enable instruction cache and, on single-processor machines,
248 	 * disable `Unimplemented Flush Traps'.
249 	 */
250 	v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
251 	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
252 }
253 
254 
255 void
256 swift_cache_enable(void)
257 {
258 	int i, ls, ts;
259 	u_int pcr;
260 
261 	cache_alias_dist = max(
262 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
263 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
264 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
265 
266 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
267 
268 	/* Now reset cache tag memory if cache not yet enabled */
269 	ls = CACHEINFO.ic_linesize;
270 	ts = CACHEINFO.ic_totalsize;
271 	if ((pcr & SWIFT_PCR_ICE) == 0)
272 		for (i = 0; i < ts; i += ls)
273 			sta(i, ASI_ICACHETAG, 0);
274 
275 	ls = CACHEINFO.dc_linesize;
276 	ts = CACHEINFO.dc_totalsize;
277 	if ((pcr & SWIFT_PCR_DCE) == 0)
278 		for (i = 0; i < ts; i += ls)
279 			sta(i, ASI_DCACHETAG, 0);
280 
281 	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
282 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
283 	CACHEINFO.c_enabled = 1;
284 }
285 
286 void
287 cypress_cache_enable(void)
288 {
289 	int i, ls, ts;
290 	u_int pcr;
291 	int alias_dist;
292 
293 	alias_dist = CACHEINFO.c_totalsize;
294 	if (alias_dist > cache_alias_dist) {
295 		cache_alias_dist = alias_dist;
296 		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
297 		dvma_cachealign = alias_dist;
298 	}
299 
300 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
301 	pcr &= ~CYPRESS_PCR_CM;
302 
303 	/* Now reset cache tag memory if cache not yet enabled */
304 	ls = CACHEINFO.c_linesize;
305 	ts = CACHEINFO.c_totalsize;
306 	if ((pcr & CYPRESS_PCR_CE) == 0)
307 		for (i = 0; i < ts; i += ls)
308 			sta(i, ASI_DCACHETAG, 0);
309 
310 	pcr |= CYPRESS_PCR_CE;
311 	/* If put in write-back mode, turn it on */
312 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
313 		pcr |= CYPRESS_PCR_CM;
314 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
315 	CACHEINFO.c_enabled = 1;
316 }
317 
318 void
319 turbosparc_cache_enable(void)
320 {
321 	int i, ls, ts;
322 	u_int pcr, pcf;
323 	/* External cache sizes in KB; see Turbo sparc manual */
324 	static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};
325 
326 	cache_alias_dist = max(
327 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
328 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
329 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
330 
331 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
332 
333 	/* Now reset cache tag memory if cache not yet enabled */
334 	ls = CACHEINFO.ic_linesize;
335 	ts = CACHEINFO.ic_totalsize;
336 	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
337 		for (i = 0; i < ts; i += ls)
338 			sta(i, ASI_ICACHETAG, 0);
339 
340 	ls = CACHEINFO.dc_linesize;
341 	ts = CACHEINFO.dc_totalsize;
342 	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
343 		for (i = 0; i < ts; i += ls)
344 			sta(i, ASI_DCACHETAG, 0);
345 
346 	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
347 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
348 
349 	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
350 	if (pcf & TURBOSPARC_PCFG_SE) {
351 		/*
352 		 * Record external cache info. The Turbosparc's second-
353 		 * level cache is physically addressed/tagged and is
354 		 * not exposed by the PROM.
355 		 */
356 		CACHEINFO.ec_totalsize = 1024 *
357 			ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
358 		CACHEINFO.ec_linesize = 32;
359 	}
360 	if (pcf & TURBOSPARC_PCFG_SNP)
361 		printf(": DVMA coherent ");
362 
363 	CACHEINFO.c_enabled = 1;
364 }
365 #endif /* SUN4M || SUN4D */
366 
367 
368 /*
369  * Note: on the sun4 and sun4c, the cache flush functions ignore the `ctx'
370  * parameter. This can be done since the pmap operations that need
371  * to flush cache lines will already have switched to the proper
372  * context to manipulate the MMU. Hence we can avoid the overhead
373  * of saving and restoring the context here.
374  */
375 
376 /*
377  * Flush the current context from the cache.
378  *
379  * This is done by writing to each cache line in the `flush context'
380  * address space (or, for hardware flush, once to each page in the
381  * hardware flush space, for all cache pages).
382  */
383 void
384 sun4_vcache_flush_context(int ctx)
385 {
386 	char *p;
387 	int i, ls;
388 
389 	vcache_flush_ctx.ev_count++;
390 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
391 	if (CACHEINFO.c_hwflush) {
392 		ls = PAGE_SIZE;
393 		i = CACHEINFO.c_totalsize >> PGSHIFT;
394 		for (; --i >= 0; p += ls)
395 			sta(p, ASI_HWFLUSHCTX, 0);
396 	} else {
397 		ls = CACHEINFO.c_linesize;
398 		i = CACHEINFO.c_nlines;
399 		for (; --i >= 0; p += ls)
400 			sta(p, ASI_FLUSHCTX, 0);
401 	}
402 }
403 
404 /*
405  * Flush the given virtual region from the cache.
406  *
407  * This is also done by writing to each cache line, except that
408  * now the addresses must include the virtual region number, and
409  * we use the `flush region' space.
410  *
411  * This function is only called on sun4's with 3-level MMUs; there's
412  * no hw-flush space.
413  */
414 void
415 sun4_vcache_flush_region(int vreg, int ctx)
416 {
417 	int i, ls;
418 	char *p;
419 
420 	vcache_flush_reg.ev_count++;
421 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
422 	ls = CACHEINFO.c_linesize;
423 	i = CACHEINFO.c_nlines;
424 	for (; --i >= 0; p += ls)
425 		sta(p, ASI_FLUSHREG, 0);
426 }
427 
428 /*
429  * Flush the given virtual segment from the cache.
430  *
431  * This is also done by writing to each cache line, except that
432  * now the addresses must include the virtual segment number, and
433  * we use the `flush segment' space.
434  *
435  * Again, for hardware, we just write each page (in hw-flush space).
436  */
437 void
438 sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
439 {
440 	int i, ls;
441 	char *p;
442 
443 	vcache_flush_seg.ev_count++;
444 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
445 	if (CACHEINFO.c_hwflush) {
446 		ls = PAGE_SIZE;
447 		i = CACHEINFO.c_totalsize >> PGSHIFT;
448 		for (; --i >= 0; p += ls)
449 			sta(p, ASI_HWFLUSHSEG, 0);
450 	} else {
451 		ls = CACHEINFO.c_linesize;
452 		i = CACHEINFO.c_nlines;
453 		for (; --i >= 0; p += ls)
454 			sta(p, ASI_FLUSHSEG, 0);
455 	}
456 }
457 
458 /*
459  * Flush the given virtual page from the cache.
460  * (va is the actual address, and must be aligned on a page boundary.)
461  * Again we write to each cache line.
462  */
463 void
464 sun4_vcache_flush_page(int va, int ctx)
465 {
466 	int i, ls;
467 	char *p;
468 
469 #ifdef DEBUG
470 	if (va & PGOFSET)
471 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
472 #endif
473 
474 	vcache_flush_pg.ev_count++;
475 	p = (char *)va;
476 	ls = CACHEINFO.c_linesize;
477 	i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
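	/* one write per cache line: PAGE_SIZE >> c_l2linesize iterations */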
478 	for (; --i >= 0; p += ls)
479 		sta(p, ASI_FLUSHPG, 0);
480 }
481 
482 /*
483  * Flush the given virtual page from the cache.
484  * (va is the actual address, and must be aligned on a page boundary.)
485  * This version uses hardware-assisted flush operation and just needs
486  * one write into ASI_HWFLUSHPG space to flush all cache lines.
487  */
488 void
489 sun4_vcache_flush_page_hw(int va, int ctx)
490 {
491 	char *p;
492 
493 #ifdef DEBUG
494 	if (va & PGOFSET)
495 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
496 #endif
497 
498 	vcache_flush_pg.ev_count++;
499 	p = (char *)va;
500 	sta(p, ASI_HWFLUSHPG, 0);
501 }
502 
503 /*
504  * Flush a range of virtual addresses (in the current context).
505  * The first byte is at (base&~PGOFSET) and the last one is just
506  * before byte (base+len).
507  *
508  * We choose the best of (context,segment,page) here.
509  */
510 
511 #define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / PAGE_SIZE)
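/*
 * For example, with a 64 KB sun4c cache and 4 KB pages this works out to
 * 16 -- the `magic number 16' referred to in the comment below.
 */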
512 
513 void
514 sun4_cache_flush(void *base, u_int len)
515 {
516 	int i, ls, baseoff;
517 	char *p;
518 
519 	if (CACHEINFO.c_vactype == VAC_NONE)
520 		return;
521 
522 	/*
523 	 * Figure out how much must be flushed.
524 	 *
525 	 * If we need to do CACHE_FLUSH_MAGIC pages,  we can do a segment
526 	 * in the same number of loop iterations.  We can also do the whole
527 	 * region. If we need to do between 2 and NSEGRG, do the region.
528 	 * If we need to do two or more regions, just go ahead and do the
529 	 * whole context. This might not be ideal (e.g., fsck likes to do
530 	 * 65536-byte reads, which might not necessarily be aligned).
531 	 *
532 	 * We could try to be sneaky here and use the direct mapping
533 	 * to avoid flushing things `below' the start and `above' the
534 	 * ending address (rather than rounding to whole pages and
535 	 * segments), but I did not want to debug that now and it is
536 	 * not clear it would help much.
537 	 *
538 	 * (XXX the magic number 16 is now wrong, must review policy)
539 	 */
540 	baseoff = (int)base & PGOFSET;
541 	i = (baseoff + len + PGOFSET) >> PGSHIFT;
542 
543 	vcache_flush_range.ev_count++;
544 
545 	if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
546 		/* cache_flush_page, for i pages */
547 		p = (char *)((int)base & ~baseoff);
548 		if (CACHEINFO.c_hwflush) {
549 			for (; --i >= 0; p += PAGE_SIZE)
550 				sta(p, ASI_HWFLUSHPG, 0);
551 		} else {
552 			ls = CACHEINFO.c_linesize;
553 			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
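			/* i is now a cache-line count rather than a page count */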
554 			for (; --i >= 0; p += ls)
555 				sta(p, ASI_FLUSHPG, 0);
556 		}
557 		return;
558 	}
559 
560 	baseoff = (u_int)base & SGOFSET;
561 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
562 	if (__predict_true(i == 1)) {
563 		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
564 		return;
565 	}
566 
567 	if (HASSUN4_MMU3L) {
568 		baseoff = (u_int)base & RGOFSET;
569 		i = (baseoff + len + RGOFSET) >> RGSHIFT;
570 		if (i == 1)
571 			sun4_vcache_flush_region(VA_VREG(base), 0);
572 		else
573 			sun4_vcache_flush_context(0);
574 	} else
575 		sun4_vcache_flush_context(0);
576 }
577 
578 
579 #if defined(SUN4M) || defined(SUN4D)
580 #define trapoff()	do { setpsr(getpsr() & ~PSR_ET); } while(0)
581 #define trapon()	do { setpsr(getpsr() | PSR_ET); } while(0)
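/*
 * The SRMMU flush routines below temporarily switch MMU contexts with
 * setcontext4m(); traps are disabled (PSR_ET cleared) across the switch
 * so that no interrupt can be taken while the CPU is running in the
 * borrowed context.
 */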
582 /*
583  * Flush the current context from the cache.
584  *
585  * This is done by writing to each cache line in the `flush context'
586  * address space.
587  */
588 void
589 srmmu_vcache_flush_context(int ctx)
590 {
591 	int i, ls, octx;
592 	char *p;
593 
594 	vcache_flush_ctx.ev_count++;
595 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
596 	ls = CACHEINFO.c_linesize;
597 	i = CACHEINFO.c_nlines;
598 	octx = getcontext4m();
599 	trapoff();
600 	setcontext4m(ctx);
601 	for (; --i >= 0; p += ls)
602 		sta(p, ASI_IDCACHELFC, 0);
603 	setcontext4m(octx);
604 	trapon();
605 }
606 
607 /*
608  * Flush the given virtual region from the cache.
609  *
610  * This is also done by writing to each cache line, except that
611  * now the addresses must include the virtual region number, and
612  * we use the `flush region' space.
613  */
614 void
615 srmmu_vcache_flush_region(int vreg, int ctx)
616 {
617 	int i, ls, octx;
618 	char *p;
619 
620 	vcache_flush_reg.ev_count++;
621 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
622 	ls = CACHEINFO.c_linesize;
623 	i = CACHEINFO.c_nlines;
624 	octx = getcontext4m();
625 	trapoff();
626 	setcontext4m(ctx);
627 	for (; --i >= 0; p += ls)
628 		sta(p, ASI_IDCACHELFR, 0);
629 	setcontext4m(octx);
630 	trapon();
631 }
632 
633 /*
634  * Flush the given virtual segment from the cache.
635  *
636  * This is also done by writing to each cache line, except that
637  * now the addresses must include the virtual segment number, and
638  * we use the `flush segment' space.
641  */
642 void
643 srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
644 {
645 	int i, ls, octx;
646 	char *p;
647 
648 	vcache_flush_seg.ev_count++;
649 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
650 	ls = CACHEINFO.c_linesize;
651 	i = CACHEINFO.c_nlines;
652 	octx = getcontext4m();
653 	trapoff();
654 	setcontext4m(ctx);
655 	for (; --i >= 0; p += ls)
656 		sta(p, ASI_IDCACHELFS, 0);
657 	setcontext4m(octx);
658 	trapon();
659 }
660 
661 /*
662  * Flush the given virtual page from the cache.
663  * (va is the actual address, and must be aligned on a page boundary.)
664  * Again we write to each cache line.
665  */
666 void
667 srmmu_vcache_flush_page(int va, int ctx)
668 {
669 	int i, ls, octx;
670 	char *p;
671 
672 #ifdef DEBUG
673 	if (va & PGOFSET)
674 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
675 #endif
676 
677 	vcache_flush_pg.ev_count++;
678 	p = (char *)va;
679 
680 	/*
681 	 * XXX - if called early during bootstrap, we don't have the cache
682 	 *	 info yet. Make up a cache line size (double-word aligned)
683 	 */
684 	if ((ls = CACHEINFO.c_linesize) == 0)
685 		ls = 8;
686 	i = PAGE_SIZE;
687 	octx = getcontext4m();
688 	trapoff();
689 	setcontext4m(ctx);
690 	for (; i > 0; p += ls, i -= ls)
691 		sta(p, ASI_IDCACHELFP, 0);
692 #if defined(MULTIPROCESSOR)
693 	/*
694 	 * The page flush operation will have caused an MMU table walk on
695 	 * Hypersparc because its cache is physically tagged. Since the pmap
696 	 * functions will not always cross-flush it in the MP case (the
697 	 * context may not be active on this CPU), we flush the TLB entry now.
698 	 */
699 	/*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
700 		sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
701 
702 #endif
703 	setcontext4m(octx);
704 	trapon();
705 }
706 
707 /*
708  * Flush entire cache.
709  */
710 void
711 srmmu_cache_flush_all(void)
712 {
713 
714 	srmmu_vcache_flush_context(0);
715 }
716 
717 void
718 srmmu_vcache_flush_range(int va, int len, int ctx)
719 {
720 	int i, ls, offset;
721 	char *p;
722 	int octx;
723 
724 	/*
725 	 * XXX - if called early during bootstrap, we don't have the cache
726 	 *	 info yet. Make up a cache line size (double-word aligned)
727 	 */
728 	if ((ls = CACHEINFO.c_linesize) == 0)
729 		ls = 8;
730 
731 	vcache_flush_range.ev_count++;
732 
733 	/* Round the range out to whole cache lines */
734 	offset = va & (ls - 1);
735 	i = len + offset;
736 	p = (char *)(va & ~(ls - 1));
737 
738 	octx = getcontext4m();
739 	trapoff();
740 	setcontext4m(ctx);
741 	for (; i > 0; p += ls, i -= ls)
742 		sta(p, ASI_IDCACHELFP, 0);
743 
744 #if defined(MULTIPROCESSOR)
745 	if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
746 		/*
747 		 * See hypersparc comment in srmmu_vcache_flush_page().
748 		 */
749 		offset = va & PGOFSET;
750 		i = (offset + len + PGOFSET) >> PGSHIFT;
751 
752 		va = va & ~PGOFSET;
753 		for (; --i >= 0; va += PAGE_SIZE)
754 			sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
755 	}
756 #endif
757 	setcontext4m(octx);
758 	trapon();
759 	return;
760 }
761 
762 /*
763  * Flush a range of virtual addresses (in the current context).
764  *
765  * We choose the best of (context,segment,page) here.
766  */
767 
768 void
769 srmmu_cache_flush(void *base, u_int len)
770 {
771 	int ctx = getcontext4m();
772 	int i, baseoff;
773 
774 
775 	/*
776 	 * Figure out the most efficient way to flush.
777 	 *
778 	 * If we need to do CACHE_FLUSH_MAGIC pages,  we can do a segment
779 	 * in the same number of loop iterations.  We can also do the whole
780 	 * region. If we need to do between 2 and NSEGRG, do the region.
781 	 * If we need to do two or more regions, just go ahead and do the
782 	 * whole context. This might not be ideal (e.g., fsck likes to do
783 	 * 65536-byte reads, which might not necessarily be aligned).
784 	 *
785 	 * We could try to be sneaky here and use the direct mapping
786 	 * to avoid flushing things `below' the start and `above' the
787 	 * ending address (rather than rounding to whole pages and
788 	 * segments), but I did not want to debug that now and it is
789 	 * not clear it would help much.
790 	 *
791 	 */
792 
793 	if (__predict_true(len < CACHEINFO.c_totalsize)) {
794 #if defined(MULTIPROCESSOR)
795 		FXCALL3(cpuinfo.sp_vcache_flush_range,
796 			cpuinfo.ft_vcache_flush_range,
797 			(int)base, len, ctx, CPUSET_ALL);
798 #else
799 		cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
800 #endif
801 		return;
802 	}
803 
804 	baseoff = (u_int)base & SGOFSET;
805 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
806 	if (__predict_true(i == 1)) {
807 #if defined(MULTIPROCESSOR)
808 		FXCALL3(cpuinfo.sp_vcache_flush_segment,
809 			cpuinfo.ft_vcache_flush_segment,
810 			VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
811 #else
812 		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
813 #endif
814 		return;
815 	}
816 
817 	baseoff = (u_int)base & RGOFSET;
818 	i = (baseoff + len + RGOFSET) >> RGSHIFT;
819 	while (i--) {
820 #if defined(MULTIPROCESSOR)
821 		FXCALL2(cpuinfo.sp_vcache_flush_region,
822 		       cpuinfo.ft_vcache_flush_region,
823 		       VA_VREG(base), ctx, CPUSET_ALL);
824 #else
825 		srmmu_vcache_flush_region(VA_VREG(base), ctx);
826 #endif
827 		base = ((char *)base + NBPRG);
828 	}
829 }
830 
831 int ms1_cacheflush_magic = 0;
832 #define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic
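/*
 * Note: ms1_cacheflush_magic is a variable, not a preprocessor symbol, so
 * the `#if MS1_CACHEFLUSH_MAGIC' test below evaluates to 0 and the
 * per-line invalidation path is compiled out; ms1_cache_flush() currently
 * always flash-clears the entire data cache.
 */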
833 
834 void
835 ms1_cache_flush(void *base, u_int len)
836 {
837 
838 	/*
839 	 * Although physically tagged, we still need to flush the
840 	 * data cache after (if we have a write-through cache) or before
841 	 * (in case of write-back caches) DMA operations.
842 	 */
843 
844 #if MS1_CACHEFLUSH_MAGIC
845 	if (len <= MS1_CACHEFLUSH_MAGIC) {
846 		/*
847 		 * If the range to be flushed is sufficiently small
848 		 * invalidate the covered cache lines by hand.
849 		 *
850 		 * The MicroSPARC I has a direct-mapped virtually addressed
851 		 * physically tagged data cache which is organised as
852 		 * 128 lines of 16 bytes. Virtual address bits [4-10]
853 		 * select the cache line. The cache tags are accessed
854 		 * through the standard DCACHE control space using the
855 		 * same address bits as those used to select the cache
856 		 * line in the virtual address.
857 		 *
858 		 * Note: we don't bother to compare the actual tags
859 		 * since that would require looking up physical addresses.
860 		 *
861 		 * The format of the tags we read from ASI_DCACHE control
862 		 * space is:
863 		 *
864 		 * 31     27 26            11 10         1 0
865 		 * +--------+----------------+------------+-+
866 		 * |  xxx   |    PA[26-11]   |    xxx     |V|
867 		 * +--------+----------------+------------+-+
868 		 *
869 		 * PA: bits 11-26 of the physical address
870 		 * V:  line valid bit
871 		 */
872 		int tagaddr = ((u_int)base & 0x7f0);
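		/* e.g. base 0x12345678 selects tag address 0x670 (VA bits [4-10]) */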
873 
874 		len = roundup(len, 16);
875 		while (len != 0) {
876 			int tag = lda(tagaddr, ASI_DCACHETAG);
877 			if ((tag & 1) == 1) {
878 				/* Mark this cache line invalid */
879 				sta(tagaddr, ASI_DCACHETAG, 0);
880 			}
881 			len -= 16;
882 			tagaddr = (tagaddr + 16) & 0x7f0;
883 		}
884 	} else
885 #endif
886 		/* Flush entire data cache */
887 		sta(0, ASI_DCACHECLR, 0);
888 }
889 
890 
891 /*
892  * Flush entire cache.
893  */
894 void
895 ms1_cache_flush_all(void)
896 {
897 
898 	/* Flash-clear both caches */
899 	sta(0, ASI_ICACHECLR, 0);
900 	sta(0, ASI_DCACHECLR, 0);
901 }
902 
903 void
904 hypersparc_cache_flush_all(void)
905 {
906 
907 	srmmu_vcache_flush_context(getcontext4m());
908 	/* Flush instruction cache */
909 	hypersparc_pure_vcache_flush();
910 }
911 
912 void
913 cypress_cache_flush_all(void)
914 {
915 	extern char kernel_text[];
916 
917 	char *p;
918 	int i, ls;
919 
920 	/* Fill the cache with known read-only content */
921 	p = (char *)kernel_text;
922 	ls = CACHEINFO.c_linesize;
923 	i = CACHEINFO.c_nlines;
924 	for (; --i >= 0; p += ls)
925 		(*(volatile char *)p);
926 }
927 
928 
929 void
930 viking_cache_flush(void *base, u_int len)
931 {
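	/*
	 * Nothing to do: the Viking's caches are physically tagged and
	 * kept consistent by hardware, so no virtual-cache flush is needed.
	 */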
932 }
933 
934 void
935 viking_pcache_flush_page(paddr_t pa, int invalidate_only)
936 {
937 	int set, i;
938 
939 	/*
940 	 * The viking's on-chip data cache is 4-way set associative,
941 	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
942 	 * Note that one 4096 byte page exactly covers all 128 sets
943 	 * in the cache.
944 	 */
945 	if (invalidate_only) {
946 		u_int pa_tag = (pa >> 12);
947 		u_int tagaddr;
948 		uint64_t tag;
949 
950 		/*
951 		 * Loop over all sets and invalidate all entries tagged
952 		 * with the given physical address by resetting the cache
953 		 * tag in ASI_DCACHETAG control space.
954 		 *
955 		 * The address format for accessing a tag is:
956 		 *
957 		 * 31   30      27   26                  11      5 4  3 2    0
958 		 * +------+-----+------+-------//--------+--------+----+-----+
959 		 * | type | xxx | line |       xxx       |  set   | xx | 0   |
960 		 * +------+-----+------+-------//--------+--------+----+-----+
961 		 *
962 		 * set:  the cache set tag to be read (0-127)
963 		 * line: the line within the set (0-3)
964 		 * type: 1: read set tag; 2: read physical tag
965 		 *
966 		 * The (type 2) tag read from this address is a 64-bit word
967 		 * formatted as follows:
968 		 *
969 		 *          5         4         4
970 		 * 63       6         8         0            23               0
971 		 * +-------+-+-------+-+-------+-+-----------+----------------+
972 		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |    PA[35-12]   |
973 		 * +-------+-+-------+-+-------+-+-----------+----------------+
974 		 *
975 		 * PA: bits 12-35 of the physical address
976 		 * S:  line shared bit
977 		 * D:  line dirty bit
978 		 * V:  line valid bit
979 		 */
980 
981 #define VIKING_DCACHETAG_S	0x0000010000000000ULL	/* line shared bit */
982 #define VIKING_DCACHETAG_D	0x0001000000000000ULL	/* line dirty bit */
983 #define VIKING_DCACHETAG_V	0x0100000000000000ULL	/* line valid bit */
984 #define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffULL	/* PA tag field */
985 
986 		for (set = 0; set < 128; set++) {
987 			/* Set set number and access type */
988 			tagaddr = (set << 5) | (2 << 30);
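			/* e.g. set 5 yields tagaddr 0x800000a0 (type 2: physical tag) */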
989 
990 			/* Examine the tag for each line in the set */
991 			for (i = 0 ; i < 4; i++) {
992 				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
993 				/*
994 				 * If this is a valid tag and the PA field
995 				 * matches clear the tag.
996 				 */
997 				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
998 				    (tag & VIKING_DCACHETAG_V) != 0)
999 					stda(tagaddr | (i << 26),
1000 					     ASI_DCACHETAG, 0);
1001 			}
1002 		}
1003 
1004 	} else {
1005 		extern char kernel_text[];
1006 
1007 		/*
1008 		 * Force the cache to validate its backing memory
1009 		 * by displacing all cache lines with known read-only
1010 		 * content from the start of kernel text.
1011 		 *
1012 		 * Note that this thrashes the entire cache. However,
1013 		 * we currently only need to call upon this code
1014 		 * once at boot time.
1015 		 */
1016 		for (set = 0; set < 128; set++) {
1017 			int *v = (int *)(kernel_text + (set << 5));
1018 
1019 			/*
1020 			 * We need to read (2*associativity-1) different
1021 			 * locations to be sure to displace the entire set.
1022 			 */
1023 			i = 2 * 4 - 1;
1024 			while (i--) {
1025 				(*(volatile int *)v);
1026 				v += 4096;
1027 			}
1028 		}
1029 	}
1030 }
1031 #endif /* SUN4M || SUN4D */
1032 
1033 
1034 #if defined(MULTIPROCESSOR)
1035 /*
1036  * Cache flushing on multi-processor systems involves sending
1037  * inter-processor messages to flush the cache on each module.
1038  *
1039  * The current context of the originating processor is passed in the
1040  * message. This assumes the allocation of CPU contexts is a global
1041  * operation (remember that the actual context tables for the CPUs
1042  * are distinct).
1043  */
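/*
 * The FXCALL* macros below broadcast a cross-call to the CPUs in
 * CPUSET_ALL so that each module runs its own low-level flush routine
 * with the given arguments.
 */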
1044 
1045 void
1046 smp_vcache_flush_page(int va, int ctx)
1047 {
1048 
1049 	FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
1050 		va, ctx, CPUSET_ALL);
1051 }
1052 
1053 void
1054 smp_vcache_flush_segment(int vr, int vs, int ctx)
1055 {
1056 
1057 	FXCALL3(cpuinfo.sp_vcache_flush_segment, cpuinfo.ft_vcache_flush_segment,
1058 		vr, vs, ctx, CPUSET_ALL);
1059 }
1060 
1061 void
1062 smp_vcache_flush_region(int vr, int ctx)
1063 {
1064 
1065 	FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
1066 		vr, ctx, CPUSET_ALL);
1067 }
1068 
1069 void
1070 smp_vcache_flush_context(int ctx)
1071 {
1072 
1073 	FXCALL1(cpuinfo.sp_vcache_flush_context, cpuinfo.ft_vcache_flush_context,
1074 		ctx, CPUSET_ALL);
1075 }
1076 #endif /* MULTIPROCESSOR */
1077