/*	$NetBSD: cache_r5k.c,v 1.6 2002/11/07 23:03:21 cgd Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32 bytes/line),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity. If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect. These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
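
/*
 * For reference, the shapes of the two workarounds as they appear in the
 * r4600v1_* and r4600v2_* routines below (illustrative sketch only; the
 * real loops live in those functions):
 *
 *	v1.x -- pad with four non-load/store instructions before the op:
 *		__asm __volatile("nop; nop; nop; nop;");
 *		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	v2.0 -- read an uncached (KSEG1) address first, to empty the data
 *	cache refill buffer:
 *		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 * Both variants also run with interrupts disabled so no other dcache
 * activity can intervene.
 */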

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)
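
/*
 * Worked example of the rounding macros: for va = 0x80000021,
 * trunc_line(va) = 0x80000020 and round_line(va) = 0x80000040 (32-byte
 * lines); trunc_line16(va) = 0x80000020 and round_line16(va) = 0x80000030
 * (16-byte lines).  I.e. [va, va + size) is widened outward to whole
 * cache lines before the loops below walk it.
 */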

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
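	/*
	 * For example (assuming a 32KB, 2-way icache, i.e. way_size is
	 * 16KB and way_mask is way_size - 1 = 0x3fff): orig_va 0xc0012345
	 * masks down to 0x2345, giving the KSEG0 address 0x80002345,
	 * which trunc_line()s to 0x80002340; its way-2 partner w2va is
	 * then 0x80002340 + 0x4000 = 0x80006340.
	 */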
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

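/*
 * Note the writeback and invalidate below are issued as two separate Hit
 * ops rather than a single Hit_Writeback_Invalidate_D; this is presumably
 * a workaround for a VR4131 rev 1.x erratum with the combined operation.
 */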
void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line
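
/*
 * None of these routines is called directly; machine-independent code goes
 * through the mips_cache_ops switch filled in by mips_config_cache().
 * Roughly, for an R5000-class CPU (an illustrative sketch only -- see
 * cache.c and <mips/cache.h> for the authoritative member names and
 * selection logic):
 *
 *	mips_cache_ops.mco_icache_sync_all = r5k_icache_sync_all_32;
 *	mips_cache_ops.mco_icache_sync_range = r5k_icache_sync_range_32;
 *	mips_cache_ops.mco_icache_sync_range_index =
 *	    r5k_icache_sync_range_index_32;
 *	mips_cache_ops.mco_pdcache_wbinv_all = r5k_pdcache_wbinv_all_32;
 *	mips_cache_ops.mco_pdcache_wbinv_range = r5k_pdcache_wbinv_range_32;
 *	mips_cache_ops.mco_pdcache_wbinv_range_index =
 *	    r5k_pdcache_wbinv_range_index_32;
 *	mips_cache_ops.mco_pdcache_inv_range = r5k_pdcache_inv_range_32;
 *	mips_cache_ops.mco_pdcache_wb_range = r5k_pdcache_wb_range_32;
 */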