/*	$NetBSD: cache_r5k.c,v 1.5 2002/01/19 04:25:37 shin Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity. If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect. These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
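
/*
 * How these routines get used (illustrative sketch only; the hook
 * names below come from struct mips_cache_ops in <mips/cache.h> and
 * are assumptions here, not something this file defines): the MIPS
 * cache configuration code probes the CPU and installs the matching
 * routines from this file, roughly like
 *
 *	mips_cache_ops.mco_icache_sync_range = r5k_icache_sync_range_32;
 *	mips_cache_ops.mco_pdcache_wbinv_all = r5k_pdcache_wbinv_all_32;
 *
 * so the rest of the kernel only ever calls the generic wrappers.
 */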

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)
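
/*
 * Worked example of the rounding macros (plain arithmetic):
 *
 *	round_line(0xc0000845)   == 0xc0000860
 *	trunc_line(0xc0000845)   == 0xc0000840
 *	round_line16(0xc0000845) == 0xc0000850
 *	trunc_line16(0xc0000845) == 0xc0000840
 *
 * The ".set mips3" directive below lets the assembler accept the
 * MIPS III CACHE instruction used by the inline-asm macros from
 * <mips/cache_r4k.h>, even if the kernel is otherwise built for an
 * older ISA level.
 */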

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */
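	/*
	 * (Concretely: the loop below walks 32-byte lines across KSEG0
	 * addresses spanning the full mips_picache_size, i.e. both
	 * ways' worth of lines, so no separate per-way pass is needed
	 * as in the _index routines further down.)
	 */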

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

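/*
 * Typical caller (illustrative; the wrapper name is assumed from
 * <mips/cache.h>): code that has just written instructions, such as
 * a debugger planting a breakpoint, calls
 *
 *	mips_icache_sync_range(va, len);
 *
 * which dispatches here.  The dcache writeback below pushes the new
 * instructions out toward memory before the corresponding icache
 * lines are invalidated.
 */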
void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
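	/*
	 * Worked example (numbers assumed for illustration, taking
	 * mips_picache_way_mask to be way-size minus one): with a
	 * 32KB 2-way icache the way size is 16KB and the mask is
	 * 0x3fff, so va 0xc0123a60 becomes
	 * MIPS_PHYS_TO_KSEG0(0x3a60) == 0x80003a60, a KSEG0 alias
	 * that selects the same cache index.
	 */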
	va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

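	/*
	 * Per item 18 of the v1.7 errata quoted at the top of this
	 * file: interrupts are disabled above so no interrupt handler
	 * can generate dcache activity in the window, and the four
	 * nops ensure each CACHE op is preceded by four instructions
	 * that are not loads or stores.
	 */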
	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

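	/*
	 * Per the v2.0 errata quoted at the top of this file: the
	 * uncached KSEG1 load ahead of each burst of CACHE ops
	 * empties the data cache refill (response) buffer so that
	 * the Hit ops operate correctly.
	 */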
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

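/*
 * The VR4131 rev. 1 variant avoids Hit_Writeback_Invalidate_D and
 * instead issues a separate Hit_Writeback followed by Hit_Invalidate
 * for each line.  (Presumably this works around a VR4131 erratum with
 * the combined op; the erratum text itself is not quoted in this file.)
 */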
void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

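/*
 * Typical use of the inv/wb/wbinv range routines (illustrative, not
 * specific to this file): bus_dma(9) cache synchronization maps
 * roughly onto these hooks, with a writeback (wb) of a buffer before
 * memory-to-device DMA, an invalidate (inv) around device-to-memory
 * DMA, and writeback-invalidate (wbinv) when a buffer is used in both
 * directions.
 */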
void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line