/*	$NetBSD: cache_r5k.c,v 1.6 2002/11/07 23:03:21 cgd Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32 bytes/line),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
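
/*
 * The round/trunc macros below align a [va, va + size) range outward to
 * 16- or 32-byte cache line boundaries.  As an illustration, with 32-byte
 * lines the range [0x23, 0x51) becomes [0x20, 0x60), which covers every
 * line the original range touches.
 */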

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}
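
/*
 * The r4600v1 routines below implement the erratum 18 workaround: each
 * CACHE op is preceded by four nops (no loads or stores), and interrupts
 * are masked so a handler can't sneak dcache activity into that window.
 * The resulting instruction stream is roughly (a sketch):
 *
 *	nop; nop; nop; nop		# quiesce the dcache
 *	cache Hit_Writeback_Inv_D, 0(va) # tag match is now reliable
 */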

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		/* Uncached load empties the refill buffer (v2.0 errata). */
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}
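
/*
 * A note on the 2-way index sweep used above and below: way_mask keeps
 * only the bits that select a cache index, and way_size is the size of
 * one way.  Purely as an illustration, an 8KB 2-way dcache has 4KB ways,
 * so `va' walks way 0 while `w2va = va + 4KB' walks way 1; indexing both
 * ways invalidates every line that could hold data for the given range.
 */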

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}
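
/*
 * The _inv_ routines above discard cache lines without writing dirty
 * data back; the _wb_ routines below write dirty data back and leave
 * the lines valid.  Typically (an assumption about the callers, not
 * something this file enforces) inv is used before a device DMAs into
 * a buffer, and wb before a device DMAs out of one.
 */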

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line
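
/*
 * Usage sketch (illustrative; the hookup lives elsewhere): the MD cache
 * setup code selects among the routines in this file based on the
 * detected CPU and line size, and callers then go through the generic
 * entry points, e.g.
 *
 *	mips_dcache_wb_range((vaddr_t)buf, len);	(before DMA out)
 *	mips_dcache_inv_range((vaddr_t)buf, len);	(before DMA in)
 */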