/*	$NetBSD: cache_r5k.c,v 1.5 2002/01/19 04:25:37 shin Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
36 */ 37 38 #include <sys/param.h> 39 40 #include <mips/cache.h> 41 #include <mips/cache_r4k.h> 42 #include <mips/locore.h> 43 44 /* 45 * Cache operations for R5000-style caches: 46 * 47 * - 2-way set-associative 48 * - Write-back 49 * - Virtually indexed, physically tagged 50 * 51 * Since the R4600 is so similar (2-way set-associative, 32b/l), 52 * we handle that here, too. Note for R4600, we have to work 53 * around some chip bugs. From the v1.7 errata: 54 * 55 * 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D, 56 * Hit_Invalidate_D and Create_Dirty_Excl_D should only be 57 * executed if there is no other dcache activity. If the dcache is 58 * accessed for another instruction immeidately preceding when these 59 * cache instructions are executing, it is possible that the dcache 60 * tag match outputs used by these cache instructions will be 61 * incorrect. These cache instructions should be preceded by at least 62 * four instructions that are not any kind of load or store 63 * instruction. 64 * 65 * ...and from the v2.0 errata: 66 * 67 * The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D, 68 * Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate 69 * correctly if the internal data cache refill buffer is empty. These 70 * CACHE instructions should be separated from any potential data cache 71 * miss by a load instruction to an uncached address to empty the response 72 * buffer. 73 * 74 * XXX Does not handle split secondary caches. 75 */ 76 77 #define round_line16(x) (((x) + 15) & ~15) 78 #define trunc_line16(x) ((x) & ~15) 79 #define round_line(x) (((x) + 31) & ~31) 80 #define trunc_line(x) ((x) & ~31) 81 82 __asm(".set mips3"); 83 84 void 85 r5k_icache_sync_all_32(void) 86 { 87 vaddr_t va = MIPS_PHYS_TO_KSEG0(0); 88 vaddr_t eva = va + mips_picache_size; 89 90 /* 91 * Since we're hitting the whole thing, we don't have to 92 * worry about the 2 different "ways". 
93 */ 94 95 mips_dcache_wbinv_all(); 96 97 __asm __volatile("sync"); 98 99 while (va < eva) { 100 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 101 va += (32 * 32); 102 } 103 } 104 105 void 106 r5k_icache_sync_range_32(vaddr_t va, vsize_t size) 107 { 108 vaddr_t eva = round_line(va + size); 109 110 va = trunc_line(va); 111 112 mips_dcache_wb_range(va, (eva - va)); 113 114 __asm __volatile("sync"); 115 116 while ((eva - va) >= (32 * 32)) { 117 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); 118 va += (32 * 32); 119 } 120 121 while (va < eva) { 122 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); 123 va += 32; 124 } 125 } 126 127 void 128 r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size) 129 { 130 vaddr_t w2va, eva; 131 132 eva = round_line(va + size); 133 va = trunc_line(va); 134 135 mips_dcache_wbinv_range_index(va, (eva - va)); 136 137 __asm __volatile("sync"); 138 139 /* 140 * Since we're doing Index ops, we expect to not be able 141 * to access the address we've been given. So, get the 142 * bits that determine the cache index, and make a KSEG0 143 * address out of them. 144 */ 145 va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask); 146 147 eva = round_line(va + size); 148 va = trunc_line(va); 149 w2va = va + mips_picache_way_size; 150 151 while ((eva - va) >= (16 * 32)) { 152 cache_r4k_op_16lines_32_2way(va, w2va, 153 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 154 va += (16 * 32); 155 w2va += (16 * 32); 156 } 157 158 while (va < eva) { 159 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 160 cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 161 va += 32; 162 w2va += 32; 163 } 164 } 165 166 void 167 r5k_pdcache_wbinv_all_16(void) 168 { 169 vaddr_t va = MIPS_PHYS_TO_KSEG0(0); 170 vaddr_t eva = va + mips_pdcache_size; 171 172 /* 173 * Since we're hitting the whole thing, we don't have to 174 * worry about the 2 different "ways". 
175 */ 176 177 while (va < eva) { 178 cache_r4k_op_32lines_16(va, 179 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 180 va += (32 * 16); 181 } 182 } 183 184 void 185 r5k_pdcache_wbinv_all_32(void) 186 { 187 vaddr_t va = MIPS_PHYS_TO_KSEG0(0); 188 vaddr_t eva = va + mips_pdcache_size; 189 190 /* 191 * Since we're hitting the whole thing, we don't have to 192 * worry about the 2 different "ways". 193 */ 194 195 while (va < eva) { 196 cache_r4k_op_32lines_32(va, 197 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 198 va += (32 * 32); 199 } 200 } 201 202 void 203 r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size) 204 { 205 vaddr_t eva = round_line(va + size); 206 uint32_t ostatus; 207 208 /* 209 * This is pathetically slow, but the chip bug is pretty 210 * nasty, and we hope that not too many v1.x R4600s are 211 * around. 212 */ 213 214 va = trunc_line(va); 215 216 /* 217 * To make this a little less painful, just hit the entire 218 * cache if we have a range >= the cache size. 219 */ 220 if ((eva - va) >= mips_pdcache_size) { 221 r5k_pdcache_wbinv_all_32(); 222 return; 223 } 224 225 ostatus = mips_cp0_status_read(); 226 227 mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE); 228 229 while (va < eva) { 230 __asm __volatile("nop; nop; nop; nop;"); 231 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 232 va += 32; 233 } 234 235 mips_cp0_status_write(ostatus); 236 } 237 238 void 239 r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size) 240 { 241 vaddr_t eva = round_line(va + size); 242 uint32_t ostatus; 243 244 va = trunc_line(va); 245 246 ostatus = mips_cp0_status_read(); 247 248 mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE); 249 250 while ((eva - va) >= (32 * 32)) { 251 (void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0); 252 cache_r4k_op_32lines_32(va, 253 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 254 va += (32 * 32); 255 } 256 257 (void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0); 258 while (va < eva) { 259 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 260 va += 
32; 261 } 262 263 mips_cp0_status_write(ostatus); 264 } 265 266 void 267 vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size) 268 { 269 vaddr_t eva = round_line16(va + size); 270 271 va = trunc_line16(va); 272 273 while ((eva - va) >= (32 * 16)) { 274 cache_r4k_op_32lines_16(va, 275 CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 276 cache_r4k_op_32lines_16(va, 277 CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 278 va += (32 * 16); 279 } 280 281 while (va < eva) { 282 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 283 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 284 va += 16; 285 } 286 } 287 288 void 289 r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size) 290 { 291 vaddr_t eva = round_line16(va + size); 292 293 va = trunc_line16(va); 294 295 while ((eva - va) >= (32 * 16)) { 296 cache_r4k_op_32lines_16(va, 297 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 298 va += (32 * 16); 299 } 300 301 while (va < eva) { 302 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 303 va += 16; 304 } 305 } 306 307 void 308 r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size) 309 { 310 vaddr_t eva = round_line(va + size); 311 312 va = trunc_line(va); 313 314 while ((eva - va) >= (32 * 32)) { 315 cache_r4k_op_32lines_32(va, 316 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 317 va += (32 * 32); 318 } 319 320 while (va < eva) { 321 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 322 va += 32; 323 } 324 } 325 326 void 327 r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size) 328 { 329 vaddr_t w2va, eva; 330 331 /* 332 * Since we're doing Index ops, we expect to not be able 333 * to access the address we've been given. So, get the 334 * bits that determine the cache index, and make a KSEG0 335 * address out of them. 
336 */ 337 va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask); 338 339 eva = round_line16(va + size); 340 va = trunc_line16(va); 341 w2va = va + mips_pdcache_way_size; 342 343 while ((eva - va) >= (16 * 16)) { 344 cache_r4k_op_16lines_16_2way(va, w2va, 345 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 346 va += (16 * 16); 347 w2va += (16 * 16); 348 } 349 350 while (va < eva) { 351 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 352 cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 353 va += 16; 354 w2va += 16; 355 } 356 } 357 358 void 359 r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size) 360 { 361 vaddr_t w2va, eva; 362 363 /* 364 * Since we're doing Index ops, we expect to not be able 365 * to access the address we've been given. So, get the 366 * bits that determine the cache index, and make a KSEG0 367 * address out of them. 368 */ 369 va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask); 370 371 eva = round_line(va + size); 372 va = trunc_line(va); 373 w2va = va + mips_pdcache_way_size; 374 375 while ((eva - va) >= (16 * 32)) { 376 cache_r4k_op_16lines_32_2way(va, w2va, 377 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 378 va += (16 * 32); 379 w2va += (16 * 32); 380 } 381 382 while (va < eva) { 383 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 384 cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 385 va += 32; 386 w2va += 32; 387 } 388 } 389 390 void 391 r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size) 392 { 393 vaddr_t eva = round_line(va + size); 394 uint32_t ostatus; 395 396 /* 397 * This is pathetically slow, but the chip bug is pretty 398 * nasty, and we hope that not too many v1.x R4600s are 399 * around. 
400 */ 401 402 va = trunc_line(va); 403 404 ostatus = mips_cp0_status_read(); 405 406 mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE); 407 408 while (va < eva) { 409 __asm __volatile("nop; nop; nop; nop;"); 410 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 411 va += 32; 412 } 413 414 mips_cp0_status_write(ostatus); 415 } 416 417 void 418 r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size) 419 { 420 vaddr_t eva = round_line(va + size); 421 uint32_t ostatus; 422 423 va = trunc_line(va); 424 425 ostatus = mips_cp0_status_read(); 426 427 mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE); 428 429 /* 430 * Between blasts of big cache chunks, give interrupts 431 * a chance to get though. 432 */ 433 while ((eva - va) >= (32 * 32)) { 434 (void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0); 435 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 436 va += (32 * 32); 437 } 438 439 (void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0); 440 while (va < eva) { 441 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 442 va += 32; 443 } 444 445 mips_cp0_status_write(ostatus); 446 } 447 448 void 449 r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size) 450 { 451 vaddr_t eva = round_line16(va + size); 452 453 va = trunc_line16(va); 454 455 while ((eva - va) >= (32 * 16)) { 456 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 457 va += (32 * 16); 458 } 459 460 while (va < eva) { 461 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 462 va += 16; 463 } 464 } 465 466 void 467 r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size) 468 { 469 vaddr_t eva = round_line(va + size); 470 471 va = trunc_line(va); 472 473 while ((eva - va) >= (32 * 32)) { 474 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 475 va += (32 * 32); 476 } 477 478 while (va < eva) { 479 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 480 va += 32; 481 } 482 } 483 484 void 485 r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size) 486 { 487 vaddr_t eva = round_line(va + 
size); 488 uint32_t ostatus; 489 490 /* 491 * This is pathetically slow, but the chip bug is pretty 492 * nasty, and we hope that not too many v1.x R4600s are 493 * around. 494 */ 495 496 va = trunc_line(va); 497 498 ostatus = mips_cp0_status_read(); 499 500 mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE); 501 502 while (va < eva) { 503 __asm __volatile("nop; nop; nop; nop;"); 504 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 505 va += 32; 506 } 507 508 mips_cp0_status_write(ostatus); 509 } 510 511 void 512 r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size) 513 { 514 vaddr_t eva = round_line(va + size); 515 uint32_t ostatus; 516 517 va = trunc_line(va); 518 519 ostatus = mips_cp0_status_read(); 520 521 mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE); 522 523 /* 524 * Between blasts of big cache chunks, give interrupts 525 * a chance to get though. 526 */ 527 while ((eva - va) >= (32 * 32)) { 528 (void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0); 529 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 530 va += (32 * 32); 531 } 532 533 (void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0); 534 while (va < eva) { 535 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 536 va += 32; 537 } 538 539 mips_cp0_status_write(ostatus); 540 } 541 542 void 543 r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size) 544 { 545 vaddr_t eva = round_line16(va + size); 546 547 va = trunc_line16(va); 548 549 while ((eva - va) >= (32 * 16)) { 550 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 551 va += (32 * 16); 552 } 553 554 while (va < eva) { 555 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 556 va += 16; 557 } 558 } 559 560 void 561 r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size) 562 { 563 vaddr_t eva = round_line(va + size); 564 565 va = trunc_line(va); 566 567 while ((eva - va) >= (32 * 32)) { 568 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 569 va += (32 * 32); 570 } 571 572 while (va < eva) { 573 cache_op_r4k_line(va, 
CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 574 va += 32; 575 } 576 } 577 578 #undef round_line16 579 #undef trunc_line16 580 #undef round_line 581 #undef trunc_line 582