1 /* $NetBSD: cache_mipsNN.c,v 1.6 2002/11/24 07:41:31 simonb Exp $ */ 2 3 /* 4 * Copyright 2001 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 #include <sys/param.h> 39 40 #include <mips/cache.h> 41 #include <mips/cache_r4k.h> 42 #include <mips/cache_mipsNN.h> 43 #include <mips/mipsNN.h> 44 45 #include <uvm/uvm_extern.h> 46 47 #define round_line16(x) (((x) + 15) & ~15) 48 #define trunc_line16(x) ((x) & ~15) 49 50 #define round_line32(x) (((x) + 31) & ~31) 51 #define trunc_line32(x) ((x) & ~31) 52 53 54 #ifdef SB1250_PASS1 55 #define SYNC __asm __volatile("sync; sync") 56 #else 57 #define SYNC __asm __volatile("sync") 58 #endif 59 60 __asm(".set mips32"); 61 62 static int picache_stride; 63 static int picache_loopcount; 64 static int pdcache_stride; 65 static int pdcache_loopcount; 66 67 void 68 mipsNN_cache_init(uint32_t config, uint32_t config1) 69 { 70 int flush_multiple_lines_per_way; 71 72 flush_multiple_lines_per_way = mips_picache_way_size > PAGE_SIZE; 73 if (config & MIPSNN_CFG_VI) { 74 /* 75 * With a virtual Icache we don't need to flush 76 * multiples of the page size with index ops; we just 77 * need to flush one pages' worth. 78 */ 79 flush_multiple_lines_per_way = 0; 80 } 81 82 if (flush_multiple_lines_per_way) { 83 picache_stride = PAGE_SIZE; 84 picache_loopcount = (mips_picache_way_size / PAGE_SIZE) * 85 mips_picache_ways; 86 } else { 87 picache_stride = mips_picache_way_size; 88 picache_loopcount = mips_picache_ways; 89 } 90 91 if (mips_pdcache_way_size < PAGE_SIZE) { 92 pdcache_stride = mips_pdcache_way_size; 93 pdcache_loopcount = mips_pdcache_ways; 94 } else { 95 pdcache_stride = PAGE_SIZE; 96 pdcache_loopcount = (mips_pdcache_way_size / PAGE_SIZE) * 97 mips_pdcache_ways; 98 } 99 #define CACHE_DEBUG 100 #ifdef CACHE_DEBUG 101 if (config & MIPSNN_CFG_VI) 102 printf(" icache is virtual\n"); 103 printf(" picache_stride = %d\n", picache_stride); 104 printf(" picache_loopcount = %d\n", picache_loopcount); 105 printf(" pdcache_stride = %d\n", pdcache_stride); 106 printf(" pdcache_loopcount = %d\n", pdcache_loopcount); 107 #endif 108 } 109 110 void 111 mipsNN_icache_sync_all_16(void) 112 { 113 vaddr_t va, eva; 114 115 va = MIPS_PHYS_TO_KSEG0(0); 116 eva = va + mips_picache_size; 117 118 /* 119 * Since we're hitting the whole thing, we don't have to 120 * worry about the N different "ways". 121 */ 122 123 mips_dcache_wbinv_all(); 124 125 while (va < eva) { 126 cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 127 va += (32 * 16); 128 } 129 130 SYNC; 131 } 132 133 void 134 mipsNN_icache_sync_all_32(void) 135 { 136 vaddr_t va, eva; 137 138 va = MIPS_PHYS_TO_KSEG0(0); 139 eva = va + mips_picache_size; 140 141 /* 142 * Since we're hitting the whole thing, we don't have to 143 * worry about the N different "ways". 144 */ 145 146 mips_dcache_wbinv_all(); 147 148 while (va < eva) { 149 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 150 va += (32 * 32); 151 } 152 153 SYNC; 154 } 155 156 void 157 mipsNN_icache_sync_range_16(vaddr_t va, vsize_t size) 158 { 159 vaddr_t eva; 160 161 eva = round_line16(va + size); 162 va = trunc_line16(va); 163 164 mips_dcache_wb_range(va, (eva - va)); 165 166 while ((eva - va) >= (32 * 16)) { 167 cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); 168 va += (32 * 16); 169 } 170 171 while (va < eva) { 172 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); 173 va += 16; 174 } 175 176 SYNC; 177 } 178 179 void 180 mipsNN_icache_sync_range_32(vaddr_t va, vsize_t size) 181 { 182 vaddr_t eva; 183 184 eva = round_line32(va + size); 185 va = trunc_line32(va); 186 187 mips_dcache_wb_range(va, (eva - va)); 188 189 while ((eva - va) >= (32 * 32)) { 190 cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); 191 va += (32 * 32); 192 } 193 194 while (va < eva) { 195 cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV); 196 va += 32; 197 } 198 199 SYNC; 200 } 201 202 void 203 mipsNN_icache_sync_range_index_16(vaddr_t va, vsize_t size) 204 { 205 unsigned int eva, tmpva; 206 int i, stride, loopcount; 207 208 /* 209 * Since we're doing Index ops, we expect to not be able 210 * to access the address we've been given. So, get the 211 * bits that determine the cache index, and make a KSEG0 212 * address out of them. 213 */ 214 va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask); 215 216 eva = round_line16(va + size); 217 va = trunc_line16(va); 218 219 /* 220 * GCC generates better code in the loops if we reference local 221 * copies of these global variables. 222 */ 223 stride = picache_stride; 224 loopcount = picache_loopcount; 225 226 mips_dcache_wbinv_range_index(va, (eva - va)); 227 228 while ((eva - va) >= (8 * 16)) { 229 tmpva = va; 230 for (i = 0; i < loopcount; i++, tmpva += stride) 231 cache_r4k_op_8lines_16(tmpva, 232 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 233 va += 8 * 16; 234 } 235 236 while (va < eva) { 237 tmpva = va; 238 for (i = 0; i < loopcount; i++, tmpva += stride) 239 cache_op_r4k_line(tmpva, 240 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 241 va += 16; 242 } 243 } 244 245 void 246 mipsNN_icache_sync_range_index_32(vaddr_t va, vsize_t size) 247 { 248 unsigned int eva, tmpva; 249 int i, stride, loopcount; 250 251 /* 252 * Since we're doing Index ops, we expect to not be able 253 * to access the address we've been given. So, get the 254 * bits that determine the cache index, and make a KSEG0 255 * address out of them. 256 */ 257 va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask); 258 259 eva = round_line32(va + size); 260 va = trunc_line32(va); 261 262 /* 263 * GCC generates better code in the loops if we reference local 264 * copies of these global variables. 265 */ 266 stride = picache_stride; 267 loopcount = picache_loopcount; 268 269 mips_dcache_wbinv_range_index(va, (eva - va)); 270 271 while ((eva - va) >= (8 * 32)) { 272 tmpva = va; 273 for (i = 0; i < loopcount; i++, tmpva += stride) 274 cache_r4k_op_8lines_32(tmpva, 275 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 276 va += 8 * 32; 277 } 278 279 while (va < eva) { 280 tmpva = va; 281 for (i = 0; i < loopcount; i++, tmpva += stride) 282 cache_op_r4k_line(tmpva, 283 CACHE_R4K_I|CACHEOP_R4K_INDEX_INV); 284 va += 32; 285 } 286 } 287 288 void 289 mipsNN_pdcache_wbinv_all_16(void) 290 { 291 vaddr_t va, eva; 292 293 va = MIPS_PHYS_TO_KSEG0(0); 294 eva = va + mips_pdcache_size; 295 296 /* 297 * Since we're hitting the whole thing, we don't have to 298 * worry about the N different "ways". 299 */ 300 301 while (va < eva) { 302 cache_r4k_op_32lines_16(va, 303 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 304 va += (32 * 16); 305 } 306 307 SYNC; 308 } 309 310 void 311 mipsNN_pdcache_wbinv_all_32(void) 312 { 313 vaddr_t va, eva; 314 315 va = MIPS_PHYS_TO_KSEG0(0); 316 eva = va + mips_pdcache_size; 317 318 /* 319 * Since we're hitting the whole thing, we don't have to 320 * worry about the N different "ways". 321 */ 322 323 while (va < eva) { 324 cache_r4k_op_32lines_32(va, 325 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 326 va += (32 * 32); 327 } 328 329 SYNC; 330 } 331 332 void 333 mipsNN_pdcache_wbinv_range_16(vaddr_t va, vsize_t size) 334 { 335 vaddr_t eva; 336 337 eva = round_line16(va + size); 338 va = trunc_line16(va); 339 340 while ((eva - va) >= (32 * 16)) { 341 cache_r4k_op_32lines_16(va, 342 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 343 va += (32 * 16); 344 } 345 346 while (va < eva) { 347 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 348 va += 16; 349 } 350 351 SYNC; 352 } 353 354 void 355 mipsNN_pdcache_wbinv_range_32(vaddr_t va, vsize_t size) 356 { 357 vaddr_t eva; 358 359 eva = round_line32(va + size); 360 va = trunc_line32(va); 361 362 while ((eva - va) >= (32 * 32)) { 363 cache_r4k_op_32lines_32(va, 364 CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 365 va += (32 * 32); 366 } 367 368 while (va < eva) { 369 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV); 370 va += 32; 371 } 372 373 SYNC; 374 } 375 376 void 377 mipsNN_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size) 378 { 379 unsigned int eva, tmpva; 380 int i, stride, loopcount; 381 382 /* 383 * Since we're doing Index ops, we expect to not be able 384 * to access the address we've been given. So, get the 385 * bits that determine the cache index, and make a KSEG0 386 * address out of them. 387 */ 388 va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask); 389 390 eva = round_line16(va + size); 391 va = trunc_line16(va); 392 393 /* 394 * GCC generates better code in the loops if we reference local 395 * copies of these global variables. 396 */ 397 stride = pdcache_stride; 398 loopcount = pdcache_loopcount; 399 400 while ((eva - va) >= (8 * 16)) { 401 tmpva = va; 402 for (i = 0; i < loopcount; i++, tmpva += stride) 403 cache_r4k_op_8lines_16(tmpva, 404 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 405 va += 8 * 16; 406 } 407 408 while (va < eva) { 409 tmpva = va; 410 for (i = 0; i < loopcount; i++, tmpva += stride) 411 cache_op_r4k_line(tmpva, 412 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 413 va += 16; 414 } 415 } 416 417 void 418 mipsNN_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size) 419 { 420 unsigned int eva, tmpva; 421 int i, stride, loopcount; 422 423 /* 424 * Since we're doing Index ops, we expect to not be able 425 * to access the address we've been given. So, get the 426 * bits that determine the cache index, and make a KSEG0 427 * address out of them. 428 */ 429 va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask); 430 431 eva = round_line32(va + size); 432 va = trunc_line32(va); 433 434 /* 435 * GCC generates better code in the loops if we reference local 436 * copies of these global variables. 437 */ 438 stride = pdcache_stride; 439 loopcount = pdcache_loopcount; 440 441 while ((eva - va) >= (8 * 32)) { 442 tmpva = va; 443 for (i = 0; i < loopcount; i++, tmpva += stride) 444 cache_r4k_op_8lines_32(tmpva, 445 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 446 va += 8 * 32; 447 } 448 449 while (va < eva) { 450 tmpva = va; 451 for (i = 0; i < loopcount; i++, tmpva += stride) 452 cache_op_r4k_line(tmpva, 453 CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV); 454 va += 32; 455 } 456 } 457 458 void 459 mipsNN_pdcache_inv_range_16(vaddr_t va, vsize_t size) 460 { 461 vaddr_t eva; 462 463 eva = round_line16(va + size); 464 va = trunc_line16(va); 465 466 while ((eva - va) >= (32 * 16)) { 467 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 468 va += (32 * 16); 469 } 470 471 while (va < eva) { 472 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 473 va += 16; 474 } 475 476 SYNC; 477 } 478 479 void 480 mipsNN_pdcache_inv_range_32(vaddr_t va, vsize_t size) 481 { 482 vaddr_t eva; 483 484 eva = round_line32(va + size); 485 va = trunc_line32(va); 486 487 while ((eva - va) >= (32 * 32)) { 488 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 489 va += (32 * 32); 490 } 491 492 while (va < eva) { 493 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV); 494 va += 32; 495 } 496 497 SYNC; 498 } 499 500 void 501 mipsNN_pdcache_wb_range_16(vaddr_t va, vsize_t size) 502 { 503 vaddr_t eva; 504 505 eva = round_line16(va + size); 506 va = trunc_line16(va); 507 508 while ((eva - va) >= (32 * 16)) { 509 cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 510 va += (32 * 16); 511 } 512 513 while (va < eva) { 514 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 515 va += 16; 516 } 517 518 SYNC; 519 } 520 521 void 522 mipsNN_pdcache_wb_range_32(vaddr_t va, vsize_t size) 523 { 524 vaddr_t eva; 525 526 eva = round_line32(va + size); 527 va = trunc_line32(va); 528 529 while ((eva - va) >= (32 * 32)) { 530 cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 531 va += (32 * 32); 532 } 533 534 while (va < eva) { 535 cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB); 536 va += 32; 537 } 538 539 SYNC; 540 } 541