/*
 * vr5xxx.S -- CPU specific support routines
 *
 * Copyright (c) 1999 Cygnus Solutions
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

/* This file cloned from vr4300.S by dlindsay@cygnus.com
 * and recoded to suit Vr5432 and Vr5000.
 * Should be no worse for Vr43{00,05,10}.
 * Specifically, __cpu_flush() has been changed (a) to allow for the hardware
 * difference (in set associativity) between the Vr5432 and Vr5000,
 * and (b) to flush the optional secondary cache of the Vr5000.
 */

/* Processor Revision Identifier (PRID) Register: Implementation Numbers */
#define IMPL_VR5432	0x54

/* Cache Constants not determinable dynamically */
#define VR5000_2NDLINE	32		/* secondary cache line size */
#define VR5432_LINE	32		/* I,Dcache line sizes */
#define VR5432_SIZE	(16*1024)	/* I,Dcache half-size */


#ifndef __mips64
	.set mips3
#endif
#ifdef __mips16
/* This file contains 32 bit assembly code.  */
	.set nomips16
#endif

#include "regs.S"

	.text
	.align	2

	# Taken from "R4300 Preliminary RISC Processor Specification
	# Revision 2.0 January 1995" page 39: "The Count
	# register... increments at a constant rate... at one-half the
	# PClock speed."
	# We can use this fact to provide small polled delays.
/*
 * __cpu_timer_poll -- busy-wait for at least a given number of PClock ticks.
 *
 *   in:       a0 = (unsigned int) number of PClock ticks to wait for
 *   out:      void
 *   clobbers: a0, v0
 *
 * Returns immediately when a0 is zero.
 */
	.globl	__cpu_timer_poll
	.ent	__cpu_timer_poll
__cpu_timer_poll:
	.set	noreorder
	# in:	a0 = (unsigned int) number of PClock ticks to wait for
	# out:	void

	# The Vr4300 counter updates at half PClock, so divide by 2 to
	# get counter delta.  This is a branch-likely: the shift in the
	# delay slot is executed only when the branch is taken.
	bnezl	a0, 1f		# continue if delta non-zero
	srl	a0, a0, 1	# divide ticks by 2		{DELAY SLOT}
	# perform a quick return to the caller:
	j	ra
	nop			#				{DELAY SLOT}
1:
	mfc0	v0, C0_COUNT	# get current counter value
	nop
	nop
	# We cannot just do the simple test, of adding our delta onto
	# the current value (ignoring overflow) and then checking for
	# equality. The counter is incrementing every two PClocks,
	# which means the counter value can change between
	# instructions, making it hard to sample at the exact value
	# desired.

	# However, we do know that our entry delta value is less than
	# half the number space (since we divide by 2 on entry). This
	# means we can use a difference in signs to indicate timer
	# overflow.
	addu	a0, v0, a0	# unsigned add (ignore overflow)
	# We now have our end value (which will have been
	# sign-extended to fill the 64bit register value).
2:
	# get current counter value:
	mfc0	v0, C0_COUNT
	nop
	nop
	# This is an unsigned 32bit subtraction:
	subu	v0, a0, v0	# delta = (end - now)
	bgtzl	v0, 2b		# looping back is most likely
	nop			#				{DELAY SLOT}
	# We have now been delayed (in the foreground) for AT LEAST
	# the required number of counter ticks.
	j	ra		# return to caller
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__cpu_timer_poll

/*
 * __cpu_flush -- flush the processor caches to memory.
 *
 *   in:       void
 *   out:      void
 *   clobbers: a0, a1, a2, a3, t0, t1, t2, t3
 *
 * Writes back and invalidates the primary Dcache, then (non-Vr5432 parts
 * only, when CONFIG reports one) the secondary cache, then invalidates the
 * primary Icache.  All cache ops are index ops issued on addresses starting
 * at K0BASE.
 */
	.globl	__cpu_flush
	.ent	__cpu_flush
__cpu_flush:
	.set	noreorder
	# NOTE: The Vr4300 and Vr5432 *CANNOT* have any secondary cache.
	# On those, SC (bit 17 of CONFIG register) is hard-wired to 1,
	# except that email from Dennis_Han@el.nec.com says that old
	# versions of the Vr5432 incorrectly hard-wired this bit to 0.
	# The Vr5000 has an optional direct-mapped secondary cache,
	# and the SC bit correctly indicates this.

	# So, for the 4300 and 5432 we want to just
	# flush the primary Data and Instruction caches.
	# For the 5000 it is desired to flush the secondary cache too.
	# There is an operation difference worth noting.
	# The 4300 and 5000 primary caches use VA bit 14 to choose cache set,
	# whereas 5432 primary caches use VA bit 0.

	# This code interprets the relevant Config register bits as
	# much as possible, except for the 5432.
	# The code therefore has some portability.
	# However, the associativity issues mean you should not just assume
	# that this code works anywhere. Also, the secondary cache set
	# size is hardwired, since the 5000 series does not define codes
	# for variant sizes.

	# Note: this version of the code flushes D$ before I$.
	# It is difficult to construct a case where that matters,
	# but it cant hurt.

	mfc0	a0, C0_PRID	# a0 = Processor Revision register
	nop			# dlindsay: unclear why the nops, but
	nop			# vr4300.S had such so I do too.
	srl	a2, a0, PR_IMP	# want bits 8..15
	# The implementation field is 8 bits wide, so the mask must be
	# 0xff (decimal 255).  A mask of 0x255 would drop bits 1,3,5,7 of
	# the field and let an unrelated higher PRID bit through, which
	# makes other implementation numbers compare equal to IMPL_VR5432.
	andi	a2, a2, 0xff	# mask: now a2 = Implementation # field
	li	a1, IMPL_VR5432
	beq	a1, a2, 8f	# use Vr5432-specific flush algorithm
	nop			#				{DELAY SLOT}

	# Non-Vr5432 version of the code.
	# (The distinctions being: CONFIG is truthful about secondary cache,
	# and we act as if the primary Icache and Dcache are direct mapped.)

	mfc0	t0, C0_CONFIG	# t0 = CONFIG register
	nop
	nop
	li	a1, 1		# a1=1, a useful constant

	srl	a2, t0, CR_IC	# want IC field of CONFIG
	andi	a2, a2, 0x7	# mask: now a2= code for Icache size
	add	a2, a2, 12	# +12
	sllv	a2, a1, a2	# a2=primary instruction cache size in bytes

	srl	a3, t0, CR_DC	# DC field of CONFIG
	andi	a3, a3, 0x7	# mask: now a3= code for Dcache size
	add	a3, a3, 12	# +12
	sllv	a3, a1, a3	# a3=primary data cache size in bytes

	li	t2, (1 << CR_IB)	# t2=mask over IB boolean
	and	t2, t2, t0	# test IB field of CONFIG register value
	beqz	t2, 1f		#
	li	a1, 16		# 16 bytes (branch shadow: always loaded.)
	li	a1, 32		# non-zero, then 32bytes
1:

	li	t2, (1 << CR_DB)	# t2=mask over DB boolean
	and	t2, t2, t0	# test DB field of CONFIG register value
	beqz	t2, 2f		#
	li	a0, 16		# 16bytes (branch shadow: always loaded.)
	li	a0, 32		# non-zero, then 32bytes
2:
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# At this point,
	# a0 = primary Dcache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = primary Dcache size in bytes
	# t0 = CONFIG value
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0	# t3=t1=start address for any cache
	add	t2, t3, a3	# t2=end address+1 of Dcache
	sub	t2, t2, a0	# t2=address of last line in Dcache
3:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t3)
	bne	t3, t2, 3b	#
	addu	t3, a0		# (delay slot) increment by Dcache line size


	# Now check CONFIG to see if there is a secondary cache
	lui	t2, (1 << (CR_SC-16))	# t2=mask over SC boolean
	and	t2, t2, t0	# test SC in CONFIG
	bnez	t2, 6f		# SC set: no secondary cache, skip ahead
				# (the delay slot is the srl below; it only
				# writes scratch t3, so it is harmless)

	# There is a secondary cache. Find out its sizes.

	srl	t3, t0, CR_SS	# want SS field of CONFIG
	andi	t3, t3, 0x3	# mask: now t3= code for cache size.
	beqz	t3, 4f
	lui	a3, ((512*1024)>>16)	# a3= 512K, code was 0
	addu	t3, -1			# decrement code
	beqz	t3, 4f
	lui	a3, ((1024*1024)>>16)	# a3= 1 M, code 1
	addu	t3, -1			# decrement code
	beqz	t3, 4f
	lui	a3, ((2*1024*1024)>>16)	# a3= 2 M, code 2
	j	6f			# no secondary cache, code 3
					# (the delay slot is the li at 4:
					# below; a0 is not read after 6:,
					# so it is harmless)

4:	# a3 = secondary cache size in bytes
	li	a0, VR5000_2NDLINE	# no codes assigned for other than 32

	# At this point,
	# a0 = secondary cache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = secondary cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0	# t3=t1=start address for any cache
	add	t2, t3, a3	# t2=end address+1 of secondary cache
	sub	t2, t2, a0	# t2=address of last line in secondary cache
5:
	cache	INDEX_WRITEBACK_INVALIDATE_SD,0(t3)
	bne	t3, t2, 5b
	addu	t3, a0		# (delay slot) increment by line size


6:	# Any optional secondary cache done.  Now do I-cache and return.

	# At this point,
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	add	t2, t1, a2	# t2=end address+1 of Icache
	sub	t2, t2, a1	# t2=address of last line in Icache
7:
	cache	INDEX_INVALIDATE_I,0(t1)
	bne	t1, t2, 7b
	addu	t1, a1		# (delay slot) increment by Icache line size

	j	ra		# return to the caller
	nop			#				{DELAY SLOT}

8:

# Vr5432 version of the cpu_flush code.
# (The distinctions being: CONFIG can not be trusted about secondary
# cache (which does not exist). The primary caches use Virtual Address Bit 0
# to control set selection.)

# Code does not consult CONFIG about cache sizes: knows the hardwired sizes.
# Since both I and D have the same size and line size, uses a merged loop.

	li	a0, VR5432_LINE
	li	a1, VR5432_SIZE
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# a0 = cache line size in bytes
	# a1 = 1/2 cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)

	add	t2, t1, a1	# t2=end address+1
	sub	t2, t2, a0	# t2=address of last line (same for I and D)

9:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t1)	# set 0
	cache	INDEX_WRITEBACK_INVALIDATE_D,1(t1)	# set 1
	cache	INDEX_INVALIDATE_I,0(t1)	# set 0
	cache	INDEX_INVALIDATE_I,1(t1)	# set 1
	bne	t1, t2, 9b
	addu	t1, a0		# (delay slot) increment by line size

	j	ra		# return to the caller
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__cpu_flush

	# NOTE: This variable should *NOT* be addressed relative to
	# the $gp register since this code is executed before $gp is
	# initialised... hence we leave it in the text area. This will
	# cause problems if this routine is ever ROMmed:

	.globl	__buserr_cnt
__buserr_cnt:
	.word	0		# number of bus errors seen by __buserr
	.align	3
__k1_save:
	.word	0		# 8 byte slot (sd/ld) used by __buserr
	.word	0		# to preserve k1
	.align	2

/*
 * __buserr -- exception handler reached through the __exception_code stub.
 *
 * Reads the ExcCode field of CAUSE.  When it equals 7 the handler increments
 * __buserr_cnt, advances EPC by 4 so that the faulting instruction is
 * skipped, and returns with eret.  For any other code it jumps to
 * __previous, the saved copy of the vector contents that were in place
 * before __default_buserr_handler installed the stub.
 *
 * Only k0 and k1 are used; k1 is saved to and restored from __k1_save.
 */
	.ent	__buserr
	.globl	__buserr
__buserr:
	.set noat
	.set noreorder
	# k0 and k1 available for use:
	mfc0	k0,C0_CAUSE
	nop
	nop
	andi	k0,k0,0x7c	# isolate ExcCode (CAUSE bits 2..6)
	sub	k0,k0,7 << 2	# zero when ExcCode == 7
	beq	k0,$0,__buserr_do
	nop
	# call the previous handler
	la	k0,__previous
	jr	k0
	nop
	#
__buserr_do:
	# The cause was checked above, so this path is only reached
	# for ExcCode 7.
	la	k0,__k1_save
	sd	k1,0(k0)	# preserve k1 (64 bit store)
	#
	la	k1,__buserr_cnt
	lw	k0,0(k1)	# increment counter
	addu	k0,1
	sw	k0,0(k1)
	#
	la	k0,__k1_save
	ld	k1,0(k0)	# restore k1
	#
	mfc0	k0,C0_EPC
	nop
	nop
	addu	k0,k0,4		# skip offending instruction
	mtc0	k0,C0_EPC	# update EPC
	nop
	nop
	eret
#	j	k0
#	rfe
	.set reorder
	.set at
	.end	__buserr

/*
 * __exception_code .. __exception_code_end -- the stub that
 * __default_buserr_handler copies into the exception vector.  It simply
 * jumps to __buserr.
 */
__exception_code:
	.set noreorder
	lui	k0,%hi(__buserr)
	daddiu	k0,k0,%lo(__buserr)
	jr	k0
	nop
	.set reorder
__exception_code_end:

	.data
	# Holds a copy of the vector contents that the stub overwrote;
	# same size as the stub.
__previous:
	.space	(__exception_code_end - __exception_code)
	# This subtracting two addresses is working
	# but is not guaranteed to continue working.
	# The assembler reserves the right to put these
	# two labels into different frags, and then
	# cant take their difference.

	.text

/*
 * __default_buserr_handler -- attach our simple bus error handler.
 *
 *   in:       void
 *   out:      void
 *   clobbers: a0, a1, a2, a3, v0
 *
 * Picks the vector address from SR_BEV (0x80000180 when clear, 0xbfc00380
 * when set), saves the words currently at the vector into __previous,
 * copies the __exception_code stub over them, and zeroes __buserr_cnt.
 *
 * NOTE(review): no cache writeback/invalidate is performed here after the
 * stub is stored -- confirm whether callers are expected to call
 * __cpu_flush afterwards.
 */
	.ent	__default_buserr_handler
	.globl	__default_buserr_handler
__default_buserr_handler:
	.set noreorder
	# attach our simple bus error handler:
	# in:  void
	# out: void
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,baseaddr
	lui	a0,0x8000	# delay slot
	lui	a0,0xbfc0	# BEV set: bootstrap vectors
	daddiu	a0,a0,0x0200
baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a2,__exception_code
	subu	a1,a1,a2	# a1 = stub size in bytes
	la	a3,__previous
	# there must be a better way of doing this????
copyloop:
	lw	v0,0(a0)	# save the old vector word ...
	sw	v0,0(a3)	# ... into __previous
	lw	v0,0(a2)	# fetch the stub word ...
	sw	v0,0(a0)	# ... and install it in the vector
	daddiu	a0,a0,4
	daddiu	a2,a2,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,copyloop
	nop
	la	a0,__buserr_cnt
	sw	$0,0(a0)	# start counting from zero
	j	ra
	nop
	.set reorder
	.end	__default_buserr_handler

/*
 * __restore_buserr_handler -- restore original (monitor) bus error handler.
 *
 *   in:       void
 *   out:      void
 *   clobbers: a0, a1, a3, v0
 *
 * Copies the words saved in __previous back to the exception vector chosen
 * by SR_BEV, using the same address selection as __default_buserr_handler.
 */
	.ent	__restore_buserr_handler
	.globl	__restore_buserr_handler
__restore_buserr_handler:
	.set noreorder
	# restore original (monitor) bus error handler
	# in:  void
	# out: void
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,res_baseaddr
	lui	a0,0x8000	# delay slot
	lui	a0,0xbfc0	# BEV set: bootstrap vectors
	daddiu	a0,a0,0x0200
res_baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a3,__exception_code
	subu	a1,a1,a3	# a1 = stub size in bytes
	la	a3,__previous
	# there must be a better way of doing this????
res_copyloop:
	lw	v0,0(a3)	# saved vector word ...
	sw	v0,0(a0)	# ... goes back into the vector
	daddiu	a0,a0,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,res_copyloop
	nop
	j	ra
	nop
	.set reorder
	.end	__restore_buserr_handler

/*
 * __buserr_count -- return the number of bus errors counted so far.
 *
 *   in:       void
 *   out:      v0 = (unsigned int) __buserr_cnt
 *   clobbers: v0
 */
	.ent	__buserr_count
	.globl	__buserr_count
__buserr_count:
	.set noreorder
	# read the bus error counter
	# in:  void
	# out: unsigned int __buserr_cnt
	la	v0,__buserr_cnt
	lw	v0,0(v0)
	j	ra
	nop
	.set reorder
	.end	__buserr_count

/* EOF vr5xxx.S */