/*	$NetBSD: bzero.S,v 1.5 2002/07/30 06:07:58 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/
/*
 * PowerPC (32-bit) bzero/memset.
 *
 * Strategy: for large (> 2 cache lines) zero-fills, align the destination
 * to a data-cache-line boundary and clear whole lines with dcbz, which
 * zeroes a full cache line per instruction.  dcbz can only write zeroes,
 * so a non-zero memset fill value diverts to the plain store loops at
 * simple_fill.  Userland discovers the D-cache line size at first call
 * via sysctl(CTL_MACHDEP, ...) and caches it in .data; the kernel reads
 * it from its cpu_info structure.
 */

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

/* Register aliases used throughout the fill paths. */
#define r_dst	%r3		/* running destination pointer */
#define r_len	%r4		/* bytes remaining to fill */
#define r_val	%r0		/* fill value, replicated into all 4 bytes */

	.text
	.align 4
/*
 * bzero(b, len): just memset with a zero fill value.  %r4 is already the
 * length, matching memset's internal convention after its prologue, so we
 * can jump straight into the common path.
 */
ENTRY(bzero)
	li	r_val, 0		/* Value to stuff in */
	b	cb_memset

/*
 * memset(b, c, len): returns b (saved in %r8 and restored at every exit).
 * cr0 is set from the fill byte by `mr.` and tested below: a non-zero
 * fill cannot use dcbz, so it takes the simple_fill store loops instead.
 */
ENTRY(memset)
	cmplwi	cr1, %r5, 0		/* cr1 := (len == 0)? */
	mr.	%r0, %r4		/* r_val = c; cr0 := (c == 0)? */
	mr	%r8, %r3		/* stash original pointer for return */
	beqlr-	cr1			/* Nothing to do */

	/* Replicate the low byte of c into all four bytes of r_val:
	 * insert byte into bits 16-23, then copy the low halfword into
	 * the high halfword. */
	rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
	rlwimi	%r0, %r0, 16, 0, 15
	mr	%r4, %r5		/* r_len = len (bzero convention) */
	bne-	simple_fill		/* =! 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/* First find out cache line size */
#ifdef PIC
	mflr	%r9			/* preserve LR around GOT fetch */
	bl	_GLOBAL_OFFSET_TABLE_@local-4
	mflr	%r10			/* %r10 = GOT pointer */
	mtlr	%r9
	lwz	%r5,cache_info@got(%r10)
#else
	lis	%r5,cache_info@h
	ori	%r5,%r5,cache_info@l
#endif
	lwz	%r6, 4(%r5)		/* cache_info[1] = D$ line size */
	cmpwi	%r6, -1			/* -1 means "not probed yet" */
	bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
/* Slow path, first call only: ask the kernel for the cache geometry. */
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define CPU_CACHEINFO	5

/* Stack frame layout for the sysctl calls below.  Caller-saved registers
 * that are live across the calls (%r0 fill value, %r3 dst, %r4 len,
 * %r8 original pointer) are spilled into it. */
#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

	mflr	%r6
	stw	%r6, 4(%r1)		/* save LR in caller's LR slot */
	stwu	%r1, -STKFRAME_SZ(%r1)

	stw	%r8, R8_SAVE(%r1)
	stw	%r3, R3_SAVE(%r1)
	stw	%r4, R4_SAVE(%r1)
	stw	%r0, R0_SAVE(%r1)

	/* Try sysctl machdep.cacheinfo first (newer interface). */
	li	%r0, CTL_MACHDEP	/* Construct MIB */
	stw	%r0, MIB(%r1)
	li	%r0, CPU_CACHEINFO
	stw	%r0, MIB+4(%r1)

	li	%r0, 4*4		/* Oldlenp := 4*4 */
	stw	%r0, OLDPLEN(%r1)

	addi	%r3, %r1, MIB
	li	%r4, 2			/* namelen */
	/* %r5 already contains &cache_info */
	addi	%r6, %r1, OLDPLEN
	li	%r7, 0			/* newp := NULL */
	li	%r8, 0			/* newlen := 0 */
	bl	PIC_PLT(_C_LABEL(sysctl))

	cmpwi	%r3, 0			/* Check result */
	beq	1f

	/* Failure, try older sysctl: machdep.cachelinesize, which fills
	 * only cache_info[1] (the D$ line size). */

	li	%r0, CTL_MACHDEP	/* Construct MIB */
	stw	%r0, MIB(%r1)
	li	%r0, CPU_CACHELINE
	stw	%r0, MIB+4(%r1)

	li	%r0, 4			/* Oldlenp := 4 */
	stw	%r0, OLDPLEN(%r1)

	addi	%r3, %r1, MIB
	li	%r4, 2			/* namelen */
#ifdef PIC
	mflr	%r9			/* re-derive GOT pointer; %r10 was
					 * clobbered by the sysctl call */
	bl	_GLOBAL_OFFSET_TABLE_@local-4
	mflr	%r10
	mtlr	%r9
	lwz	%r5,cache_info@got(%r10)
	addi	%r5, %r5, 4		/* &cache_info[1] */
#else
	lis	%r5,cache_info+4@h
	ori	%r5,%r5,cache_info+4@l
#endif
	addi	%r6, %r1, OLDPLEN
	li	%r7, 0
	li	%r8, 0
	bl	PIC_PLT(_C_LABEL(sysctl))
1:
	/* Restore the spilled fill-state registers. */
	lwz	%r8, R8_SAVE(%r1)
	lwz	%r3, R3_SAVE(%r1)
	lwz	%r4, R4_SAVE(%r1)
	lwz	%r0, R0_SAVE(%r1)

#ifdef PIC
	bl	_GLOBAL_OFFSET_TABLE_@local-4
	mflr	%r10
	lwz	%r9, cache_info@got(%r10)
	lwz	%r9, 4(%r9)		/* %r9 = D$ line size */
#else
	lis	%r5, cache_info+4@ha
	lwz	%r9, cache_info+4@l(%r5)
#endif
	la	%r1, STKFRAME_SZ(%r1)	/* pop frame */
	lwz	%r5, 4(%r1)
	mtlr	%r5			/* restore LR */

	/* shift = 31 - cntlzw(linesize) = log2(linesize), assuming the
	 * line size is a power of two. */
	cntlzw	%r6, %r9		/* compute shift value */
	li	%r5, 31
	subf	%r5, %r6, %r5

	/* Cache the shift for subsequent calls. */
#ifdef PIC
	lwz	%r6, cache_sh@got(%r10)
	stw	%r5, 0(%r6)
#else
	lis	%r6, cache_sh@ha
	stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Cache geometry is known; load it: line size -> %r9, shift -> %r10.
 * (In the PIC path %r10 enters holding the GOT pointer and is
 * overwritten by the shift value here.) */
cb_cacheline_known:
#ifdef PIC
	lwz	%r5, cache_info@got(%r10)
	lwz	%r9, 4(%r5)
	lwz	%r5, cache_sh@got(%r10)
	lwz	%r10, 0(%r5)
#else
	lis	%r9, cache_info+4@ha
	lwz	%r9, cache_info+4@l(%r9)
	lis	%r10, cache_sh@ha
	lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef MULTIPROCESSOR
	mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
	lis	%r10, cpu_info_store@ha
	addi	%r10, %r10, cpu_info_store@l
#endif
	lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
	cntlzw	%r10, %r9		/* Calculate shift.. */
	li	%r6, 31
	subf	%r10, %r10, %r6		/* %r10 = log2(linesize) */
#endif /* _KERNEL */
	/* Back in memory filling business.
	 * Here: r_dst, r_len, r_val as defined above; %r9 = line size;
	 * %r10 = log2(line size); %r8 = original pointer; fill is zero. */

	cmplwi	cr1, r_len, 0		/* Nothing to do? */
	add	%r5, %r9, %r9		/* %r5 = 2 * linesize */
	cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
	beqlr-	cr1			/* then do nothing */

	blt+	simple_fill		/* a trivial fill routine */

	/* Word align the block, fill bytewise until dst even */

	andi.	%r5, r_dst, 0x03	/* misalignment within word */
	li	%r6, 4
	beq+	cb_aligned_w		/* already aligned to word? */

	subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5

	/* Pre-decrement so stbu's update-form addressing works. */
	subi	r_dst, r_dst, 1
1:	stbu	r_val, 1(r_dst)		/* Fill bytewise */
	bdnz	1b

	addi	r_dst, r_dst, 1		/* undo pre-decrement bias */
#endif
	subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

	/* I know I have something to do since we had > 2*CL initially */
	/* so no need to check for r_len = 0 */

	rlwinm.	%r5, r_dst, 30, 29, 31	/* word offset within cache line */
	srwi	%r6, %r9, 2		/* words per cache line */
	beq	cb_aligned_cb		/* already on CL boundary? */

	subf	%r5, %r5, %r6		/* words to fill to alignment */
	mtctr	%r5
	slwi	%r5, %r5, 2		/* same count in bytes */
	subf	r_len, %r5, r_len

	subi	r_dst, r_dst, 4		/* bias for stwu update form */
1:	stwu	r_val, 4(r_dst)		/* Fill wordwise */
	bdnz	1b
	addi	r_dst, r_dst, 4		/* undo bias */

cb_aligned_cb:	/* no need to check r_len, see above */

	srw.	%r5, r_len, %r10	/* Number of cache blocks */
	mtctr	%r5
	beq	cblocks_done		/* less than one full block left */

	slw	%r5, %r5, %r10		/* bytes covered by those blocks */
	subf	r_len, %r5, r_len

1:	dcbz	0, r_dst		/* Clear blockwise */
	add	r_dst, r_dst, %r9
	bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
	cmplwi	cr1, r_len, 0
	cmplwi	r_len, 8
	beq-	cr1, sf_return		/* exactly done */

	blt-	sf_bytewise		/* <8 remaining? */
	b	sf_aligned_w		/* dst still word aligned here */

/*----------------------------------------------------------------------*/
/* Plain store-loop fill, used for small or non-zero fills.
 * In: r_dst, r_len, r_val, %r8 = original pointer.
 * wbzero is an alternate entry that forces a zero fill value. */
wbzero:	li	r_val, 0

	cmplwi	r_len, 0
	beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
	cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
	cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
	andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
	blt	cr1, sf_bytewise	/* trivial byte mover */

	li	%r6, 4
	subf	%r5, %r5, %r6		/* bytes needed to reach align4 */
	beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5			/* nope, then fill bytewise */
	subi	r_dst, r_dst, 1		/* until it is */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it were >= 8 bytes initially */
#if USE_STSWX
	mr	%r6, %r0
	mr	%r7, %r0

	srwi	%r5, r_len, 3		/* 8-byte chunks to store */
	mtctr	%r5

	slwi	%r5, %r5, 3		/* adjust len */
	subf.	r_len, %r5, r_len	/* cr0 := any tail bytes left? */

1:	stswi	%r6, r_dst, 8
	addi	r_dst, r_dst, 8
	bdnz	1b
#else
	srwi	%r5, r_len, 2		/* words to fill */
	mtctr	%r5

	slwi	%r5, %r5, 2
	subf.	r_len, %r5, r_len	/* adjust len for fill;
					 * cr0 := any tail bytes left? */

	subi	r_dst, r_dst, 4		/* bias for stwu update form */
1:	stwu	r_val, 4(r_dst)
	bdnz	1b
	addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise	/* tail of 1-3 bytes remains */

sf_return:	mr	%r3, %r8	/* restore orig ptr */
	blr				/* for memset functionality */

sf_bytewise:	/* final byte loop; r_len > 0 guaranteed by callers */
#if USE_STSWX
	mr	%r5, %r0
	mr	%r6, %r0
	mr	%r7, %r0

	mtxer	r_len
	stswx	%r5, 0, r_dst
#else
	mtctr	r_len

	subi	r_dst, r_dst, 1
1:	stbu	r_val, 1(r_dst)
	bdnz	1b
#endif
	mr	%r3, %r8		/* restore orig ptr */
	blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/* Cached cache geometry (userland only).
	 * cache_info[1] is the D$ line size; -1 = not yet probed.
	 * cache_sh is log2 of the line size, computed after probing. */
	.data
cache_info:	.long -1, -1, -1, -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/