/* $NetBSD: bzero.S,v 1.11 2011/01/29 02:21:20 matt Exp $ */

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/
/*
 * PowerPC (32-bit) bzero/memset.
 *
 * Strategy: for a zero fill larger than twice the data-cache line size,
 * align the destination bytewise to a word boundary, then wordwise to a
 * cache-line boundary, then clear whole lines with dcbz (which zeroes an
 * entire D-cache block without reading memory), and mop up the tail with
 * word/byte stores.  Non-zero fills and short fills take the trivial
 * store-loop path (simple_fill), since dcbz can only produce zeroes.
 *
 * The D-cache line size is discovered at run time: in the kernel it is
 * read from cpu_info; in userland it is fetched once via sysctl(2)
 * (machdep.cacheinfo, falling back to machdep.cachelinesize on older
 * kernels) and cached in the private cache_info/cache_sh variables.
 */

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.11 2011/01/29 02:21:20 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

/*
 * Register roles throughout:
 *	r_dst (%r3)	current destination pointer
 *	r_len (%r4)	bytes remaining
 *	r_val (%r0)	fill value, replicated to all four bytes
 *	%r8		original destination, returned by memset
 *	%r9		D-cache line size in bytes (cache-block path)
 *	%r10		log2 of the cache line size (shift count)
 *	%r5, %r6, %r7	scratch
 */
#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

	.text
	.align 4
/* bzero(b, len): a zero fill is memset's cache-block path with r_val = 0. */
ENTRY(bzero)
	li	r_val, 0		/* Value to stuff in */
	b	cb_memset
END(bzero)

ENTRY(memset)
	cmplwi	%cr1, %r5, 0		/* cr1: len == 0? (tested below) */
	mr.	%r0, %r4		/* cr0: fill byte == 0? — consumed
					 * by the bne- after the rlwimis,
					 * which do not touch cr0 */
	mr	%r8, %r3		/* save original dst for return */
	beqlr-	%cr1			/* Nothing to do */

	rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
	rlwimi	%r0, %r0, 16, 0, 15	/* now all 4 bytes of %r0 == c */
	mr	%r4, %r5		/* len into r_len (bzero layout) */
	bne-	simple_fill		/* != 0, use trivial fill
					 * (dcbz only writes zeroes) */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/* First find out cache line size */
	mflr	%r9			/* preserve LR around GOT setup */
#ifdef PIC
	PIC_GOTSETUP(%r10)
	mtlr	%r9
	lwz	%r5,cache_info@got(%r10)
#else
	lis	%r5,cache_info@h
	ori	%r5,%r5,cache_info@l
#endif
	lwz	%r6, 4(%r5)		/* cache_info[1] = D$ line size */
	cmpwi	%r6, -1			/* still the unqueried sentinel? */
	bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
/* One-time path: ask the kernel for the cache geometry via sysctl(2). */
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define CPU_CACHEINFO	5

/* Stack-frame layout for the sysctl call (offsets from %r1). */
#define STKFRAME_SZ	64
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
#define R31_SAVE	36
#ifdef PIC
#define R30_SAVE	40
#endif

	stw	%r9, 4(%r1)		/* save LR in caller's frame */
	stwu	%r1, -STKFRAME_SZ(%r1)

	stw	%r31, R31_SAVE(%r1)
	mr	%r31, %r5		/* cache info */

#ifdef PIC
	stw	%r30, R30_SAVE(%r1)
	PIC_TOCSETUP(cb_memset,%r30)
#endif

	/* Save the live fill state across the C call. */
	stw	%r8, R8_SAVE(%r1)
	stw	%r3, R3_SAVE(%r1)
	stw	%r4, R4_SAVE(%r1)
	stw	%r0, R0_SAVE(%r1)

	li	%r0, CTL_MACHDEP	/* Construct MIB */
	stw	%r0, MIB(%r1)
	li	%r0, CPU_CACHEINFO
	stw	%r0, MIB+4(%r1)

	li	%r0, 4*4		/* Oldlenp := 4*4 */
	stw	%r0, OLDPLEN(%r1)

	/* sysctl(mib, 2, cache_info, &oldplen, NULL, 0) */
	addi	%r3, %r1, MIB
	li	%r4, 2			/* namelen */
	/* %r5 already contains &cache_info */
	addi	%r6, %r1, OLDPLEN
	li	%r7, 0
	li	%r8, 0
	bl	PIC_PLT(_C_LABEL(sysctl))

	cmpwi	%r3, 0			/* Check result */
	beq	1f

	/* Failure, try older sysctl */

	li	%r0, CTL_MACHDEP	/* Construct MIB */
	stw	%r0, MIB(%r1)
	li	%r0, CPU_CACHELINE
	stw	%r0, MIB+4(%r1)

	li	%r0, 4			/* Oldlenp := 4 */
	stw	%r0, OLDPLEN(%r1)

	/* sysctl(mib, 2, &cache_info[1], &oldplen, NULL, 0) */
	addi	%r3, %r1, MIB
	li	%r4, 2			/* namelen */
	addi	%r5, %r31, 4		/* store line size into cache_info[1] */
	addi	%r6, %r1, OLDPLEN
	li	%r7, 0
	li	%r8, 0
	bl	PIC_PLT(_C_LABEL(sysctl))
1:
	/* Restore the fill state and unwind the frame. */
	lwz	%r3, R3_SAVE(%r1)
	lwz	%r4, R4_SAVE(%r1)
	lwz	%r8, R8_SAVE(%r1)
	lwz	%r0, R0_SAVE(%r1)
	lwz	%r9, 4(%r31)		/* %r9 = D$ line size just fetched */
	lwz	%r31, R31_SAVE(%r1)
#ifdef PIC
	lwz	%r30, R30_SAVE(%r1)
#endif
	addi	%r1, %r1, STKFRAME_SZ
	lwz	%r7, 4(%r1)
	mtlr	%r7

	cntlzw	%r6, %r9		/* compute shift value */
	li	%r5, 31
	subf	%r5, %r6, %r5		/* %r5 = 31 - cntlzw = log2(line) */

	/* Cache the shift for subsequent calls. */
#ifdef PIC
	mflr	%r9
	PIC_GOTSETUP(%r10)
	mtlr	%r9
	lwz	%r6, cache_sh@got(%r10)
	stw	%r5, 0(%r6)
#else
	lis	%r6, cache_sh@ha
	stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#ifdef PIC
	lwz	%r5, cache_info@got(%r10)
	lwz	%r9, 4(%r5)		/* %r9 = line size */
	lwz	%r5, cache_sh@got(%r10)
	lwz	%r10, 0(%r5)		/* %r10 = log2(line size) */
#else
	lis	%r9, cache_info+4@ha
	lwz	%r9, cache_info+4@l(%r9)
	lis	%r10, cache_sh@ha
	lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef MULTIPROCESSOR
	mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
	lis	%r10, cpu_info_store@ha
	addi	%r10, %r10, cpu_info_store@l
#endif
	lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
	cntlzw	%r10, %r9		/* Calculate shift.. */
	li	%r6, 31
	subf	%r10, %r10, %r6		/* %r10 = 31 - cntlzw = log2(line) */
#endif /* _KERNEL */
	/* Back in memory filling business */

	cmplwi	%cr1, r_len, 0		/* Nothing to do? */
	add	%r5, %r9, %r9
	cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
	beqlr-	%cr1			/* then do nothing */

	blt+	simple_fill		/* a trivial fill routine */

	/* Word align the block, fill bytewise until dst even*/

	andi.	%r5, r_dst, 0x03
	li	%r6, 4
	beq+	cb_aligned_w		/* already aligned to word? */

	subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5

	subi	r_dst, r_dst, 1		/* pre-decrement for stbu */
1:	stbu	r_val, 1(r_dst)		/* Fill bytewise */
	bdnz	1b

	addi	r_dst, r_dst, 1		/* undo the pre-decrement */
#endif
	subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

	/* I know I have something to do since we had > 2*CL initially */
	/* so no need to check for r_len = 0 */

	subi	%r6, %r9, 1		/* CL mask */
	and.	%r5, r_dst, %r6		/* offset within cache line */
	srwi	%r5, %r5, 2		/* ... in words */
	srwi	%r6, %r9, 2		/* line size in words */
	beq	cb_aligned_cb		/* already on CL boundary? */

	subf	%r5, %r5, %r6		/* words to fill to alignment */
	mtctr	%r5
	slwi	%r5, %r5, 2		/* back to bytes for len adjust */
	subf	r_len, %r5, r_len

	subi	r_dst, r_dst, 4		/* pre-decrement for stwu */
1:	stwu	r_val, 4(r_dst)		/* Fill wordwise */
	bdnz	1b
	addi	r_dst, r_dst, 4		/* undo the pre-decrement */

cb_aligned_cb:	/* no need to check r_len, see above */

	srw.	%r5, r_len, %r10	/* Number of cache blocks */
	mtctr	%r5
	beq	cblocks_done

	slw	%r5, %r5, %r10		/* bytes covered by whole blocks */
	subf	r_len, %r5, r_len

1:	dcbz	0, r_dst		/* Clear blockwise */
	add	r_dst, r_dst, %r9
	bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
	cmplwi	%cr1, r_len, 0
	cmplwi	r_len, 8
	beq-	%cr1, sf_return		/* nothing left */

	blt-	sf_bytewise		/* <8 remaining? */
	b	sf_aligned_w		/* dst still word aligned here */

/*----------------------------------------------------------------------*/
/* Trivial fill path: no cache-block tricks, just word/byte store loops. */
wbzero:	li	r_val, 0

	cmplwi	r_len, 0
	beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
	cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
	cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
	andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
	blt	%cr1, sf_bytewise	/* trivial byte mover */

	li	%r6, 4
	subf	%r5, %r5, %r6		/* 4 - misalignment */
	beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5			/* nope, then fill bytewise */
	subi	r_dst, r_dst, 1		/* until it is */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b

	addi	r_dst, r_dst, 1		/* undo the pre-decrement */
#endif
	subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it were >= 8 bytes initially */
#if USE_STSWX
	mr	%r6, %r0
	mr	%r7, %r0

	srwi	%r5, r_len, 3		/* 8-byte chunks */
	mtctr	%r5

	slwi	%r5, %r5, 3		/* adjust len */
	subf.	r_len, %r5, r_len	/* cr0 feeds sf_word_done */

1:	stswi	%r6, r_dst, 8
	addi	r_dst, r_dst, 8
	bdnz	1b
#else
	srwi	%r5, r_len, 2		/* words to fill */
	mtctr	%r5

	slwi	%r5, %r5, 2
	subf.	r_len, %r5, r_len	/* adjust len for fill; cr0 tells
					 * sf_word_done if a byte tail
					 * remains */

	subi	r_dst, r_dst, 4		/* pre-decrement for stwu */
1:	stwu	r_val, 4(r_dst)
	bdnz	1b
	addi	r_dst, r_dst, 4		/* undo the pre-decrement */
#endif

sf_word_done:	bne-	sf_bytewise	/* leftover bytes (r_len != 0)? */

sf_return:	mr	%r3, %r8	/* restore orig ptr */
	blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
	mr	%r5, %r0
	mr	%r6, %r0
	mr	%r7, %r0

	mtxer	r_len
	stswx	%r5, 0, r_dst
#else
	mtctr	r_len

	subi	r_dst, r_dst, 1		/* pre-decrement for stbu */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b
#endif
	mr	%r3, %r8		/* restore orig ptr */
	blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/*
	 * Userland cache-geometry cache, filled lazily via sysctl above.
	 * cache_info[1] == -1 means "not queried yet"; cache_sh holds
	 * log2 of the D-cache line size once known.
	 */
	.data
cache_info:	.long -1, -1, -1, -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/