/*	$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $")
#endif  /* LIBC_SCCS and not lint */


/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 *
 * We want to use VIS instructions if we're clearing out more than
 * 256 bytes, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * of 0x8000000000000000 or greater, which are negative numbers.
 * This should not really be an issue since the VA hole should
 * cause any such ranges to fail anyway.
 *
 * Register usage (leaf part, caller's window):
 *	%o0	current store address (advanced as bytes are written)
 *	%o1	fill pattern; low byte replicated to 64 bits once aligned
 *	%o2	remaining length; biased by -8 once the 8-byte loop is armed
 *	%o3	scratch
 *	%o4	original addr, handed back in %o0 on return
 *	%o5	scratch (block path only)
 *
 * Everything is written with SPARC branch delay slots: the instruction
 * indented by one extra space after a branch is its delay slot.
 */
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	mov	%o1, %o2		! Shuffle len into memset's slot
	clr	%o1			! set pattern to zero, fall into memset
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

	/*
	 * Store single bytes until addr is 8-byte aligned or len runs out.
	 */
Lmemset_internal:
	btst	7, %o0			! 8-byte aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal
	 stb	%o1, [%o0 - 1]		! annulled (no store) once len < 0

	retl				! Duplicate Lmemset_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1
	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1
1:
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len >= 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	 deccc	8, %o2			! len -= 8; also the bge delay slot above
	/*
	 * Entered with %o2 = (bytes left - 8) and the condition codes
	 * reflecting that value.
	 */
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 * The low three bits of len give the number of bytes left.
	 */
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = pattern;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = pattern;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = pattern;
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
	/*
	 * Reached with %o2 already decremented by 8 (delay slot of the
	 * bge above) and the condition codes still describing that value;
	 * nothing before the fallbacks to Lmemset_longs modifies them.
	 */
Lmemset_block:
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs	! Block ops disabled, use stx loop
	!! Make sure our trap table is installed
	! NOTE(review): the delay slot of the brnz above is the first
	! instruction of this `set'; %o5 is not used on the taken path.
	set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or if we're on the interrupt stack
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 */

	/*
	 * NOTE(review): ENABLE_FPU comes from strmacros.h.  It presumably
	 * opens a new register window, since from here on the arguments
	 * are addressed as %i0-%i2 and the window is closed by the
	 * `restore' at the end -- confirm against strmacros.h.
	 */
	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2			! len -= 8 for every stx issued

2:
	brz	%i1, 3f			! Skip the memory op
	 fzero	%f0			! if pattern is 0

	/*
	 * Non-zero pattern: bounce it through memory to get it from the
	 * integer register into %f0.
	 */
#ifdef _LP64
	stx	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ldd	[%i0], %f0
#else
	stw	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1		! Always: copy low word into %f1
#endif

3:
	fmovd	%f0, %f2		! Duplicate the pattern
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: we were 8 bytes too far
	dec	56, %i2			! Go one iteration too far
5:
	stda	%f0, [%i0] ASI_STORE	! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	/*
	 * Test the length with CCCR like every other length check in this
	 * file.  With %icc a 64-bit count that has bit 31 set left this
	 * loop early and the rest was stored 8 bytes at a time.
	 */
	bg,pt	CCCR, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2		! Restore the count (and set cc for
					! the bl at Lmemset_longs)
	ba,pt	%xcc, Lmemset_longs	! Finish up the remainder
	 restore			! Back to the caller's window
#endif	/* USE_BLOCK_STORE_LOAD */