/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif

/*
 * memset (default build) / bzero (build with BZERO defined).
 *
 * Inputs, as read by the code below:
 *   memset:  r4 = dst, r5 = fill value (low byte is used), r6 = len
 *   bzero:   r4 = dst, r5 = len; the fill value is forced to 0 in r2
 * Result:
 *   memset returns the original dst in r0 (saved in r3 on entry).
 *   bzero does not set up a return value.
 * Registers written: r0, r1, the argument registers, plus r3 (memset)
 * or r2 (bzero), and the T bit.
 *
 * Strategy, selected by len:
 *   len <  12	byte loop, storing backwards from dst + len
 *   len <  28	computed jump into an unrolled store table: byte stores
 *		when dst is odd, 16-bit stores when dst is even
 *   len >= 28	replicate the fill byte to 32 bits, trim the unaligned
 *		head and tail with byte/16-bit stores, then fill with
 *		32-bit stores (32 bytes per iteration, then the remainder)
 *
 * NOTE: bt/s, bf/s, bra, jmp and rts are delayed branches; the
 * instruction that follows each of them is executed before the branch
 * takes effect.  Several sequences below depend on that, so do not
 * reorder instructions around those branches.
 */

/* scratch registers */
#define	REG_PTR				r0	/* large: moving end pointer */
#define	REG_TMP1			r1

#ifdef BZERO
# define	REG_C				r2	/* fill value (always 0) */
# define	REG_DST				r4
# define	REG_LEN				r5
#else
# define	REG_DST0			r3	/* copy of dst for the return value */
# define	REG_DST				r4
# define	REG_C				r5	/* fill value */
# define	REG_LEN				r6
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
	/*
	 * Delay slot of the branch above: "mov #0,REG_C" for bzero,
	 * otherwise the "tst" below (it only changes T, which the
	 * code at small: does not depend on).
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done			/* if (len == 0) goto done; */
	add	#1,REG_DST		/* REG_DST = dst + 1 (loop sentinel) */

	/*
	 * Loop unrolled 4 times.  T is computed *before* each store:
	 * T = (end == dst + 1), i.e. "the next store is the last one".
	 * The cmp/eq in each delay slot prepares T for the next store.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * 12 <= len < 28.
 * If dst is even, use the 16-bit store table at small_aligned.
 * Otherwise jump into the byte store table below: each store is one
 * 2-byte instruction, so entering at (1f - 2 * len) executes exactly
 * the stores for offsets len-1 .. 0.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0
	bt/s	small_aligned		/* if ((dst & 1) == 0) goto small_aligned; */
	mov	REG_DST,REG_TMP1
	shll	REG_LEN			/* REG_LEN = len * 2 (table offset in bytes) */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* delay slot: stores below take the value from r0 */

	.align	2
	mov.b	r0,@(15,REG_TMP1)	/* offsets 31 .. 16 via REG_TMP1 = dst+16 */
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)	/* offsets 15 .. 0 via REG_DST */
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
	/*
	 * For bzero the final store sits in the delay slot of rts, so
	 * the table keeps the same number of instructions in front of
	 * the 1: label in both builds.
	 */
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	/* replicate the fill byte into the low 16 bits of REG_C */
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0			/* delay slot: r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	/*
	 * Each 16-bit store is one 2-byte instruction covering 2 bytes
	 * of the buffer, so entering at (1f - len) executes exactly
	 * the stores for offsets len-2 .. 0.
	 */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* delay slot: stores below take the value from r0 */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
	/* as above: for bzero the final store is the delay slot of rts */
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif



/*
 * len >= 28.
 * REG_PTR is set to dst + len and moves downwards; REG_DST moves
 * upwards while the unaligned head is filled.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	/* replicate the fill byte into all 4 bytes of REG_C */
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst		/* if (dst & 3) goto unaligned_dst; */
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN
	bf/s	unaligned_len		/* if (len & 3) goto unaligned_len; */
	/*
	 * The delay slot of the branch above is the first instruction
	 * of aligned: (mov #32,REG_TMP1).  unaligned_len does not read
	 * REG_TMP1, so this is harmless.
	 */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1
	bt	9f			/* if (32 > len) skip the 32-byte loop */
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32; */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32; */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len), tested by bf/s below */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)
9:

	/* fill left 4*n bytes */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* if (ptr == dst) nothing is left */
	add	#4,REG_DST		/* REG_DST = dst + 4 (loop sentinel) */
	/*
	 * Same scheme as the byte loop at the top of the file, with
	 * 32-bit stores: T = (ptr == dst + 4) is computed before each
	 * store and means "the next store is the last one".
	 */
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * Fill the unaligned head (up to 3 bytes at dst) and the unaligned
 * tail (up to 3 bytes below ptr), then recompute len and continue at
 * aligned:.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 (T is unchanged) */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* delay slot: T = ((dst & 2) == 0) */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/*   re-test bit 1 of the new dst */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(uint16_t*)dst++ = c;    */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/*
	 * The delay slot of the branch above is the first instruction
	 * of unaligned_len: (tst #1,REG_PTR); it only changes T.
	 */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/* delay slot: T = ((ptr & 2) == 0); */
					/* bit 1 of an odd ptr is unchanged  */
					/* by the byte pre-decrement below   */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(uint16_t*)ptr = c;   */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* delay slot: len = ptr - dst; */
