/*	$NetBSD: memcpy_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if !defined(__minix)
#if defined(__ARM_EABI__)
STRONG_ALIAS(__aeabi_memcpy, memcpy)
#endif
#endif /* !defined(__minix) */

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len throughout.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */

/* For MINIX, we always spill r0, r4, r5, and lr, so we can easily
 * clean up the stack after a phys_copy fault.  NetBSD, in contrast,
 * spills the minimum number of registers for each path.
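 * Because the same four registers are on the stack on every path, the
 * phys_copy_fault handlers at the end of this file can pop that frame
 * and return regardless of where in the copy the fault was taken.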
 */
#if defined(__minix)
/* LINTSTUB: Func: void *phys_copy(void *src, void *dst, size_t len) */
ENTRY(phys_copy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#else
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
#endif
	/* save leaf functions having to store this away */
#if defined(__minix)
	push	{r0, r4, r5, lr}	/* memcpy() returns dest addr */
#else
	push	{r0, lr}		/* memcpy() returns dest addr */
#endif

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
#if !defined(__minix)
	push	{r4}			/* borrow r4 */
#endif

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
#if !defined(__minix)
	pop	{r4}			/* return r4 */
#endif

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__minix)
	popeq	{r0, r4, r5}
	moveq	r0, #0
	popeq	{pc}
#else
#ifdef __APCS_26__
	ldmiaeq	sp!, {r0, pc}^		/* done */
#else
	popeq	{r0, pc}		/* done */
#endif
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#if defined(__minix)
	pop	{r0, r4, r5}
	mov	r0, #0
	pop	{pc}
#else
	pop	{r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
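	/*
	 * The source is word-aligned below and one word is read ahead
	 * into lr.  Each destination word is then assembled from two
	 * adjacent source words using shifts; the srcul1, srcul2 and
	 * srcul3 paths handle a source that is 1, 2 or 3 bytes past a
	 * word boundary respectively, which fixes the shift amounts.
	 */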
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_l4

#if defined(__minix)
LABEL(phys_copy_fault)			/* kernel can send us here */
	pop	{r0, r4, r5}
	pop	{pc}

LABEL(phys_copy_fault_in_kernel)	/* kernel can send us here */
	pop	{r0, r4, r5}
	mrc	p15, 0, r0, c6, c0, 0	/* Read DFAR */
	pop	{pc}
#else
END(memcpy)
#endif