/*	$NetBSD: memmove.S,v 1.9 2015/03/26 13:34:51 justin Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * ARM memmove / bcopy.
 *
 *	void *memmove(void *dst, const void *src, size_t len)
 *	void  bcopy(const void *src, void *dst, size_t len)   (when _BCOPY)
 *
 * In:       r0 = dst, r1 = src, r2 = len  (bcopy swaps r0/r1 on entry)
 * Out:      r0 = original dst (memmove)
 * Scratch:  r3, r12 (ip), lr, flags; r4/r5 are pushed/popped around the
 *           bulk loops that borrow them.
 *
 * Strategy: if the buffers do not overlap, tail-call memcpy().  Otherwise
 * copy forwards when dst < src and backwards when dst > src, so the
 * overlapped region is never clobbered before it is read.  Both directions
 * use the same staircase: 32-byte LDM/STM bursts, then 16-, 12-, 8- and
 * 4-byte steps, then a 1..3-byte tail done with predicated byte copies.
 * Separate paths byte-align an unaligned destination first, and for an
 * unaligned source reload word-aligned data and recombine it with
 * shift+orr (shift directions flip under __ARMEB__, one path per 1-, 2-
 * and 3-byte misalignment).
 */

#include <machine/asm.h>

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (3-eor swap, no scratch) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)			/* Bail now if src/dst are the same */
	subhs	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0		/* if (src > dst) r3 = src - dst */
	cmp	r3, r2			/* if (r3 >= len) we have an overlap */
	bhs	PLT_SYM(_C_LABEL(memcpy))	/* no overlap: plain memcpy is safe */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy backwards */

	/*
	 * NOTE(review): eq here would mean src == dst, which already
	 * returned via RETc(eq) above, so this pair appears unreachable
	 * -- confirm before removing.
	 */
	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	/* ---- forward copy: dst < src ---- */
	push	{r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* from here on, r2 = remaining - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	push	{r4}			/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* ge iff >= 16 bytes still remain */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}			/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo 0x14 bias: r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}	/* ge (from adds/subsge): >= 12 left */
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_fl4

	subs	r2, r2, #4		/* r2 = remaining - 8 */
	ldrlt	r3, [r1], #4		/* 4..7 bytes left: one word */
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}		/* 8..11 bytes left: two words */
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go; r2 = remaining - 4 */
	adds	r2, r2, #4
	popeq	{r0, pc}		/* done */

	/* copy the crud byte at a time; r2 is 1..3 here */
	cmp	r2, #2			/* ge: >= 2 bytes, gt: 3 bytes */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes (1..3) to next word boundary */
	cmp	r12, #2			/* ge: >= 2, gt: 3 alignment bytes */

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	/*
	 * Source is r12 (1..3) bytes past a word boundary.  Round r1 down,
	 * preload the straddling word into lr, then dispatch one loop per
	 * offset; each rebuilds aligned destination words with shift+orr
	 * (big-endian builds flip the shift directions).
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

	/* offset 1: out word = 3 bytes of prev word + 1 byte of next (LE) */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* rewind r1 to the true unaligned byte */
	b	.Lmemmove_fl4

	/* offset 2: out word = 2 bytes of prev word + 2 bytes of next */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* rewind r1 to the true unaligned byte */
	b	.Lmemmove_fl4

	/* offset 3: out word = 1 byte of prev word + 3 bytes of next */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* rewind r1 to the true unaligned byte */
	b	.Lmemmove_fl4

	/* ---- backward copy: dst > src ----
	 * Point r0/r1 one past the end and copy down with pre-decrement;
	 * the writebacks total exactly len, so r0 ends back at the original
	 * dst, which doubles as the return value (no need to save it). */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = remaining - 4 from here on */
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	push	{r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* ge iff >= 16 bytes still remain */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* undo 0x14 bias: r2 = remaining - 12 */
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	pop	{r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_bl4
	subs	r2, r2, #4		/* r2 = remaining - 8 */
	ldrlt	r3, [r1, #-4]!		/* 4..7 bytes left: one word */
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}		/* 8..11 bytes left: two words */
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go; r2 = remaining - 4 */
	adds	r2, r2, #4
	RETc(eq)			/* done; r0 already == original dst */

	/* copy the crud byte at a time; r2 is 1..3 here */
	cmp	r2, #2			/* ge: >= 2 bytes, gt: 3 bytes */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = r0 & 3 = bytes to align downward */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	/*
	 * Mirror of the forward unaligned-source code: round r1 down,
	 * preload the straddling word into r3, and recombine word pairs
	 * with shift+orr while walking downwards.
	 */
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore r1 to the true unaligned byte */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore r1 to the true unaligned byte */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	push	{r4, r5, lr}

	/* NB: despite the "32" in the name, this copies 16 bytes per pass */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	pop	{r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore r1 to the true unaligned byte */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

/*
 * NOTE(review): this guard tests BCOPY while the build knob used above is
 * _BCOPY (leading underscore) -- confirm which spelling is intended.
 */
#if defined(__ARM_EABI__) && !defined(BCOPY) && !defined(_RUMPKERNEL)
STRONG_ALIAS(__aeabi_memmove, memmove)
STRONG_ALIAS(__aeabi_memmove4, memmove)
STRONG_ALIAS(__aeabi_memmove8, memmove)
#endif