1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
87 */ 88 89#include <machine/asm.h> 90#include <machine/asmacros.h> 91__FBSDID("$FreeBSD$"); 92 93#include "assym.s" 94 95.L_arm_memcpy: 96 .word _C_LABEL(_arm_memcpy) 97.L_arm_bzero: 98 .word _C_LABEL(_arm_bzero) 99.L_min_memcpy_size: 100 .word _C_LABEL(_min_memcpy_size) 101.L_min_bzero_size: 102 .word _C_LABEL(_min_bzero_size) 103/* 104 * memset: Sets a block of memory to the specified value 105 * 106 * On entry: 107 * r0 - dest address 108 * r1 - byte to write 109 * r2 - number of bytes to write 110 * 111 * On exit: 112 * r0 - dest address 113 */ 114/* LINTSTUB: Func: void bzero(void *, size_t) */ 115ENTRY(bzero) 116 ldr r3, .L_arm_bzero 117 ldr r3, [r3] 118 cmp r3, #0 119 beq .Lnormal0 120 ldr r2, .L_min_bzero_size 121 ldr r2, [r2] 122 cmp r1, r2 123 blt .Lnormal0 124 stmfd sp!, {r0, r1, lr} 125 mov r2, #0 126 mov lr, pc 127 mov pc, r3 128 cmp r0, #0 129 ldmfd sp!, {r0, r1, lr} 130 RETeq 131.Lnormal0: 132 mov r3, #0x00 133 b do_memset 134 135/* LINTSTUB: Func: void *memset(void *, int, size_t) */ 136ENTRY(memset) 137 and r3, r1, #0xff /* We deal with bytes */ 138 mov r1, r2 139do_memset: 140 cmp r1, #0x04 /* Do we have less than 4 bytes */ 141 mov ip, r0 142 blt .Lmemset_lessthanfour 143 144 /* Ok first we will word align the address */ 145 ands r2, ip, #0x03 /* Get the bottom two bits */ 146 bne .Lmemset_wordunaligned /* The address is not word aligned */ 147 148 /* We are now word aligned */ 149.Lmemset_wordaligned: 150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ 151#ifdef _ARM_ARCH_5E 152 tst ip, #0x04 /* Quad-align for armv5e */ 153#else 154 cmp r1, #0x10 155#endif 156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ 157#ifdef _ARM_ARCH_5E 158 subne r1, r1, #0x04 /* Quad-align if necessary */ 159 strne r3, [ip], #0x04 160 cmp r1, #0x10 161#endif 162 blt .Lmemset_loop4 /* If less than 16 then use words */ 163 mov r2, r3 /* Duplicate data */ 164 cmp r1, #0x80 /* If < 128 then skip the big loop */ 165 blt .Lmemset_loop32 166 167 /* Do 128 bytes 
at a time */ 168.Lmemset_loop128: 169 subs r1, r1, #0x80 170#ifdef _ARM_ARCH_5E 171 strged r2, [ip], #0x08 172 strged r2, [ip], #0x08 173 strged r2, [ip], #0x08 174 strged r2, [ip], #0x08 175 strged r2, [ip], #0x08 176 strged r2, [ip], #0x08 177 strged r2, [ip], #0x08 178 strged r2, [ip], #0x08 179 strged r2, [ip], #0x08 180 strged r2, [ip], #0x08 181 strged r2, [ip], #0x08 182 strged r2, [ip], #0x08 183 strged r2, [ip], #0x08 184 strged r2, [ip], #0x08 185 strged r2, [ip], #0x08 186 strged r2, [ip], #0x08 187#else 188 stmgeia ip!, {r2-r3} 189 stmgeia ip!, {r2-r3} 190 stmgeia ip!, {r2-r3} 191 stmgeia ip!, {r2-r3} 192 stmgeia ip!, {r2-r3} 193 stmgeia ip!, {r2-r3} 194 stmgeia ip!, {r2-r3} 195 stmgeia ip!, {r2-r3} 196 stmgeia ip!, {r2-r3} 197 stmgeia ip!, {r2-r3} 198 stmgeia ip!, {r2-r3} 199 stmgeia ip!, {r2-r3} 200 stmgeia ip!, {r2-r3} 201 stmgeia ip!, {r2-r3} 202 stmgeia ip!, {r2-r3} 203 stmgeia ip!, {r2-r3} 204#endif 205 bgt .Lmemset_loop128 206 RETeq /* Zero length so just exit */ 207 208 add r1, r1, #0x80 /* Adjust for extra sub */ 209 210 /* Do 32 bytes at a time */ 211.Lmemset_loop32: 212 subs r1, r1, #0x20 213#ifdef _ARM_ARCH_5E 214 strged r2, [ip], #0x08 215 strged r2, [ip], #0x08 216 strged r2, [ip], #0x08 217 strged r2, [ip], #0x08 218#else 219 stmgeia ip!, {r2-r3} 220 stmgeia ip!, {r2-r3} 221 stmgeia ip!, {r2-r3} 222 stmgeia ip!, {r2-r3} 223#endif 224 bgt .Lmemset_loop32 225 RETeq /* Zero length so just exit */ 226 227 adds r1, r1, #0x10 /* Partially adjust for extra sub */ 228 229 /* Deal with 16 bytes or more */ 230#ifdef _ARM_ARCH_5E 231 strged r2, [ip], #0x08 232 strged r2, [ip], #0x08 233#else 234 stmgeia ip!, {r2-r3} 235 stmgeia ip!, {r2-r3} 236#endif 237 RETeq /* Zero length so just exit */ 238 239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ 240 241 /* We have at least 4 bytes so copy as words */ 242.Lmemset_loop4: 243 subs r1, r1, #0x04 244 strge r3, [ip], #0x04 245 bgt .Lmemset_loop4 246 RETeq /* Zero length so just exit */ 247 
248#ifdef _ARM_ARCH_5E 249 /* Compensate for 64-bit alignment check */ 250 adds r1, r1, #0x04 251 RETeq 252 cmp r1, #2 253#else 254 cmp r1, #-2 255#endif 256 257 strb r3, [ip], #0x01 /* Set 1 byte */ 258 strgeb r3, [ip], #0x01 /* Set another byte */ 259 strgtb r3, [ip] /* and a third */ 260 RET /* Exit */ 261 262.Lmemset_wordunaligned: 263 rsb r2, r2, #0x004 264 strb r3, [ip], #0x01 /* Set 1 byte */ 265 cmp r2, #0x02 266 strgeb r3, [ip], #0x01 /* Set another byte */ 267 sub r1, r1, r2 268 strgtb r3, [ip], #0x01 /* and a third */ 269 cmp r1, #0x04 /* More than 4 bytes left? */ 270 bge .Lmemset_wordaligned /* Yup */ 271 272.Lmemset_lessthanfour: 273 cmp r1, #0x00 274 RETeq /* Zero length so exit */ 275 strb r3, [ip], #0x01 /* Set 1 byte */ 276 cmp r1, #0x02 277 strgeb r3, [ip], #0x01 /* Set another byte */ 278 strgtb r3, [ip] /* and a third */ 279 RET /* Exit */ 280 281ENTRY(bcmp) 282 mov ip, r0 283 cmp r2, #0x06 284 beq .Lmemcmp_6bytes 285 mov r0, #0x00 286 287 /* Are both addresses aligned the same way? */ 288 cmp r2, #0x00 289 eornes r3, ip, r1 290 RETeq /* len == 0, or same addresses! */ 291 tst r3, #0x03 292 subne r2, r2, #0x01 293 bne .Lmemcmp_bytewise2 /* Badly aligned. 
Do it the slow way */ 294 295 /* Word-align the addresses, if necessary */ 296 sub r3, r1, #0x05 297 ands r3, r3, #0x03 298 add r3, r3, r3, lsl #1 299 addne pc, pc, r3, lsl #3 300 nop 301 302 /* Compare up to 3 bytes */ 303 ldrb r0, [ip], #0x01 304 ldrb r3, [r1], #0x01 305 subs r0, r0, r3 306 RETne 307 subs r2, r2, #0x01 308 RETeq 309 310 /* Compare up to 2 bytes */ 311 ldrb r0, [ip], #0x01 312 ldrb r3, [r1], #0x01 313 subs r0, r0, r3 314 RETne 315 subs r2, r2, #0x01 316 RETeq 317 318 /* Compare 1 byte */ 319 ldrb r0, [ip], #0x01 320 ldrb r3, [r1], #0x01 321 subs r0, r0, r3 322 RETne 323 subs r2, r2, #0x01 324 RETeq 325 326 /* Compare 4 bytes at a time, if possible */ 327 subs r2, r2, #0x04 328 bcc .Lmemcmp_bytewise 329.Lmemcmp_word_aligned: 330 ldr r0, [ip], #0x04 331 ldr r3, [r1], #0x04 332 subs r2, r2, #0x04 333 cmpcs r0, r3 334 beq .Lmemcmp_word_aligned 335 sub r0, r0, r3 336 337 /* Correct for extra subtraction, and check if done */ 338 adds r2, r2, #0x04 339 cmpeq r0, #0x00 /* If done, did all bytes match? */ 340 RETeq /* Yup. Just return */ 341 342 /* Re-do the final word byte-wise */ 343 sub ip, ip, #0x04 344 sub r1, r1, #0x04 345 346.Lmemcmp_bytewise: 347 add r2, r2, #0x03 348.Lmemcmp_bytewise2: 349 ldrb r0, [ip], #0x01 350 ldrb r3, [r1], #0x01 351 subs r2, r2, #0x01 352 cmpcs r0, r3 353 beq .Lmemcmp_bytewise2 354 sub r0, r0, r3 355 RET 356 357 /* 358 * 6 byte compares are very common, thanks to the network stack. 359 * This code is hand-scheduled to reduce the number of stalls for 360 * load results. Everything else being equal, this will be ~32% 361 * faster than a byte-wise memcmp. 
362 */ 363 .align 5 364.Lmemcmp_6bytes: 365 ldrb r3, [r1, #0x00] /* r3 = b2#0 */ 366 ldrb r0, [ip, #0x00] /* r0 = b1#0 */ 367 ldrb r2, [r1, #0x01] /* r2 = b2#1 */ 368 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */ 369 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */ 370 RETne /* Return if mismatch on #0 */ 371 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */ 372 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */ 373 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */ 374 RETne /* Return if mismatch on #1 */ 375 ldrb r2, [r1, #0x03] /* r2 = b2#3 */ 376 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */ 377 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */ 378 RETne /* Return if mismatch on #2 */ 379 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */ 380 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */ 381 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */ 382 RETne /* Return if mismatch on #3 */ 383 ldrb r2, [r1, #0x05] /* r2 = b2#5 */ 384 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */ 385 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */ 386 RETne /* Return if mismatch on #4 */ 387 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */ 388 RET 389 390ENTRY(bcopy) 391 /* switch the source and destination registers */ 392 eor r0, r1, r0 393 eor r1, r0, r1 394 eor r0, r1, r0 395ENTRY(memmove) 396 /* Do the buffers overlap? 
*/ 397 cmp r0, r1 398 RETeq /* Bail now if src/dst are the same */ 399 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ 400 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ 401 cmp r3, r2 /* if (r3 < len) we have an overlap */ 402 bcc PIC_SYM(_C_LABEL(memcpy), PLT) 403 404 /* Determine copy direction */ 405 cmp r1, r0 406 bcc .Lmemmove_backwards 407 408 moveq r0, #0 /* Quick abort for len=0 */ 409 RETeq 410 411 stmdb sp!, {r0, lr} /* memmove() returns dest addr */ 412 subs r2, r2, #4 413 blt .Lmemmove_fl4 /* less than 4 bytes */ 414 ands r12, r0, #3 415 bne .Lmemmove_fdestul /* oh unaligned destination addr */ 416 ands r12, r1, #3 417 bne .Lmemmove_fsrcul /* oh unaligned source addr */ 418 419.Lmemmove_ft8: 420 /* We have aligned source and destination */ 421 subs r2, r2, #8 422 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ 423 subs r2, r2, #0x14 424 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ 425 stmdb sp!, {r4} /* borrow r4 */ 426 427 /* blat 32 bytes at a time */ 428 /* XXX for really big copies perhaps we should use more registers */ 429.Lmemmove_floop32: 430 ldmia r1!, {r3, r4, r12, lr} 431 stmia r0!, {r3, r4, r12, lr} 432 ldmia r1!, {r3, r4, r12, lr} 433 stmia r0!, {r3, r4, r12, lr} 434 subs r2, r2, #0x20 435 bge .Lmemmove_floop32 436 437 cmn r2, #0x10 438 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 439 stmgeia r0!, {r3, r4, r12, lr} 440 subge r2, r2, #0x10 441 ldmia sp!, {r4} /* return r4 */ 442 443.Lmemmove_fl32: 444 adds r2, r2, #0x14 445 446 /* blat 12 bytes at a time */ 447.Lmemmove_floop12: 448 ldmgeia r1!, {r3, r12, lr} 449 stmgeia r0!, {r3, r12, lr} 450 subges r2, r2, #0x0c 451 bge .Lmemmove_floop12 452 453.Lmemmove_fl12: 454 adds r2, r2, #8 455 blt .Lmemmove_fl4 456 457 subs r2, r2, #4 458 ldrlt r3, [r1], #4 459 strlt r3, [r0], #4 460 ldmgeia r1!, {r3, r12} 461 stmgeia r0!, {r3, r12} 462 subge r2, r2, #4 463 464.Lmemmove_fl4: 465 /* less than 4 bytes to go */ 466 adds r2, r2, #4 467 ldmeqia sp!, 
{r0, pc} /* done */ 468 469 /* copy the crud byte at a time */ 470 cmp r2, #2 471 ldrb r3, [r1], #1 472 strb r3, [r0], #1 473 ldrgeb r3, [r1], #1 474 strgeb r3, [r0], #1 475 ldrgtb r3, [r1], #1 476 strgtb r3, [r0], #1 477 ldmia sp!, {r0, pc} 478 479 /* erg - unaligned destination */ 480.Lmemmove_fdestul: 481 rsb r12, r12, #4 482 cmp r12, #2 483 484 /* align destination with byte copies */ 485 ldrb r3, [r1], #1 486 strb r3, [r0], #1 487 ldrgeb r3, [r1], #1 488 strgeb r3, [r0], #1 489 ldrgtb r3, [r1], #1 490 strgtb r3, [r0], #1 491 subs r2, r2, r12 492 blt .Lmemmove_fl4 /* less the 4 bytes */ 493 494 ands r12, r1, #3 495 beq .Lmemmove_ft8 /* we have an aligned source */ 496 497 /* erg - unaligned source */ 498 /* This is where it gets nasty ... */ 499.Lmemmove_fsrcul: 500 bic r1, r1, #3 501 ldr lr, [r1], #4 502 cmp r12, #2 503 bgt .Lmemmove_fsrcul3 504 beq .Lmemmove_fsrcul2 505 cmp r2, #0x0c 506 blt .Lmemmove_fsrcul1loop4 507 sub r2, r2, #0x0c 508 stmdb sp!, {r4, r5} 509 510.Lmemmove_fsrcul1loop16: 511#ifdef __ARMEB__ 512 mov r3, lr, lsl #8 513#else 514 mov r3, lr, lsr #8 515#endif 516 ldmia r1!, {r4, r5, r12, lr} 517#ifdef __ARMEB__ 518 orr r3, r3, r4, lsr #24 519 mov r4, r4, lsl #8 520 orr r4, r4, r5, lsr #24 521 mov r5, r5, lsl #8 522 orr r5, r5, r12, lsr #24 523 mov r12, r12, lsl #8 524 orr r12, r12, lr, lsr #24 525#else 526 orr r3, r3, r4, lsl #24 527 mov r4, r4, lsr #8 528 orr r4, r4, r5, lsl #24 529 mov r5, r5, lsr #8 530 orr r5, r5, r12, lsl #24 531 mov r12, r12, lsr #8 532 orr r12, r12, lr, lsl #24 533#endif 534 stmia r0!, {r3-r5, r12} 535 subs r2, r2, #0x10 536 bge .Lmemmove_fsrcul1loop16 537 ldmia sp!, {r4, r5} 538 adds r2, r2, #0x0c 539 blt .Lmemmove_fsrcul1l4 540 541.Lmemmove_fsrcul1loop4: 542#ifdef __ARMEB__ 543 mov r12, lr, lsl #8 544#else 545 mov r12, lr, lsr #8 546#endif 547 ldr lr, [r1], #4 548#ifdef __ARMEB__ 549 orr r12, r12, lr, lsr #24 550#else 551 orr r12, r12, lr, lsl #24 552#endif 553 str r12, [r0], #4 554 subs r2, r2, #4 555 bge 
.Lmemmove_fsrcul1loop4 556 557.Lmemmove_fsrcul1l4: 558 sub r1, r1, #3 559 b .Lmemmove_fl4 560 561.Lmemmove_fsrcul2: 562 cmp r2, #0x0c 563 blt .Lmemmove_fsrcul2loop4 564 sub r2, r2, #0x0c 565 stmdb sp!, {r4, r5} 566 567.Lmemmove_fsrcul2loop16: 568#ifdef __ARMEB__ 569 mov r3, lr, lsl #16 570#else 571 mov r3, lr, lsr #16 572#endif 573 ldmia r1!, {r4, r5, r12, lr} 574#ifdef __ARMEB__ 575 orr r3, r3, r4, lsr #16 576 mov r4, r4, lsl #16 577 orr r4, r4, r5, lsr #16 578 mov r5, r5, lsl #16 579 orr r5, r5, r12, lsr #16 580 mov r12, r12, lsl #16 581 orr r12, r12, lr, lsr #16 582#else 583 orr r3, r3, r4, lsl #16 584 mov r4, r4, lsr #16 585 orr r4, r4, r5, lsl #16 586 mov r5, r5, lsr #16 587 orr r5, r5, r12, lsl #16 588 mov r12, r12, lsr #16 589 orr r12, r12, lr, lsl #16 590#endif 591 stmia r0!, {r3-r5, r12} 592 subs r2, r2, #0x10 593 bge .Lmemmove_fsrcul2loop16 594 ldmia sp!, {r4, r5} 595 adds r2, r2, #0x0c 596 blt .Lmemmove_fsrcul2l4 597 598.Lmemmove_fsrcul2loop4: 599#ifdef __ARMEB__ 600 mov r12, lr, lsl #16 601#else 602 mov r12, lr, lsr #16 603#endif 604 ldr lr, [r1], #4 605#ifdef __ARMEB__ 606 orr r12, r12, lr, lsr #16 607#else 608 orr r12, r12, lr, lsl #16 609#endif 610 str r12, [r0], #4 611 subs r2, r2, #4 612 bge .Lmemmove_fsrcul2loop4 613 614.Lmemmove_fsrcul2l4: 615 sub r1, r1, #2 616 b .Lmemmove_fl4 617 618.Lmemmove_fsrcul3: 619 cmp r2, #0x0c 620 blt .Lmemmove_fsrcul3loop4 621 sub r2, r2, #0x0c 622 stmdb sp!, {r4, r5} 623 624.Lmemmove_fsrcul3loop16: 625#ifdef __ARMEB__ 626 mov r3, lr, lsl #24 627#else 628 mov r3, lr, lsr #24 629#endif 630 ldmia r1!, {r4, r5, r12, lr} 631#ifdef __ARMEB__ 632 orr r3, r3, r4, lsr #8 633 mov r4, r4, lsl #24 634 orr r4, r4, r5, lsr #8 635 mov r5, r5, lsl #24 636 orr r5, r5, r12, lsr #8 637 mov r12, r12, lsl #24 638 orr r12, r12, lr, lsr #8 639#else 640 orr r3, r3, r4, lsl #8 641 mov r4, r4, lsr #24 642 orr r4, r4, r5, lsl #8 643 mov r5, r5, lsr #24 644 orr r5, r5, r12, lsl #8 645 mov r12, r12, lsr #24 646 orr r12, r12, lr, lsl #8 647#endif 
648 stmia r0!, {r3-r5, r12} 649 subs r2, r2, #0x10 650 bge .Lmemmove_fsrcul3loop16 651 ldmia sp!, {r4, r5} 652 adds r2, r2, #0x0c 653 blt .Lmemmove_fsrcul3l4 654 655.Lmemmove_fsrcul3loop4: 656#ifdef __ARMEB__ 657 mov r12, lr, lsl #24 658#else 659 mov r12, lr, lsr #24 660#endif 661 ldr lr, [r1], #4 662#ifdef __ARMEB__ 663 orr r12, r12, lr, lsr #8 664#else 665 orr r12, r12, lr, lsl #8 666#endif 667 str r12, [r0], #4 668 subs r2, r2, #4 669 bge .Lmemmove_fsrcul3loop4 670 671.Lmemmove_fsrcul3l4: 672 sub r1, r1, #1 673 b .Lmemmove_fl4 674 675.Lmemmove_backwards: 676 add r1, r1, r2 677 add r0, r0, r2 678 subs r2, r2, #4 679 blt .Lmemmove_bl4 /* less than 4 bytes */ 680 ands r12, r0, #3 681 bne .Lmemmove_bdestul /* oh unaligned destination addr */ 682 ands r12, r1, #3 683 bne .Lmemmove_bsrcul /* oh unaligned source addr */ 684 685.Lmemmove_bt8: 686 /* We have aligned source and destination */ 687 subs r2, r2, #8 688 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ 689 stmdb sp!, {r4, lr} 690 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ 691 blt .Lmemmove_bl32 692 693 /* blat 32 bytes at a time */ 694 /* XXX for really big copies perhaps we should use more registers */ 695.Lmemmove_bloop32: 696 ldmdb r1!, {r3, r4, r12, lr} 697 stmdb r0!, {r3, r4, r12, lr} 698 ldmdb r1!, {r3, r4, r12, lr} 699 stmdb r0!, {r3, r4, r12, lr} 700 subs r2, r2, #0x20 701 bge .Lmemmove_bloop32 702 703.Lmemmove_bl32: 704 cmn r2, #0x10 705 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 706 stmgedb r0!, {r3, r4, r12, lr} 707 subge r2, r2, #0x10 708 adds r2, r2, #0x14 709 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ 710 stmgedb r0!, {r3, r12, lr} 711 subge r2, r2, #0x0c 712 ldmia sp!, {r4, lr} 713 714.Lmemmove_bl12: 715 adds r2, r2, #8 716 blt .Lmemmove_bl4 717 subs r2, r2, #4 718 ldrlt r3, [r1, #-4]! 719 strlt r3, [r0, #-4]! 
720 ldmgedb r1!, {r3, r12} 721 stmgedb r0!, {r3, r12} 722 subge r2, r2, #4 723 724.Lmemmove_bl4: 725 /* less than 4 bytes to go */ 726 adds r2, r2, #4 727 RETeq /* done */ 728 729 /* copy the crud byte at a time */ 730 cmp r2, #2 731 ldrb r3, [r1, #-1]! 732 strb r3, [r0, #-1]! 733 ldrgeb r3, [r1, #-1]! 734 strgeb r3, [r0, #-1]! 735 ldrgtb r3, [r1, #-1]! 736 strgtb r3, [r0, #-1]! 737 RET 738 739 /* erg - unaligned destination */ 740.Lmemmove_bdestul: 741 cmp r12, #2 742 743 /* align destination with byte copies */ 744 ldrb r3, [r1, #-1]! 745 strb r3, [r0, #-1]! 746 ldrgeb r3, [r1, #-1]! 747 strgeb r3, [r0, #-1]! 748 ldrgtb r3, [r1, #-1]! 749 strgtb r3, [r0, #-1]! 750 subs r2, r2, r12 751 blt .Lmemmove_bl4 /* less than 4 bytes to go */ 752 ands r12, r1, #3 753 beq .Lmemmove_bt8 /* we have an aligned source */ 754 755 /* erg - unaligned source */ 756 /* This is where it gets nasty ... */ 757.Lmemmove_bsrcul: 758 bic r1, r1, #3 759 ldr r3, [r1, #0] 760 cmp r12, #2 761 blt .Lmemmove_bsrcul1 762 beq .Lmemmove_bsrcul2 763 cmp r2, #0x0c 764 blt .Lmemmove_bsrcul3loop4 765 sub r2, r2, #0x0c 766 stmdb sp!, {r4, r5, lr} 767 768.Lmemmove_bsrcul3loop16: 769#ifdef __ARMEB__ 770 mov lr, r3, lsr #8 771#else 772 mov lr, r3, lsl #8 773#endif 774 ldmdb r1!, {r3-r5, r12} 775#ifdef __ARMEB__ 776 orr lr, lr, r12, lsl #24 777 mov r12, r12, lsr #8 778 orr r12, r12, r5, lsl #24 779 mov r5, r5, lsr #8 780 orr r5, r5, r4, lsl #24 781 mov r4, r4, lsr #8 782 orr r4, r4, r3, lsl #24 783#else 784 orr lr, lr, r12, lsr #24 785 mov r12, r12, lsl #8 786 orr r12, r12, r5, lsr #24 787 mov r5, r5, lsl #8 788 orr r5, r5, r4, lsr #24 789 mov r4, r4, lsl #8 790 orr r4, r4, r3, lsr #24 791#endif 792 stmdb r0!, {r4, r5, r12, lr} 793 subs r2, r2, #0x10 794 bge .Lmemmove_bsrcul3loop16 795 ldmia sp!, {r4, r5, lr} 796 adds r2, r2, #0x0c 797 blt .Lmemmove_bsrcul3l4 798 799.Lmemmove_bsrcul3loop4: 800#ifdef __ARMEB__ 801 mov r12, r3, lsr #8 802#else 803 mov r12, r3, lsl #8 804#endif 805 ldr r3, [r1, #-4]! 
806#ifdef __ARMEB__ 807 orr r12, r12, r3, lsl #24 808#else 809 orr r12, r12, r3, lsr #24 810#endif 811 str r12, [r0, #-4]! 812 subs r2, r2, #4 813 bge .Lmemmove_bsrcul3loop4 814 815.Lmemmove_bsrcul3l4: 816 add r1, r1, #3 817 b .Lmemmove_bl4 818 819.Lmemmove_bsrcul2: 820 cmp r2, #0x0c 821 blt .Lmemmove_bsrcul2loop4 822 sub r2, r2, #0x0c 823 stmdb sp!, {r4, r5, lr} 824 825.Lmemmove_bsrcul2loop16: 826#ifdef __ARMEB__ 827 mov lr, r3, lsr #16 828#else 829 mov lr, r3, lsl #16 830#endif 831 ldmdb r1!, {r3-r5, r12} 832#ifdef __ARMEB__ 833 orr lr, lr, r12, lsl #16 834 mov r12, r12, lsr #16 835 orr r12, r12, r5, lsl #16 836 mov r5, r5, lsr #16 837 orr r5, r5, r4, lsl #16 838 mov r4, r4, lsr #16 839 orr r4, r4, r3, lsl #16 840#else 841 orr lr, lr, r12, lsr #16 842 mov r12, r12, lsl #16 843 orr r12, r12, r5, lsr #16 844 mov r5, r5, lsl #16 845 orr r5, r5, r4, lsr #16 846 mov r4, r4, lsl #16 847 orr r4, r4, r3, lsr #16 848#endif 849 stmdb r0!, {r4, r5, r12, lr} 850 subs r2, r2, #0x10 851 bge .Lmemmove_bsrcul2loop16 852 ldmia sp!, {r4, r5, lr} 853 adds r2, r2, #0x0c 854 blt .Lmemmove_bsrcul2l4 855 856.Lmemmove_bsrcul2loop4: 857#ifdef __ARMEB__ 858 mov r12, r3, lsr #16 859#else 860 mov r12, r3, lsl #16 861#endif 862 ldr r3, [r1, #-4]! 863#ifdef __ARMEB__ 864 orr r12, r12, r3, lsl #16 865#else 866 orr r12, r12, r3, lsr #16 867#endif 868 str r12, [r0, #-4]! 
869 subs r2, r2, #4 870 bge .Lmemmove_bsrcul2loop4 871 872.Lmemmove_bsrcul2l4: 873 add r1, r1, #2 874 b .Lmemmove_bl4 875 876.Lmemmove_bsrcul1: 877 cmp r2, #0x0c 878 blt .Lmemmove_bsrcul1loop4 879 sub r2, r2, #0x0c 880 stmdb sp!, {r4, r5, lr} 881 882.Lmemmove_bsrcul1loop32: 883#ifdef __ARMEB__ 884 mov lr, r3, lsr #24 885#else 886 mov lr, r3, lsl #24 887#endif 888 ldmdb r1!, {r3-r5, r12} 889#ifdef __ARMEB__ 890 orr lr, lr, r12, lsl #8 891 mov r12, r12, lsr #24 892 orr r12, r12, r5, lsl #8 893 mov r5, r5, lsr #24 894 orr r5, r5, r4, lsl #8 895 mov r4, r4, lsr #24 896 orr r4, r4, r3, lsl #8 897#else 898 orr lr, lr, r12, lsr #8 899 mov r12, r12, lsl #24 900 orr r12, r12, r5, lsr #8 901 mov r5, r5, lsl #24 902 orr r5, r5, r4, lsr #8 903 mov r4, r4, lsl #24 904 orr r4, r4, r3, lsr #8 905#endif 906 stmdb r0!, {r4, r5, r12, lr} 907 subs r2, r2, #0x10 908 bge .Lmemmove_bsrcul1loop32 909 ldmia sp!, {r4, r5, lr} 910 adds r2, r2, #0x0c 911 blt .Lmemmove_bsrcul1l4 912 913.Lmemmove_bsrcul1loop4: 914#ifdef __ARMEB__ 915 mov r12, r3, lsr #24 916#else 917 mov r12, r3, lsl #24 918#endif 919 ldr r3, [r1, #-4]! 920#ifdef __ARMEB__ 921 orr r12, r12, r3, lsl #8 922#else 923 orr r12, r12, r3, lsr #8 924#endif 925 str r12, [r0, #-4]! 
926 subs r2, r2, #4 927 bge .Lmemmove_bsrcul1loop4 928 929.Lmemmove_bsrcul1l4: 930 add r1, r1, #1 931 b .Lmemmove_bl4 932 933#if !defined(_ARM_ARCH_5E) 934ENTRY(memcpy) 935 /* save leaf functions having to store this away */ 936 /* Do not check arm_memcpy if we're running from flash */ 937#ifdef FLASHADDR 938#if FLASHADDR > PHYSADDR 939 ldr r3, =FLASHADDR 940 cmp r3, pc 941 bls .Lnormal 942#else 943 ldr r3, =FLASHADDR 944 cmp r3, pc 945 bhi .Lnormal 946#endif 947#endif 948 ldr r3, .L_arm_memcpy 949 ldr r3, [r3] 950 cmp r3, #0 951 beq .Lnormal 952 ldr r3, .L_min_memcpy_size 953 ldr r3, [r3] 954 cmp r2, r3 955 blt .Lnormal 956 stmfd sp!, {r0-r2, r4, lr} 957 mov r3, #0 958 ldr r4, .L_arm_memcpy 959 mov lr, pc 960 ldr pc, [r4] 961 cmp r0, #0 962 ldmfd sp!, {r0-r2, r4, lr} 963 RETeq 964 965.Lnormal: 966 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ 967 968 subs r2, r2, #4 969 blt .Lmemcpy_l4 /* less than 4 bytes */ 970 ands r12, r0, #3 971 bne .Lmemcpy_destul /* oh unaligned destination addr */ 972 ands r12, r1, #3 973 bne .Lmemcpy_srcul /* oh unaligned source addr */ 974 975.Lmemcpy_t8: 976 /* We have aligned source and destination */ 977 subs r2, r2, #8 978 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */ 979 subs r2, r2, #0x14 980 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */ 981 stmdb sp!, {r4} /* borrow r4 */ 982 983 /* blat 32 bytes at a time */ 984 /* XXX for really big copies perhaps we should use more registers */ 985.Lmemcpy_loop32: 986 ldmia r1!, {r3, r4, r12, lr} 987 stmia r0!, {r3, r4, r12, lr} 988 ldmia r1!, {r3, r4, r12, lr} 989 stmia r0!, {r3, r4, r12, lr} 990 subs r2, r2, #0x20 991 bge .Lmemcpy_loop32 992 993 cmn r2, #0x10 994 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 995 stmgeia r0!, {r3, r4, r12, lr} 996 subge r2, r2, #0x10 997 ldmia sp!, {r4} /* return r4 */ 998 999.Lmemcpy_l32: 1000 adds r2, r2, #0x14 1001 1002 /* blat 12 bytes at a time */ 1003.Lmemcpy_loop12: 1004 ldmgeia r1!, {r3, r12, lr} 1005 
stmgeia r0!, {r3, r12, lr} 1006 subges r2, r2, #0x0c 1007 bge .Lmemcpy_loop12 1008 1009.Lmemcpy_l12: 1010 adds r2, r2, #8 1011 blt .Lmemcpy_l4 1012 1013 subs r2, r2, #4 1014 ldrlt r3, [r1], #4 1015 strlt r3, [r0], #4 1016 ldmgeia r1!, {r3, r12} 1017 stmgeia r0!, {r3, r12} 1018 subge r2, r2, #4 1019 1020.Lmemcpy_l4: 1021 /* less than 4 bytes to go */ 1022 adds r2, r2, #4 1023#ifdef __APCS_26_ 1024 ldmeqia sp!, {r0, pc}^ /* done */ 1025#else 1026 ldmeqia sp!, {r0, pc} /* done */ 1027#endif 1028 /* copy the crud byte at a time */ 1029 cmp r2, #2 1030 ldrb r3, [r1], #1 1031 strb r3, [r0], #1 1032 ldrgeb r3, [r1], #1 1033 strgeb r3, [r0], #1 1034 ldrgtb r3, [r1], #1 1035 strgtb r3, [r0], #1 1036 ldmia sp!, {r0, pc} 1037 1038 /* erg - unaligned destination */ 1039.Lmemcpy_destul: 1040 rsb r12, r12, #4 1041 cmp r12, #2 1042 1043 /* align destination with byte copies */ 1044 ldrb r3, [r1], #1 1045 strb r3, [r0], #1 1046 ldrgeb r3, [r1], #1 1047 strgeb r3, [r0], #1 1048 ldrgtb r3, [r1], #1 1049 strgtb r3, [r0], #1 1050 subs r2, r2, r12 1051 blt .Lmemcpy_l4 /* less the 4 bytes */ 1052 1053 ands r12, r1, #3 1054 beq .Lmemcpy_t8 /* we have an aligned source */ 1055 1056 /* erg - unaligned source */ 1057 /* This is where it gets nasty ... 
*/ 1058.Lmemcpy_srcul: 1059 bic r1, r1, #3 1060 ldr lr, [r1], #4 1061 cmp r12, #2 1062 bgt .Lmemcpy_srcul3 1063 beq .Lmemcpy_srcul2 1064 cmp r2, #0x0c 1065 blt .Lmemcpy_srcul1loop4 1066 sub r2, r2, #0x0c 1067 stmdb sp!, {r4, r5} 1068 1069.Lmemcpy_srcul1loop16: 1070 mov r3, lr, lsr #8 1071 ldmia r1!, {r4, r5, r12, lr} 1072 orr r3, r3, r4, lsl #24 1073 mov r4, r4, lsr #8 1074 orr r4, r4, r5, lsl #24 1075 mov r5, r5, lsr #8 1076 orr r5, r5, r12, lsl #24 1077 mov r12, r12, lsr #8 1078 orr r12, r12, lr, lsl #24 1079 stmia r0!, {r3-r5, r12} 1080 subs r2, r2, #0x10 1081 bge .Lmemcpy_srcul1loop16 1082 ldmia sp!, {r4, r5} 1083 adds r2, r2, #0x0c 1084 blt .Lmemcpy_srcul1l4 1085 1086.Lmemcpy_srcul1loop4: 1087 mov r12, lr, lsr #8 1088 ldr lr, [r1], #4 1089 orr r12, r12, lr, lsl #24 1090 str r12, [r0], #4 1091 subs r2, r2, #4 1092 bge .Lmemcpy_srcul1loop4 1093 1094.Lmemcpy_srcul1l4: 1095 sub r1, r1, #3 1096 b .Lmemcpy_l4 1097 1098.Lmemcpy_srcul2: 1099 cmp r2, #0x0c 1100 blt .Lmemcpy_srcul2loop4 1101 sub r2, r2, #0x0c 1102 stmdb sp!, {r4, r5} 1103 1104.Lmemcpy_srcul2loop16: 1105 mov r3, lr, lsr #16 1106 ldmia r1!, {r4, r5, r12, lr} 1107 orr r3, r3, r4, lsl #16 1108 mov r4, r4, lsr #16 1109 orr r4, r4, r5, lsl #16 1110 mov r5, r5, lsr #16 1111 orr r5, r5, r12, lsl #16 1112 mov r12, r12, lsr #16 1113 orr r12, r12, lr, lsl #16 1114 stmia r0!, {r3-r5, r12} 1115 subs r2, r2, #0x10 1116 bge .Lmemcpy_srcul2loop16 1117 ldmia sp!, {r4, r5} 1118 adds r2, r2, #0x0c 1119 blt .Lmemcpy_srcul2l4 1120 1121.Lmemcpy_srcul2loop4: 1122 mov r12, lr, lsr #16 1123 ldr lr, [r1], #4 1124 orr r12, r12, lr, lsl #16 1125 str r12, [r0], #4 1126 subs r2, r2, #4 1127 bge .Lmemcpy_srcul2loop4 1128 1129.Lmemcpy_srcul2l4: 1130 sub r1, r1, #2 1131 b .Lmemcpy_l4 1132 1133.Lmemcpy_srcul3: 1134 cmp r2, #0x0c 1135 blt .Lmemcpy_srcul3loop4 1136 sub r2, r2, #0x0c 1137 stmdb sp!, {r4, r5} 1138 1139.Lmemcpy_srcul3loop16: 1140 mov r3, lr, lsr #24 1141 ldmia r1!, {r4, r5, r12, lr} 1142 orr r3, r3, r4, lsl #8 1143 mov r4, 
r4, lsr #24 1144 orr r4, r4, r5, lsl #8 1145 mov r5, r5, lsr #24 1146 orr r5, r5, r12, lsl #8 1147 mov r12, r12, lsr #24 1148 orr r12, r12, lr, lsl #8 1149 stmia r0!, {r3-r5, r12} 1150 subs r2, r2, #0x10 1151 bge .Lmemcpy_srcul3loop16 1152 ldmia sp!, {r4, r5} 1153 adds r2, r2, #0x0c 1154 blt .Lmemcpy_srcul3l4 1155 1156.Lmemcpy_srcul3loop4: 1157 mov r12, lr, lsr #24 1158 ldr lr, [r1], #4 1159 orr r12, r12, lr, lsl #8 1160 str r12, [r0], #4 1161 subs r2, r2, #4 1162 bge .Lmemcpy_srcul3loop4 1163 1164.Lmemcpy_srcul3l4: 1165 sub r1, r1, #1 1166 b .Lmemcpy_l4 1167#else 1168/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 1169ENTRY(memcpy) 1170 pld [r1] 1171 cmp r2, #0x0c 1172 ble .Lmemcpy_short /* <= 12 bytes */ 1173#ifdef FLASHADDR 1174#if FLASHADDR > PHYSADDR 1175 ldr r3, =FLASHADDR 1176 cmp r3, pc 1177 bls .Lnormal 1178#else 1179 ldr r3, =FLASHADDR 1180 cmp r3, pc 1181 bhi .Lnormal 1182#endif 1183#endif 1184 ldr r3, .L_arm_memcpy 1185 ldr r3, [r3] 1186 cmp r3, #0 1187 beq .Lnormal 1188 ldr r3, .L_min_memcpy_size 1189 ldr r3, [r3] 1190 cmp r2, r3 1191 blt .Lnormal 1192 stmfd sp!, {r0-r2, r4, lr} 1193 mov r3, #0 1194 ldr r4, .L_arm_memcpy 1195 mov lr, pc 1196 ldr pc, [r4] 1197 cmp r0, #0 1198 ldmfd sp!, {r0-r2, r4, lr} 1199 RETeq 1200.Lnormal: 1201 mov r3, r0 /* We must not clobber r0 */ 1202 1203 /* Word-align the destination buffer */ 1204 ands ip, r3, #0x03 /* Already word aligned? */ 1205 beq .Lmemcpy_wordaligned /* Yup */ 1206 cmp ip, #0x02 1207 ldrb ip, [r1], #0x01 1208 sub r2, r2, #0x01 1209 strb ip, [r3], #0x01 1210 ldrleb ip, [r1], #0x01 1211 suble r2, r2, #0x01 1212 strleb ip, [r3], #0x01 1213 ldrltb ip, [r1], #0x01 1214 sublt r2, r2, #0x01 1215 strltb ip, [r3], #0x01 1216 1217 /* Destination buffer is now word aligned */ 1218.Lmemcpy_wordaligned: 1219 ands ip, r1, #0x03 /* Is src also word-aligned? */ 1220 bne .Lmemcpy_bad_align /* Nope. 
Things just got bad */ 1221 1222 /* Quad-align the destination buffer */ 1223 tst r3, #0x07 /* Already quad aligned? */ 1224 ldrne ip, [r1], #0x04 1225 stmfd sp!, {r4-r9} /* Free up some registers */ 1226 subne r2, r2, #0x04 1227 strne ip, [r3], #0x04 1228 1229 /* Destination buffer quad aligned, source is at least word aligned */ 1230 subs r2, r2, #0x80 1231 blt .Lmemcpy_w_lessthan128 1232 1233 /* Copy 128 bytes at a time */ 1234.Lmemcpy_w_loop128: 1235 ldr r4, [r1], #0x04 /* LD:00-03 */ 1236 ldr r5, [r1], #0x04 /* LD:04-07 */ 1237 pld [r1, #0x18] /* Prefetch 0x20 */ 1238 ldr r6, [r1], #0x04 /* LD:08-0b */ 1239 ldr r7, [r1], #0x04 /* LD:0c-0f */ 1240 ldr r8, [r1], #0x04 /* LD:10-13 */ 1241 ldr r9, [r1], #0x04 /* LD:14-17 */ 1242 strd r4, [r3], #0x08 /* ST:00-07 */ 1243 ldr r4, [r1], #0x04 /* LD:18-1b */ 1244 ldr r5, [r1], #0x04 /* LD:1c-1f */ 1245 strd r6, [r3], #0x08 /* ST:08-0f */ 1246 ldr r6, [r1], #0x04 /* LD:20-23 */ 1247 ldr r7, [r1], #0x04 /* LD:24-27 */ 1248 pld [r1, #0x18] /* Prefetch 0x40 */ 1249 strd r8, [r3], #0x08 /* ST:10-17 */ 1250 ldr r8, [r1], #0x04 /* LD:28-2b */ 1251 ldr r9, [r1], #0x04 /* LD:2c-2f */ 1252 strd r4, [r3], #0x08 /* ST:18-1f */ 1253 ldr r4, [r1], #0x04 /* LD:30-33 */ 1254 ldr r5, [r1], #0x04 /* LD:34-37 */ 1255 strd r6, [r3], #0x08 /* ST:20-27 */ 1256 ldr r6, [r1], #0x04 /* LD:38-3b */ 1257 ldr r7, [r1], #0x04 /* LD:3c-3f */ 1258 strd r8, [r3], #0x08 /* ST:28-2f */ 1259 ldr r8, [r1], #0x04 /* LD:40-43 */ 1260 ldr r9, [r1], #0x04 /* LD:44-47 */ 1261 pld [r1, #0x18] /* Prefetch 0x60 */ 1262 strd r4, [r3], #0x08 /* ST:30-37 */ 1263 ldr r4, [r1], #0x04 /* LD:48-4b */ 1264 ldr r5, [r1], #0x04 /* LD:4c-4f */ 1265 strd r6, [r3], #0x08 /* ST:38-3f */ 1266 ldr r6, [r1], #0x04 /* LD:50-53 */ 1267 ldr r7, [r1], #0x04 /* LD:54-57 */ 1268 strd r8, [r3], #0x08 /* ST:40-47 */ 1269 ldr r8, [r1], #0x04 /* LD:58-5b */ 1270 ldr r9, [r1], #0x04 /* LD:5c-5f */ 1271 strd r4, [r3], #0x08 /* ST:48-4f */ 1272 ldr r4, [r1], #0x04 /* LD:60-63 */ 1273 ldr r5, 
[r1], #0x04 /* LD:64-67 */ 1274 pld [r1, #0x18] /* Prefetch 0x80 */ 1275 strd r6, [r3], #0x08 /* ST:50-57 */ 1276 ldr r6, [r1], #0x04 /* LD:68-6b */ 1277 ldr r7, [r1], #0x04 /* LD:6c-6f */ 1278 strd r8, [r3], #0x08 /* ST:58-5f */ 1279 ldr r8, [r1], #0x04 /* LD:70-73 */ 1280 ldr r9, [r1], #0x04 /* LD:74-77 */ 1281 strd r4, [r3], #0x08 /* ST:60-67 */ 1282 ldr r4, [r1], #0x04 /* LD:78-7b */ 1283 ldr r5, [r1], #0x04 /* LD:7c-7f */ 1284 strd r6, [r3], #0x08 /* ST:68-6f */ 1285 strd r8, [r3], #0x08 /* ST:70-77 */ 1286 subs r2, r2, #0x80 1287 strd r4, [r3], #0x08 /* ST:78-7f */ 1288 bge .Lmemcpy_w_loop128 1289 1290.Lmemcpy_w_lessthan128: 1291 adds r2, r2, #0x80 /* Adjust for extra sub */ 1292 ldmeqfd sp!, {r4-r9} 1293 RETeq /* Return now if done */ 1294 subs r2, r2, #0x20 1295 blt .Lmemcpy_w_lessthan32 1296 1297 /* Copy 32 bytes at a time */ 1298.Lmemcpy_w_loop32: 1299 ldr r4, [r1], #0x04 1300 ldr r5, [r1], #0x04 1301 pld [r1, #0x18] 1302 ldr r6, [r1], #0x04 1303 ldr r7, [r1], #0x04 1304 ldr r8, [r1], #0x04 1305 ldr r9, [r1], #0x04 1306 strd r4, [r3], #0x08 1307 ldr r4, [r1], #0x04 1308 ldr r5, [r1], #0x04 1309 strd r6, [r3], #0x08 1310 strd r8, [r3], #0x08 1311 subs r2, r2, #0x20 1312 strd r4, [r3], #0x08 1313 bge .Lmemcpy_w_loop32 1314 1315.Lmemcpy_w_lessthan32: 1316 adds r2, r2, #0x20 /* Adjust for extra sub */ 1317 ldmeqfd sp!, {r4-r9} 1318 RETeq /* Return now if done */ 1319 1320 and r4, r2, #0x18 1321 rsbs r4, r4, #0x18 1322 addne pc, pc, r4, lsl #1 1323 nop 1324 1325 /* At least 24 bytes remaining */ 1326 ldr r4, [r1], #0x04 1327 ldr r5, [r1], #0x04 1328 sub r2, r2, #0x08 1329 strd r4, [r3], #0x08 1330 1331 /* At least 16 bytes remaining */ 1332 ldr r4, [r1], #0x04 1333 ldr r5, [r1], #0x04 1334 sub r2, r2, #0x08 1335 strd r4, [r3], #0x08 1336 1337 /* At least 8 bytes remaining */ 1338 ldr r4, [r1], #0x04 1339 ldr r5, [r1], #0x04 1340 subs r2, r2, #0x08 1341 strd r4, [r3], #0x08 1342 1343 /* Less than 8 bytes remaining */ 1344 ldmfd sp!, {r4-r9} 1345 RETeq /* Return 
now if done */ 1346 subs r2, r2, #0x04 1347 ldrge ip, [r1], #0x04 1348 strge ip, [r3], #0x04 1349 RETeq /* Return now if done */ 1350 addlt r2, r2, #0x04 1351 ldrb ip, [r1], #0x01 1352 cmp r2, #0x02 1353 ldrgeb r2, [r1], #0x01 1354 strb ip, [r3], #0x01 1355 ldrgtb ip, [r1] 1356 strgeb r2, [r3], #0x01 1357 strgtb ip, [r3] 1358 RET 1359 1360 1361/* 1362 * At this point, it has not been possible to word align both buffers. 1363 * The destination buffer is word aligned, but the source buffer is not. 1364 */ 1365.Lmemcpy_bad_align: 1366 stmfd sp!, {r4-r7} 1367 bic r1, r1, #0x03 1368 cmp ip, #2 1369 ldr ip, [r1], #0x04 1370 bgt .Lmemcpy_bad3 1371 beq .Lmemcpy_bad2 1372 b .Lmemcpy_bad1 1373 1374.Lmemcpy_bad1_loop16: 1375#ifdef __ARMEB__ 1376 mov r4, ip, lsl #8 1377#else 1378 mov r4, ip, lsr #8 1379#endif 1380 ldr r5, [r1], #0x04 1381 pld [r1, #0x018] 1382 ldr r6, [r1], #0x04 1383 ldr r7, [r1], #0x04 1384 ldr ip, [r1], #0x04 1385#ifdef __ARMEB__ 1386 orr r4, r4, r5, lsr #24 1387 mov r5, r5, lsl #8 1388 orr r5, r5, r6, lsr #24 1389 mov r6, r6, lsl #8 1390 orr r6, r6, r7, lsr #24 1391 mov r7, r7, lsl #8 1392 orr r7, r7, ip, lsr #24 1393#else 1394 orr r4, r4, r5, lsl #24 1395 mov r5, r5, lsr #8 1396 orr r5, r5, r6, lsl #24 1397 mov r6, r6, lsr #8 1398 orr r6, r6, r7, lsl #24 1399 mov r7, r7, lsr #8 1400 orr r7, r7, ip, lsl #24 1401#endif 1402 str r4, [r3], #0x04 1403 str r5, [r3], #0x04 1404 str r6, [r3], #0x04 1405 str r7, [r3], #0x04 1406.Lmemcpy_bad1: 1407 subs r2, r2, #0x10 1408 bge .Lmemcpy_bad1_loop16 1409 1410 adds r2, r2, #0x10 1411 ldmeqfd sp!, {r4-r7} 1412 RETeq /* Return now if done */ 1413 subs r2, r2, #0x04 1414 sublt r1, r1, #0x03 1415 blt .Lmemcpy_bad_done 1416 1417.Lmemcpy_bad1_loop4: 1418#ifdef __ARMEB__ 1419 mov r4, ip, lsl #8 1420#else 1421 mov r4, ip, lsr #8 1422#endif 1423 ldr ip, [r1], #0x04 1424 subs r2, r2, #0x04 1425#ifdef __ARMEB__ 1426 orr r4, r4, ip, lsr #24 1427#else 1428 orr r4, r4, ip, lsl #24 1429#endif 1430 str r4, [r3], #0x04 1431 bge 
.Lmemcpy_bad1_loop4 1432 sub r1, r1, #0x03 1433 b .Lmemcpy_bad_done 1434 1435.Lmemcpy_bad2_loop16: 1436#ifdef __ARMEB__ 1437 mov r4, ip, lsl #16 1438#else 1439 mov r4, ip, lsr #16 1440#endif 1441 ldr r5, [r1], #0x04 1442 pld [r1, #0x018] 1443 ldr r6, [r1], #0x04 1444 ldr r7, [r1], #0x04 1445 ldr ip, [r1], #0x04 1446#ifdef __ARMEB__ 1447 orr r4, r4, r5, lsr #16 1448 mov r5, r5, lsl #16 1449 orr r5, r5, r6, lsr #16 1450 mov r6, r6, lsl #16 1451 orr r6, r6, r7, lsr #16 1452 mov r7, r7, lsl #16 1453 orr r7, r7, ip, lsr #16 1454#else 1455 orr r4, r4, r5, lsl #16 1456 mov r5, r5, lsr #16 1457 orr r5, r5, r6, lsl #16 1458 mov r6, r6, lsr #16 1459 orr r6, r6, r7, lsl #16 1460 mov r7, r7, lsr #16 1461 orr r7, r7, ip, lsl #16 1462#endif 1463 str r4, [r3], #0x04 1464 str r5, [r3], #0x04 1465 str r6, [r3], #0x04 1466 str r7, [r3], #0x04 1467.Lmemcpy_bad2: 1468 subs r2, r2, #0x10 1469 bge .Lmemcpy_bad2_loop16 1470 1471 adds r2, r2, #0x10 1472 ldmeqfd sp!, {r4-r7} 1473 RETeq /* Return now if done */ 1474 subs r2, r2, #0x04 1475 sublt r1, r1, #0x02 1476 blt .Lmemcpy_bad_done 1477 1478.Lmemcpy_bad2_loop4: 1479#ifdef __ARMEB__ 1480 mov r4, ip, lsl #16 1481#else 1482 mov r4, ip, lsr #16 1483#endif 1484 ldr ip, [r1], #0x04 1485 subs r2, r2, #0x04 1486#ifdef __ARMEB__ 1487 orr r4, r4, ip, lsr #16 1488#else 1489 orr r4, r4, ip, lsl #16 1490#endif 1491 str r4, [r3], #0x04 1492 bge .Lmemcpy_bad2_loop4 1493 sub r1, r1, #0x02 1494 b .Lmemcpy_bad_done 1495 1496.Lmemcpy_bad3_loop16: 1497#ifdef __ARMEB__ 1498 mov r4, ip, lsl #24 1499#else 1500 mov r4, ip, lsr #24 1501#endif 1502 ldr r5, [r1], #0x04 1503 pld [r1, #0x018] 1504 ldr r6, [r1], #0x04 1505 ldr r7, [r1], #0x04 1506 ldr ip, [r1], #0x04 1507#ifdef __ARMEB__ 1508 orr r4, r4, r5, lsr #8 1509 mov r5, r5, lsl #24 1510 orr r5, r5, r6, lsr #8 1511 mov r6, r6, lsl #24 1512 orr r6, r6, r7, lsr #8 1513 mov r7, r7, lsl #24 1514 orr r7, r7, ip, lsr #8 1515#else 1516 orr r4, r4, r5, lsl #8 1517 mov r5, r5, lsr #24 1518 orr r5, r5, r6, lsl #8 1519 
mov r6, r6, lsr #24 1520 orr r6, r6, r7, lsl #8 1521 mov r7, r7, lsr #24 1522 orr r7, r7, ip, lsl #8 1523#endif 1524 str r4, [r3], #0x04 1525 str r5, [r3], #0x04 1526 str r6, [r3], #0x04 1527 str r7, [r3], #0x04 1528.Lmemcpy_bad3: 1529 subs r2, r2, #0x10 1530 bge .Lmemcpy_bad3_loop16 1531 1532 adds r2, r2, #0x10 1533 ldmeqfd sp!, {r4-r7} 1534 RETeq /* Return now if done */ 1535 subs r2, r2, #0x04 1536 sublt r1, r1, #0x01 1537 blt .Lmemcpy_bad_done 1538 1539.Lmemcpy_bad3_loop4: 1540#ifdef __ARMEB__ 1541 mov r4, ip, lsl #24 1542#else 1543 mov r4, ip, lsr #24 1544#endif 1545 ldr ip, [r1], #0x04 1546 subs r2, r2, #0x04 1547#ifdef __ARMEB__ 1548 orr r4, r4, ip, lsr #8 1549#else 1550 orr r4, r4, ip, lsl #8 1551#endif 1552 str r4, [r3], #0x04 1553 bge .Lmemcpy_bad3_loop4 1554 sub r1, r1, #0x01 1555 1556.Lmemcpy_bad_done: 1557 ldmfd sp!, {r4-r7} 1558 adds r2, r2, #0x04 1559 RETeq 1560 ldrb ip, [r1], #0x01 1561 cmp r2, #0x02 1562 ldrgeb r2, [r1], #0x01 1563 strb ip, [r3], #0x01 1564 ldrgtb ip, [r1] 1565 strgeb r2, [r3], #0x01 1566 strgtb ip, [r3] 1567 RET 1568 1569 1570/* 1571 * Handle short copies (less than 16 bytes), possibly misaligned. 1572 * Some of these are *very* common, thanks to the network stack, 1573 * and so are handled specially. 
1574 */ 1575.Lmemcpy_short: 1576 add pc, pc, r2, lsl #2 1577 nop 1578 RET /* 0x00 */ 1579 b .Lmemcpy_bytewise /* 0x01 */ 1580 b .Lmemcpy_bytewise /* 0x02 */ 1581 b .Lmemcpy_bytewise /* 0x03 */ 1582 b .Lmemcpy_4 /* 0x04 */ 1583 b .Lmemcpy_bytewise /* 0x05 */ 1584 b .Lmemcpy_6 /* 0x06 */ 1585 b .Lmemcpy_bytewise /* 0x07 */ 1586 b .Lmemcpy_8 /* 0x08 */ 1587 b .Lmemcpy_bytewise /* 0x09 */ 1588 b .Lmemcpy_bytewise /* 0x0a */ 1589 b .Lmemcpy_bytewise /* 0x0b */ 1590 b .Lmemcpy_c /* 0x0c */ 1591.Lmemcpy_bytewise: 1592 mov r3, r0 /* We must not clobber r0 */ 1593 ldrb ip, [r1], #0x01 15941: subs r2, r2, #0x01 1595 strb ip, [r3], #0x01 1596 ldrneb ip, [r1], #0x01 1597 bne 1b 1598 RET 1599 1600/****************************************************************************** 1601 * Special case for 4 byte copies 1602 */ 1603#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1604#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1605 LMEMCPY_4_PAD 1606.Lmemcpy_4: 1607 and r2, r1, #0x03 1608 orr r2, r2, r0, lsl #2 1609 ands r2, r2, #0x0f 1610 sub r3, pc, #0x14 1611 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1612 1613/* 1614 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1615 */ 1616 ldr r2, [r1] 1617 str r2, [r0] 1618 RET 1619 LMEMCPY_4_PAD 1620 1621/* 1622 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1623 */ 1624 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1625 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1626#ifdef __ARMEB__ 1627 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 1628 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1629#else 1630 mov r3, r3, lsr #8 /* r3 = .210 */ 1631 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1632#endif 1633 str r3, [r0] 1634 RET 1635 LMEMCPY_4_PAD 1636 1637/* 1638 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1639 */ 1640#ifdef __ARMEB__ 1641 ldrh r3, [r1] 1642 ldrh r2, [r1, #0x02] 1643#else 1644 ldrh r3, [r1, #0x02] 1645 ldrh r2, [r1] 1646#endif 1647 orr r3, r2, r3, lsl #16 1648 str r3, [r0] 1649 RET 1650 LMEMCPY_4_PAD 1651 1652/* 1653 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1654 */ 1655 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1656 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1657#ifdef __ARMEB__ 1658 mov r3, r3, lsl #24 /* r3 = 0... */ 1659 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1660#else 1661 mov r3, r3, lsr #24 /* r3 = ...0 */ 1662 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1663#endif 1664 str r3, [r0] 1665 RET 1666 LMEMCPY_4_PAD 1667 1668/* 1669 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1670 */ 1671 ldr r2, [r1] 1672#ifdef __ARMEB__ 1673 strb r2, [r0, #0x03] 1674 mov r3, r2, lsr #8 1675 mov r1, r2, lsr #24 1676 strb r1, [r0] 1677#else 1678 strb r2, [r0] 1679 mov r3, r2, lsr #8 1680 mov r1, r2, lsr #24 1681 strb r1, [r0, #0x03] 1682#endif 1683 strh r3, [r0, #0x01] 1684 RET 1685 LMEMCPY_4_PAD 1686 1687/* 1688 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1689 */ 1690 ldrb r2, [r1] 1691 ldrh r3, [r1, #0x01] 1692 ldrb r1, [r1, #0x03] 1693 strb r2, [r0] 1694 strh r3, [r0, #0x01] 1695 strb r1, [r0, #0x03] 1696 RET 1697 LMEMCPY_4_PAD 1698 1699/* 1700 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1701 */ 1702 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1703 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1704#ifdef __ARMEB__ 1705 mov r1, r2, lsr #8 /* r1 = ...0 */ 1706 strb r1, [r0] 1707 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1708 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1709#else 1710 strb r2, [r0] 1711 mov r2, r2, lsr #8 /* r2 = ...1 */ 1712 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1713 mov r3, r3, lsr #8 /* r3 = ...3 */ 1714#endif 1715 strh r2, [r0, #0x01] 1716 strb r3, [r0, #0x03] 1717 RET 1718 LMEMCPY_4_PAD 1719 1720/* 1721 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1722 */ 1723 ldrb r2, [r1] 1724 ldrh r3, [r1, #0x01] 1725 ldrb r1, [r1, #0x03] 1726 strb r2, [r0] 1727 strh r3, [r0, #0x01] 1728 strb r1, [r0, #0x03] 1729 RET 1730 LMEMCPY_4_PAD 1731 1732/* 1733 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1734 */ 1735 ldr r2, [r1] 1736#ifdef __ARMEB__ 1737 strh r2, [r0, #0x02] 1738 mov r3, r2, lsr #16 1739 strh r3, [r0] 1740#else 1741 strh r2, [r0] 1742 mov r3, r2, lsr #16 1743 strh r3, [r0, #0x02] 1744#endif 1745 RET 1746 LMEMCPY_4_PAD 1747 1748/* 1749 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1750 */ 1751 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1752 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1753 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1754 strh r1, [r0] 1755#ifdef __ARMEB__ 1756 mov r2, r2, lsl #8 /* r2 = 012. */ 1757 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1758#else 1759 mov r2, r2, lsr #24 /* r2 = ...2 */ 1760 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1761#endif 1762 strh r2, [r0, #0x02] 1763 RET 1764 LMEMCPY_4_PAD 1765 1766/* 1767 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1768 */ 1769 ldrh r2, [r1] 1770 ldrh r3, [r1, #0x02] 1771 strh r2, [r0] 1772 strh r3, [r0, #0x02] 1773 RET 1774 LMEMCPY_4_PAD 1775 1776/* 1777 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1778 */ 1779 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1780 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1781 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1782 strh r1, [r0, #0x02] 1783#ifdef __ARMEB__ 1784 mov r3, r3, lsr #24 /* r3 = ...1 */ 1785 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1786#else 1787 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1788 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1789#endif 1790 strh r3, [r0] 1791 RET 1792 LMEMCPY_4_PAD 1793 1794/* 1795 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1796 */ 1797 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1798#ifdef __ARMEB__ 1799 strb r2, [r0, #0x03] 1800 mov r3, r2, lsr #8 1801 mov r1, r2, lsr #24 1802 strh r3, [r0, #0x01] 1803 strb r1, [r0] 1804#else 1805 strb r2, [r0] 1806 mov r3, r2, lsr #8 1807 mov r1, r2, lsr #24 1808 strh r3, [r0, #0x01] 1809 strb r1, [r0, #0x03] 1810#endif 1811 RET 1812 LMEMCPY_4_PAD 1813 1814/* 1815 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1816 */ 1817 ldrb r2, [r1] 1818 ldrh r3, [r1, #0x01] 1819 ldrb r1, [r1, #0x03] 1820 strb r2, [r0] 1821 strh r3, [r0, #0x01] 1822 strb r1, [r0, #0x03] 1823 RET 1824 LMEMCPY_4_PAD 1825 1826/* 1827 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1828 */ 1829#ifdef __ARMEB__ 1830 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1831 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1832 strb r3, [r0, #0x03] 1833 mov r3, r3, lsr #8 /* r3 = ...2 */ 1834 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1835 strh r3, [r0, #0x01] 1836 mov r2, r2, lsr #8 /* r2 = ...0 */ 1837 strb r2, [r0] 1838#else 1839 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1840 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1841 strb r2, [r0] 1842 mov r2, r2, lsr #8 /* r2 = ...1 */ 1843 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1844 strh r2, [r0, #0x01] 1845 mov r3, r3, lsr #8 /* r3 = ...3 */ 1846 strb r3, [r0, #0x03] 1847#endif 1848 RET 1849 LMEMCPY_4_PAD 1850 1851/* 1852 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1853 */ 1854 ldrb r2, [r1] 1855 ldrh r3, [r1, #0x01] 1856 ldrb r1, [r1, #0x03] 1857 strb r2, [r0] 1858 strh r3, [r0, #0x01] 1859 strb r1, [r0, #0x03] 1860 RET 1861 LMEMCPY_4_PAD 1862 1863 1864/****************************************************************************** 1865 * Special case for 6 byte copies 1866 */ 1867#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1868#define LMEMCPY_6_PAD 
.align LMEMCPY_6_LOG2 1869 LMEMCPY_6_PAD 1870.Lmemcpy_6: 1871 and r2, r1, #0x03 1872 orr r2, r2, r0, lsl #2 1873 ands r2, r2, #0x0f 1874 sub r3, pc, #0x14 1875 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1876 1877/* 1878 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1879 */ 1880 ldr r2, [r1] 1881 ldrh r3, [r1, #0x04] 1882 str r2, [r0] 1883 strh r3, [r0, #0x04] 1884 RET 1885 LMEMCPY_6_PAD 1886 1887/* 1888 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1889 */ 1890 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1891 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1892#ifdef __ARMEB__ 1893 mov r2, r2, lsl #8 /* r2 = 012. */ 1894 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1895#else 1896 mov r2, r2, lsr #8 /* r2 = .210 */ 1897 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1898#endif 1899 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1900 str r2, [r0] 1901 strh r3, [r0, #0x04] 1902 RET 1903 LMEMCPY_6_PAD 1904 1905/* 1906 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1907 */ 1908 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1909 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1910#ifdef __ARMEB__ 1911 mov r1, r3, lsr #16 /* r1 = ..23 */ 1912 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1913 str r1, [r0] 1914 strh r3, [r0, #0x04] 1915#else 1916 mov r1, r3, lsr #16 /* r1 = ..54 */ 1917 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1918 str r2, [r0] 1919 strh r1, [r0, #0x04] 1920#endif 1921 RET 1922 LMEMCPY_6_PAD 1923 1924/* 1925 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1926 */ 1927 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1928 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1929 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 1930#ifdef __ARMEB__ 1931 mov r2, r2, lsl #24 /* r2 = 0... */ 1932 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1933 mov r3, r3, lsl #8 /* r3 = 234. */ 1934 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1935#else 1936 mov r2, r2, lsr #24 /* r2 = ...0 */ 1937 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1938 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1939 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1940#endif 1941 str r2, [r0] 1942 strh r1, [r0, #0x04] 1943 RET 1944 LMEMCPY_6_PAD 1945 1946/* 1947 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1948 */ 1949 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1950 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1951 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1952 strh r1, [r0, #0x01] 1953#ifdef __ARMEB__ 1954 mov r1, r3, lsr #24 /* r1 = ...0 */ 1955 strb r1, [r0] 1956 mov r3, r3, lsl #8 /* r3 = 123. */ 1957 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1958#else 1959 strb r3, [r0] 1960 mov r3, r3, lsr #24 /* r3 = ...3 */ 1961 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1962 mov r2, r2, lsr #8 /* r2 = ...5 */ 1963#endif 1964 strh r3, [r0, #0x03] 1965 strb r2, [r0, #0x05] 1966 RET 1967 LMEMCPY_6_PAD 1968 1969/* 1970 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1971 */ 1972 ldrb r2, [r1] 1973 ldrh r3, [r1, #0x01] 1974 ldrh ip, [r1, #0x03] 1975 ldrb r1, [r1, #0x05] 1976 strb r2, [r0] 1977 strh r3, [r0, #0x01] 1978 strh ip, [r0, #0x03] 1979 strb r1, [r0, #0x05] 1980 RET 1981 LMEMCPY_6_PAD 1982 1983/* 1984 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1985 */ 1986 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1987 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1988#ifdef __ARMEB__ 1989 mov r3, r2, lsr #8 /* r3 = ...0 */ 1990 strb r3, [r0] 1991 strb r1, [r0, #0x05] 1992 mov r3, r1, lsr #8 /* r3 = .234 */ 1993 strh r3, [r0, #0x03] 1994 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 1995 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 1996 strh r3, [r0, #0x01] 1997#else 1998 strb r2, [r0] 1999 mov r3, r1, lsr #24 2000 strb r3, [r0, #0x05] 2001 mov r3, r1, lsr #8 /* r3 = .543 */ 2002 strh r3, [r0, #0x03] 2003 mov r3, r2, lsr #8 /* r3 = ...1 */ 2004 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2005 strh r3, [r0, #0x01] 2006#endif 2007 RET 2008 LMEMCPY_6_PAD 2009 2010/* 2011 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2012 */ 2013 ldrb r2, [r1] 2014 ldrh r3, [r1, #0x01] 2015 ldrh ip, [r1, #0x03] 2016 ldrb r1, [r1, #0x05] 2017 strb r2, [r0] 2018 strh r3, [r0, #0x01] 2019 strh ip, [r0, #0x03] 2020 strb r1, [r0, #0x05] 2021 RET 2022 LMEMCPY_6_PAD 2023 2024/* 2025 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2026 */ 2027#ifdef __ARMEB__ 2028 ldr r2, [r1] /* r2 = 0123 */ 2029 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2030 mov r1, r2, lsr #16 /* r1 = ..01 */ 2031 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2032 strh r1, [r0] 2033 str r3, [r0, #0x02] 2034#else 2035 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2036 ldr r3, [r1] /* r3 = 3210 */ 2037 mov r2, r2, lsl #16 /* r2 = 54.. */ 2038 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2039 strh r3, [r0] 2040 str r2, [r0, #0x02] 2041#endif 2042 RET 2043 LMEMCPY_6_PAD 2044 2045/* 2046 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2047 */ 2048 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2049 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2050 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2051#ifdef __ARMEB__ 2052 mov r2, r2, lsr #8 /* r2 = .345 */ 2053 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2054#else 2055 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 2056 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2057#endif 2058 strh r1, [r0] 2059 str r2, [r0, #0x02] 2060 RET 2061 LMEMCPY_6_PAD 2062 2063/* 2064 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2065 */ 2066 ldrh r2, [r1] 2067 ldr r3, [r1, #0x02] 2068 strh r2, [r0] 2069 str r3, [r0, #0x02] 2070 RET 2071 LMEMCPY_6_PAD 2072 2073/* 2074 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2075 */ 2076 ldrb r3, [r1] /* r3 = ...0 */ 2077 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2078 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2079#ifdef __ARMEB__ 2080 mov r3, r3, lsl #8 /* r3 = ..0. */ 2081 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2082 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2083#else 2084 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2085 mov r1, r1, lsl #24 /* r1 = 5... */ 2086 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2087#endif 2088 strh r3, [r0] 2089 str r1, [r0, #0x02] 2090 RET 2091 LMEMCPY_6_PAD 2092 2093/* 2094 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2095 */ 2096 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2097 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2098#ifdef __ARMEB__ 2099 mov r3, r2, lsr #24 /* r3 = ...0 */ 2100 strb r3, [r0] 2101 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2102 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2103#else 2104 strb r2, [r0] 2105 mov r2, r2, lsr #8 /* r2 = .321 */ 2106 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2107 mov r1, r1, lsr #8 /* r1 = ...5 */ 2108#endif 2109 str r2, [r0, #0x01] 2110 strb r1, [r0, #0x05] 2111 RET 2112 LMEMCPY_6_PAD 2113 2114/* 2115 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2116 */ 2117 ldrb r2, [r1] 2118 ldrh r3, [r1, #0x01] 2119 ldrh ip, [r1, #0x03] 2120 ldrb r1, [r1, #0x05] 2121 strb r2, [r0] 2122 strh r3, [r0, #0x01] 2123 strh ip, [r0, #0x03] 2124 strb r1, [r0, #0x05] 2125 RET 2126 LMEMCPY_6_PAD 2127 2128/* 2129 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2130 */ 2131 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2132 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2133#ifdef __ARMEB__ 2134 mov r3, r2, lsr #8 /* r3 = ...0 */ 2135 strb r3, [r0] 2136 mov r2, r2, lsl #24 /* r2 = 1... */ 2137 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2138#else 2139 strb r2, [r0] 2140 mov r2, r2, lsr #8 /* r2 = ...1 */ 2141 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2142 mov r1, r1, lsr #24 /* r1 = ...5 */ 2143#endif 2144 str r2, [r0, #0x01] 2145 strb r1, [r0, #0x05] 2146 RET 2147 LMEMCPY_6_PAD 2148 2149/* 2150 * 1111: dst is 8-bit aligned, src is 8-bit aligned 2151 */ 2152 ldrb r2, [r1] 2153 ldr r3, [r1, #0x01] 2154 ldrb r1, [r1, #0x05] 2155 strb r2, [r0] 2156 str r3, [r0, #0x01] 2157 strb r1, [r0, #0x05] 2158 RET 2159 LMEMCPY_6_PAD 2160 2161 2162/****************************************************************************** 2163 * Special case for 8 byte copies 2164 */ 2165#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2166#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2167 LMEMCPY_8_PAD 2168.Lmemcpy_8: 2169 and r2, r1, #0x03 2170 orr r2, r2, r0, lsl #2 2171 ands r2, r2, #0x0f 2172 sub r3, pc, #0x14 2173 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2174 2175/* 2176 * 0000: dst is 32-bit aligned, src is 32-bit aligned 2177 */ 2178 ldr r2, [r1] 2179 ldr r3, [r1, #0x04] 2180 str r2, [r0] 2181 str r3, [r0, #0x04] 
2182 RET 2183 LMEMCPY_8_PAD 2184 2185/* 2186 * 0001: dst is 32-bit aligned, src is 8-bit aligned 2187 */ 2188 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2189 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 2190 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2191#ifdef __ARMEB__ 2192 mov r3, r3, lsl #8 /* r3 = 012. */ 2193 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2194 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2195#else 2196 mov r3, r3, lsr #8 /* r3 = .210 */ 2197 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2198 mov r1, r1, lsl #24 /* r1 = 7... */ 2199 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2200#endif 2201 str r3, [r0] 2202 str r2, [r0, #0x04] 2203 RET 2204 LMEMCPY_8_PAD 2205 2206/* 2207 * 0010: dst is 32-bit aligned, src is 16-bit aligned 2208 */ 2209 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2210 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2211 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2212#ifdef __ARMEB__ 2213 mov r2, r2, lsl #16 /* r2 = 01.. */ 2214 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2215 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2216#else 2217 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2218 mov r3, r3, lsr #16 /* r3 = ..54 */ 2219 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2220#endif 2221 str r2, [r0] 2222 str r3, [r0, #0x04] 2223 RET 2224 LMEMCPY_8_PAD 2225 2226/* 2227 * 0011: dst is 32-bit aligned, src is 8-bit aligned 2228 */ 2229 ldrb r3, [r1] /* r3 = ...0 */ 2230 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2231 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2232#ifdef __ARMEB__ 2233 mov r3, r3, lsl #24 /* r3 = 0... */ 2234 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2235 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 2236 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2237#else 2238 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2239 mov r2, r2, lsr #24 /* r2 = ...4 */ 2240 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2241#endif 2242 str r3, [r0] 2243 str r2, [r0, #0x04] 2244 RET 2245 LMEMCPY_8_PAD 2246 2247/* 2248 * 0100: dst is 8-bit aligned, src is 32-bit aligned 2249 */ 2250 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2251 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2252#ifdef __ARMEB__ 2253 mov r1, r3, lsr #24 /* r1 = ...0 */ 2254 strb r1, [r0] 2255 mov r1, r3, lsr #8 /* r1 = .012 */ 2256 strb r2, [r0, #0x07] 2257 mov r3, r3, lsl #24 /* r3 = 3... */ 2258 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2259#else 2260 strb r3, [r0] 2261 mov r1, r2, lsr #24 /* r1 = ...7 */ 2262 strb r1, [r0, #0x07] 2263 mov r1, r3, lsr #8 /* r1 = .321 */ 2264 mov r3, r3, lsr #24 /* r3 = ...3 */ 2265 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2266#endif 2267 strh r1, [r0, #0x01] 2268 str r3, [r0, #0x03] 2269 RET 2270 LMEMCPY_8_PAD 2271 2272/* 2273 * 0101: dst is 8-bit aligned, src is 8-bit aligned 2274 */ 2275 ldrb r2, [r1] 2276 ldrh r3, [r1, #0x01] 2277 ldr ip, [r1, #0x03] 2278 ldrb r1, [r1, #0x07] 2279 strb r2, [r0] 2280 strh r3, [r0, #0x01] 2281 str ip, [r0, #0x03] 2282 strb r1, [r0, #0x07] 2283 RET 2284 LMEMCPY_8_PAD 2285 2286/* 2287 * 0110: dst is 8-bit aligned, src is 16-bit aligned 2288 */ 2289 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2290 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2291 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2292#ifdef __ARMEB__ 2293 mov ip, r2, lsr #8 /* ip = ...0 */ 2294 strb ip, [r0] 2295 mov ip, r2, lsl #8 /* ip = .01. */ 2296 orr ip, ip, r3, lsr #24 /* ip = .012 */ 2297 strb r1, [r0, #0x07] 2298 mov r3, r3, lsl #8 /* r3 = 345. 
*/ 2299 orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 2300#else 2301 strb r2, [r0] /* 0 */ 2302 mov ip, r1, lsr #8 /* ip = ...7 */ 2303 strb ip, [r0, #0x07] /* 7 */ 2304 mov ip, r2, lsr #8 /* ip = ...1 */ 2305 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2306 mov r3, r3, lsr #8 /* r3 = .543 */ 2307 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 2308#endif 2309 strh ip, [r0, #0x01] 2310 str r3, [r0, #0x03] 2311 RET 2312 LMEMCPY_8_PAD 2313 2314/* 2315 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2316 */ 2317 ldrb r3, [r1] /* r3 = ...0 */ 2318 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2319 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 2320 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2321 strb r3, [r0] 2322 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 2323#ifdef __ARMEB__ 2324 strh r3, [r0, #0x01] 2325 orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 2326#else 2327 strh ip, [r0, #0x01] 2328 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 2329#endif 2330 str r2, [r0, #0x03] 2331 strb r1, [r0, #0x07] 2332 RET 2333 LMEMCPY_8_PAD 2334 2335/* 2336 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2337 */ 2338 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2339 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2340 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2341#ifdef __ARMEB__ 2342 strh r1, [r0] 2343 mov r1, r3, lsr #16 /* r1 = ..45 */ 2344 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ 2345#else 2346 strh r2, [r0] 2347 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 2348 mov r3, r3, lsr #16 /* r3 = ..76 */ 2349#endif 2350 str r2, [r0, #0x02] 2351 strh r3, [r0, #0x06] 2352 RET 2353 LMEMCPY_8_PAD 2354 2355/* 2356 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2357 */ 2358 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2359 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2360 ldrb ip, [r1, #0x07] /* ip = ...7 */ 2361 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2362 strh r1, [r0] 2363#ifdef __ARMEB__ 2364 mov r1, r2, lsl #24 /* r1 = 2... 
*/ 2365 orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 2366 orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 2367#else 2368 mov r1, r2, lsr #24 /* r1 = ...2 */ 2369 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 2370 mov r3, r3, lsr #24 /* r3 = ...6 */ 2371 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 2372#endif 2373 str r1, [r0, #0x02] 2374 strh r3, [r0, #0x06] 2375 RET 2376 LMEMCPY_8_PAD 2377 2378/* 2379 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2380 */ 2381 ldrh r2, [r1] 2382 ldr ip, [r1, #0x02] 2383 ldrh r3, [r1, #0x06] 2384 strh r2, [r0] 2385 str ip, [r0, #0x02] 2386 strh r3, [r0, #0x06] 2387 RET 2388 LMEMCPY_8_PAD 2389 2390/* 2391 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2392 */ 2393 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 2394 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2395 ldrb ip, [r1] /* ip = ...0 */ 2396 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 2397 strh r1, [r0, #0x06] 2398#ifdef __ARMEB__ 2399 mov r3, r3, lsr #24 /* r3 = ...5 */ 2400 orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 2401 mov r2, r2, lsr #24 /* r2 = ...1 */ 2402 orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 2403#else 2404 mov r3, r3, lsl #24 /* r3 = 5... */ 2405 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 2406 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 2407#endif 2408 str r3, [r0, #0x02] 2409 strh r2, [r0] 2410 RET 2411 LMEMCPY_8_PAD 2412 2413/* 2414 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2415 */ 2416 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2417 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2418 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 2419 strh r1, [r0, #0x05] 2420#ifdef __ARMEB__ 2421 strb r3, [r0, #0x07] 2422 mov r1, r2, lsr #24 /* r1 = ...0 */ 2423 strb r1, [r0] 2424 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2425 orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 2426 str r2, [r0, #0x01] 2427#else 2428 strb r2, [r0] 2429 mov r1, r3, lsr #24 /* r1 = ...7 */ 2430 strb r1, [r0, #0x07] 2431 mov r2, r2, lsr #8 /* r2 = .321 */ 2432 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 2433 str r2, [r0, #0x01] 2434#endif 2435 RET 2436 LMEMCPY_8_PAD 2437 2438/* 2439 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2440 */ 2441 ldrb r3, [r1] /* r3 = ...0 */ 2442 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 2443 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2444 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2445 strb r3, [r0] 2446 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 2447#ifdef __ARMEB__ 2448 strh ip, [r0, #0x05] 2449 orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 2450#else 2451 strh r3, [r0, #0x05] 2452 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 2453#endif 2454 str r2, [r0, #0x01] 2455 strb r1, [r0, #0x07] 2456 RET 2457 LMEMCPY_8_PAD 2458 2459/* 2460 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2461 */ 2462 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2463 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2464 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2465#ifdef __ARMEB__ 2466 mov ip, r2, lsr #8 /* ip = ...0 */ 2467 strb ip, [r0] 2468 mov ip, r2, lsl #24 /* ip = 1... */ 2469 orr ip, ip, r3, lsr #8 /* ip = 1234 */ 2470 strb r1, [r0, #0x07] 2471 mov r1, r1, lsr #8 /* r1 = ...6 */ 2472 orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 2473#else 2474 strb r2, [r0] 2475 mov ip, r2, lsr #8 /* ip = ...1 */ 2476 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2477 mov r2, r1, lsr #8 /* r2 = ...7 */ 2478 strb r2, [r0, #0x07] 2479 mov r1, r1, lsl #8 /* r1 = .76. 
*/
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Both pointers are at odd byte offsets, so the 8-byte copy is done as
 * byte / word / halfword / byte accesses that are each naturally aligned.
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 *
 * Dispatched to from the generic memcpy path when the length is exactly 12.
 * On entry: r0 = dst, r1 = src.  The 16 alignment combinations of
 * (dst & 3, src & 3) are handled by a computed-goto jump table; each handler
 * occupies one 2^LMEMCPY_C_LOG2-byte slot, padded by LMEMCPY_C_PAD.
 * In the per-case comments below, the digits 0..B name the 12 source bytes
 * in memory order; "BE"/"LE" show the byte lanes of a loaded register for
 * big- and little-endian (__ARMEB__) builds; "." and "x" are don't-cares.
 */
#define LMEMCPY_C_LOG2	7	/* 128 bytes */
#define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 |= (dst & 3) << 2 (upper dst bits masked next) */
	ands	r2, r2, #0x0f		/* r2 = 4-bit case index; Z set for case 0000 */
	sub	r3, pc, #0x14		/* r3 = base of the case table; pc reads as
					 * this insn + 8, and case 0000 starts 3 insns
					 * (12 bytes) further on: 8 + 12 = 0x14 */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* jump to 128-byte slot; fall through if aligned */

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
					/* NOTE(review): [r1, #-1] reads one byte
					 * before src — assumes that byte is
					 * readable, as in the 8-byte cases. */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
					/* NOTE(review): reads one byte past
					 * src+11 — same over-read convention as
					 * the other misaligned cases. */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]		/* BE: low byte of 89AB is already B */
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 *
 * Matching misalignment: no byte-lane shuffling needed, every access
 * below is naturally aligned.
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Matching halfword alignment: straight halfword/word copies.
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]		/* low byte of 89AB is B */
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]		/* stores low half 9A */
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 *
 * The two endiannesses use different load orders, so the whole case is
 * duplicated rather than sharing a load preamble.
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 *
 * Matching misalignment: naturally aligned byte/word/halfword accesses.
 * Last table slot, so no trailing LMEMCPY_C_PAD is needed.
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
#endif /* _ARM_ARCH_5E */

#ifdef GPROF

/*
 * NOTE(review): these look like the classic kernel-profiling boundary
 * labels (user/trap/interrupt regions) that gprof-style sampling uses to
 * classify PC samples — confirm against the kernel's profiling support.
 * Each is a distinct symbol wrapping a single nop.
 */
ENTRY(user)
	nop
ENTRY(btrap)
	nop
ENTRY(etrap)
	nop
ENTRY(bintr)
	nop
ENTRY(eintr)
	nop

#endif