1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
*/

#include <machine/asm.h>
#include <machine/asmacros.h>
__FBSDID("$FreeBSD$");

#include "assym.s"

/*
 * Literal pool: addresses of optional platform-registered bulk-copy /
 * bulk-zero helpers and their minimum-size thresholds.  A NULL function
 * pointer means no accelerated helper is installed.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)
/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
/* LINTSTUB: Func: void bzero(void *, size_t) */
ENTRY(bzero)
	/* If a platform _arm_bzero helper is registered and the request is
	 * at least _min_bzero_size bytes, try it first. */
	ldr	r3, .L_arm_bzero
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal0		/* no helper installed */
	ldr	r2, .L_min_bzero_size
	ldr	r2, [r2]
	cmp	r1, r2
	blt	.Lnormal0		/* too small to be worth it */
	stmfd	sp!, {r0, r1, lr}
	mov	r2, #0
	mov	lr, pc			/* pre-BLX indirect call: lr = return addr */
	mov	pc, r3
	cmp	r0, #0			/* helper returns 0 on success; test before restore */
	ldmfd	sp!, {r0, r1, lr}	/* ldm does not touch flags */
	RETeq				/* helper handled it */
.Lnormal0:
	mov	r3, #0x00		/* fill byte = 0, then share memset's body */
	b	do_memset

/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2			/* r1 = byte count from here on */
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0			/* ip = cursor; r0 preserved as return value */
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef __XSCALE__
	tst	ip, #0x04		/* Quad-align for Xscale (strd needs 8-byte alignment) */
#else
	cmp	r1, #0x10
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef __XSCALE__
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef __XSCALE__
	strged	r2, [ip], #0x08		/* strd (ge): 8 bytes per store */
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop128
	RETeq				/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef __XSCALE__
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop32
	RETeq				/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef __XSCALE__
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	RETeq				/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq				/* Zero length so just exit */

#ifdef __XSCALE__
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2			/* r1 went negative above: -3..-1 remaining+4 */
#endif

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET				/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach word alignment (1-3) */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strgtb	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq				/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET				/* Exit */

/*
 * bcmp(b1, b2, len): memcmp-style compare (internal labels are
 * .Lmemcmp_*); returns 0 if equal, else the difference of the first
 * mismatching bytes.  ip is used as the b1 cursor so r0 is free to
 * accumulate the result.
 */
ENTRY(bcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes		/* fast path for the common 6-byte case */
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eornes	r3, ip, r1
	RETeq				/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/* Word-align the addresses, if necessary */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1	/* r3 *= 3 */
	addne	pc, pc, r3, lsl #3	/* jump into ladder: each stanza below is 6 insns = 24 bytes */
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq				/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results.  Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldreqb	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne				/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldreqb	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldreqb	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne				/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldreqb	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne				/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldreqb	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldreqb	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne				/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldreqb	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne				/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET

/* bcopy(src, dst, len): old-style copy — swap args and fall into memmove */
ENTRY(bcopy)
	/* switch the source and destination registers (3-eor swap, no temp) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
ENTRY(memmove)
	/* Do the buffers overlap?
*/
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: tail-call memcpy */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* dst > src: copy high-to-low */

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* word-align src; r12 = src misalignment (1-3) */
	ldr	lr, [r1], #4		/* prime lr with the first partial word */
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src is 1 byte past word alignment: shift/merge 8-bit slices */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* rewind src to true byte position */
	b	.Lmemmove_fl4

	/* src is 2 bytes past word alignment: shift/merge 16-bit slices */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* rewind src to true byte position */
	b	.Lmemmove_fl4

	/* src is 3 bytes past word alignment: shift/merge 24-bit slices */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* rewind src to true byte position */
	b	.Lmemmove_fl4

	/* Overlapping, dst > src: copy from the ends downwards */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* word-align src; r12 = src misalignment (1-3) */
	ldr	r3, [r1, #0]		/* prime r3 with the first partial word */
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* backwards variant of the 24-bit shift/merge copy */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore src to true byte position */
	b	.Lmemmove_bl4

	/* backwards variant of the 16-bit shift/merge copy */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore src to true byte position */
	b	.Lmemmove_bl4

	/* backwards variant of the 8-bit shift/merge copy */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore src to true byte position */
	b	.Lmemmove_bl4

#if !defined(__XSCALE__)
/* Generic-ARM memcpy: ldm/stm based bulk copy with alignment fix-up */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	/* Do not check arm_memcpy if we're running from flash */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	/* Try a platform-registered _arm_memcpy helper for large copies */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* pre-BLX indirect call */
	ldr	pc, [r4]
	cmp	r0, #0			/* helper returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq

.Lnormal:
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#ifdef __APCS_26_
	/* NOTE(review): "__APCS_26_" looks like a typo for "__APCS_26__";
	 * as written this branch is likely never compiled in — confirm. */
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	bic	r1, r1, #3		/* word-align src; r12 = src misalignment (1-3) */
	ldr	lr, [r1], #4		/* prime lr with the first partial word */
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src 1 byte past alignment: 8-bit shift/merge (little-endian form only here) */
.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* rewind src to true byte position */
	b	.Lmemcpy_l4

	/* src 2 bytes past alignment: 16-bit shift/merge */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* rewind src to true byte position */
	b	.Lmemcpy_l4

	/* src 3 bytes past alignment: 24-bit shift/merge */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* rewind src to true byte position */
	b	.Lmemcpy_l4
#else
/* XScale memcpy: ldrd/strd + pld based, hand-scheduled for the XScale core */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	pld	[r1]			/* prefetch the first source line */
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* pre-BLX indirect call */
	ldr	pc, [r4]
	cmp	r0, #0			/* helper returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq
.Lnormal:
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrleb	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strleb	ip, [r3], #0x01
	ldrltb	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strltb	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope.
Things just got bad */ 1159 1160 /* Quad-align the destination buffer */ 1161 tst r3, #0x07 /* Already quad aligned? */ 1162 ldrne ip, [r1], #0x04 1163 stmfd sp!, {r4-r9} /* Free up some registers */ 1164 subne r2, r2, #0x04 1165 strne ip, [r3], #0x04 1166 1167 /* Destination buffer quad aligned, source is at least word aligned */ 1168 subs r2, r2, #0x80 1169 blt .Lmemcpy_w_lessthan128 1170 1171 /* Copy 128 bytes at a time */ 1172.Lmemcpy_w_loop128: 1173 ldr r4, [r1], #0x04 /* LD:00-03 */ 1174 ldr r5, [r1], #0x04 /* LD:04-07 */ 1175 pld [r1, #0x18] /* Prefetch 0x20 */ 1176 ldr r6, [r1], #0x04 /* LD:08-0b */ 1177 ldr r7, [r1], #0x04 /* LD:0c-0f */ 1178 ldr r8, [r1], #0x04 /* LD:10-13 */ 1179 ldr r9, [r1], #0x04 /* LD:14-17 */ 1180 strd r4, [r3], #0x08 /* ST:00-07 */ 1181 ldr r4, [r1], #0x04 /* LD:18-1b */ 1182 ldr r5, [r1], #0x04 /* LD:1c-1f */ 1183 strd r6, [r3], #0x08 /* ST:08-0f */ 1184 ldr r6, [r1], #0x04 /* LD:20-23 */ 1185 ldr r7, [r1], #0x04 /* LD:24-27 */ 1186 pld [r1, #0x18] /* Prefetch 0x40 */ 1187 strd r8, [r3], #0x08 /* ST:10-17 */ 1188 ldr r8, [r1], #0x04 /* LD:28-2b */ 1189 ldr r9, [r1], #0x04 /* LD:2c-2f */ 1190 strd r4, [r3], #0x08 /* ST:18-1f */ 1191 ldr r4, [r1], #0x04 /* LD:30-33 */ 1192 ldr r5, [r1], #0x04 /* LD:34-37 */ 1193 strd r6, [r3], #0x08 /* ST:20-27 */ 1194 ldr r6, [r1], #0x04 /* LD:38-3b */ 1195 ldr r7, [r1], #0x04 /* LD:3c-3f */ 1196 strd r8, [r3], #0x08 /* ST:28-2f */ 1197 ldr r8, [r1], #0x04 /* LD:40-43 */ 1198 ldr r9, [r1], #0x04 /* LD:44-47 */ 1199 pld [r1, #0x18] /* Prefetch 0x60 */ 1200 strd r4, [r3], #0x08 /* ST:30-37 */ 1201 ldr r4, [r1], #0x04 /* LD:48-4b */ 1202 ldr r5, [r1], #0x04 /* LD:4c-4f */ 1203 strd r6, [r3], #0x08 /* ST:38-3f */ 1204 ldr r6, [r1], #0x04 /* LD:50-53 */ 1205 ldr r7, [r1], #0x04 /* LD:54-57 */ 1206 strd r8, [r3], #0x08 /* ST:40-47 */ 1207 ldr r8, [r1], #0x04 /* LD:58-5b */ 1208 ldr r9, [r1], #0x04 /* LD:5c-5f */ 1209 strd r4, [r3], #0x08 /* ST:48-4f */ 1210 ldr r4, [r1], #0x04 /* LD:60-63 */ 1211 ldr r5, 
[r1], #0x04	/* LD:64-67 */
        pld     [r1, #0x18]             /* Prefetch 0x80 */
        strd    r6, [r3], #0x08         /* ST:50-57 */
        ldr     r6, [r1], #0x04         /* LD:68-6b */
        ldr     r7, [r1], #0x04         /* LD:6c-6f */
        strd    r8, [r3], #0x08         /* ST:58-5f */
        ldr     r8, [r1], #0x04         /* LD:70-73 */
        ldr     r9, [r1], #0x04         /* LD:74-77 */
        strd    r4, [r3], #0x08         /* ST:60-67 */
        ldr     r4, [r1], #0x04         /* LD:78-7b */
        ldr     r5, [r1], #0x04         /* LD:7c-7f */
        strd    r6, [r3], #0x08         /* ST:68-6f */
        strd    r8, [r3], #0x08         /* ST:70-77 */
        subs    r2, r2, #0x80
        strd    r4, [r3], #0x08         /* ST:78-7f */
        bge     .Lmemcpy_w_loop128

        /*
         * Fewer than 128 bytes remain (r2 went negative in the
         * loop above).  r1 = src, r3 = dst, r2 = count, r4-r9 saved.
         */
.Lmemcpy_w_lessthan128:
        adds    r2, r2, #0x80           /* Adjust for extra sub */
        ldmeqfd sp!, {r4-r9}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x20
        blt     .Lmemcpy_w_lessthan32

        /* Copy 32 bytes at a time */
.Lmemcpy_w_loop32:
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        pld     [r1, #0x18]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     r8, [r1], #0x04
        ldr     r9, [r1], #0x04
        strd    r4, [r3], #0x08
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        strd    r6, [r3], #0x08
        strd    r8, [r3], #0x08
        subs    r2, r2, #0x20
        strd    r4, [r3], #0x08
        bge     .Lmemcpy_w_loop32

.Lmemcpy_w_lessthan32:
        adds    r2, r2, #0x20           /* Adjust for extra sub */
        ldmeqfd sp!, {r4-r9}
        RETeq                           /* Return now if done */

        /*
         * Computed goto: copy the remaining (r2 & 0x18) bytes by
         * jumping into the unrolled 8-byte copies below.  Each
         * 8-byte chunk is 4 instructions (16 bytes of code), so we
         * skip 2 * (0x18 - (r2 & 0x18)) bytes forward; if all 24
         * bytes remain (r4 == 0, EQ), we fall straight through.
         */
        and     r4, r2, #0x18
        rsbs    r4, r4, #0x18
        addne   pc, pc, r4, lsl #1
        nop

        /* At least 24 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        sub     r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* At least 16 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        sub     r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* At least 8 bytes remaining */
        ldr     r4, [r1], #0x04
        ldr     r5, [r1], #0x04
        subs    r2, r2, #0x08
        strd    r4, [r3], #0x08

        /* Less than 8 bytes remaining */
        ldmfd   sp!, {r4-r9}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        ldrge   ip, [r1], #0x04
        strge   ip, [r3], #0x04
        RETeq                           /* Return now if done */
        addlt   r2, r2, #0x04
        /* 1-3 trailing bytes: conditional byte copies based on r2 */
        ldrb    ip, [r1], #0x01
        cmp     r2, #0x02
        ldrgeb  r2, [r1], #0x01
        strb    ip, [r3], #0x01
        ldrgtb  ip, [r1]
        strgeb  r2, [r3], #0x01
        strgtb  ip, [r3]
        RET


/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer is word aligned, but the source buffer is not.
 * ip holds (src & 3); the source is rounded down to a word boundary and
 * words are reassembled with shift/orr pairs below.
 */
.Lmemcpy_bad_align:
        stmfd   sp!, {r4-r7}
        bic     r1, r1, #0x03
        cmp     ip, #2
        ldr     ip, [r1], #0x04
        bgt     .Lmemcpy_bad3
        beq     .Lmemcpy_bad2
        b       .Lmemcpy_bad1

        /* Source is 1 byte past a word boundary (shifts by 8/24) */
.Lmemcpy_bad1_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #8
#else
        mov     r4, ip, lsr #8
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #24
        mov     r5, r5, lsl #8
        orr     r5, r5, r6, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r7, lsr #24
        mov     r7, r7, lsl #8
        orr     r7, r7, ip, lsr #24
#else
        orr     r4, r4, r5, lsl #24
        mov     r5, r5, lsr #8
        orr     r5, r5, r6, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r7, lsl #24
        mov     r7, r7, lsr #8
        orr     r7, r7, ip, lsl #24
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad1:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad1_loop16

        adds    r2, r2, #0x10
        ldmeqfd sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x03           /* Undo word rounding for tail */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad1_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #8
#else
        mov     r4, ip, lsr #8
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #24
#else
        orr     r4, r4, ip, lsl #24
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad1_loop4
        sub     r1, r1, #0x03
        b       .Lmemcpy_bad_done

        /* Source is 2 bytes past a word boundary (shifts by 16) */
.Lmemcpy_bad2_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #16
#else
        mov     r4, ip, lsr #16
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #16
        mov     r5, r5, lsl #16
        orr     r5, r5, r6, lsr #16
        mov     r6, r6, lsl #16
        orr     r6, r6, r7, lsr #16
        mov     r7, r7, lsl #16
        orr     r7, r7, ip, lsr #16
#else
        orr     r4, r4, r5, lsl #16
        mov     r5, r5, lsr #16
        orr     r5, r5, r6, lsl #16
        mov     r6, r6, lsr #16
        orr     r6, r6, r7, lsl #16
        mov     r7, r7, lsr #16
        orr     r7, r7, ip, lsl #16
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad2:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad2_loop16

        adds    r2, r2, #0x10
        ldmeqfd sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x02           /* Undo word rounding for tail */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad2_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #16
#else
        mov     r4, ip, lsr #16
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #16
#else
        orr     r4, r4, ip, lsl #16
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad2_loop4
        sub     r1, r1, #0x02
        b       .Lmemcpy_bad_done

        /* Source is 3 bytes past a word boundary (shifts by 24/8) */
.Lmemcpy_bad3_loop16:
#ifdef __ARMEB__
        mov     r4, ip, lsl #24
#else
        mov     r4, ip, lsr #24
#endif
        ldr     r5, [r1], #0x04
        pld     [r1, #0x018]
        ldr     r6, [r1], #0x04
        ldr     r7, [r1], #0x04
        ldr     ip, [r1], #0x04
#ifdef __ARMEB__
        orr     r4, r4, r5, lsr #8
        mov     r5, r5, lsl #24
        orr     r5, r5, r6, lsr #8
        mov     r6, r6, lsl #24
        orr     r6, r6, r7, lsr #8
        mov     r7, r7, lsl #24
        orr     r7, r7, ip, lsr #8
#else
        orr     r4, r4, r5, lsl #8
        mov     r5, r5, lsr #24
        orr     r5, r5, r6, lsl #8
        mov     r6, r6, lsr #24
        orr     r6, r6, r7, lsl #8
        mov     r7, r7, lsr #24
        orr     r7, r7, ip, lsl #8
#endif
        str     r4, [r3], #0x04
        str     r5, [r3], #0x04
        str     r6, [r3], #0x04
        str     r7, [r3], #0x04
.Lmemcpy_bad3:
        subs    r2, r2, #0x10
        bge     .Lmemcpy_bad3_loop16

        adds    r2, r2, #0x10
        ldmeqfd sp!, {r4-r7}
        RETeq                           /* Return now if done */
        subs    r2, r2, #0x04
        sublt   r1, r1, #0x01           /* Undo word rounding for tail */
        blt     .Lmemcpy_bad_done

.Lmemcpy_bad3_loop4:
#ifdef __ARMEB__
        mov     r4, ip, lsl #24
#else
        mov     r4, ip, lsr #24
#endif
        ldr     ip, [r1], #0x04
        subs    r2, r2, #0x04
#ifdef __ARMEB__
        orr     r4, r4, ip, lsr #8
#else
        orr     r4, r4, ip, lsl #8
#endif
        str     r4, [r3], #0x04
        bge     .Lmemcpy_bad3_loop4
        sub     r1, r1, #0x01

        /* Copy the final 1-3 bytes (r2 + 4 after the loops above) */
.Lmemcpy_bad_done:
        ldmfd   sp!, {r4-r7}
        adds    r2, r2, #0x04
        RETeq
        ldrb    ip, [r1], #0x01
        cmp     r2, #0x02
        ldrgeb  r2, [r1], #0x01
        strb    ip, [r3], #0x01
        ldrgtb  ip, [r1]
        strgeb  r2, [r3], #0x01
        strgtb  ip, [r3]
        RET


/*
 * Handle short copies (less than 16 bytes), possibly misaligned.
 * Some of these are *very* common, thanks to the network stack,
 * and so are handled specially.
 */
.Lmemcpy_short:
        /*
         * Computed goto on the byte count in r2 (0..0x0c): pc reads
         * as "this insn + 8", so entry k lands on the k-th slot of
         * the branch table below.
         */
        add     pc, pc, r2, lsl #2
        nop
        RET                             /* 0x00 */
        b       .Lmemcpy_bytewise       /* 0x01 */
        b       .Lmemcpy_bytewise       /* 0x02 */
        b       .Lmemcpy_bytewise       /* 0x03 */
        b       .Lmemcpy_4              /* 0x04 */
        b       .Lmemcpy_bytewise       /* 0x05 */
        b       .Lmemcpy_6              /* 0x06 */
        b       .Lmemcpy_bytewise       /* 0x07 */
        b       .Lmemcpy_8              /* 0x08 */
        b       .Lmemcpy_bytewise       /* 0x09 */
        b       .Lmemcpy_bytewise       /* 0x0a */
        b       .Lmemcpy_bytewise       /* 0x0b */
        b       .Lmemcpy_c              /* 0x0c */
.Lmemcpy_bytewise:
        mov     r3, r0                  /* We must not clobber r0 */
        ldrb    ip, [r1], #0x01
1:      subs    r2, r2, #0x01
        strb    ip, [r3], #0x01
        ldrneb  ip, [r1], #0x01
        bne     1b
        RET

/******************************************************************************
 * Special case for 4 byte copies
 */
#define LMEMCPY_4_LOG2  6       /* 64 bytes */
#define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
        LMEMCPY_4_PAD
.Lmemcpy_4:
        /*
         * Dispatch on r2 = ((dst & 3) << 2) | (src & 3).  Each of
         * the 16 alignment cases below is padded to 64 bytes; r3 is
         * the table base (pc reads as this insn + 8, minus 0x14 for
         * the five dispatch instructions).  Case 0000 falls through.
         */
        and     r2, r1, #0x03
        orr     r2, r2, r0, lsl #2
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14
        addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        str     r2, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = 012. */
        orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
#else
        mov     r3, r3, lsr #8          /* r3 = .210 */
        orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
#endif
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
        ldrh    r3, [r1]
        ldrh    r2, [r1, #0x02]
#else
        ldrh    r3, [r1, #0x02]
        ldrh    r2, [r1]
#endif
        orr     r3, r2, r3, lsl #16
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
        ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #24         /* r3 = 0... */
        orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
#else
        mov     r3, r3, lsr #24         /* r3 = ...0 */
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
#endif
        str     r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
#ifdef __ARMEB__
        strb    r2, [r0, #0x03]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strb    r1, [r0]
#else
        strb    r2, [r0]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strb    r1, [r0, #0x03]
#endif
        strh    r3, [r0, #0x01]
        RET
        LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
#ifdef __ARMEB__
        mov     r1, r2, lsr #8          /* r1 = ...0 */
        strb    r1, [r0]
        mov     r2, r2, lsl #8          /* r2 = .01. */
        orr     r2, r2, r3, lsr #8      /* r2 = .012 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = .321 */
        mov     r3, r3, lsr #8          /* r3 = ...3 */
#endif
        strh    r2, [r0, #0x01]
        strb    r3, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
#ifdef __ARMEB__
        strh    r2, [r0, #0x02]
        mov     r3, r2, lsr #16
        strh    r3, [r0]
#else
        strh    r2, [r0]
        mov     r3, r2, lsr #16
        strh    r3, [r0, #0x02]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
        mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
        strh    r1, [r0]
#ifdef __ARMEB__
        mov     r2, r2, lsl #8          /* r2 = 012. */
        orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
#else
        mov     r2, r2, lsr #24         /* r2 = ...2 */
        orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
#endif
        strh    r2, [r0, #0x02]
        RET
        LMEMCPY_4_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldrh    r3, [r1, #0x02]
        strh    r2, [r0]
        strh    r3, [r0, #0x02]
        RET
        LMEMCPY_4_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
        ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
        mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
        strh    r1, [r0, #0x02]
#ifdef __ARMEB__
        mov     r3, r3, lsr #24         /* r3 = ...1 */
        orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
#else
        mov     r3, r3, lsl #8          /* r3 = 321. */
        orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
#endif
        strh    r3, [r0]
        RET
        LMEMCPY_4_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
#ifdef __ARMEB__
        strb    r2, [r0, #0x03]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strh    r3, [r0, #0x01]
        strb    r1, [r0]
#else
        strb    r2, [r0]
        mov     r3, r2, lsr #8
        mov     r1, r2, lsr #24
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        strb    r3, [r0, #0x03]
        mov     r3, r3, lsr #8          /* r3 = ...2 */
        orr     r3, r3, r2, lsl #8      /* r3 = ..12 */
        strh    r3, [r0, #0x01]
        mov     r2, r2, lsr #8          /* r2 = ...0 */
        strb    r2, [r0]
#else
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = .321 */
        strh    r2, [r0, #0x01]
        mov     r3, r3, lsr #8          /* r3 = ...3 */
        strb    r3, [r0, #0x03]
#endif
        RET
        LMEMCPY_4_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrb    r1, [r1, #0x03]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strb    r1, [r0, #0x03]
        RET
        LMEMCPY_4_PAD


/******************************************************************************
 * Special case for 6 byte copies
 */
#define LMEMCPY_6_LOG2  6       /* 64 bytes */
#define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
        LMEMCPY_6_PAD
.Lmemcpy_6:
        /* Same alignment dispatch as .Lmemcpy_4 (64-byte case slots) */
        and     r2, r1, #0x03
        orr     r2, r2, r0, lsl #2
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14
        addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        ldrh    r3, [r1, #0x04]
        str     r2, [r0]
        strh    r3, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #8          /* r2 = 012. */
        orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
#else
        mov     r2, r2, lsr #8          /* r2 = .210 */
        orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
#endif
        mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
        str     r2, [r0]
        strh    r3, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
#ifdef __ARMEB__
        mov     r1, r3, lsr #16         /* r1 = ..23 */
        orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
        str     r1, [r0]
        strh    r3, [r0, #0x04]
#else
        mov     r1, r3, lsr #16         /* r1 = ..54 */
        orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
        str     r2, [r0]
        strh    r1, [r0, #0x04]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
        ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
        ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #24         /* r2 = 0... */
        orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
        mov     r3, r3, lsl #8          /* r3 = 234. */
        orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
#else
        mov     r2, r2, lsr #24         /* r2 = ...0 */
        orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
        mov     r1, r1, lsl #8          /* r1 = xx5. */
        orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
#endif
        str     r2, [r0]
        strh    r1, [r0, #0x04]
        RET
        LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
        ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
        mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
        strh    r1, [r0, #0x01]
#ifdef __ARMEB__
        mov     r1, r3, lsr #24         /* r1 = ...0 */
        strb    r1, [r0]
        mov     r3, r3, lsl #8          /* r3 = 123. */
        orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
#else
        strb    r3, [r0]
        mov     r3, r3, lsr #24         /* r3 = ...3 */
        orr     r3, r3, r2, lsl #8      /* r3 = .543 */
        mov     r2, r2, lsr #8          /* r2 = ...5 */
#endif
        strh    r3, [r0, #0x03]
        strb    r2, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #8          /* r3 = ...0 */
        strb    r3, [r0]
        strb    r1, [r0, #0x05]
        mov     r3, r1, lsr #8          /* r3 = .234 */
        strh    r3, [r0, #0x03]
        mov     r3, r2, lsl #8          /* r3 = .01. */
        orr     r3, r3, r1, lsr #24     /* r3 = .012 */
        strh    r3, [r0, #0x01]
#else
        strb    r2, [r0]
        mov     r3, r1, lsr #24
        strb    r3, [r0, #0x05]
        mov     r3, r1, lsr #8          /* r3 = .543 */
        strh    r3, [r0, #0x03]
        mov     r3, r2, lsr #8          /* r3 = ...1 */
        orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
        strh    r3, [r0, #0x01]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
#ifdef __ARMEB__
        ldr     r2, [r1]                /* r2 = 0123 */
        ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
        mov     r1, r2, lsr #16         /* r1 = ..01 */
        orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
        strh    r1, [r0]
        str     r3, [r0, #0x02]
#else
        ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
        ldr     r3, [r1]                /* r3 = 3210 */
        mov     r2, r2, lsl #16         /* r2 = 54.. */
        orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
        strh    r3, [r0]
        str     r2, [r0, #0x02]
#endif
        RET
        LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
        mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
#ifdef __ARMEB__
        mov     r2, r2, lsr #8          /* r2 = .345 */
        orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
#else
        mov     r2, r2, lsl #8          /* r2 = 543. */
        orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
#endif
        strh    r1, [r0]
        str     r2, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldr     r3, [r1, #0x02]
        strh    r2, [r0]
        str     r3, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldrb    r3, [r1]                /* r3 = ...0 */
        ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
        ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = ..0. */
        orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
        orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
#else
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
        mov     r1, r1, lsl #24         /* r1 = 5... */
        orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
#endif
        strh    r3, [r0]
        str     r1, [r0, #0x02]
        RET
        LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
        ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #24         /* r3 = ...0 */
        strb    r3, [r0]
        mov     r2, r2, lsl #8          /* r2 = 123. */
        orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = .321 */
        orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
        mov     r1, r1, lsr #8          /* r1 = ...5 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldrh    ip, [r1, #0x03]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        strh    ip, [r0, #0x03]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
        mov     r3, r2, lsr #8          /* r3 = ...0 */
        strb    r3, [r0]
        mov     r2, r2, lsl #24         /* r2 = 1... */
        orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
        mov     r1, r1, lsr #24         /* r1 = ...5 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldr     r3, [r1, #0x01]
        ldrb    r1, [r1, #0x05]
        strb    r2, [r0]
        str     r3, [r0, #0x01]
        strb    r1, [r0, #0x05]
        RET
        LMEMCPY_6_PAD


/******************************************************************************
 * Special case for 8 byte copies
 */
#define LMEMCPY_8_LOG2  6       /* 64 bytes */
#define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
        LMEMCPY_8_PAD
.Lmemcpy_8:
        /* Same alignment dispatch as .Lmemcpy_4 (64-byte case slots) */
        and     r2, r1, #0x03
        orr     r2, r2, r0, lsl #2
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14
        addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        ldr     r3, [r1, #0x04]
        str     r2, [r0]
        str     r3, [r0, #0x04]
        RET
        LMEMCPY_8_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
        ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
        ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #8          /* r3 = 012. */
        orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
        orr     r2, r1, r2, lsl #8      /* r2 = 4567 */
#else
        mov     r3, r3, lsr #8          /* r3 = .210 */
        orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
        mov     r1, r1, lsl #24         /* r1 = 7... */
        orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
#endif
        str     r3, [r0]
        str     r2, [r0, #0x04]
        RET
        LMEMCPY_8_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #16         /* r2 = 01.. */
        orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
        orr     r3, r1, r3, lsl #16     /* r3 = 4567 */
#else
        orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
        mov     r3, r3, lsr #16         /* r3 = ..54 */
        orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
#endif
        str     r2, [r0]
        str     r3, [r0, #0x04]
        RET
        LMEMCPY_8_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldrb    r3, [r1]                /* r3 = ...0 */
        ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
        ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
#ifdef __ARMEB__
        mov     r3, r3, lsl #24         /* r3 = 0... */
        orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
        mov     r2, r2, lsl #24         /* r2 = 4... */
        orr     r2, r2, r1, lsr #8      /* r2 = 4567 */
#else
        orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
        mov     r2, r2, lsr #24         /* r2 = ...4 */
        orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
#endif
        str     r3, [r0]
        str     r2, [r0, #0x04]
        RET
        LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
        ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
#ifdef __ARMEB__
        mov     r1, r3, lsr #24         /* r1 = ...0 */
        strb    r1, [r0]
        mov     r1, r3, lsr #8          /* r1 = .012 */
        strb    r2, [r0, #0x07]
        mov     r3, r3, lsl #24         /* r3 = 3... */
        orr     r3, r3, r2, lsr #8      /* r3 = 3456 */
#else
        strb    r3, [r0]
        mov     r1, r2, lsr #24         /* r1 = ...7 */
        strb    r1, [r0, #0x07]
        mov     r1, r3, lsr #8          /* r1 = .321 */
        mov     r3, r3, lsr #24         /* r3 = ...3 */
        orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
#endif
        strh    r1, [r0, #0x01]
        str     r3, [r0, #0x03]
        RET
        LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldr     ip, [r1, #0x03]
        ldrb    r1, [r1, #0x07]
        strb    r2, [r0]
        strh    r3, [r0, #0x01]
        str     ip, [r0, #0x03]
        strb    r1, [r0, #0x07]
        RET
        LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
        mov     ip, r2, lsr #8          /* ip = ...0 */
        strb    ip, [r0]
        mov     ip, r2, lsl #8          /* ip = .01. */
        orr     ip, ip, r3, lsr #24     /* ip = .012 */
        strb    r1, [r0, #0x07]
        mov     r3, r3, lsl #8          /* r3 = 345. */
        orr     r3, r3, r1, lsr #8      /* r3 = 3456 */
#else
        strb    r2, [r0]                /* 0 */
        mov     ip, r1, lsr #8          /* ip = ...7 */
        strb    ip, [r0, #0x07]         /* 7 */
        mov     ip, r2, lsr #8          /* ip = ...1 */
        orr     ip, ip, r3, lsl #8      /* ip = 4321 */
        mov     r3, r3, lsr #8          /* r3 = .543 */
        orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
#endif
        strh    ip, [r0, #0x01]
        str     r3, [r0, #0x03]
        RET
        LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r3, [r1]                /* r3 = ...0 */
        ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
        ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
        ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
        strb    r3, [r0]
        mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
        strh    r3, [r0, #0x01]
        orr     r2, r2, ip, lsl #16     /* r2 = 3456 */
#else
        strh    ip, [r0, #0x01]
        orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
#endif
        str     r2, [r0, #0x03]
        strb    r1, [r0, #0x07]
        RET
        LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
        ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
        mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
        strh    r1, [r0]
        mov     r1, r3, lsr #16         /* r1 = ..45 */
        orr     r2, r1 ,r2, lsl #16     /* r2 = 2345 */
#else
        strh    r2, [r0]
        orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
        mov     r3, r3, lsr #16         /* r3 = ..76 */
#endif
        str     r2, [r0, #0x02]
        strh    r3, [r0, #0x06]
        RET
        LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
        ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
        ldrb    ip, [r1, #0x07]         /* ip = ...7 */
        mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
        strh    r1, [r0]
#ifdef __ARMEB__
        mov     r1, r2, lsl #24         /* r1 = 2... */
        orr     r1, r1, r3, lsr #8      /* r1 = 2345 */
        orr     r3, ip, r3, lsl #8      /* r3 = 4567 */
#else
        mov     r1, r2, lsr #24         /* r1 = ...2 */
        orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
        mov     r3, r3, lsr #24         /* r3 = ...6 */
        orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
#endif
        str     r1, [r0, #0x02]
        strh    r3, [r0, #0x06]
        RET
        LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]
        ldr     ip, [r1, #0x02]
        ldrh    r3, [r1, #0x06]
        strh    r2, [r0]
        str     ip, [r0, #0x02]
        strh    r3, [r0, #0x06]
        RET
        LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
        ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
        ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
        ldrb    ip, [r1]                /* ip = ...0 */
        mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
        strh    r1, [r0, #0x06]
#ifdef __ARMEB__
        mov     r3, r3, lsr #24         /* r3 = ...5 */
        orr     r3, r3, r2, lsl #8      /* r3 = 2345 */
        mov     r2, r2, lsr #24         /* r2 = ...1 */
        orr     r2, r2, ip, lsl #8      /* r2 = ..01 */
#else
        mov     r3, r3, lsl #24         /* r3 = 5... */
        orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
        orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
#endif
        str     r3, [r0, #0x02]
        strh    r2, [r0]
        RET
        LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
        ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
        mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
        strh    r1, [r0, #0x05]
#ifdef __ARMEB__
        strb    r3, [r0, #0x07]
        mov     r1, r2, lsr #24         /* r1 = ...0 */
        strb    r1, [r0]
        mov     r2, r2, lsl #8          /* r2 = 123. */
        orr     r2, r2, r3, lsr #24     /* r2 = 1234 */
        str     r2, [r0, #0x01]
#else
        strb    r2, [r0]
        mov     r1, r3, lsr #24         /* r1 = ...7 */
        strb    r1, [r0, #0x07]
        mov     r2, r2, lsr #8          /* r2 = .321 */
        orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
        str     r2, [r0, #0x01]
#endif
        RET
        LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r3, [r1]                /* r3 = ...0 */
        ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
        ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
        ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
        strb    r3, [r0]
        mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
        strh    ip, [r0, #0x05]
        orr     r2, r3, r2, lsl #16     /* r2 = 1234 */
#else
        strh    r3, [r0, #0x05]
        orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
#endif
        str     r2, [r0, #0x01]
        strb    r1, [r0, #0x07]
        RET
        LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
        mov     ip, r2, lsr #8          /* ip = ...0 */
        strb    ip, [r0]
        mov     ip, r2, lsl #24         /* ip = 1... */
        orr     ip, ip, r3, lsr #8      /* ip = 1234 */
        strb    r1, [r0, #0x07]
        mov     r1, r1, lsr #8          /* r1 = ...6 */
        orr     r1, r1, r3, lsl #8      /* r1 = 3456 */
#else
        strb    r2, [r0]
        mov     ip, r2, lsr #8          /* ip = ...1 */
        orr     ip, ip, r3, lsl #8      /* ip = 4321 */
        mov     r2, r1, lsr #8          /* r2 = ...7 */
        strb    r2, [r0, #0x07]
        mov     r1, r1, lsl #8          /* r1 = .76. */
        orr     r1, r1, r3, lsr #24     /* r1 = .765 */
#endif
        str     ip, [r0, #0x01]
        strh    r1, [r0, #0x05]
        RET
        LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]
        ldr     ip, [r1, #0x01]
        ldrh    r3, [r1, #0x05]
        ldrb    r1, [r1, #0x07]
        strb    r2, [r0]
        str     ip, [r0, #0x01]
        strh    r3, [r0, #0x05]
        strb    r1, [r0, #0x07]
        RET
        LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 */
#define LMEMCPY_C_LOG2  7       /* 128 bytes */
#define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
        LMEMCPY_C_PAD
.Lmemcpy_c:
        /* Same alignment dispatch as .Lmemcpy_4, but 128-byte slots */
        and     r2, r1, #0x03
        orr     r2, r2, r0, lsl #2
        ands    r2, r2, #0x0f
        sub     r3, pc, #0x14
        addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
        ldr     r2, [r1]
        ldr     r3, [r1, #0x04]
        ldr     r1, [r1, #0x08]
        str     r2, [r0]
        str     r3, [r0, #0x04]
        str     r1, [r0, #0x08]
        RET
        LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1, #0xb]          /* r2 = ...B */
        ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
        ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
        ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
#ifdef __ARMEB__
        orr     r2, r2, ip, lsl #8      /* r2 = 89AB */
        str     r2, [r0, #0x08]
        mov     r2, ip, lsr #24         /* r2 = ...7 */
        orr     r2, r2, r3, lsl #8      /* r2 = 4567 */
        mov     r1, r1, lsl #8          /* r1 = 012. */
        orr     r1, r1, r3, lsr #24     /* r1 = 0123 */
#else
        mov     r2, r2, lsl #24         /* r2 = B... */
        orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
        str     r2, [r0, #0x08]
        mov     r2, ip, lsl #24         /* r2 = 7... */
        orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
        mov     r1, r1, lsr #8          /* r1 = .210 */
        orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
#endif
        str     r2, [r0, #0x04]
        str     r1, [r0]
        RET
        LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
        ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
        mov     r2, r2, lsl #16         /* r2 = 01.. */
        orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
        str     r2, [r0]
        mov     r3, r3, lsl #16         /* r3 = 45.. */
        orr     r3, r3, ip, lsr #16     /* r3 = 4567 */
        orr     r1, r1, ip, lsl #16     /* r1 = 89AB */
#else
        orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
        str     r2, [r0]
        mov     r3, r3, lsr #16         /* r3 = ..54 */
        orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
        mov     r1, r1, lsl #16         /* r1 = BA.. */
        orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
#endif
        str     r3, [r0, #0x04]
        str     r1, [r0, #0x08]
        RET
        LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
        ldrb    r2, [r1]                /* r2 = ...0 */
        ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
        ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
        ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
        mov     r2, r2, lsl #24         /* r2 = 0... */
        orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
        str     r2, [r0]
        mov     r3, r3, lsl #24         /* r3 = 4... */
        orr     r3, r3, ip, lsr #8      /* r3 = 4567 */
        mov     r1, r1, lsr #8          /* r1 = .9AB */
        orr     r1, r1, ip, lsl #24     /* r1 = 89AB */
#else
        orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
        str     r2, [r0]
        mov     r3, r3, lsr #24         /* r3 = ...4 */
        orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
        mov     r1, r1, lsl #8          /* r1 = BA9. */
        orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
#endif
        str     r3, [r0, #0x04]
        str     r1, [r0, #0x08]
        RET
        LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
        ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
        ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
        ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
        mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
        strh    r1, [r0, #0x01]
#ifdef __ARMEB__
        mov     r1, r2, lsr #24         /* r1 = ...0 */
        strb    r1, [r0]
        mov     r1, r2, lsl #24         /* r1 = 3... */
        orr     r2, r1, r3, lsr #8      /* r1 = 3456 */
        mov     r1, r3, lsl #24         /* r1 = 7... */
        orr     r1, r1, ip, lsr #8      /* r1 = 789A */
#else
        strb    r2, [r0]
        mov     r1, r2, lsr #24         /* r1 = ...3 */
        orr     r2, r1, r3, lsl #8      /* r1 = 6543 */
        mov     r1, r3, lsr #24         /* r1 = ...7 */
        orr     r1, r1, ip, lsl #8      /* r1 = A987 */
        mov     ip, ip, lsr #24         /* ip = ...B */
#endif
        str     r2, [r0, #0x03]
        str     r1, [r0, #0x07]
        strb    ip, [r0, #0x0b]
        RET
        LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
        ldrb    r2, [r1]
        ldrh    r3, [r1, #0x01]
        ldr     ip, [r1, #0x03]
        strb    r2, [r0]
        ldr     r2, [r1, #0x07]
        ldrb    r1, [r1, #0x0b]
        strh    r3, [r0, #0x01]
        str     ip, [r0, #0x03]
        str     r2, [r0, #0x07]
        strb    r1, [r0, #0x0b]
        RET
        LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
        ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
        ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
        ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
        ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
        mov     r2, r2, ror #8          /* r2 = 1..0 */
        strb    r2, [r0]
        mov     r2, r2, lsr #16         /* r2 = ..1. */
        orr     r2, r2, r3, lsr #24     /* r2 = ..12 */
        strh    r2, [r0, #0x01]
        mov     r2, r3, lsl #8          /* r2 = 345. */
        orr     r3, r2, ip, lsr #24     /* r3 = 3456 */
        mov     r2, ip, lsl #8          /* r2 = 789. */
        orr     r2, r2, r1, lsr #8      /* r2 = 789A */
#else
        strb    r2, [r0]
        mov     r2, r2, lsr #8          /* r2 = ...1 */
        orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
        strh    r2, [r0, #0x01]
        mov     r2, r3, lsr #8          /* r2 = .543 */
        orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
        mov     r2, ip, lsr #8          /* r2 = .987 */
        orr     r2, r2, r1, lsl #24     /* r2 = A987 */
        mov     r1, r1, lsr #8          /* r1 = ...B */
#endif
        str     r3, [r0, #0x03]
        str     r2, [r0, #0x07]
        strb    r1, [r0, #0x0b]
        RET
        LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
        ldrb    r2, [r1]
        ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
        ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
        ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
        strb    r2, [r0]
#ifdef __ARMEB__
        mov     r2, r3, lsr #16         /* r2 = ..12 */
        strh    r2, [r0, #0x01]
        mov     r3, r3, lsl #16         /* r3 = 34.. */
        orr     r3, r3, ip, lsr #16     /* r3 = 3456 */
        mov     ip, ip, lsl #16         /* ip = 78.. */
        orr     ip, ip, r1, lsr #16     /* ip = 789A */
        mov     r1, r1, lsr #8          /* r1 = .9AB */
#else
        strh    r3, [r0, #0x01]
        mov     r3, r3, lsr #16         /* r3 = ..43 */
        orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
        mov     ip, ip, lsr #16         /* ip = ..87 */
        orr     ip, ip, r1, lsl #16     /* ip = A987 */
        mov     r1, r1, lsr #16         /* r1 = ..xB */
#endif
        str     r3, [r0, #0x03]
        str     ip, [r0, #0x07]
        strb    r1, [r0, #0x0b]
        RET
        LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
        ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
        ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
        ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
        mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
        strh    r1, [r0]
        mov     r1, ip, lsl #16         /* r1 = 23..
*/ 2666 orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 2667 mov r3, r3, lsl #16 /* r3 = 67.. */ 2668 orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 2669#else 2670 strh ip, [r0] 2671 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 2672 mov r3, r3, lsr #16 /* r3 = ..76 */ 2673 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 2674 mov r2, r2, lsr #16 /* r2 = ..BA */ 2675#endif 2676 str r1, [r0, #0x02] 2677 str r3, [r0, #0x06] 2678 strh r2, [r0, #0x0a] 2679 RET 2680 LMEMCPY_C_PAD 2681 2682/* 2683 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 2684 */ 2685 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2686 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2687 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 2688 strh ip, [r0] 2689 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2690 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 2691#ifdef __ARMEB__ 2692 mov r2, r2, lsl #24 /* r2 = 2... */ 2693 orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 2694 mov r3, r3, lsl #24 /* r3 = 6... */ 2695 orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 2696 orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 2697#else 2698 mov r2, r2, lsr #24 /* r2 = ...2 */ 2699 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 2700 mov r3, r3, lsr #24 /* r3 = ...6 */ 2701 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 2702 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 2703 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 2704#endif 2705 str r2, [r0, #0x02] 2706 str r3, [r0, #0x06] 2707 strh r1, [r0, #0x0a] 2708 RET 2709 LMEMCPY_C_PAD 2710 2711/* 2712 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2713 */ 2714 ldrh r2, [r1] 2715 ldr r3, [r1, #0x02] 2716 ldr ip, [r1, #0x06] 2717 ldrh r1, [r1, #0x0a] 2718 strh r2, [r0] 2719 str r3, [r0, #0x02] 2720 str ip, [r0, #0x06] 2721 strh r1, [r0, #0x0a] 2722 RET 2723 LMEMCPY_C_PAD 2724 2725/* 2726 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 2727 */ 2728 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 2729 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 2730 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 2731 strh ip, [r0, #0x0a] 2732 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2733 ldrb r1, [r1] /* r1 = ...0 */ 2734#ifdef __ARMEB__ 2735 mov r2, r2, lsr #24 /* r2 = ...9 */ 2736 orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 2737 mov r3, r3, lsr #24 /* r3 = ...5 */ 2738 orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 2739 mov r1, r1, lsl #8 /* r1 = ..0. */ 2740 orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 2741#else 2742 mov r2, r2, lsl #24 /* r2 = 9... */ 2743 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 2744 mov r3, r3, lsl #24 /* r3 = 5... */ 2745 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 2746 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 2747#endif 2748 str r2, [r0, #0x06] 2749 str r3, [r0, #0x02] 2750 strh r1, [r0] 2751 RET 2752 LMEMCPY_C_PAD 2753 2754/* 2755 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 2756 */ 2757 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2758 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 2759 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 2760#ifdef __ARMEB__ 2761 mov r3, r2, lsr #24 /* r3 = ...0 */ 2762 strb r3, [r0] 2763 mov r2, r2, lsl #8 /* r2 = 123. */ 2764 orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 2765 str r2, [r0, #0x01] 2766 mov r2, ip, lsl #8 /* r2 = 567. 
*/ 2767 orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 2768 str r2, [r0, #0x05] 2769 mov r2, r1, lsr #8 /* r2 = ..9A */ 2770 strh r2, [r0, #0x09] 2771 strb r1, [r0, #0x0b] 2772#else 2773 strb r2, [r0] 2774 mov r3, r2, lsr #8 /* r3 = .321 */ 2775 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 2776 str r3, [r0, #0x01] 2777 mov r3, ip, lsr #8 /* r3 = .765 */ 2778 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 2779 str r3, [r0, #0x05] 2780 mov r1, r1, lsr #8 /* r1 = .BA9 */ 2781 strh r1, [r0, #0x09] 2782 mov r1, r1, lsr #16 /* r1 = ...B */ 2783 strb r1, [r0, #0x0b] 2784#endif 2785 RET 2786 LMEMCPY_C_PAD 2787 2788/* 2789 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 2790 */ 2791 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 2792 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 2793 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2794 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2795 strb r2, [r0, #0x0b] 2796#ifdef __ARMEB__ 2797 strh r3, [r0, #0x09] 2798 mov r3, r3, lsr #16 /* r3 = ..78 */ 2799 orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 2800 mov ip, ip, lsr #16 /* ip = ..34 */ 2801 orr ip, ip, r1, lsl #16 /* ip = 1234 */ 2802 mov r1, r1, lsr #16 /* r1 = ..x0 */ 2803#else 2804 mov r2, r3, lsr #16 /* r2 = ..A9 */ 2805 strh r2, [r0, #0x09] 2806 mov r3, r3, lsl #16 /* r3 = 87.. */ 2807 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 2808 mov ip, ip, lsl #16 /* ip = 43.. 
*/ 2809 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 2810 mov r1, r1, lsr #8 /* r1 = .210 */ 2811#endif 2812 str r3, [r0, #0x05] 2813 str ip, [r0, #0x01] 2814 strb r1, [r0] 2815 RET 2816 LMEMCPY_C_PAD 2817 2818/* 2819 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 2820 */ 2821#ifdef __ARMEB__ 2822 ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 2823 ldr ip, [r1, #0x06] /* ip = 6789 */ 2824 ldr r3, [r1, #0x02] /* r3 = 2345 */ 2825 ldrh r1, [r1] /* r1 = ..01 */ 2826 strb r2, [r0, #0x0b] 2827 mov r2, r2, lsr #8 /* r2 = ...A */ 2828 orr r2, r2, ip, lsl #8 /* r2 = 789A */ 2829 mov ip, ip, lsr #8 /* ip = .678 */ 2830 orr ip, ip, r3, lsl #24 /* ip = 5678 */ 2831 mov r3, r3, lsr #8 /* r3 = .234 */ 2832 orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 2833 mov r1, r1, lsr #8 /* r1 = ...0 */ 2834 strb r1, [r0] 2835 str r3, [r0, #0x01] 2836 str ip, [r0, #0x05] 2837 strh r2, [r0, #0x09] 2838#else 2839 ldrh r2, [r1] /* r2 = ..10 */ 2840 ldr r3, [r1, #0x02] /* r3 = 5432 */ 2841 ldr ip, [r1, #0x06] /* ip = 9876 */ 2842 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 2843 strb r2, [r0] 2844 mov r2, r2, lsr #8 /* r2 = ...1 */ 2845 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2846 mov r3, r3, lsr #24 /* r3 = ...5 */ 2847 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 2848 mov ip, ip, lsr #24 /* ip = ...9 */ 2849 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 2850 mov r1, r1, lsr #8 /* r1 = ...B */ 2851 str r2, [r0, #0x01] 2852 str r3, [r0, #0x05] 2853 strh ip, [r0, #0x09] 2854 strb r1, [r0, #0x0b] 2855#endif 2856 RET 2857 LMEMCPY_C_PAD 2858 2859/* 2860 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 2861 */ 2862 ldrb r2, [r1] 2863 ldr r3, [r1, #0x01] 2864 ldr ip, [r1, #0x05] 2865 strb r2, [r0] 2866 ldrh r2, [r1, #0x09] 2867 ldrb r1, [r1, #0x0b] 2868 str r3, [r0, #0x01] 2869 str ip, [r0, #0x05] 2870 strh r2, [r0, #0x09] 2871 strb r1, [r0, #0x0b] 2872 RET 2873#endif /* __XSCALE__ */ 2874 2875#ifdef GPROF 2876 2877ENTRY(user) 2878 nop 2879ENTRY(btrap) 2880 nop 2881ENTRY(etrap) 2882 nop 2883ENTRY(bintr) 2884 
nop 2885ENTRY(eintr) 2886 nop 2887 2888#endif 2889