1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include "assym.inc"

	.syntax	unified

/*
 * Literal pool: addresses of optional platform-supplied accelerated
 * copy/zero hooks and their minimum-size thresholds.  A NULL function
 * pointer means "no accelerated version installed".
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
/* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * bzero(dst=r0, len=r1): if an accelerated _arm_bzero hook is installed
 * and len >= _min_bzero_size, try the hook first (args saved/restored
 * around the indirect call); if the hook returns nonzero (failure), or
 * if no hook applies, fall through to the common memset path with a
 * zero fill value.
 */
ENTRY(bzero)
	ldr	r3, .L_arm_bzero
	ldr	r3, [r3]
	cmp	r3, #0			/* Hook installed? */
	beq	.Lnormal0
	ldr	r2, .L_min_bzero_size
	ldr	r2, [r2]
	cmp	r1, r2			/* Big enough to be worth it? */
	blt	.Lnormal0
	stmfd	sp!, {r0, r1, lr}
	mov	r2, #0
	mov	lr, pc			/* Indirect call to the hook */
	mov	pc, r3
	cmp	r0, #0			/* Hook returns 0 on success */
	ldmfd	sp!, {r0, r1, lr}
	RETeq
.Lnormal0:
	mov	r3, #0x00		/* Fill value is zero */
	b	do_memset
END(bzero)
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * memset(dst=r0, c=r1, len=r2): returns dst in r0.  Shares do_memset
 * with bzero: on entry to do_memset, r3 = fill byte, r1 = length, and
 * ip is used as the store cursor so that r0 survives as the return
 * value.
 */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0			/* ip = cursor; preserve r0 for return */
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef _ARM_ARCH_5E
	tst	ip, #0x04		/* Quad-align for armv5e (strd needs it) */
#else
	cmp	r1, #0x10
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef _ARM_ARCH_5E
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop128
	RETeq			/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop32
	RETeq			/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	RETeq			/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq			/* Zero length so just exit */

#ifdef _ARM_ARCH_5E
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2
#endif

	/* 1..3 trailing bytes; condition codes select how many stores run */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach alignment */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strbgt	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq			/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */
EEND(memset)
END(bzero)

/*
 * bcmp(b1=r0, b2=r1, len=r2): returns 0 if the buffers match; otherwise
 * returns the difference of the first mismatching byte pair.  ip holds
 * the b1 cursor so r0 is free for the result.  A 6-byte length is
 * special-cased (see below).
 */
ENTRY(bcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eorsne	r3, ip, r1
	RETeq			/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Word-align the addresses, if necessary.  r3 = 3 * (bytes until
	 * alignment); the computed branch below skips forward in units of
	 * 24 bytes (r3 << 3), i.e. one 6-instruction compare block per
	 * byte already aligned.
	 */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1
	addne	pc, pc, r3, lsl #3
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq			/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results. Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne			/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne			/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne			/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne			/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne			/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(bcmp)

/*
 * bcopy(src, dst, len): swap src/dst into memmove() argument order via
 * the triple-eor register exchange, then fall straight into memmove.
 */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
/*
 * memmove(dst=r0, src=r1, len=r2): overlap-safe copy.  If the regions
 * do not overlap, tail-call memcpy; otherwise copy forwards or
 * backwards as required.  Returns dst in r0.
 */
EENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Source is misaligned by r12 (1..3) bytes.  Each fsrculN path
	 * reads aligned words and reassembles output words by shifting
	 * and or-ing adjacent reads; the shift amounts differ per
	 * misalignment and per endianness.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* undo alignment bias before tail copy */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* undo alignment bias before tail copy */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* undo alignment bias before tail copy */
	b	.Lmemmove_fl4

	/*
	 * Backwards copy: start from the end of both buffers and walk
	 * down, mirroring the forward path above.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* undo alignment bias before tail copy */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* undo alignment bias before tail copy */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/*
	 * NOTE(review): label says "loop32" but, like its bsrcul2/bsrcul3
	 * siblings, this loop moves 16 bytes per iteration (subs #0x10).
	 * The name appears historical; kept to avoid touching code.
	 */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* undo alignment bias before tail copy */
	b	.Lmemmove_bl4
EEND(memmove)
END(bcopy)

#if !defined(_ARM_ARCH_5E)
/*
 * memcpy(dst=r0, src=r1, len=r2) for pre-ARMv5E cores: returns dst.
 * May dispatch to an installed _arm_memcpy hook for large copies
 * (except when executing from flash, where the hook is not consulted).
 */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	/* Do not check arm_memcpy if we're running from flash */
#if defined(FLASHADDR) && defined(PHYSADDR)
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0			/* Hook installed? */
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3			/* Big enough to be worth it? */
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Indirect call to the hook */
	ldr	pc, [r4]
	cmp	r0, #0			/* Hook returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq

.Lnormal:
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#ifdef __APCS_26_
	ldmiaeq	sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Little-endian-only variant of the shifted-word reassembly used
	 * in memmove above (no __ARMEB__ cases here).
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* undo alignment bias before tail copy */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* undo alignment bias before tail copy */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* undo alignment bias before tail copy */
	b	.Lmemcpy_l4
END(memcpy)

#else
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*
 * ARMv5E memcpy: uses pld prefetch and strd 64-bit stores.  Short
 * copies (<= 12 bytes) take a dedicated path; large copies may use the
 * _arm_memcpy hook as above.
 * (This function continues beyond the end of this file view.)
 */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq
.Lnormal:
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrble	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strble	ip, [r3], #0x01
	ldrblt	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strblt	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */

	/* Quad-align the destination buffer */
	tst	r3, #0x07		/* Already quad aligned? */
	ldrne	ip, [r1], #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	subne	r2, r2, #0x04
	strne	ip, [r3], #0x04

	/* Destination buffer quad aligned, source is at least word aligned */
	subs	r2, r2, #0x80
	blt	.Lmemcpy_w_lessthan128

	/* Copy 128 bytes at a time */
.Lmemcpy_w_loop128:
	ldr	r4, [r1], #0x04		/* LD:00-03 */
	ldr	r5, [r1], #0x04		/* LD:04-07 */
	pld	[r1, #0x18]		/* Prefetch 0x20 */
	ldr	r6, [r1], #0x04		/* LD:08-0b */
	ldr	r7, [r1], #0x04		/* LD:0c-0f */
	ldr	r8, [r1], #0x04		/* LD:10-13 */
	ldr	r9, [r1], #0x04		/* LD:14-17 */
	strd	r4, [r3], #0x08		/* ST:00-07 */
	ldr	r4, [r1], #0x04		/* LD:18-1b */
	ldr	r5, [r1], #0x04		/* LD:1c-1f */
	strd	r6, [r3], #0x08		/* ST:08-0f */
	ldr	r6, [r1], #0x04		/* LD:20-23 */
	ldr	r7, [r1], #0x04		/* LD:24-27 */
	pld	[r1, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r3], #0x08		/* ST:10-17 */
	ldr	r8, [r1], #0x04		/* LD:28-2b */
	ldr	r9, [r1], #0x04		/* LD:2c-2f */
	strd	r4, [r3], #0x08		/* ST:18-1f */
	ldr	r4, [r1], #0x04		/* LD:30-33 */
	ldr	r5, [r1], #0x04		/* LD:34-37 */
	strd	r6, [r3], #0x08		/* ST:20-27 */
	ldr	r6, [r1], #0x04		/* LD:38-3b */
	ldr	r7, [r1], #0x04		/* LD:3c-3f */
	strd	r8, [r3], #0x08		/* ST:28-2f */
	ldr	r8, [r1], #0x04		/* LD:40-43 */
	ldr	r9, [r1], #0x04		/* LD:44-47 */
	pld	[r1, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r3], #0x08		/* ST:30-37 */
	ldr	r4, [r1], #0x04		/* LD:48-4b */
	ldr	r5, [r1], #0x04		/* LD:4c-4f */
	strd	r6, [r3], #0x08		/* ST:38-3f */
	ldr	r6, [r1], #0x04		/* LD:50-53 */
	ldr	r7, [r1], #0x04		/* LD:54-57 */
	strd	r8, [r3], #0x08		/* ST:40-47 */
	ldr	r8, [r1], #0x04		/* LD:58-5b */
	ldr	r9, [r1], #0x04		/* LD:5c-5f */
	strd	r4, [r3], #0x08		/* ST:48-4f */
	ldr	r4, [r1], #0x04		/* LD:60-63 */
	ldr	r5,
[r1], #0x04 /* LD:64-67 */ 1282 pld [r1, #0x18] /* Prefetch 0x80 */ 1283 strd r6, [r3], #0x08 /* ST:50-57 */ 1284 ldr r6, [r1], #0x04 /* LD:68-6b */ 1285 ldr r7, [r1], #0x04 /* LD:6c-6f */ 1286 strd r8, [r3], #0x08 /* ST:58-5f */ 1287 ldr r8, [r1], #0x04 /* LD:70-73 */ 1288 ldr r9, [r1], #0x04 /* LD:74-77 */ 1289 strd r4, [r3], #0x08 /* ST:60-67 */ 1290 ldr r4, [r1], #0x04 /* LD:78-7b */ 1291 ldr r5, [r1], #0x04 /* LD:7c-7f */ 1292 strd r6, [r3], #0x08 /* ST:68-6f */ 1293 strd r8, [r3], #0x08 /* ST:70-77 */ 1294 subs r2, r2, #0x80 1295 strd r4, [r3], #0x08 /* ST:78-7f */ 1296 bge .Lmemcpy_w_loop128 1297 1298.Lmemcpy_w_lessthan128: 1299 adds r2, r2, #0x80 /* Adjust for extra sub */ 1300 ldmfdeq sp!, {r4-r9} 1301 RETeq /* Return now if done */ 1302 subs r2, r2, #0x20 1303 blt .Lmemcpy_w_lessthan32 1304 1305 /* Copy 32 bytes at a time */ 1306.Lmemcpy_w_loop32: 1307 ldr r4, [r1], #0x04 1308 ldr r5, [r1], #0x04 1309 pld [r1, #0x18] 1310 ldr r6, [r1], #0x04 1311 ldr r7, [r1], #0x04 1312 ldr r8, [r1], #0x04 1313 ldr r9, [r1], #0x04 1314 strd r4, [r3], #0x08 1315 ldr r4, [r1], #0x04 1316 ldr r5, [r1], #0x04 1317 strd r6, [r3], #0x08 1318 strd r8, [r3], #0x08 1319 subs r2, r2, #0x20 1320 strd r4, [r3], #0x08 1321 bge .Lmemcpy_w_loop32 1322 1323.Lmemcpy_w_lessthan32: 1324 adds r2, r2, #0x20 /* Adjust for extra sub */ 1325 ldmfdeq sp!, {r4-r9} 1326 RETeq /* Return now if done */ 1327 1328 and r4, r2, #0x18 1329 rsbs r4, r4, #0x18 1330 addne pc, pc, r4, lsl #1 1331 nop 1332 1333 /* At least 24 bytes remaining */ 1334 ldr r4, [r1], #0x04 1335 ldr r5, [r1], #0x04 1336 sub r2, r2, #0x08 1337 strd r4, [r3], #0x08 1338 1339 /* At least 16 bytes remaining */ 1340 ldr r4, [r1], #0x04 1341 ldr r5, [r1], #0x04 1342 sub r2, r2, #0x08 1343 strd r4, [r3], #0x08 1344 1345 /* At least 8 bytes remaining */ 1346 ldr r4, [r1], #0x04 1347 ldr r5, [r1], #0x04 1348 subs r2, r2, #0x08 1349 strd r4, [r3], #0x08 1350 1351 /* Less than 8 bytes remaining */ 1352 ldmfd sp!, {r4-r9} 1353 RETeq /* Return 
now if done */ 1354 subs r2, r2, #0x04 1355 ldrge ip, [r1], #0x04 1356 strge ip, [r3], #0x04 1357 RETeq /* Return now if done */ 1358 addlt r2, r2, #0x04 1359 ldrb ip, [r1], #0x01 1360 cmp r2, #0x02 1361 ldrbge r2, [r1], #0x01 1362 strb ip, [r3], #0x01 1363 ldrbgt ip, [r1] 1364 strbge r2, [r3], #0x01 1365 strbgt ip, [r3] 1366 RET 1367/* Place a literal pool here for the above ldr instructions to use */ 1368.ltorg 1369 1370 1371/* 1372 * At this point, it has not been possible to word align both buffers. 1373 * The destination buffer is word aligned, but the source buffer is not. 1374 */ 1375.Lmemcpy_bad_align: 1376 stmfd sp!, {r4-r7} 1377 bic r1, r1, #0x03 1378 cmp ip, #2 1379 ldr ip, [r1], #0x04 1380 bgt .Lmemcpy_bad3 1381 beq .Lmemcpy_bad2 1382 b .Lmemcpy_bad1 1383 1384.Lmemcpy_bad1_loop16: 1385#ifdef __ARMEB__ 1386 mov r4, ip, lsl #8 1387#else 1388 mov r4, ip, lsr #8 1389#endif 1390 ldr r5, [r1], #0x04 1391 pld [r1, #0x018] 1392 ldr r6, [r1], #0x04 1393 ldr r7, [r1], #0x04 1394 ldr ip, [r1], #0x04 1395#ifdef __ARMEB__ 1396 orr r4, r4, r5, lsr #24 1397 mov r5, r5, lsl #8 1398 orr r5, r5, r6, lsr #24 1399 mov r6, r6, lsl #8 1400 orr r6, r6, r7, lsr #24 1401 mov r7, r7, lsl #8 1402 orr r7, r7, ip, lsr #24 1403#else 1404 orr r4, r4, r5, lsl #24 1405 mov r5, r5, lsr #8 1406 orr r5, r5, r6, lsl #24 1407 mov r6, r6, lsr #8 1408 orr r6, r6, r7, lsl #24 1409 mov r7, r7, lsr #8 1410 orr r7, r7, ip, lsl #24 1411#endif 1412 str r4, [r3], #0x04 1413 str r5, [r3], #0x04 1414 str r6, [r3], #0x04 1415 str r7, [r3], #0x04 1416.Lmemcpy_bad1: 1417 subs r2, r2, #0x10 1418 bge .Lmemcpy_bad1_loop16 1419 1420 adds r2, r2, #0x10 1421 ldmfdeq sp!, {r4-r7} 1422 RETeq /* Return now if done */ 1423 subs r2, r2, #0x04 1424 sublt r1, r1, #0x03 1425 blt .Lmemcpy_bad_done 1426 1427.Lmemcpy_bad1_loop4: 1428#ifdef __ARMEB__ 1429 mov r4, ip, lsl #8 1430#else 1431 mov r4, ip, lsr #8 1432#endif 1433 ldr ip, [r1], #0x04 1434 subs r2, r2, #0x04 1435#ifdef __ARMEB__ 1436 orr r4, r4, ip, lsr #24 
1437#else 1438 orr r4, r4, ip, lsl #24 1439#endif 1440 str r4, [r3], #0x04 1441 bge .Lmemcpy_bad1_loop4 1442 sub r1, r1, #0x03 1443 b .Lmemcpy_bad_done 1444 1445.Lmemcpy_bad2_loop16: 1446#ifdef __ARMEB__ 1447 mov r4, ip, lsl #16 1448#else 1449 mov r4, ip, lsr #16 1450#endif 1451 ldr r5, [r1], #0x04 1452 pld [r1, #0x018] 1453 ldr r6, [r1], #0x04 1454 ldr r7, [r1], #0x04 1455 ldr ip, [r1], #0x04 1456#ifdef __ARMEB__ 1457 orr r4, r4, r5, lsr #16 1458 mov r5, r5, lsl #16 1459 orr r5, r5, r6, lsr #16 1460 mov r6, r6, lsl #16 1461 orr r6, r6, r7, lsr #16 1462 mov r7, r7, lsl #16 1463 orr r7, r7, ip, lsr #16 1464#else 1465 orr r4, r4, r5, lsl #16 1466 mov r5, r5, lsr #16 1467 orr r5, r5, r6, lsl #16 1468 mov r6, r6, lsr #16 1469 orr r6, r6, r7, lsl #16 1470 mov r7, r7, lsr #16 1471 orr r7, r7, ip, lsl #16 1472#endif 1473 str r4, [r3], #0x04 1474 str r5, [r3], #0x04 1475 str r6, [r3], #0x04 1476 str r7, [r3], #0x04 1477.Lmemcpy_bad2: 1478 subs r2, r2, #0x10 1479 bge .Lmemcpy_bad2_loop16 1480 1481 adds r2, r2, #0x10 1482 ldmfdeq sp!, {r4-r7} 1483 RETeq /* Return now if done */ 1484 subs r2, r2, #0x04 1485 sublt r1, r1, #0x02 1486 blt .Lmemcpy_bad_done 1487 1488.Lmemcpy_bad2_loop4: 1489#ifdef __ARMEB__ 1490 mov r4, ip, lsl #16 1491#else 1492 mov r4, ip, lsr #16 1493#endif 1494 ldr ip, [r1], #0x04 1495 subs r2, r2, #0x04 1496#ifdef __ARMEB__ 1497 orr r4, r4, ip, lsr #16 1498#else 1499 orr r4, r4, ip, lsl #16 1500#endif 1501 str r4, [r3], #0x04 1502 bge .Lmemcpy_bad2_loop4 1503 sub r1, r1, #0x02 1504 b .Lmemcpy_bad_done 1505 1506.Lmemcpy_bad3_loop16: 1507#ifdef __ARMEB__ 1508 mov r4, ip, lsl #24 1509#else 1510 mov r4, ip, lsr #24 1511#endif 1512 ldr r5, [r1], #0x04 1513 pld [r1, #0x018] 1514 ldr r6, [r1], #0x04 1515 ldr r7, [r1], #0x04 1516 ldr ip, [r1], #0x04 1517#ifdef __ARMEB__ 1518 orr r4, r4, r5, lsr #8 1519 mov r5, r5, lsl #24 1520 orr r5, r5, r6, lsr #8 1521 mov r6, r6, lsl #24 1522 orr r6, r6, r7, lsr #8 1523 mov r7, r7, lsl #24 1524 orr r7, r7, ip, lsr #8 1525#else 
1526 orr r4, r4, r5, lsl #8 1527 mov r5, r5, lsr #24 1528 orr r5, r5, r6, lsl #8 1529 mov r6, r6, lsr #24 1530 orr r6, r6, r7, lsl #8 1531 mov r7, r7, lsr #24 1532 orr r7, r7, ip, lsl #8 1533#endif 1534 str r4, [r3], #0x04 1535 str r5, [r3], #0x04 1536 str r6, [r3], #0x04 1537 str r7, [r3], #0x04 1538.Lmemcpy_bad3: 1539 subs r2, r2, #0x10 1540 bge .Lmemcpy_bad3_loop16 1541 1542 adds r2, r2, #0x10 1543 ldmfdeq sp!, {r4-r7} 1544 RETeq /* Return now if done */ 1545 subs r2, r2, #0x04 1546 sublt r1, r1, #0x01 1547 blt .Lmemcpy_bad_done 1548 1549.Lmemcpy_bad3_loop4: 1550#ifdef __ARMEB__ 1551 mov r4, ip, lsl #24 1552#else 1553 mov r4, ip, lsr #24 1554#endif 1555 ldr ip, [r1], #0x04 1556 subs r2, r2, #0x04 1557#ifdef __ARMEB__ 1558 orr r4, r4, ip, lsr #8 1559#else 1560 orr r4, r4, ip, lsl #8 1561#endif 1562 str r4, [r3], #0x04 1563 bge .Lmemcpy_bad3_loop4 1564 sub r1, r1, #0x01 1565 1566.Lmemcpy_bad_done: 1567 ldmfd sp!, {r4-r7} 1568 adds r2, r2, #0x04 1569 RETeq 1570 ldrb ip, [r1], #0x01 1571 cmp r2, #0x02 1572 ldrbge r2, [r1], #0x01 1573 strb ip, [r3], #0x01 1574 ldrbgt ip, [r1] 1575 strbge r2, [r3], #0x01 1576 strbgt ip, [r3] 1577 RET 1578 1579 1580/* 1581 * Handle short copies (less than 16 bytes), possibly misaligned. 1582 * Some of these are *very* common, thanks to the network stack, 1583 * and so are handled specially. 
1584 */ 1585.Lmemcpy_short: 1586 add pc, pc, r2, lsl #2 1587 nop 1588 RET /* 0x00 */ 1589 b .Lmemcpy_bytewise /* 0x01 */ 1590 b .Lmemcpy_bytewise /* 0x02 */ 1591 b .Lmemcpy_bytewise /* 0x03 */ 1592 b .Lmemcpy_4 /* 0x04 */ 1593 b .Lmemcpy_bytewise /* 0x05 */ 1594 b .Lmemcpy_6 /* 0x06 */ 1595 b .Lmemcpy_bytewise /* 0x07 */ 1596 b .Lmemcpy_8 /* 0x08 */ 1597 b .Lmemcpy_bytewise /* 0x09 */ 1598 b .Lmemcpy_bytewise /* 0x0a */ 1599 b .Lmemcpy_bytewise /* 0x0b */ 1600 b .Lmemcpy_c /* 0x0c */ 1601.Lmemcpy_bytewise: 1602 mov r3, r0 /* We must not clobber r0 */ 1603 ldrb ip, [r1], #0x01 16041: subs r2, r2, #0x01 1605 strb ip, [r3], #0x01 1606 ldrbne ip, [r1], #0x01 1607 bne 1b 1608 RET 1609 1610/****************************************************************************** 1611 * Special case for 4 byte copies 1612 */ 1613#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1614#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1615 LMEMCPY_4_PAD 1616.Lmemcpy_4: 1617 and r2, r1, #0x03 1618 orr r2, r2, r0, lsl #2 1619 ands r2, r2, #0x0f 1620 sub r3, pc, #0x14 1621 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1622 1623/* 1624 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1625 */ 1626 ldr r2, [r1] 1627 str r2, [r0] 1628 RET 1629 LMEMCPY_4_PAD 1630 1631/* 1632 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1633 */ 1634 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1635 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1636#ifdef __ARMEB__ 1637 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 1638 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1639#else 1640 mov r3, r3, lsr #8 /* r3 = .210 */ 1641 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1642#endif 1643 str r3, [r0] 1644 RET 1645 LMEMCPY_4_PAD 1646 1647/* 1648 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1649 */ 1650#ifdef __ARMEB__ 1651 ldrh r3, [r1] 1652 ldrh r2, [r1, #0x02] 1653#else 1654 ldrh r3, [r1, #0x02] 1655 ldrh r2, [r1] 1656#endif 1657 orr r3, r2, r3, lsl #16 1658 str r3, [r0] 1659 RET 1660 LMEMCPY_4_PAD 1661 1662/* 1663 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1664 */ 1665 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1666 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1667#ifdef __ARMEB__ 1668 mov r3, r3, lsl #24 /* r3 = 0... */ 1669 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1670#else 1671 mov r3, r3, lsr #24 /* r3 = ...0 */ 1672 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1673#endif 1674 str r3, [r0] 1675 RET 1676 LMEMCPY_4_PAD 1677 1678/* 1679 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1680 */ 1681 ldr r2, [r1] 1682#ifdef __ARMEB__ 1683 strb r2, [r0, #0x03] 1684 mov r3, r2, lsr #8 1685 mov r1, r2, lsr #24 1686 strb r1, [r0] 1687#else 1688 strb r2, [r0] 1689 mov r3, r2, lsr #8 1690 mov r1, r2, lsr #24 1691 strb r1, [r0, #0x03] 1692#endif 1693 strh r3, [r0, #0x01] 1694 RET 1695 LMEMCPY_4_PAD 1696 1697/* 1698 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1699 */ 1700 ldrb r2, [r1] 1701 ldrh r3, [r1, #0x01] 1702 ldrb r1, [r1, #0x03] 1703 strb r2, [r0] 1704 strh r3, [r0, #0x01] 1705 strb r1, [r0, #0x03] 1706 RET 1707 LMEMCPY_4_PAD 1708 1709/* 1710 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1711 */ 1712 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1713 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1714#ifdef __ARMEB__ 1715 mov r1, r2, lsr #8 /* r1 = ...0 */ 1716 strb r1, [r0] 1717 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1718 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1719#else 1720 strb r2, [r0] 1721 mov r2, r2, lsr #8 /* r2 = ...1 */ 1722 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1723 mov r3, r3, lsr #8 /* r3 = ...3 */ 1724#endif 1725 strh r2, [r0, #0x01] 1726 strb r3, [r0, #0x03] 1727 RET 1728 LMEMCPY_4_PAD 1729 1730/* 1731 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1732 */ 1733 ldrb r2, [r1] 1734 ldrh r3, [r1, #0x01] 1735 ldrb r1, [r1, #0x03] 1736 strb r2, [r0] 1737 strh r3, [r0, #0x01] 1738 strb r1, [r0, #0x03] 1739 RET 1740 LMEMCPY_4_PAD 1741 1742/* 1743 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1744 */ 1745 ldr r2, [r1] 1746#ifdef __ARMEB__ 1747 strh r2, [r0, #0x02] 1748 mov r3, r2, lsr #16 1749 strh r3, [r0] 1750#else 1751 strh r2, [r0] 1752 mov r3, r2, lsr #16 1753 strh r3, [r0, #0x02] 1754#endif 1755 RET 1756 LMEMCPY_4_PAD 1757 1758/* 1759 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1760 */ 1761 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1762 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1763 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1764 strh r1, [r0] 1765#ifdef __ARMEB__ 1766 mov r2, r2, lsl #8 /* r2 = 012. */ 1767 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1768#else 1769 mov r2, r2, lsr #24 /* r2 = ...2 */ 1770 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1771#endif 1772 strh r2, [r0, #0x02] 1773 RET 1774 LMEMCPY_4_PAD 1775 1776/* 1777 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1778 */ 1779 ldrh r2, [r1] 1780 ldrh r3, [r1, #0x02] 1781 strh r2, [r0] 1782 strh r3, [r0, #0x02] 1783 RET 1784 LMEMCPY_4_PAD 1785 1786/* 1787 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1788 */ 1789 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1790 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1791 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1792 strh r1, [r0, #0x02] 1793#ifdef __ARMEB__ 1794 mov r3, r3, lsr #24 /* r3 = ...1 */ 1795 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1796#else 1797 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1798 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1799#endif 1800 strh r3, [r0] 1801 RET 1802 LMEMCPY_4_PAD 1803 1804/* 1805 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1806 */ 1807 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1808#ifdef __ARMEB__ 1809 strb r2, [r0, #0x03] 1810 mov r3, r2, lsr #8 1811 mov r1, r2, lsr #24 1812 strh r3, [r0, #0x01] 1813 strb r1, [r0] 1814#else 1815 strb r2, [r0] 1816 mov r3, r2, lsr #8 1817 mov r1, r2, lsr #24 1818 strh r3, [r0, #0x01] 1819 strb r1, [r0, #0x03] 1820#endif 1821 RET 1822 LMEMCPY_4_PAD 1823 1824/* 1825 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1826 */ 1827 ldrb r2, [r1] 1828 ldrh r3, [r1, #0x01] 1829 ldrb r1, [r1, #0x03] 1830 strb r2, [r0] 1831 strh r3, [r0, #0x01] 1832 strb r1, [r0, #0x03] 1833 RET 1834 LMEMCPY_4_PAD 1835 1836/* 1837 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1838 */ 1839#ifdef __ARMEB__ 1840 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1841 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1842 strb r3, [r0, #0x03] 1843 mov r3, r3, lsr #8 /* r3 = ...2 */ 1844 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1845 strh r3, [r0, #0x01] 1846 mov r2, r2, lsr #8 /* r2 = ...0 */ 1847 strb r2, [r0] 1848#else 1849 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1850 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1851 strb r2, [r0] 1852 mov r2, r2, lsr #8 /* r2 = ...1 */ 1853 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1854 strh r2, [r0, #0x01] 1855 mov r3, r3, lsr #8 /* r3 = ...3 */ 1856 strb r3, [r0, #0x03] 1857#endif 1858 RET 1859 LMEMCPY_4_PAD 1860 1861/* 1862 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1863 */ 1864 ldrb r2, [r1] 1865 ldrh r3, [r1, #0x01] 1866 ldrb r1, [r1, #0x03] 1867 strb r2, [r0] 1868 strh r3, [r0, #0x01] 1869 strb r1, [r0, #0x03] 1870 RET 1871 LMEMCPY_4_PAD 1872 1873 1874/****************************************************************************** 1875 * Special case for 6 byte copies 1876 */ 1877#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1878#define LMEMCPY_6_PAD 
.align LMEMCPY_6_LOG2 1879 LMEMCPY_6_PAD 1880.Lmemcpy_6: 1881 and r2, r1, #0x03 1882 orr r2, r2, r0, lsl #2 1883 ands r2, r2, #0x0f 1884 sub r3, pc, #0x14 1885 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1886 1887/* 1888 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1889 */ 1890 ldr r2, [r1] 1891 ldrh r3, [r1, #0x04] 1892 str r2, [r0] 1893 strh r3, [r0, #0x04] 1894 RET 1895 LMEMCPY_6_PAD 1896 1897/* 1898 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1899 */ 1900 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1901 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1902#ifdef __ARMEB__ 1903 mov r2, r2, lsl #8 /* r2 = 012. */ 1904 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1905#else 1906 mov r2, r2, lsr #8 /* r2 = .210 */ 1907 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1908#endif 1909 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1910 str r2, [r0] 1911 strh r3, [r0, #0x04] 1912 RET 1913 LMEMCPY_6_PAD 1914 1915/* 1916 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1917 */ 1918 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1919 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1920#ifdef __ARMEB__ 1921 mov r1, r3, lsr #16 /* r1 = ..23 */ 1922 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1923 str r1, [r0] 1924 strh r3, [r0, #0x04] 1925#else 1926 mov r1, r3, lsr #16 /* r1 = ..54 */ 1927 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1928 str r2, [r0] 1929 strh r1, [r0, #0x04] 1930#endif 1931 RET 1932 LMEMCPY_6_PAD 1933 1934/* 1935 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1936 */ 1937 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1938 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1939 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 1940#ifdef __ARMEB__ 1941 mov r2, r2, lsl #24 /* r2 = 0... */ 1942 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1943 mov r3, r3, lsl #8 /* r3 = 234. */ 1944 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1945#else 1946 mov r2, r2, lsr #24 /* r2 = ...0 */ 1947 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1948 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1949 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1950#endif 1951 str r2, [r0] 1952 strh r1, [r0, #0x04] 1953 RET 1954 LMEMCPY_6_PAD 1955 1956/* 1957 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1958 */ 1959 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1960 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1961 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1962 strh r1, [r0, #0x01] 1963#ifdef __ARMEB__ 1964 mov r1, r3, lsr #24 /* r1 = ...0 */ 1965 strb r1, [r0] 1966 mov r3, r3, lsl #8 /* r3 = 123. */ 1967 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1968#else 1969 strb r3, [r0] 1970 mov r3, r3, lsr #24 /* r3 = ...3 */ 1971 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1972 mov r2, r2, lsr #8 /* r2 = ...5 */ 1973#endif 1974 strh r3, [r0, #0x03] 1975 strb r2, [r0, #0x05] 1976 RET 1977 LMEMCPY_6_PAD 1978 1979/* 1980 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1981 */ 1982 ldrb r2, [r1] 1983 ldrh r3, [r1, #0x01] 1984 ldrh ip, [r1, #0x03] 1985 ldrb r1, [r1, #0x05] 1986 strb r2, [r0] 1987 strh r3, [r0, #0x01] 1988 strh ip, [r0, #0x03] 1989 strb r1, [r0, #0x05] 1990 RET 1991 LMEMCPY_6_PAD 1992 1993/* 1994 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1995 */ 1996 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1997 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1998#ifdef __ARMEB__ 1999 mov r3, r2, lsr #8 /* r3 = ...0 */ 2000 strb r3, [r0] 2001 strb r1, [r0, #0x05] 2002 mov r3, r1, lsr #8 /* r3 = .234 */ 2003 strh r3, [r0, #0x03] 2004 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 2005 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 2006 strh r3, [r0, #0x01] 2007#else 2008 strb r2, [r0] 2009 mov r3, r1, lsr #24 2010 strb r3, [r0, #0x05] 2011 mov r3, r1, lsr #8 /* r3 = .543 */ 2012 strh r3, [r0, #0x03] 2013 mov r3, r2, lsr #8 /* r3 = ...1 */ 2014 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2015 strh r3, [r0, #0x01] 2016#endif 2017 RET 2018 LMEMCPY_6_PAD 2019 2020/* 2021 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2022 */ 2023 ldrb r2, [r1] 2024 ldrh r3, [r1, #0x01] 2025 ldrh ip, [r1, #0x03] 2026 ldrb r1, [r1, #0x05] 2027 strb r2, [r0] 2028 strh r3, [r0, #0x01] 2029 strh ip, [r0, #0x03] 2030 strb r1, [r0, #0x05] 2031 RET 2032 LMEMCPY_6_PAD 2033 2034/* 2035 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2036 */ 2037#ifdef __ARMEB__ 2038 ldr r2, [r1] /* r2 = 0123 */ 2039 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2040 mov r1, r2, lsr #16 /* r1 = ..01 */ 2041 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2042 strh r1, [r0] 2043 str r3, [r0, #0x02] 2044#else 2045 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2046 ldr r3, [r1] /* r3 = 3210 */ 2047 mov r2, r2, lsl #16 /* r2 = 54.. */ 2048 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2049 strh r3, [r0] 2050 str r2, [r0, #0x02] 2051#endif 2052 RET 2053 LMEMCPY_6_PAD 2054 2055/* 2056 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2057 */ 2058 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2059 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2060 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2061#ifdef __ARMEB__ 2062 mov r2, r2, lsr #8 /* r2 = .345 */ 2063 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2064#else 2065 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 2066 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2067#endif 2068 strh r1, [r0] 2069 str r2, [r0, #0x02] 2070 RET 2071 LMEMCPY_6_PAD 2072 2073/* 2074 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2075 */ 2076 ldrh r2, [r1] 2077 ldr r3, [r1, #0x02] 2078 strh r2, [r0] 2079 str r3, [r0, #0x02] 2080 RET 2081 LMEMCPY_6_PAD 2082 2083/* 2084 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2085 */ 2086 ldrb r3, [r1] /* r3 = ...0 */ 2087 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2088 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2089#ifdef __ARMEB__ 2090 mov r3, r3, lsl #8 /* r3 = ..0. */ 2091 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2092 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2093#else 2094 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2095 mov r1, r1, lsl #24 /* r1 = 5... */ 2096 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2097#endif 2098 strh r3, [r0] 2099 str r1, [r0, #0x02] 2100 RET 2101 LMEMCPY_6_PAD 2102 2103/* 2104 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2105 */ 2106 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2107 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2108#ifdef __ARMEB__ 2109 mov r3, r2, lsr #24 /* r3 = ...0 */ 2110 strb r3, [r0] 2111 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2112 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2113#else 2114 strb r2, [r0] 2115 mov r2, r2, lsr #8 /* r2 = .321 */ 2116 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2117 mov r1, r1, lsr #8 /* r1 = ...5 */ 2118#endif 2119 str r2, [r0, #0x01] 2120 strb r1, [r0, #0x05] 2121 RET 2122 LMEMCPY_6_PAD 2123 2124/* 2125 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2126 */ 2127 ldrb r2, [r1] 2128 ldrh r3, [r1, #0x01] 2129 ldrh ip, [r1, #0x03] 2130 ldrb r1, [r1, #0x05] 2131 strb r2, [r0] 2132 strh r3, [r0, #0x01] 2133 strh ip, [r0, #0x03] 2134 strb r1, [r0, #0x05] 2135 RET 2136 LMEMCPY_6_PAD 2137 2138/* 2139 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2140 */ 2141 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2142 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2143#ifdef __ARMEB__ 2144 mov r3, r2, lsr #8 /* r3 = ...0 */ 2145 strb r3, [r0] 2146 mov r2, r2, lsl #24 /* r2 = 1... */ 2147 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2148#else 2149 strb r2, [r0] 2150 mov r2, r2, lsr #8 /* r2 = ...1 */ 2151 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2152 mov r1, r1, lsr #24 /* r1 = ...5 */ 2153#endif 2154 str r2, [r0, #0x01] 2155 strb r1, [r0, #0x05] 2156 RET 2157 LMEMCPY_6_PAD 2158 2159/* 2160 * 1111: dst is 8-bit aligned, src is 8-bit aligned 2161 */ 2162 ldrb r2, [r1] 2163 ldr r3, [r1, #0x01] 2164 ldrb r1, [r1, #0x05] 2165 strb r2, [r0] 2166 str r3, [r0, #0x01] 2167 strb r1, [r0, #0x05] 2168 RET 2169 LMEMCPY_6_PAD 2170 2171 2172/****************************************************************************** 2173 * Special case for 8 byte copies 2174 */ 2175#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2176#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2177 LMEMCPY_8_PAD 2178.Lmemcpy_8: 2179 and r2, r1, #0x03 2180 orr r2, r2, r0, lsl #2 2181 ands r2, r2, #0x0f 2182 sub r3, pc, #0x14 2183 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2184 2185/* 2186 * 0000: dst is 32-bit aligned, src is 32-bit aligned 2187 */ 2188 ldr r2, [r1] 2189 ldr r3, [r1, #0x04] 2190 str r2, [r0] 2191 str r3, [r0, #0x04] 
2192 RET 2193 LMEMCPY_8_PAD 2194 2195/* 2196 * 0001: dst is 32-bit aligned, src is 8-bit aligned 2197 */ 2198 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2199 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 2200 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2201#ifdef __ARMEB__ 2202 mov r3, r3, lsl #8 /* r3 = 012. */ 2203 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2204 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2205#else 2206 mov r3, r3, lsr #8 /* r3 = .210 */ 2207 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2208 mov r1, r1, lsl #24 /* r1 = 7... */ 2209 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2210#endif 2211 str r3, [r0] 2212 str r2, [r0, #0x04] 2213 RET 2214 LMEMCPY_8_PAD 2215 2216/* 2217 * 0010: dst is 32-bit aligned, src is 16-bit aligned 2218 */ 2219 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2220 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2221 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2222#ifdef __ARMEB__ 2223 mov r2, r2, lsl #16 /* r2 = 01.. */ 2224 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2225 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2226#else 2227 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2228 mov r3, r3, lsr #16 /* r3 = ..54 */ 2229 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2230#endif 2231 str r2, [r0] 2232 str r3, [r0, #0x04] 2233 RET 2234 LMEMCPY_8_PAD 2235 2236/* 2237 * 0011: dst is 32-bit aligned, src is 8-bit aligned 2238 */ 2239 ldrb r3, [r1] /* r3 = ...0 */ 2240 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2241 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2242#ifdef __ARMEB__ 2243 mov r3, r3, lsl #24 /* r3 = 0... */ 2244 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2245 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 2246 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2247#else 2248 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2249 mov r2, r2, lsr #24 /* r2 = ...4 */ 2250 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2251#endif 2252 str r3, [r0] 2253 str r2, [r0, #0x04] 2254 RET 2255 LMEMCPY_8_PAD 2256 2257/* 2258 * 0100: dst is 8-bit aligned, src is 32-bit aligned 2259 */ 2260 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2261 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2262#ifdef __ARMEB__ 2263 mov r1, r3, lsr #24 /* r1 = ...0 */ 2264 strb r1, [r0] 2265 mov r1, r3, lsr #8 /* r1 = .012 */ 2266 strb r2, [r0, #0x07] 2267 mov r3, r3, lsl #24 /* r3 = 3... */ 2268 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2269#else 2270 strb r3, [r0] 2271 mov r1, r2, lsr #24 /* r1 = ...7 */ 2272 strb r1, [r0, #0x07] 2273 mov r1, r3, lsr #8 /* r1 = .321 */ 2274 mov r3, r3, lsr #24 /* r3 = ...3 */ 2275 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2276#endif 2277 strh r1, [r0, #0x01] 2278 str r3, [r0, #0x03] 2279 RET 2280 LMEMCPY_8_PAD 2281 2282/* 2283 * 0101: dst is 8-bit aligned, src is 8-bit aligned 2284 */ 2285 ldrb r2, [r1] 2286 ldrh r3, [r1, #0x01] 2287 ldr ip, [r1, #0x03] 2288 ldrb r1, [r1, #0x07] 2289 strb r2, [r0] 2290 strh r3, [r0, #0x01] 2291 str ip, [r0, #0x03] 2292 strb r1, [r0, #0x07] 2293 RET 2294 LMEMCPY_8_PAD 2295 2296/* 2297 * 0110: dst is 8-bit aligned, src is 16-bit aligned 2298 */ 2299 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2300 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2301 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2302#ifdef __ARMEB__ 2303 mov ip, r2, lsr #8 /* ip = ...0 */ 2304 strb ip, [r0] 2305 mov ip, r2, lsl #8 /* ip = .01. */ 2306 orr ip, ip, r3, lsr #24 /* ip = .012 */ 2307 strb r1, [r0, #0x07] 2308 mov r3, r3, lsl #8 /* r3 = 345. 
*/ 2309 orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 2310#else 2311 strb r2, [r0] /* 0 */ 2312 mov ip, r1, lsr #8 /* ip = ...7 */ 2313 strb ip, [r0, #0x07] /* 7 */ 2314 mov ip, r2, lsr #8 /* ip = ...1 */ 2315 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2316 mov r3, r3, lsr #8 /* r3 = .543 */ 2317 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 2318#endif 2319 strh ip, [r0, #0x01] 2320 str r3, [r0, #0x03] 2321 RET 2322 LMEMCPY_8_PAD 2323 2324/* 2325 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2326 */ 2327 ldrb r3, [r1] /* r3 = ...0 */ 2328 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2329 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 2330 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2331 strb r3, [r0] 2332 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 2333#ifdef __ARMEB__ 2334 strh r3, [r0, #0x01] 2335 orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 2336#else 2337 strh ip, [r0, #0x01] 2338 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 2339#endif 2340 str r2, [r0, #0x03] 2341 strb r1, [r0, #0x07] 2342 RET 2343 LMEMCPY_8_PAD 2344 2345/* 2346 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2347 */ 2348 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2349 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2350 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2351#ifdef __ARMEB__ 2352 strh r1, [r0] 2353 mov r1, r3, lsr #16 /* r1 = ..45 */ 2354 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ 2355#else 2356 strh r2, [r0] 2357 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 2358 mov r3, r3, lsr #16 /* r3 = ..76 */ 2359#endif 2360 str r2, [r0, #0x02] 2361 strh r3, [r0, #0x06] 2362 RET 2363 LMEMCPY_8_PAD 2364 2365/* 2366 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2367 */ 2368 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2369 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2370 ldrb ip, [r1, #0x07] /* ip = ...7 */ 2371 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2372 strh r1, [r0] 2373#ifdef __ARMEB__ 2374 mov r1, r2, lsl #24 /* r1 = 2... 
*/ 2375 orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 2376 orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 2377#else 2378 mov r1, r2, lsr #24 /* r1 = ...2 */ 2379 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 2380 mov r3, r3, lsr #24 /* r3 = ...6 */ 2381 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 2382#endif 2383 str r1, [r0, #0x02] 2384 strh r3, [r0, #0x06] 2385 RET 2386 LMEMCPY_8_PAD 2387 2388/* 2389 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2390 */ 2391 ldrh r2, [r1] 2392 ldr ip, [r1, #0x02] 2393 ldrh r3, [r1, #0x06] 2394 strh r2, [r0] 2395 str ip, [r0, #0x02] 2396 strh r3, [r0, #0x06] 2397 RET 2398 LMEMCPY_8_PAD 2399 2400/* 2401 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2402 */ 2403 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 2404 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2405 ldrb ip, [r1] /* ip = ...0 */ 2406 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 2407 strh r1, [r0, #0x06] 2408#ifdef __ARMEB__ 2409 mov r3, r3, lsr #24 /* r3 = ...5 */ 2410 orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 2411 mov r2, r2, lsr #24 /* r2 = ...1 */ 2412 orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 2413#else 2414 mov r3, r3, lsl #24 /* r3 = 5... */ 2415 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 2416 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 2417#endif 2418 str r3, [r0, #0x02] 2419 strh r2, [r0] 2420 RET 2421 LMEMCPY_8_PAD 2422 2423/* 2424 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2425 */ 2426 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2427 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2428 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 2429 strh r1, [r0, #0x05] 2430#ifdef __ARMEB__ 2431 strb r3, [r0, #0x07] 2432 mov r1, r2, lsr #24 /* r1 = ...0 */ 2433 strb r1, [r0] 2434 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2435 orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 2436 str r2, [r0, #0x01] 2437#else 2438 strb r2, [r0] 2439 mov r1, r3, lsr #24 /* r1 = ...7 */ 2440 strb r1, [r0, #0x07] 2441 mov r2, r2, lsr #8 /* r2 = .321 */ 2442 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 2443 str r2, [r0, #0x01] 2444#endif 2445 RET 2446 LMEMCPY_8_PAD 2447 2448/* 2449 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2450 */ 2451 ldrb r3, [r1] /* r3 = ...0 */ 2452 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 2453 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2454 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2455 strb r3, [r0] 2456 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 2457#ifdef __ARMEB__ 2458 strh ip, [r0, #0x05] 2459 orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 2460#else 2461 strh r3, [r0, #0x05] 2462 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 2463#endif 2464 str r2, [r0, #0x01] 2465 strb r1, [r0, #0x07] 2466 RET 2467 LMEMCPY_8_PAD 2468 2469/* 2470 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2471 */ 2472 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2473 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2474 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2475#ifdef __ARMEB__ 2476 mov ip, r2, lsr #8 /* ip = ...0 */ 2477 strb ip, [r0] 2478 mov ip, r2, lsl #24 /* ip = 1... */ 2479 orr ip, ip, r3, lsr #8 /* ip = 1234 */ 2480 strb r1, [r0, #0x07] 2481 mov r1, r1, lsr #8 /* r1 = ...6 */ 2482 orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 2483#else 2484 strb r2, [r0] 2485 mov ip, r2, lsr #8 /* ip = ...1 */ 2486 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2487 mov r2, r1, lsr #8 /* r2 = ...7 */ 2488 strb r2, [r0, #0x07] 2489 mov r1, r1, lsl #8 /* r1 = .76. 
*/ 2490 orr r1, r1, r3, lsr #24 /* r1 = .765 */ 2491#endif 2492 str ip, [r0, #0x01] 2493 strh r1, [r0, #0x05] 2494 RET 2495 LMEMCPY_8_PAD 2496 2497/* 2498 * 1111: dst is 8-bit aligned, src is 8-bit aligned 2499 */ 2500 ldrb r2, [r1] 2501 ldr ip, [r1, #0x01] 2502 ldrh r3, [r1, #0x05] 2503 ldrb r1, [r1, #0x07] 2504 strb r2, [r0] 2505 str ip, [r0, #0x01] 2506 strh r3, [r0, #0x05] 2507 strb r1, [r0, #0x07] 2508 RET 2509 LMEMCPY_8_PAD 2510 2511/****************************************************************************** 2512 * Special case for 12 byte copies 2513 */ 2514#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 2515#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 2516 LMEMCPY_C_PAD 2517.Lmemcpy_c: 2518 and r2, r1, #0x03 2519 orr r2, r2, r0, lsl #2 2520 ands r2, r2, #0x0f 2521 sub r3, pc, #0x14 2522 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 2523 2524/* 2525 * 0000: dst is 32-bit aligned, src is 32-bit aligned 2526 */ 2527 ldr r2, [r1] 2528 ldr r3, [r1, #0x04] 2529 ldr r1, [r1, #0x08] 2530 str r2, [r0] 2531 str r3, [r0, #0x04] 2532 str r1, [r0, #0x08] 2533 RET 2534 LMEMCPY_C_PAD 2535 2536/* 2537 * 0001: dst is 32-bit aligned, src is 8-bit aligned 2538 */ 2539 ldrb r2, [r1, #0xb] /* r2 = ...B */ 2540 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2541 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2542 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2543#ifdef __ARMEB__ 2544 orr r2, r2, ip, lsl #8 /* r2 = 89AB */ 2545 str r2, [r0, #0x08] 2546 mov r2, ip, lsr #24 /* r2 = ...7 */ 2547 orr r2, r2, r3, lsl #8 /* r2 = 4567 */ 2548 mov r1, r1, lsl #8 /* r1 = 012. */ 2549 orr r1, r1, r3, lsr #24 /* r1 = 0123 */ 2550#else 2551 mov r2, r2, lsl #24 /* r2 = B... */ 2552 orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 2553 str r2, [r0, #0x08] 2554 mov r2, ip, lsl #24 /* r2 = 7... 
*/ 2555 orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 2556 mov r1, r1, lsr #8 /* r1 = .210 */ 2557 orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 2558#endif 2559 str r2, [r0, #0x04] 2560 str r1, [r0] 2561 RET 2562 LMEMCPY_C_PAD 2563 2564/* 2565 * 0010: dst is 32-bit aligned, src is 16-bit aligned 2566 */ 2567 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2568 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2569 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2570 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2571#ifdef __ARMEB__ 2572 mov r2, r2, lsl #16 /* r2 = 01.. */ 2573 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2574 str r2, [r0] 2575 mov r3, r3, lsl #16 /* r3 = 45.. */ 2576 orr r3, r3, ip, lsr #16 /* r3 = 4567 */ 2577 orr r1, r1, ip, lsl #16 /* r1 = 89AB */ 2578#else 2579 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2580 str r2, [r0] 2581 mov r3, r3, lsr #16 /* r3 = ..54 */ 2582 orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 2583 mov r1, r1, lsl #16 /* r1 = BA.. */ 2584 orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 2585#endif 2586 str r3, [r0, #0x04] 2587 str r1, [r0, #0x08] 2588 RET 2589 LMEMCPY_C_PAD 2590 2591/* 2592 * 0011: dst is 32-bit aligned, src is 8-bit aligned 2593 */ 2594 ldrb r2, [r1] /* r2 = ...0 */ 2595 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2596 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 2597 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 2598#ifdef __ARMEB__ 2599 mov r2, r2, lsl #24 /* r2 = 0... */ 2600 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 2601 str r2, [r0] 2602 mov r3, r3, lsl #24 /* r3 = 4... */ 2603 orr r3, r3, ip, lsr #8 /* r3 = 4567 */ 2604 mov r1, r1, lsr #8 /* r1 = .9AB */ 2605 orr r1, r1, ip, lsl #24 /* r1 = 89AB */ 2606#else 2607 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 2608 str r2, [r0] 2609 mov r3, r3, lsr #24 /* r3 = ...4 */ 2610 orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 2611 mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 2612 orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 2613#endif 2614 str r3, [r0, #0x04] 2615 str r1, [r0, #0x08] 2616 RET 2617 LMEMCPY_C_PAD 2618 2619/* 2620 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 2621 */ 2622 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2623 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2624 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 2625 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 2626 strh r1, [r0, #0x01] 2627#ifdef __ARMEB__ 2628 mov r1, r2, lsr #24 /* r1 = ...0 */ 2629 strb r1, [r0] 2630 mov r1, r2, lsl #24 /* r1 = 3... */ 2631 orr r2, r1, r3, lsr #8 /* r1 = 3456 */ 2632 mov r1, r3, lsl #24 /* r1 = 7... */ 2633 orr r1, r1, ip, lsr #8 /* r1 = 789A */ 2634#else 2635 strb r2, [r0] 2636 mov r1, r2, lsr #24 /* r1 = ...3 */ 2637 orr r2, r1, r3, lsl #8 /* r1 = 6543 */ 2638 mov r1, r3, lsr #24 /* r1 = ...7 */ 2639 orr r1, r1, ip, lsl #8 /* r1 = A987 */ 2640 mov ip, ip, lsr #24 /* ip = ...B */ 2641#endif 2642 str r2, [r0, #0x03] 2643 str r1, [r0, #0x07] 2644 strb ip, [r0, #0x0b] 2645 RET 2646 LMEMCPY_C_PAD 2647 2648/* 2649 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 2650 */ 2651 ldrb r2, [r1] 2652 ldrh r3, [r1, #0x01] 2653 ldr ip, [r1, #0x03] 2654 strb r2, [r0] 2655 ldr r2, [r1, #0x07] 2656 ldrb r1, [r1, #0x0b] 2657 strh r3, [r0, #0x01] 2658 str ip, [r0, #0x03] 2659 str r2, [r0, #0x07] 2660 strb r1, [r0, #0x0b] 2661 RET 2662 LMEMCPY_C_PAD 2663 2664/* 2665 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 2666 */ 2667 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2668 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2669 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2670 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2671#ifdef __ARMEB__ 2672 mov r2, r2, ror #8 /* r2 = 1..0 */ 2673 strb r2, [r0] 2674 mov r2, r2, lsr #16 /* r2 = ..1. */ 2675 orr r2, r2, r3, lsr #24 /* r2 = ..12 */ 2676 strh r2, [r0, #0x01] 2677 mov r2, r3, lsl #8 /* r2 = 345. 
*/ 2678 orr r3, r2, ip, lsr #24 /* r3 = 3456 */ 2679 mov r2, ip, lsl #8 /* r2 = 789. */ 2680 orr r2, r2, r1, lsr #8 /* r2 = 789A */ 2681#else 2682 strb r2, [r0] 2683 mov r2, r2, lsr #8 /* r2 = ...1 */ 2684 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2685 strh r2, [r0, #0x01] 2686 mov r2, r3, lsr #8 /* r2 = .543 */ 2687 orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 2688 mov r2, ip, lsr #8 /* r2 = .987 */ 2689 orr r2, r2, r1, lsl #24 /* r2 = A987 */ 2690 mov r1, r1, lsr #8 /* r1 = ...B */ 2691#endif 2692 str r3, [r0, #0x03] 2693 str r2, [r0, #0x07] 2694 strb r1, [r0, #0x0b] 2695 RET 2696 LMEMCPY_C_PAD 2697 2698/* 2699 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 2700 */ 2701 ldrb r2, [r1] 2702 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2703 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 2704 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 2705 strb r2, [r0] 2706#ifdef __ARMEB__ 2707 mov r2, r3, lsr #16 /* r2 = ..12 */ 2708 strh r2, [r0, #0x01] 2709 mov r3, r3, lsl #16 /* r3 = 34.. */ 2710 orr r3, r3, ip, lsr #16 /* r3 = 3456 */ 2711 mov ip, ip, lsl #16 /* ip = 78.. */ 2712 orr ip, ip, r1, lsr #16 /* ip = 789A */ 2713 mov r1, r1, lsr #8 /* r1 = .9AB */ 2714#else 2715 strh r3, [r0, #0x01] 2716 mov r3, r3, lsr #16 /* r3 = ..43 */ 2717 orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 2718 mov ip, ip, lsr #16 /* ip = ..87 */ 2719 orr ip, ip, r1, lsl #16 /* ip = A987 */ 2720 mov r1, r1, lsr #16 /* r1 = ..xB */ 2721#endif 2722 str r3, [r0, #0x03] 2723 str ip, [r0, #0x07] 2724 strb r1, [r0, #0x0b] 2725 RET 2726 LMEMCPY_C_PAD 2727 2728/* 2729 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2730 */ 2731 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 2732 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2733 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 2734 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2735#ifdef __ARMEB__ 2736 strh r1, [r0] 2737 mov r1, ip, lsl #16 /* r1 = 23.. 
*/ 2738 orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 2739 mov r3, r3, lsl #16 /* r3 = 67.. */ 2740 orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 2741#else 2742 strh ip, [r0] 2743 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 2744 mov r3, r3, lsr #16 /* r3 = ..76 */ 2745 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 2746 mov r2, r2, lsr #16 /* r2 = ..BA */ 2747#endif 2748 str r1, [r0, #0x02] 2749 str r3, [r0, #0x06] 2750 strh r2, [r0, #0x0a] 2751 RET 2752 LMEMCPY_C_PAD 2753 2754/* 2755 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 2756 */ 2757 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2758 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2759 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 2760 strh ip, [r0] 2761 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2762 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 2763#ifdef __ARMEB__ 2764 mov r2, r2, lsl #24 /* r2 = 2... */ 2765 orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 2766 mov r3, r3, lsl #24 /* r3 = 6... */ 2767 orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 2768 orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 2769#else 2770 mov r2, r2, lsr #24 /* r2 = ...2 */ 2771 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 2772 mov r3, r3, lsr #24 /* r3 = ...6 */ 2773 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 2774 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 2775 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 2776#endif 2777 str r2, [r0, #0x02] 2778 str r3, [r0, #0x06] 2779 strh r1, [r0, #0x0a] 2780 RET 2781 LMEMCPY_C_PAD 2782 2783/* 2784 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2785 */ 2786 ldrh r2, [r1] 2787 ldr r3, [r1, #0x02] 2788 ldr ip, [r1, #0x06] 2789 ldrh r1, [r1, #0x0a] 2790 strh r2, [r0] 2791 str r3, [r0, #0x02] 2792 str ip, [r0, #0x06] 2793 strh r1, [r0, #0x0a] 2794 RET 2795 LMEMCPY_C_PAD 2796 2797/* 2798 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 2799 */ 2800 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 2801 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 2802 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 2803 strh ip, [r0, #0x0a] 2804 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2805 ldrb r1, [r1] /* r1 = ...0 */ 2806#ifdef __ARMEB__ 2807 mov r2, r2, lsr #24 /* r2 = ...9 */ 2808 orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 2809 mov r3, r3, lsr #24 /* r3 = ...5 */ 2810 orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 2811 mov r1, r1, lsl #8 /* r1 = ..0. */ 2812 orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 2813#else 2814 mov r2, r2, lsl #24 /* r2 = 9... */ 2815 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 2816 mov r3, r3, lsl #24 /* r3 = 5... */ 2817 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 2818 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 2819#endif 2820 str r2, [r0, #0x06] 2821 str r3, [r0, #0x02] 2822 strh r1, [r0] 2823 RET 2824 LMEMCPY_C_PAD 2825 2826/* 2827 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 2828 */ 2829 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2830 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 2831 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 2832#ifdef __ARMEB__ 2833 mov r3, r2, lsr #24 /* r3 = ...0 */ 2834 strb r3, [r0] 2835 mov r2, r2, lsl #8 /* r2 = 123. */ 2836 orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 2837 str r2, [r0, #0x01] 2838 mov r2, ip, lsl #8 /* r2 = 567. 
*/ 2839 orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 2840 str r2, [r0, #0x05] 2841 mov r2, r1, lsr #8 /* r2 = ..9A */ 2842 strh r2, [r0, #0x09] 2843 strb r1, [r0, #0x0b] 2844#else 2845 strb r2, [r0] 2846 mov r3, r2, lsr #8 /* r3 = .321 */ 2847 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 2848 str r3, [r0, #0x01] 2849 mov r3, ip, lsr #8 /* r3 = .765 */ 2850 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 2851 str r3, [r0, #0x05] 2852 mov r1, r1, lsr #8 /* r1 = .BA9 */ 2853 strh r1, [r0, #0x09] 2854 mov r1, r1, lsr #16 /* r1 = ...B */ 2855 strb r1, [r0, #0x0b] 2856#endif 2857 RET 2858 LMEMCPY_C_PAD 2859 2860/* 2861 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 2862 */ 2863 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 2864 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 2865 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2866 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2867 strb r2, [r0, #0x0b] 2868#ifdef __ARMEB__ 2869 strh r3, [r0, #0x09] 2870 mov r3, r3, lsr #16 /* r3 = ..78 */ 2871 orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 2872 mov ip, ip, lsr #16 /* ip = ..34 */ 2873 orr ip, ip, r1, lsl #16 /* ip = 1234 */ 2874 mov r1, r1, lsr #16 /* r1 = ..x0 */ 2875#else 2876 mov r2, r3, lsr #16 /* r2 = ..A9 */ 2877 strh r2, [r0, #0x09] 2878 mov r3, r3, lsl #16 /* r3 = 87.. */ 2879 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 2880 mov ip, ip, lsl #16 /* ip = 43.. 
*/ 2881 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 2882 mov r1, r1, lsr #8 /* r1 = .210 */ 2883#endif 2884 str r3, [r0, #0x05] 2885 str ip, [r0, #0x01] 2886 strb r1, [r0] 2887 RET 2888 LMEMCPY_C_PAD 2889 2890/* 2891 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 2892 */ 2893#ifdef __ARMEB__ 2894 ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 2895 ldr ip, [r1, #0x06] /* ip = 6789 */ 2896 ldr r3, [r1, #0x02] /* r3 = 2345 */ 2897 ldrh r1, [r1] /* r1 = ..01 */ 2898 strb r2, [r0, #0x0b] 2899 mov r2, r2, lsr #8 /* r2 = ...A */ 2900 orr r2, r2, ip, lsl #8 /* r2 = 789A */ 2901 mov ip, ip, lsr #8 /* ip = .678 */ 2902 orr ip, ip, r3, lsl #24 /* ip = 5678 */ 2903 mov r3, r3, lsr #8 /* r3 = .234 */ 2904 orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 2905 mov r1, r1, lsr #8 /* r1 = ...0 */ 2906 strb r1, [r0] 2907 str r3, [r0, #0x01] 2908 str ip, [r0, #0x05] 2909 strh r2, [r0, #0x09] 2910#else 2911 ldrh r2, [r1] /* r2 = ..10 */ 2912 ldr r3, [r1, #0x02] /* r3 = 5432 */ 2913 ldr ip, [r1, #0x06] /* ip = 9876 */ 2914 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 2915 strb r2, [r0] 2916 mov r2, r2, lsr #8 /* r2 = ...1 */ 2917 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2918 mov r3, r3, lsr #24 /* r3 = ...5 */ 2919 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 2920 mov ip, ip, lsr #24 /* ip = ...9 */ 2921 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 2922 mov r1, r1, lsr #8 /* r1 = ...B */ 2923 str r2, [r0, #0x01] 2924 str r3, [r0, #0x05] 2925 strh ip, [r0, #0x09] 2926 strb r1, [r0, #0x0b] 2927#endif 2928 RET 2929 LMEMCPY_C_PAD 2930 2931/* 2932 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 2933 */ 2934 ldrb r2, [r1] 2935 ldr r3, [r1, #0x01] 2936 ldr ip, [r1, #0x05] 2937 strb r2, [r0] 2938 ldrh r2, [r1, #0x09] 2939 ldrb r1, [r1, #0x0b] 2940 str r3, [r0, #0x01] 2941 str ip, [r0, #0x05] 2942 strh r2, [r0, #0x09] 2943 strb r1, [r0, #0x0b] 2944 RET 2945END(memcpy) 2946#endif /* _ARM_ARCH_5E */ 2947 2948#ifdef GPROF 2949 2950ENTRY(user) 2951 nop 2952END(user) 2953ENTRY(btrap) 2954 nop 2955END(btrap) 
/*
 * GPROF marker symbols (companions to user/btrap above): each is an
 * empty function containing a single nop, emitted only so the profiler
 * has well-known symbols bracketing regions of kernel text.
 * NOTE(review): presumably follows the traditional kernel-profiling
 * convention (etrap = end of trap handling, bintr/eintr = begin/end of
 * interrupt handling) -- confirm against this port's GPROF machdep
 * support; nothing in this file calls them.
 */
ENTRY(etrap)
	nop
END(etrap)
ENTRY(bintr)
	nop
END(bintr)
ENTRY(eintr)
	nop
END(eintr)
#endif /* GPROF */