1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>
#include "assym.inc"

	.syntax unified

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 *
 * Register use inside: ip = running store pointer (so r0 survives as the
 * return value), r3 = fill byte replicated to 16 then 32 bits, r1 = bytes
 * remaining, r2 = scratch / duplicate fill word for strd.
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2
/* Secondary entry: expects fill byte already in r3, count in r1, dst in r0 */
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
	tst	ip, #0x04		/* Quad-align for armv5e */
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data (r2:r3 pair for strd) */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	bgt	.Lmemset_loop128
	RETeq				/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	bgt	.Lmemset_loop32
	RETeq				/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	RETeq				/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq				/* Zero length so just exit */

	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET				/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach alignment */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strbgt	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq				/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET				/* Exit */
END(memset)

/*
 * memcmp: compare two blocks of memory byte-wise.
 *
 * On entry:
 *   r0 - first buffer (s1)
 *   r1 - second buffer (s2)
 *   r2 - number of bytes to compare
 *
 * On exit:
 *   r0 - difference (b1 - b2) of the first mismatching byte pair,
 *        or 0 if the blocks are equal (or len == 0).
 *
 * ip shadows r0 as the s1 pointer so r0 is free for the return value.
 */
ENTRY(memcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes		/* dedicated fast path for len == 6 */
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eorsne	r3, ip, r1
	RETeq				/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Word-align the addresses, if necessary: computed branch into one
	 * of the byte-compare groups below (each group is 6 insns = 24
	 * bytes; r3 = ((s2 - 5) & 3) * 3, scaled by 8 on the add).
	 */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1
	addne	pc, pc, r3, lsl #3
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq				/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results. Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne				/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne				/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne				/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne				/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne				/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(memcmp)

/*
 * memmove: copy a block of memory, handling overlap.
 *
 * On entry:
 *   r0 - dest address
 *   r1 - src address
 *   r2 - number of bytes
 *
 * On exit:
 *   r0 - dest address
 *
 * Non-overlapping buffers are handed off to memcpy. Otherwise a forward
 * (src > dst) or backward (src < dst) copy is chosen so the overlapping
 * region is never clobbered before it is read. The forward path pushes
 * {r0, lr} so the ldmia-to-pc epilogues both return and restore r0.
 */
ENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq				/* Bail now if src/dst are the same */
	subcc	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0		/* if (src > dsr) r3 = src - dst */
	cmp	r3, r2			/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq				/* NOTE(review): flags are from cmp r1,r0;
					 * eq already returned above — TODO confirm */

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Forward copy, src misaligned by r12 (1..3) relative to the
	 * now word-aligned dst: read whole words and funnel-shift pairs
	 * of them together (lsr #8k / lsl #(32-8k)) before storing.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* undo the alignment bias for the tail */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

	/* Backward copy: start from the end of both buffers */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq				/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Backward copy, src misaligned by r12 (1..3): mirror image of the
	 * forward funnel-shift loops above (lsl #8k / lsr #(32-8k)).
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* undo the alignment bias for the tail */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
END(memmove)

/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	/* NOTE(review): .Lnormal is referenced above but not defined in the
	 * visible code — TODO confirm against the full file */
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrble	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strble	ip, [r3], #0x01
	ldrblt	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strblt	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope.
Things just got bad */ 764 765 /* Quad-align the destination buffer */ 766 tst r3, #0x07 /* Already quad aligned? */ 767 ldrne ip, [r1], #0x04 768 stmfd sp!, {r4-r9} /* Free up some registers */ 769 subne r2, r2, #0x04 770 strne ip, [r3], #0x04 771 772 /* Destination buffer quad aligned, source is at least word aligned */ 773 subs r2, r2, #0x80 774 blt .Lmemcpy_w_lessthan128 775 776 /* Copy 128 bytes at a time */ 777.Lmemcpy_w_loop128: 778 ldr r4, [r1], #0x04 /* LD:00-03 */ 779 ldr r5, [r1], #0x04 /* LD:04-07 */ 780 pld [r1, #0x18] /* Prefetch 0x20 */ 781 ldr r6, [r1], #0x04 /* LD:08-0b */ 782 ldr r7, [r1], #0x04 /* LD:0c-0f */ 783 ldr r8, [r1], #0x04 /* LD:10-13 */ 784 ldr r9, [r1], #0x04 /* LD:14-17 */ 785 strd r4, [r3], #0x08 /* ST:00-07 */ 786 ldr r4, [r1], #0x04 /* LD:18-1b */ 787 ldr r5, [r1], #0x04 /* LD:1c-1f */ 788 strd r6, [r3], #0x08 /* ST:08-0f */ 789 ldr r6, [r1], #0x04 /* LD:20-23 */ 790 ldr r7, [r1], #0x04 /* LD:24-27 */ 791 pld [r1, #0x18] /* Prefetch 0x40 */ 792 strd r8, [r3], #0x08 /* ST:10-17 */ 793 ldr r8, [r1], #0x04 /* LD:28-2b */ 794 ldr r9, [r1], #0x04 /* LD:2c-2f */ 795 strd r4, [r3], #0x08 /* ST:18-1f */ 796 ldr r4, [r1], #0x04 /* LD:30-33 */ 797 ldr r5, [r1], #0x04 /* LD:34-37 */ 798 strd r6, [r3], #0x08 /* ST:20-27 */ 799 ldr r6, [r1], #0x04 /* LD:38-3b */ 800 ldr r7, [r1], #0x04 /* LD:3c-3f */ 801 strd r8, [r3], #0x08 /* ST:28-2f */ 802 ldr r8, [r1], #0x04 /* LD:40-43 */ 803 ldr r9, [r1], #0x04 /* LD:44-47 */ 804 pld [r1, #0x18] /* Prefetch 0x60 */ 805 strd r4, [r3], #0x08 /* ST:30-37 */ 806 ldr r4, [r1], #0x04 /* LD:48-4b */ 807 ldr r5, [r1], #0x04 /* LD:4c-4f */ 808 strd r6, [r3], #0x08 /* ST:38-3f */ 809 ldr r6, [r1], #0x04 /* LD:50-53 */ 810 ldr r7, [r1], #0x04 /* LD:54-57 */ 811 strd r8, [r3], #0x08 /* ST:40-47 */ 812 ldr r8, [r1], #0x04 /* LD:58-5b */ 813 ldr r9, [r1], #0x04 /* LD:5c-5f */ 814 strd r4, [r3], #0x08 /* ST:48-4f */ 815 ldr r4, [r1], #0x04 /* LD:60-63 */ 816 ldr r5, [r1], #0x04 /* LD:64-67 */ 817 pld [r1, #0x18] /* 
Prefetch 0x80 */ 818 strd r6, [r3], #0x08 /* ST:50-57 */ 819 ldr r6, [r1], #0x04 /* LD:68-6b */ 820 ldr r7, [r1], #0x04 /* LD:6c-6f */ 821 strd r8, [r3], #0x08 /* ST:58-5f */ 822 ldr r8, [r1], #0x04 /* LD:70-73 */ 823 ldr r9, [r1], #0x04 /* LD:74-77 */ 824 strd r4, [r3], #0x08 /* ST:60-67 */ 825 ldr r4, [r1], #0x04 /* LD:78-7b */ 826 ldr r5, [r1], #0x04 /* LD:7c-7f */ 827 strd r6, [r3], #0x08 /* ST:68-6f */ 828 strd r8, [r3], #0x08 /* ST:70-77 */ 829 subs r2, r2, #0x80 830 strd r4, [r3], #0x08 /* ST:78-7f */ 831 bge .Lmemcpy_w_loop128 832 833.Lmemcpy_w_lessthan128: 834 adds r2, r2, #0x80 /* Adjust for extra sub */ 835 ldmfdeq sp!, {r4-r9} 836 RETeq /* Return now if done */ 837 subs r2, r2, #0x20 838 blt .Lmemcpy_w_lessthan32 839 840 /* Copy 32 bytes at a time */ 841.Lmemcpy_w_loop32: 842 ldr r4, [r1], #0x04 843 ldr r5, [r1], #0x04 844 pld [r1, #0x18] 845 ldr r6, [r1], #0x04 846 ldr r7, [r1], #0x04 847 ldr r8, [r1], #0x04 848 ldr r9, [r1], #0x04 849 strd r4, [r3], #0x08 850 ldr r4, [r1], #0x04 851 ldr r5, [r1], #0x04 852 strd r6, [r3], #0x08 853 strd r8, [r3], #0x08 854 subs r2, r2, #0x20 855 strd r4, [r3], #0x08 856 bge .Lmemcpy_w_loop32 857 858.Lmemcpy_w_lessthan32: 859 adds r2, r2, #0x20 /* Adjust for extra sub */ 860 ldmfdeq sp!, {r4-r9} 861 RETeq /* Return now if done */ 862 863 and r4, r2, #0x18 864 rsbs r4, r4, #0x18 865 addne pc, pc, r4, lsl #1 866 nop 867 868 /* At least 24 bytes remaining */ 869 ldr r4, [r1], #0x04 870 ldr r5, [r1], #0x04 871 sub r2, r2, #0x08 872 strd r4, [r3], #0x08 873 874 /* At least 16 bytes remaining */ 875 ldr r4, [r1], #0x04 876 ldr r5, [r1], #0x04 877 sub r2, r2, #0x08 878 strd r4, [r3], #0x08 879 880 /* At least 8 bytes remaining */ 881 ldr r4, [r1], #0x04 882 ldr r5, [r1], #0x04 883 subs r2, r2, #0x08 884 strd r4, [r3], #0x08 885 886 /* Less than 8 bytes remaining */ 887 ldmfd sp!, {r4-r9} 888 RETeq /* Return now if done */ 889 subs r2, r2, #0x04 890 ldrge ip, [r1], #0x04 891 strge ip, [r3], #0x04 892 RETeq /* Return now if done 
*/ 893 addlt r2, r2, #0x04 894 ldrb ip, [r1], #0x01 895 cmp r2, #0x02 896 ldrbge r2, [r1], #0x01 897 strb ip, [r3], #0x01 898 ldrbgt ip, [r1] 899 strbge r2, [r3], #0x01 900 strbgt ip, [r3] 901 RET 902/* Place a literal pool here for the above ldr instructions to use */ 903.ltorg 904 905 906/* 907 * At this point, it has not been possible to word align both buffers. 908 * The destination buffer is word aligned, but the source buffer is not. 909 */ 910.Lmemcpy_bad_align: 911 stmfd sp!, {r4-r7} 912 bic r1, r1, #0x03 913 cmp ip, #2 914 ldr ip, [r1], #0x04 915 bgt .Lmemcpy_bad3 916 beq .Lmemcpy_bad2 917 b .Lmemcpy_bad1 918 919.Lmemcpy_bad1_loop16: 920 mov r4, ip, lsr #8 921 ldr r5, [r1], #0x04 922 pld [r1, #0x018] 923 ldr r6, [r1], #0x04 924 ldr r7, [r1], #0x04 925 ldr ip, [r1], #0x04 926 orr r4, r4, r5, lsl #24 927 mov r5, r5, lsr #8 928 orr r5, r5, r6, lsl #24 929 mov r6, r6, lsr #8 930 orr r6, r6, r7, lsl #24 931 mov r7, r7, lsr #8 932 orr r7, r7, ip, lsl #24 933 str r4, [r3], #0x04 934 str r5, [r3], #0x04 935 str r6, [r3], #0x04 936 str r7, [r3], #0x04 937.Lmemcpy_bad1: 938 subs r2, r2, #0x10 939 bge .Lmemcpy_bad1_loop16 940 941 adds r2, r2, #0x10 942 ldmfdeq sp!, {r4-r7} 943 RETeq /* Return now if done */ 944 subs r2, r2, #0x04 945 sublt r1, r1, #0x03 946 blt .Lmemcpy_bad_done 947 948.Lmemcpy_bad1_loop4: 949 mov r4, ip, lsr #8 950 ldr ip, [r1], #0x04 951 subs r2, r2, #0x04 952 orr r4, r4, ip, lsl #24 953 str r4, [r3], #0x04 954 bge .Lmemcpy_bad1_loop4 955 sub r1, r1, #0x03 956 b .Lmemcpy_bad_done 957 958.Lmemcpy_bad2_loop16: 959 mov r4, ip, lsr #16 960 ldr r5, [r1], #0x04 961 pld [r1, #0x018] 962 ldr r6, [r1], #0x04 963 ldr r7, [r1], #0x04 964 ldr ip, [r1], #0x04 965 orr r4, r4, r5, lsl #16 966 mov r5, r5, lsr #16 967 orr r5, r5, r6, lsl #16 968 mov r6, r6, lsr #16 969 orr r6, r6, r7, lsl #16 970 mov r7, r7, lsr #16 971 orr r7, r7, ip, lsl #16 972 str r4, [r3], #0x04 973 str r5, [r3], #0x04 974 str r6, [r3], #0x04 975 str r7, [r3], #0x04 976.Lmemcpy_bad2: 977 subs 
r2, r2, #0x10 978 bge .Lmemcpy_bad2_loop16 979 980 adds r2, r2, #0x10 981 ldmfdeq sp!, {r4-r7} 982 RETeq /* Return now if done */ 983 subs r2, r2, #0x04 984 sublt r1, r1, #0x02 985 blt .Lmemcpy_bad_done 986 987.Lmemcpy_bad2_loop4: 988 mov r4, ip, lsr #16 989 ldr ip, [r1], #0x04 990 subs r2, r2, #0x04 991 orr r4, r4, ip, lsl #16 992 str r4, [r3], #0x04 993 bge .Lmemcpy_bad2_loop4 994 sub r1, r1, #0x02 995 b .Lmemcpy_bad_done 996 997.Lmemcpy_bad3_loop16: 998 mov r4, ip, lsr #24 999 ldr r5, [r1], #0x04 1000 pld [r1, #0x018] 1001 ldr r6, [r1], #0x04 1002 ldr r7, [r1], #0x04 1003 ldr ip, [r1], #0x04 1004 orr r4, r4, r5, lsl #8 1005 mov r5, r5, lsr #24 1006 orr r5, r5, r6, lsl #8 1007 mov r6, r6, lsr #24 1008 orr r6, r6, r7, lsl #8 1009 mov r7, r7, lsr #24 1010 orr r7, r7, ip, lsl #8 1011 str r4, [r3], #0x04 1012 str r5, [r3], #0x04 1013 str r6, [r3], #0x04 1014 str r7, [r3], #0x04 1015.Lmemcpy_bad3: 1016 subs r2, r2, #0x10 1017 bge .Lmemcpy_bad3_loop16 1018 1019 adds r2, r2, #0x10 1020 ldmfdeq sp!, {r4-r7} 1021 RETeq /* Return now if done */ 1022 subs r2, r2, #0x04 1023 sublt r1, r1, #0x01 1024 blt .Lmemcpy_bad_done 1025 1026.Lmemcpy_bad3_loop4: 1027 mov r4, ip, lsr #24 1028 ldr ip, [r1], #0x04 1029 subs r2, r2, #0x04 1030 orr r4, r4, ip, lsl #8 1031 str r4, [r3], #0x04 1032 bge .Lmemcpy_bad3_loop4 1033 sub r1, r1, #0x01 1034 1035.Lmemcpy_bad_done: 1036 ldmfd sp!, {r4-r7} 1037 adds r2, r2, #0x04 1038 RETeq 1039 ldrb ip, [r1], #0x01 1040 cmp r2, #0x02 1041 ldrbge r2, [r1], #0x01 1042 strb ip, [r3], #0x01 1043 ldrbgt ip, [r1] 1044 strbge r2, [r3], #0x01 1045 strbgt ip, [r3] 1046 RET 1047 1048 1049/* 1050 * Handle short copies (less than 16 bytes), possibly misaligned. 1051 * Some of these are *very* common, thanks to the network stack, 1052 * and so are handled specially. 
 */
/*
 * Short-copy dispatcher.  On entry r2 holds the copy length (0x00-0x0c,
 * per the table slots below).  "add pc, pc, r2, lsl #2" indexes into the
 * branch table (pc reads as the address of the add plus 8, i.e. the slot
 * after the nop).  Lengths with a dedicated unrolled handler (4, 6, 8, 12)
 * branch there; everything else falls back to the byte-at-a-time loop.
 */
.Lmemcpy_short:
	add	pc, pc, r2, lsl #2
	nop
	RET				/* 0x00 */
	b	.Lmemcpy_bytewise	/* 0x01 */
	b	.Lmemcpy_bytewise	/* 0x02 */
	b	.Lmemcpy_bytewise	/* 0x03 */
	b	.Lmemcpy_4		/* 0x04 */
	b	.Lmemcpy_bytewise	/* 0x05 */
	b	.Lmemcpy_6		/* 0x06 */
	b	.Lmemcpy_bytewise	/* 0x07 */
	b	.Lmemcpy_8		/* 0x08 */
	b	.Lmemcpy_bytewise	/* 0x09 */
	b	.Lmemcpy_bytewise	/* 0x0a */
	b	.Lmemcpy_bytewise	/* 0x0b */
	b	.Lmemcpy_c		/* 0x0c */

/*
 * Generic fallback: copy r2 (> 0) bytes one at a time from r1 to r0.
 * The destination is advanced through r3 so that r0 (memcpy's return
 * value) is preserved.
 */
.Lmemcpy_bytewise:
	mov	r3, r0			/* We must not clobber r0 */
	ldrb	ip, [r1], #0x01
1:	subs	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrbne	ip, [r1], #0x01
	bne	1b
	RET

/******************************************************************************
 * Special case for 4 byte copies
 *
 * Each of the 16 handlers below is padded to a 64-byte slot so the entry
 * sequence can branch to handler (dst&3)<<2 | (src&3) with a single
 * computed add to pc.  The 0000 handler is the fall-through case.
 */
#define LMEMCPY_4_LOG2	6	/* 64 bytes */
#define LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
	LMEMCPY_4_PAD
.Lmemcpy_4:
	/* r2 = (dst & 3) << 2 | (src & 3); selects one of 16 handlers */
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14		/* r3 = address of .Lmemcpy_4 (pc reads as this insn + 8) */
	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	str	r2, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
	str	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r3, [r1, #0x02]
	ldrh	r2, [r1]
	orr	r3, r2, r3, lsl #16
	str	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
	mov	r3, r3, lsr #24		/* r3 = ...0 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	str	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strb	r1, [r0, #0x03]
	strh	r3, [r0, #0x01]
	RET
	LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	mov	r3, r3, lsr #8		/* r3 = ...3 */
	strh	r2, [r0, #0x01]
	strb	r3, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	strh	r2, [r0]
	mov	r3, r2, lsr #16
	strh	r3, [r0, #0x02]
	RET
	LMEMCPY_4_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
	strh	r2, [r0, #0x02]
	RET
	LMEMCPY_4_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldrh	r3, [r1, #0x02]
	strh	r2, [r0]
	strh	r3, [r0, #0x02]
	RET
	LMEMCPY_4_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
	strh	r1, [r0, #0x02]
	mov	r3, r3, lsl #8		/* r3 = 321. */
	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
	strh	r3, [r0]
	RET
	LMEMCPY_4_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsr #8		/* r3 = ...3 */
	strb	r3, [r0, #0x03]
	RET
	LMEMCPY_4_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	RET
	LMEMCPY_4_PAD


/******************************************************************************
 * Special case for 6 byte copies
 *
 * Same dispatch scheme as the 4-byte case: 16 handlers, one per
 * dst/src alignment combination, each padded to a 64-byte slot.
 */
#define LMEMCPY_6_LOG2	6	/* 64 bytes */
#define LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
	LMEMCPY_6_PAD
.Lmemcpy_6:
	/* r2 = (dst & 3) << 2 | (src & 3); selects one of 16 handlers */
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14		/* r3 = address of .Lmemcpy_6 */
	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldrh	r3, [r1, #0x04]
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
	mov	r2, r2, lsr #8		/* r2 = .210 */
	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	mov	r1, r3, lsr #16		/* r1 = ..54 */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	strh	r1, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
	mov	r2, r2, lsr #24		/* r2 = ...0 */
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	mov	r1, r1, lsl #8		/* r1 = xx5. */
	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
	str	r2, [r0]
	strh	r1, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
	strb	r3, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
	mov	r2, r2, lsr #8		/* r2 = ...5 */
	strh	r3, [r0, #0x03]
	strb	r2, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
	strb	r2, [r0]
	mov	r3, r1, lsr #24
	strb	r3, [r0, #0x05]
	mov	r3, r1, lsr #8		/* r3 = .543 */
	strh	r3, [r0, #0x03]
	mov	r3, r2, lsr #8		/* r3 = ...1 */
	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
	strh	r3, [r0, #0x01]
	RET
	LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
	ldr	r3, [r1]		/* r3 = 3210 */
	mov	r2, r2, lsl #16		/* r2 = 54.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
	strh	r3, [r0]
	str	r2, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	mov	r2, r2, lsl #8		/* r2 = 543. */
	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
	strh	r1, [r0]
	str	r2, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 5... */
	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
	strh	r3, [r0]
	str	r1, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
	mov	r1, r1, lsr #8		/* r1 = ...5 */
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
	mov	r1, r1, lsr #24		/* r1 = ...5 */
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	str	r3, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD


/******************************************************************************
 * Special case for 8 byte copies
 *
 * Same dispatch scheme: 16 handlers in 64-byte slots.
 */
#define LMEMCPY_8_LOG2	6	/* 64 bytes */
#define LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
	LMEMCPY_8_PAD
.Lmemcpy_8:
	/* r2 = (dst & 3) << 2 | (src & 3); selects one of 16 handlers */
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14		/* r3 = address of .Lmemcpy_8 */
	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 7... */
	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
	str	r3, [r0]
	str	r2, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
	str	r2, [r0]
	str	r3, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r2, r2, lsr #24		/* r2 = ...4 */
	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
	str	r3, [r0]
	str	r2, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
	strb	r3, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r1, r3, lsr #8		/* r1 = .321 */
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
	strh	r1, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	RET
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
	RET
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 *
 * Same dispatch scheme, but the handlers need up to 32 instructions,
 * so each slot is 128 bytes.
 */
#define LMEMCPY_C_LOG2	7	/* 128 bytes */
#define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	/* r2 = (dst & 3) << 2 | (src & 3); selects one of 16 handlers */
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14		/* r3 = address of .Lmemcpy_c */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
END(memcpy)