1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include "assym.inc"

	.syntax unified

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2			/* r1 = count; ip (below) walks dest so r0 survives */
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
	tst	ip, #0x04		/* Quad-align for armv5e (strd needs 8-byte alignment) */
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data (r2:r3 pair feeds strd) */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	bgt	.Lmemset_loop128
	RETeq			/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	bgt	.Lmemset_loop32
	RETeq			/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	RETeq			/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq			/* Zero length so just exit */

	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach word alignment */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strbgt	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq			/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */
END(memset)

/*
 * memcmp: Compare two blocks of memory
 *
 * On entry:
 *   r0 - first address
 *   r1 - second address
 *   r2 - number of bytes to compare
 *
 * On exit:
 *   r0 - 0 if the blocks are equal, otherwise the (signed) difference
 *        of the first mismatching byte pair
 */
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)
	mov	ip, r0			/* ip walks buffer 1 so r0 is free for the result */
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes		/* len == 6 is special-cased (see below) */
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eorsne	r3, ip, r1		/* r3 = addr bits that differ (sets Z) */
	RETeq			/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Computed branch into the byte-compare stanzas below: each
	 * stanza is 6 instructions (24 bytes), and r3 = 3 * ((r1 - 5) & 3),
	 * so "r3, lsl #3" skips whole stanzas.
	 */
	/* Word-align the addresses, if necessary */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1
	addne	pc, pc, r3, lsl #3
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq			/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results.  Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne			/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne			/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne			/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne			/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne			/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(memcmp)

/*
 * memmove: Copy a possibly-overlapping block of memory
 *
 * On entry:
 *   r0 - dest address
 *   r1 - src address
 *   r2 - number of bytes to copy
 *
 * On exit:
 *   r0 - dest address
 *
 * Non-overlapping buffers are handed straight to memcpy; otherwise the
 * copy direction is chosen so that src data is never overwritten before
 * it is read (forwards when src > dst, backwards when dst > src).
 */
ENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore saved dest addr and return */

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes to word-align dest */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	lr, [r1], #4		/* lr carries partial word between iterations */
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src is 1 byte past a word boundary: shift/merge 16 bytes per pass */
.Lmemmove_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* undo the word rounding for the byte tail */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src is 2 bytes past a word boundary: shift/merge 16 bytes per pass */
.Lmemmove_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* undo the word rounding for the byte tail */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src is 3 bytes past a word boundary: shift/merge 16 bytes per pass */
.Lmemmove_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* undo the word rounding for the byte tail */
	b	.Lmemmove_fl4

	/* Overlapping with dst > src: copy from the ends downwards */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	r3, [r1, #0]		/* r3 carries partial word between iterations */
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src is 3 bytes past a word boundary: shift/merge 16 bytes per pass */
.Lmemmove_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* undo the word rounding for the byte tail */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src is 2 bytes past a word boundary: shift/merge 16 bytes per pass */
.Lmemmove_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* undo the word rounding for the byte tail */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src is 1 byte past a word boundary: shift/merge 16 bytes per pass */
.Lmemmove_bsrcul1loop32:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* undo the word rounding for the byte tail */
	b	.Lmemmove_bl4
END(memmove)

/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrble	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strble	ip, [r3], #0x01
	ldrblt	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strblt	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope.
Things just got bad */ 766 767 /* Quad-align the destination buffer */ 768 tst r3, #0x07 /* Already quad aligned? */ 769 ldrne ip, [r1], #0x04 770 stmfd sp!, {r4-r9} /* Free up some registers */ 771 subne r2, r2, #0x04 772 strne ip, [r3], #0x04 773 774 /* Destination buffer quad aligned, source is at least word aligned */ 775 subs r2, r2, #0x80 776 blt .Lmemcpy_w_lessthan128 777 778 /* Copy 128 bytes at a time */ 779.Lmemcpy_w_loop128: 780 ldr r4, [r1], #0x04 /* LD:00-03 */ 781 ldr r5, [r1], #0x04 /* LD:04-07 */ 782 pld [r1, #0x18] /* Prefetch 0x20 */ 783 ldr r6, [r1], #0x04 /* LD:08-0b */ 784 ldr r7, [r1], #0x04 /* LD:0c-0f */ 785 ldr r8, [r1], #0x04 /* LD:10-13 */ 786 ldr r9, [r1], #0x04 /* LD:14-17 */ 787 strd r4, [r3], #0x08 /* ST:00-07 */ 788 ldr r4, [r1], #0x04 /* LD:18-1b */ 789 ldr r5, [r1], #0x04 /* LD:1c-1f */ 790 strd r6, [r3], #0x08 /* ST:08-0f */ 791 ldr r6, [r1], #0x04 /* LD:20-23 */ 792 ldr r7, [r1], #0x04 /* LD:24-27 */ 793 pld [r1, #0x18] /* Prefetch 0x40 */ 794 strd r8, [r3], #0x08 /* ST:10-17 */ 795 ldr r8, [r1], #0x04 /* LD:28-2b */ 796 ldr r9, [r1], #0x04 /* LD:2c-2f */ 797 strd r4, [r3], #0x08 /* ST:18-1f */ 798 ldr r4, [r1], #0x04 /* LD:30-33 */ 799 ldr r5, [r1], #0x04 /* LD:34-37 */ 800 strd r6, [r3], #0x08 /* ST:20-27 */ 801 ldr r6, [r1], #0x04 /* LD:38-3b */ 802 ldr r7, [r1], #0x04 /* LD:3c-3f */ 803 strd r8, [r3], #0x08 /* ST:28-2f */ 804 ldr r8, [r1], #0x04 /* LD:40-43 */ 805 ldr r9, [r1], #0x04 /* LD:44-47 */ 806 pld [r1, #0x18] /* Prefetch 0x60 */ 807 strd r4, [r3], #0x08 /* ST:30-37 */ 808 ldr r4, [r1], #0x04 /* LD:48-4b */ 809 ldr r5, [r1], #0x04 /* LD:4c-4f */ 810 strd r6, [r3], #0x08 /* ST:38-3f */ 811 ldr r6, [r1], #0x04 /* LD:50-53 */ 812 ldr r7, [r1], #0x04 /* LD:54-57 */ 813 strd r8, [r3], #0x08 /* ST:40-47 */ 814 ldr r8, [r1], #0x04 /* LD:58-5b */ 815 ldr r9, [r1], #0x04 /* LD:5c-5f */ 816 strd r4, [r3], #0x08 /* ST:48-4f */ 817 ldr r4, [r1], #0x04 /* LD:60-63 */ 818 ldr r5, [r1], #0x04 /* LD:64-67 */ 819 pld [r1, #0x18] /* 
Prefetch 0x80 */ 820 strd r6, [r3], #0x08 /* ST:50-57 */ 821 ldr r6, [r1], #0x04 /* LD:68-6b */ 822 ldr r7, [r1], #0x04 /* LD:6c-6f */ 823 strd r8, [r3], #0x08 /* ST:58-5f */ 824 ldr r8, [r1], #0x04 /* LD:70-73 */ 825 ldr r9, [r1], #0x04 /* LD:74-77 */ 826 strd r4, [r3], #0x08 /* ST:60-67 */ 827 ldr r4, [r1], #0x04 /* LD:78-7b */ 828 ldr r5, [r1], #0x04 /* LD:7c-7f */ 829 strd r6, [r3], #0x08 /* ST:68-6f */ 830 strd r8, [r3], #0x08 /* ST:70-77 */ 831 subs r2, r2, #0x80 832 strd r4, [r3], #0x08 /* ST:78-7f */ 833 bge .Lmemcpy_w_loop128 834 835.Lmemcpy_w_lessthan128: 836 adds r2, r2, #0x80 /* Adjust for extra sub */ 837 ldmfdeq sp!, {r4-r9} 838 RETeq /* Return now if done */ 839 subs r2, r2, #0x20 840 blt .Lmemcpy_w_lessthan32 841 842 /* Copy 32 bytes at a time */ 843.Lmemcpy_w_loop32: 844 ldr r4, [r1], #0x04 845 ldr r5, [r1], #0x04 846 pld [r1, #0x18] 847 ldr r6, [r1], #0x04 848 ldr r7, [r1], #0x04 849 ldr r8, [r1], #0x04 850 ldr r9, [r1], #0x04 851 strd r4, [r3], #0x08 852 ldr r4, [r1], #0x04 853 ldr r5, [r1], #0x04 854 strd r6, [r3], #0x08 855 strd r8, [r3], #0x08 856 subs r2, r2, #0x20 857 strd r4, [r3], #0x08 858 bge .Lmemcpy_w_loop32 859 860.Lmemcpy_w_lessthan32: 861 adds r2, r2, #0x20 /* Adjust for extra sub */ 862 ldmfdeq sp!, {r4-r9} 863 RETeq /* Return now if done */ 864 865 and r4, r2, #0x18 866 rsbs r4, r4, #0x18 867 addne pc, pc, r4, lsl #1 868 nop 869 870 /* At least 24 bytes remaining */ 871 ldr r4, [r1], #0x04 872 ldr r5, [r1], #0x04 873 sub r2, r2, #0x08 874 strd r4, [r3], #0x08 875 876 /* At least 16 bytes remaining */ 877 ldr r4, [r1], #0x04 878 ldr r5, [r1], #0x04 879 sub r2, r2, #0x08 880 strd r4, [r3], #0x08 881 882 /* At least 8 bytes remaining */ 883 ldr r4, [r1], #0x04 884 ldr r5, [r1], #0x04 885 subs r2, r2, #0x08 886 strd r4, [r3], #0x08 887 888 /* Less than 8 bytes remaining */ 889 ldmfd sp!, {r4-r9} 890 RETeq /* Return now if done */ 891 subs r2, r2, #0x04 892 ldrge ip, [r1], #0x04 893 strge ip, [r3], #0x04 894 RETeq /* Return now if done 
*/ 895 addlt r2, r2, #0x04 896 ldrb ip, [r1], #0x01 897 cmp r2, #0x02 898 ldrbge r2, [r1], #0x01 899 strb ip, [r3], #0x01 900 ldrbgt ip, [r1] 901 strbge r2, [r3], #0x01 902 strbgt ip, [r3] 903 RET 904/* Place a literal pool here for the above ldr instructions to use */ 905.ltorg 906 907 908/* 909 * At this point, it has not been possible to word align both buffers. 910 * The destination buffer is word aligned, but the source buffer is not. 911 */ 912.Lmemcpy_bad_align: 913 stmfd sp!, {r4-r7} 914 bic r1, r1, #0x03 915 cmp ip, #2 916 ldr ip, [r1], #0x04 917 bgt .Lmemcpy_bad3 918 beq .Lmemcpy_bad2 919 b .Lmemcpy_bad1 920 921.Lmemcpy_bad1_loop16: 922 mov r4, ip, lsr #8 923 ldr r5, [r1], #0x04 924 pld [r1, #0x018] 925 ldr r6, [r1], #0x04 926 ldr r7, [r1], #0x04 927 ldr ip, [r1], #0x04 928 orr r4, r4, r5, lsl #24 929 mov r5, r5, lsr #8 930 orr r5, r5, r6, lsl #24 931 mov r6, r6, lsr #8 932 orr r6, r6, r7, lsl #24 933 mov r7, r7, lsr #8 934 orr r7, r7, ip, lsl #24 935 str r4, [r3], #0x04 936 str r5, [r3], #0x04 937 str r6, [r3], #0x04 938 str r7, [r3], #0x04 939.Lmemcpy_bad1: 940 subs r2, r2, #0x10 941 bge .Lmemcpy_bad1_loop16 942 943 adds r2, r2, #0x10 944 ldmfdeq sp!, {r4-r7} 945 RETeq /* Return now if done */ 946 subs r2, r2, #0x04 947 sublt r1, r1, #0x03 948 blt .Lmemcpy_bad_done 949 950.Lmemcpy_bad1_loop4: 951 mov r4, ip, lsr #8 952 ldr ip, [r1], #0x04 953 subs r2, r2, #0x04 954 orr r4, r4, ip, lsl #24 955 str r4, [r3], #0x04 956 bge .Lmemcpy_bad1_loop4 957 sub r1, r1, #0x03 958 b .Lmemcpy_bad_done 959 960.Lmemcpy_bad2_loop16: 961 mov r4, ip, lsr #16 962 ldr r5, [r1], #0x04 963 pld [r1, #0x018] 964 ldr r6, [r1], #0x04 965 ldr r7, [r1], #0x04 966 ldr ip, [r1], #0x04 967 orr r4, r4, r5, lsl #16 968 mov r5, r5, lsr #16 969 orr r5, r5, r6, lsl #16 970 mov r6, r6, lsr #16 971 orr r6, r6, r7, lsl #16 972 mov r7, r7, lsr #16 973 orr r7, r7, ip, lsl #16 974 str r4, [r3], #0x04 975 str r5, [r3], #0x04 976 str r6, [r3], #0x04 977 str r7, [r3], #0x04 978.Lmemcpy_bad2: 979 subs 
r2, r2, #0x10 980 bge .Lmemcpy_bad2_loop16 981 982 adds r2, r2, #0x10 983 ldmfdeq sp!, {r4-r7} 984 RETeq /* Return now if done */ 985 subs r2, r2, #0x04 986 sublt r1, r1, #0x02 987 blt .Lmemcpy_bad_done 988 989.Lmemcpy_bad2_loop4: 990 mov r4, ip, lsr #16 991 ldr ip, [r1], #0x04 992 subs r2, r2, #0x04 993 orr r4, r4, ip, lsl #16 994 str r4, [r3], #0x04 995 bge .Lmemcpy_bad2_loop4 996 sub r1, r1, #0x02 997 b .Lmemcpy_bad_done 998 999.Lmemcpy_bad3_loop16: 1000 mov r4, ip, lsr #24 1001 ldr r5, [r1], #0x04 1002 pld [r1, #0x018] 1003 ldr r6, [r1], #0x04 1004 ldr r7, [r1], #0x04 1005 ldr ip, [r1], #0x04 1006 orr r4, r4, r5, lsl #8 1007 mov r5, r5, lsr #24 1008 orr r5, r5, r6, lsl #8 1009 mov r6, r6, lsr #24 1010 orr r6, r6, r7, lsl #8 1011 mov r7, r7, lsr #24 1012 orr r7, r7, ip, lsl #8 1013 str r4, [r3], #0x04 1014 str r5, [r3], #0x04 1015 str r6, [r3], #0x04 1016 str r7, [r3], #0x04 1017.Lmemcpy_bad3: 1018 subs r2, r2, #0x10 1019 bge .Lmemcpy_bad3_loop16 1020 1021 adds r2, r2, #0x10 1022 ldmfdeq sp!, {r4-r7} 1023 RETeq /* Return now if done */ 1024 subs r2, r2, #0x04 1025 sublt r1, r1, #0x01 1026 blt .Lmemcpy_bad_done 1027 1028.Lmemcpy_bad3_loop4: 1029 mov r4, ip, lsr #24 1030 ldr ip, [r1], #0x04 1031 subs r2, r2, #0x04 1032 orr r4, r4, ip, lsl #8 1033 str r4, [r3], #0x04 1034 bge .Lmemcpy_bad3_loop4 1035 sub r1, r1, #0x01 1036 1037.Lmemcpy_bad_done: 1038 ldmfd sp!, {r4-r7} 1039 adds r2, r2, #0x04 1040 RETeq 1041 ldrb ip, [r1], #0x01 1042 cmp r2, #0x02 1043 ldrbge r2, [r1], #0x01 1044 strb ip, [r3], #0x01 1045 ldrbgt ip, [r1] 1046 strbge r2, [r3], #0x01 1047 strbgt ip, [r3] 1048 RET 1049 1050 1051/* 1052 * Handle short copies (less than 16 bytes), possibly misaligned. 1053 * Some of these are *very* common, thanks to the network stack, 1054 * and so are handled specially. 
1055 */ 1056.Lmemcpy_short: 1057 add pc, pc, r2, lsl #2 1058 nop 1059 RET /* 0x00 */ 1060 b .Lmemcpy_bytewise /* 0x01 */ 1061 b .Lmemcpy_bytewise /* 0x02 */ 1062 b .Lmemcpy_bytewise /* 0x03 */ 1063 b .Lmemcpy_4 /* 0x04 */ 1064 b .Lmemcpy_bytewise /* 0x05 */ 1065 b .Lmemcpy_6 /* 0x06 */ 1066 b .Lmemcpy_bytewise /* 0x07 */ 1067 b .Lmemcpy_8 /* 0x08 */ 1068 b .Lmemcpy_bytewise /* 0x09 */ 1069 b .Lmemcpy_bytewise /* 0x0a */ 1070 b .Lmemcpy_bytewise /* 0x0b */ 1071 b .Lmemcpy_c /* 0x0c */ 1072.Lmemcpy_bytewise: 1073 mov r3, r0 /* We must not clobber r0 */ 1074 ldrb ip, [r1], #0x01 10751: subs r2, r2, #0x01 1076 strb ip, [r3], #0x01 1077 ldrbne ip, [r1], #0x01 1078 bne 1b 1079 RET 1080 1081/****************************************************************************** 1082 * Special case for 4 byte copies 1083 */ 1084#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1085#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1086 LMEMCPY_4_PAD 1087.Lmemcpy_4: 1088 and r2, r1, #0x03 1089 orr r2, r2, r0, lsl #2 1090 ands r2, r2, #0x0f 1091 sub r3, pc, #0x14 1092 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1093 1094/* 1095 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1096 */ 1097 ldr r2, [r1] 1098 str r2, [r0] 1099 RET 1100 LMEMCPY_4_PAD 1101 1102/* 1103 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1104 */ 1105 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1106 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1107 mov r3, r3, lsr #8 /* r3 = .210 */ 1108 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1109 str r3, [r0] 1110 RET 1111 LMEMCPY_4_PAD 1112 1113/* 1114 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1115 */ 1116 ldrh r3, [r1, #0x02] 1117 ldrh r2, [r1] 1118 orr r3, r2, r3, lsl #16 1119 str r3, [r0] 1120 RET 1121 LMEMCPY_4_PAD 1122 1123/* 1124 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1125 */ 1126 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1127 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1128 mov r3, r3, lsr #24 /* r3 = ...0 */ 1129 orr r3, r3, r2, lsl #8 
/* r3 = 3210 */ 1130 str r3, [r0] 1131 RET 1132 LMEMCPY_4_PAD 1133 1134/* 1135 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1136 */ 1137 ldr r2, [r1] 1138 strb r2, [r0] 1139 mov r3, r2, lsr #8 1140 mov r1, r2, lsr #24 1141 strb r1, [r0, #0x03] 1142 strh r3, [r0, #0x01] 1143 RET 1144 LMEMCPY_4_PAD 1145 1146/* 1147 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1148 */ 1149 ldrb r2, [r1] 1150 ldrh r3, [r1, #0x01] 1151 ldrb r1, [r1, #0x03] 1152 strb r2, [r0] 1153 strh r3, [r0, #0x01] 1154 strb r1, [r0, #0x03] 1155 RET 1156 LMEMCPY_4_PAD 1157 1158/* 1159 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1160 */ 1161 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1162 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1163 strb r2, [r0] 1164 mov r2, r2, lsr #8 /* r2 = ...1 */ 1165 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1166 mov r3, r3, lsr #8 /* r3 = ...3 */ 1167 strh r2, [r0, #0x01] 1168 strb r3, [r0, #0x03] 1169 RET 1170 LMEMCPY_4_PAD 1171 1172/* 1173 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1174 */ 1175 ldrb r2, [r1] 1176 ldrh r3, [r1, #0x01] 1177 ldrb r1, [r1, #0x03] 1178 strb r2, [r0] 1179 strh r3, [r0, #0x01] 1180 strb r1, [r0, #0x03] 1181 RET 1182 LMEMCPY_4_PAD 1183 1184/* 1185 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1186 */ 1187 ldr r2, [r1] 1188 strh r2, [r0] 1189 mov r3, r2, lsr #16 1190 strh r3, [r0, #0x02] 1191 RET 1192 LMEMCPY_4_PAD 1193 1194/* 1195 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1196 */ 1197 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1198 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1199 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1200 strh r1, [r0] 1201 mov r2, r2, lsr #24 /* r2 = ...2 */ 1202 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1203 strh r2, [r0, #0x02] 1204 RET 1205 LMEMCPY_4_PAD 1206 1207/* 1208 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1209 */ 1210 ldrh r2, [r1] 1211 ldrh r3, [r1, #0x02] 1212 strh r2, [r0] 1213 strh r3, [r0, #0x02] 1214 RET 1215 LMEMCPY_4_PAD 
1216 1217/* 1218 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1219 */ 1220 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1221 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1222 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1223 strh r1, [r0, #0x02] 1224 mov r3, r3, lsl #8 /* r3 = 321. */ 1225 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1226 strh r3, [r0] 1227 RET 1228 LMEMCPY_4_PAD 1229 1230/* 1231 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1232 */ 1233 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1234 strb r2, [r0] 1235 mov r3, r2, lsr #8 1236 mov r1, r2, lsr #24 1237 strh r3, [r0, #0x01] 1238 strb r1, [r0, #0x03] 1239 RET 1240 LMEMCPY_4_PAD 1241 1242/* 1243 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1244 */ 1245 ldrb r2, [r1] 1246 ldrh r3, [r1, #0x01] 1247 ldrb r1, [r1, #0x03] 1248 strb r2, [r0] 1249 strh r3, [r0, #0x01] 1250 strb r1, [r0, #0x03] 1251 RET 1252 LMEMCPY_4_PAD 1253 1254/* 1255 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1256 */ 1257 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1258 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1259 strb r2, [r0] 1260 mov r2, r2, lsr #8 /* r2 = ...1 */ 1261 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1262 strh r2, [r0, #0x01] 1263 mov r3, r3, lsr #8 /* r3 = ...3 */ 1264 strb r3, [r0, #0x03] 1265 RET 1266 LMEMCPY_4_PAD 1267 1268/* 1269 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1270 */ 1271 ldrb r2, [r1] 1272 ldrh r3, [r1, #0x01] 1273 ldrb r1, [r1, #0x03] 1274 strb r2, [r0] 1275 strh r3, [r0, #0x01] 1276 strb r1, [r0, #0x03] 1277 RET 1278 LMEMCPY_4_PAD 1279 1280 1281/****************************************************************************** 1282 * Special case for 6 byte copies 1283 */ 1284#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1285#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 1286 LMEMCPY_6_PAD 1287.Lmemcpy_6: 1288 and r2, r1, #0x03 1289 orr r2, r2, r0, lsl #2 1290 ands r2, r2, #0x0f 1291 sub r3, pc, #0x14 1292 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1293 1294/* 
1295 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1296 */ 1297 ldr r2, [r1] 1298 ldrh r3, [r1, #0x04] 1299 str r2, [r0] 1300 strh r3, [r0, #0x04] 1301 RET 1302 LMEMCPY_6_PAD 1303 1304/* 1305 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1306 */ 1307 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1308 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1309 mov r2, r2, lsr #8 /* r2 = .210 */ 1310 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1311 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1312 str r2, [r0] 1313 strh r3, [r0, #0x04] 1314 RET 1315 LMEMCPY_6_PAD 1316 1317/* 1318 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1319 */ 1320 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1321 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1322 mov r1, r3, lsr #16 /* r1 = ..54 */ 1323 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1324 str r2, [r0] 1325 strh r1, [r0, #0x04] 1326 RET 1327 LMEMCPY_6_PAD 1328 1329/* 1330 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1331 */ 1332 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1333 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1334 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 1335 mov r2, r2, lsr #24 /* r2 = ...0 */ 1336 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1337 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1338 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1339 str r2, [r0] 1340 strh r1, [r0, #0x04] 1341 RET 1342 LMEMCPY_6_PAD 1343 1344/* 1345 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1346 */ 1347 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1348 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1349 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1350 strh r1, [r0, #0x01] 1351 strb r3, [r0] 1352 mov r3, r3, lsr #24 /* r3 = ...3 */ 1353 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1354 mov r2, r2, lsr #8 /* r2 = ...5 */ 1355 strh r3, [r0, #0x03] 1356 strb r2, [r0, #0x05] 1357 RET 1358 LMEMCPY_6_PAD 1359 1360/* 1361 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1362 */ 1363 ldrb r2, [r1] 1364 ldrh r3, [r1, #0x01] 1365 ldrh ip, [r1, #0x03] 1366 ldrb r1, [r1, #0x05] 1367 strb r2, [r0] 1368 strh r3, [r0, #0x01] 1369 strh ip, [r0, #0x03] 1370 strb r1, [r0, #0x05] 1371 RET 1372 LMEMCPY_6_PAD 1373 1374/* 1375 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1376 */ 1377 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1378 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1379 strb r2, [r0] 1380 mov r3, r1, lsr #24 1381 strb r3, [r0, #0x05] 1382 mov r3, r1, lsr #8 /* r3 = .543 */ 1383 strh r3, [r0, #0x03] 1384 mov r3, r2, lsr #8 /* r3 = ...1 */ 1385 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 1386 strh r3, [r0, #0x01] 1387 RET 1388 LMEMCPY_6_PAD 1389 1390/* 1391 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1392 */ 1393 ldrb r2, [r1] 1394 ldrh r3, [r1, #0x01] 1395 ldrh ip, [r1, #0x03] 1396 ldrb r1, [r1, #0x05] 1397 strb r2, [r0] 1398 strh r3, [r0, #0x01] 1399 strh ip, [r0, #0x03] 1400 strb r1, [r0, #0x05] 1401 RET 1402 LMEMCPY_6_PAD 1403 1404/* 1405 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1406 */ 1407 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 1408 ldr r3, [r1] /* r3 = 3210 */ 1409 mov r2, r2, lsl #16 /* r2 = 54.. 
*/ 1410 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 1411 strh r3, [r0] 1412 str r2, [r0, #0x02] 1413 RET 1414 LMEMCPY_6_PAD 1415 1416/* 1417 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1418 */ 1419 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1420 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 1421 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1422 mov r2, r2, lsl #8 /* r2 = 543. */ 1423 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 1424 strh r1, [r0] 1425 str r2, [r0, #0x02] 1426 RET 1427 LMEMCPY_6_PAD 1428 1429/* 1430 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1431 */ 1432 ldrh r2, [r1] 1433 ldr r3, [r1, #0x02] 1434 strh r2, [r0] 1435 str r3, [r0, #0x02] 1436 RET 1437 LMEMCPY_6_PAD 1438 1439/* 1440 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1441 */ 1442 ldrb r3, [r1] /* r3 = ...0 */ 1443 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1444 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 1445 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1446 mov r1, r1, lsl #24 /* r1 = 5... 
*/ 1447 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 1448 strh r3, [r0] 1449 str r1, [r0, #0x02] 1450 RET 1451 LMEMCPY_6_PAD 1452 1453/* 1454 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1455 */ 1456 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1457 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 1458 strb r2, [r0] 1459 mov r2, r2, lsr #8 /* r2 = .321 */ 1460 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 1461 mov r1, r1, lsr #8 /* r1 = ...5 */ 1462 str r2, [r0, #0x01] 1463 strb r1, [r0, #0x05] 1464 RET 1465 LMEMCPY_6_PAD 1466 1467/* 1468 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1469 */ 1470 ldrb r2, [r1] 1471 ldrh r3, [r1, #0x01] 1472 ldrh ip, [r1, #0x03] 1473 ldrb r1, [r1, #0x05] 1474 strb r2, [r0] 1475 strh r3, [r0, #0x01] 1476 strh ip, [r0, #0x03] 1477 strb r1, [r0, #0x05] 1478 RET 1479 LMEMCPY_6_PAD 1480 1481/* 1482 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1483 */ 1484 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1485 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1486 strb r2, [r0] 1487 mov r2, r2, lsr #8 /* r2 = ...1 */ 1488 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 1489 mov r1, r1, lsr #24 /* r1 = ...5 */ 1490 str r2, [r0, #0x01] 1491 strb r1, [r0, #0x05] 1492 RET 1493 LMEMCPY_6_PAD 1494 1495/* 1496 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1497 */ 1498 ldrb r2, [r1] 1499 ldr r3, [r1, #0x01] 1500 ldrb r1, [r1, #0x05] 1501 strb r2, [r0] 1502 str r3, [r0, #0x01] 1503 strb r1, [r0, #0x05] 1504 RET 1505 LMEMCPY_6_PAD 1506 1507 1508/****************************************************************************** 1509 * Special case for 8 byte copies 1510 */ 1511#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 1512#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 1513 LMEMCPY_8_PAD 1514.Lmemcpy_8: 1515 and r2, r1, #0x03 1516 orr r2, r2, r0, lsl #2 1517 ands r2, r2, #0x0f 1518 sub r3, pc, #0x14 1519 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 1520 1521/* 1522 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1523 */ 1524 ldr r2, [r1] 1525 ldr r3, [r1, 
#0x04] 1526 str r2, [r0] 1527 str r3, [r0, #0x04] 1528 RET 1529 LMEMCPY_8_PAD 1530 1531/* 1532 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1533 */ 1534 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1535 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 1536 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1537 mov r3, r3, lsr #8 /* r3 = .210 */ 1538 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1539 mov r1, r1, lsl #24 /* r1 = 7... */ 1540 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 1541 str r3, [r0] 1542 str r2, [r0, #0x04] 1543 RET 1544 LMEMCPY_8_PAD 1545 1546/* 1547 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1548 */ 1549 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1550 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1551 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1552 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1553 mov r3, r3, lsr #16 /* r3 = ..54 */ 1554 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 1555 str r2, [r0] 1556 str r3, [r0, #0x04] 1557 RET 1558 LMEMCPY_8_PAD 1559 1560/* 1561 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1562 */ 1563 ldrb r3, [r1] /* r3 = ...0 */ 1564 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1565 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 1566 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1567 mov r2, r2, lsr #24 /* r2 = ...4 */ 1568 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 1569 str r3, [r0] 1570 str r2, [r0, #0x04] 1571 RET 1572 LMEMCPY_8_PAD 1573 1574/* 1575 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1576 */ 1577 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1578 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 1579 strb r3, [r0] 1580 mov r1, r2, lsr #24 /* r1 = ...7 */ 1581 strb r1, [r0, #0x07] 1582 mov r1, r3, lsr #8 /* r1 = .321 */ 1583 mov r3, r3, lsr #24 /* r3 = ...3 */ 1584 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 1585 strh r1, [r0, #0x01] 1586 str r3, [r0, #0x03] 1587 RET 1588 LMEMCPY_8_PAD 1589 1590/* 1591 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1592 */ 1593 ldrb r2, [r1] 1594 
ldrh r3, [r1, #0x01] 1595 ldr ip, [r1, #0x03] 1596 ldrb r1, [r1, #0x07] 1597 strb r2, [r0] 1598 strh r3, [r0, #0x01] 1599 str ip, [r0, #0x03] 1600 strb r1, [r0, #0x07] 1601 RET 1602 LMEMCPY_8_PAD 1603 1604/* 1605 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1606 */ 1607 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1608 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1609 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1610 strb r2, [r0] /* 0 */ 1611 mov ip, r1, lsr #8 /* ip = ...7 */ 1612 strb ip, [r0, #0x07] /* 7 */ 1613 mov ip, r2, lsr #8 /* ip = ...1 */ 1614 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1615 mov r3, r3, lsr #8 /* r3 = .543 */ 1616 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 1617 strh ip, [r0, #0x01] 1618 str r3, [r0, #0x03] 1619 RET 1620 LMEMCPY_8_PAD 1621 1622/* 1623 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1624 */ 1625 ldrb r3, [r1] /* r3 = ...0 */ 1626 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1627 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 1628 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1629 strb r3, [r0] 1630 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 1631 strh ip, [r0, #0x01] 1632 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 1633 str r2, [r0, #0x03] 1634 strb r1, [r0, #0x07] 1635 RET 1636 LMEMCPY_8_PAD 1637 1638/* 1639 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1640 */ 1641 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1642 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1643 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1644 strh r2, [r0] 1645 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 1646 mov r3, r3, lsr #16 /* r3 = ..76 */ 1647 str r2, [r0, #0x02] 1648 strh r3, [r0, #0x06] 1649 RET 1650 LMEMCPY_8_PAD 1651 1652/* 1653 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1654 */ 1655 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1656 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1657 ldrb ip, [r1, #0x07] /* ip = ...7 */ 1658 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1659 
strh r1, [r0] 1660 mov r1, r2, lsr #24 /* r1 = ...2 */ 1661 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 1662 mov r3, r3, lsr #24 /* r3 = ...6 */ 1663 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 1664 str r1, [r0, #0x02] 1665 strh r3, [r0, #0x06] 1666 RET 1667 LMEMCPY_8_PAD 1668 1669/* 1670 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1671 */ 1672 ldrh r2, [r1] 1673 ldr ip, [r1, #0x02] 1674 ldrh r3, [r1, #0x06] 1675 strh r2, [r0] 1676 str ip, [r0, #0x02] 1677 strh r3, [r0, #0x06] 1678 RET 1679 LMEMCPY_8_PAD 1680 1681/* 1682 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1683 */ 1684 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 1685 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1686 ldrb ip, [r1] /* ip = ...0 */ 1687 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 1688 strh r1, [r0, #0x06] 1689 mov r3, r3, lsl #24 /* r3 = 5... */ 1690 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 1691 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 1692 str r3, [r0, #0x02] 1693 strh r2, [r0] 1694 RET 1695 LMEMCPY_8_PAD 1696 1697/* 1698 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1699 */ 1700 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1701 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1702 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 1703 strh r1, [r0, #0x05] 1704 strb r2, [r0] 1705 mov r1, r3, lsr #24 /* r1 = ...7 */ 1706 strb r1, [r0, #0x07] 1707 mov r2, r2, lsr #8 /* r2 = .321 */ 1708 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 1709 str r2, [r0, #0x01] 1710 RET 1711 LMEMCPY_8_PAD 1712 1713/* 1714 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1715 */ 1716 ldrb r3, [r1] /* r3 = ...0 */ 1717 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 1718 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1719 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1720 strb r3, [r0] 1721 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 1722 strh r3, [r0, #0x05] 1723 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 1724 str r2, [r0, #0x01] 1725 strb r1, [r0, #0x07] 1726 RET 1727 
LMEMCPY_8_PAD 1728 1729/* 1730 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1731 */ 1732 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1733 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1734 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1735 strb r2, [r0] 1736 mov ip, r2, lsr #8 /* ip = ...1 */ 1737 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1738 mov r2, r1, lsr #8 /* r2 = ...7 */ 1739 strb r2, [r0, #0x07] 1740 mov r1, r1, lsl #8 /* r1 = .76. */ 1741 orr r1, r1, r3, lsr #24 /* r1 = .765 */ 1742 str ip, [r0, #0x01] 1743 strh r1, [r0, #0x05] 1744 RET 1745 LMEMCPY_8_PAD 1746 1747/* 1748 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1749 */ 1750 ldrb r2, [r1] 1751 ldr ip, [r1, #0x01] 1752 ldrh r3, [r1, #0x05] 1753 ldrb r1, [r1, #0x07] 1754 strb r2, [r0] 1755 str ip, [r0, #0x01] 1756 strh r3, [r0, #0x05] 1757 strb r1, [r0, #0x07] 1758 RET 1759 LMEMCPY_8_PAD 1760 1761/****************************************************************************** 1762 * Special case for 12 byte copies 1763 */ 1764#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 1765#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 1766 LMEMCPY_C_PAD 1767.Lmemcpy_c: 1768 and r2, r1, #0x03 1769 orr r2, r2, r0, lsl #2 1770 ands r2, r2, #0x0f 1771 sub r3, pc, #0x14 1772 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 1773 1774/* 1775 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1776 */ 1777 ldr r2, [r1] 1778 ldr r3, [r1, #0x04] 1779 ldr r1, [r1, #0x08] 1780 str r2, [r0] 1781 str r3, [r0, #0x04] 1782 str r1, [r0, #0x08] 1783 RET 1784 LMEMCPY_C_PAD 1785 1786/* 1787 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1788 */ 1789 ldrb r2, [r1, #0xb] /* r2 = ...B */ 1790 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1791 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1792 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1793 mov r2, r2, lsl #24 /* r2 = B... */ 1794 orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 1795 str r2, [r0, #0x08] 1796 mov r2, ip, lsl #24 /* r2 = 7... 
*/ 1797 orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 1798 mov r1, r1, lsr #8 /* r1 = .210 */ 1799 orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 1800 str r2, [r0, #0x04] 1801 str r1, [r0] 1802 RET 1803 LMEMCPY_C_PAD 1804 1805/* 1806 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1807 */ 1808 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1809 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1810 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1811 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1812 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1813 str r2, [r0] 1814 mov r3, r3, lsr #16 /* r3 = ..54 */ 1815 orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 1816 mov r1, r1, lsl #16 /* r1 = BA.. */ 1817 orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 1818 str r3, [r0, #0x04] 1819 str r1, [r0, #0x08] 1820 RET 1821 LMEMCPY_C_PAD 1822 1823/* 1824 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1825 */ 1826 ldrb r2, [r1] /* r2 = ...0 */ 1827 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1828 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1829 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1830 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1831 str r2, [r0] 1832 mov r3, r3, lsr #24 /* r3 = ...4 */ 1833 orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 1834 mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 1835 orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 1836 str r3, [r0, #0x04] 1837 str r1, [r0, #0x08] 1838 RET 1839 LMEMCPY_C_PAD 1840 1841/* 1842 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 1843 */ 1844 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1845 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1846 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 1847 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1848 strh r1, [r0, #0x01] 1849 strb r2, [r0] 1850 mov r1, r2, lsr #24 /* r1 = ...3 */ 1851 orr r2, r1, r3, lsl #8 /* r1 = 6543 */ 1852 mov r1, r3, lsr #24 /* r1 = ...7 */ 1853 orr r1, r1, ip, lsl #8 /* r1 = A987 */ 1854 mov ip, ip, lsr #24 /* ip = ...B */ 1855 str r2, [r0, #0x03] 1856 str r1, [r0, #0x07] 1857 strb ip, [r0, #0x0b] 1858 RET 1859 LMEMCPY_C_PAD 1860 1861/* 1862 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 1863 */ 1864 ldrb r2, [r1] 1865 ldrh r3, [r1, #0x01] 1866 ldr ip, [r1, #0x03] 1867 strb r2, [r0] 1868 ldr r2, [r1, #0x07] 1869 ldrb r1, [r1, #0x0b] 1870 strh r3, [r0, #0x01] 1871 str ip, [r0, #0x03] 1872 str r2, [r0, #0x07] 1873 strb r1, [r0, #0x0b] 1874 RET 1875 LMEMCPY_C_PAD 1876 1877/* 1878 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 1879 */ 1880 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1881 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1882 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1883 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1884 strb r2, [r0] 1885 mov r2, r2, lsr #8 /* r2 = ...1 */ 1886 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1887 strh r2, [r0, #0x01] 1888 mov r2, r3, lsr #8 /* r2 = .543 */ 1889 orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 1890 mov r2, ip, lsr #8 /* r2 = .987 */ 1891 orr r2, r2, r1, lsl #24 /* r2 = A987 */ 1892 mov r1, r1, lsr #8 /* r1 = ...B */ 1893 str r3, [r0, #0x03] 1894 str r2, [r0, #0x07] 1895 strb r1, [r0, #0x0b] 1896 RET 1897 LMEMCPY_C_PAD 1898 1899/* 1900 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 1901 
*/ 1902 ldrb r2, [r1] 1903 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1904 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1905 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1906 strb r2, [r0] 1907 strh r3, [r0, #0x01] 1908 mov r3, r3, lsr #16 /* r3 = ..43 */ 1909 orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 1910 mov ip, ip, lsr #16 /* ip = ..87 */ 1911 orr ip, ip, r1, lsl #16 /* ip = A987 */ 1912 mov r1, r1, lsr #16 /* r1 = ..xB */ 1913 str r3, [r0, #0x03] 1914 str ip, [r0, #0x07] 1915 strb r1, [r0, #0x0b] 1916 RET 1917 LMEMCPY_C_PAD 1918 1919/* 1920 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1921 */ 1922 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 1923 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1924 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 1925 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1926 strh ip, [r0] 1927 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 1928 mov r3, r3, lsr #16 /* r3 = ..76 */ 1929 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 1930 mov r2, r2, lsr #16 /* r2 = ..BA */ 1931 str r1, [r0, #0x02] 1932 str r3, [r0, #0x06] 1933 strh r2, [r0, #0x0a] 1934 RET 1935 LMEMCPY_C_PAD 1936 1937/* 1938 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 1939 */ 1940 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1941 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1942 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 1943 strh ip, [r0] 1944 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1945 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 1946 mov r2, r2, lsr #24 /* r2 = ...2 */ 1947 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 1948 mov r3, r3, lsr #24 /* r3 = ...6 */ 1949 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 1950 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 1951 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 1952 str r2, [r0, #0x02] 1953 str r3, [r0, #0x06] 1954 strh r1, [r0, #0x0a] 1955 RET 1956 LMEMCPY_C_PAD 1957 1958/* 1959 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1960 */ 1961 ldrh r2, [r1] 1962 ldr r3, [r1, #0x02] 1963 ldr ip, [r1, #0x06] 1964 ldrh r1, [r1, #0x0a] 1965 strh r2, [r0] 1966 str r3, [r0, #0x02] 1967 str ip, [r0, #0x06] 1968 strh r1, [r0, #0x0a] 1969 RET 1970 LMEMCPY_C_PAD 1971 1972/* 1973 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 1974 */ 1975 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 1976 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 1977 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 1978 strh ip, [r0, #0x0a] 1979 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1980 ldrb r1, [r1] /* r1 = ...0 */ 1981 mov r2, r2, lsl #24 /* r2 = 9... */ 1982 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 1983 mov r3, r3, lsl #24 /* r3 = 5... */ 1984 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 1985 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 1986 str r2, [r0, #0x06] 1987 str r3, [r0, #0x02] 1988 strh r1, [r0] 1989 RET 1990 LMEMCPY_C_PAD 1991 1992/* 1993 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 1994 */ 1995 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1996 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 1997 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 1998 strb r2, [r0] 1999 mov r3, r2, lsr #8 /* r3 = .321 */ 2000 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 2001 str r3, [r0, #0x01] 2002 mov r3, ip, lsr #8 /* r3 = .765 */ 2003 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 2004 str r3, [r0, #0x05] 2005 mov r1, r1, lsr #8 /* r1 = .BA9 */ 2006 strh r1, [r0, #0x09] 2007 mov r1, r1, lsr #16 /* r1 = ...B */ 2008 strb r1, [r0, #0x0b] 2009 RET 2010 LMEMCPY_C_PAD 2011 2012/* 2013 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 2014 */ 2015 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 2016 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 2017 ldr ip, [r1, 
#0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2018 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2019 strb r2, [r0, #0x0b] 2020 mov r2, r3, lsr #16 /* r2 = ..A9 */ 2021 strh r2, [r0, #0x09] 2022 mov r3, r3, lsl #16 /* r3 = 87.. */ 2023 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 2024 mov ip, ip, lsl #16 /* ip = 43.. */ 2025 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 2026 mov r1, r1, lsr #8 /* r1 = .210 */ 2027 str r3, [r0, #0x05] 2028 str ip, [r0, #0x01] 2029 strb r1, [r0] 2030 RET 2031 LMEMCPY_C_PAD 2032 2033/* 2034 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 2035 */ 2036 ldrh r2, [r1] /* r2 = ..10 */ 2037 ldr r3, [r1, #0x02] /* r3 = 5432 */ 2038 ldr ip, [r1, #0x06] /* ip = 9876 */ 2039 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 2040 strb r2, [r0] 2041 mov r2, r2, lsr #8 /* r2 = ...1 */ 2042 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2043 mov r3, r3, lsr #24 /* r3 = ...5 */ 2044 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 2045 mov ip, ip, lsr #24 /* ip = ...9 */ 2046 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 2047 mov r1, r1, lsr #8 /* r1 = ...B */ 2048 str r2, [r0, #0x01] 2049 str r3, [r0, #0x05] 2050 strh ip, [r0, #0x09] 2051 strb r1, [r0, #0x0b] 2052 RET 2053 LMEMCPY_C_PAD 2054 2055/* 2056 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 2057 */ 2058 ldrb r2, [r1] 2059 ldr r3, [r1, #0x01] 2060 ldr ip, [r1, #0x05] 2061 strb r2, [r0] 2062 ldrh r2, [r1, #0x09] 2063 ldrb r1, [r1, #0x0b] 2064 str r3, [r0, #0x01] 2065 str ip, [r0, #0x05] 2066 strh r2, [r0, #0x09] 2067 strb r1, [r0, #0x0b] 2068 RET 2069END(memcpy) 2070