1/* $NetBSD: memcpy_xscale.S,v 1.5 2013/12/17 01:27:21 joerg Exp $ */ 2 3/* 4 * Copyright 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Steve C. Woodford for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 
36 */ 37 38#include <machine/asm.h> 39 40/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 41ENTRY(memcpy) 42 pld [r1] 43 cmp r2, #0x0c 44 ble .Lmemcpy_short /* <= 12 bytes */ 45 mov r3, r0 /* We must not clobber r0 */ 46 47 /* Word-align the destination buffer */ 48 ands ip, r3, #0x03 /* Already word aligned? */ 49 beq .Lmemcpy_wordaligned /* Yup */ 50 cmp ip, #0x02 51 ldrb ip, [r1], #0x01 52 sub r2, r2, #0x01 53 strb ip, [r3], #0x01 54 ldrble ip, [r1], #0x01 55 suble r2, r2, #0x01 56 strble ip, [r3], #0x01 57 ldrblt ip, [r1], #0x01 58 sublt r2, r2, #0x01 59 strblt ip, [r3], #0x01 60 61 /* Destination buffer is now word aligned */ 62.Lmemcpy_wordaligned: 63 ands ip, r1, #0x03 /* Is src also word-aligned? */ 64 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 65 66 /* Quad-align the destination buffer */ 67 tst r3, #0x07 /* Already quad aligned? */ 68 ldrne ip, [r1], #0x04 69 push {r4-r9} /* Free up some registers */ 70 subne r2, r2, #0x04 71 strne ip, [r3], #0x04 72 73 /* Destination buffer quad aligned, source is at least word aligned */ 74 subs r2, r2, #0x80 75 blt .Lmemcpy_w_lessthan128 76 77 /* Copy 128 bytes at a time */ 78.Lmemcpy_w_loop128: 79 ldr r4, [r1], #0x04 /* LD:00-03 */ 80 ldr r5, [r1], #0x04 /* LD:04-07 */ 81 pld [r1, #0x18] /* Prefetch 0x20 */ 82 ldr r6, [r1], #0x04 /* LD:08-0b */ 83 ldr r7, [r1], #0x04 /* LD:0c-0f */ 84 ldr r8, [r1], #0x04 /* LD:10-13 */ 85 ldr r9, [r1], #0x04 /* LD:14-17 */ 86 strd r4, r5, [r3], #0x08 /* ST:00-07 */ 87 ldr r4, [r1], #0x04 /* LD:18-1b */ 88 ldr r5, [r1], #0x04 /* LD:1c-1f */ 89 strd r6, r7, [r3], #0x08 /* ST:08-0f */ 90 ldr r6, [r1], #0x04 /* LD:20-23 */ 91 ldr r7, [r1], #0x04 /* LD:24-27 */ 92 pld [r1, #0x18] /* Prefetch 0x40 */ 93 strd r8, r9, [r3], #0x08 /* ST:10-17 */ 94 ldr r8, [r1], #0x04 /* LD:28-2b */ 95 ldr r9, [r1], #0x04 /* LD:2c-2f */ 96 strd r4, r5, [r3], #0x08 /* ST:18-1f */ 97 ldr r4, [r1], #0x04 /* LD:30-33 */ 98 ldr r5, [r1], #0x04 /* LD:34-37 */ 99 strd r6, r7, 
[r3], #0x08 /* ST:20-27 */ 100 ldr r6, [r1], #0x04 /* LD:38-3b */ 101 ldr r7, [r1], #0x04 /* LD:3c-3f */ 102 strd r8, r9, [r3], #0x08 /* ST:28-2f */ 103 ldr r8, [r1], #0x04 /* LD:40-43 */ 104 ldr r9, [r1], #0x04 /* LD:44-47 */ 105 pld [r1, #0x18] /* Prefetch 0x60 */ 106 strd r4, r5, [r3], #0x08 /* ST:30-37 */ 107 ldr r4, [r1], #0x04 /* LD:48-4b */ 108 ldr r5, [r1], #0x04 /* LD:4c-4f */ 109 strd r6, r7, [r3], #0x08 /* ST:38-3f */ 110 ldr r6, [r1], #0x04 /* LD:50-53 */ 111 ldr r7, [r1], #0x04 /* LD:54-57 */ 112 strd r8, r9, [r3], #0x08 /* ST:40-47 */ 113 ldr r8, [r1], #0x04 /* LD:58-5b */ 114 ldr r9, [r1], #0x04 /* LD:5c-5f */ 115 strd r4, r5, [r3], #0x08 /* ST:48-4f */ 116 ldr r4, [r1], #0x04 /* LD:60-63 */ 117 ldr r5, [r1], #0x04 /* LD:64-67 */ 118 pld [r1, #0x18] /* Prefetch 0x80 */ 119 strd r6, r7, [r3], #0x08 /* ST:50-57 */ 120 ldr r6, [r1], #0x04 /* LD:68-6b */ 121 ldr r7, [r1], #0x04 /* LD:6c-6f */ 122 strd r8, r9, [r3], #0x08 /* ST:58-5f */ 123 ldr r8, [r1], #0x04 /* LD:70-73 */ 124 ldr r9, [r1], #0x04 /* LD:74-77 */ 125 strd r4, r5, [r3], #0x08 /* ST:60-67 */ 126 ldr r4, [r1], #0x04 /* LD:78-7b */ 127 ldr r5, [r1], #0x04 /* LD:7c-7f */ 128 strd r6, r7, [r3], #0x08 /* ST:68-6f */ 129 strd r8, r9, [r3], #0x08 /* ST:70-77 */ 130 subs r2, r2, #0x80 131 strd r4, r5, [r3], #0x08 /* ST:78-7f */ 132 bge .Lmemcpy_w_loop128 133 134.Lmemcpy_w_lessthan128: 135 adds r2, r2, #0x80 /* Adjust for extra sub */ 136 popeq {r4-r9} 137 RETc(eq) /* Return now if done */ 138 subs r2, r2, #0x20 139 blt .Lmemcpy_w_lessthan32 140 141 /* Copy 32 bytes at a time */ 142.Lmemcpy_w_loop32: 143 ldr r4, [r1], #0x04 144 ldr r5, [r1], #0x04 145 pld [r1, #0x18] 146 ldr r6, [r1], #0x04 147 ldr r7, [r1], #0x04 148 ldr r8, [r1], #0x04 149 ldr r9, [r1], #0x04 150 strd r4, r5, [r3], #0x08 151 ldr r4, [r1], #0x04 152 ldr r5, [r1], #0x04 153 strd r6, r7, [r3], #0x08 154 strd r8, r9, [r3], #0x08 155 subs r2, r2, #0x20 156 strd r4, r5, [r3], #0x08 157 bge .Lmemcpy_w_loop32 158 159.Lmemcpy_w_lessthan32: 
160 adds r2, r2, #0x20 /* Adjust for extra sub */ 161 popeq {r4-r9} 162 RETc(eq) /* Return now if done */ 163 164 and r4, r2, #0x18 165 rsbs r4, r4, #0x18 166 addne pc, pc, r4, lsl #1 167 nop 168 169 /* At least 24 bytes remaining */ 170 ldr r4, [r1], #0x04 171 ldr r5, [r1], #0x04 172 sub r2, r2, #0x08 173 strd r4, r5, [r3], #0x08 174 175 /* At least 16 bytes remaining */ 176 ldr r4, [r1], #0x04 177 ldr r5, [r1], #0x04 178 sub r2, r2, #0x08 179 strd r4, r5, [r3], #0x08 180 181 /* At least 8 bytes remaining */ 182 ldr r4, [r1], #0x04 183 ldr r5, [r1], #0x04 184 subs r2, r2, #0x08 185 strd r4, r5, [r3], #0x08 186 187 /* Less than 8 bytes remaining */ 188 pop {r4-r9} 189 RETc(eq) /* Return now if done */ 190 subs r2, r2, #0x04 191 ldrge ip, [r1], #0x04 192 strge ip, [r3], #0x04 193 RETc(eq) /* Return now if done */ 194 addlt r2, r2, #0x04 195 ldrb ip, [r1], #0x01 196 cmp r2, #0x02 197 ldrbge r2, [r1], #0x01 198 strb ip, [r3], #0x01 199 ldrbgt ip, [r1] 200 strbge r2, [r3], #0x01 201 strbgt ip, [r3] 202 RET 203 204 205/* 206 * At this point, it has not been possible to word align both buffers. 207 * The destination buffer is word aligned, but the source buffer is not. 
208 */ 209.Lmemcpy_bad_align: 210 push {r4-r7} 211 bic r1, r1, #0x03 212 cmp ip, #2 213 ldr ip, [r1], #0x04 214 bgt .Lmemcpy_bad3 215 beq .Lmemcpy_bad2 216 b .Lmemcpy_bad1 217 218.Lmemcpy_bad1_loop16: 219#ifdef __ARMEB__ 220 mov r4, ip, lsl #8 221#else 222 mov r4, ip, lsr #8 223#endif 224 ldr r5, [r1], #0x04 225 pld [r1, #0x018] 226 ldr r6, [r1], #0x04 227 ldr r7, [r1], #0x04 228 ldr ip, [r1], #0x04 229#ifdef __ARMEB__ 230 orr r4, r4, r5, lsr #24 231 mov r5, r5, lsl #8 232 orr r5, r5, r6, lsr #24 233 mov r6, r6, lsl #8 234 orr r6, r6, r7, lsr #24 235 mov r7, r7, lsl #8 236 orr r7, r7, ip, lsr #24 237#else 238 orr r4, r4, r5, lsl #24 239 mov r5, r5, lsr #8 240 orr r5, r5, r6, lsl #24 241 mov r6, r6, lsr #8 242 orr r6, r6, r7, lsl #24 243 mov r7, r7, lsr #8 244 orr r7, r7, ip, lsl #24 245#endif 246 str r4, [r3], #0x04 247 str r5, [r3], #0x04 248 str r6, [r3], #0x04 249 str r7, [r3], #0x04 250 sub r2, r2, #0x10 251 252.Lmemcpy_bad1: 253 cmp r2, #0x20 254 bge .Lmemcpy_bad1_loop16 255 cmp r2, #0x10 256 blt .Lmemcpy_bad1_loop16_short 257 258 /* copy last 16 bytes (without preload) */ 259#ifdef __ARMEB__ 260 mov r4, ip, lsl #8 261#else 262 mov r4, ip, lsr #8 263#endif 264 ldr r5, [r1], #0x04 265 ldr r6, [r1], #0x04 266 ldr r7, [r1], #0x04 267 ldr ip, [r1], #0x04 268#ifdef __ARMEB__ 269 orr r4, r4, r5, lsr #24 270 mov r5, r5, lsl #8 271 orr r5, r5, r6, lsr #24 272 mov r6, r6, lsl #8 273 orr r6, r6, r7, lsr #24 274 mov r7, r7, lsl #8 275 orr r7, r7, ip, lsr #24 276#else 277 orr r4, r4, r5, lsl #24 278 mov r5, r5, lsr #8 279 orr r5, r5, r6, lsl #24 280 mov r6, r6, lsr #8 281 orr r6, r6, r7, lsl #24 282 mov r7, r7, lsr #8 283 orr r7, r7, ip, lsl #24 284#endif 285 str r4, [r3], #0x04 286 str r5, [r3], #0x04 287 str r6, [r3], #0x04 288 str r7, [r3], #0x04 289 subs r2, r2, #0x10 290 popeq {r4-r7} 291 RETc(eq) /* Return now if done */ 292 293.Lmemcpy_bad1_loop16_short: 294 subs r2, r2, #0x04 295 sublt r1, r1, #0x03 296 blt .Lmemcpy_bad_done 297 298.Lmemcpy_bad1_loop4: 299#ifdef 
__ARMEB__ 300 mov r4, ip, lsl #8 301#else 302 mov r4, ip, lsr #8 303#endif 304 ldr ip, [r1], #0x04 305 subs r2, r2, #0x04 306#ifdef __ARMEB__ 307 orr r4, r4, ip, lsr #24 308#else 309 orr r4, r4, ip, lsl #24 310#endif 311 str r4, [r3], #0x04 312 bge .Lmemcpy_bad1_loop4 313 sub r1, r1, #0x03 314 b .Lmemcpy_bad_done 315 316.Lmemcpy_bad2_loop16: 317#ifdef __ARMEB__ 318 mov r4, ip, lsl #16 319#else 320 mov r4, ip, lsr #16 321#endif 322 ldr r5, [r1], #0x04 323 pld [r1, #0x018] 324 ldr r6, [r1], #0x04 325 ldr r7, [r1], #0x04 326 ldr ip, [r1], #0x04 327#ifdef __ARMEB__ 328 orr r4, r4, r5, lsr #16 329 mov r5, r5, lsl #16 330 orr r5, r5, r6, lsr #16 331 mov r6, r6, lsl #16 332 orr r6, r6, r7, lsr #16 333 mov r7, r7, lsl #16 334 orr r7, r7, ip, lsr #16 335#else 336 orr r4, r4, r5, lsl #16 337 mov r5, r5, lsr #16 338 orr r5, r5, r6, lsl #16 339 mov r6, r6, lsr #16 340 orr r6, r6, r7, lsl #16 341 mov r7, r7, lsr #16 342 orr r7, r7, ip, lsl #16 343#endif 344 str r4, [r3], #0x04 345 str r5, [r3], #0x04 346 str r6, [r3], #0x04 347 str r7, [r3], #0x04 348 sub r2, r2, #0x10 349 350.Lmemcpy_bad2: 351 cmp r2, #0x20 352 bge .Lmemcpy_bad2_loop16 353 cmp r2, #0x10 354 blt .Lmemcpy_bad2_loop16_short 355 356 /* copy last 16 bytes (without preload) */ 357#ifdef __ARMEB__ 358 mov r4, ip, lsl #16 359#else 360 mov r4, ip, lsr #16 361#endif 362 ldr r5, [r1], #0x04 363 ldr r6, [r1], #0x04 364 ldr r7, [r1], #0x04 365 ldr ip, [r1], #0x04 366#ifdef __ARMEB__ 367 orr r4, r4, r5, lsr #16 368 mov r5, r5, lsl #16 369 orr r5, r5, r6, lsr #16 370 mov r6, r6, lsl #16 371 orr r6, r6, r7, lsr #16 372 mov r7, r7, lsl #16 373 orr r7, r7, ip, lsr #16 374#else 375 orr r4, r4, r5, lsl #16 376 mov r5, r5, lsr #16 377 orr r5, r5, r6, lsl #16 378 mov r6, r6, lsr #16 379 orr r6, r6, r7, lsl #16 380 mov r7, r7, lsr #16 381 orr r7, r7, ip, lsl #16 382#endif 383 str r4, [r3], #0x04 384 str r5, [r3], #0x04 385 str r6, [r3], #0x04 386 str r7, [r3], #0x04 387 subs r2, r2, #0x10 388 popeq {r4-r7} 389 RETc(eq) /* Return now 
if done */ 390 391.Lmemcpy_bad2_loop16_short: 392 subs r2, r2, #0x04 393 sublt r1, r1, #0x02 394 blt .Lmemcpy_bad_done 395 396.Lmemcpy_bad2_loop4: 397#ifdef __ARMEB__ 398 mov r4, ip, lsl #16 399#else 400 mov r4, ip, lsr #16 401#endif 402 ldr ip, [r1], #0x04 403 subs r2, r2, #0x04 404#ifdef __ARMEB__ 405 orr r4, r4, ip, lsr #16 406#else 407 orr r4, r4, ip, lsl #16 408#endif 409 str r4, [r3], #0x04 410 bge .Lmemcpy_bad2_loop4 411 sub r1, r1, #0x02 412 b .Lmemcpy_bad_done 413 414.Lmemcpy_bad3_loop16: 415#ifdef __ARMEB__ 416 mov r4, ip, lsl #24 417#else 418 mov r4, ip, lsr #24 419#endif 420 ldr r5, [r1], #0x04 421 pld [r1, #0x018] 422 ldr r6, [r1], #0x04 423 ldr r7, [r1], #0x04 424 ldr ip, [r1], #0x04 425#ifdef __ARMEB__ 426 orr r4, r4, r5, lsr #8 427 mov r5, r5, lsl #24 428 orr r5, r5, r6, lsr #8 429 mov r6, r6, lsl #24 430 orr r6, r6, r7, lsr #8 431 mov r7, r7, lsl #24 432 orr r7, r7, ip, lsr #8 433#else 434 orr r4, r4, r5, lsl #8 435 mov r5, r5, lsr #24 436 orr r5, r5, r6, lsl #8 437 mov r6, r6, lsr #24 438 orr r6, r6, r7, lsl #8 439 mov r7, r7, lsr #24 440 orr r7, r7, ip, lsl #8 441#endif 442 str r4, [r3], #0x04 443 str r5, [r3], #0x04 444 str r6, [r3], #0x04 445 str r7, [r3], #0x04 446 sub r2, r2, #0x10 447 448.Lmemcpy_bad3: 449 cmp r2, #0x20 450 bge .Lmemcpy_bad3_loop16 451 cmp r2, #0x10 452 blt .Lmemcpy_bad3_loop16_short 453 454 /* copy last 16 bytes (without preload) */ 455#ifdef __ARMEB__ 456 mov r4, ip, lsl #24 457#else 458 mov r4, ip, lsr #24 459#endif 460 ldr r5, [r1], #0x04 461 ldr r6, [r1], #0x04 462 ldr r7, [r1], #0x04 463 ldr ip, [r1], #0x04 464#ifdef __ARMEB__ 465 orr r4, r4, r5, lsr #8 466 mov r5, r5, lsl #24 467 orr r5, r5, r6, lsr #8 468 mov r6, r6, lsl #24 469 orr r6, r6, r7, lsr #8 470 mov r7, r7, lsl #24 471 orr r7, r7, ip, lsr #8 472#else 473 orr r4, r4, r5, lsl #8 474 mov r5, r5, lsr #24 475 orr r5, r5, r6, lsl #8 476 mov r6, r6, lsr #24 477 orr r6, r6, r7, lsl #8 478 mov r7, r7, lsr #24 479 orr r7, r7, ip, lsl #8 480#endif 481 str r4, [r3], 
#0x04 482 str r5, [r3], #0x04 483 str r6, [r3], #0x04 484 str r7, [r3], #0x04 485 subs r2, r2, #0x10 486 popeq {r4-r7} 487 RETc(eq) /* Return now if done */ 488 489.Lmemcpy_bad3_loop16_short: 490 subs r2, r2, #0x04 491 sublt r1, r1, #0x01 492 blt .Lmemcpy_bad_done 493 494.Lmemcpy_bad3_loop4: 495#ifdef __ARMEB__ 496 mov r4, ip, lsl #24 497#else 498 mov r4, ip, lsr #24 499#endif 500 ldr ip, [r1], #0x04 501 subs r2, r2, #0x04 502#ifdef __ARMEB__ 503 orr r4, r4, ip, lsr #8 504#else 505 orr r4, r4, ip, lsl #8 506#endif 507 str r4, [r3], #0x04 508 bge .Lmemcpy_bad3_loop4 509 sub r1, r1, #0x01 510 511.Lmemcpy_bad_done: 512 pop {r4-r7} 513 adds r2, r2, #0x04 514 RETc(eq) 515 ldrb ip, [r1], #0x01 516 cmp r2, #0x02 517 ldrbge r2, [r1], #0x01 518 strb ip, [r3], #0x01 519 ldrbgt ip, [r1] 520 strbge r2, [r3], #0x01 521 strbgt ip, [r3] 522 RET 523 524 525/* 526 * Handle short copies (12 bytes or fewer), possibly misaligned. 527 * Some of these are *very* common, thanks to the network stack, 528 * and so are handled specially. 
529 */ 530.Lmemcpy_short: 531#ifndef _STANDALONE 532 add pc, pc, r2, lsl #2 533 nop 534 RET /* 0x00 */ 535 b .Lmemcpy_bytewise /* 0x01 */ 536 b .Lmemcpy_bytewise /* 0x02 */ 537 b .Lmemcpy_bytewise /* 0x03 */ 538 b .Lmemcpy_4 /* 0x04 */ 539 b .Lmemcpy_bytewise /* 0x05 */ 540 b .Lmemcpy_6 /* 0x06 */ 541 b .Lmemcpy_bytewise /* 0x07 */ 542 b .Lmemcpy_8 /* 0x08 */ 543 b .Lmemcpy_bytewise /* 0x09 */ 544 b .Lmemcpy_bytewise /* 0x0a */ 545 b .Lmemcpy_bytewise /* 0x0b */ 546 b .Lmemcpy_c /* 0x0c */ 547#endif 548.Lmemcpy_bytewise: 549 mov r3, r0 /* We must not clobber r0 */ 550 ldrb ip, [r1], #0x01 5511: subs r2, r2, #0x01 552 strb ip, [r3], #0x01 553 ldrbne ip, [r1], #0x01 554 bne 1b 555 RET 556 557#ifndef _STANDALONE 558/****************************************************************************** 559 * Special case for 4 byte copies 560 */ 561#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 562#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 563 LMEMCPY_4_PAD 564.Lmemcpy_4: 565 and r2, r1, #0x03 566 orr r2, r2, r0, lsl #2 567 ands r2, r2, #0x0f 568 sub r3, pc, #0x14 569 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 570 571/* 572 * 0000: dst is 32-bit aligned, src is 32-bit aligned 573 */ 574 ldr r2, [r1] 575 str r2, [r0] 576 RET 577 LMEMCPY_4_PAD 578 579/* 580 * 0001: dst is 32-bit aligned, src is 8-bit aligned 581 */ 582 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 583 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 584#ifdef __ARMEB__ 585 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 586 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 587#else 588 mov r3, r3, lsr #8 /* r3 = .210 */ 589 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 590#endif 591 str r3, [r0] 592 RET 593 LMEMCPY_4_PAD 594 595/* 596 * 0010: dst is 32-bit aligned, src is 16-bit aligned 597 */ 598#ifdef __ARMEB__ 599 ldrh r3, [r1] 600 ldrh r2, [r1, #0x02] 601#else 602 ldrh r3, [r1, #0x02] 603 ldrh r2, [r1] 604#endif 605 orr r3, r2, r3, lsl #16 606 str r3, [r0] 607 RET 608 LMEMCPY_4_PAD 609 610/* 611 * 0011: dst is 32-bit aligned, src is 8-bit aligned 612 */ 613 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 614 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 615#ifdef __ARMEB__ 616 mov r3, r3, lsl #24 /* r3 = 0... */ 617 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 618#else 619 mov r3, r3, lsr #24 /* r3 = ...0 */ 620 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 621#endif 622 str r3, [r0] 623 RET 624 LMEMCPY_4_PAD 625 626/* 627 * 0100: dst is 8-bit aligned, src is 32-bit aligned 628 */ 629 ldr r2, [r1] 630#ifdef __ARMEB__ 631 strb r2, [r0, #0x03] 632 mov r3, r2, lsr #8 633 mov r1, r2, lsr #24 634 strb r1, [r0] 635#else 636 strb r2, [r0] 637 mov r3, r2, lsr #8 638 mov r1, r2, lsr #24 639 strb r1, [r0, #0x03] 640#endif 641 strh r3, [r0, #0x01] 642 RET 643 LMEMCPY_4_PAD 644 645/* 646 * 0101: dst is 8-bit aligned, src is 8-bit aligned 647 */ 648 ldrb r2, [r1] 649 ldrh r3, [r1, #0x01] 650 ldrb r1, [r1, #0x03] 651 strb r2, [r0] 652 strh r3, [r0, #0x01] 653 strb r1, [r0, #0x03] 654 RET 655 LMEMCPY_4_PAD 656 657/* 658 * 0110: dst is 8-bit aligned, src is 16-bit aligned 659 */ 660 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 661 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 662#ifdef __ARMEB__ 663 mov r1, r2, lsr #8 /* r1 = ...0 */ 664 strb r1, [r0] 665 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 666 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 667#else 668 strb r2, [r0] 669 mov r2, r2, lsr #8 /* r2 = ...1 */ 670 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 671 mov r3, r3, lsr #8 /* r3 = ...3 */ 672#endif 673 strh r2, [r0, #0x01] 674 strb r3, [r0, #0x03] 675 RET 676 LMEMCPY_4_PAD 677 678/* 679 * 0111: dst is 8-bit aligned, src is 8-bit aligned 680 */ 681 ldrb r2, [r1] 682 ldrh r3, [r1, #0x01] 683 ldrb r1, [r1, #0x03] 684 strb r2, [r0] 685 strh r3, [r0, #0x01] 686 strb r1, [r0, #0x03] 687 RET 688 LMEMCPY_4_PAD 689 690/* 691 * 1000: dst is 16-bit aligned, src is 32-bit aligned 692 */ 693 ldr r2, [r1] 694#ifdef __ARMEB__ 695 strh r2, [r0, #0x02] 696 mov r3, r2, lsr #16 697 strh r3, [r0] 698#else 699 strh r2, [r0] 700 mov r3, r2, lsr #16 701 strh r3, [r0, #0x02] 702#endif 703 RET 704 LMEMCPY_4_PAD 705 706/* 707 * 1001: dst is 16-bit aligned, src is 8-bit aligned 708 */ 709 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 710 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 711 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 712 strh r1, [r0] 713#ifdef __ARMEB__ 714 mov r2, r2, lsl #8 /* r2 = 012. */ 715 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 716#else 717 mov r2, r2, lsr #24 /* r2 = ...2 */ 718 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 719#endif 720 strh r2, [r0, #0x02] 721 RET 722 LMEMCPY_4_PAD 723 724/* 725 * 1010: dst is 16-bit aligned, src is 16-bit aligned 726 */ 727 ldrh r2, [r1] 728 ldrh r3, [r1, #0x02] 729 strh r2, [r0] 730 strh r3, [r0, #0x02] 731 RET 732 LMEMCPY_4_PAD 733 734/* 735 * 1011: dst is 16-bit aligned, src is 8-bit aligned 736 */ 737 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 738 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 739 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 740 strh r1, [r0, #0x02] 741#ifdef __ARMEB__ 742 mov r3, r3, lsr #24 /* r3 = ...1 */ 743 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 744#else 745 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 746 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 747#endif 748 strh r3, [r0] 749 RET 750 LMEMCPY_4_PAD 751 752/* 753 * 1100: dst is 8-bit aligned, src is 32-bit aligned 754 */ 755 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 756#ifdef __ARMEB__ 757 strb r2, [r0, #0x03] 758 mov r3, r2, lsr #8 759 mov r1, r2, lsr #24 760 strh r3, [r0, #0x01] 761 strb r1, [r0] 762#else 763 strb r2, [r0] 764 mov r3, r2, lsr #8 765 mov r1, r2, lsr #24 766 strh r3, [r0, #0x01] 767 strb r1, [r0, #0x03] 768#endif 769 RET 770 LMEMCPY_4_PAD 771 772/* 773 * 1101: dst is 8-bit aligned, src is 8-bit aligned 774 */ 775 ldrb r2, [r1] 776 ldrh r3, [r1, #0x01] 777 ldrb r1, [r1, #0x03] 778 strb r2, [r0] 779 strh r3, [r0, #0x01] 780 strb r1, [r0, #0x03] 781 RET 782 LMEMCPY_4_PAD 783 784/* 785 * 1110: dst is 8-bit aligned, src is 16-bit aligned 786 */ 787#ifdef __ARMEB__ 788 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 789 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 790 strb r3, [r0, #0x03] 791 mov r3, r3, lsr #8 /* r3 = ...2 */ 792 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 793 strh r3, [r0, #0x01] 794 mov r2, r2, lsr #8 /* r2 = ...0 */ 795 strb r2, [r0] 796#else 797 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 798 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 799 strb r2, [r0] 800 mov r2, r2, lsr #8 /* r2 = ...1 */ 801 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 802 strh r2, [r0, #0x01] 803 mov r3, r3, lsr #8 /* r3 = ...3 */ 804 strb r3, [r0, #0x03] 805#endif 806 RET 807 LMEMCPY_4_PAD 808 809/* 810 * 1111: dst is 8-bit aligned, src is 8-bit aligned 811 */ 812 ldrb r2, [r1] 813 ldrh r3, [r1, #0x01] 814 ldrb r1, [r1, #0x03] 815 strb r2, [r0] 816 strh r3, [r0, #0x01] 817 strb r1, [r0, #0x03] 818 RET 819 LMEMCPY_4_PAD 820 821 822/****************************************************************************** 823 * Special case for 6 byte copies 824 */ 825#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 826#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 827 LMEMCPY_6_PAD 828.Lmemcpy_6: 829 and r2, r1, #0x03 830 
orr r2, r2, r0, lsl #2 831 ands r2, r2, #0x0f 832 sub r3, pc, #0x14 833 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 834 835/* 836 * 0000: dst is 32-bit aligned, src is 32-bit aligned 837 */ 838 ldr r2, [r1] 839 ldrh r3, [r1, #0x04] 840 str r2, [r0] 841 strh r3, [r0, #0x04] 842 RET 843 LMEMCPY_6_PAD 844 845/* 846 * 0001: dst is 32-bit aligned, src is 8-bit aligned 847 */ 848 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 849 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 850#ifdef __ARMEB__ 851 mov r2, r2, lsl #8 /* r2 = 012. */ 852 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 853#else 854 mov r2, r2, lsr #8 /* r2 = .210 */ 855 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 856#endif 857 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 858 str r2, [r0] 859 strh r3, [r0, #0x04] 860 RET 861 LMEMCPY_6_PAD 862 863/* 864 * 0010: dst is 32-bit aligned, src is 16-bit aligned 865 */ 866 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 867 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 868#ifdef __ARMEB__ 869 mov r1, r3, lsr #16 /* r1 = ..23 */ 870 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 871 str r1, [r0] 872 strh r3, [r0, #0x04] 873#else 874 mov r1, r3, lsr #16 /* r1 = ..54 */ 875 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 876 str r2, [r0] 877 strh r1, [r0, #0x04] 878#endif 879 RET 880 LMEMCPY_6_PAD 881 882/* 883 * 0011: dst is 32-bit aligned, src is 8-bit aligned 884 */ 885 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 886 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 887 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 888#ifdef __ARMEB__ 889 mov r2, r2, lsl #24 /* r2 = 0... */ 890 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 891 mov r3, r3, lsl #8 /* r3 = 234. */ 892 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 893#else 894 mov r2, r2, lsr #24 /* r2 = ...0 */ 895 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 896 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 897 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 898#endif 899 str r2, [r0] 900 strh r1, [r0, #0x04] 901 RET 902 LMEMCPY_6_PAD 903 904/* 905 * 0100: dst is 8-bit aligned, src is 32-bit aligned 906 */ 907 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 908 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 909 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 910 strh r1, [r0, #0x01] 911#ifdef __ARMEB__ 912 mov r1, r3, lsr #24 /* r1 = ...0 */ 913 strb r1, [r0] 914 mov r3, r3, lsl #8 /* r3 = 123. */ 915 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 916#else 917 strb r3, [r0] 918 mov r3, r3, lsr #24 /* r3 = ...3 */ 919 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 920 mov r2, r2, lsr #8 /* r2 = ...5 */ 921#endif 922 strh r3, [r0, #0x03] 923 strb r2, [r0, #0x05] 924 RET 925 LMEMCPY_6_PAD 926 927/* 928 * 0101: dst is 8-bit aligned, src is 8-bit aligned 929 */ 930 ldrb r2, [r1] 931 ldrh r3, [r1, #0x01] 932 ldrh ip, [r1, #0x03] 933 ldrb r1, [r1, #0x05] 934 strb r2, [r0] 935 strh r3, [r0, #0x01] 936 strh ip, [r0, #0x03] 937 strb r1, [r0, #0x05] 938 RET 939 LMEMCPY_6_PAD 940 941/* 942 * 0110: dst is 8-bit aligned, src is 16-bit aligned 943 */ 944 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 945 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 946#ifdef __ARMEB__ 947 mov r3, r2, lsr #8 /* r3 = ...0 */ 948 strb r3, [r0] 949 strb r1, [r0, #0x05] 950 mov r3, r1, lsr #8 /* r3 = .234 */ 951 strh r3, [r0, #0x03] 952 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 953 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 954 strh r3, [r0, #0x01] 955#else 956 strb r2, [r0] 957 mov r3, r1, lsr #24 958 strb r3, [r0, #0x05] 959 mov r3, r1, lsr #8 /* r3 = .543 */ 960 strh r3, [r0, #0x03] 961 mov r3, r2, lsr #8 /* r3 = ...1 */ 962 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 963 strh r3, [r0, #0x01] 964#endif 965 RET 966 LMEMCPY_6_PAD 967 968/* 969 * 0111: dst is 8-bit aligned, src is 8-bit aligned 970 */ 971 ldrb r2, [r1] 972 ldrh r3, [r1, #0x01] 973 ldrh ip, [r1, #0x03] 974 ldrb r1, [r1, #0x05] 975 strb r2, [r0] 976 strh r3, [r0, #0x01] 977 strh ip, [r0, #0x03] 978 strb r1, [r0, #0x05] 979 RET 980 LMEMCPY_6_PAD 981 982/* 983 * 1000: dst is 16-bit aligned, src is 32-bit aligned 984 */ 985#ifdef __ARMEB__ 986 ldr r2, [r1] /* r2 = 0123 */ 987 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 988 mov r1, r2, lsr #16 /* r1 = ..01 */ 989 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 990 strh r1, [r0] 991 str r3, [r0, #0x02] 992#else 993 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 994 ldr r3, [r1] /* r3 = 3210 */ 995 mov r2, r2, lsl #16 /* r2 = 54.. */ 996 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 997 strh r3, [r0] 998 str r2, [r0, #0x02] 999#endif 1000 RET 1001 LMEMCPY_6_PAD 1002 1003/* 1004 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1005 */ 1006 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1007 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 1008 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1009#ifdef __ARMEB__ 1010 mov r2, r2, lsr #8 /* r2 = .345 */ 1011 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 1012#else 1013 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 1014 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 1015#endif 1016 strh r1, [r0] 1017 str r2, [r0, #0x02] 1018 RET 1019 LMEMCPY_6_PAD 1020 1021/* 1022 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1023 */ 1024 ldrh r2, [r1] 1025 ldr r3, [r1, #0x02] 1026 strh r2, [r0] 1027 str r3, [r0, #0x02] 1028 RET 1029 LMEMCPY_6_PAD 1030 1031/* 1032 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1033 */ 1034 ldrb r3, [r1] /* r3 = ...0 */ 1035 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1036 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 1037#ifdef __ARMEB__ 1038 mov r3, r3, lsl #8 /* r3 = ..0. */ 1039 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 1040 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 1041#else 1042 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1043 mov r1, r1, lsl #24 /* r1 = 5... */ 1044 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 1045#endif 1046 strh r3, [r0] 1047 str r1, [r0, #0x02] 1048 RET 1049 LMEMCPY_6_PAD 1050 1051/* 1052 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1053 */ 1054 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1055 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 1056#ifdef __ARMEB__ 1057 mov r3, r2, lsr #24 /* r3 = ...0 */ 1058 strb r3, [r0] 1059 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 1060 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 1061#else 1062 strb r2, [r0] 1063 mov r2, r2, lsr #8 /* r2 = .321 */ 1064 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 1065 mov r1, r1, lsr #8 /* r1 = ...5 */ 1066#endif 1067 str r2, [r0, #0x01] 1068 strb r1, [r0, #0x05] 1069 RET 1070 LMEMCPY_6_PAD 1071 1072/* 1073 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1074 */ 1075 ldrb r2, [r1] 1076 ldrh r3, [r1, #0x01] 1077 ldrh ip, [r1, #0x03] 1078 ldrb r1, [r1, #0x05] 1079 strb r2, [r0] 1080 strh r3, [r0, #0x01] 1081 strh ip, [r0, #0x03] 1082 strb r1, [r0, #0x05] 1083 RET 1084 LMEMCPY_6_PAD 1085 1086/* 1087 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1088 */ 1089 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1090 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1091#ifdef __ARMEB__ 1092 mov r3, r2, lsr #8 /* r3 = ...0 */ 1093 strb r3, [r0] 1094 mov r2, r2, lsl #24 /* r2 = 1... */ 1095 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 1096#else 1097 strb r2, [r0] 1098 mov r2, r2, lsr #8 /* r2 = ...1 */ 1099 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 1100 mov r1, r1, lsr #24 /* r1 = ...5 */ 1101#endif 1102 str r2, [r0, #0x01] 1103 strb r1, [r0, #0x05] 1104 RET 1105 LMEMCPY_6_PAD 1106 1107/* 1108 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1109 */ 1110 ldrb r2, [r1] 1111 ldr r3, [r1, #0x01] 1112 ldrb r1, [r1, #0x05] 1113 strb r2, [r0] 1114 str r3, [r0, #0x01] 1115 strb r1, [r0, #0x05] 1116 RET 1117 LMEMCPY_6_PAD 1118 1119 1120/****************************************************************************** 1121 * Special case for 8 byte copies 1122 */ 1123#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 1124#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 1125 LMEMCPY_8_PAD 1126.Lmemcpy_8: 1127 and r2, r1, #0x03 1128 orr r2, r2, r0, lsl #2 1129 ands r2, r2, #0x0f 1130 sub r3, pc, #0x14 1131 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 1132 1133/* 1134 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1135 */ 1136 ldr r2, [r1] 1137 ldr r3, [r1, #0x04] 1138 str r2, [r0] 1139 str r3, [r0, #0x04] 
1140 RET 1141 LMEMCPY_8_PAD 1142 1143/* 1144 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1145 */ 1146 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1147 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 1148 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1149#ifdef __ARMEB__ 1150 mov r3, r3, lsl #8 /* r3 = 012. */ 1151 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1152 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 1153#else 1154 mov r3, r3, lsr #8 /* r3 = .210 */ 1155 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1156 mov r1, r1, lsl #24 /* r1 = 7... */ 1157 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 1158#endif 1159 str r3, [r0] 1160 str r2, [r0, #0x04] 1161 RET 1162 LMEMCPY_8_PAD 1163 1164/* 1165 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1166 */ 1167 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1168 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1169 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1170#ifdef __ARMEB__ 1171 mov r2, r2, lsl #16 /* r2 = 01.. */ 1172 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 1173 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 1174#else 1175 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1176 mov r3, r3, lsr #16 /* r3 = ..54 */ 1177 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 1178#endif 1179 str r2, [r0] 1180 str r3, [r0, #0x04] 1181 RET 1182 LMEMCPY_8_PAD 1183 1184/* 1185 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1186 */ 1187 ldrb r3, [r1] /* r3 = ...0 */ 1188 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1189 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 1190#ifdef __ARMEB__ 1191 mov r3, r3, lsl #24 /* r3 = 0... */ 1192 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1193 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 1194 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 1195#else 1196 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1197 mov r2, r2, lsr #24 /* r2 = ...4 */ 1198 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 1199#endif 1200 str r3, [r0] 1201 str r2, [r0, #0x04] 1202 RET 1203 LMEMCPY_8_PAD 1204 1205/* 1206 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1207 */ 1208 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1209 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 1210#ifdef __ARMEB__ 1211 mov r1, r3, lsr #24 /* r1 = ...0 */ 1212 strb r1, [r0] 1213 mov r1, r3, lsr #8 /* r1 = .012 */ 1214 strb r2, [r0, #0x07] 1215 mov r3, r3, lsl #24 /* r3 = 3... */ 1216 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 1217#else 1218 strb r3, [r0] 1219 mov r1, r2, lsr #24 /* r1 = ...7 */ 1220 strb r1, [r0, #0x07] 1221 mov r1, r3, lsr #8 /* r1 = .321 */ 1222 mov r3, r3, lsr #24 /* r3 = ...3 */ 1223 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 1224#endif 1225 strh r1, [r0, #0x01] 1226 str r3, [r0, #0x03] 1227 RET 1228 LMEMCPY_8_PAD 1229 1230/* 1231 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1232 */ 1233 ldrb r2, [r1] 1234 ldrh r3, [r1, #0x01] 1235 ldr ip, [r1, #0x03] 1236 ldrb r1, [r1, #0x07] 1237 strb r2, [r0] 1238 strh r3, [r0, #0x01] 1239 str ip, [r0, #0x03] 1240 strb r1, [r0, #0x07] 1241 RET 1242 LMEMCPY_8_PAD 1243 1244/* 1245 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1246 */ 1247 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1248 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1249 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1250#ifdef __ARMEB__ 1251 mov ip, r2, lsr #8 /* ip = ...0 */ 1252 strb ip, [r0] 1253 mov ip, r2, lsl #8 /* ip = .01. */ 1254 orr ip, ip, r3, lsr #24 /* ip = .012 */ 1255 strb r1, [r0, #0x07] 1256 mov r3, r3, lsl #8 /* r3 = 345. 
*/ 1257 orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 1258#else 1259 strb r2, [r0] /* 0 */ 1260 mov ip, r1, lsr #8 /* ip = ...7 */ 1261 strb ip, [r0, #0x07] /* 7 */ 1262 mov ip, r2, lsr #8 /* ip = ...1 */ 1263 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1264 mov r3, r3, lsr #8 /* r3 = .543 */ 1265 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 1266#endif 1267 strh ip, [r0, #0x01] 1268 str r3, [r0, #0x03] 1269 RET 1270 LMEMCPY_8_PAD 1271 1272/* 1273 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1274 */ 1275 ldrb r3, [r1] /* r3 = ...0 */ 1276 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1277 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 1278 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1279 strb r3, [r0] 1280 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 1281#ifdef __ARMEB__ 1282 strh r3, [r0, #0x01] 1283 orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 1284#else 1285 strh ip, [r0, #0x01] 1286 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 1287#endif 1288 str r2, [r0, #0x03] 1289 strb r1, [r0, #0x07] 1290 RET 1291 LMEMCPY_8_PAD 1292 1293/* 1294 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1295 */ 1296 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1297 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1298 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1299#ifdef __ARMEB__ 1300 strh r1, [r0] 1301 mov r1, r3, lsr #16 /* r1 = ..45 */ 1302 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ 1303#else 1304 strh r2, [r0] 1305 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 1306 mov r3, r3, lsr #16 /* r3 = ..76 */ 1307#endif 1308 str r2, [r0, #0x02] 1309 strh r3, [r0, #0x06] 1310 RET 1311 LMEMCPY_8_PAD 1312 1313/* 1314 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1315 */ 1316 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1317 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1318 ldrb ip, [r1, #0x07] /* ip = ...7 */ 1319 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1320 strh r1, [r0] 1321#ifdef __ARMEB__ 1322 mov r1, r2, lsl #24 /* r1 = 2... 
*/ 1323 orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 1324 orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 1325#else 1326 mov r1, r2, lsr #24 /* r1 = ...2 */ 1327 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 1328 mov r3, r3, lsr #24 /* r3 = ...6 */ 1329 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 1330#endif 1331 str r1, [r0, #0x02] 1332 strh r3, [r0, #0x06] 1333 RET 1334 LMEMCPY_8_PAD 1335 1336/* 1337 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1338 */ 1339 ldrh r2, [r1] 1340 ldr ip, [r1, #0x02] 1341 ldrh r3, [r1, #0x06] 1342 strh r2, [r0] 1343 str ip, [r0, #0x02] 1344 strh r3, [r0, #0x06] 1345 RET 1346 LMEMCPY_8_PAD 1347 1348/* 1349 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1350 */ 1351 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 1352 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1353 ldrb ip, [r1] /* ip = ...0 */ 1354 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 1355 strh r1, [r0, #0x06] 1356#ifdef __ARMEB__ 1357 mov r3, r3, lsr #24 /* r3 = ...5 */ 1358 orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 1359 mov r2, r2, lsr #24 /* r2 = ...1 */ 1360 orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 1361#else 1362 mov r3, r3, lsl #24 /* r3 = 5... */ 1363 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 1364 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 1365#endif 1366 str r3, [r0, #0x02] 1367 strh r2, [r0] 1368 RET 1369 LMEMCPY_8_PAD 1370 1371/* 1372 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1373 */ 1374 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1375 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1376 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 1377 strh r1, [r0, #0x05] 1378#ifdef __ARMEB__ 1379 strb r3, [r0, #0x07] 1380 mov r1, r2, lsr #24 /* r1 = ...0 */ 1381 strb r1, [r0] 1382 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 1383 orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 1384 str r2, [r0, #0x01] 1385#else 1386 strb r2, [r0] 1387 mov r1, r3, lsr #24 /* r1 = ...7 */ 1388 strb r1, [r0, #0x07] 1389 mov r2, r2, lsr #8 /* r2 = .321 */ 1390 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 1391 str r2, [r0, #0x01] 1392#endif 1393 RET 1394 LMEMCPY_8_PAD 1395 1396/* 1397 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1398 */ 1399 ldrb r3, [r1] /* r3 = ...0 */ 1400 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 1401 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1402 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1403 strb r3, [r0] 1404 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 1405#ifdef __ARMEB__ 1406 strh ip, [r0, #0x05] 1407 orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 1408#else 1409 strh r3, [r0, #0x05] 1410 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 1411#endif 1412 str r2, [r0, #0x01] 1413 strb r1, [r0, #0x07] 1414 RET 1415 LMEMCPY_8_PAD 1416 1417/* 1418 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1419 */ 1420 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1421 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1422 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1423#ifdef __ARMEB__ 1424 mov ip, r2, lsr #8 /* ip = ...0 */ 1425 strb ip, [r0] 1426 mov ip, r2, lsl #24 /* ip = 1... */ 1427 orr ip, ip, r3, lsr #8 /* ip = 1234 */ 1428 strb r1, [r0, #0x07] 1429 mov r1, r1, lsr #8 /* r1 = ...6 */ 1430 orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 1431#else 1432 strb r2, [r0] 1433 mov ip, r2, lsr #8 /* ip = ...1 */ 1434 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1435 mov r2, r1, lsr #8 /* r2 = ...7 */ 1436 strb r2, [r0, #0x07] 1437 mov r1, r1, lsl #8 /* r1 = .76. 
*/ 1438 orr r1, r1, r3, lsr #24 /* r1 = .765 */ 1439#endif 1440 str ip, [r0, #0x01] 1441 strh r1, [r0, #0x05] 1442 RET 1443 LMEMCPY_8_PAD 1444 1445/* 1446 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1447 */ 1448 ldrb r2, [r1] 1449 ldr ip, [r1, #0x01] 1450 ldrh r3, [r1, #0x05] 1451 ldrb r1, [r1, #0x07] 1452 strb r2, [r0] 1453 str ip, [r0, #0x01] 1454 strh r3, [r0, #0x05] 1455 strb r1, [r0, #0x07] 1456 RET 1457 LMEMCPY_8_PAD 1458 1459/****************************************************************************** 1460 * Special case for 12 byte copies (r0 = dst, r1 = src). The code below builds a 4-bit index, ((dst & 3) << 2) | (src & 3), and jumps into one of sixteen LMEMCPY_C_PAD-aligned slots spaced 128 bytes apart, one per dst/src alignment pair. Index 0000 (both word aligned) is not jumped to: it falls through into the code that directly follows the dispatch. Each slot title below is that 4-bit index. 1461 */ 1462#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 1463#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 1464 LMEMCPY_C_PAD 1465.Lmemcpy_c: 1466 and r2, r1, #0x03 /* r2 = src & 3 */ 1467 orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 (only bits 3:2 survive the mask below) */ 1468 ands r2, r2, #0x0f /* r2 = slot index; Z set when dst and src are both word aligned */ 1469 sub r3, pc, #0x14 /* r3 = .Lmemcpy_c: pc reads as this insn + 8, and this insn is at .Lmemcpy_c + 0x0c */ 1470 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* index != 0: jump to .Lmemcpy_c + index * 128; sub left the flags from ands intact */ 1471 1472/* 1473 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1474 */ 1475 ldr r2, [r1] 1476 ldr r3, [r1, #0x04] 1477 ldr r1, [r1, #0x08] 1478 str r2, [r0] 1479 str r3, [r0, #0x04] 1480 str r1, [r0, #0x08] 1481 RET 1482 LMEMCPY_C_PAD 1483 1484/* 1485 * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1) 1486 */ 1487 ldrb r2, [r1, #0xb] /* r2 = ...B */ 1488 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1489 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1490 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x; x = byte before src in the same aligned word, shifted out below */ 1491#ifdef __ARMEB__ 1492 orr r2, r2, ip, lsl #8 /* r2 = 89AB */ 1493 str r2, [r0, #0x08] 1494 mov r2, ip, lsr #24 /* r2 = ...7 */ 1495 orr r2, r2, r3, lsl #8 /* r2 = 4567 */ 1496 mov r1, r1, lsl #8 /* r1 = 012. */ 1497 orr r1, r1, r3, lsr #24 /* r1 = 0123 */ 1498#else 1499 mov r2, r2, lsl #24 /* r2 = B... */ 1500 orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 1501 str r2, [r0, #0x08] 1502 mov r2, ip, lsl #24 /* r2 = 7... 
*/ 1503 orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 1504 mov r1, r1, lsr #8 /* r1 = .210 */ 1505 orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 1506#endif 1507 str r2, [r0, #0x04] 1508 str r1, [r0] 1509 RET 1510 LMEMCPY_C_PAD 1511 1512/* 1513 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1514 */ 1515 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1516 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1517 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1518 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1519#ifdef __ARMEB__ 1520 mov r2, r2, lsl #16 /* r2 = 01.. */ 1521 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 1522 str r2, [r0] 1523 mov r3, r3, lsl #16 /* r3 = 45.. */ 1524 orr r3, r3, ip, lsr #16 /* r3 = 4567 */ 1525 orr r1, r1, ip, lsl #16 /* r1 = 89AB */ 1526#else 1527 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1528 str r2, [r0] 1529 mov r3, r3, lsr #16 /* r3 = ..54 */ 1530 orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 1531 mov r1, r1, lsl #16 /* r1 = BA.. */ 1532 orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 1533#endif 1534 str r3, [r0, #0x04] 1535 str r1, [r0, #0x08] 1536 RET 1537 LMEMCPY_C_PAD 1538 1539/* 1540 * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3) 1541 */ 1542 ldrb r2, [r1] /* r2 = ...0 */ 1543 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1544 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1545 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9; x = byte just past the 12 source bytes, in the same aligned word, shifted out below */ 1546#ifdef __ARMEB__ 1547 mov r2, r2, lsl #24 /* r2 = 0... */ 1548 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1549 str r2, [r0] 1550 mov r3, r3, lsl #24 /* r3 = 4... */ 1551 orr r3, r3, ip, lsr #8 /* r3 = 4567 */ 1552 mov r1, r1, lsr #8 /* r1 = .9AB */ 1553 orr r1, r1, ip, lsl #24 /* r1 = 89AB */ 1554#else 1555 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1556 str r2, [r0] 1557 mov r3, r3, lsr #24 /* r3 = ...4 */ 1558 orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 1559 mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 1560 orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 1561#endif 1562 str r3, [r0, #0x04] 1563 str r1, [r0, #0x08] 1564 RET 1565 LMEMCPY_C_PAD 1566 1567/* 1568 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 1569 */ 1570 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1571 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1572 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 1573 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1574 strh r1, [r0, #0x01] /* stores the low halfword only: bytes 1 and 2 */ 1575#ifdef __ARMEB__ 1576 mov r1, r2, lsr #24 /* r1 = ...0 */ 1577 strb r1, [r0] 1578 mov r1, r2, lsl #24 /* r1 = 3... */ 1579 orr r2, r1, r3, lsr #8 /* r2 = 3456 */ 1580 mov r1, r3, lsl #24 /* r1 = 7... */ 1581 orr r1, r1, ip, lsr #8 /* r1 = 789A */ 1582#else 1583 strb r2, [r0] 1584 mov r1, r2, lsr #24 /* r1 = ...3 */ 1585 orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 1586 mov r1, r3, lsr #24 /* r1 = ...7 */ 1587 orr r1, r1, ip, lsl #8 /* r1 = A987 */ 1588 mov ip, ip, lsr #24 /* ip = ...B */ 1589#endif 1590 str r2, [r0, #0x03] 1591 str r1, [r0, #0x07] 1592 strb ip, [r0, #0x0b] /* byte B: on BE ip is still 89AB and strb stores its low byte */ 1593 RET 1594 LMEMCPY_C_PAD 1595 1596/* 1597 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 1598 * Same misalignment on both sides, so no shifting or merging is needed. 1598 */ 1599 ldrb r2, [r1] 1600 ldrh r3, [r1, #0x01] 1601 ldr ip, [r1, #0x03] 1602 strb r2, [r0] 1603 ldr r2, [r1, #0x07] 1604 ldrb r1, [r1, #0x0b] 1605 strh r3, [r0, #0x01] 1606 str ip, [r0, #0x03] 1607 str r2, [r0, #0x07] 1608 strb r1, [r0, #0x0b] 1609 RET 1610 LMEMCPY_C_PAD 1611 1612/* 1613 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 1614 */ 1615 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1616 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1617 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1618 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1619#ifdef __ARMEB__ 1620 mov r2, r2, ror #8 /* r2 = 1..0 */ 1621 strb r2, [r0] 1622 mov r2, r2, lsr #16 /* r2 = ..1. */ 1623 orr r2, r2, r3, lsr #24 /* r2 = ..12 */ 1624 strh r2, [r0, #0x01] 1625 mov r2, r3, lsl #8 /* r2 = 345. 
*/ 1626 orr r3, r2, ip, lsr #24 /* r3 = 3456 */ 1627 mov r2, ip, lsl #8 /* r2 = 789. */ 1628 orr r2, r2, r1, lsr #8 /* r2 = 789A */ 1629#else 1630 strb r2, [r0] 1631 mov r2, r2, lsr #8 /* r2 = ...1 */ 1632 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1633 strh r2, [r0, #0x01] 1634 mov r2, r3, lsr #8 /* r2 = .543 */ 1635 orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 1636 mov r2, ip, lsr #8 /* r2 = .987 */ 1637 orr r2, r2, r1, lsl #24 /* r2 = A987 */ 1638 mov r1, r1, lsr #8 /* r1 = ...B */ 1639#endif 1640 str r3, [r0, #0x03] 1641 str r2, [r0, #0x07] 1642 strb r1, [r0, #0x0b] 1643 RET 1644 LMEMCPY_C_PAD 1645 1646/* 1647 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 1648 */ 1649 ldrb r2, [r1] 1650 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1651 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1652 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1653 strb r2, [r0] 1654#ifdef __ARMEB__ 1655 mov r2, r3, lsr #16 /* r2 = ..12 */ 1656 strh r2, [r0, #0x01] 1657 mov r3, r3, lsl #16 /* r3 = 34.. */ 1658 orr r3, r3, ip, lsr #16 /* r3 = 3456 */ 1659 mov ip, ip, lsl #16 /* ip = 78.. */ 1660 orr ip, ip, r1, lsr #16 /* ip = 789A */ 1661 mov r1, r1, lsr #8 /* r1 = .9AB */ 1662#else 1663 strh r3, [r0, #0x01] 1664 mov r3, r3, lsr #16 /* r3 = ..43 */ 1665 orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 1666 mov ip, ip, lsr #16 /* ip = ..87 */ 1667 orr ip, ip, r1, lsl #16 /* ip = A987 */ 1668 mov r1, r1, lsr #16 /* r1 = ..xB */ 1669#endif 1670 str r3, [r0, #0x03] 1671 str ip, [r0, #0x07] 1672 strb r1, [r0, #0x0b] 1673 RET 1674 LMEMCPY_C_PAD 1675 1676/* 1677 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1678 */ 1679 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 1680 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1681 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 1682 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1683#ifdef __ARMEB__ 1684 strh r1, [r0] 1685 mov r1, ip, lsl #16 /* r1 = 23.. 
*/ 1686 orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 1687 mov r3, r3, lsl #16 /* r3 = 67.. */ 1688 orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 1689#else 1690 strh ip, [r0] 1691 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 1692 mov r3, r3, lsr #16 /* r3 = ..76 */ 1693 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 1694 mov r2, r2, lsr #16 /* r2 = ..BA */ 1695#endif 1696 str r1, [r0, #0x02] 1697 str r3, [r0, #0x06] 1698 strh r2, [r0, #0x0a] 1699 RET 1700 LMEMCPY_C_PAD 1701 1702/* 1703 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 1704 */ 1705 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1706 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1707 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 1708 strh ip, [r0] 1709 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1710 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 1711#ifdef __ARMEB__ 1712 mov r2, r2, lsl #24 /* r2 = 2... */ 1713 orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 1714 mov r3, r3, lsl #24 /* r3 = 6... */ 1715 orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 1716 orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 1717#else 1718 mov r2, r2, lsr #24 /* r2 = ...2 */ 1719 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 1720 mov r3, r3, lsr #24 /* r3 = ...6 */ 1721 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 1722 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 1723 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 1724#endif 1725 str r2, [r0, #0x02] 1726 str r3, [r0, #0x06] 1727 strh r1, [r0, #0x0a] 1728 RET 1729 LMEMCPY_C_PAD 1730 1731/* 1732 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1733 */ 1734 ldrh r2, [r1] 1735 ldr r3, [r1, #0x02] 1736 ldr ip, [r1, #0x06] 1737 ldrh r1, [r1, #0x0a] 1738 strh r2, [r0] 1739 str r3, [r0, #0x02] 1740 str ip, [r0, #0x06] 1741 strh r1, [r0, #0x0a] 1742 RET 1743 LMEMCPY_C_PAD 1744 1745/* 1746 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 1747 */ 1748 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 1749 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 1750 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 1751 strh ip, [r0, #0x0a] 1752 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1753 ldrb r1, [r1] /* r1 = ...0 */ 1754#ifdef __ARMEB__ 1755 mov r2, r2, lsr #24 /* r2 = ...9 */ 1756 orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 1757 mov r3, r3, lsr #24 /* r3 = ...5 */ 1758 orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 1759 mov r1, r1, lsl #8 /* r1 = ..0. */ 1760 orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 1761#else 1762 mov r2, r2, lsl #24 /* r2 = 9... */ 1763 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 1764 mov r3, r3, lsl #24 /* r3 = 5... */ 1765 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 1766 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 1767#endif 1768 str r2, [r0, #0x06] 1769 str r3, [r0, #0x02] 1770 strh r1, [r0] 1771 RET 1772 LMEMCPY_C_PAD 1773 1774/* 1775 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 1776 */ 1777 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1778 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 1779 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 1780#ifdef __ARMEB__ 1781 mov r3, r2, lsr #24 /* r3 = ...0 */ 1782 strb r3, [r0] 1783 mov r2, r2, lsl #8 /* r2 = 123. */ 1784 orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 1785 str r2, [r0, #0x01] 1786 mov r2, ip, lsl #8 /* r2 = 567. 
*/ 1787 orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 1788 str r2, [r0, #0x05] 1789 mov r2, r1, lsr #8 /* r2 = .89A; strh below stores only the low halfword 9A */ 1790 strh r2, [r0, #0x09] 1791 strb r1, [r0, #0x0b] 1792#else 1793 strb r2, [r0] 1794 mov r3, r2, lsr #8 /* r3 = .321 */ 1795 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 1796 str r3, [r0, #0x01] 1797 mov r3, ip, lsr #8 /* r3 = .765 */ 1798 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 1799 str r3, [r0, #0x05] 1800 mov r1, r1, lsr #8 /* r1 = .BA9 */ 1801 strh r1, [r0, #0x09] 1802 mov r1, r1, lsr #16 /* r1 = ...B */ 1803 strb r1, [r0, #0x0b] 1804#endif 1805 RET 1806 LMEMCPY_C_PAD 1807 1808/* 1809 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 1810 */ 1811 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 1812 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 1813 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1814 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x; x = byte before src in the same aligned word, never stored */ 1815 strb r2, [r0, #0x0b] 1816#ifdef __ARMEB__ 1817 strh r3, [r0, #0x09] /* stores the low halfword 9A of 789A */ 1818 mov r3, r3, lsr #16 /* r3 = ..78 */ 1819 orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 1820 mov ip, ip, lsr #16 /* ip = ..34 */ 1821 orr ip, ip, r1, lsl #16 /* ip = 1234 */ 1822 mov r1, r1, lsr #16 /* r1 = ..x0 */ 1823#else 1824 mov r2, r3, lsr #16 /* r2 = ..A9 */ 1825 strh r2, [r0, #0x09] 1826 mov r3, r3, lsl #16 /* r3 = 87.. */ 1827 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 1828 mov ip, ip, lsl #16 /* ip = 43.. 
*/ 1829 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 1830 mov r1, r1, lsr #8 /* r1 = .210 */ 1831#endif 1832 str r3, [r0, #0x05] 1833 str ip, [r0, #0x01] 1834 strb r1, [r0] 1835 RET 1836 LMEMCPY_C_PAD 1837 1838/* 1839 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 1840 */ 1841#ifdef __ARMEB__ 1842 ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 1843 ldr ip, [r1, #0x06] /* ip = 6789 */ 1844 ldr r3, [r1, #0x02] /* r3 = 2345 */ 1845 ldrh r1, [r1] /* r1 = ..01 */ 1846 strb r2, [r0, #0x0b] 1847 mov r2, r2, lsr #8 /* r2 = ...A */ 1848 orr r2, r2, ip, lsl #8 /* r2 = 789A */ 1849 mov ip, ip, lsr #8 /* ip = .678 */ 1850 orr ip, ip, r3, lsl #24 /* ip = 5678 */ 1851 mov r3, r3, lsr #8 /* r3 = .234 */ 1852 orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 1853 mov r1, r1, lsr #8 /* r1 = ...0 */ 1854 strb r1, [r0] 1855 str r3, [r0, #0x01] 1856 str ip, [r0, #0x05] 1857 strh r2, [r0, #0x09] 1858#else 1859 ldrh r2, [r1] /* r2 = ..10 */ 1860 ldr r3, [r1, #0x02] /* r3 = 5432 */ 1861 ldr ip, [r1, #0x06] /* ip = 9876 */ 1862 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 1863 strb r2, [r0] 1864 mov r2, r2, lsr #8 /* r2 = ...1 */ 1865 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1866 mov r3, r3, lsr #24 /* r3 = ...5 */ 1867 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 1868 mov ip, ip, lsr #24 /* ip = ...9 */ 1869 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 1870 mov r1, r1, lsr #8 /* r1 = ...B */ 1871 str r2, [r0, #0x01] 1872 str r3, [r0, #0x05] 1873 strh ip, [r0, #0x09] 1874 strb r1, [r0, #0x0b] 1875#endif 1876 RET 1877 LMEMCPY_C_PAD 1878 1879/* 1880 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 1881 */ 1882 ldrb r2, [r1] 1883 ldr r3, [r1, #0x01] 1884 ldr ip, [r1, #0x05] 1885 strb r2, [r0] 1886 ldrh r2, [r1, #0x09] 1887 ldrb r1, [r1, #0x0b] 1888 str r3, [r0, #0x01] 1889 str ip, [r0, #0x05] 1890 strh r2, [r0, #0x09] 1891 strb r1, [r0, #0x0b] 1892 RET 1893END(memcpy) 1894#endif /* !_STANDALONE */ 1895