/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include <machine/acle-compat.h>

	.syntax	unified
	.text
	.align	2

/*
 * GET_PCB(tmp) leaves in 'tmp' the address of a word that holds the
 * current PCB pointer; every user below follows it with "ldr tmp, [tmp]".
 *
 * ARMv6 and later: read the CP15 c13/c0/4 register and add TD_PCB.
 * Older cores:     load the address (__pcpu + PC_CURPCB) from a literal
 *                  word placed in .text.
 */
#if __ARM_ARCH >= 6
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif

/*
 * Conventions shared by copyin and copyout
 * ----------------------------------------
 * Return value (r0): 0 on success, EFAULT if the user range wraps, reaches
 * VM_MAXUSER_ADDRESS + 1 or beyond, or an access faults.  A length that is
 * zero or negative when read as a signed 32-bit value returns 0 without
 * copying anything.
 *
 * While the copy runs, PCB_ONFAULT in the current PCB points at the
 * .Lcopy{in,out}_fault handler; the previous value is kept in r11 and put
 * back on every exit from the software path.  r10 holds the PCB pointer.
 *
 * r3 tells the fault handler what the "guts" routine currently has on the
 * stack on top of {r10-r11, lr}:
 *	r3 == 0		nothing
 *	r3 >  0		r4-r7
 *	r3 <  0		r4-r9
 * It must therefore be updated whenever those registers are pushed/popped
 * around an instruction that can fault.
 *
 * User memory is accessed only with the "T" (translated) load/store forms
 * (ldrt/ldrbt in copyin, strt/strbt in copyout).
 *
 * The three "add pc, pc, ..." sequences are computed jumps.  In ARM state
 * pc reads as the address of the instruction plus 8, so the instruction
 * right after the add (a nop) is skipped and the target is measured from
 * the instruction after that.  The number and size of the instructions
 * following each jump is part of the algorithm: do not insert or remove
 * instructions there.
 *
 * NOTE(review): .L_arm_memcpy and .L_min_memcpy_size are not defined in
 * this file; presumably they are literal words supplied by the file that
 * includes this one -- confirm.
 */

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r0, r2		/* r3 = one past the last user byte */
	movcs	r0, #EFAULT		/* Range wraps past 2^32 */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End is above the user limit */
	RETc(cs)

	/*
	 * If the word at *(.L_arm_memcpy) is non-zero and the length is at
	 * least *(.L_min_memcpy_size), call through that pointer with
	 * (r0 = dst, r1 = src, r2 = len, r3 = flags).  A zero result ends
	 * the copy successfully; any other result falls through to the
	 * software copy with the original arguments restored.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: callee wants dst first */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return to the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* ldm leaves the flags untouched */
	moveq	r0, #0
	RETeq

.Lnormal:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = current PCB */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/* Entered via PCB_ONFAULT when an access in the guts faults */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/* Computed jump: skip (3 - ip) of the 8-byte load/store pairs */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = whole 8-byte units left, in bytes (0, 8, 16 or 24).
	 * Computed jump over (0x18 - r4) * 2 bytes of code: each 8-byte
	 * unit below is exactly four instructions (16 bytes), the nops are
	 * padding that keeps it so.  The flags set by the subs survive to
	 * the RETeq after the last unit.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer on the stack */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04		/* 4 or more left: copy one word */
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Fewer than 4: undo the sub */
	/* 1..3 bytes left; r2 is reused as a data register below */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source pointer is rounded down to a word boundary and whole words
 * are loaded; each destination word is assembled from two neighbouring
 * source words with shifts.  ip always carries the most recently loaded
 * source word into the next step.  The bad1/bad2/bad3 variants handle a
 * source offset of 1, 2 and 3 bytes respectively.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2			/* ip = source offset (1..3) */
	ldrt	ip, [r0], #0x04		/* ldrt leaves the flags untouched */
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

.Lcopyin_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the true (unaligned) src */
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the true (unaligned) src */
	b	.Lcopyin_l4

.Lcopyin_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the true (unaligned) src */
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the true (unaligned) src */
	b	.Lcopyin_l4

.Lcopyin_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the true (unaligned) src */
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the true (unaligned) src */

	/* Tail of the unaligned path: r2 = (bytes left) - 4 */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/* 1..3 bytes left: computed jump skips (3 - r2) load/store pairs */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r1, r2		/* r3 = one past the last user byte */
	movcs	r0, #EFAULT		/* Range wraps past 2^32 */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End is above the user limit */
	RETc(cs)

	/*
	 * Optional call through *(.L_arm_memcpy); same protocol as in
	 * copyin, but flagged DST_IS_USER.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* Swap r0/r1: callee wants dst first */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* Return to the cmp below */
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}	/* ldm leaves the flags untouched */
	moveq	r0, #0
	RETeq

.Lnormale:
	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = current PCB */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/* Entered via PCB_ONFAULT when an access in the guts faults */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1..3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/* Computed jump: skip (3 - ip) of the 8-byte load/store pairs */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (it is read with ldrd below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * r4 = whole 8-byte units left, in bytes (0, 8, 16 or 24).
	 * Computed jump over (0x18 - r4) * 2 bytes of code: each 8-byte
	 * unit below is exactly four instructions (16 bytes), the nops are
	 * padding that keeps it so.  The flags set by the subs survive to
	 * the RETeq after the last unit.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer on the stack */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04		/* 4 or more left: copy one word */
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Fewer than 4: undo the sub */
	/* 1..3 bytes left; r2 is reused as a data register below */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source pointer is rounded down to a word boundary and whole words
 * are loaded; each destination word is assembled from two neighbouring
 * source words with shifts.  ip always carries the most recently loaded
 * source word into the next step.  The bad1/bad2/bad3 variants handle a
 * source offset of 1, 2 and 3 bytes respectively.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2			/* ip = source offset (1..3) */
	ldr	ip, [r0], #0x04		/* ldr leaves the flags untouched */
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

.Lcopyout_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the true (unaligned) src */
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the true (unaligned) src */
	b	.Lcopyout_l4

.Lcopyout_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the true (unaligned) src */
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the true (unaligned) src */
	b	.Lcopyout_l4

.Lcopyout_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the true (unaligned) src */
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the true (unaligned) src */

	/* Tail of the unaligned path: r2 = (bytes left) - 4 */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer on the stack */
	adds	r2, r2, #0x04
	RETeq
	/* 1..3 bytes left: computed jump skips (3 - r2) load/store pairs */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)