/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
	.syntax	unified
	.text
	.align	2

/*
 * GET_PCB(tmp): read CP15 register c13/c0 (opcode2 4) into tmp and add
 * TD_PCB to it.  Both callers below then dereference the result with
 * "ldr r10, [r10]" and use it as the base for PCB_ONFAULT accesses.
 * NOTE(review): presumably CP15 c13/c0/4 holds the current thread pointer
 * and TD_PCB is the offset of its pcb pointer -- neither is defined in
 * this file, confirm against the headers that provide them.
 */
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns (in r0) 0 on success, or EFAULT if the user range wraps the
 * address space, ends at or above VM_MAXUSER_ADDRESS + 1, or a fault is
 * taken while copying.
 *
 * The length is tested as a SIGNED value, so a length with bit 31 set
 * takes the same "nothing to do, return 0" exit as a zero length.
 * NOTE(review): confirm that no caller can legitimately pass such a
 * length and mistake the 0 return for a completed copy.
 *
 * Fault protocol: the entry code saves the previous PCB_ONFAULT value in
 * r11 and installs .Lcopyin_fault in its place before calling
 * .Lcopyin_guts.  r3 tells the fault handler what .Lcopyin_guts currently
 * has pushed on the stack:
 *	r3 == 0		nothing extra
 *	r3 >  0		r4-r7
 *	r3 <  0		r4-r9
 * Every path in .Lcopyin_guts must keep r3 in step with its own
 * stmfd/ldmfd of those registers.
 *
 * All user-side accesses use the ldrt/ldrbt forms; kernel-side stores
 * are ordinary str/strb/strd.
 *
 * The unaligned-source paths merge words with "lsr #n"/"lsl #(32-n)",
 * i.e. they treat the low-order byte of a loaded word as the lowest
 * address (little-endian layout).
 */
ENTRY(copyin)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r0, r2		/* r3 = end of user range */
	movcs	r0, #EFAULT		/* Range wraps past 2^32 */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End >= VM_MAXUSER_ADDRESS + 1 */
	RETc(cs)

	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = base used for PCB_ONFAULT */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT] /* Put previous onfault value back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault landing pad (address installed in PCB_ONFAULT above).
	 * Unwinds whatever .Lcopyin_guts had pushed, as described by r3,
	 * then returns EFAULT to copyin's caller.
	 */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * The copy proper.  Entered with "bl"; returns with RET*.
	 * r0 = source (user), r1 = destination (kernel), r2 = bytes left.
	 */
.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump into the three load/store pairs below: skip
	 * (3 - ip) pairs.  Each pair is two instructions = 8 bytes, hence
	 * "lsl #3".  pc reads as this instruction's address + 8, which is
	 * why exactly one nop sits between the add and the first pair.
	 * Do not add or remove instructions in this sequence.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * 1-31 bytes left.  r4 = whole 8-byte chunks left, in bytes
	 * (0/8/16/24); r5 = bytes' worth of chunks to skip.  Each 8-byte
	 * chunk below is exactly four instructions (16 bytes of code; the
	 * nop inside each chunk is padding), hence "lsl #1".  As above,
	 * pc reads as the add's address + 8, hence the single nop after it.
	 * The flags from the subs survive to the RETeq below because none
	 * of the intervening instructions set them.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* Nothing extra on the stack now */

	/* 1-7 bytes left; source and destination are both word aligned */
.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Undo the sub if no word copied */
	/* 1-3 bytes left; r2 doubles as a data register from here on */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * ip = source offset within its word (1-3).  The source pointer is
 * rounded down to a word boundary and whole words are loaded; each
 * destination word is assembled from the tail of one source word and
 * the head of the next.  ip always carries the most recently loaded
 * source word between iterations.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2
	ldrt	ip, [r0], #0x04		/* Does not alter the cmp flags */
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

	/* Source offset 1: 16 bytes per iteration */
.Lcopyin_bad1_loop16:
	mov	r4, ip, lsr #8
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back up to first unstored byte */
	blt	.Lcopyin_l4

	/* Source offset 1: 4 bytes per iteration */
.Lcopyin_bad1_loop4:
	mov	r4, ip, lsr #8
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #24
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back up to first unstored byte */
	b	.Lcopyin_l4

	/* Source offset 2: 16 bytes per iteration */
.Lcopyin_bad2_loop16:
	mov	r4, ip, lsr #16
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back up to first unstored byte */
	blt	.Lcopyin_l4

	/* Source offset 2: 4 bytes per iteration */
.Lcopyin_bad2_loop4:
	mov	r4, ip, lsr #16
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #16
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back up to first unstored byte */
	b	.Lcopyin_l4

	/* Source offset 3: 16 bytes per iteration */
.Lcopyin_bad3_loop16:
	mov	r4, ip, lsr #24
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back up to first unstored byte */
	blt	.Lcopyin_l4

	/* Source offset 3: 4 bytes per iteration */
.Lcopyin_bad3_loop4:
	mov	r4, ip, lsr #24
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #8
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back up to first unstored byte */

	/* Common tail of the unaligned paths: at most 3 bytes left */
.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* Nothing extra on the stack now */
	adds	r2, r2, #0x04		/* Adjust for extra sub */
	RETeq
	/*
	 * Copy the final r2 (1-3) bytes.  Same computed-jump layout as the
	 * destination-alignment code at the top of .Lcopyin_guts: skip
	 * (3 - r2) two-instruction pairs; the nop accounts for pc + 8.
	 */
.Lcopyin_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns (in r0) 0 on success, or EFAULT if the user range wraps the
 * address space, ends at or above VM_MAXUSER_ADDRESS + 1, or a fault is
 * taken while copying.
 *
 * The length is tested as a SIGNED value, so a length with bit 31 set
 * takes the same "nothing to do, return 0" exit as a zero length.
 * NOTE(review): confirm that no caller can legitimately pass such a
 * length and mistake the 0 return for a completed copy.
 *
 * Fault protocol: the entry code saves the previous PCB_ONFAULT value in
 * r11 and installs .Lcopyout_fault in its place before calling
 * .Lcopyout_guts.  r3 tells the fault handler what .Lcopyout_guts
 * currently has pushed on the stack:
 *	r3 == 0		nothing extra
 *	r3 >  0		r4-r7
 *	r3 <  0		r4-r9
 * Every path in .Lcopyout_guts must keep r3 in step with its own
 * stmfd/ldmfd of those registers.
 *
 * All user-side accesses use the strt/strbt forms; kernel-side loads
 * are ordinary ldr/ldrb/ldrd.
 *
 * The unaligned-source paths merge words with "lsr #n"/"lsl #(32-n)",
 * i.e. they treat the low-order byte of a loaded word as the lowest
 * address (little-endian layout).
 */
ENTRY(copyout)
	cmp	r2, #0x00
	movle	r0, #0x00
	RETc(le)			/* Bail early if length is <= 0 */

	adds	r3, r1, r2		/* r3 = end of user range */
	movcs	r0, #EFAULT		/* Range wraps past 2^32 */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* End >= VM_MAXUSER_ADDRESS + 1 */
	RETc(cs)

	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = base used for PCB_ONFAULT */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT] /* Put previous onfault value back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * Fault landing pad (address installed in PCB_ONFAULT above).
	 * Unwinds whatever .Lcopyout_guts had pushed, as described by r3,
	 * then returns EFAULT to copyout's caller.
	 */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

	/*
	 * The copy proper.  Entered with "bl"; returns with RET*.
	 * r0 = source (kernel), r1 = destination (user), r2 = bytes left.
	 */
.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump into the three load/store pairs below: skip
	 * (3 - ip) pairs.  Each pair is two instructions = 8 bytes, hence
	 * "lsl #3".  pc reads as this instruction's address + 8, which is
	 * why exactly one nop sits between the add and the first pair.
	 * Do not add or remove instructions in this sequence.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (it is read with ldrd below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * 1-31 bytes left.  r4 = whole 8-byte chunks left, in bytes
	 * (0/8/16/24); r5 = bytes' worth of chunks to skip.  Each 8-byte
	 * chunk below is exactly four instructions (16 bytes of code; the
	 * nop inside each chunk is padding), hence "lsl #1".  As above,
	 * pc reads as the add's address + 8, hence the single nop after it.
	 * The flags from the subs survive to the RETeq below because none
	 * of the intervening instructions set them.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* Nothing extra on the stack now */

	/* 1-7 bytes left; source and destination are both word aligned */
.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Undo the sub if no word copied */
	/* 1-3 bytes left; r2 doubles as a data register from here on */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * ip = source offset within its word (1-3).  The source pointer is
 * rounded down to a word boundary and whole words are loaded; each
 * destination word is assembled from the tail of one source word and
 * the head of the next.  ip always carries the most recently loaded
 * source word between iterations.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03
	cmp	ip, #2
	ldr	ip, [r0], #0x04		/* Does not alter the cmp flags */
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

	/* Source offset 1: 16 bytes per iteration */
.Lcopyout_bad1_loop16:
	mov	r4, ip, lsr #8
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back up to first unstored byte */
	blt	.Lcopyout_l4

	/* Source offset 1: 4 bytes per iteration */
.Lcopyout_bad1_loop4:
	mov	r4, ip, lsr #8
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #24
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back up to first unstored byte */
	b	.Lcopyout_l4

	/* Source offset 2: 16 bytes per iteration */
.Lcopyout_bad2_loop16:
	mov	r4, ip, lsr #16
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back up to first unstored byte */
	blt	.Lcopyout_l4

	/* Source offset 2: 4 bytes per iteration */
.Lcopyout_bad2_loop4:
	mov	r4, ip, lsr #16
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #16
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back up to first unstored byte */
	b	.Lcopyout_l4

	/* Source offset 3: 16 bytes per iteration */
.Lcopyout_bad3_loop16:
	mov	r4, ip, lsr #24
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back up to first unstored byte */
	blt	.Lcopyout_l4

	/* Source offset 3: 4 bytes per iteration */
.Lcopyout_bad3_loop4:
	mov	r4, ip, lsr #24
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #8
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back up to first unstored byte */

	/* Common tail of the unaligned paths: at most 3 bytes left */
.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* Nothing extra on the stack now */
	adds	r2, r2, #0x04		/* Adjust for extra sub */
	RETeq
	/*
	 * Copy the final r2 (1-3) bytes.  Same computed-jump layout as the
	 * destination-alignment code at the top of .Lcopyout_guts: skip
	 * (3 - r2) two-instruction pairs; the nop accounts for pc + 8.
	 */
.Lcopyout_l4_2:
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)