/*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

	.syntax	unified
	.text
	.align	2

/*
 * GET_PCB(tmp): read CP15 c13/c0/4 into tmp and add TD_PCB, leaving in tmp
 * the address of the slot that holds the PCB pointer.  Callers follow it
 * with "ldr tmp, [tmp]" to obtain the PCB pointer itself.
 * NOTE(review): c13/c0/4 presumably holds the current thread pointer and
 * TD_PCB is presumably the offset of its pcb field -- neither is defined in
 * this file; confirm against the generated assym offsets.
 */
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add	tmp, tmp, #(TD_PCB)

/*
 * copyin
 *
 * r0 = user space address (source)
 * r1 = kernel space address (destination)
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns (in r0):
 *	0	the copy completed, or the length was zero
 *	EFAULT	the length has bit 31 set, r0 + r2 wraps around, r0 + r2 is
 *		at or above VM_MAXUSER_ADDRESS + 1, or the copy was abandoned
 *		through .Lcopyin_fault
 *
 * Register roles while the copy is running:
 *	r10	PCB pointer
 *	r11	previous PCB_ONFAULT value, restored on every exit path
 *	r3	tells .Lcopyin_fault what is stacked on top of {r10-r11, lr}:
 *		0 = nothing, > 0 = {r4-r7}, < 0 = {r4-r9}
 *	ip	scratch
 *
 * Every load from the source uses the "t" form (ldrt/ldrbt), which performs
 * the access with user-mode permissions.
 */
ENTRY(copyin)
	cmp	r2, #0x00
	moveq	r0, #0x00
	RETeq				/* Nothing to do for a zero length */
	/*
	 * A length with bit 31 set used to be treated as "<= 0" and reported
	 * as success without copying a single byte.  Fail it instead.  It
	 * must not be let through either: the code below uses signed
	 * arithmetic on r2, including for its computed jumps.
	 */
	movmi	r0, #EFAULT
	RETc(mi)

	adds	r3, r0, r2		/* r3 = one past the last user byte */
	movcs	r0, #EFAULT		/* Address range wraps around */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* Range ends at or above the limit */
	RETc(cs)

	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = PCB pointer */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]	/* Install ours */
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT] /* Put the previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

/*
 * Fault recovery.  The address of this label is stored at PCB_ONFAULT for
 * the duration of the copy.
 * NOTE(review): presumably the data abort handler resumes here when a user
 * access faults; that dispatch is not part of this file.
 */
.Lcopyin_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyin_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyin_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump into the three byte copies below.  ip becomes the
	 * number of load/store pairs to skip (0-2); each pair is two 4-byte
	 * instructions, hence "lsl #3".  pc reads as this instruction + 8,
	 * i.e. the first ldrbt, so the nop only runs when nothing is skipped.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyin_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyin_w_less_than8

	/* Quad-align the destination buffer (strd is used below) */
	tst	r1, #0x07		/* Already quad aligned? */
	ldrtne	ip, [r0], #0x04
	strne	ip, [r1], #0x04
	subne	r2, r2, #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer quad aligned, source is word aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyin_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyin_w_loop128:
	ldrt	r4, [r0], #0x04		/* LD:00-03 */
	ldrt	r5, [r0], #0x04		/* LD:04-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrt	r6, [r0], #0x04		/* LD:08-0b */
	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
	ldrt	r8, [r0], #0x04		/* LD:10-13 */
	ldrt	r9, [r0], #0x04		/* LD:14-17 */
	strd	r4, [r1], #0x08		/* ST:00-07 */
	ldrt	r4, [r0], #0x04		/* LD:18-1b */
	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
	strd	r6, [r1], #0x08		/* ST:08-0f */
	ldrt	r6, [r0], #0x04		/* LD:20-23 */
	ldrt	r7, [r0], #0x04		/* LD:24-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r1], #0x08		/* ST:10-17 */
	ldrt	r8, [r0], #0x04		/* LD:28-2b */
	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
	strd	r4, [r1], #0x08		/* ST:18-1f */
	ldrt	r4, [r0], #0x04		/* LD:30-33 */
	ldrt	r5, [r0], #0x04		/* LD:34-37 */
	strd	r6, [r1], #0x08		/* ST:20-27 */
	ldrt	r6, [r0], #0x04		/* LD:38-3b */
	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
	strd	r8, [r1], #0x08		/* ST:28-2f */
	ldrt	r8, [r0], #0x04		/* LD:40-43 */
	ldrt	r9, [r0], #0x04		/* LD:44-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08		/* ST:30-37 */
	ldrt	r4, [r0], #0x04		/* LD:48-4b */
	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
	strd	r6, [r1], #0x08		/* ST:38-3f */
	ldrt	r6, [r0], #0x04		/* LD:50-53 */
	ldrt	r7, [r0], #0x04		/* LD:54-57 */
	strd	r8, [r1], #0x08		/* ST:40-47 */
	ldrt	r8, [r0], #0x04		/* LD:58-5b */
	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
	strd	r4, [r1], #0x08		/* ST:48-4f */
	ldrt	r4, [r0], #0x04		/* LD:60-63 */
	ldrt	r5, [r0], #0x04		/* LD:64-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r1], #0x08		/* ST:50-57 */
	ldrt	r6, [r0], #0x04		/* LD:68-6b */
	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
	strd	r8, [r1], #0x08		/* ST:58-5f */
	ldrt	r8, [r0], #0x04		/* LD:70-73 */
	ldrt	r9, [r0], #0x04		/* LD:74-77 */
	strd	r4, [r1], #0x08		/* ST:60-67 */
	ldrt	r4, [r0], #0x04		/* LD:78-7b */
	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
	strd	r6, [r1], #0x08		/* ST:68-6f */
	strd	r8, [r1], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r1], #0x08		/* ST:78-7f */
	bge	.Lcopyin_w_loop128

.Lcopyin_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyin_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyin_w_loop32:
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x18]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	r8, [r0], #0x04
	ldrt	r9, [r0], #0x04
	strd	r4, [r1], #0x08
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	strd	r6, [r1], #0x08
	strd	r8, [r1], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r1], #0x08
	bge	.Lcopyin_w_loop32

.Lcopyin_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * 1-31 bytes left.  r4 = the part that is a multiple of 8 (0, 8, 16
	 * or 24) and r5 = 0x18 - r4.  Each 8-byte group below is exactly
	 * four instructions (16 bytes of code), so "r5, lsl #1" skips the
	 * groups that are not needed.  The flags set by the subs survive
	 * until the RETeq after the groups: Z is set when no tail remains.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop				/* Keeps the group at 4 instructions */
	strd	r4, [r1], #0x08

	/* At least 16 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* At least 8 bytes remaining */
	ldrt	r4, [r0], #0x04
	ldrt	r5, [r0], #0x04
	nop
	strd	r4, [r1], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer stacked */

.Lcopyin_w_less_than8:
	subs	r2, r2, #0x04
	ldrtge	ip, [r0], #0x04		/* 4-7 bytes left: copy one word */
	strge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Undo the sub when no word copied */
	/* 1-3 bytes left; r2 doubles as the second data byte from here on */
	ldrbt	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbtge	r2, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbtgt	ip, [r0]
	strbge	r2, [r1], #0x01
	strbgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read one aligned word at a time and each destination word
 * is assembled from two neighbouring source words with a shift/orr pair.
 * ip always carries the most recently loaded source word.  The lsr/lsl
 * pairing takes the lowest-addressed bytes from the low end of each word,
 * i.e. it assumes little-endian word layout.
 */
.Lcopyin_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03		/* Round the source down to a word */
	cmp	ip, #2			/* ip = source offset in word (1-3) */
	ldrt	ip, [r0], #0x04
	bgt	.Lcopyin_bad3
	beq	.Lcopyin_bad2
	b	.Lcopyin_bad1

.Lcopyin_bad1_loop16:
	mov	r4, ip, lsr #8
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad1_loop4:
	mov	r4, ip, lsr #8
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #24
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the next unread byte */
	b	.Lcopyin_l4

.Lcopyin_bad2_loop16:
	mov	r4, ip, lsr #16
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad2_loop4:
	mov	r4, ip, lsr #16
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #16
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the next unread byte */
	b	.Lcopyin_l4

.Lcopyin_bad3_loop16:
	mov	r4, ip, lsr #24
	ldrt	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldrt	r6, [r0], #0x04
	ldrt	r7, [r0], #0x04
	ldrt	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
	str	r4, [r1], #0x04
	str	r5, [r1], #0x04
	str	r6, [r1], #0x04
	str	r7, [r1], #0x04
.Lcopyin_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyin_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the next unread byte */
	blt	.Lcopyin_l4

.Lcopyin_bad3_loop4:
	mov	r4, ip, lsr #24
	ldrt	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #8
	str	r4, [r1], #0x04
	bge	.Lcopyin_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the next unread byte */

.Lcopyin_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer stacked */
	adds	r2, r2, #0x04		/* Adjust for extra sub */
	RETeq
.Lcopyin_l4_2:
	/*
	 * 1-3 bytes left.  Skip (3 - r2) of the byte copies below; each is
	 * a two-instruction pair (8 bytes of code).
	 */
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0], #0x01
	strb	ip, [r1], #0x01
	ldrbt	ip, [r0]
	strb	ip, [r1]
	RET
END(copyin)

/*
 * copyout
 *
 * r0 = kernel space address (source)
 * r1 = user space address (destination)
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns (in r0):
 *	0	the copy completed, or the length was zero
 *	EFAULT	the length has bit 31 set, r1 + r2 wraps around, r1 + r2 is
 *		at or above VM_MAXUSER_ADDRESS + 1, or the copy was abandoned
 *		through .Lcopyout_fault
 *
 * Register roles while the copy is running:
 *	r10	PCB pointer
 *	r11	previous PCB_ONFAULT value, restored on every exit path
 *	r3	tells .Lcopyout_fault what is stacked on top of {r10-r11, lr}:
 *		0 = nothing, > 0 = {r4-r7}, < 0 = {r4-r9}
 *	ip	scratch
 *
 * Every store to the destination uses the "t" form (strt/strbt), which
 * performs the access with user-mode permissions.
 */
ENTRY(copyout)
	cmp	r2, #0x00
	moveq	r0, #0x00
	RETeq				/* Nothing to do for a zero length */
	/*
	 * A length with bit 31 set used to be treated as "<= 0" and reported
	 * as success without copying a single byte.  Fail it instead.  It
	 * must not be let through either: the code below uses signed
	 * arithmetic on r2, including for its computed jumps.
	 */
	movmi	r0, #EFAULT
	RETc(mi)

	adds	r3, r1, r2		/* r3 = one past the last user byte */
	movcs	r0, #EFAULT		/* Address range wraps around */
	RETc(cs)

	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT		/* Range ends at or above the limit */
	RETc(cs)

	stmfd	sp!, {r10-r11, lr}

	GET_PCB(r10)
	ldr	r10, [r10]		/* r10 = PCB pointer */

	mov	r3, #0x00		/* Nothing extra on the stack yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT] /* Remember the previous handler */
	str	ip, [r10, #PCB_ONFAULT]	/* Install ours */
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT] /* Put the previous handler back */
	mov	r0, #0x00
	ldmfd	sp!, {r10-r11, pc}

/*
 * Fault recovery.  The address of this label is stored at PCB_ONFAULT for
 * the duration of the copy.
 * NOTE(review): presumably the data abort handler resumes here when a user
 * access faults; that dispatch is not part of this file.
 */
.Lcopyout_fault:
	ldr	r0, =EFAULT
	str	r11, [r10, #PCB_ONFAULT]
	cmp	r3, #0x00
	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
	ldmfd	sp!, {r10-r11, pc}

.Lcopyout_guts:
	pld	[r0]
	/* Word-align the destination buffer */
	ands	ip, r1, #0x03		/* Already word aligned? */
	beq	.Lcopyout_wordaligned	/* Yup */
	rsb	ip, ip, #0x04		/* ip = bytes needed to align (1-3) */
	cmp	r2, ip			/* Enough bytes left to align it? */
	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
	sub	r2, r2, ip
	/*
	 * Computed jump into the three byte copies below.  ip becomes the
	 * number of load/store pairs to skip (0-2); each pair is two 4-byte
	 * instructions, hence "lsl #3".  pc reads as this instruction + 8,
	 * i.e. the first ldrb, so the nop only runs when nothing is skipped.
	 */
	rsbs	ip, ip, #0x03
	addne	pc, pc, ip, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	cmp	r2, #0x00		/* All done? */
	RETeq

	/* Destination buffer is now word aligned */
.Lcopyout_wordaligned:
	ands	ip, r0, #0x03		/* Is src also word-aligned? */
	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
	blt	.Lcopyout_w_less_than8

	/* Quad-align the source buffer (ldrd is used below) */
	tst	r0, #0x07		/* Already quad aligned? */
	ldrne	ip, [r0], #0x04
	subne	r2, r2, #0x04
	strtne	ip, [r1], #0x04

	stmfd	sp!, {r4-r9}		/* Free up some registers */
	mov	r3, #-1			/* Signal restore r4-r9 */

	/* Destination buffer word aligned, source is quad aligned */
	subs	r2, r2, #0x80
	blt	.Lcopyout_w_lessthan128

	/* Copy 128 bytes at a time */
.Lcopyout_w_loop128:
	ldrd	r4, [r0], #0x08		/* LD:00-07 */
	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldrd	r6, [r0], #0x08		/* LD:08-0f */
	ldrd	r8, [r0], #0x08		/* LD:10-17 */
	strt	r4, [r1], #0x04		/* ST:00-03 */
	strt	r5, [r1], #0x04		/* ST:04-07 */
	ldrd	r4, [r0], #0x08		/* LD:18-1f */
	strt	r6, [r1], #0x04		/* ST:08-0b */
	strt	r7, [r1], #0x04		/* ST:0c-0f */
	ldrd	r6, [r0], #0x08		/* LD:20-27 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strt	r8, [r1], #0x04		/* ST:10-13 */
	strt	r9, [r1], #0x04		/* ST:14-17 */
	ldrd	r8, [r0], #0x08		/* LD:28-2f */
	strt	r4, [r1], #0x04		/* ST:18-1b */
	strt	r5, [r1], #0x04		/* ST:1c-1f */
	ldrd	r4, [r0], #0x08		/* LD:30-37 */
	strt	r6, [r1], #0x04		/* ST:20-23 */
	strt	r7, [r1], #0x04		/* ST:24-27 */
	ldrd	r6, [r0], #0x08		/* LD:38-3f */
	strt	r8, [r1], #0x04		/* ST:28-2b */
	strt	r9, [r1], #0x04		/* ST:2c-2f */
	ldrd	r8, [r0], #0x08		/* LD:40-47 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strt	r4, [r1], #0x04		/* ST:30-33 */
	strt	r5, [r1], #0x04		/* ST:34-37 */
	ldrd	r4, [r0], #0x08		/* LD:48-4f */
	strt	r6, [r1], #0x04		/* ST:38-3b */
	strt	r7, [r1], #0x04		/* ST:3c-3f */
	ldrd	r6, [r0], #0x08		/* LD:50-57 */
	strt	r8, [r1], #0x04		/* ST:40-43 */
	strt	r9, [r1], #0x04		/* ST:44-47 */
	ldrd	r8, [r0], #0x08		/* LD:58-5f */
	strt	r4, [r1], #0x04		/* ST:48-4b */
	strt	r5, [r1], #0x04		/* ST:4c-4f */
	ldrd	r4, [r0], #0x08		/* LD:60-67 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strt	r6, [r1], #0x04		/* ST:50-53 */
	strt	r7, [r1], #0x04		/* ST:54-57 */
	ldrd	r6, [r0], #0x08		/* LD:68-6f */
	strt	r8, [r1], #0x04		/* ST:58-5b */
	strt	r9, [r1], #0x04		/* ST:5c-5f */
	ldrd	r8, [r0], #0x08		/* LD:70-77 */
	strt	r4, [r1], #0x04		/* ST:60-63 */
	strt	r5, [r1], #0x04		/* ST:64-67 */
	ldrd	r4, [r0], #0x08		/* LD:78-7f */
	strt	r6, [r1], #0x04		/* ST:68-6b */
	strt	r7, [r1], #0x04		/* ST:6c-6f */
	strt	r8, [r1], #0x04		/* ST:70-73 */
	strt	r9, [r1], #0x04		/* ST:74-77 */
	subs	r2, r2, #0x80
	strt	r4, [r1], #0x04		/* ST:78-7b */
	strt	r5, [r1], #0x04		/* ST:7c-7f */
	bge	.Lcopyout_w_loop128

.Lcopyout_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lcopyout_w_lessthan32

	/* Copy 32 bytes at a time */
.Lcopyout_w_loop32:
	ldrd	r4, [r0], #0x08
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	ldrd	r8, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	ldrd	r4, [r0], #0x08
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
	strt	r8, [r1], #0x04
	strt	r9, [r1], #0x04
	subs	r2, r2, #0x20
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	bge	.Lcopyout_w_loop32

.Lcopyout_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmfdeq	sp!, {r4-r9}
	RETeq				/* Return now if done */

	/*
	 * 1-31 bytes left.  r4 = the part that is a multiple of 8 (0, 8, 16
	 * or 24) and r5 = 0x18 - r4.  Each 8-byte group below is exactly
	 * four instructions (16 bytes of code), so "r5, lsl #1" skips the
	 * groups that are not needed.  The flags set by the subs survive
	 * until the RETeq after the groups: Z is set when no tail remains.
	 */
	and	r4, r2, #0x18
	rsb	r5, r4, #0x18
	subs	r2, r2, r4
	add	pc, pc, r5, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop				/* Keeps the group at 4 instructions */

	/* At least 16 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* At least 8 bytes remaining */
	ldrd	r4, [r0], #0x08
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	nop

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	RETeq				/* Return now if done */
	mov	r3, #0x00		/* r4-r9 are no longer stacked */

.Lcopyout_w_less_than8:
	subs	r2, r2, #0x04
	ldrge	ip, [r0], #0x04		/* 4-7 bytes left: copy one word */
	strtge	ip, [r1], #0x04
	RETeq				/* Return now if done */
	addlt	r2, r2, #0x04		/* Undo the sub when no word copied */
	/* 1-3 bytes left; r2 doubles as the second data byte from here on */
	ldrb	ip, [r0], #0x01
	cmp	r2, #0x02
	ldrbge	r2, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrbgt	ip, [r0]
	strbtge	r2, [r1], #0x01
	strbtgt	ip, [r1]
	RET

/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * The source is read one aligned word at a time and each destination word
 * is assembled from two neighbouring source words with a shift/orr pair.
 * ip always carries the most recently loaded source word.  The lsr/lsl
 * pairing takes the lowest-addressed bytes from the low end of each word,
 * i.e. it assumes little-endian word layout.
 */
.Lcopyout_bad_align:
	stmfd	sp!, {r4-r7}
	mov	r3, #0x01		/* Signal restore r4-r7 */
	bic	r0, r0, #0x03		/* Round the source down to a word */
	cmp	ip, #2			/* ip = source offset in word (1-3) */
	ldr	ip, [r0], #0x04
	bgt	.Lcopyout_bad3
	beq	.Lcopyout_bad2
	b	.Lcopyout_bad1

.Lcopyout_bad1_loop16:
	mov	r4, ip, lsr #8
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad1:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad1_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x03		/* Back to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad1_loop4:
	mov	r4, ip, lsr #8
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #24
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad1_loop4
	sub	r0, r0, #0x03		/* Back to the next unread byte */
	b	.Lcopyout_l4

.Lcopyout_bad2_loop16:
	mov	r4, ip, lsr #16
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad2:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad2_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x02		/* Back to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad2_loop4:
	mov	r4, ip, lsr #16
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #16
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad2_loop4
	sub	r0, r0, #0x02		/* Back to the next unread byte */
	b	.Lcopyout_l4

.Lcopyout_bad3_loop16:
	mov	r4, ip, lsr #24
	ldr	r5, [r0], #0x04
	pld	[r0, #0x018]
	ldr	r6, [r0], #0x04
	ldr	r7, [r0], #0x04
	ldr	ip, [r0], #0x04
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
	strt	r4, [r1], #0x04
	strt	r5, [r1], #0x04
	strt	r6, [r1], #0x04
	strt	r7, [r1], #0x04
.Lcopyout_bad3:
	subs	r2, r2, #0x10
	bge	.Lcopyout_bad3_loop16

	adds	r2, r2, #0x10
	ldmfdeq	sp!, {r4-r7}
	RETeq				/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r0, r0, #0x01		/* Back to the next unread byte */
	blt	.Lcopyout_l4

.Lcopyout_bad3_loop4:
	mov	r4, ip, lsr #24
	ldr	ip, [r0], #0x04
	subs	r2, r2, #0x04
	orr	r4, r4, ip, lsl #8
	strt	r4, [r1], #0x04
	bge	.Lcopyout_bad3_loop4
	sub	r0, r0, #0x01		/* Back to the next unread byte */

.Lcopyout_l4:
	ldmfd	sp!, {r4-r7}
	mov	r3, #0x00		/* r4-r7 are no longer stacked */
	adds	r2, r2, #0x04		/* Adjust for extra sub */
	RETeq
.Lcopyout_l4_2:
	/*
	 * 1-3 bytes left.  Skip (3 - r2) of the byte copies below; each is
	 * a two-instruction pair (8 bytes of code).
	 */
	rsbs	r2, r2, #0x03
	addne	pc, pc, r2, lsl #3
	nop
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0], #0x01
	strbt	ip, [r1], #0x01
	ldrb	ip, [r0]
	strbt	ip, [r1]
	RET
END(copyout)