/*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/

/*-
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */


#include "assym.s"

#include <machine/asm.h>

/*
 * Literal words holding the addresses of two variables consulted by
 * copyin/copyout:
 *   _arm_memcpy      - pointer to an optional accelerated copy routine
 *                      (a NULL pointer means "not available").
 *   _min_memcpy_size - smallest length for which that routine is tried.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)

__FBSDID("$FreeBSD$");
#ifdef _ARM_ARCH_5E
#include <arm/arm/bcopyinout_xscale.S>
#else

	.text
	.align	0

/*
 * Literal used to locate the current PCB (which holds the on-fault
 * handler slot at offset PCB_ONFAULT).
 */
#ifdef MULTIPROCESSOR
.Lcpu_info:
	.word	_C_LABEL(cpu_info)
#else
.Lcurpcb:
	.word _C_LABEL(__pcpu) + PC_CURPCB
#endif

#define SAVE_REGS	stmfd	sp!, {r4-r11}
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}

/*
 * NOTE: this region is only assembled when _ARM_ARCH_5E is NOT defined
 * (see the enclosing #ifdef/#else above), so the test below is always
 * false here and PREFETCH() expands to nothing.
 */
#if defined(_ARM_ARCH_5E)
#define HELLOCPP #
#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
#else
#define PREFETCH(rx,o)
#endif

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * Returns (in r0) 0 on success, or 14 (EFAULT) when the on-fault handler
 * .Lcopyfault is entered.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 *
 * Register roles on the normal path:
 *   r0  = source cursor (user), post-incremented
 *   r1  = destination cursor (kernel), post-incremented
 *   r2  = bytes remaining
 *   r4  = current PCB
 *   r5  = previous PCB_ONFAULT value, restored on every exit
 *   r6-r11 = data being moved
 *
 * All user-side loads use the ldrt/ldrbt forms.
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Try the optional accelerated routine first: only when the
	 * _arm_memcpy pointer is non-NULL and length >= _min_memcpy_size.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	/* Call (*_arm_memcpy)(dst = kernel, src = user, len, flags). */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0
	/* ldmfd does not touch the flags set by the cmp above. */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through with the original r0-r2 restored. */

.Lnormal:
	SAVE_REGS
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r4, .Lcpu_info
	ldr	r4, [r4, r0, lsl #2]
	ldr	r4, [r4, #CI_CURPCB]
	ldmfd	sp!, {r0-r2, r14}
#else
	ldr	r4, .Lcurpcb
	ldr	r4, [r4]
#endif

	/* Remember the old on-fault handler in r5 and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 *
	 * Computed jump: pc reads as this instruction + 8, i.e. the first
	 * .word of the table, so the entry selected is table[r1 & 3].
	 * The "b" merely fills the slot in between; it is never executed
	 * because the ldr always writes pc.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:
	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r1 is word aligned here, so (r1 & 0x1f) is already a byte offset
	 * (0, 4, ..., 28) into the table and needs no shift.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:
	ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:
	ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Copy 8 bytes per iteration while at least 8 remain. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 *
	 * Use copyin's own epilogue (.Liout) rather than copyout's .Lout,
	 * so this routine does not depend on the other one's exit path.
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte-at-a-time tail.  r2 is non-zero on every entry.  The table
	 * is indexed by (r2 & 3); index 0 means "at least 4 left", so it
	 * enters at .Lic4 and copies four bytes.  Only the final subs
	 * updates the flags that the loop-back branch at .Licend tests.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:
	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:
	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:
	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

	/* Success: return 0 and put back the previous on-fault handler. */
.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

	/*
	 * On-fault handler installed by both copyin and copyout.
	 * Expects r4 = PCB, r5 = previous handler, and r4-r11 still pushed
	 * by SAVE_REGS.  Returns EFAULT.
	 */
.Lcopyfault:
	mov	r0, #14 /* EFAULT */
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * Returns (in r0) 0 on success, or 14 (EFAULT) when the on-fault handler
 * .Lcopyfault is entered.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 *
 * Register roles mirror copyin (r0 = source cursor, r1 = destination
 * cursor, r2 = bytes remaining, r4 = PCB, r5 = previous PCB_ONFAULT);
 * here the user-side accesses are the stores, which use strt/strbt.
 */

ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/*
	 * Try the optional accelerated routine first: only when the
	 * _arm_memcpy pointer is non-NULL and length >= _min_memcpy_size.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	/* Call (*_arm_memcpy)(dst = user, src = kernel, len, flags). */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0
	/* ldmfd does not touch the flags set by the cmp above. */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0
	RETeq
	/* Non-zero result: fall through with the original r0-r2 restored. */

.Lnormale:
	SAVE_REGS
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r4, .Lcpu_info
	ldr	r4, [r4, r0, lsl #2]
	ldr	r4, [r4, #CI_CURPCB]
	ldmfd	sp!, {r0-r2, r14}
#else
	ldr	r4, .Lcurpcb
	ldr	r4, [r4]
#endif

	/* Remember the old on-fault handler in r5 and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 *
	 * Computed jump: pc reads as this instruction + 8, i.e. the first
	 * .word of the table, so the entry selected is table[r1 & 3].
	 * The "b" merely fills the slot in between; it is never executed
	 * because the ldr always writes pc.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:
	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align source & destination to cacheline boundary.
	 *
	 * The table is indexed by the destination (r1 & 0x1f); r1 is word
	 * aligned here, so that value is already a byte offset
	 * (0, 4, ..., 28) into the table and needs no shift.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:
	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:
	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

	/* Copy 8 bytes per iteration while at least 8 remain. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Byte-at-a-time tail.  r2 is non-zero on every entry.  The table
	 * is indexed by (r2 & 3); index 0 means "at least 4 left", so it
	 * enters at .Lc4 and copies four bytes.  Only the final subs
	 * updates the flags that the loop-back branch at .Lcend tests.
	 */
.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:
	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:
	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:
	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

	/* Success: return 0 and put back the previous on-fault handler. */
.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
#endif

/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Copies a single 8-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 *
 * r2 = current PCB, ip = previous PCB_ONFAULT value (restored at 1:).
 *
 * NOTE(review): the fault path (label 1) does not load r0 itself;
 * presumably the abort handler supplies the error code before resuming
 * at the on-fault address -- confirm against the trap code.
 */
ENTRY(badaddr_read_1)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, r14}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	/*
	 * NOTE(review): the nops look like padding so that a fault from
	 * the load is taken while the handler is still installed -- confirm.
	 */
	nop
	nop
	nop
	ldrb	r3, [r0]
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET

/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Copies a single 16-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 *
 * r2 = current PCB, ip = previous PCB_ONFAULT value (restored at 1:).
 *
 * NOTE(review): the fault path (label 1) does not load r0 itself;
 * presumably the abort handler supplies the error code before resuming
 * at the on-fault address -- confirm against the trap code.
 */
ENTRY(badaddr_read_2)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, r14}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	/*
	 * NOTE(review): the nops look like padding so that a fault from
	 * the load is taken while the handler is still installed -- confirm.
	 */
	nop
	nop
	nop
	ldrh	r3, [r0]
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET

/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Copies a single 32-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 *
 * r2 = current PCB, ip = previous PCB_ONFAULT value (restored at 1:).
 *
 * NOTE(review): the fault path (label 1) does not load r0 itself;
 * presumably the abort handler supplies the error code before resuming
 * at the on-fault address -- confirm against the trap code.
 */
ENTRY(badaddr_read_4)
#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r1, r14}
	bl	_C_LABEL(cpu_number)
	ldr	r2, .Lcpu_info
	ldr	r2, [r2, r0, lsl #2]
	ldr	r2, [r2, #CI_CURPCB]
	ldmfd	sp!, {r0-r1, r14}
#else
	ldr	r2, .Lcurpcb
	ldr	r2, [r2]
#endif
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	/*
	 * NOTE(review): the nops look like padding so that a fault from
	 * the load is taken while the handler is still installed -- confirm.
	 */
	nop
	nop
	nop
	ldr	r3, [r0]
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	RET
