/* $OpenBSD: bcopyinout.S,v 1.11 2023/01/31 15:18:54 deraadt Exp $ */
/* $NetBSD: bcopyinout.S,v 1.13 2003/10/31 16:54:05 scw Exp $ */

/*
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "assym.h"

#include <sys/errno.h>

#include <machine/asm.h>
#include <arm/sysreg.h>

	.text
	.align	2

#ifdef MULTIPROCESSOR
.Lcpu_info:
	.word	cpu_info
#else
.Lcpu_info_primary:
	.word	cpu_info_primary
#endif

/*
 * Every routine below that uses r4-r11 pushes them on entry and pops
 * them on every exit path (including the shared fault path).
 */
#define SAVE_REGS	stmfd	sp!, {r4-r11}
#define RESTORE_REGS	ldmfd	sp!, {r4-r11}

/*
 * A note on the computed jumps used throughout this file:
 *
 *	ldr	pc, [pc, rN ...]
 *	b	<label>
 *	.word	<table entry 0>
 *	...
 *
 * In ARM state a read of pc yields the address of the current
 * instruction plus 8, so the table base is the first .word after the
 * `b'.  The `b' only occupies the slot between the ldr and the table;
 * the ldr always transfers control, so the `b' itself is not executed.
 */

/*
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space
 *
 * All loads from the user buffer use the unprivileged ldrt/ldrbt forms.
 *
 * Register roles after the prologue:
 *	r2     = bytes remaining
 *	r4     = current pcb (curcpu->ci_curpcb)
 *	r5     = previous pcb_onfault value, restored on every exit
 *	r6-r11 = data scratch
 *
 * Returns 0 in r0 on success.  A fault resumes at .Lcopyfault.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 */
ENTRY(_copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	moveq	pc, lr

	SAVE_REGS

	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r4)
	ldr	r4, [r4, #CI_CURPCB]

	/* Remember the old fault handler and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 * (r1 & 3) == 1, 2, 3 copies 3, 2, 1 leading bytes respectively.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * r1 is word aligned here, so (r1 & 0x1f) is a multiple of 4 and
	 * is used directly as a byte offset into the table.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 *
	 * (No prefetch instruction is actually issued by the code below.)
	 */
.Licaligned:
	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

	/* Copy two words at a time while at least 8 bytes remain. */
.Licleanup8:
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Liout

	/*
	 * Byte-at-a-time tail.  Each pass copies (r2 & 3) bytes, or 4 when
	 * that is zero; the subs at .Lic1 sets the flags tested at .Licend.
	 */
.Licleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	mov	pc, lr

/*
 * Shared fault exit for _copyin, copyin32, copyout and kcopy (each of
 * them installs this address in pcb_onfault).  Expects r4 = pcb and
 * r5 = previous pcb_onfault, with r4-r11 still saved on the stack.
 *
 * r0 is returned untouched.
 * NOTE(review): presumably the abort handler stores the error code in
 * r0 before resuming here -- confirm against the fault handler.
 */
.Lcopyfault:
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	mov	pc, lr

/*
 * r0 = user space address
 * r1 = kernel space address
 *
 * Atomically copies a 32-bit word from user space to kernel space
 *
 * Returns EFAULT immediately if the user address is not word aligned,
 * 0 on success.  A fault resumes at .Lcopyfault.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 */
ENTRY(copyin32)
	/* Quick exit if unaligned */
	tst	r0, #0x3
	movne	r0, #EFAULT
	movne	pc, lr

	SAVE_REGS

	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r4)
	ldr	r4, [r4, #CI_CURPCB]

	/* Remember the old fault handler and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	/*
	 * The user word must be fetched with the unprivileged load, as in
	 * _copyin; a plain ldr would be performed with kernel privilege
	 * and would happily read a kernel address handed in by the caller.
	 */
	ldrt	r6, [r0]
	str	r6, [r1]
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	mov	pc, lr

/*
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space
 *
 * All stores to the user buffer use the unprivileged strt/strbt forms.
 *
 * Register roles after the prologue:
 *	r2     = bytes remaining
 *	r4     = current pcb (curcpu->ci_curpcb)
 *	r5     = previous pcb_onfault value, restored on every exit
 *	r6-r11 = data scratch
 *
 * Returns 0 in r0 on success.  A fault resumes at .Lcopyfault.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 */
ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	moveq	pc, lr

	SAVE_REGS

	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r4)
	ldr	r4, [r4, #CI_CURPCB]

	/* Remember the old fault handler and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 * (r1 & 3) == 1, 2, 3 copies 3, 2, 1 leading bytes respectively.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align source & destination to cacheline boundary.
	 *
	 * The table index is derived from the destination (r1) only.
	 * r1 is word aligned here, so (r1 & 0x1f) is a multiple of 4 and
	 * is used directly as a byte offset into the table.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 *
	 * (No prefetch instruction is actually issued by the code below.)
	 */
.Lcaligned:
	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

	/* Copy two words at a time while at least 8 bytes remain. */
.Lcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

	/*
	 * Byte-at-a-time tail.  Each pass copies (r2 & 3) bytes, or 4 when
	 * that is zero; the subs at .Lc1 sets the flags tested at .Lcend.
	 */
.Lcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	mov	pc, lr

/*
 * r0 = kernel space source address
 * r1 = kernel space destination address
 * r2 = length
 *
 * Copies bytes from kernel space to kernel space, aborting on page fault
 *
 * Copy of copyout, but without the ldrt/strt instructions.
 *
 * Register roles after the prologue:
 *	r2     = bytes remaining
 *	r4     = current pcb (curcpu->ci_curpcb)
 *	r5     = previous pcb_onfault value, restored on every exit
 *	r6-r11 = data scratch
 *
 * Returns 0 in r0 on success.  A fault resumes at .Lcopyfault.
 */

ENTRY(kcopy)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	moveq	pc, lr

	SAVE_REGS

	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r4)
	ldr	r4, [r4, #CI_CURPCB]

	/* Remember the old fault handler and install ours. */
	ldr	r5, [r4, #PCB_ONFAULT]
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lkcleanup

	/*
	 * Align destination to word boundary.
	 * (r1 & 3) == 1, 2, 3 copies 3, 2, 1 leading bytes respectively.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lkalend
	.word	.Lkalend
	.word	.Lkal3
	.word	.Lkal2
	.word	.Lkal1
.Lkal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lkal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lkcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lkcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lkcleanup8

	/*
	 * Align source & destination to cacheline boundary.
	 *
	 * The table index is derived from the destination (r1) only.
	 * r1 is word aligned here, so (r1 & 0x1f) is a multiple of 4 and
	 * is used directly as a byte offset into the table.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lkcaligned
	.word	.Lkcaligned
	.word	.Lkcal28
	.word	.Lkcal24
	.word	.Lkcal20
	.word	.Lkcal16
	.word	.Lkcal12
	.word	.Lkcal8
	.word	.Lkcal4
.Lkcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lkcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lkcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lkcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lkcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lkcal8:ldr	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lkcal4:ldr	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 * 	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 *
	 * (No prefetch instruction is actually issued by the code below.)
	 */
.Lkcaligned:
	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	stmia	r1!, {r6, r7}
	ldmia	r0!, {r6, r7}
	stmia	r1!, {r8-r11}
	stmia	r1!, {r6, r7}

	cmp	r2, #0x40
	bge	.Lkcaligned

	sub	r2, r2, #0x20

	/* Copy a cacheline */
	ldmia	r0!, {r6-r11}
	stmia	r1!, {r6-r7}
	ldmia	r0!, {r6-r7}
	stmia	r1!, {r8-r11}
	stmia	r1!, {r6-r7}

	cmp	r2, #0x08
	blt	.Lkprecleanup

	/* Copy two words at a time while at least 8 bytes remain. */
.Lkcleanup8:
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	stmia	r1!, {r8-r9}
	cmp	r2, #8
	bge	.Lkcleanup8

.Lkprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lkout

	/*
	 * Byte-at-a-time tail.  Each pass copies (r2 & 3) bytes, or 4 when
	 * that is zero; the subs at .Lkc1 sets the flags tested at .Lkcend.
	 */
.Lkcleanup:
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lkcend
	.word	.Lkc4
	.word	.Lkc1
	.word	.Lkc2
	.word	.Lkc3
.Lkc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lkc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lkc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Lkcend:
	bne	.Lkcleanup

.Lkout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	mov	pc, lr

/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Copies a single 8-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 *
 * Uses only r2, r3 and ip as scratch (r2 = pcb, ip = previous
 * pcb_onfault), so nothing is pushed on the stack.  The fault handler
 * address is local label 1, which skips the `mov r0, #0'.
 * NOTE(review): the EFAULT value is not loaded anywhere in this file;
 * presumably the abort handler places it in r0 -- confirm.
 * NOTE(review): the reason for the nop padding around the load is not
 * evident from this file.
 */
ENTRY(badaddr_read_1)
	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r2)
	ldr	r2, [r2, #CI_CURPCB]
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldrb	r3, [r0]
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	mov	pc, lr

/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Copies a single 16-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 *
 * Same structure as badaddr_read_1, using halfword load/store.
 */
ENTRY(badaddr_read_2)
	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r2)
	ldr	r2, [r2, #CI_CURPCB]
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldrh	r3, [r0]
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	mov	pc, lr

/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Copies a single 32-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 *
 * Same structure as badaddr_read_1, using word load/store.
 */
ENTRY(badaddr_read_4)
	/* Get curcpu from TPIDRPRW. */
	mrc	CP15_TPIDRPRW(r2)
	ldr	r2, [r2, #CI_CURPCB]
	ldr	ip, [r2, #PCB_ONFAULT]
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldr	r3, [r0]
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]
	mov	pc, lr