/* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */

/*-
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "assym.s"

#include <machine/asm.h>
#include <sys/errno.h>

/*
 * Literal pool: address of the (optional) optimized memcpy back-end
 * pointer and of the minimum length for which calling it pays off.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)

__FBSDID("$FreeBSD$");
#ifdef _ARM_ARCH_5E
#include <arm/arm/bcopyinout_xscale.S>
#else

	.text
	.align	2

#if __ARM_ARCH >= 6
/*
 * tmp = &curthread->td_pcb.  On FreeBSD/armv6 the curthread pointer
 * lives in TPIDRPRW (CP15 c13/c0/4); callers dereference the result
 * to obtain the pcb itself.
 */
#define GET_PCB(tmp) \
	mrc p15, 0, tmp, c13, c0, 4; \
	add tmp, tmp, #(TD_PCB)
#else
.Lcurpcb:
	.word	_C_LABEL(__pcpu) + PC_CURPCB

/* tmp = &__pcpu.pc_curpcb; callers dereference it to get the pcb. */
#define GET_PCB(tmp) \
	ldr	tmp, .Lcurpcb
#endif


#define SAVE_REGS	stmfd sp!, {r4-r11}; _SAVE({r4-r11})
#define RESTORE_REGS	ldmfd sp!, {r4-r11}

#if defined(_ARM_ARCH_5E)
/* HELLOCPP smuggles a literal '#' through the C preprocessor */
#define HELLOCPP #
#define PREFETCH(rx,o)	pld	[ rx , HELLOCPP (o) ]
#else
#define PREFETCH(rx,o)
#endif

/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * r0 = user space address
 * r1 = kernel space address
 * r2 = length
 *
 * Copies bytes from user space to kernel space.  Returns 0 on success,
 * EFAULT if the range is outside user space or an access faults.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 */
ENTRY(copyin)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/* Reject a source range that wraps around the address space... */
	adds	r3, r0, r2
	movcs	r0, #EFAULT
	RETc(cs)

	/* ...or that extends beyond the top of user space. */
	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT
	RETc(cs)

	/*
	 * If an optimized memcpy back-end is registered and the copy is
	 * at least _min_memcpy_size bytes, try it first.
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	/*
	 * NOTE(review): unlike the same spill in copyout, this one has no
	 * matching _SAVE() unwind annotation -- confirm whether one is
	 * wanted here too.
	 */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, r0			/* swap args: memcpy(dst, src, len) */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #2 /* SRC_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* lr = return address (pc reads as . + 8) */
	ldr	pc, [r4]		/* indirect call through *_arm_memcpy */
	cmp	r0, #0			/* test back-end result; flags survive ldmfd */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0			/* back-end copied everything: return 0 */
	RETeq				/* else fall through and copy by hand */

.Lnormal:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]		/* r4 = current pcb */


	/* Arm the fault handler: a faulting user access resumes at .Lcopyfault */
	ldr	r5, [r4, #PCB_ONFAULT]	/* r5 = previous onfault, restored on exit */
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * Align destination to word boundary.
	 *
	 * Jump-table dispatch on (dst & 3): in ARM state pc reads as the
	 * address of the ldr plus 8, which is the first .word entry; the
	 * intervening `b' is never executed, it only fills the pc+4 slot.
	 * ldrbt forces an unprivileged (user-mode) load so a bad user
	 * pointer faults through pcb_onfault instead of panicking.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lialend
	.word	.Lialend
	.word	.Lial3
	.word	.Lial2
	.word	.Lial1
.Lial3:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lial2:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lial1:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lialend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Licleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Licleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Licleanup8

	/*
	 * Align destination to cacheline boundary.
	 * If source and destination are nicely aligned, this can be a big
	 * win.  If not, it's still cheaper to copy in groups of 32 even if
	 * we don't get the nice cacheline alignment.
	 *
	 * Indexed by (dst & 0x1f); dst is already word-aligned, so the
	 * offset is a multiple of 4 and indexes the .word table directly.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Licaligned
	.word	.Licaligned
	.word	.Lical28
	.word	.Lical24
	.word	.Lical20
	.word	.Lical16
	.word	.Lical12
	.word	.Lical8
	.word	.Lical4
.Lical28:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical24:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical20:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical16:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical12:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4
.Lical8:ldrt	r7, [r0], #4
	sub	r2, r2, #4
	str	r7, [r1], #4
.Lical4:ldrt	r6, [r0], #4
	sub	r2, r2, #4
	str	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 *	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Licaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline (32 bytes, user loads via ldrt) */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x40
	bge	.Licaligned

	sub	r2, r2, #0x20

	/* Copy the final whole cacheline */
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	ldrt	r6, [r0], #4
	ldrt	r7, [r0], #4
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	stmia	r1!, {r10-r11}
	ldrt	r10, [r0], #4
	ldrt	r11, [r0], #4
	stmia	r1!, {r6-r11}

	cmp	r2, #0x08
	blt	.Liprecleanup

.Licleanup8:
	/* Copy 8 bytes at a time while at least 8 remain */
	ldrt	r8, [r0], #4
	ldrt	r9, [r0], #4
	sub	r2, r2, #8
	stmia	r1!, {r8, r9}
	cmp	r2, #8
	bge	.Licleanup8

.Liprecleanup:
	/*
	 * If we're done, bail.
	 *
	 * NOTE(review): this branches to copyout's .Lout rather than
	 * .Liout below; the two epilogues are identical (same r4/r5
	 * conventions), so this is harmless, but .Liout would be the
	 * consistent target.
	 */
	cmp	r2, #0
	beq	.Lout

.Licleanup:
	/*
	 * Byte-at-a-time tail: dispatch on (len & 3).  Entry .Lic4
	 * handles an exact multiple of four, copying 4 bytes per pass;
	 * only .Lic1 uses subs, so the flags tested by the bne at
	 * .Licend reflect the final value of r2.
	 */
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Licend
	.word	.Lic4
	.word	.Lic1
	.word	.Lic2
	.word	.Lic3
.Lic4:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic3:	ldrbt	r7, [r0], #1
	sub	r2, r2, #1
	strb	r7, [r1], #1
.Lic2:	ldrbt	r6, [r0], #1
	sub	r2, r2, #1
	strb	r6, [r1], #1
.Lic1:	ldrbt	r7, [r0], #1
	subs	r2, r2, #1
	strb	r7, [r1], #1
.Licend:
	bne	.Licleanup

.Liout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]	/* restore previous onfault handler */
	RESTORE_REGS

	RET

.Lcopyfault:
	/*
	 * Entered via pcb_onfault when one of the user accesses above
	 * faulted.  Shared by copyin and copyout: both keep r4 = pcb and
	 * r5 = saved previous onfault.
	 */
	ldr	r0, =EFAULT
	str	r5, [r4, #PCB_ONFAULT]
	RESTORE_REGS

	RET
END(copyin)

/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * r0 = kernel space address
 * r1 = user space address
 * r2 = length
 *
 * Copies bytes from kernel space to user space.  Returns 0 on success,
 * EFAULT if the range is outside user space or an access faults.
 *
 * We save/restore r4-r11:
 * r4-r11 are scratch
 */

ENTRY(copyout)
	/* Quick exit if length is zero */
	teq	r2, #0
	moveq	r0, #0
	RETeq

	/* Reject a destination range that wraps around the address space... */
	adds	r3, r1, r2
	movcs	r0, #EFAULT
	RETc(cs)

	/* ...or that extends beyond the top of user space. */
	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
	cmp	r3, r12
	movcs	r0, #EFAULT
	RETc(cs)

	/* Try the optimized memcpy back-end first, as in copyin. */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormale
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormale
	stmfd	sp!, {r0-r2, r4, lr}
	_SAVE({r0-r2, r4, lr})
	mov	r3, r0			/* swap args: memcpy(dst, src, len) */
	mov	r0, r1
	mov	r1, r3
	mov	r3, #1 /* DST_IS_USER */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* lr = return address (pc reads as . + 8) */
	ldr	pc, [r4]		/* indirect call through *_arm_memcpy */
	cmp	r0, #0			/* test back-end result; flags survive ldmfd */
	ldmfd	sp!, {r0-r2, r4, lr}
	moveq	r0, #0			/* back-end copied everything: return 0 */
	RETeq				/* else fall through and copy by hand */

.Lnormale:
	SAVE_REGS
	GET_PCB(r4)
	ldr	r4, [r4]		/* r4 = current pcb */

	/* Arm the fault handler (shares .Lcopyfault with copyin) */
	ldr	r5, [r4, #PCB_ONFAULT]	/* r5 = previous onfault, restored on exit */
	adr	r3, .Lcopyfault
	str	r3, [r4, #PCB_ONFAULT]

	PREFETCH(r0, 0)
	PREFETCH(r1, 0)

	/*
	 * If not too many bytes, take the slow path.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * Align destination to word boundary.
	 *
	 * Jump-table dispatch on (dst & 3); see copyin for the pc+8
	 * addressing trick.  strbt forces an unprivileged (user-mode)
	 * store so a bad user pointer faults through pcb_onfault.
	 */
	and	r6, r1, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lalend
	.word	.Lalend
	.word	.Lal3
	.word	.Lal2
	.word	.Lal1
.Lal3:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lal2:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lal1:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lalend:

	/*
	 * If few bytes left, finish slow.
	 */
	cmp	r2, #0x08
	blt	.Lcleanup

	/*
	 * If source is not aligned, finish slow.
	 */
	ands	r3, r0, #0x03
	bne	.Lcleanup

	cmp	r2, #0x60	/* Must be > 0x5f for unrolled cacheline */
	blt	.Lcleanup8

	/*
	 * Align source & destination to cacheline boundary.
	 * Indexed by (dst & 0x1f); dst is already word-aligned, so the
	 * offset is a multiple of 4 and indexes the .word table directly.
	 */
	and	r6, r1, #0x1f
	ldr	pc, [pc, r6]
	b	.Lcaligned
	.word	.Lcaligned
	.word	.Lcal28
	.word	.Lcal24
	.word	.Lcal20
	.word	.Lcal16
	.word	.Lcal12
	.word	.Lcal8
	.word	.Lcal4
.Lcal28:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal24:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal20:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal16:ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal12:ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4
.Lcal8:	ldr	r7, [r0], #4
	sub	r2, r2, #4
	strt	r7, [r1], #4
.Lcal4:	ldr	r6, [r0], #4
	sub	r2, r2, #4
	strt	r6, [r1], #4

	/*
	 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
	 * part of the code, and we may have knocked that down by as much
	 * as 0x1c getting aligned).
	 *
	 * This loop basically works out to:
	 * do {
	 *	prefetch-next-cacheline(s)
	 *	bytes -= 0x20;
	 *	copy cacheline
	 * } while (bytes >= 0x40);
	 * bytes -= 0x20;
	 * copy cacheline
	 */
.Lcaligned:
	PREFETCH(r0, 32)
	PREFETCH(r1, 32)

	sub	r2, r2, #0x20

	/* Copy a cacheline (32 bytes, user stores via strt) */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x40
	bge	.Lcaligned

	sub	r2, r2, #0x20

	/* Copy the final whole cacheline */
	ldmia	r0!, {r6-r11}
	strt	r6, [r1], #4
	strt	r7, [r1], #4
	ldmia	r0!, {r6-r7}
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	strt	r10, [r1], #4
	strt	r11, [r1], #4
	strt	r6, [r1], #4
	strt	r7, [r1], #4

	cmp	r2, #0x08
	blt	.Lprecleanup

.Lcleanup8:
	/* Copy 8 bytes at a time while at least 8 remain */
	ldmia	r0!, {r8-r9}
	sub	r2, r2, #8
	strt	r8, [r1], #4
	strt	r9, [r1], #4
	cmp	r2, #8
	bge	.Lcleanup8

.Lprecleanup:
	/*
	 * If we're done, bail.
	 */
	cmp	r2, #0
	beq	.Lout

.Lcleanup:
	/*
	 * Byte-at-a-time tail: dispatch on (len & 3).  Entry .Lc4
	 * handles an exact multiple of four, copying 4 bytes per pass;
	 * only .Lc1 uses subs, so the flags tested by the bne at .Lcend
	 * reflect the final value of r2.
	 */
	and	r6, r2, #0x3
	ldr	pc, [pc, r6, lsl #2]
	b	.Lcend
	.word	.Lc4
	.word	.Lc1
	.word	.Lc2
	.word	.Lc3
.Lc4:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc3:	ldrb	r7, [r0], #1
	sub	r2, r2, #1
	strbt	r7, [r1], #1
.Lc2:	ldrb	r6, [r0], #1
	sub	r2, r2, #1
	strbt	r6, [r1], #1
.Lc1:	ldrb	r7, [r0], #1
	subs	r2, r2, #1
	strbt	r7, [r1], #1
.Lcend:
	bne	.Lcleanup

.Lout:
	mov	r0, #0

	str	r5, [r4, #PCB_ONFAULT]	/* restore previous onfault handler */
	RESTORE_REGS

	RET
END(copyout)
#endif

/*
 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 *
 * Copies a single 8-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_1)
	GET_PCB(r2)
	ldr	r2, [r2]		/* r2 = current pcb */

	/*
	 * Point pcb_onfault at label 1 below.  On a fault the abort
	 * handler resumes there with r0 = EFAULT, skipping the
	 * "mov r0, #0" success path; otherwise we fall through to it.
	 */
	ldr	ip, [r2, #PCB_ONFAULT]	/* ip = saved previous onfault */
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop				/* nop padding retained from the original -- */
	nop				/* presumably isolates the faulting access on */
	nop				/* older pipelines; TODO confirm it is needed */
	ldrb	r3, [r0]		/* the access that may fault */
	nop
	nop
	nop
	strb	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]	/* restore previous onfault */
	RET
END(badaddr_read_1)

/*
 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 *
 * Copies a single 16-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_2)
	GET_PCB(r2)
	ldr	r2, [r2]		/* r2 = current pcb */

	/* Same onfault protocol as badaddr_read_1 above */
	ldr	ip, [r2, #PCB_ONFAULT]	/* ip = saved previous onfault */
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldrh	r3, [r0]		/* the access that may fault */
	nop
	nop
	nop
	strh	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]	/* restore previous onfault */
	RET
END(badaddr_read_2)

/*
 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 *
 * Copies a single 32-bit value from src to dest, returning 0 on success,
 * else EFAULT if a page fault occurred.
 */
ENTRY(badaddr_read_4)
	GET_PCB(r2)
	ldr	r2, [r2]		/* r2 = current pcb */

	/* Same onfault protocol as badaddr_read_1 above */
	ldr	ip, [r2, #PCB_ONFAULT]	/* ip = saved previous onfault */
	adr	r3, 1f
	str	r3, [r2, #PCB_ONFAULT]
	nop
	nop
	nop
	ldr	r3, [r0]		/* the access that may fault */
	nop
	nop
	nop
	str	r3, [r1]
	mov	r0, #0		/* No fault */
1:	str	ip, [r2, #PCB_ONFAULT]	/* restore previous onfault */
	RET
END(badaddr_read_4)
