/*	$NetBSD: copy.S,v 1.18 2010/07/07 01:13:29 chs Exp $	*/

/*
 * Copyright (c) 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "assym.h"

#include <sys/errno.h>
#include <sys/syscall.h>

#include <machine/asm.h>
#include <machine/frameasm.h>

/* Load curlwp's PCB pointer into "reg" (clobbers only "reg"). */
#define GET_CURPCB(reg)	\
	movq	CPUVAR(CURLWP), reg; \
	movq	L_PCB(reg), reg

/*
 * These are arranged so that the abnormal case is a forwards
 * conditional branch - which will be predicted not-taken by
 * both Intel and AMD processors.
 */
#define DEFERRED_SWITCH_CHECK \
	CHECK_DEFERRED_SWITCH			; \
	jnz	99f				; \
98:

#define DEFERRED_SWITCH_CALL \
99:						; \
	call	_C_LABEL(do_pmap_load)		; \
	jmp	98b

/*
 * The following primitives are to copy regions of memory.
 * Label must be before all copy functions.
 */
	.text

x86_copyfunc_start:	.globl	x86_copyfunc_start

/*
 * Handle deferred pmap switch.  We must re-enable preemption without
 * making a function call, so that the program counter is visible to
 * cpu_kpreempt_exit().  It can then know if it needs to restore the
 * pmap on returning, because a preemption occurred within one of the
 * copy functions.
 */
ENTRY(do_pmap_load)
	pushq	%rbp
	movq	%rsp,%rbp
	pushq	%rdi			/* preserve the copy function's args */
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%rbx
	movq	CPUVAR(CURLWP), %rbx
1:
	incl	L_NOPREEMPT(%rbx)	/* block preemption around pmap_load() */
	call	_C_LABEL(pmap_load)
	decl	L_NOPREEMPT(%rbx)
	jnz	2f			/* still non-preemptible: skip kpreempt */
	cmpl	$0, L_DOPREEMPT(%rbx)
	jz	2f
	xorq	%rdi, %rdi
	call	_C_LABEL(kpreempt)
2:
	cmpl	$0, CPUVAR(WANT_PMAPLOAD)	/* raced with another request? */
	jnz	1b				/* then load again */
	popq	%rbx
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	leaveq
	ret

/*
 * int kcopy(const void *from, void *to, size_t len);
 * Copy len bytes, abort on fault.
 *
 * Copy routines from and to userland, plus a few more. See the
 * section 9 manpages for info. Some cases can be optimized more.
 *
 * I wonder if it's worthwhile to make these use SSE2 registers?
 * (dsl) Not from info I've read from the AMD guides.
 *
 * Also note that the setup time for 'rep movs' is horrid - especially on P4
 * netburst - but on my AMD X2 it manages one copy (read+write) per clock
 * which can be achieved with a code loop, but is probably impossible to beat.
 * However the use of 'rep movsb' for the final bytes should be killed.
 *
 * Newer Intel cpus have a much lower setup time, and may (someday)
 * be able to do cache-line size copies....
 */

ENTRY(kcopy)
	xchgq	%rdi,%rsi		/* args are (from, to); movs needs dst in %rdi */
	movq	%rdx,%rcx		/* %rcx = byte count for rep */
.Lkcopy_start:
	movq	%rdi,%rax
	subq	%rsi,%rax
	cmpq	%rcx,%rax		# overlapping?
	jb	1f
	# nope, copy forward
	shrq	$3,%rcx			# copy by 64-bit words
	rep
	movsq

	movq	%rdx,%rcx
	andl	$7,%ecx			# any bytes left?
	rep
	movsb

	xorq	%rax,%rax		/* return 0 on success */
	ret

# Using 'rep movs' to copy backwards is not as fast as for forwards copies
# and ought not be done when the copy doesn't actually overlap.
# However kcopy() isn't called from anywhere that looks even vaguely hot.
# I'm also not sure it is ever asked to do overlapping copies!

1:	addq	%rcx,%rdi		# copy backward
	addq	%rcx,%rsi
	std				# direction flag set until the cld below
	andq	$7,%rcx			# any fractional bytes?
	decq	%rdi			/* backward movsb wants last-byte addresses */
	decq	%rsi
	rep
	movsb
	movq	%rdx,%rcx		# copy remainder by 64-bit words
	shrq	$3,%rcx
	subq	$7,%rsi			/* step back to the last full qword */
	subq	$7,%rdi
	rep
	movsq
	cld
.Lkcopy_end:
	xorq	%rax,%rax		/* return 0 on success */
	ret

/*
 * int copyout(const void *kaddr, void *uaddr, size_t len);
 * Copy len bytes out to user space; 0 on success, EFAULT via copy_efault.
 */
ENTRY(copyout)
	DEFERRED_SWITCH_CHECK

	xchgq	%rdi,%rsi		# kernel address to %rsi, user to %rdi
	movq	%rdx,%rax		# save transfer length (bytes)

	addq	%rdi,%rdx		# end address to %rdx
	jc	_C_LABEL(copy_efault)	# jump if wraps
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	# jump if end in kernel space

.Lcopyout_start:
	movq	%rax,%rcx		# length
	shrq	$3,%rcx			# count of 8-byte words
	rep
	movsq				# copy from %rsi to %rdi
	movb	%al,%cl
	andb	$7,%cl			# remaining number of bytes
	rep
	movsb				# copy remaining bytes
.Lcopyout_end:
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL

/*
 * int copyin(const void *uaddr, void *kaddr, size_t len);
 * Copy len bytes in from user space; 0 on success, EFAULT via copy_efault.
 */
ENTRY(copyin)
	DEFERRED_SWITCH_CHECK

	xchgq	%rdi,%rsi
	movq	%rdx,%rax

	addq	%rsi,%rdx		# check source address not wrapped
	jc	_C_LABEL(copy_efault)
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	# j if end in kernel space

.Lcopyin_start:
3:	/* bcopy(%rsi, %rdi, %rax); */
	movq	%rax,%rcx
	shrq	$3,%rcx
	rep
	movsq
	movb	%al,%cl
	andb	$7,%cl
	rep
	movsb
.Lcopyin_end:
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL

NENTRY(copy_efault)
	movq	$EFAULT,%rax		/* falls through into kcopy_fault's ret */

/*
 * kcopy_fault is used by kcopy and copy_fault is used by copyin/out.
 *
 * they're distinguished for lazy pmap switching.  see trap().
 */

NENTRY(kcopy_fault)
	ret

NENTRY(copy_fault)
	ret

/*
 * int copyoutstr(const void *kaddr, void *uaddr, size_t len, size_t *done);
 * %r8 = (possibly clamped) max length, %r9 = done pointer; both are
 * consumed by copystr_return to compute the copied-byte count.
 */
ENTRY(copyoutstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Get min(%rdx, VM_MAXUSER_ADDRESS-%rdi).
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rdi,%rax
	jc	_C_LABEL(copystr_efault)
	cmpq	%rdx,%rax
	jae	1f
	movq	%rax,%rdx		/* clamp count to the user-space limit */
	movq	%rax,%r8
.Lcopyoutstr_start:
1:	incq	%rdx

1:	decq	%rdx
	jz	2f			/* limit hit before the NUL */
	lodsb
	stosb
	testb	%al,%al
	jnz	1b
.Lcopyoutstr_end:
	/* Success -- 0 byte reached. */
	decq	%rdx
	xorq	%rax,%rax
	jmp	copystr_return

2:	/* %rdx is zero -- return EFAULT or ENAMETOOLONG. */
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rdi
	jae	_C_LABEL(copystr_efault)
	movq	$ENAMETOOLONG,%rax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL

/*
 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done);
 * Mirror image of copyoutstr: the user address is the source (%rsi).
 */
ENTRY(copyinstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Get min(%rdx, VM_MAXUSER_ADDRESS-%rsi).
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rsi,%rax
	jc	_C_LABEL(copystr_efault)
	cmpq	%rdx,%rax
	jae	1f
	movq	%rax,%rdx		/* clamp count to the user-space limit */
	movq	%rax,%r8
.Lcopyinstr_start:
1:	incq	%rdx

1:	decq	%rdx
	jz	2f
	lodsb
	stosb
	testb	%al,%al
	jnz	1b
.Lcopyinstr_end:

	/* Success -- 0 byte reached. */
	decq	%rdx
	xorq	%rax,%rax
	jmp	copystr_return

2:	/* %rdx is zero -- return EFAULT or ENAMETOOLONG. */
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rsi
	jae	_C_LABEL(copystr_efault)
	movq	$ENAMETOOLONG,%rax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL

ENTRY(copystr_efault)
	movl	$EFAULT,%eax		/* falls through into copystr_fault */

ENTRY(copystr_fault)
copystr_return:
	/* Set *lencopied and return %eax. */
	testq	%r9,%r9
	jz	8f			/* no done-pointer supplied */
	subq	%rdx,%r8		/* %r8 = bytes actually copied */
	movq	%r8,(%r9)

8:	ret

/*
 * int copystr(const void *from, void *to, size_t maxlen, size_t *done);
 * Kernel-to-kernel string copy; no fault handling required.
 */
ENTRY(copystr)
	xchgq	%rdi,%rsi
	movq	%rdx,%r8

	incq	%rdx

1:	decq	%rdx
	jz	4f			/* maxlen exhausted before the NUL */
	lodsb
	stosb
	testb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached. */
	decq	%rdx
	xorl	%eax,%eax
	jmp	6f

4:	/* %rdx is zero -- return ENAMETOOLONG. */
	movl	$ENAMETOOLONG,%eax

6:	/* Set *lencopied and return %eax.
	 */
	testq	%rcx,%rcx
	jz	7f			/* no done-pointer supplied */
	subq	%rdx,%r8		/* %r8 = bytes actually copied */
	movq	%r8,(%rcx)

7:	ret

/*
 * Fetch a value from user space; returns -1 (via fusufailure) on fault
 * or out-of-range address.
 * NOTE(review): loads only 32 bits (movl) and bounds-checks with -4;
 * confirm callers expect an int-sized, not long-sized, fetch here.
 */
ENTRY(fuword)
	DEFERRED_SWITCH_CHECK
	movq	$VM_MAXUSER_ADDRESS-4,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)
	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusufailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)	/* arm the fault handler */
	movl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)	/* disarm */
	ret
	DEFERRED_SWITCH_CALL

/* Fetch a 16-bit word from user space; -1 on fault. */
ENTRY(fusword)
	DEFERRED_SWITCH_CHECK
	movq	$VM_MAXUSER_ADDRESS-2,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)
	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusufailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)
	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	ret
	DEFERRED_SWITCH_CALL

/*
 * Interrupt-safe fusword: fails (rather than calling do_pmap_load)
 * when the TLB state is not valid, and uses the *intr fault handler.
 */
ENTRY(fuswintr)
	cmpl	$TLBSTATE_VALID, CPUVAR(TLBSTATE)
	jnz	_C_LABEL(fusuaddrfault)
	movq	$VM_MAXUSER_ADDRESS-2,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)
	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusuintrfailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)
	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	ret

/* Fetch one byte from user space; -1 on fault. */
ENTRY(fubyte)
	DEFERRED_SWITCH_CHECK
	movq	$VM_MAXUSER_ADDRESS-1,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)
	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusufailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)
	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	ret
	DEFERRED_SWITCH_CALL

/*
 * Store a value to user space; returns 0, or -1 (via fusufailure) on fault.
 * NOTE(review): stores 8 bytes (movq) but bounds-checks with -4;
 * confirm the bound matches the intended access size.
 */
ENTRY(suword)
	DEFERRED_SWITCH_CHECK
	movq	$VM_MAXUSER_ADDRESS-4,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)

	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusufailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)

	movq	%rsi,(%rdi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%rcx)
	ret
	DEFERRED_SWITCH_CALL

/* Store a 16-bit word to user space; 0 on success, -1 on fault. */
ENTRY(susword)
	DEFERRED_SWITCH_CHECK
	movq	$VM_MAXUSER_ADDRESS-2,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)

	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusufailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)

	movw	%si,(%rdi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%rcx)
	ret
	DEFERRED_SWITCH_CALL

/*
 * Interrupt-safe susword: fails when the TLB state is not valid,
 * and uses the *intr fault handler.
 */
ENTRY(suswintr)
	cmpl	$TLBSTATE_VALID, CPUVAR(TLBSTATE)
	jnz	_C_LABEL(fusuaddrfault)
	movq	$VM_MAXUSER_ADDRESS-2,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)
	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusuintrfailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)
	movw	%si,(%rdi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%rcx)
	ret

/* Store one byte to user space; 0 on success, -1 on fault. */
ENTRY(subyte)
	DEFERRED_SWITCH_CHECK
	movq	$VM_MAXUSER_ADDRESS-1,%r11
	cmpq	%r11,%rdi
	ja	_C_LABEL(fusuaddrfault)

	GET_CURPCB(%rcx)
	leaq	_C_LABEL(fusufailure)(%rip),%r11
	movq	%r11,PCB_ONFAULT(%rcx)

	movb	%sil,(%rdi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%rcx)
	ret
	DEFERRED_SWITCH_CALL

/*
 * These are the same, but must reside at different addresses,
 * because trap.c checks for them.
 */
ENTRY(fusuintrfailure)
	movq	$0,PCB_ONFAULT(%rcx)
	movl	$-1,%eax
	ret

ENTRY(fusufailure)
	movq	$0,PCB_ONFAULT(%rcx)
	movl	$-1,%eax
	ret

ENTRY(fusuaddrfault)
	movl	$-1,%eax
	ret

/*
 * Compare-and-swap the 64-bit integer in the user-space.
 *
 * int ucas_64(volatile int64_t *uptr, int64_t old, int64_t new, int64_t *ret);
 */
ENTRY(ucas_64)
	DEFERRED_SWITCH_CHECK
	/* Fail if kernel-space */
	movq	$VM_MAXUSER_ADDRESS-8, %r8
	cmpq	%r8, %rdi
	ja	_C_LABEL(ucas_fault)
	movq	%rsi, %rax
.Lucas64_start:
	/* Perform the CAS */
	lock
	cmpxchgq	%rdx, (%rdi)
.Lucas64_end:
	/*
	 * Note: %rax is "old" value.
	 * Set the return values.
	 */
	movq	%rax, (%rcx)
	xorq	%rax, %rax		/* return 0; *ret tells caller if CAS won */
	ret
	DEFERRED_SWITCH_CALL

/*
 * int ucas_32(volatile int32_t *uptr, int32_t old, int32_t new, int32_t *ret);
 */
ENTRY(ucas_32)
	DEFERRED_SWITCH_CHECK
	/* Fail if kernel-space */
	movq	$VM_MAXUSER_ADDRESS-4, %r8
	cmpq	%r8, %rdi
	ja	_C_LABEL(ucas_fault)
	movl	%esi, %eax
.Lucas32_start:
	/* Perform the CAS */
	lock
	cmpxchgl	%edx, (%rdi)
.Lucas32_end:
	/*
	 * Note: %eax is "old" value.
	 * Set the return values.
	 */
	movl	%eax, (%rcx)
	xorq	%rax, %rax
	ret
	DEFERRED_SWITCH_CALL

/*
 * Fault handler for ucas_*().
 * Just return the error set by trap().
 */
NENTRY(ucas_fault)
	ret

/*
 * int ucas_ptr(volatile void **uptr, void *old, void *new, void **ret);
 * int ucas_int(volatile int *uptr, int old, int new, int *ret);
 */
STRONG_ALIAS(ucas_ptr, ucas_64)
STRONG_ALIAS(ucas_int, ucas_32)

/*
 * Label must be after all copy functions.
 */
x86_copyfunc_end:	.globl	x86_copyfunc_end

/*
 * Fault table of copy functions for trap():
 * (start, end, handler) triples; a fault with PC inside [start, end)
 * is redirected to the handler.  Terminated by a zero entry.
 */
	.section ".rodata"
	.globl _C_LABEL(onfault_table)
_C_LABEL(onfault_table):
	.quad	.Lcopyin_start
	.quad	.Lcopyin_end
	.quad	_C_LABEL(copy_fault)

	.quad	.Lcopyout_start
	.quad	.Lcopyout_end
	.quad	_C_LABEL(copy_fault)

	.quad	.Lkcopy_start
	.quad	.Lkcopy_end
	.quad	_C_LABEL(kcopy_fault)

	.quad	.Lcopyoutstr_start
	.quad	.Lcopyoutstr_end
	.quad	_C_LABEL(copystr_fault)

	.quad	.Lcopyinstr_start
	.quad	.Lcopyinstr_end
	.quad	_C_LABEL(copystr_fault)

	.quad	.Lucas64_start
	.quad	.Lucas64_end
	.quad	_C_LABEL(ucas_fault)

	.quad	.Lucas32_start
	.quad	.Lucas32_end
	.quad	_C_LABEL(ucas_fault)

	.quad	0	/* terminate */

	.text