;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;------------------------------------------------------------------------------

    DEFAULT REL
    SECTION .text

;------------------------------------------------------------------------------
; VOID *
; EFIAPI
; InternalMemCopyMem (
;   IN VOID   *Destination,
;   IN VOID   *Source,
;   IN UINTN  Count
;   );
;
; ABI:   Microsoft x64 (UEFI)
; In:    rcx = Destination, rdx = Source, r8 = Count (bytes)
; Out:   rax = Destination
; Clobb: rcx, rdx, r8, r9, flags (rsi/rdi preserved; xmm0 saved/restored)
; Notes: Handles overlapping buffers: copies backward (byte-wise, DF=1) when
;        Source < Destination and the ranges overlap; otherwise copies forward
;        using non-temporal 16-byte stores for the aligned middle portion.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9 <- Last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination as return value
    jae     .0                          ; Copy forward if Source >= Destination
    cmp     r9, rdi                     ; Overlapped?
    jae     .CopyBackward               ; Copy backward if overlapped
.0:
    ; Forward copy: first move 0..15 bytes so rdi becomes 16-byte aligned,
    ; as required by the movntdq stores below.
    xor     rcx, rcx
    sub     rcx, rdi                    ; rcx <- -rdi
    and     rcx, 15                     ; rcx + rdi is 16-byte aligned
    jz      .1                          ; skip if already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; never copy more than Count bytes
    sub     r8, rcx
    rep     movsb
.1:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- trailing byte count (< 16)
    shr     rcx, 4                      ; rcx <- # of DQwords to copy
    jz      .CopyBytes
    ; [rsp + 0x18] = entry rsp + 8, the first slot of the caller-allocated
    ; shadow space; the MS x64 ABI keeps that address 16-byte aligned, so
    ; movdqa is safe here.
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 on stack
.2:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned; non-temporal
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the slow,
    jnz     .2                          ; microcoded LOOP instruction
    mfence                              ; order the non-temporal stores
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     .CopyBytes                  ; copy remaining bytes
.CopyBackward:
    mov     rsi, r9                     ; rsi <- Last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- Last byte of Destination
    std                                 ; copy downward
.CopyBytes:
    mov     rcx, r8
    rep     movsb                       ; copy remaining (or all) bytes
    cld                                 ; restore DF=0 per ABI
    pop     rdi
    pop     rsi
    ret