;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution.  The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php.
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;------------------------------------------------------------------------------

    DEFAULT REL
    SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  EFIAPI
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlapping buffers are handled: when Source < Destination and the last
;  source byte lies at or above Destination, the copy runs backward,
;  byte by byte; otherwise it runs forward.
;
;  In:      rcx = Destination, rdx = Source, r8 = Count
;  Out:     rax = Destination
;  Clobber: rcx, r8, r9, flags.  rsi, rdi and xmm0 are saved and restored.
;
;  Forward path: bytes are copied singly until rdi is 16-byte aligned, then
;  16 bytes at a time through xmm0 using non-temporal stores, then the
;  remaining (< 16) bytes are copied singly.
;
;  Stack:   xmm0 is spilled to [rsp + 0x18] after the two pushes, i.e. the
;           16 bytes directly above the return address.  This assumes the
;           caller provides that area (home/shadow space) and that rsp was
;           16-byte aligned at the call, as movdqa requires an aligned slot.
;  Flags:   the forward path issues rep movsb without a preceding cld, so it
;           assumes the direction flag is clear on entry.  DF is cleared
;           again before returning.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9 <- Last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination as return value
    jae     .CopyForward                ; Copy forward if Source >= Destination
    cmp     r9, rdi                     ; Overlapped?
    jae     @CopyBackward               ; Copy backward if overlapped

.CopyForward:
    xor     ecx, ecx
    sub     rcx, rdi                    ; rcx <- -rdi
    and     rcx, 15                     ; rcx + rdi is 16-byte aligned
    jz      .CopyDQwords                ; skip if Destination is already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; never copy more than Count bytes
    sub     r8, rcx                     ; r8 <- bytes left after the head
    rep     movsb                       ; copy head bytes up to the boundary

.CopyDQwords:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- # of trailing bytes (< 16)
    shr     rcx, 4                      ; rcx <- # of DQwords to copy
    jz      @CopyBytes
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 on stack

.NextDQword:                            ; invariant: rcx > 0 on entry
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi should be 16-byte aligned
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the legacy loop
    jnz     .NextDQword                 ; instruction; flags are dead here
    mfence                              ; fence after the non-temporal stores
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     @CopyBytes                  ; copy remaining bytes

@CopyBackward:
    mov     rsi, r9                     ; rsi <- Last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- Last byte of Destination
    std                                 ; rep movsb now walks downward

@CopyBytes:
    mov     rcx, r8                     ; rcx <- bytes still to copy
    rep     movsb
    cld                                 ; leave DF clear for the caller
    pop     rdi
    pop     rsi
    ret