; Syntax: NASM (Intel operand order), 32-bit protected-mode code.
bits 32

section .text

;-----------------------------------------------------------------------
; extern "C" int mmx_memcpy
;     (unsigned char *dest, unsigned char *src, int len);
;
; Copies len bytes from src to dest: whole 8-byte chunks go through MMX
; register mm0, and the remaining 0..7 bytes are copied with rep movsb.
;
; In:    arguments are read from the stack (dest, src, len in that
;        order, directly above the return address)
; Out:   nothing meaningful. All general-purpose registers, including
;        eax, are restored by popad before returning.
;        NOTE(review): the prototype declares an int return value, but
;        the caller only ever sees its own eax again -- confirm that no
;        caller relies on a result.
; Clobb: mm0 and flags; emms is executed before returning so the x87
;        FPU is usable again afterwards.
; Notes: len is treated as an unsigned byte count (shr / and), so a
;        negative len is NOT rejected.
;        rep movsb copies upwards only if the direction flag is clear;
;        this routine does not execute cld -- presumably the caller's
;        calling convention guarantees DF = 0, verify against callers.
;-----------------------------------------------------------------------

CHUNK_SIZE      equ 8                   ; bytes moved per movq
PUSHAD_SIZE     equ 32                  ; pushad stores 8 dword registers
ARG_DEST        equ PUSHAD_SIZE + 4     ; +4 skips the return address
ARG_SRC         equ PUSHAD_SIZE + 8
ARG_LEN         equ PUSHAD_SIZE + 12

global mmx_memcpy

        align   4

mmx_memcpy:
        pushad                          ; save every general-purpose register

        mov     edi, [esp + ARG_DEST]   ; edi = dest
        mov     esi, [esp + ARG_SRC]    ; esi = src
        mov     ecx, [esp + ARG_LEN]    ; ecx = len

        mov     edx, ecx
        and     edx, CHUNK_SIZE - 1     ; edx = len % 8, the tail byte count
        shr     ecx, 3                  ; ecx = len / 8, the chunk count
        jz      .tail                   ; shr set ZF: no whole chunk to copy

.chunk:                                 ; invariant: ecx = chunks left, > 0
        movq    mm0, qword [esi]        ; copy one 8-byte chunk via mm0
        movq    qword [edi], mm0
        add     esi, CHUNK_SIZE
        add     edi, CHUNK_SIZE
        dec     ecx                     ; dec/jnz tests only the counter;
        jnz     .chunk                  ; loopnz would also stop on ZF from add

.tail:
        mov     ecx, edx                ; ecx = 0..7 remaining bytes
        rep movsb                       ; no-op when ecx = 0

        emms                            ; leave MMX state so the FPU can be used

        popad                           ; restore all registers (incl. eax)
        ret

; --------------------------------------

%ifdef NASM_STACK_NOEXEC
section .note.GNU-stack noalloc noexec nowrite progbits
%endif