xref: /illumos-gate/usr/src/lib/libc/i386/gen/memcpy.S (revision 55fea89d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*5d9d9091SRichard Lowe
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26*5d9d9091SRichard Lowe
27*5d9d9091SRichard Lowe	.file	"memcpy.s"
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe#include <sys/asm_linkage.h>
30*5d9d9091SRichard Lowe
31*5d9d9091SRichard Lowe	ANSI_PRAGMA_WEAK(memmove,function)
32*5d9d9091SRichard Lowe	ANSI_PRAGMA_WEAK(memcpy,function)
33*5d9d9091SRichard Lowe
34*5d9d9091SRichard Lowe#include "SYS.h"
35*5d9d9091SRichard Lowe
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * Forward copy of n bytes from src to dst; returns dst in %eax.
 * Arguments are taken from the stack: on entry 4(%esp) = dst,
 * 8(%esp) = src, 12(%esp) = n.
 *
 * The bulk is moved as n / 4 longwords, then the remaining n % 4
 * bytes are moved one at a time.  No attempt is made to align either
 * pointer, and overlapping buffers are not handled here.
 *
 * %esi is preserved on the stack and %edi is parked in %edx, so only
 * %eax, %ecx, %edx and the flags are modified.  The direction flag is
 * not touched; the code assumes it is clear on entry so that the
 * string instructions walk upward.
 */
	ENTRY(memcpy)
	pushl	%esi		/ keep caller %esi; args shift up by 4
	movl	%edi,%edx	/ keep caller %edi in scratch %edx
	movl	16(%esp),%ecx	/ %ecx = n
	movl	12(%esp),%esi	/ %esi = src
	movl	8(%esp),%edi	/ %edi = dst
	movl	%edi,%eax	/ %eax = dst, the return value

	shrl	$2,%ecx		/ %ecx = n / 4 longwords
	rep;	smovl		/ copy the longwords

	movl	16(%esp),%ecx	/ reload n; %ecx was consumed by rep
	andl	$3,%ecx		/ %ecx = n % 4 trailing bytes
	rep;	smovb		/ copy the trailing bytes

	movl	%edx,%edi	/ give the caller back %edi
	popl	%esi		/ and %esi
	ret
	SET_SIZE(memcpy)
55*5d9d9091SRichard Lowe
56*5d9d9091SRichard Lowe
/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * Copies n bytes from src to dst, giving the correct result even when
 * the two buffers overlap, and returns dst in %eax.  Arguments are
 * taken from the stack: on entry 4(%esp) = dst, 8(%esp) = src,
 * 12(%esp) = n.
 *
 * Direction:
 *   dst <= src, or dst > src + n - 1   copy forward  (.CopyRight)
 *   src <  dst <= src + n - 1          copy backward (.CopyLeft)
 *
 * Each direction has a short path (plain byte copy) and a long path.
 * The long path first moves just enough bytes to put the SOURCE
 * pointer on a 4-byte boundary, then moves longwords, then moves the
 * 0-3 bytes that are left.  The destination is not aligned.
 *
 * Register use after setup:
 *   %eax  constant 3, used as the mask for (x % 4)
 *   %ecx  count for the rep string instructions
 *   %edx  src + n - 1 during the direction test, then bytes remaining
 *   %esi  source cursor       (caller value saved on the stack)
 *   %edi  destination cursor  (caller value saved on the stack)
 *
 * The forward path assumes the direction flag is clear on entry.  The
 * backward path sets it with std and clears it again before returning.
 */
	ENTRY(memmove)
	pushl	%edi		/ save caller %edi
	movl	4+12(%esp),%ecx	/ %ecx = n (one push so far)
	pushl	%esi		/ save caller %esi
	testl	%ecx,%ecx	/ if (n == 0)
	je	.CleanupReturn	/    return (dst);
	movl	8+ 4(%esp),%edi	/ %edi = dst (two pushes so far)
	movl	8+ 8(%esp),%esi	/ %esi = src
.Common:
	movl	$3,%eax		/ heavily used constant (mask for x % 4)
	cmpl	%esi,%edi	/ flags = dst - src
				/ %edx = src + n - 1, the last source byte;
				/ leal does not disturb the flags from cmpl
	leal	-1(%esi,%ecx),%edx
	jbe	.CopyRight	/ dst <= src: forward copy is safe
	cmpl	%edx,%edi
	jbe	.CopyLeft	/ dst lands inside the source: go backward
.CopyRight:
	cmpl	$8,%ecx		/    if (n <= 8)
	jbe	.OneByteCopy	/        goto fast short copy loop
.FourByteCopy:
	movl	%ecx,%edx	/    %edx = n
	movl	%esi,%ecx
	negl	%ecx
	andl	%eax,%ecx	/    %ecx = (-src) & 3, the number of bytes
				/    up to the next 4-byte boundary
	jz	.SkipAlignRight	/    source is already aligned
	subl	%ecx,%edx	/    n > 8 here, so %edx stays positive
	rep;	smovb		/    byte copy up to the boundary
.SkipAlignRight:
	movl	%edx,%ecx
	shrl	$2,%ecx		/    %ecx = remaining / 4
	rep;	smovl		/    do the long word part
	movl	%edx,%ecx	/    compute bytes left to move
	andl	%eax,%ecx	/    %ecx = remaining % 4
	jz	.CleanupReturn
.OneByteCopy:
	rep;	smovb		/    do the byte part of copy
.CleanupReturn:
	popl	%esi		/  }
	popl	%edi		/  restore registers
	movl	4(%esp),%eax	/  return value = dst, reread from the stack
.Return:
	ret			/  return(dba);

.CopyLeft:
	std				/ reverse direction bit (RtoL)
	cmpl	$12,%ecx		/ if (n <= 12)
	ja	.BigCopyLeft		/ {
	movl	%edx,%esi		/     src = src + n - 1
	leal	-1(%ecx,%edi),%edi	/     dst = dst + n - 1
	rep;	smovb			/     do the byte copy
	cld				/     reset direction flag to LtoR
	popl	%esi			/  }
	popl	%edi			/  restore registers
	movl	4(%esp),%eax		/  return value = dst
	ret				/  return(dba);
.BigCopyLeft:				/ } else {
	xchgl	%edx,%ecx		/ %ecx = src + n - 1, %edx = n
	movl	%ecx,%esi		/ %esi = last source byte
	leal	-1(%edx,%edi),%edi	/ %edi = last destination byte
	addl	$1,%ecx			/ %ecx = src + n, one past the end
	andl	%eax,%ecx		/ %ecx = (src + n) & 3, the bytes that
					/ sit above the last 4-byte boundary
	jz	.SkipAlignLeft		/ source end is already aligned
	subl	%ecx,%edx		/ n > 12 here, so %edx stays positive
	rep;	smovb			/ byte copy down to the boundary
.SkipAlignLeft:
	movl	%edx,%ecx
	subl	%eax,%esi		/ step back 3 so that %esi and %edi
	shrl	$2,%ecx			/ address the low byte of the long
	subl	%eax,%edi		/ word; %ecx = remaining / 4
	rep;	smovl			/ do 4 byte copy RtoL
	andl	%eax,%edx		/ %edx = remaining % 4
	jz	.CleanupReturnLeft
	movl	%edx,%ecx
	addl	%eax,%esi		/ rep; smovl instruction will decrement
	addl	%eax,%edi		/ %edi, %esi by four after each copy
					/ adding 3 will restore pointers to byte
					/ before last double word copied
					/ which is where they are expected to
					/ be for the single byte copy code
	rep;	smovb
.CleanupReturnLeft:
	cld				/ reset direction flag to LtoR
	popl	%esi
	popl	%edi			/ restore registers
	movl	4(%esp),%eax		/ return value = dst
	ret				/ return(dba);
	SET_SIZE(memmove)
142