xref: /openbsd/sys/lib/libkern/arch/amd64/memset.S (revision 5cd8e87f)
/*
 * Written by J.T. Conklin <jtc@netbsd.org>.
 * Public domain.
 * Adapted for NetBSD/x86_64 by Frank van der Linden <fvdl@wasabisystems.com>
 */
6f5df1827Smickey
7f5df1827Smickey#include <machine/asm.h>
8f5df1827Smickey
/*
 * void *memset(void *b, int c, size_t len)
 *
 * Fill len bytes starting at b with the low byte of c and return b.
 *
 * In:      %rdi = b, %esi = c (only %sil is used), %rdx = len
 * Out:     %rax = b
 * Scratch: %rcx, %rdx, %rdi, %r8, %r11, flags.  %r10 is handed to the
 *          RETGUARD_* macros and is not touched in between.
 *
 * The rep stos instructions store forward only while the direction
 * flag is clear; this code assumes the caller provides that, as the
 * System V AMD64 calling convention requires.
 */
ENTRY(memset)
	RETGUARD_SETUP(memset, r10)
	movzbl	%sil,%eax		/* %rax = (unsigned char)c */
	movq	%rdx,%rcx		/* %rcx = len, the rep count */
	movq	%rdi,%r11		/* stos advances %rdi; keep b */

	/*
	 * Fills of 15 bytes or fewer are not worth the cost of aligning
	 * to a word boundary, so they go straight to the final byte
	 * store.  len is unsigned, hence the unsigned condition: a
	 * signed one would class lengths with the top bit set as short.
	 */
	cmpq	$0x0f,%rcx
	jbe	1f

	/* replicate the fill byte into all eight bytes of %rax */
	movabsq	$0x0101010101010101,%rdx
	imulq	%rdx,%rax

	/*
	 * %rdx = (-b) & 7, the number of bytes up to the next 8-byte
	 * boundary (0 when already aligned); %r8 = len less those bytes.
	 */
	movq	%rdi,%rdx
	negq	%rdx
	andq	$7,%rdx
	movq	%rcx,%r8
	subq	%rdx,%r8

	movq	%rdx,%rcx		/* leading bytes, up to alignment */
	rep
	stosb

	movq	%r8,%rcx
	shrq	$3,%rcx			/* whole 8-byte words */
	rep
	stosq

	movq	%r8,%rcx		/* trailing bytes, 0..7 */
	andq	$7,%rcx
1:	rep				/* short fills enter here with %rcx = len */
	stosb
	movq	%r11,%rax		/* return the original b */
	RETGUARD_CHECK(memset, r10)
	ret
	lfence				/* NOTE(review): presumably a speculation barrier after ret; confirm */
END(memset)
57