xref: /minix/lib/libc/arch/i386/string/swab.S (revision 0a6a1f1d)
1/*
2 * Written by J.T. Conklin <jtc@NetBSD.org>.
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9	RCSID("$NetBSD: swab.S,v 1.14 2014/05/23 02:34:19 uebayasi Exp $")
10#endif
11
/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc, at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on an i386.
 */
17
/*
 * void swab(const void *src, void *dst, ssize_t nbytes)
 *
 * Copy nbytes bytes from src to dst, exchanging each pair of adjacent
 * bytes.  The copy is done in 16-bit words: if nbytes is odd, the final
 * byte is not copied.  If nbytes is zero or negative, nothing is copied
 * (POSIX requires a negative count to be a no-op).
 *
 * Calling convention: i386, arguments on the stack.
 *   In:     12(%esp) = src, 16(%esp) = dst, 20(%esp) = nbytes
 *           (offsets as seen after the two pushes below)
 *   Out:    none
 *   Saved:  %esi, %edi (pushed on entry, popped before return)
 *   Clobb:  %eax, %ecx, flags
 *
 * lodsw/stosw advance %esi/%edi according to the direction flag; no cld
 * is issued here, so the routine relies on DF being clear on entry, as
 * the i386 System V ABI specifies.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# %esi = source pointer
	movl	16(%esp),%edi		# %edi = destination pointer
	movl	20(%esp),%ecx		# %ecx = signed byte count

	testl	%ecx,%ecx		# zero or negative count: nothing to do.
	jle	L4			# (shrl below would turn a negative
					# count into a huge word count.)

	shrl	$1,%ecx			# %ecx = number of 16-bit words
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	L2			# while swapping alternate bytes.
	_ALIGN_TEXT,0x90
L1:	lodsw				# %ax = next source word
	rorw	$8,%ax			# exchange its two bytes
	stosw				# store to destination
	decl	%ecx
	testl	$7,%ecx			# loop until the remaining word count
	jnz	L1			# is a multiple of 8

L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	L4			# while swapping alternate bytes.
	_ALIGN_TEXT,0x90
L3:	lodsw				# word 1 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 2 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 3 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 4 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 5 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 6 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 7 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 8 of 8
	rorw	$8,%ax
	stosw
	decl	%ecx			# one fewer group of 8 words left
	jnz	L3

L4:	popl	%edi			# restore callee-saved registers
	popl	%esi
	ret
END(swab)
70