xref: /netbsd/common/lib/libc/arch/powerpc/string/strlen.S (revision 6550d01e)
1/*	$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $ */
2
3/*-
4 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29/*----------------------------------------------------------------------*/
30
31#include <machine/asm.h>
32
33__RCSID("$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $");
34
35/*----------------------------------------------------------------------*/
36/* The algorithm here uses the following techniques:
37
38   1) Given a word 'x', we can test to see if it contains any 0 bytes
39      by subtracting 0x01010101, and seeing if any of the high bits of each
40      byte changed from 0 to 1. This works because the least significant
41      0 byte must have had no incoming carry (otherwise it's not the least
42      significant), so it is 0x00 - 0x01 == 0xff. For all other
43      byte values, either they have the high bit set initially, or when
44      1 is subtracted you get a value in the range 0x00-0x7f, none of which
45      have their high bit set. The expression here is
46      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
47      there were no 0x00 bytes in the word.
48
49   2) Given a word 'x', we can test to see _which_ byte was zero by
50      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
51      This produces 0x80 in each byte that was zero, and 0x00 in all
52      the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
53      byte, and the '| x' part ensures that bytes with the high bit set
54      produce 0x00. The addition will carry into the high bit of each byte
55      iff that byte had one of its low 7 bits set. We can then just see
56      which was the most significant bit set and divide by 8 to find how
57      many to add to the index.
58      This is from the book 'The PowerPC Compiler Writer's Guide',
59      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
60*/
61/*----------------------------------------------------------------------*/
62
63		.text
64		.align 4
65
66ENTRY(strlen)
67
68		/* Setup constants */
69		lis	%r10, 0x7f7f
70		lis	%r9, 0xfefe
71		ori	%r10, %r10, 0x7f7f
72		ori	%r9, %r9, 0xfeff
73
74		/* Mask out leading bytes on non aligned strings */
75		rlwinm.	%r8, %r3, 3, 27, 28	/* leading bits to mask */
76#ifdef _LP64
77		clrrdi	%r5, %r3, 2		/*  clear low 2 addr bits */
78#else
79		clrrwi	%r5, %r3, 2		/*  clear low 2 addr bits */
80#endif
81		li	%r0, -1
82		beq+	3f			/* skip alignment if already */
83						/* aligned */
84
85		srw	%r0, %r0, %r8		/* make 0000...1111 mask */
86
87		lwz	%r7, 0(%r5)
88		nor	%r0, %r0, %r0		/* invert mask */
89		or	%r7, %r7, %r0		/* make leading bytes != 0 */
90		b	2f
91
923:		subi	%r5, %r5, 4
93
941:		lwzu	%r7, 4(%r5)		/* fetch data word */
95
962:		nor	%r0, %r7, %r10		/* do step 1 */
97		add	%r6, %r7, %r9
98		and.	%r0, %r0, %r6
99
100		beq+	1b			/* no NUL bytes here */
101
102		and	%r8, %r7, %r10		/* ok, a NUL is somewhere */
103		or	%r7, %r7, %r10		/* do step 2 to find out */
104		add	%r0, %r8, %r10		/* where */
105		nor	%r8, %r7, %r0
106
107		cntlzw	%r0, %r8		/* offset from this word */
108		srwi	%r4, %r0, 3
109
110		add	%r4, %r5, %r4		/* r4 contains end pointer */
111		/* NOTE: Keep it so this function returns the end pointer
112		   in r4, so we can it use from other str* calls (strcat
113		   comes to mind */
114
115		subf	%r3, %r3, %r4
116		blr
117END(strlen)
118/*----------------------------------------------------------------------*/
119