/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: memchr.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * memchr: find the first occurrence of a byte in a memory region.
 *
 * Stack arguments (i386; offsets are as seen after the pushl %esi below):
 *	 8(%esp)	start of the region to search
 *	12(%esp)	byte to look for (only the low 8 bits are used)
 *	16(%esp)	number of bytes to search
 *
 * Returns in %eax the address of the first matching byte, or 0 if the
 * byte does not occur within the given number of bytes.
 *
 * Register roles:
 *	%eax	current position in the region
 *	%cl	byte being searched for; once the pointer is word aligned,
 *		%ecx holds that byte replicated into all four byte lanes
 *	%edx	scratch (word under test / preloaded byte)
 *	%esi	bytes remaining; saved on entry and restored on exit
 *
 * Modifies %ecx, %edx and the flags.
 */
ENTRY(memchr)
	pushl	%esi
	movl	8(%esp),%eax
	movzbl	12(%esp),%ecx		/* zero-extend: %ecx = 0x000000cc */
	movl	16(%esp),%esi

	/*
	 * Align to word boundary.
	 * Consider unrolling loop?
	 */
	testl	%esi,%esi		/* nbytes == 0? */
	je	.Lzero
.Lalign:
	/*
	 * Compare one byte at a time until the pointer is a multiple of 4
	 * or the byte count runs out.
	 */
	testb	$3,%al
	je	.Lword_aligned
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lalign
	jmp	.Lzero			/* count exhausted before alignment */

.Lword_aligned:
	/* copy char to all bytes in word */
	movb	%cl,%ch			/* %ecx = 0x0000cccc */
	movl	%ecx,%edx
	sall	$16,%ecx		/* %ecx = 0xcccc0000 */
	orl	%edx,%ecx		/* %ecx = 0xcccccccc */

	_ALIGN_TEXT
.Lloop:
	/*
	 * Word-at-a-time scan.  XORing the word with the replicated
	 * pattern turns every byte equal to ch into zero.  Subtracting
	 * 0x01010101 then leaves the high bit set in each such byte, so
	 * the test against 0x80808080 is nonzero whenever the word holds
	 * a match.  Other byte values can set a high bit as well, which
	 * is why a nonzero result is re-checked byte by byte below.
	 */
	cmpl	$3,%esi			/* nbytes >= 4? */
	jbe	.Lbyte			/* no: finish bytewise */
	movl	(%eax),%edx
	addl	$4,%eax			/* pointer and count are advanced */
	xorl	%ecx,%edx		/* before the result is known */
	subl	$4,%esi
	subl	$0x01010101,%edx
	testl	$0x80808080,%edx
	je	.Lloop			/* no candidate byte in this word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word are
	 * equal to ch.
	 *
	 * %eax already points past the word just tested, so its four
	 * bytes are at -4(%eax) .. -1(%eax).
	 */

	/*
	 * High load-use latency on the Athlon leads to significant
	 * stalls, so we preload the next char as soon as possible
	 * instead of using cmp mem8, reg8.
	 *
	 * Alignment here avoids a stall on the Athlon, even though
	 * it's not a branch target.
	 */
	_ALIGN_TEXT
	cmpb	-4(%eax),%cl		/* 1st byte == ch? */
	movb	-3(%eax),%dl		/* preload 2nd byte; flags untouched */
	jne	1f
	subl	$4,%eax			/* step back to the matching byte */
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl			/* 2nd byte == ch? */
	movb	-2(%eax),%dl		/* preload 3rd byte */
	jne	1f
	subl	$3,%eax
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl			/* 3rd byte == ch? */
	movb	-1(%eax),%dl		/* preload 4th byte */
	jne	1f
	subl	$2,%eax
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl			/* 4th byte == ch? */
	jne	.Lloop			/* false alarm: resume the word scan */
	decl	%eax
	jmp	.Ldone

.Lbyte:
	/* Fewer than 4 bytes remain (possibly none): check them singly. */
	testl	%esi,%esi
	je	.Lzero
.Lbyte_loop:
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lbyte_loop
	/* fall through: not found */

.Lzero:
	xorl	%eax,%eax		/* not found: return 0 */

.Ldone:
	popl	%esi			/* restore the register saved on entry */
	ret
END(memchr)