/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.1 2005/12/20 19:28:51 christos Exp $")
#endif

/*
 * strcat -- append the NUL-terminated string at %rsi to the
 * NUL-terminated string at %rdi (System V AMD64 argument registers).
 *
 * In:       %rdi = destination string, %rsi = source string
 * Out:      %rax = the destination pointer as passed in
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Both phases (find the end of the destination, then copy the source)
 * first step byte-by-byte until the pointer they read from is 8-byte
 * aligned, then work on whole quadwords.  A quadword w is treated as
 * "may contain a NUL" when
 *
 *	(w - 0x0101010101010101) & 0x8080808080808080
 *
 * is non-zero.  The test never misses a zero byte, but a byte greater
 * than 0x80 trips it as well, so every hit is verified one byte at a
 * time and the word loop is resumed on a false alarm.
 */
ENTRY(strcat)
	movq	%rdi,%rax		/* return value = original dst */
	movabsq	$0x0101010101010101,%r8	/* per-byte 0x01 */
	movabsq	$0x8080808080808080,%r9	/* per-byte 0x80 */

	/*
	 * Phase 1a: advance one byte at a time until the destination
	 * pointer is 8-byte aligned, or its terminating NUL turns up.
	 */
.Lseek_bytes:
	testb	$7,%dil
	je	.Lseek_words
	cmpb	$0,(%rdi)
	je	.Lappend
	incq	%rdi
	jmp	.Lseek_bytes

	/*
	 * Phase 1b: scan the destination a quadword at a time.
	 * %rdi already points past the word being tested.
	 */
	_ALIGN_TEXT
.Lseek_words:
	movq	(%rdi),%rdx
	addq	$8,%rdi
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lseek_words

	/*
	 * The word test fired.  Inspect the eight bytes just passed in
	 * address order; on the first NUL, back %rdi up onto it and go
	 * copy.  The first seven checks are generated below, the last
	 * one is written out because it falls through instead.
	 */
	.irp	back,8,7,6,5,4,3,2
	cmpb	$0,-\back(%rdi)
	jne	1f
	subq	$\back,%rdi
	jmp	.Lappend
1:
	.endr

	cmpb	$0,-1(%rdi)		/* last byte of the word */
	jne	.Lseek_words		/* false alarm: keep scanning */
	subq	$1,%rdi

	/*
	 * Phase 2a: %rdi now addresses the destination's NUL.  Copy
	 * single bytes until the source pointer is 8-byte aligned;
	 * return right away if the source's NUL is copied here.
	 */
.Lappend:
	testb	$7,%sil
	je	.Lappend_words
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lappend
	ret

	/*
	 * Phase 2b: quadword copy.  The loop is entered at
	 * .Lappend_words; a word is stored only after the test says
	 * it holds no NUL.  %rdx keeps the loaded word, %rcx is
	 * scratch for the test.
	 */
	_ALIGN_TEXT
.Lappend_store:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lappend_words:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lappend_store

	/*
	 * The word test fired.  Emit the word lowest byte first,
	 * stopping just after the NUL has been stored.  Bytes one to
	 * seven share the same shape; after each non-NUL byte the next
	 * one is shifted down into %dl.
	 */
	.rept	7
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	je	.Lfinished
	shrq	$8,%rdx
	.endr

	/*
	 * Eighth byte: if it is not NUL either, the test was a false
	 * alarm; the whole word has been stored, so resume the loop.
	 */
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lappend_words

.Lfinished:
	ret