/* $NetBSD: memcmp.S,v 1.1 2014/08/10 05:47:35 matt Exp $ */

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: memcmp.S,v 1.1 2014/08/10 05:47:35 matt Exp $")

/*
 * int memcmp(const void *src1, const void *src2, size_t n)
 *
 * In:	x0 = src1, x1 = src2, x2 = n
 * Out:	x0 = 0 if the first n bytes are equal, otherwise the difference
 *	(src1 byte - src2 byte, both taken as unsigned) of the first pair
 *	of bytes that differ.
 * Clobbers: x1-x7, x9, x10, condition flags.  Leaf routine, no stack use.
 *
 * Register roles:
 *	x9/x10	read cursors into src1/src2
 *	x2	bytes still to be compared
 *	x3	misalignment of src1 (bytes, later bits)
 *	x4/x6	data from src1/src2
 *	x5/x7	scratch for partial loads
 *
 * Invariant: whenever .Lmemcmp_last_compare is reached, x4 and x6 hold the
 * data in "memory order": the byte from the lowest address is in bits 7:0.
 * On big-endian that means the loaded value has been byte reversed.
 *
 * src1 is brought to dword alignment before the main loop.  The alignment
 * of src2 is never inspected, so its loads (and the loads in the kernel
 * 6-byte path) may be unaligned.
 */
ENTRY(memcmp)
	mov	x9, x0			/* src1 cursor */
	mov	x10, x1			/* src2 cursor */
	mov	x0, xzr			/* result stays 0 unless a diff is found */
	cbz	x2, .Lmemcmp_ret	/* n == 0: equal by definition */
#ifdef _KERNEL
	cmp	x2, #6			/* dedicated path for n == 6 */
	b.eq	.Lmemcmp_6bytes
#endif
	cmp	x2, #8
	b.lo	.Lmemcmp_lessthan8	/* 1..7 bytes: byte loop */

	ands	x3, x9, #7		/* x3 = src1 misalignment */
	b.eq	.Lmemcmp_dword_loop	/* src1 already dword aligned */

/*
 * src1 is not dword aligned.  Compare the 8 - x3 bytes up to the next
 * dword boundary of src1.
 *
 * src1 is read with an aligned load; the x3 bytes in front of src1 that
 * come along live in the same aligned dword, so the load cannot fault.
 * src2 is read starting at src2 itself (n >= 8, so all 8 bytes are inside
 * the buffer); nothing in front of src2 is ever touched.
 */
	add	x2, x2, x3		/* add unalignment to length */
	sub	x2, x2, #8		/* x2 = n - (8 - x3), always >= 1 */

	sub	x9, x9, x3		/* dword align src1 */
	ldr	x4, [x9], #8		/* aligned dword containing src1[0] */
	ldr	x6, [x10], #8		/* src2[0..7] */
	sub	x10, x10, x3		/* src2 really advanced by 8 - x3 */

	lsl	x3, x3, #3		/* convert bytes to bits */
#ifdef __AARCH64EB__
	rev	x4, x4			/* to memory order */
	rev	x6, x6
#endif
	lsr	x4, x4, x3		/* clear the x3 bytes that precede */
	lsl	x4, x4, x3		/*   src1; src1[0] stays in byte x3 */
	lsl	x6, x6, x3		/* src2[0] to byte x3, drop the rest */
	cmp	x4, x6
	b.ne	.Lmemcmp_last_compare	/* difference.  find it */

/*
 * Main loop: one dword per iteration.  Equality does not depend on byte
 * order, so the byte swap for big-endian is only done after a mismatch.
 */
.Lmemcmp_dword_loop:
	subs	x2, x2, #8
	b.lo	.Lmemcmp_finish_dword	/* fewer than 8 bytes left */
	ldr	x4, [x9], #8
	ldr	x6, [x10], #8
	cmp	x4, x6
	b.eq	.Lmemcmp_dword_loop	/* no difference.  go to loop */
#ifdef __AARCH64EB__
	rev	x4, x4			/* to memory order */
	rev	x6, x6
#endif
	b	.Lmemcmp_last_compare	/* go find the difference. */

/*
 * Fewer than 8 bytes are left.  x2 has wrapped below zero but its low
 * three bits still hold the number of remaining bytes.
 */
.Lmemcmp_finish_dword:
	tst	x2, #7
	b.eq	.Lmemcmp_ret		/* nothing left; x0 is still 0 */

	/*
	 * Gather the remaining 1..7 bytes in memory order.  Start from zero
	 * so that data left over from the last dword cannot hide a
	 * difference.  Only the relative order of the pieces matters: the
	 * word (if any) sits below the halfword, which sits below the byte.
	 */
	mov	x4, xzr
	mov	x6, xzr
	tbz	x2, #2, .Lmemcmp_finish_word
	ldr	w4, [x9], #4		/* 4 bytes -> bits 31:0 */
	ldr	w6, [x10], #4
#ifdef __AARCH64EB__
	rev	w4, w4			/* to memory order */
	rev	w6, w6
#endif

.Lmemcmp_finish_word:
	tbz	x2, #1, .Lmemcmp_finish_hword
	ldrh	w5, [x9], #2		/* 2 bytes -> bits 47:32 */
	ldrh	w7, [x10], #2
#ifdef __AARCH64EB__
	rev16	w5, w5			/* to memory order */
	rev16	w7, w7
#endif
	orr	x4, x4, x5, lsl #32
	orr	x6, x6, x7, lsl #32

.Lmemcmp_finish_hword:
	tbz	x2, #0, .Lmemcmp_last_compare
	ldrb	w5, [x9]		/* 1 byte -> bits 55:48 */
	ldrb	w7, [x10]
	orr	x4, x4, x5, lsl #48
	orr	x6, x6, x7, lsl #48
	b	.Lmemcmp_last_compare	/* go find the difference. */

/*
 * 1..7 bytes in total: plain byte loop.  x2 is biased by one so that the
 * borrow out of the subs marks the last byte; in that case ccmp forces
 * "not equal" (NZCV = 0) and the loop ends with the last pair in x4/x5.
 */
.Lmemcmp_lessthan8:
	sub	x2, x2, #1
1:	ldrb	w4, [x9], #1
	ldrb	w5, [x10], #1
	subs	x2, x2, #1
	ccmp	x4, x5, #0, cs
	b.eq	1b
	sub	x0, x4, x5		/* zero-extended bytes: -255..255 */

.Lmemcmp_ret:
	ret

#ifdef _KERNEL
/*
 * n == 6: load a word and a halfword from each source, merge them in
 * memory order and fall through into the final compare.
 */
.Lmemcmp_6bytes:
	ldr	w4, [x9], #4
	ldrh	w5, [x9]
	ldr	w6, [x10], #4
	ldrh	w7, [x10]
#ifdef __AARCH64EB__
	rev	w4, w4			/* to memory order */
	rev16	w5, w5
	rev	w6, w6
	rev16	w7, w7
#endif
	orr	x4, x4, x5, lsl #32
	orr	x6, x6, x7, lsl #32
	/* FALLTHROUGH */
#endif /* _KERNEL */

/*
 * x4 and x6 hold the data to compare in memory order.  XOR them: every
 * byte that is the same becomes 0.  Byte reversing the XOR and counting
 * leading zeros gives the bit position of the first differing byte once
 * the count is rounded down to a multiple of 8.  Shift that byte of each
 * operand down to bits 7:0, zero extend, and subtract.
 *
 * If x4 == x6 the count is 64, the mask turns it into 0, byte 0 of both
 * is compared and the result is 0, so callers need not test for equality.
 */
.Lmemcmp_last_compare:
	eor	x1, x4, x6		/* non-zero bytes mark differences */
	rev	x1, x1			/* first memory byte to the top */
	clz	x1, x1			/* find first non-zero byte */
	and	x1, x1, #0x38		/* make it byte aligned (0..56) */
	lsr	x4, x4, x1		/* shift to LSB */
	lsr	x6, x6, x1
	and	x4, x4, #0xff		/* bytes compare as unsigned char */
	and	x6, x6, #0xff
	sub	x0, x4, x6
	ret
END(memcmp)