/* memcmp - compare memory
 *
 * Copyright (c) 2013-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <machine/asm.h>

#define L(l) .L ## l

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/* memcmp: compare the first `limit` bytes at src1 and src2.

   In:   x0 = src1, x1 = src2, x2 = limit (byte count).
   Out:  w0 = result.  Zero when all bytes are equal.  Otherwise the sign
	 reflects the first differing byte compared as unsigned: paths that
	 finish through L(return) give -1 or 1, the byte loop gives the
	 difference of the two bytes.

   Strategy by size:
     0..7    bytes: optional 4-byte compare, then a byte loop (L(less8)).
     8..16   bytes: first 8 bytes, then the final 8 bytes (may overlap).
     17..32  bytes: first 16 bytes, then the final 16 bytes (may overlap).
     larger       : 16 bytes per iteration in L(loop16), then the final
		    16 bytes; src1 is 16-byte aligned first when more than
		    128 bytes are being compared.

   Only x0-x7 and the condition flags are written.  */
ENTRY (memcmp)
	/* limit = count - 8; a borrow means fewer than 8 bytes.  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Compare bytes 0-7.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = count - 16; positive means more than 16 bytes.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8..16 bytes: limit is in [-8, 0], so these loads fetch the final
	   8 bytes (overlapping bytes already known to be equal).  L(return)
	   performs the comparison itself.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Compare bytes 8-15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	bne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit = count - 32 from here on.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes
	   (limit > 96 is count > 128).  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   step back by the same amount and limit grows by it, so some bytes
	   already compared are compared again.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* The first ccmp compares the low words only while bytes remain
	   (hi); otherwise it forces NZCV = 0, i.e. "not equal".  The second
	   compares the high words only if the first left "equal".  The loop
	   therefore continues only when bytes remain and all 16 match.  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* Loop exit: work out whether the last pair loaded differs, low word
	   first because it holds the lower addresses.  */
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	bne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is zero or
	   negative here, so adding it moves both pointers back to the final
	   16 bytes of the buffers.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  */
L(return):
#ifndef __AARCH64EB__
	/* Little-endian: byte-reverse so the lowest-addressed byte becomes
	   the most significant and the unsigned compare below orders the
	   words by their first differing byte.  */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entered with flags from a compare (or with Z set from L(less4)):
	   result = 1 if not equal, else 0; negated when data1 is lower.  */
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* limit = count - 4; no carry means fewer than 4 bytes.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	/* Compare bytes 0-3.  Writing w3/w5 clears the upper halves of x3/x5,
	   so the 64-bit compare in L(return) is still valid.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	/* limit = number of bytes still to compare (0..3).  */
	adds	limit, limit, 4
	beq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* While bytes remain, compare this pair; on the last byte force
	   "not equal" so the loop ends and the subtraction decides.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret

END (memcmp)