/* memcmp - compare memory
 *
 * Copyright (c) 2013-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <machine/asm.h>

#define L(l) .L ## l

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/* int memcmp (const void *src1, const void *src2, size_t limit)

   In:   x0 = src1, x1 = src2, x2 = limit (byte count, handled as an
	 unsigned quantity throughout).
   Out:  w0 = 0 when the first LIMIT bytes are equal; otherwise negative or
	 positive according to the first differing byte, compared as an
	 unsigned value.  The word-based paths return exactly -1 or 1, the
	 byte loop returns the difference of the two bytes.
   Uses: x0-x7 and the condition flags.  No stack accesses, no calls.

   Strategy by size:
     0-7     L(less8): one optional 4-byte compare, then a byte loop.
     8-16    two (possibly overlapping) 8-byte compares.
     17-32   16 bytes from the start, then the final 16 bytes.
     33+     16-byte loop; src1 is aligned first when more than 128 bytes.  */

ENTRY (memcmp)
	subs	limit, limit, 8
	b.lo	L(less8)

	/* At least 8 bytes: compare the first 8.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* LIMIT is an unsigned count, so the "more than 16 bytes" test must
	   use the unsigned condition (HI: no borrow and result non-zero).  */
	subs	limit, limit, 8
	b.hi	L(more16)

	/* 8-16 bytes: LIMIT is now in [-8..0] and SRC1/SRC2 have advanced by
	   8, so this reloads the final 8 bytes (overlapping the first 8 when
	   fewer than 16 bytes were requested).  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  The bytes
	   stepped back over have already compared equal, so LIMIT grows by
	   the same amount.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* Chain: continue only if LIMIT is still > 0 (HI) and both 8-byte
	   halves match; a failed condition forces NZCV = 0 (i.e. "ne").  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* Loop left: report a mismatch in either half, low half first.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  LIMIT is <= 0
	   here, so SRC1/SRC2 + LIMIT address the final 16 bytes of the
	   buffers (overlapping data that already compared equal).  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  On
	   little-endian the words are byte-reversed first so that the
	   unsigned compare orders them by the lowest-addressed byte.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Also entered from L(less4) with Z set, which yields 0.  */
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)
	/* 4-7 bytes: compare the first 4.  The 32-bit loads zero-extend, so
	   the 64-bit compare in L(return) is still valid.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	/* LIMIT becomes the number of bytes left to compare, 0-3.  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret

END (memcmp)
