1 //===-- Elementary operations for aarch64 --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
11 
12 #include <src/string/memory_utils/elements.h>
13 #include <stddef.h> // size_t
14 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
15 
16 #ifdef __ARM_NEON
17 #include <arm_neon.h>
18 #endif
19 
20 namespace __llvm_libc {
21 namespace aarch64 {
22 
23 using _1 = __llvm_libc::scalar::_1;
24 using _2 = __llvm_libc::scalar::_2;
25 using _3 = __llvm_libc::scalar::_3;
26 using _4 = __llvm_libc::scalar::_4;
27 using _8 = __llvm_libc::scalar::_8;
28 using _16 = __llvm_libc::scalar::_16;
29 
30 #ifdef __ARM_NEON
31 struct N32 {
32   static constexpr size_t kSize = 32;
EqualsN3233   static bool Equals(const char *lhs, const char *rhs) {
34     uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
35     uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
36     uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16));
37     uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16));
38     uint8x16_t temp = vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1));
39     uint64_t res =
40         vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(temp, temp)), 0);
41     return res == 0;
42   }
ThreeWayCompareN3243   static int ThreeWayCompare(const char *lhs, const char *rhs) {
44     uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
45     uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
46     uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16));
47     uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16));
48     uint8x16_t temp = vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1));
49     uint64_t res =
50         vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(temp, temp)), 0);
51     if (res == 0)
52       return 0;
53     size_t index = (__builtin_ctzl(res) >> 3) << 2;
54     uint32_t l = *((const uint32_t *)(lhs + index));
55     uint32_t r = *((const uint32_t *)(rhs + index));
56     return __llvm_libc::scalar::_4::ScalarThreeWayCompare(l, r);
57   }
58 };
59 
60 using _32 = N32;
61 #else
62 using _32 = __llvm_libc::scalar::_32;
63 #endif // __ARM_NEON
64 
65 } // namespace aarch64
66 } // namespace __llvm_libc
67 
68 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
69