1 //===-- Elementary operations for aarch64 --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
11
12 #if defined(__arm__) || defined(__aarch64__)
13
14 #include <src/string/memory_utils/elements.h>
15 #include <stddef.h> // size_t
16 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
17
18 #ifdef __ARM_NEON
19 #include <arm_neon.h>
20 #endif
21
22 namespace __llvm_libc {
23 namespace aarch64_memset {
24 #ifdef __ARM_NEON
25 struct Splat8 {
26 static constexpr size_t kSize = 8;
SplatSetSplat827 static void SplatSet(char *dst, const unsigned char value) {
28 vst1_u8((uint8_t *)dst, vdup_n_u8(value));
29 }
30 };
31
32 struct Splat16 {
33 static constexpr size_t kSize = 16;
SplatSetSplat1634 static void SplatSet(char *dst, const unsigned char value) {
35 vst1q_u8((uint8_t *)dst, vdupq_n_u8(value));
36 }
37 };
38
// With NEON available, the 8- and 16-byte set elements are the vector-store
// structs defined above.
39 using _8 = Splat8;
40 using _16 = Splat16;
41 #else
// Without NEON, fall back to the scalar 8-byte element. Repeated is declared
// outside this file; presumably Repeated<_8, 2> applies _8 twice to cover 16
// bytes -- confirm against elements.h.
42 using _8 = __llvm_libc::scalar::_8;
43 using _16 = Repeated<_8, 2>;
44 #endif // __ARM_NEON
45
// Elements of 1 to 4 bytes reuse the scalar implementations unchanged.
46 using _1 = __llvm_libc::scalar::_1;
47 using _2 = __llvm_libc::scalar::_2;
48 using _3 = __llvm_libc::scalar::_3;
49 using _4 = __llvm_libc::scalar::_4;
// Larger elements are built from two of the next smaller size. Chained is
// declared outside this file; presumably it applies each half in sequence --
// confirm against elements.h.
50 using _32 = Chained<_16, _16>;
51 using _64 = Chained<_32, _32>;
52
// Set element backed by the AArch64 `dc zva` (data cache zero by address)
// instruction.
struct ZVA {
  // Block size this element advertises. The instruction's real block size is
  // reported by DCZID_EL0, so callers must check that it is 64 bytes before
  // using this element (AArch64ZVA performs that check).
  static constexpr size_t kSize = 64;

  // Issues `dc zva` on `dst`. The instruction can only write zeros, so `value`
  // is ignored; it is accepted to match the SplatSet signature of the other
  // set elements.
  static void SplatSet(char *dst, const unsigned char value) {
    (void)value;
    // An asm statement without outputs is already implicitly volatile; the
    // keyword is spelled out so the intent is visible. The "memory" clobber
    // tells the compiler that memory is modified behind its back.
    asm volatile("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
  }
};
59
AArch64ZVA(char * dst,size_t count)60 inline static bool AArch64ZVA(char *dst, size_t count) {
61 uint64_t zva_val;
62 asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
63 if ((zva_val & 31) != 4)
64 return false;
65 SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
66 return true;
67 }
68
69 } // namespace aarch64_memset
70
71 namespace aarch64 {
72
// In this namespace every element of up to 16 bytes is the scalar
// implementation; only the 32-byte element below has a NEON variant.
73 using _1 = __llvm_libc::scalar::_1;
74 using _2 = __llvm_libc::scalar::_2;
75 using _3 = __llvm_libc::scalar::_3;
76 using _4 = __llvm_libc::scalar::_4;
77 using _8 = __llvm_libc::scalar::_8;
78 using _16 = __llvm_libc::scalar::_16;
79
80 #ifdef __ARM_NEON
81 struct N32 {
82 static constexpr size_t kSize = 32;
EqualsN3283 static bool Equals(const char *lhs, const char *rhs) {
84 uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
85 uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
86 uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16));
87 uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16));
88 uint8x16_t temp = vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1));
89 uint64_t res =
90 vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(temp, temp)), 0);
91 return res == 0;
92 }
ThreeWayCompareN3293 static int ThreeWayCompare(const char *lhs, const char *rhs) {
94 uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
95 uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
96 uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16));
97 uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16));
98 uint8x16_t temp = vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1));
99 uint64_t res =
100 vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(temp, temp)), 0);
101 if (res == 0)
102 return 0;
103 size_t index = (__builtin_ctzl(res) >> 3) << 2;
104 uint32_t l = *((const uint32_t *)(lhs + index));
105 uint32_t r = *((const uint32_t *)(rhs + index));
106 return __llvm_libc::scalar::_4::ScalarThreeWayCompare(l, r);
107 }
108 };
109
// NEON builds use the vectorized 32-byte element defined above.
110 using _32 = N32;
111 #else
// Otherwise fall back to the scalar 32-byte element.
112 using _32 = __llvm_libc::scalar::_32;
113 #endif // __ARM_NEON
114
115 } // namespace aarch64
116 } // namespace __llvm_libc
117
118 #endif // defined(__arm__) || defined(__aarch64__)
119
120 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
121