1 //===-- Elementary operations to compose memory primitives ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
11
12 #include <stddef.h> // size_t
13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
14
15 #include "src/__support/endian.h"
16 #include "src/string/memory_utils/utils.h"
17
18 namespace __llvm_libc {
19
20 // Elementary Operations
21 // --------------------------------
22 // We define abstract elementary operations acting on fixed chunks of memory.
23 // These are low level building blocks that are meant to be assembled to compose
24 // higher order abstractions. Each function is defined twice: once with
25 // fixed-size operations, and once with runtime-size operations.
26
27 // Fixed-size copies from 'src' to 'dst'.
28 template <typename Element>
Copy(char * __restrict dst,const char * __restrict src)29 void Copy(char *__restrict dst, const char *__restrict src) {
30 Element::Copy(dst, src);
31 }
32 // Runtime-size copies from 'src' to 'dst'.
33 template <typename Element>
Copy(char * __restrict dst,const char * __restrict src,size_t size)34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
35 Element::Copy(dst, src, size);
36 }
37
38 // Fixed-size equality between 'lhs' and 'rhs'.
Equals(const char * lhs,const char * rhs)39 template <typename Element> bool Equals(const char *lhs, const char *rhs) {
40 return Element::Equals(lhs, rhs);
41 }
42 // Runtime-size equality between 'lhs' and 'rhs'.
43 template <typename Element>
Equals(const char * lhs,const char * rhs,size_t size)44 bool Equals(const char *lhs, const char *rhs, size_t size) {
45 return Element::Equals(lhs, rhs, size);
46 }
47
48 // Fixed-size three-way comparison between 'lhs' and 'rhs'.
49 template <typename Element>
ThreeWayCompare(const char * lhs,const char * rhs)50 int ThreeWayCompare(const char *lhs, const char *rhs) {
51 return Element::ThreeWayCompare(lhs, rhs);
52 }
53 // Runtime-size three-way comparison between 'lhs' and 'rhs'.
54 template <typename Element>
ThreeWayCompare(const char * lhs,const char * rhs,size_t size)55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
56 return Element::ThreeWayCompare(lhs, rhs, size);
57 }
58
59 // Fixed-size initialization.
60 template <typename Element>
SplatSet(char * dst,const unsigned char value)61 void SplatSet(char *dst, const unsigned char value) {
62 Element::SplatSet(dst, value);
63 }
64 // Runtime-size initialization.
65 template <typename Element>
SplatSet(char * dst,const unsigned char value,size_t size)66 void SplatSet(char *dst, const unsigned char value, size_t size) {
67 Element::SplatSet(dst, value, size);
68 }
69
70 // Fixed-size Higher-Order Operations
71 // ----------------------------------
72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
73 // - Chained<Types...>: Chain the operation of several types.
74
75 // Repeat the operation several times in a row.
76 template <typename Element, size_t ElementCount> struct Repeated {
77 static constexpr size_t kSize = ElementCount * Element::kSize;
78
CopyRepeated79 static void Copy(char *__restrict dst, const char *__restrict src) {
80 for (size_t i = 0; i < ElementCount; ++i) {
81 const size_t offset = i * Element::kSize;
82 Element::Copy(dst + offset, src + offset);
83 }
84 }
85
EqualsRepeated86 static bool Equals(const char *lhs, const char *rhs) {
87 for (size_t i = 0; i < ElementCount; ++i) {
88 const size_t offset = i * Element::kSize;
89 if (!Element::Equals(lhs + offset, rhs + offset))
90 return false;
91 }
92 return true;
93 }
94
ThreeWayCompareRepeated95 static int ThreeWayCompare(const char *lhs, const char *rhs) {
96 for (size_t i = 0; i < ElementCount; ++i) {
97 const size_t offset = i * Element::kSize;
98 // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'.
99 if (Element::Equals(lhs + offset, rhs + offset))
100 continue;
101 return Element::ThreeWayCompare(lhs + offset, rhs + offset);
102 }
103 return 0;
104 }
105
SplatSetRepeated106 static void SplatSet(char *dst, const unsigned char value) {
107 for (size_t i = 0; i < ElementCount; ++i) {
108 const size_t offset = i * Element::kSize;
109 Element::SplatSet(dst + offset, value);
110 }
111 }
112 };
113
114 // Chain the operation of several types.
115 // For instance, to handle a 3 bytes operation, one can use:
116 // Chained<UINT16, UINT8>::Operation();
117 template <typename... Types> struct Chained;
118
119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
120 static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
121
122 static void Copy(char *__restrict dst, const char *__restrict src) {
123 Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
124 __llvm_libc::Copy<Head>(dst, src);
125 }
126
127 static bool Equals(const char *lhs, const char *rhs) {
128 if (!__llvm_libc::Equals<Head>(lhs, rhs))
129 return false;
130 return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
131 }
132
133 static int ThreeWayCompare(const char *lhs, const char *rhs) {
134 if (__llvm_libc::Equals<Head>(lhs, rhs))
135 return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
136 rhs + Head::kSize);
137 return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
138 }
139
140 static void SplatSet(char *dst, const unsigned char value) {
141 Chained<Tail...>::SplatSet(dst + Head::kSize, value);
142 __llvm_libc::SplatSet<Head>(dst, value);
143 }
144 };
145
146 template <> struct Chained<> {
147 static constexpr size_t kSize = 0;
148 static void Copy(char *__restrict dst, const char *__restrict src) {}
149 static bool Equals(const char *lhs, const char *rhs) { return true; }
150 static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
151 static void SplatSet(char *dst, const unsigned char value) {}
152 };
153
154 // Runtime-size Higher-Order Operations
155 // ------------------------------------
156 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
157 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes
158 // of the buffer.
159 // - Loop<T>: Perform a loop of fixed-sized operations.
160
161 // Perform the operation on the last 'T::kSize' bytes of the buffer.
162 //
163 // e.g. with
164 // [1234567812345678123]
165 // [__XXXXXXXXXXXXXX___]
166 // [________XXXXXXXX___]
167 //
168 // Precondition: `size >= T::kSize`.
169 template <typename T> struct Tail {
170 static void Copy(char *__restrict dst, const char *__restrict src,
171 size_t size) {
172 return T::Copy(dst + offset(size), src + offset(size));
173 }
174
175 static bool Equals(const char *lhs, const char *rhs, size_t size) {
176 return T::Equals(lhs + offset(size), rhs + offset(size));
177 }
178
179 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
180 return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
181 }
182
183 static void SplatSet(char *dst, const unsigned char value, size_t size) {
184 return T::SplatSet(dst + offset(size), value);
185 }
186
187 static size_t offset(size_t size) { return size - T::kSize; }
188 };
189
190 // Perform the operation on the first and last 'T::kSize' bytes of the buffer.
191 // This is useful for overlapping operations.
192 //
193 // e.g. with
194 // [1234567812345678123]
195 // [__XXXXXXXXXXXXXX___]
196 // [__XXXXXXXX_________]
197 // [________XXXXXXXX___]
198 //
199 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
200 template <typename T> struct HeadTail {
201 static void Copy(char *__restrict dst, const char *__restrict src,
202 size_t size) {
203 T::Copy(dst, src);
204 Tail<T>::Copy(dst, src, size);
205 }
206
207 static bool Equals(const char *lhs, const char *rhs, size_t size) {
208 if (!T::Equals(lhs, rhs))
209 return false;
210 return Tail<T>::Equals(lhs, rhs, size);
211 }
212
213 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
214 if (!T::Equals(lhs, rhs))
215 return T::ThreeWayCompare(lhs, rhs);
216 return Tail<T>::ThreeWayCompare(lhs, rhs, size);
217 }
218
219 static void SplatSet(char *dst, const unsigned char value, size_t size) {
220 T::SplatSet(dst, value);
221 Tail<T>::SplatSet(dst, value, size);
222 }
223 };
224
225 // Simple loop ending with a Tail operation.
226 //
227 // e.g. with
228 // [12345678123456781234567812345678]
229 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
230 // [__XXXXXXXX_______________________]
231 // [__________XXXXXXXX_______________]
232 // [__________________XXXXXXXX_______]
233 // [______________________XXXXXXXX___]
234 //
235 // Precondition:
236 // - size >= T::kSize
237 template <typename T> struct Loop {
238 static void Copy(char *__restrict dst, const char *__restrict src,
239 size_t size) {
240 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
241 T::Copy(dst + offset, src + offset);
242 Tail<T>::Copy(dst, src, size);
243 }
244
245 static bool Equals(const char *lhs, const char *rhs, size_t size) {
246 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
247 if (!T::Equals(lhs + offset, rhs + offset))
248 return false;
249 return Tail<T>::Equals(lhs, rhs, size);
250 }
251
252 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
253 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
254 if (!T::Equals(lhs + offset, rhs + offset))
255 return T::ThreeWayCompare(lhs + offset, rhs + offset);
256 return Tail<T>::ThreeWayCompare(lhs, rhs, size);
257 }
258
259 static void SplatSet(char *dst, const unsigned char value, size_t size) {
260 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
261 T::SplatSet(dst + offset, value);
262 Tail<T>::SplatSet(dst, value, size);
263 }
264 };
265
266 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
267
268 namespace internal {
269
270 // Provides a specialized Bump function that adjusts pointers and size so first
271 // argument (resp. second argument) gets aligned to Alignment.
272 // We make sure the compiler knows about the adjusted pointer alignment.
273 template <Arg arg, size_t Alignment> struct AlignHelper {};
274
275 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
276 template <typename T1, typename T2>
277 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
278 const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
279 p1ref += offset;
280 p2ref += offset;
281 size -= offset;
282 p1ref = assume_aligned<Alignment>(p1ref);
283 }
284 };
285
286 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
287 template <typename T1, typename T2>
288 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
289 const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
290 p1ref += offset;
291 p2ref += offset;
292 size -= offset;
293 p2ref = assume_aligned<Alignment>(p2ref);
294 }
295 };
296
297 } // namespace internal
298
299 // An alignment operation that:
300 // - executes the 'AlignmentT' operation
301 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
302 // pointer gets aligned, size is decreased accordingly.
303 // - calls the 'NextT' operation.
304 //
305 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
306 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
307 template <typename AlignmentT, Arg AlignOn> struct Align {
308 private:
309 static constexpr size_t Alignment = AlignmentT::kSize;
310 static_assert(Alignment > 1, "Alignment must be more than 1");
311 static_assert(is_power2(Alignment), "Alignment must be a power of 2");
312
313 public:
314 template <typename NextT> struct Then {
315 static void Copy(char *__restrict dst, const char *__restrict src,
316 size_t size) {
317 AlignmentT::Copy(dst, src);
318 internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
319 NextT::Copy(dst, src, size);
320 }
321
322 static bool Equals(const char *lhs, const char *rhs, size_t size) {
323 if (!AlignmentT::Equals(lhs, rhs))
324 return false;
325 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
326 return NextT::Equals(lhs, rhs, size);
327 }
328
329 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
330 if (!AlignmentT::Equals(lhs, rhs))
331 return AlignmentT::ThreeWayCompare(lhs, rhs);
332 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
333 return NextT::ThreeWayCompare(lhs, rhs, size);
334 }
335
336 static void SplatSet(char *dst, const unsigned char value, size_t size) {
337 AlignmentT::SplatSet(dst, value);
338 char *dummy = nullptr;
339 internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
340 NextT::SplatSet(dst, value, size);
341 }
342 };
343 };
344
345 // Fixed-size Builtin Operations
346 // -----------------------------
347 // Note: Do not use 'builtin' right now as it requires the implementation of the
348 // `_inline` versions of all the builtins. Theoretically, Clang can still turn
349 // them into calls to the C library leading to reentrancy problems.
350 namespace builtin {
351
352 #ifndef __has_builtin
353 #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
354 #endif
355
356 template <size_t Size> struct Builtin {
357 static constexpr size_t kSize = Size;
358
359 static void Copy(char *__restrict dst, const char *__restrict src) {
360 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
361 ForLoopCopy(dst, src);
362 #elif __has_builtin(__builtin_memcpy_inline)
363 // __builtin_memcpy_inline guarantees to never call external functions.
364 // Unfortunately it is not widely available.
365 __builtin_memcpy_inline(dst, src, kSize);
366 #elif __has_builtin(__builtin_memcpy)
367 __builtin_memcpy(dst, src, kSize);
368 #else
369 ForLoopCopy(dst, src);
370 #endif
371 }
372
373 #if __has_builtin(__builtin_memcmp_inline)
374 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline
375 #else
376 #define LLVM_LIBC_MEMCMP __builtin_memcmp
377 #endif
378
379 static bool Equals(const char *lhs, const char *rhs) {
380 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0;
381 }
382
383 static int ThreeWayCompare(const char *lhs, const char *rhs) {
384 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize);
385 }
386
387 static void SplatSet(char *dst, const unsigned char value) {
388 __builtin_memset(dst, value, kSize);
389 }
390
391 private:
392 // Copies `kSize` bytes from `src` to `dst` using a for loop.
393 // This code requires the use of `-fno-buitin-memcpy` to prevent the compiler
394 // from turning the for-loop back into `__builtin_memcpy`.
395 static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
396 for (size_t i = 0; i < kSize; ++i)
397 dst[i] = src[i];
398 }
399 };
400
401 using _1 = Builtin<1>;
402 using _2 = Builtin<2>;
403 using _3 = Builtin<3>;
404 using _4 = Builtin<4>;
405 using _8 = Builtin<8>;
406 using _16 = Builtin<16>;
407 using _32 = Builtin<32>;
408 using _64 = Builtin<64>;
409 using _128 = Builtin<128>;
410
411 } // namespace builtin
412
413 // Fixed-size Scalar Operations
414 // ----------------------------
415 namespace scalar {
416
417 // The Scalar type makes use of simple sized integers.
418 template <typename T> struct Scalar {
419 static constexpr size_t kSize = sizeof(T);
420
421 static void Copy(char *__restrict dst, const char *__restrict src) {
422 Store(dst, Load(src));
423 }
424
425 static bool Equals(const char *lhs, const char *rhs) {
426 return Load(lhs) == Load(rhs);
427 }
428
429 static int ThreeWayCompare(const char *lhs, const char *rhs) {
430 return ScalarThreeWayCompare(Load(lhs), Load(rhs));
431 }
432
433 static void SplatSet(char *dst, const unsigned char value) {
434 Store(dst, GetSplattedValue(value));
435 }
436
437 static int ScalarThreeWayCompare(T a, T b);
438
439 private:
440 static T Load(const char *ptr) {
441 T value;
442 builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr);
443 return value;
444 }
445 static void Store(char *ptr, T value) {
446 builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value));
447 }
448 static T GetSplattedValue(const unsigned char value) {
449 return T(~0) / T(0xFF) * T(value);
450 }
451 };
452
453 template <>
454 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
455 const int16_t la = Endian::ToBigEndian(a);
456 const int16_t lb = Endian::ToBigEndian(b);
457 return la - lb;
458 }
459 template <>
460 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
461 const int32_t la = Endian::ToBigEndian(a);
462 const int32_t lb = Endian::ToBigEndian(b);
463 return la - lb;
464 }
465 template <>
466 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
467 const uint32_t la = Endian::ToBigEndian(a);
468 const uint32_t lb = Endian::ToBigEndian(b);
469 return la > lb ? 1 : la < lb ? -1 : 0;
470 }
471 template <>
472 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
473 const uint64_t la = Endian::ToBigEndian(a);
474 const uint64_t lb = Endian::ToBigEndian(b);
475 return la > lb ? 1 : la < lb ? -1 : 0;
476 }
477
478 using UINT8 = Scalar<uint8_t>; // 1 Byte
479 using UINT16 = Scalar<uint16_t>; // 2 Bytes
480 using UINT32 = Scalar<uint32_t>; // 4 Bytes
481 using UINT64 = Scalar<uint64_t>; // 8 Bytes
482
483 using _1 = UINT8;
484 using _2 = UINT16;
485 using _3 = Chained<UINT16, UINT8>;
486 using _4 = UINT32;
487 using _8 = UINT64;
488 using _16 = Repeated<_8, 2>;
489 using _32 = Repeated<_8, 4>;
490 using _64 = Repeated<_8, 8>;
491 using _128 = Repeated<_8, 16>;
492
493 } // namespace scalar
494 } // namespace __llvm_libc
495
496 #include <src/string/memory_utils/elements_aarch64.h>
497 #include <src/string/memory_utils/elements_x86.h>
498
499 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
500