1 //===-- Elementary operations to compose memory primitives ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
11 
12 #include <stddef.h> // size_t
13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
14 
15 #include "src/__support/endian.h"
16 #include "src/string/memory_utils/utils.h"
17 
18 namespace __llvm_libc {
19 
20 // Elementary Operations
21 // --------------------------------
22 // We define abstract elementary operations acting on fixed chunks of memory.
23 // These are low level building blocks that are meant to be assembled to compose
24 // higher order abstractions. Each function is defined twice: once with
25 // fixed-size operations, and once with runtime-size operations.
26 
27 // Fixed-size copies from 'src' to 'dst'.
28 template <typename Element>
Copy(char * __restrict dst,const char * __restrict src)29 void Copy(char *__restrict dst, const char *__restrict src) {
30   Element::Copy(dst, src);
31 }
32 // Runtime-size copies from 'src' to 'dst'.
33 template <typename Element>
Copy(char * __restrict dst,const char * __restrict src,size_t size)34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
35   Element::Copy(dst, src, size);
36 }
37 
38 // Fixed-size equality between 'lhs' and 'rhs'.
Equals(const char * lhs,const char * rhs)39 template <typename Element> bool Equals(const char *lhs, const char *rhs) {
40   return Element::Equals(lhs, rhs);
41 }
42 // Runtime-size equality between 'lhs' and 'rhs'.
43 template <typename Element>
Equals(const char * lhs,const char * rhs,size_t size)44 bool Equals(const char *lhs, const char *rhs, size_t size) {
45   return Element::Equals(lhs, rhs, size);
46 }
47 
48 // Fixed-size three-way comparison between 'lhs' and 'rhs'.
49 template <typename Element>
ThreeWayCompare(const char * lhs,const char * rhs)50 int ThreeWayCompare(const char *lhs, const char *rhs) {
51   return Element::ThreeWayCompare(lhs, rhs);
52 }
53 // Runtime-size three-way comparison between 'lhs' and 'rhs'.
54 template <typename Element>
ThreeWayCompare(const char * lhs,const char * rhs,size_t size)55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
56   return Element::ThreeWayCompare(lhs, rhs, size);
57 }
58 
59 // Fixed-size initialization.
60 template <typename Element>
SplatSet(char * dst,const unsigned char value)61 void SplatSet(char *dst, const unsigned char value) {
62   Element::SplatSet(dst, value);
63 }
64 // Runtime-size initialization.
65 template <typename Element>
SplatSet(char * dst,const unsigned char value,size_t size)66 void SplatSet(char *dst, const unsigned char value, size_t size) {
67   Element::SplatSet(dst, value, size);
68 }
69 
70 // Fixed-size Higher-Order Operations
71 // ----------------------------------
72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
73 // - Chained<Types...>: Chain the operation of several types.
74 
75 // Repeat the operation several times in a row.
76 template <typename Element, size_t ElementCount> struct Repeated {
77   static constexpr size_t kSize = ElementCount * Element::kSize;
78 
CopyRepeated79   static void Copy(char *__restrict dst, const char *__restrict src) {
80     for (size_t i = 0; i < ElementCount; ++i) {
81       const size_t offset = i * Element::kSize;
82       Element::Copy(dst + offset, src + offset);
83     }
84   }
85 
EqualsRepeated86   static bool Equals(const char *lhs, const char *rhs) {
87     for (size_t i = 0; i < ElementCount; ++i) {
88       const size_t offset = i * Element::kSize;
89       if (!Element::Equals(lhs + offset, rhs + offset))
90         return false;
91     }
92     return true;
93   }
94 
ThreeWayCompareRepeated95   static int ThreeWayCompare(const char *lhs, const char *rhs) {
96     for (size_t i = 0; i < ElementCount; ++i) {
97       const size_t offset = i * Element::kSize;
98       // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'.
99       if (Element::Equals(lhs + offset, rhs + offset))
100         continue;
101       return Element::ThreeWayCompare(lhs + offset, rhs + offset);
102     }
103     return 0;
104   }
105 
SplatSetRepeated106   static void SplatSet(char *dst, const unsigned char value) {
107     for (size_t i = 0; i < ElementCount; ++i) {
108       const size_t offset = i * Element::kSize;
109       Element::SplatSet(dst + offset, value);
110     }
111   }
112 };
113 
114 // Chain the operation of several types.
115 // For instance, to handle a 3 bytes operation, one can use:
116 // Chained<UINT16, UINT8>::Operation();
117 template <typename... Types> struct Chained;
118 
119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
120   static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
121 
122   static void Copy(char *__restrict dst, const char *__restrict src) {
123     Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
124     __llvm_libc::Copy<Head>(dst, src);
125   }
126 
127   static bool Equals(const char *lhs, const char *rhs) {
128     if (!__llvm_libc::Equals<Head>(lhs, rhs))
129       return false;
130     return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
131   }
132 
133   static int ThreeWayCompare(const char *lhs, const char *rhs) {
134     if (__llvm_libc::Equals<Head>(lhs, rhs))
135       return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
136                                                rhs + Head::kSize);
137     return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
138   }
139 
140   static void SplatSet(char *dst, const unsigned char value) {
141     Chained<Tail...>::SplatSet(dst + Head::kSize, value);
142     __llvm_libc::SplatSet<Head>(dst, value);
143   }
144 };
145 
146 template <> struct Chained<> {
147   static constexpr size_t kSize = 0;
148   static void Copy(char *__restrict dst, const char *__restrict src) {}
149   static bool Equals(const char *lhs, const char *rhs) { return true; }
150   static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
151   static void SplatSet(char *dst, const unsigned char value) {}
152 };
153 
154 // Runtime-size Higher-Order Operations
155 // ------------------------------------
156 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
157 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes
158 //   of the buffer.
159 // - Loop<T>: Perform a loop of fixed-sized operations.
160 
161 // Perform the operation on the last 'T::kSize' bytes of the buffer.
162 //
163 // e.g. with
164 // [1234567812345678123]
165 // [__XXXXXXXXXXXXXX___]
166 // [________XXXXXXXX___]
167 //
168 // Precondition: `size >= T::kSize`.
169 template <typename T> struct Tail {
170   static void Copy(char *__restrict dst, const char *__restrict src,
171                    size_t size) {
172     return T::Copy(dst + offset(size), src + offset(size));
173   }
174 
175   static bool Equals(const char *lhs, const char *rhs, size_t size) {
176     return T::Equals(lhs + offset(size), rhs + offset(size));
177   }
178 
179   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
180     return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
181   }
182 
183   static void SplatSet(char *dst, const unsigned char value, size_t size) {
184     return T::SplatSet(dst + offset(size), value);
185   }
186 
187   static size_t offset(size_t size) { return size - T::kSize; }
188 };
189 
190 // Perform the operation on the first and last 'T::kSize' bytes of the buffer.
191 // This is useful for overlapping operations.
192 //
193 // e.g. with
194 // [1234567812345678123]
195 // [__XXXXXXXXXXXXXX___]
196 // [__XXXXXXXX_________]
197 // [________XXXXXXXX___]
198 //
199 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
200 template <typename T> struct HeadTail {
201   static void Copy(char *__restrict dst, const char *__restrict src,
202                    size_t size) {
203     T::Copy(dst, src);
204     Tail<T>::Copy(dst, src, size);
205   }
206 
207   static bool Equals(const char *lhs, const char *rhs, size_t size) {
208     if (!T::Equals(lhs, rhs))
209       return false;
210     return Tail<T>::Equals(lhs, rhs, size);
211   }
212 
213   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
214     if (!T::Equals(lhs, rhs))
215       return T::ThreeWayCompare(lhs, rhs);
216     return Tail<T>::ThreeWayCompare(lhs, rhs, size);
217   }
218 
219   static void SplatSet(char *dst, const unsigned char value, size_t size) {
220     T::SplatSet(dst, value);
221     Tail<T>::SplatSet(dst, value, size);
222   }
223 };
224 
225 // Simple loop ending with a Tail operation.
226 //
227 // e.g. with
228 // [12345678123456781234567812345678]
229 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
230 // [__XXXXXXXX_______________________]
231 // [__________XXXXXXXX_______________]
232 // [__________________XXXXXXXX_______]
233 // [______________________XXXXXXXX___]
234 //
235 // Precondition:
236 // - size >= T::kSize
237 template <typename T> struct Loop {
238   static void Copy(char *__restrict dst, const char *__restrict src,
239                    size_t size) {
240     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
241       T::Copy(dst + offset, src + offset);
242     Tail<T>::Copy(dst, src, size);
243   }
244 
245   static bool Equals(const char *lhs, const char *rhs, size_t size) {
246     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
247       if (!T::Equals(lhs + offset, rhs + offset))
248         return false;
249     return Tail<T>::Equals(lhs, rhs, size);
250   }
251 
252   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
253     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
254       if (!T::Equals(lhs + offset, rhs + offset))
255         return T::ThreeWayCompare(lhs + offset, rhs + offset);
256     return Tail<T>::ThreeWayCompare(lhs, rhs, size);
257   }
258 
259   static void SplatSet(char *dst, const unsigned char value, size_t size) {
260     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
261       T::SplatSet(dst + offset, value);
262     Tail<T>::SplatSet(dst, value, size);
263   }
264 };
265 
266 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
267 
268 namespace internal {
269 
270 // Provides a specialized Bump function that adjusts pointers and size so first
271 // argument (resp. second argument) gets aligned to Alignment.
272 // We make sure the compiler knows about the adjusted pointer alignment.
273 template <Arg arg, size_t Alignment> struct AlignHelper {};
274 
275 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
276   template <typename T1, typename T2>
277   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
278     const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
279     p1ref += offset;
280     p2ref += offset;
281     size -= offset;
282     p1ref = assume_aligned<Alignment>(p1ref);
283   }
284 };
285 
286 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
287   template <typename T1, typename T2>
288   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
289     const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
290     p1ref += offset;
291     p2ref += offset;
292     size -= offset;
293     p2ref = assume_aligned<Alignment>(p2ref);
294   }
295 };
296 
297 } // namespace internal
298 
299 // An alignment operation that:
300 // - executes the 'AlignmentT' operation
301 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
302 //   pointer gets aligned, size is decreased accordingly.
303 // - calls the 'NextT' operation.
304 //
305 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
306 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
307 template <typename AlignmentT, Arg AlignOn> struct Align {
308 private:
309   static constexpr size_t Alignment = AlignmentT::kSize;
310   static_assert(Alignment > 1, "Alignment must be more than 1");
311   static_assert(is_power2(Alignment), "Alignment must be a power of 2");
312 
313 public:
314   template <typename NextT> struct Then {
315     static void Copy(char *__restrict dst, const char *__restrict src,
316                      size_t size) {
317       AlignmentT::Copy(dst, src);
318       internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
319       NextT::Copy(dst, src, size);
320     }
321 
322     static bool Equals(const char *lhs, const char *rhs, size_t size) {
323       if (!AlignmentT::Equals(lhs, rhs))
324         return false;
325       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
326       return NextT::Equals(lhs, rhs, size);
327     }
328 
329     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
330       if (!AlignmentT::Equals(lhs, rhs))
331         return AlignmentT::ThreeWayCompare(lhs, rhs);
332       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
333       return NextT::ThreeWayCompare(lhs, rhs, size);
334     }
335 
336     static void SplatSet(char *dst, const unsigned char value, size_t size) {
337       AlignmentT::SplatSet(dst, value);
338       char *dummy = nullptr;
339       internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
340       NextT::SplatSet(dst, value, size);
341     }
342   };
343 };
344 
345 // Fixed-size Builtin Operations
346 // -----------------------------
347 // Note: Do not use 'builtin' right now as it requires the implementation of the
348 // `_inline` versions of all the builtins. Theoretically, Clang can still turn
349 // them into calls to the C library leading to reentrancy problems.
350 namespace builtin {
351 
352 #ifndef __has_builtin
353 #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
354 #endif
355 
356 template <size_t Size> struct Builtin {
357   static constexpr size_t kSize = Size;
358 
359   static void Copy(char *__restrict dst, const char *__restrict src) {
360 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
361     ForLoopCopy(dst, src);
362 #elif __has_builtin(__builtin_memcpy_inline)
363     // __builtin_memcpy_inline guarantees to never call external functions.
364     // Unfortunately it is not widely available.
365     __builtin_memcpy_inline(dst, src, kSize);
366 #elif __has_builtin(__builtin_memcpy)
367     __builtin_memcpy(dst, src, kSize);
368 #else
369     ForLoopCopy(dst, src);
370 #endif
371   }
372 
373 #if __has_builtin(__builtin_memcmp_inline)
374 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline
375 #else
376 #define LLVM_LIBC_MEMCMP __builtin_memcmp
377 #endif
378 
379   static bool Equals(const char *lhs, const char *rhs) {
380     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0;
381   }
382 
383   static int ThreeWayCompare(const char *lhs, const char *rhs) {
384     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize);
385   }
386 
387   static void SplatSet(char *dst, const unsigned char value) {
388     __builtin_memset(dst, value, kSize);
389   }
390 
391 private:
392   // Copies `kSize` bytes from `src` to `dst` using a for loop.
393   // This code requires the use of `-fno-buitin-memcpy` to prevent the compiler
394   // from turning the for-loop back into `__builtin_memcpy`.
395   static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
396     for (size_t i = 0; i < kSize; ++i)
397       dst[i] = src[i];
398   }
399 };
400 
401 using _1 = Builtin<1>;
402 using _2 = Builtin<2>;
403 using _3 = Builtin<3>;
404 using _4 = Builtin<4>;
405 using _8 = Builtin<8>;
406 using _16 = Builtin<16>;
407 using _32 = Builtin<32>;
408 using _64 = Builtin<64>;
409 using _128 = Builtin<128>;
410 
411 } // namespace builtin
412 
413 // Fixed-size Scalar Operations
414 // ----------------------------
415 namespace scalar {
416 
417 // The Scalar type makes use of simple sized integers.
418 template <typename T> struct Scalar {
419   static constexpr size_t kSize = sizeof(T);
420 
421   static void Copy(char *__restrict dst, const char *__restrict src) {
422     Store(dst, Load(src));
423   }
424 
425   static bool Equals(const char *lhs, const char *rhs) {
426     return Load(lhs) == Load(rhs);
427   }
428 
429   static int ThreeWayCompare(const char *lhs, const char *rhs) {
430     return ScalarThreeWayCompare(Load(lhs), Load(rhs));
431   }
432 
433   static void SplatSet(char *dst, const unsigned char value) {
434     Store(dst, GetSplattedValue(value));
435   }
436 
437   static int ScalarThreeWayCompare(T a, T b);
438 
439 private:
440   static T Load(const char *ptr) {
441     T value;
442     builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr);
443     return value;
444   }
445   static void Store(char *ptr, T value) {
446     builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value));
447   }
448   static T GetSplattedValue(const unsigned char value) {
449     return T(~0) / T(0xFF) * T(value);
450   }
451 };
452 
453 template <>
454 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
455   const int16_t la = Endian::ToBigEndian(a);
456   const int16_t lb = Endian::ToBigEndian(b);
457   return la - lb;
458 }
459 template <>
460 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
461   const int32_t la = Endian::ToBigEndian(a);
462   const int32_t lb = Endian::ToBigEndian(b);
463   return la - lb;
464 }
465 template <>
466 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
467   const uint32_t la = Endian::ToBigEndian(a);
468   const uint32_t lb = Endian::ToBigEndian(b);
469   return la > lb ? 1 : la < lb ? -1 : 0;
470 }
471 template <>
472 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
473   const uint64_t la = Endian::ToBigEndian(a);
474   const uint64_t lb = Endian::ToBigEndian(b);
475   return la > lb ? 1 : la < lb ? -1 : 0;
476 }
477 
478 using UINT8 = Scalar<uint8_t>;   // 1 Byte
479 using UINT16 = Scalar<uint16_t>; // 2 Bytes
480 using UINT32 = Scalar<uint32_t>; // 4 Bytes
481 using UINT64 = Scalar<uint64_t>; // 8 Bytes
482 
483 using _1 = UINT8;
484 using _2 = UINT16;
485 using _3 = Chained<UINT16, UINT8>;
486 using _4 = UINT32;
487 using _8 = UINT64;
488 using _16 = Repeated<_8, 2>;
489 using _32 = Repeated<_8, 4>;
490 using _64 = Repeated<_8, 8>;
491 using _128 = Repeated<_8, 16>;
492 
493 } // namespace scalar
494 } // namespace __llvm_libc
495 
496 #include <src/string/memory_utils/elements_aarch64.h>
497 #include <src/string/memory_utils/elements_x86.h>
498 
499 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
500