1 //===-- Implementation of memcpy ------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/string/memcpy.h"
10 #include "src/__support/common.h"
11 #include "src/string/memory_utils/elements.h"
12 
13 namespace __llvm_libc {
14 
15 using _1 = scalar::UINT8;
16 using _2 = scalar::UINT16;
17 using _3 = Chained<scalar::UINT16, scalar::UINT8>;
18 using _4 = scalar::UINT32;
19 using _8 = scalar::UINT64;
20 using _16 = Repeated<scalar::UINT64, 2>;
21 using _32 = Repeated<scalar::UINT64, 4>;
22 using _64 = Repeated<scalar::UINT64, 8>;
23 
24 // Design rationale
25 // ================
26 //
27 // Using a profiler to observe size distributions for calls into libc
28 // functions, it was found most operations act on a small number of bytes.
29 // This makes it important to favor small sizes.
30 //
// We have used __builtin_expect to tell the compiler to favor smaller sizes,
// as this reduces branching overhead where it would hurt most relative to the
// total cost of the copy.
34 //
35 // The function is written in C++ for several reasons:
36 // - The compiler can __see__ the code, this is useful when performing Profile
37 //   Guided Optimization as the optimized code can take advantage of branching
38 //   probabilities.
39 // - It also allows for easier customization and favors testing multiple
40 //   implementation parameters.
41 // - As compilers and processors get better, the generated code is improved
42 //   with little change on the code side.
43 // This implementation has been tuned for Neoverse-N1.
memcpy_aarch64(char * __restrict dst,const char * __restrict src,size_t count)44 static void memcpy_aarch64(char *__restrict dst, const char *__restrict src,
45                            size_t count) {
46   if (count == 0)
47     return;
48   if (count == 1)
49     return Copy<_1>(dst, src);
50   if (count == 2)
51     return Copy<_2>(dst, src);
52   if (count == 3)
53     return Copy<_3>(dst, src);
54   if (count == 4)
55     return Copy<_4>(dst, src);
56   if (count < 8)
57     return Copy<HeadTail<_4>>(dst, src, count);
58   if (count < 16)
59     return Copy<HeadTail<_8>>(dst, src, count);
60   if (count < 32)
61     return Copy<HeadTail<_16>>(dst, src, count);
62   if (count < 64)
63     return Copy<HeadTail<_32>>(dst, src, count);
64   if (count < 128)
65     return Copy<HeadTail<_64>>(dst, src, count);
66   return Copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
67 }
68 
69 LLVM_LIBC_FUNCTION(void *, memcpy,
70                    (void *__restrict dst, const void *__restrict src,
71                     size_t size)) {
72   memcpy_aarch64(reinterpret_cast<char *>(dst),
73                  reinterpret_cast<const char *>(src), size);
74   return dst;
75 }
76 
77 } // namespace __llvm_libc
78