1 //===-- Implementation of memcpy ------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/string/memcpy.h"
10 #include "src/__support/common.h"
11 #include "src/string/memory_utils/elements.h"
12 
13 namespace __llvm_libc {
14 
15 // Whether to use only rep;movsb.
16 constexpr bool kUseOnlyRepMovsb =
17     LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
18 
// Threshold, in bytes, used by memcpy_x86 for the copies that are not handled
// by one of its fixed size-class strategies: counts up to and including
// kRepMovsBSize use the aligned loop, larger counts use `rep movsb`.
// - LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE undefined: the threshold is
//   the largest size_t value (-1 converted to size_t), so the aligned loop is
//   always chosen.
// - LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE == 0: `rep movsb` is always
//   chosen for those copies.
// - any other value: aligned loop up to that size, `rep movsb` above it.
#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
constexpr size_t kRepMovsBSize = LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
#else
constexpr size_t kRepMovsBSize = static_cast<size_t>(-1);
#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
28 
29 // Whether target supports AVX instructions.
30 constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
31 
32 #ifdef __AVX__
33 using LoopBlockSize = __llvm_libc::x86::_64;
34 #else
35 using LoopBlockSize = __llvm_libc::x86::_32;
36 #endif
37 
// Copies `count` bytes from `src` to `dst` using one `rep movsb` instruction.
//
// FIXME: Add MSVC support, which has no GNU-style inline assembly, with
//   #include <intrin.h>
//   __movsb(reinterpret_cast<unsigned char *>(dst),
//           reinterpret_cast<const unsigned char *>(src), count);
static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
                         size_t count) {
  // Outputs: every operand is read-write ("+") because the instruction
  // consumes and updates it. The constraints pin `dst` to the D register,
  // `src` to the S register and `count` to the c register.
  // Inputs: none beyond the read-write operands.
  // Clobbers: "memory", since the bytes are written behind the compiler's back.
  asm volatile("rep movsb"
               : "+D"(dst), "+S"(src), "+c"(count)
               :
               : "memory");
}
46 
47 // Design rationale
48 // ================
49 //
50 // Using a profiler to observe size distributions for calls into libc
51 // functions, it was found most operations act on a small number of bytes.
52 // This makes it important to favor small sizes.
53 //
54 // The tests for `count` are in ascending order so the cost of branching is
55 // proportional to the cost of copying.
56 //
57 // The function is written in C++ for several reasons:
58 // - The compiler can __see__ the code, this is useful when performing Profile
59 //   Guided Optimization as the optimized code can take advantage of branching
60 //   probabilities.
61 // - It also allows for easier customization and favors testing multiple
62 //   implementation parameters.
63 // - As compilers and processors get better, the generated code is improved
64 //   with little change on the code side.
memcpy_x86(char * __restrict dst,const char * __restrict src,size_t count)65 static void memcpy_x86(char *__restrict dst, const char *__restrict src,
66                        size_t count) {
67   // Use x86 strategies (_1, _2, _3 ...)
68   using namespace __llvm_libc::x86;
69 
70   if (kUseOnlyRepMovsb)
71     return CopyRepMovsb(dst, src, count);
72 
73   if (count == 0)
74     return;
75   if (count == 1)
76     return Copy<_1>(dst, src);
77   if (count == 2)
78     return Copy<_2>(dst, src);
79   if (count == 3)
80     return Copy<_3>(dst, src);
81   if (count == 4)
82     return Copy<_4>(dst, src);
83   if (count < 8)
84     return Copy<HeadTail<_4>>(dst, src, count);
85   if (count < 16)
86     return Copy<HeadTail<_8>>(dst, src, count);
87   if (count < 32)
88     return Copy<HeadTail<_16>>(dst, src, count);
89   if (count < 64)
90     return Copy<HeadTail<_32>>(dst, src, count);
91   if (count < 128)
92     return Copy<HeadTail<_64>>(dst, src, count);
93   if (kHasAvx && count < 256)
94     return Copy<HeadTail<_128>>(dst, src, count);
95   if (count <= kRepMovsBSize)
96     return Copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
97                                                                  count);
98   return CopyRepMovsb(dst, src, count);
99 }
100 
101 LLVM_LIBC_FUNCTION(void *, memcpy,
102                    (void *__restrict dst, const void *__restrict src,
103                     size_t size)) {
104   memcpy_x86(reinterpret_cast<char *>(dst), reinterpret_cast<const char *>(src),
105              size);
106   return dst;
107 }
108 
109 } // namespace __llvm_libc
110