1*7a6dacacSDimitry Andric #include <stdlib.h>
2*7a6dacacSDimitry Andric 
// WARNING: When building the scalar versions of these functions you need to
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
// from recognising a loop idiom and planting calls to memcpy!
6*7a6dacacSDimitry Andric 
// Byte-wise copy of n bytes from src to dest, proceeding from the lowest
// address towards the highest. Returns dest. When n is zero neither buffer is
// touched.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;

  // Count the remaining bytes down while both cursors advance in lockstep.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = *in++;

  return dest;
}
16*7a6dacacSDimitry Andric 
// Overlapping dest and src give undefined behaviour for memcpy, which is what
// allows both pointers to carry the restrict qualifier here. The libc memcpy
// is declared the same way according to the man page.
//
// Copies n bytes from src to dest by delegating to the forward byte copy and
// returns whatever that helper returns.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  void *const copied = __arm_sc_memcpy_fwd(dest, src, n);
  return copied;
}
24*7a6dacacSDimitry Andric 
// Stores the value of c, converted to unsigned char, into each of the first n
// bytes of dest. Returns dest. When n is zero nothing is written.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *cursor = (unsigned char *)dest;

  // Walk upwards through the buffer, one byte per iteration.
  for (size_t remaining = n; remaining != 0; --remaining)
    *cursor++ = fill;

  return dest;
}
33*7a6dacacSDimitry Andric 
// Byte-wise copy of n bytes from src to dest, proceeding from the highest
// address down to the lowest. Returns dest. When n is zero neither buffer is
// touched.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;

  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  // `count` is the number of bytes still to copy; the byte handled on each
  // iteration is the last one of that remaining prefix.
  for (size_t count = n; count != 0; --count)
    out[count - 1] = in[count - 1];

  return dest;
}
46*7a6dacacSDimitry Andric 
// A memmove has the same effect as the following two steps:
//   1. Copy all of src into a temporary array that overlaps neither src nor
//      dest.
//   2. Copy the temporary array into dest.
// No temporary is used here; instead the copy direction is chosen so that
// source bytes are always read before they can be overwritten. Returns the
// value returned by whichever copy helper is selected.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  unsigned char *dest_bytes = (unsigned char *)dest;
  const unsigned char *src_bytes = (const unsigned char *)src;

  // src begins strictly beyond the end of dest: the ranges are disjoint, so a
  // plain forward copy is sufficient.
  if (src_bytes > (dest_bytes + n))
    return __arm_sc_memcpy_fwd(dest, src, n);

  // dest begins strictly beyond the end of src: also disjoint, same handling.
  if (dest_bytes > (src_bytes + n))
    return __arm_sc_memcpy_fwd(dest, src, n);

  // From here on the ranges overlap, or one starts exactly where the other
  // ends; both situations are handled by the direction choice below.
  //
  // src above dest:
  //     src: Low     |   ->   |     High
  //    dest: Low  |   ->   |        High
  // src is always ahead of dest at a higher address. Each chunk is read from
  // src before the matching chunk of dest is written, so later reads of src
  // are never corrupted and a forward copy is safe.
  //
  // src at or below dest:
  //     src: Low  |   ->   |        High
  //    dest: Low     |   ->   |     High
  // Within the overlap region a forward copy would overwrite bytes of src
  // that have not been read yet. Copying in reverse, starting from the
  // highest address, avoids that efficiently.
  return (src_bytes > dest_bytes) ? __arm_sc_memcpy_fwd(dest, src, n)
                                  : __arm_sc_memcpy_rev(dest, src, n);
}
77*7a6dacacSDimitry Andric 
// Scans the first n bytes of src, from the lowest address upwards, for a byte
// equal to c converted to unsigned char. Returns a pointer to the first
// matching byte, or NULL when none of the n bytes match (including n == 0).
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char needle = (unsigned char)c;
  const unsigned char *cursor = (const unsigned char *)src;

  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
    if (*cursor == needle)
      return cursor;
  }

  return NULL;
}
88