1 /*  Copyright (c) MediaArea.net SARL. All Rights Reserved.
2  *
3  *  Use of this source code is governed by a zlib-style license that can
4  *  be found in the License.txt file in the root of the source tree.
5  */
6 
7 //---------------------------------------------------------------------------
8 #ifndef ZenLib_MemoryUtilsH
9 #define ZenLib_MemoryUtilsH
10 //---------------------------------------------------------------------------
11 
12 //---------------------------------------------------------------------------
13 #include "ZenLib/Conf.h"
14 #include "ZenLib/Conf.h"
15 //---------------------------------------------------------------------------
16 
#include <cstdlib>
#include <cstring>
#ifdef ZENLIB_MEMUTILS_SSE2
    #include <emmintrin.h>
#endif //ZENLIB_MEMUTILS_SSE2
21 
22 namespace ZenLib
23 {
24 
25 #ifndef ZENLIB_MEMUTILS_SSE2
26     //-----------------------------------------------------------------------
27     // Memory alloc/free
28     #define malloc_Aligned128 (size) \
29             malloc (size)
30     #define free_Aligned128 (ptr) \
31             free (ptr)
32 
33     //-----------------------------------------------------------------------
34     // Arbitrary size - To Unaligned
35     #define memcpy_Unaligned_Unaligned memcpy
36     #define memcpy_Aligned128_Unaligned memcpy
37 
38     //-----------------------------------------------------------------------
39     // Arbitrary size - To Aligned 128 bits (16 bytes)
40     #define memcpy_Unaligned_Aligned128 memcpy
41     #define memcpy_Aligned128_Aligned128 memcpy
42 
43     //-----------------------------------------------------------------------
44     // 128 bits - To Unaligned
45     #define memcpy_Unaligned_Unaligned_Once128 memcpy
46 
47     //-----------------------------------------------------------------------
48     // 128 bits - To Aligned 128 bits (16 bytes)
49     #define memcpy_Aligned128_Aligned128_Once128 memcpy
50 
51     //-----------------------------------------------------------------------
52     // 1024 bits - To Unaligned
53     #define memcpy_Unaligned_Unaligned_Once1024 memcpy
54 
55     //-----------------------------------------------------------------------
56     // 1024 bits - To Aligned 128 bits (16 bytes)
57     #define memcpy_Aligned128_Aligned128_Once1024 memcpy
58 
59     //-----------------------------------------------------------------------
60     // 128-bit multiple - To Aligned 128 bits (16 bytes)
61     #define memcpy_Unaligned_Aligned128_Size128 memcpy
62     #define memcpy_Aligned128_Aligned128_Size128 memcpy
63 
64 #else // ZENLIB_MEMUTILS_SSE2
65 
66     //-----------------------------------------------------------------------
67     // Memory alloc/free
68 
69     inline void*    malloc_Aligned128 (size_t size)
70     {
71         return _aligned_malloc (size, 16); //aligned_alloc in C11
72     }
73 
74     inline void     free_Aligned128 ( void *ptr )
75     {
76         _aligned_free (ptr); //free in C11
77     }
78 
79     //-----------------------------------------------------------------------
80     // Arbitrary size - To Unaligned
81 
82     inline void memcpy_Unaligned_Unaligned (void* destination, const void* source, size_t num)
83     {
84         size_t extra=num&0xF;
85         __m128i* destination16=(__m128i*)destination;
86         const __m128i* source16=(const __m128i*)source;
87 
88         num>>=4;
89         while (num--)
90             _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++));
91 
92         char* destination1=(char*)destination16;
93         char* source1=(char*)source16;
94         while (extra--)
95             *destination1++=*source1++;
96     }
97 
98     inline void memcpy_Aligned128_Unaligned (void* destination, const void* source, size_t num)
99     {
100         size_t extra=num&0xF;
101         __m128i* destination16=(__m128i*)destination;
102         const __m128i* source16=(const __m128i*)source;
103 
104         num>>=4;
105         while (num--)
106             _mm_storeu_si128 (destination16++, _mm_load_si128(source16++));
107 
108         char* destination1=(char*)destination16;
109         char* source1=(char*)source16;
110         while (extra--)
111             *destination1++=*source1++;
112     }
113 
114     //-----------------------------------------------------------------------
115     // Arbitrary size - To Aligned 128 bits (16 bytes)
116 
117     inline void memcpy_Unaligned_Aligned128 (void* destination, const void* source, size_t num)
118     {
119         size_t extra=num&0xF;
120         __m128i* destination16=(__m128i*)destination;
121         const __m128i* source16=(const __m128i*)source;
122 
123         num>>=4;
124         while (num--)
125             _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++));
126 
127         char* destination1=(char*)destination16;
128         char* source1=(char*)source16;
129         while (extra--)
130             *destination1++=*source1++;
131     }
132 
133     //-----------------------------------------------------------------------
134     // 128 bits - To Unaligned
135 
136     inline void memcpy_Unaligned_Unaligned_Once128 (void* destination, const void* source)
137     {
138         _mm_storeu_si128 ((__m128i*)destination, _mm_loadu_si128((const __m128i*)source));
139     }
140 
141     //-----------------------------------------------------------------------
    // Arbitrary size and 128-bit multiple - Aligned 128 bits to Aligned 128 bits (16 bytes)
143 
144     inline void memcpy_Aligned128_Aligned128 (void* destination, const void* source, size_t num)
145     {
146         size_t extra=num&0xF;
147         __m128i* destination16=(__m128i*)destination;
148         const __m128i* source16=(const __m128i*)source;
149 
150         num>>=4;
151         while (num--)
152             _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
153 
154         char* destination1=(char*)destination16;
155         char* source1=(char*)source16;
156         while (extra--)
157             *destination1++=*source1++;
158     }
159 
160     inline void memcpy_Aligned128_Aligned128_Size128 (void* destination, const void* source, size_t num)
161     {
162         __m128i* destination16=(__m128i*)destination;
163         const __m128i* source16=(__m128i*)source;
164 
165         num>>=4;
166         while (num--)
167             _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
168     }
169 
170     //-----------------------------------------------------------------------
171     // 1024 bits - To Unaligned
172 
173     inline void memcpy_Unaligned_Unaligned_Once1024 (void* destination, const void* source, size_t)
174     {
175         __m128i* destination16=(__m128i*)destination;
176         const __m128i* source16=(__m128i*)source;
177 
178         size_t num=8;
179         while (num--)
180             _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++));
181     }
182 
183     //-----------------------------------------------------------------------
    // 128 bits - To Aligned 128 bits (16 bytes)
185 
186     inline void memcpy_Aligned128_Aligned128_Once128 (void* destination, const void* source)
187     {
188         _mm_stream_si128 ((__m128i*)destination, _mm_load_si128((const __m128i*)source));
189     }
190 
191     //-----------------------------------------------------------------------
192     // 128-bit multiple - To Unaligned (16 bytes)
193 
194     inline void memcpy_Unaligned_Unaligned_Size128 (void* destination, const void* source, size_t num)
195     {
196         __m128i* destination16=(__m128i*)destination;
197         const __m128i* source16=(const __m128i*)source;
198 
199         num>>=4;
200         while (num--)
201             _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++));
202     }
203 
204     inline void memcpy_Aligned128_Unaligned_Size128 (void* destination, const void* source, size_t num)
205     {
206         __m128i* destination16=(__m128i*)destination;
207         const __m128i* source16=(__m128i*)source;
208 
209         num>>=4;
210         while (num--)
211             _mm_storeu_si128 (destination16++, _mm_load_si128(source16++));
212     }
213 
214     //-----------------------------------------------------------------------
215     // 128-bit multiple - To Aligned 128 bits (16 bytes)
216 
217     inline void memcpy_Unaligned_Aligned128_Size128 (void* destination, const void* source, size_t num)
218     {
219         __m128i* destination16=(__m128i*)destination;
220         const __m128i* source16=(__m128i*)source;
221 
222         num>>=4;
223         while (num--)
224             _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++));
225     }
226 
227 
228     /* Slower
229     inline void memcpy_Aligned128_Aligned128_Once1024 (void* destination, const void* source)
230     {
231         __m128i* destination16=(__m128i*)destination;
232         const __m128i* source16=(__m128i*)source;
233 
234         size_t num=8;
235         while (num--)
236             _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
237     }
238     */
239 
240     /*
241     inline void memcpy_Aligned256_Aligned256 (void* destination, const void* source, size_t num) //with AVX, actually slower
242     {
243         size_t extra=num&0x1F;
244         __m256i* destination16=(__m256i*)destination;
245         const __m256i* source16=(const __m256i*)source;
246 
247         num>>=5;
248         while (num--)
249             _mm256_storeu_si256 (destination16++, _mm256_loadu_si256(source16++));
250 
251         char* destination1=(char*)destination16;
252         char* source1=(char*)source16;
253         while (extra--)
254             *destination1++=*source1++;
255     }
256     */
257 
258 #endif // ZENLIB_MEMUTILS_SSE2
259 
260 } //NameSpace
261 
262 #endif
263