1 /* Copyright (c) MediaArea.net SARL. All Rights Reserved. 2 * 3 * Use of this source code is governed by a zlib-style license that can 4 * be found in the License.txt file in the root of the source tree. 5 */ 6 7 //--------------------------------------------------------------------------- 8 #ifndef ZenLib_MemoryUtilsH 9 #define ZenLib_MemoryUtilsH 10 //--------------------------------------------------------------------------- 11 12 //--------------------------------------------------------------------------- 13 #include "ZenLib/Conf.h" 14 #include "ZenLib/Conf.h" 15 //--------------------------------------------------------------------------- 16 17 #include <cstring> 18 #ifdef ZENLIB_MEMUTILS_SSE2 19 #include <emmintrin.h> 20 #endif //ZENLIB_MEMUTILS_SSE2 21 22 namespace ZenLib 23 { 24 25 #ifndef ZENLIB_MEMUTILS_SSE2 26 //----------------------------------------------------------------------- 27 // Memory alloc/free 28 #define malloc_Aligned128 (size) \ 29 malloc (size) 30 #define free_Aligned128 (ptr) \ 31 free (ptr) 32 33 //----------------------------------------------------------------------- 34 // Arbitrary size - To Unaligned 35 #define memcpy_Unaligned_Unaligned memcpy 36 #define memcpy_Aligned128_Unaligned memcpy 37 38 //----------------------------------------------------------------------- 39 // Arbitrary size - To Aligned 128 bits (16 bytes) 40 #define memcpy_Unaligned_Aligned128 memcpy 41 #define memcpy_Aligned128_Aligned128 memcpy 42 43 //----------------------------------------------------------------------- 44 // 128 bits - To Unaligned 45 #define memcpy_Unaligned_Unaligned_Once128 memcpy 46 47 //----------------------------------------------------------------------- 48 // 128 bits - To Aligned 128 bits (16 bytes) 49 #define memcpy_Aligned128_Aligned128_Once128 memcpy 50 51 //----------------------------------------------------------------------- 52 // 1024 bits - To Unaligned 53 #define memcpy_Unaligned_Unaligned_Once1024 memcpy 54 55 //----------------------------------------------------------------------- 56 // 1024 bits - To Aligned 128 bits (16 bytes) 57 #define memcpy_Aligned128_Aligned128_Once1024 memcpy 58 59 //----------------------------------------------------------------------- 60 // 128-bit multiple - To Aligned 128 bits (16 bytes) 61 #define memcpy_Unaligned_Aligned128_Size128 memcpy 62 #define memcpy_Aligned128_Aligned128_Size128 memcpy 63 64 #else // ZENLIB_MEMUTILS_SSE2 65 66 //----------------------------------------------------------------------- 67 // Memory alloc/free 68 69 inline void* malloc_Aligned128 (size_t size) 70 { 71 return _aligned_malloc (size, 16); //aligned_alloc in C11 72 } 73 74 inline void free_Aligned128 ( void *ptr ) 75 { 76 _aligned_free (ptr); //free in C11 77 } 78 79 //----------------------------------------------------------------------- 80 // Arbitrary size - To Unaligned 81 82 inline void memcpy_Unaligned_Unaligned (void* destination, const void* source, size_t num) 83 { 84 size_t extra=num&0xF; 85 __m128i* destination16=(__m128i*)destination; 86 const __m128i* source16=(const __m128i*)source; 87 88 num>>=4; 89 while (num--) 90 _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++)); 91 92 char* destination1=(char*)destination16; 93 char* source1=(char*)source16; 94 while (extra--) 95 *destination1++=*source1++; 96 } 97 98 inline void memcpy_Aligned128_Unaligned (void* destination, const void* source, size_t num) 99 { 100 size_t extra=num&0xF; 101 __m128i* destination16=(__m128i*)destination; 102 const __m128i* source16=(const __m128i*)source; 103 104 num>>=4; 105 while (num--) 106 _mm_storeu_si128 (destination16++, _mm_load_si128(source16++)); 107 108 char* destination1=(char*)destination16; 109 char* source1=(char*)source16; 110 while (extra--) 111 *destination1++=*source1++; 112 } 113 114 //----------------------------------------------------------------------- 115 // Arbitrary size - To Aligned 128 bits (16 bytes) 116 117 inline void memcpy_Unaligned_Aligned128 (void* destination, const void* source, size_t num) 118 { 119 size_t extra=num&0xF; 120 __m128i* destination16=(__m128i*)destination; 121 const __m128i* source16=(const __m128i*)source; 122 123 num>>=4; 124 while (num--) 125 _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++)); 126 127 char* destination1=(char*)destination16; 128 char* source1=(char*)source16; 129 while (extra--) 130 *destination1++=*source1++; 131 } 132 133 //----------------------------------------------------------------------- 134 // 128 bits - To Unaligned 135 136 inline void memcpy_Unaligned_Unaligned_Once128 (void* destination, const void* source) 137 { 138 _mm_storeu_si128 ((__m128i*)destination, _mm_loadu_si128((const __m128i*)source)); 139 } 140 141 //----------------------------------------------------------------------- 142 // 128 bits - To Aligned 128 bits (16 bytes) 143 144 inline void memcpy_Aligned128_Aligned128 (void* destination, const void* source, size_t num) 145 { 146 size_t extra=num&0xF; 147 __m128i* destination16=(__m128i*)destination; 148 const __m128i* source16=(const __m128i*)source; 149 150 num>>=4; 151 while (num--) 152 _mm_stream_si128 (destination16++, _mm_load_si128(source16++)); 153 154 char* destination1=(char*)destination16; 155 char* source1=(char*)source16; 156 while (extra--) 157 *destination1++=*source1++; 158 } 159 160 inline void memcpy_Aligned128_Aligned128_Size128 (void* destination, const void* source, size_t num) 161 { 162 __m128i* destination16=(__m128i*)destination; 163 const __m128i* source16=(__m128i*)source; 164 165 num>>=4; 166 while (num--) 167 _mm_stream_si128 (destination16++, _mm_load_si128(source16++)); 168 } 169 170 //----------------------------------------------------------------------- 171 // 1024 bits - To Unaligned 172 173 inline void memcpy_Unaligned_Unaligned_Once1024 (void* destination, const void* source, size_t) 174 { 175 __m128i* destination16=(__m128i*)destination; 176 const __m128i* source16=(__m128i*)source; 177 178 size_t num=8; 179 while (num--) 180 _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++)); 181 } 182 183 //----------------------------------------------------------------------- 184 // 1024 bits - To Aligned 128 bits (16 bytes) 185 186 inline void memcpy_Aligned128_Aligned128_Once128 (void* destination, const void* source) 187 { 188 _mm_stream_si128 ((__m128i*)destination, _mm_load_si128((const __m128i*)source)); 189 } 190 191 //----------------------------------------------------------------------- 192 // 128-bit multiple - To Unaligned (16 bytes) 193 194 inline void memcpy_Unaligned_Unaligned_Size128 (void* destination, const void* source, size_t num) 195 { 196 __m128i* destination16=(__m128i*)destination; 197 const __m128i* source16=(const __m128i*)source; 198 199 num>>=4; 200 while (num--) 201 _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++)); 202 } 203 204 inline void memcpy_Aligned128_Unaligned_Size128 (void* destination, const void* source, size_t num) 205 { 206 __m128i* destination16=(__m128i*)destination; 207 const __m128i* source16=(__m128i*)source; 208 209 num>>=4; 210 while (num--) 211 _mm_storeu_si128 (destination16++, _mm_load_si128(source16++)); 212 } 213 214 //----------------------------------------------------------------------- 215 // 128-bit multiple - To Aligned 128 bits (16 bytes) 216 217 inline void memcpy_Unaligned_Aligned128_Size128 (void* destination, const void* source, size_t num) 218 { 219 __m128i* destination16=(__m128i*)destination; 220 const __m128i* source16=(__m128i*)source; 221 222 num>>=4; 223 while (num--) 224 _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++)); 225 } 226 227 228 /* Slower 229 inline void memcpy_Aligned128_Aligned128_Once1024 (void* destination, const void* source) 230 { 231 __m128i* destination16=(__m128i*)destination; 232 const __m128i* source16=(__m128i*)source; 233 234 size_t num=8; 235 while (num--) 236 _mm_stream_si128 (destination16++, _mm_load_si128(source16++)); 237 } 238 */ 239 240 /* 241 inline void memcpy_Aligned256_Aligned256 (void* destination, const void* source, size_t num) //with AVX, actually slower 242 { 243 size_t extra=num&0x1F; 244 __m256i* destination16=(__m256i*)destination; 245 const __m256i* source16=(const __m256i*)source; 246 247 num>>=5; 248 while (num--) 249 _mm256_storeu_si256 (destination16++, _mm256_loadu_si256(source16++)); 250 251 char* destination1=(char*)destination16; 252 char* source1=(char*)source16; 253 while (extra--) 254 *destination1++=*source1++; 255 } 256 */ 257 258 #endif // ZENLIB_MEMUTILS_SSE2 259 260 } //NameSpace 261 262 #endif 263