1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved. *
3 * This file is part of the LIBXSMM library. *
4 * *
5 * For information on the license, see the LICENSE file. *
6 * Further information: https://github.com/hfp/libxsmm/ *
7 * SPDX-License-Identifier: BSD-3-Clause *
8 ******************************************************************************/
9 /* Hans Pabst (Intel Corp.)
10 ******************************************************************************/
11 #include <libxsmm_memory.h>
12 #include "libxsmm_hash.h"
13 #include "libxsmm_diff.h"
14 #include "libxsmm_main.h"
15
/* Build switch (off by default due to the trailing "&& 0", unless predefined):
 * if defined, comparisons in this file are routed to the C library's memcmp
 * instead of the hand-written kernels. */
#if !defined(LIBXSMM_MEMORY_STDLIB) && 0
# define LIBXSMM_MEMORY_STDLIB
#endif
/* Build switch (off by default due to the trailing "&& 0", unless predefined):
 * if defined, the intrinsic-based flavors fall back to the generic "sw"
 * implementations, and the dispatch pointers are not defined at all. */
#if !defined(LIBXSMM_MEMORY_SW) && 0
# define LIBXSMM_MEMORY_SW
#endif
22
23
/* Dispatch pointers for the comparison kernels: assigned by libxsmm_memory_init
 * according to the target architecture, reset to NULL by libxsmm_memory_finalize
 * (debug builds only), and called by libxsmm_diff and libxsmm_memcmp when the
 * kernel cannot be selected at compile-time. */
#if !defined(LIBXSMM_MEMORY_SW)
LIBXSMM_APIVAR_DEFINE(unsigned char (*internal_diff_function)(const void*, const void*, unsigned char));
LIBXSMM_APIVAR_DEFINE(int (*internal_memcmp_function)(const void*, const void*, size_t));
#endif
28
29
30 LIBXSMM_API_INLINE
internal_diff_sw(const void * a,const void * b,unsigned char size)31 unsigned char internal_diff_sw(const void* a, const void* b, unsigned char size)
32 {
33 #if defined(LIBXSMM_MEMORY_STDLIB) && defined(LIBXSMM_MEMORY_SW)
34 return (unsigned char)memcmp(a, b, size);
35 #else
36 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
37 unsigned char i;
38 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
39 for (i = 0; i < (size & 0xF0); i += 16) {
40 LIBXSMM_DIFF_16_DECL(aa);
41 LIBXSMM_DIFF_16_LOAD(aa, a8 + i);
42 if (LIBXSMM_DIFF_16(aa, b8 + i, 0/*dummy*/)) return 1;
43 }
44 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
45 return 0;
46 #endif
47 }
48
49
LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE3)50 LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE3)
51 unsigned char internal_diff_sse3(const void* a, const void* b, unsigned char size)
52 {
53 #if defined(LIBXSMM_INTRINSICS_SSE3) && !defined(LIBXSMM_MEMORY_SW)
54 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
55 unsigned char i;
56 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
57 for (i = 0; i < (size & 0xF0); i += 16) {
58 LIBXSMM_DIFF_SSE3_DECL(aa);
59 LIBXSMM_DIFF_SSE3_LOAD(aa, a8 + i);
60 if (LIBXSMM_DIFF_SSE3(aa, b8 + i, 0/*dummy*/)) return 1;
61 }
62 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
63 return 0;
64 #else
65 return internal_diff_sw(a, b, size);
66 #endif
67 }
68
69
LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX2)70 LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX2)
71 unsigned char internal_diff_avx2(const void* a, const void* b, unsigned char size)
72 {
73 #if defined(LIBXSMM_INTRINSICS_AVX2) && !defined(LIBXSMM_MEMORY_SW)
74 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
75 unsigned char i;
76 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
77 for (i = 0; i < (size & 0xE0); i += 32) {
78 LIBXSMM_DIFF_AVX2_DECL(aa);
79 LIBXSMM_DIFF_AVX2_LOAD(aa, a8 + i);
80 if (LIBXSMM_DIFF_AVX2(aa, b8 + i, 0/*dummy*/)) return 1;
81 }
82 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
83 return 0;
84 #else
85 return internal_diff_sw(a, b, size);
86 #endif
87 }
88
89
LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX512)90 LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX512)
91 unsigned char internal_diff_avx512(const void* a, const void* b, unsigned char size)
92 {
93 #if defined(LIBXSMM_INTRINSICS_AVX512) && !defined(LIBXSMM_MEMORY_SW)
94 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
95 unsigned char i;
96 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
97 for (i = 0; i < (size & 0xC0); i += 64) {
98 LIBXSMM_DIFF_AVX512_DECL(aa);
99 LIBXSMM_DIFF_AVX512_LOAD(aa, a8 + i);
100 if (LIBXSMM_DIFF_AVX512(aa, b8 + i, 0/*dummy*/)) return 1;
101 }
102 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
103 return 0;
104 #else
105 return internal_diff_sw(a, b, size);
106 #endif
107 }
108
109
110 LIBXSMM_API_INLINE
internal_memcmp_sw(const void * a,const void * b,size_t size)111 int internal_memcmp_sw(const void* a, const void* b, size_t size)
112 {
113 #if defined(LIBXSMM_MEMORY_STDLIB)
114 return memcmp(a, b, size);
115 #else
116 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
117 size_t i;
118 LIBXSMM_DIFF_16_DECL(aa);
119 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
120 for (i = 0; i < (size & 0xFFFFFFFFFFFFFFF0); i += 16) {
121 LIBXSMM_DIFF_16_LOAD(aa, a8 + i);
122 if (LIBXSMM_DIFF_16(aa, b8 + i, 0/*dummy*/)) return 1;
123 }
124 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
125 return 0;
126 #endif
127 }
128
129
LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE3)130 LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_SSE3)
131 int internal_memcmp_sse3(const void* a, const void* b, size_t size)
132 {
133 #if defined(LIBXSMM_INTRINSICS_SSE3) && !defined(LIBXSMM_MEMORY_SW)
134 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
135 size_t i;
136 LIBXSMM_DIFF_SSE3_DECL(aa);
137 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
138 for (i = 0; i < (size & 0xFFFFFFFFFFFFFFF0); i += 16) {
139 LIBXSMM_DIFF_SSE3_LOAD(aa, a8 + i);
140 if (LIBXSMM_DIFF_SSE3(aa, b8 + i, 0/*dummy*/)) return 1;
141 }
142 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
143 return 0;
144 #else
145 return internal_memcmp_sw(a, b, size);
146 #endif
147 }
148
149
LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX2)150 LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX2)
151 int internal_memcmp_avx2(const void* a, const void* b, size_t size)
152 {
153 #if defined(LIBXSMM_INTRINSICS_AVX2) && !defined(LIBXSMM_MEMORY_SW)
154 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
155 size_t i;
156 LIBXSMM_DIFF_AVX2_DECL(aa);
157 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
158 for (i = 0; i < (size & 0xFFFFFFFFFFFFFFE0); i += 32) {
159 LIBXSMM_DIFF_AVX2_LOAD(aa, a8 + i);
160 if (LIBXSMM_DIFF_AVX2(aa, b8 + i, 0/*dummy*/)) return 1;
161 }
162 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
163 return 0;
164 #else
165 return internal_memcmp_sw(a, b, size);
166 #endif
167 }
168
169
LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX512)170 LIBXSMM_API_INLINE LIBXSMM_INTRINSICS(LIBXSMM_X86_AVX512)
171 int internal_memcmp_avx512(const void* a, const void* b, size_t size)
172 {
173 #if defined(LIBXSMM_INTRINSICS_AVX512) && !defined(LIBXSMM_MEMORY_SW)
174 const uint8_t *const a8 = (const uint8_t*)a, *const b8 = (const uint8_t*)b;
175 size_t i;
176 LIBXSMM_DIFF_AVX512_DECL(aa);
177 LIBXSMM_PRAGMA_UNROLL/*_N(2)*/
178 for (i = 0; i < (size & 0xFFFFFFFFFFFFFFC0); i += 64) {
179 LIBXSMM_DIFF_AVX512_LOAD(aa, a8 + i);
180 if (LIBXSMM_DIFF_AVX512(aa, b8 + i, 0/*dummy*/)) return 1;
181 }
182 for (; i < size; ++i) if (a8[i] ^ b8[i]) return 1;
183 return 0;
184 #else
185 return internal_memcmp_sw(a, b, size);
186 #endif
187 }
188
189
libxsmm_memory_init(int target_arch)190 LIBXSMM_API_INTERN void libxsmm_memory_init(int target_arch)
191 {
192 #if defined(LIBXSMM_MEMORY_SW)
193 LIBXSMM_UNUSED(target_arch);
194 #else
195 if (LIBXSMM_X86_AVX512 <= target_arch) {
196 # if defined(LIBXSMM_DIFF_AVX512_ENABLED)
197 internal_diff_function = internal_diff_avx512;
198 # else
199 internal_diff_function = internal_diff_avx2;
200 # endif
201 # if defined(LIBXSMM_DIFF_AVX512_ENABLED)
202 internal_memcmp_function = internal_memcmp_avx512;
203 # else
204 internal_memcmp_function = internal_memcmp_avx2;
205 # endif
206 }
207 else if (LIBXSMM_X86_AVX2 <= target_arch) {
208 internal_diff_function = internal_diff_avx2;
209 internal_memcmp_function = internal_memcmp_avx2;
210 }
211 else if (LIBXSMM_X86_SSE3 <= target_arch) {
212 internal_diff_function = internal_diff_sse3;
213 internal_memcmp_function = internal_memcmp_sse3;
214 }
215 else {
216 internal_diff_function = internal_diff_sw;
217 internal_memcmp_function = internal_memcmp_sw;
218 }
219 LIBXSMM_ASSERT(NULL != internal_diff_function);
220 LIBXSMM_ASSERT(NULL != internal_memcmp_function);
221 #endif
222 }
223
224
libxsmm_memory_finalize(void)225 LIBXSMM_API_INTERN void libxsmm_memory_finalize(void)
226 {
227 #if !defined(NDEBUG) && !defined(LIBXSMM_MEMORY_SW)
228 internal_diff_function = NULL;
229 internal_memcmp_function = NULL;
230 #endif
231 }
232
233
libxsmm_diff_16(const void * a,const void * b,...)234 LIBXSMM_API unsigned char libxsmm_diff_16(const void* a, const void* b, ...)
235 {
236 #if defined(LIBXSMM_MEMORY_SW)
237 return internal_diff_sw(a, b, 16);
238 #else
239 LIBXSMM_DIFF_16_DECL(a16);
240 LIBXSMM_DIFF_16_LOAD(a16, a);
241 return LIBXSMM_DIFF_16(a16, b, 0/*dummy*/);
242 #endif
243 }
244
245
libxsmm_diff_32(const void * a,const void * b,...)246 LIBXSMM_API unsigned char libxsmm_diff_32(const void* a, const void* b, ...)
247 {
248 #if defined(LIBXSMM_MEMORY_SW)
249 return internal_diff_sw(a, b, 32);
250 #else
251 LIBXSMM_DIFF_32_DECL(a32);
252 LIBXSMM_DIFF_32_LOAD(a32, a);
253 return LIBXSMM_DIFF_32(a32, b, 0/*dummy*/);
254 #endif
255 }
256
257
libxsmm_diff_48(const void * a,const void * b,...)258 LIBXSMM_API unsigned char libxsmm_diff_48(const void* a, const void* b, ...)
259 {
260 #if defined(LIBXSMM_MEMORY_SW)
261 return internal_diff_sw(a, b, 48);
262 #else
263 LIBXSMM_DIFF_48_DECL(a48);
264 LIBXSMM_DIFF_48_LOAD(a48, a);
265 return LIBXSMM_DIFF_48(a48, b, 0/*dummy*/);
266 #endif
267 }
268
269
libxsmm_diff_64(const void * a,const void * b,...)270 LIBXSMM_API unsigned char libxsmm_diff_64(const void* a, const void* b, ...)
271 {
272 #if defined(LIBXSMM_MEMORY_SW)
273 return internal_diff_sw(a, b, 64);
274 #else
275 LIBXSMM_DIFF_64_DECL(a64);
276 LIBXSMM_DIFF_64_LOAD(a64, a);
277 return LIBXSMM_DIFF_64(a64, b, 0/*dummy*/);
278 #endif
279 }
280
281
libxsmm_diff(const void * a,const void * b,unsigned char size)282 LIBXSMM_API unsigned char libxsmm_diff(const void* a, const void* b, unsigned char size)
283 {
284 #if defined(LIBXSMM_MEMORY_SW) && !defined(LIBXSMM_MEMORY_STDLIB)
285 return internal_diff_sw(a, b, size);
286 #else
287 # if defined(LIBXSMM_MEMORY_STDLIB)
288 return 0 != memcmp(a, b, size);
289 # elif (LIBXSMM_X86_AVX512 <= LIBXSMM_STATIC_TARGET_ARCH) && defined(LIBXSMM_DIFF_AVX512_ENABLED)
290 return internal_diff_avx512(a, b, size);
291 # elif (LIBXSMM_X86_AVX2 <= LIBXSMM_STATIC_TARGET_ARCH)
292 return internal_diff_avx2(a, b, size);
293 # elif (LIBXSMM_X86_SSE3 <= LIBXSMM_STATIC_TARGET_ARCH)
294 # if (LIBXSMM_X86_AVX2 > LIBXSMM_MAX_STATIC_TARGET_ARCH)
295 return internal_diff_sse3(a, b, size);
296 # else /* pointer based function call */
297 # if defined(LIBXSMM_INIT_COMPLETED)
298 LIBXSMM_ASSERT(NULL != internal_diff_function);
299 return internal_diff_function(a, b, size);
300 # else
301 return (unsigned char)(NULL != internal_diff_function
302 ? internal_diff_function(a, b, size)
303 : internal_diff_sse3(a, b, size));
304 # endif
305 # endif
306 # else
307 return internal_diff_sw(a, b, size);
308 # endif
309 #endif
310 }
311
312
libxsmm_diff_n(const void * a,const void * bn,unsigned char size,unsigned char stride,unsigned int hint,unsigned int n)313 LIBXSMM_API unsigned int libxsmm_diff_n(const void* a, const void* bn, unsigned char size,
314 unsigned char stride, unsigned int hint, unsigned int n)
315 {
316 unsigned int result;
317 LIBXSMM_ASSERT(size <= stride);
318 #if defined(LIBXSMM_MEMORY_STDLIB) && !defined(LIBXSMM_MEMORY_SW)
319 LIBXSMM_DIFF_N(unsigned int, result, memcmp, a, bn, size, stride, hint, n);
320 #else
321 # if !defined(LIBXSMM_MEMORY_SW)
322 switch (size) {
323 case 64: {
324 LIBXSMM_DIFF_64_DECL(a64);
325 LIBXSMM_DIFF_64_LOAD(a64, a);
326 LIBXSMM_DIFF_N(unsigned int, result, LIBXSMM_DIFF_64, a64, bn, size, stride, hint, n);
327 } break;
328 case 48: {
329 LIBXSMM_DIFF_48_DECL(a48);
330 LIBXSMM_DIFF_48_LOAD(a48, a);
331 LIBXSMM_DIFF_N(unsigned int, result, LIBXSMM_DIFF_48, a48, bn, size, stride, hint, n);
332 } break;
333 case 32: {
334 LIBXSMM_DIFF_32_DECL(a32);
335 LIBXSMM_DIFF_32_LOAD(a32, a);
336 LIBXSMM_DIFF_N(unsigned int, result, LIBXSMM_DIFF_32, a32, bn, size, stride, hint, n);
337 } break;
338 case 16: {
339 LIBXSMM_DIFF_16_DECL(a16);
340 LIBXSMM_DIFF_16_LOAD(a16, a);
341 LIBXSMM_DIFF_N(unsigned int, result, LIBXSMM_DIFF_16, a16, bn, size, stride, hint, n);
342 } break;
343 default:
344 # endif
345 {
346 LIBXSMM_DIFF_N(unsigned int, result, libxsmm_diff, a, bn, size, stride, hint, n);
347 }
348 # if !defined(LIBXSMM_MEMORY_SW)
349 }
350 # endif
351 #endif
352 return result;
353 }
354
355
libxsmm_memcmp(const void * a,const void * b,size_t size)356 LIBXSMM_API int libxsmm_memcmp(const void* a, const void* b, size_t size)
357 {
358 #if defined(LIBXSMM_MEMORY_SW) && !defined(LIBXSMM_MEMORY_STDLIB)
359 return internal_memcmp_sw(a, b, size);
360 #else
361 # if defined(LIBXSMM_MEMORY_STDLIB)
362 return memcmp(a, b, size);
363 # elif (LIBXSMM_X86_AVX512 <= LIBXSMM_STATIC_TARGET_ARCH) && defined(LIBXSMM_DIFF_AVX512_ENABLED)
364 return internal_memcmp_avx512(a, b, size);
365 # elif (LIBXSMM_X86_AVX2 <= LIBXSMM_STATIC_TARGET_ARCH)
366 return internal_memcmp_avx2(a, b, size);
367 # elif (LIBXSMM_X86_SSE3 <= LIBXSMM_STATIC_TARGET_ARCH)
368 # if (LIBXSMM_X86_AVX2 > LIBXSMM_MAX_STATIC_TARGET_ARCH)
369 return internal_memcmp_sse3(a, b, size);
370 # else /* pointer based function call */
371 # if defined(LIBXSMM_INIT_COMPLETED)
372 LIBXSMM_ASSERT(NULL != internal_memcmp_function);
373 return internal_memcmp_function(a, b, size);
374 # else
375 return NULL != internal_memcmp_function
376 ? internal_memcmp_function(a, b, size)
377 : internal_memcmp_sse3(a, b, size);
378 # endif
379 # endif
380 # else
381 return internal_memcmp_sw(a, b, size);
382 # endif
383 #endif
384 }
385
386
libxsmm_hash(const void * data,unsigned int size,unsigned int seed)387 LIBXSMM_API unsigned int libxsmm_hash(const void* data, unsigned int size, unsigned int seed)
388 {
389 LIBXSMM_INIT
390 return libxsmm_crc32(seed, data, size);
391 }
392
393
libxsmm_hash_string(const char * string)394 LIBXSMM_API unsigned long long libxsmm_hash_string(const char* string)
395 {
396 unsigned long long result;
397 const size_t length = NULL != string ? strlen(string) : 0;
398 if (sizeof(result) < length) {
399 const size_t length2 = length / 2;
400 unsigned int seed32 = 0; /* seed=0: match else-optimization */
401 LIBXSMM_INIT
402 seed32 = libxsmm_crc32(seed32, string, length2);
403 result = libxsmm_crc32(seed32, string + length2, length - length2);
404 result = (result << 32) | seed32;
405 }
406 else { /* reinterpret directly as hash value */
407 char *const s = (char*)&result; signed char i;
408 for (i = 0; i < (signed char)length; ++i) s[i] = string[i];
409 for (; i < (signed char)sizeof(result); ++i) s[i] = 0;
410 }
411 return result;
412 }
413
414
415 #if defined(LIBXSMM_BUILD) && (!defined(LIBXSMM_NOFORTRAN) || defined(__clang_analyzer__))
416
417 /* implementation provided for Fortran 77 compatibility */
418 LIBXSMM_API void LIBXSMM_FSYMBOL(libxsmm_xhash)(int* /*hash_seed*/, const void* /*data*/, const int* /*size*/);
LIBXSMM_FSYMBOL(libxsmm_xhash)419 LIBXSMM_API void LIBXSMM_FSYMBOL(libxsmm_xhash)(int* hash_seed, const void* data, const int* size)
420 {
421 #if !defined(NDEBUG)
422 static int error_once = 0;
423 if (NULL != hash_seed && NULL != data && NULL != size && 0 <= *size)
424 #endif
425 {
426 *hash_seed = (int)(libxsmm_hash(data, (unsigned int)*size, (unsigned int)*hash_seed) & 0x7FFFFFFF/*sign-bit*/);
427 }
428 #if !defined(NDEBUG)
429 else if (0 != libxsmm_verbosity /* library code is expected to be mute */
430 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
431 {
432 fprintf(stderr, "LIBXSMM ERROR: invalid arguments for libxsmm_xhash specified!\n");
433 }
434 #endif
435 }
436
437
438 /* implementation provided for Fortran 77 compatibility */
439 LIBXSMM_API void LIBXSMM_FSYMBOL(libxsmm_xdiff)(int* /*result*/, const void* /*a*/, const void* /*b*/, const long long* /*size*/);
LIBXSMM_FSYMBOL(libxsmm_xdiff)440 LIBXSMM_API void LIBXSMM_FSYMBOL(libxsmm_xdiff)(int* result, const void* a, const void* b, const long long* size)
441 {
442 #if !defined(NDEBUG)
443 static int error_once = 0;
444 if (NULL != result && NULL != a && NULL != b && NULL != size && 0 <= *size)
445 #endif
446 {
447 *result = libxsmm_memcmp(a, b, (size_t)*size);
448 }
449 #if !defined(NDEBUG)
450 else if (0 != libxsmm_verbosity /* library code is expected to be mute */
451 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
452 {
453 fprintf(stderr, "LIBXSMM ERROR: invalid arguments for libxsmm_xdiff specified!\n");
454 }
455 #endif
456 }
457
458
459 /* implementation provided for Fortran 77 compatibility */
460 LIBXSMM_API void LIBXSMM_FSYMBOL(libxsmm_xclear)(void* /*dst*/, const int* /*size*/);
LIBXSMM_FSYMBOL(libxsmm_xclear)461 LIBXSMM_API void LIBXSMM_FSYMBOL(libxsmm_xclear)(void* dst, const int* size)
462 {
463 #if !defined(NDEBUG)
464 static int error_once = 0;
465 if (NULL != dst && NULL != size && 0 <= *size && 128 > *size)
466 #endif
467 {
468 LIBXSMM_MEMSET127(dst, 0, *size);
469 }
470 #if !defined(NDEBUG)
471 else if (0 != libxsmm_verbosity /* library code is expected to be mute */
472 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
473 {
474 fprintf(stderr, "LIBXSMM ERROR: invalid arguments for libxsmm_xclear specified!\n");
475 }
476 #endif
477 }
478
479 #endif /*defined(LIBXSMM_BUILD) && (!defined(LIBXSMM_NOFORTRAN) || defined(__clang_analyzer__))*/
480
481