1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved.                      *
3 * This file is part of the LIBXSMM library.                                   *
4 *                                                                             *
5 * For information on the license, see the LICENSE file.                       *
6 * Further information: https://github.com/hfp/libxsmm/                        *
7 * SPDX-License-Identifier: BSD-3-Clause                                       *
8 ******************************************************************************/
9 /* Hans Pabst (Intel Corp.)
10 ******************************************************************************/
11 #ifndef LIBXSMM_DIFF_H
12 #define LIBXSMM_DIFF_H
13 
14 #include <libxsmm_intrinsics_x86.h>
15 
/**
 * Switch selecting the AVX-512 intrinsic flavor of LIBXSMM_DIFF_AVX512.
 * The constant zero in the condition keeps the automatic definition off,
 * i.e., the switch is only active if LIBXSMM_DIFF_AVX512_ENABLED is
 * already defined when this header is processed.
 */
#if 0 && !defined(LIBXSMM_DIFF_AVX512_ENABLED)
# define LIBXSMM_DIFF_AVX512_ENABLED
#endif
19 
/**
 * 16-Byte comparison based on 128-bit integer intrinsics.
 * - DECL(A): declares the __m128i variable A holding the reference operand.
 * - ASSIGN(A, B): copies the operand B into A.
 * - LOAD(A, SRC): fills A from the address SRC using LIBXSMM_INTRINSICS_LDDQU_SI128.
 * - LIBXSMM_DIFF_SSE3(A, B, ...): loads 16 Bytes from address B, compares them
 *   byte-wise with A, and yields (unsigned char) zero if all 16 Bytes are equal
 *   (all 16 mask bits set), and one otherwise. Additional arguments are ignored.
 */
#define LIBXSMM_DIFF_SSE3_DECL(A) __m128i A
#define LIBXSMM_DIFF_SSE3_ASSIGN(A, B) (A) = (B)
#define LIBXSMM_DIFF_SSE3_LOAD(A, SRC) \
  A = LIBXSMM_INTRINSICS_LDDQU_SI128((const __m128i*)(SRC))
#define LIBXSMM_DIFF_SSE3(A, B, ...) ((unsigned char)( \
  _mm_movemask_epi8(_mm_cmpeq_epi8(A, \
    LIBXSMM_INTRINSICS_LDDQU_SI128((const __m128i*)(B)))) != 0xFFFF))
25 
/**
 * LIBXSMM_DIFF_16 family (DECL, ASSIGN, LOAD, and the comparison itself).
 * If the static target architecture is below SSE3, the operand is kept as a
 * pointer to two 64-bit words and the comparison XORs both words with the
 * words found at B (non-zero result means "different"). Otherwise, the
 * family is an alias of the LIBXSMM_DIFF_SSE3 macros.
 */
#if (LIBXSMM_STATIC_TARGET_ARCH < LIBXSMM_X86_SSE3) /*&& !defined(LIBXSMM_INTRINSICS_TARGET)*/
# define LIBXSMM_DIFF_16_DECL(A) const uint64_t */*const*/ A
# define LIBXSMM_DIFF_16_ASSIGN(A, B) (A) = (B)
# define LIBXSMM_DIFF_16_LOAD(A, SRC) A = (const uint64_t*)(SRC)
# define LIBXSMM_DIFF_16(A, B, ...) ((unsigned char)((((A)[0] ^ ((const uint64_t*)(B))[0]) | \
    ((A)[1] ^ ((const uint64_t*)(B))[1])) != 0))
#else /* SSE3 (or later) is statically available */
# define LIBXSMM_DIFF_16_DECL LIBXSMM_DIFF_SSE3_DECL
# define LIBXSMM_DIFF_16_ASSIGN LIBXSMM_DIFF_SSE3_ASSIGN
# define LIBXSMM_DIFF_16_LOAD LIBXSMM_DIFF_SSE3_LOAD
# define LIBXSMM_DIFF_16 LIBXSMM_DIFF_SSE3
#endif
38 
/**
 * 32-Byte comparison based on 256-bit integer intrinsics.
 * - DECL(A): declares the __m256i variable A holding the reference operand.
 * - ASSIGN(A, B): copies the operand B into A.
 * - LOAD(A, SRC): fills A from the address SRC (_mm256_loadu_si256).
 * - LIBXSMM_DIFF_AVX2(A, B, ...): loads 32 Bytes from address B, compares them
 *   byte-wise with A, and yields (unsigned char) zero if all 32 Bytes are equal
 *   (all 32 mask bits set, i.e., -1), and one otherwise. Additional arguments
 *   are ignored.
 */
#define LIBXSMM_DIFF_AVX2_DECL(A) __m256i A
#define LIBXSMM_DIFF_AVX2_ASSIGN(A, B) (A) = (B)
#define LIBXSMM_DIFF_AVX2_LOAD(A, SRC) \
  A = _mm256_loadu_si256((const __m256i*)(SRC))
#define LIBXSMM_DIFF_AVX2(A, B, ...) ((unsigned char)( \
  _mm256_movemask_epi8(_mm256_cmpeq_epi8(A, \
    _mm256_loadu_si256((const __m256i*)(B)))) != -1))
44 
/**
 * LIBXSMM_DIFF_32 family (DECL, ASSIGN, LOAD, and the comparison itself).
 * If the static target architecture is below AVX2, a 32-Byte operand is
 * composed of two 16-Byte operands: A covers the lower 16 Bytes, and a
 * companion variable named libxsmm_diff_32_<A>_ covers the upper 16 Bytes
 * (offset of two 64-bit words). The comparison short-circuits, i.e., the
 * upper half is only inspected if the lower half is equal. Otherwise, the
 * family is an alias of the LIBXSMM_DIFF_AVX2 macros.
 * Note: A (and B of ASSIGN) must be plain identifiers in the composed case
 * since they take part in forming the companion's name.
 */
#if (LIBXSMM_STATIC_TARGET_ARCH < LIBXSMM_X86_AVX2)
# define LIBXSMM_DIFF_32_DECL(A) \
    LIBXSMM_DIFF_16_DECL(A); \
    LIBXSMM_DIFF_16_DECL(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _))
# define LIBXSMM_DIFF_32_ASSIGN(A, B) \
    LIBXSMM_DIFF_16_ASSIGN(A, B); \
    LIBXSMM_DIFF_16_ASSIGN(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _), \
      LIBXSMM_CONCATENATE3(libxsmm_diff_32_, B, _))
# define LIBXSMM_DIFF_32_LOAD(A, SRC) \
    LIBXSMM_DIFF_16_LOAD(A, SRC); \
    LIBXSMM_DIFF_16_LOAD(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _), (const uint64_t*)(SRC) + 2)
# define LIBXSMM_DIFF_32(A, B, ...) ((unsigned char)( \
    0 != LIBXSMM_DIFF_16(A, B, __VA_ARGS__) || \
    0 != LIBXSMM_DIFF_16(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _), (const uint64_t*)(B) + 2, __VA_ARGS__)))
#else /* AVX2 (or later) is statically available */
# define LIBXSMM_DIFF_32_DECL LIBXSMM_DIFF_AVX2_DECL
# define LIBXSMM_DIFF_32_ASSIGN LIBXSMM_DIFF_AVX2_ASSIGN
# define LIBXSMM_DIFF_32_LOAD LIBXSMM_DIFF_AVX2_LOAD
# define LIBXSMM_DIFF_32 LIBXSMM_DIFF_AVX2
#endif
56 
/**
 * LIBXSMM_DIFF_48 family: a 48-Byte operand is composed of a 16-Byte operand
 * (A, lower 16 Bytes) followed by a 32-Byte operand held in a companion
 * variable named libxsmm_diff_48_<A>_ (starting at an offset of two 64-bit
 * words). The comparison yields (unsigned char) zero if both parts are equal;
 * the 32-Byte part is only inspected if the leading 16 Bytes are equal.
 * Note: A (and B of ASSIGN) must be plain identifiers since they take part
 * in forming the companion's name.
 */
#define LIBXSMM_DIFF_48_DECL(A) \
  LIBXSMM_DIFF_16_DECL(A); \
  LIBXSMM_DIFF_32_DECL(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _))
#define LIBXSMM_DIFF_48_ASSIGN(A, B) \
  LIBXSMM_DIFF_16_ASSIGN(A, B); \
  LIBXSMM_DIFF_32_ASSIGN(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _), \
    LIBXSMM_CONCATENATE3(libxsmm_diff_48_, B, _))
#define LIBXSMM_DIFF_48_LOAD(A, SRC) \
  LIBXSMM_DIFF_16_LOAD(A, SRC); \
  LIBXSMM_DIFF_32_LOAD(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _), (const uint64_t*)(SRC) + 2)
#define LIBXSMM_DIFF_48(A, B, ...) ((unsigned char)( \
  0 != LIBXSMM_DIFF_16(A, B, __VA_ARGS__) || \
  0 != LIBXSMM_DIFF_32(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _), (const uint64_t*)(B) + 2, __VA_ARGS__)))
61 
/**
 * LIBXSMM_DIFF_64SW family: a 64-Byte operand is composed of two 32-Byte
 * operands, namely A (lower 32 Bytes) and a companion variable named
 * libxsmm_diff_64_<A>_ (upper 32 Bytes, i.e., an offset of four 64-bit words).
 * The comparison yields (unsigned char) zero if both halves are equal; the
 * upper half is only inspected if the lower half is equal.
 * Note: A (and B of ASSIGN) must be plain identifiers since they take part
 * in forming the companion's name.
 */
#define LIBXSMM_DIFF_64SW_DECL(A) \
  LIBXSMM_DIFF_32_DECL(A); \
  LIBXSMM_DIFF_32_DECL(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _))
#define LIBXSMM_DIFF_64SW_ASSIGN(A, B) \
  LIBXSMM_DIFF_32_ASSIGN(A, B); \
  LIBXSMM_DIFF_32_ASSIGN(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _), \
    LIBXSMM_CONCATENATE3(libxsmm_diff_64_, B, _))
#define LIBXSMM_DIFF_64SW_LOAD(A, SRC) \
  LIBXSMM_DIFF_32_LOAD(A, SRC); \
  LIBXSMM_DIFF_32_LOAD(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _), (const uint64_t*)(SRC) + 4)
#define LIBXSMM_DIFF_64SW(A, B, ...) ((unsigned char)( \
  0 != LIBXSMM_DIFF_32(A, B, __VA_ARGS__) || \
  0 != LIBXSMM_DIFF_32(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _), (const uint64_t*)(B) + 4, __VA_ARGS__)))
66 
/**
 * LIBXSMM_DIFF_AVX512 family (DECL, ASSIGN, LOAD, and the comparison itself).
 * Without LIBXSMM_DIFF_AVX512_ENABLED, the family is an alias of the composed
 * LIBXSMM_DIFF_64SW macros. With the switch defined, the operand is an __m512i
 * and the comparison checks sixteen 32-bit lanes at once: it yields
 * (unsigned char) zero if all 16 mask bits are set (all lanes equal), and
 * one otherwise. Additional arguments are ignored.
 */
#if !defined(LIBXSMM_DIFF_AVX512_ENABLED)
# define LIBXSMM_DIFF_AVX512_DECL LIBXSMM_DIFF_64SW_DECL
# define LIBXSMM_DIFF_AVX512_ASSIGN LIBXSMM_DIFF_64SW_ASSIGN
# define LIBXSMM_DIFF_AVX512_LOAD LIBXSMM_DIFF_64SW_LOAD
# define LIBXSMM_DIFF_AVX512 LIBXSMM_DIFF_64SW
#else /* AVX-512 intrinsics requested */
# define LIBXSMM_DIFF_AVX512_DECL(A) __m512i A
# define LIBXSMM_DIFF_AVX512_ASSIGN(A, B) (A) = (B)
# define LIBXSMM_DIFF_AVX512_LOAD(A, SRC) \
    A = _mm512_loadu_si512((const __m512i*)(SRC))
# define LIBXSMM_DIFF_AVX512(A, B, ...) ((unsigned char)( \
    (unsigned int)/*_cvtmask16_u32*/(_mm512_cmpeq_epi32_mask(A, \
      _mm512_loadu_si512((const __m512i*)(B)))) != 0xFFFF))
#endif
79 
/**
 * LIBXSMM_DIFF_64 family (DECL, ASSIGN, LOAD, and the comparison itself).
 * If the static target architecture is below AVX-512, the family is an alias
 * of the composed LIBXSMM_DIFF_64SW macros; otherwise it is an alias of the
 * LIBXSMM_DIFF_AVX512 macros (which in turn fall back to LIBXSMM_DIFF_64SW
 * unless LIBXSMM_DIFF_AVX512_ENABLED is defined).
 */
#if (LIBXSMM_STATIC_TARGET_ARCH < LIBXSMM_X86_AVX512)
# define LIBXSMM_DIFF_64_DECL LIBXSMM_DIFF_64SW_DECL
# define LIBXSMM_DIFF_64_ASSIGN LIBXSMM_DIFF_64SW_ASSIGN
# define LIBXSMM_DIFF_64_LOAD LIBXSMM_DIFF_64SW_LOAD
# define LIBXSMM_DIFF_64 LIBXSMM_DIFF_64SW
#else /* AVX-512 is statically available */
# define LIBXSMM_DIFF_64_DECL LIBXSMM_DIFF_AVX512_DECL
# define LIBXSMM_DIFF_64_ASSIGN LIBXSMM_DIFF_AVX512_ASSIGN
# define LIBXSMM_DIFF_64_LOAD LIBXSMM_DIFF_AVX512_LOAD
# define LIBXSMM_DIFF_64 LIBXSMM_DIFF_AVX512
#endif
91 
/**
 * Size-generic front-ends selecting a macro family by pasting N into the name
 * (using LIBXSMM_CONCATENATE/LIBXSMM_CONCATENATE3, which are defined elsewhere):
 * - LIBXSMM_DIFF_DECL(N, A) forwards to LIBXSMM_DIFF_<N>_DECL(A).
 * - LIBXSMM_DIFF_LOAD(N, A, SRC) forwards to LIBXSMM_DIFF_<N>_LOAD(A, SRC).
 * - LIBXSMM_DIFF(N) names the comparison macro LIBXSMM_DIFF_<N> without invoking it,
 *   e.g., to pass it as the DIFF argument of LIBXSMM_DIFF_N.
 * N is expected to match one of the families of this header (e.g., 16, 32, 48, or 64).
 */
#define LIBXSMM_DIFF_DECL(N, A) LIBXSMM_CONCATENATE3(LIBXSMM_DIFF_, N, _DECL)(A)
#define LIBXSMM_DIFF_LOAD(N, A, SRC) LIBXSMM_CONCATENATE3(LIBXSMM_DIFF_, N, _LOAD)(A, SRC)
#define LIBXSMM_DIFF(N) LIBXSMM_CONCATENATE(LIBXSMM_DIFF_, N)
95 
/**
 * Searches a table of N entries for the entry equal to A and stores the index
 * of the match in RESULT; RESULT is set to N if no entry matches.
 * The search starts at index HINT and proceeds to the end of the table; if
 * this does not yield a match, the entries [0, HINT) are searched as well.
 * A HINT that is not smaller than N is tolerated: no address beyond the table
 * is formed or accessed, and all N entries are searched starting at index zero.
 *
 * TYPE:     integer type of the internal index (expected to suit RESULT).
 * RESULT:   lvalue receiving the index of the match (or N).
 * DIFF:     comparison invoked as DIFF(A, entry_address, ELEMSIZE); zero means equal.
 * A:        reference operand in the form expected by DIFF.
 * BN:       address of the first table entry.
 * ELEMSIZE: forwarded to DIFF as third argument.
 * STRIDE:   distance (in Bytes) between two consecutive entries.
 * HINT:     index at which the search starts.
 * N:        number of table entries.
 *
 * Note: the macro expands to a compound statement, and RESULT, HINT, N, and
 * STRIDE are evaluated more than once (arguments must be free of side effects).
 */
#define LIBXSMM_DIFF_N(TYPE, RESULT, DIFF, A, BN, ELEMSIZE, STRIDE, HINT, N) { \
  const char* libxsmm_diff_b_ = (const char*)(BN); \
  RESULT = (HINT); \
  if ((RESULT) < (N)) { /* valid hint: only then derive the start address */ \
    libxsmm_diff_b_ += (size_t)(RESULT) * (STRIDE); \
    for (; (RESULT) < (N); ++(RESULT)) { \
      if (0 == DIFF(A, libxsmm_diff_b_, ELEMSIZE)) break; \
      libxsmm_diff_b_ += (STRIDE); \
    } \
  } \
  if ((N) <= (RESULT)) { /* wrong hint (no match at/after hint, or hint out of range) */ \
    TYPE libxsmm_diff_r_ = 0; \
    libxsmm_diff_b_ = (const char*)(BN); /* reset */ \
    RESULT = (N); /* "not found" unless the scan below succeeds */ \
    for (; libxsmm_diff_r_ < (HINT) && libxsmm_diff_r_ < (N); ++libxsmm_diff_r_) { \
      if (0 == DIFF(A, libxsmm_diff_b_, ELEMSIZE)) { \
        RESULT = libxsmm_diff_r_; \
        break; \
      } \
      libxsmm_diff_b_ += (STRIDE); \
    } \
  } \
}
114 
115 
/**
 * Function type representing the diff-functionality: a function taking two
 * untyped operands (a, b) followed by a variadic tail (annotated as size),
 * and returning an unsigned int.
 * NOTE(review): the libxsmm_diff_16/32/48/64 functions declared in this header
 * return unsigned char rather than unsigned int; confirm whether this pointer
 * type is meant to hold those functions.
 */
LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE unsigned int (*libxsmm_diff_function)(
  const void* /*a*/, const void* /*b*/, ... /*size*/);
119 
/**
 * Compare two data blocks (a, b) of 16 Byte each.
 * NOTE(review): presumably yields zero if both blocks are equal and non-zero
 * otherwise (like the LIBXSMM_DIFF_16 macro); the definition is not part of
 * this header, so confirm against the implementation. The same question
 * applies to the purpose of the variadic tail.
 */
LIBXSMM_API unsigned char libxsmm_diff_16(const void* a, const void* b, ...);
/** Compare two data blocks (a, b) of 32 Byte each (see the notes on libxsmm_diff_16). */
LIBXSMM_API unsigned char libxsmm_diff_32(const void* a, const void* b, ...);
/** Compare two data blocks (a, b) of 48 Byte each (see the notes on libxsmm_diff_16). */
LIBXSMM_API unsigned char libxsmm_diff_48(const void* a, const void* b, ...);
/** Compare two data blocks (a, b) of 64 Byte each (see the notes on libxsmm_diff_16). */
LIBXSMM_API unsigned char libxsmm_diff_64(const void* a, const void* b, ...);
128 
129 #endif /*LIBXSMM_DIFF_H*/
130 
131