1 /****************************************************************************** 2 * Copyright (c) Intel Corporation - All rights reserved. * 3 * This file is part of the LIBXSMM library. * 4 * * 5 * For information on the license, see the LICENSE file. * 6 * Further information: https://github.com/hfp/libxsmm/ * 7 * SPDX-License-Identifier: BSD-3-Clause * 8 ******************************************************************************/ 9 /* Hans Pabst (Intel Corp.) 10 ******************************************************************************/ 11 #ifndef LIBXSMM_DIFF_H 12 #define LIBXSMM_DIFF_H 13 14 #include <libxsmm_intrinsics_x86.h> 15 16 #if !defined(LIBXSMM_DIFF_AVX512_ENABLED) && 0 17 # define LIBXSMM_DIFF_AVX512_ENABLED 18 #endif 19 20 #define LIBXSMM_DIFF_SSE3_DECL(A) __m128i A 21 #define LIBXSMM_DIFF_SSE3_ASSIGN(A, B) (A) = (B) 22 #define LIBXSMM_DIFF_SSE3_LOAD(A, SRC) A = LIBXSMM_INTRINSICS_LDDQU_SI128((const __m128i*)(SRC)) 23 #define LIBXSMM_DIFF_SSE3(A, B, ...) ((unsigned char)(0xFFFF != _mm_movemask_epi8(_mm_cmpeq_epi8( \ 24 A, LIBXSMM_INTRINSICS_LDDQU_SI128((const __m128i*)(B)))))) 25 26 #if (LIBXSMM_X86_SSE3 <= LIBXSMM_STATIC_TARGET_ARCH) /*|| defined(LIBXSMM_INTRINSICS_TARGET)*/ 27 # define LIBXSMM_DIFF_16_DECL LIBXSMM_DIFF_SSE3_DECL 28 # define LIBXSMM_DIFF_16_ASSIGN LIBXSMM_DIFF_SSE3_ASSIGN 29 # define LIBXSMM_DIFF_16_LOAD LIBXSMM_DIFF_SSE3_LOAD 30 # define LIBXSMM_DIFF_16 LIBXSMM_DIFF_SSE3 31 #else 32 # define LIBXSMM_DIFF_16_DECL(A) const uint64_t */*const*/ A 33 # define LIBXSMM_DIFF_16_ASSIGN(A, B) (A) = (B) 34 # define LIBXSMM_DIFF_16_LOAD(A, SRC) A = (const uint64_t*)(SRC) 35 # define LIBXSMM_DIFF_16(A, B, ...) ((unsigned char)(0 != (((A)[0] ^ (*(const uint64_t*)(B))) | \ 36 ((A)[1] ^ ((const uint64_t*)(B))[1])))) 37 #endif 38 39 #define LIBXSMM_DIFF_AVX2_DECL(A) __m256i A 40 #define LIBXSMM_DIFF_AVX2_ASSIGN(A, B) (A) = (B) 41 #define LIBXSMM_DIFF_AVX2_LOAD(A, SRC) A = _mm256_loadu_si256((const __m256i*)(SRC)) 42 #define LIBXSMM_DIFF_AVX2(A, B, ...) ((unsigned char)(-1 != _mm256_movemask_epi8(_mm256_cmpeq_epi8( \ 43 A, _mm256_loadu_si256((const __m256i*)(B)))))) 44 45 #if (LIBXSMM_X86_AVX2 <= LIBXSMM_STATIC_TARGET_ARCH) 46 # define LIBXSMM_DIFF_32_DECL LIBXSMM_DIFF_AVX2_DECL 47 # define LIBXSMM_DIFF_32_ASSIGN LIBXSMM_DIFF_AVX2_ASSIGN 48 # define LIBXSMM_DIFF_32_LOAD LIBXSMM_DIFF_AVX2_LOAD 49 # define LIBXSMM_DIFF_32 LIBXSMM_DIFF_AVX2 50 #else 51 # define LIBXSMM_DIFF_32_DECL(A) LIBXSMM_DIFF_16_DECL(A); LIBXSMM_DIFF_16_DECL(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _)) 52 # define LIBXSMM_DIFF_32_ASSIGN(A, B) LIBXSMM_DIFF_16_ASSIGN(A, B); LIBXSMM_DIFF_16_ASSIGN(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _), LIBXSMM_CONCATENATE3(libxsmm_diff_32_, B, _)) 53 # define LIBXSMM_DIFF_32_LOAD(A, SRC) LIBXSMM_DIFF_16_LOAD(A, SRC); LIBXSMM_DIFF_16_LOAD(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _), (const uint64_t*)(SRC) + 2) 54 # define LIBXSMM_DIFF_32(A, B, ...) ((unsigned char)(0 != LIBXSMM_DIFF_16(A, B, __VA_ARGS__) ? 1 : LIBXSMM_DIFF_16(LIBXSMM_CONCATENATE3(libxsmm_diff_32_, A, _), (const uint64_t*)(B) + 2, __VA_ARGS__))) 55 #endif 56 57 #define LIBXSMM_DIFF_48_DECL(A) LIBXSMM_DIFF_16_DECL(A); LIBXSMM_DIFF_32_DECL(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _)) 58 #define LIBXSMM_DIFF_48_ASSIGN(A, B) LIBXSMM_DIFF_16_ASSIGN(A, B); LIBXSMM_DIFF_32_ASSIGN(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _), LIBXSMM_CONCATENATE3(libxsmm_diff_48_, B, _)) 59 #define LIBXSMM_DIFF_48_LOAD(A, SRC) LIBXSMM_DIFF_16_LOAD(A, SRC); LIBXSMM_DIFF_32_LOAD(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _), (const uint64_t*)(SRC) + 2) 60 #define LIBXSMM_DIFF_48(A, B, ...) ((unsigned char)(0 != LIBXSMM_DIFF_16(A, B, __VA_ARGS__) ? 1 : LIBXSMM_DIFF_32(LIBXSMM_CONCATENATE3(libxsmm_diff_48_, A, _), (const uint64_t*)(B) + 2, __VA_ARGS__))) 61 62 #define LIBXSMM_DIFF_64SW_DECL(A) LIBXSMM_DIFF_32_DECL(A); LIBXSMM_DIFF_32_DECL(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _)) 63 #define LIBXSMM_DIFF_64SW_ASSIGN(A, B) LIBXSMM_DIFF_32_ASSIGN(A, B); LIBXSMM_DIFF_32_ASSIGN(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _), LIBXSMM_CONCATENATE3(libxsmm_diff_64_, B, _)) 64 #define LIBXSMM_DIFF_64SW_LOAD(A, SRC) LIBXSMM_DIFF_32_LOAD(A, SRC); LIBXSMM_DIFF_32_LOAD(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _), (const uint64_t*)(SRC) + 4) 65 #define LIBXSMM_DIFF_64SW(A, B, ...) ((unsigned char)(0 != LIBXSMM_DIFF_32(A, B, __VA_ARGS__) ? 1 : LIBXSMM_DIFF_32(LIBXSMM_CONCATENATE3(libxsmm_diff_64_, A, _), (const uint64_t*)(B) + 4, __VA_ARGS__))) 66 67 #if defined(LIBXSMM_DIFF_AVX512_ENABLED) 68 # define LIBXSMM_DIFF_AVX512_DECL(A) __m512i A 69 # define LIBXSMM_DIFF_AVX512_ASSIGN(A, B) (A) = (B) 70 # define LIBXSMM_DIFF_AVX512_LOAD(A, SRC) A = _mm512_loadu_si512((const __m512i*)(SRC)) 71 # define LIBXSMM_DIFF_AVX512(A, B, ...) ((unsigned char)(0xFFFF != (unsigned int)/*_cvtmask16_u32*/(_mm512_cmpeq_epi32_mask( \ 72 A, _mm512_loadu_si512((const __m512i*)(B)))))) 73 #else 74 # define LIBXSMM_DIFF_AVX512_DECL LIBXSMM_DIFF_64SW_DECL 75 # define LIBXSMM_DIFF_AVX512_ASSIGN LIBXSMM_DIFF_64SW_ASSIGN 76 # define LIBXSMM_DIFF_AVX512_LOAD LIBXSMM_DIFF_64SW_LOAD 77 # define LIBXSMM_DIFF_AVX512 LIBXSMM_DIFF_64SW 78 #endif 79 80 #if (LIBXSMM_X86_AVX512 <= LIBXSMM_STATIC_TARGET_ARCH) 81 # define LIBXSMM_DIFF_64_DECL LIBXSMM_DIFF_AVX512_DECL 82 # define LIBXSMM_DIFF_64_ASSIGN LIBXSMM_DIFF_AVX512_ASSIGN 83 # define LIBXSMM_DIFF_64_LOAD LIBXSMM_DIFF_AVX512_LOAD 84 # define LIBXSMM_DIFF_64 LIBXSMM_DIFF_AVX512 85 #else 86 # define LIBXSMM_DIFF_64_DECL LIBXSMM_DIFF_64SW_DECL 87 # define LIBXSMM_DIFF_64_ASSIGN LIBXSMM_DIFF_64SW_ASSIGN 88 # define LIBXSMM_DIFF_64_LOAD LIBXSMM_DIFF_64SW_LOAD 89 # define LIBXSMM_DIFF_64 LIBXSMM_DIFF_64SW 90 #endif 91 92 #define LIBXSMM_DIFF_DECL(N, A) LIBXSMM_CONCATENATE3(LIBXSMM_DIFF_, N, _DECL)(A) 93 #define LIBXSMM_DIFF_LOAD(N, A, SRC) LIBXSMM_CONCATENATE3(LIBXSMM_DIFF_, N, _LOAD)(A, SRC) 94 #define LIBXSMM_DIFF(N) LIBXSMM_CONCATENATE(LIBXSMM_DIFF_, N) 95 96 #define LIBXSMM_DIFF_N(TYPE, RESULT, DIFF, A, BN, ELEMSIZE, STRIDE, HINT, N) { \ 97 const char* libxsmm_diff_b_ = (const char*)(BN) + (size_t)(HINT) * (STRIDE); \ 98 for (RESULT = (HINT); (RESULT) < (N); ++(RESULT)) { \ 99 if (0 == DIFF(A, libxsmm_diff_b_, ELEMSIZE)) break; \ 100 libxsmm_diff_b_ += (STRIDE); \ 101 } \ 102 if ((N) == (RESULT)) { /* wrong hint */ \ 103 TYPE libxsmm_diff_r_ = 0; \ 104 libxsmm_diff_b_ = (const char*)(BN); /* reset */ \ 105 for (; libxsmm_diff_r_ < (HINT); ++libxsmm_diff_r_) { \ 106 if (0 == DIFF(A, libxsmm_diff_b_, ELEMSIZE)) { \ 107 RESULT = libxsmm_diff_r_; \ 108 break; \ 109 } \ 110 libxsmm_diff_b_ += (STRIDE); \ 111 } \ 112 } \ 113 } 114 115 116 /** Function type representing the diff-functionality. */ 117 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE unsigned int (*libxsmm_diff_function)( 118 const void* /*a*/, const void* /*b*/, ... /*size*/); 119 120 /** Compare two data blocks of 16 Byte each. */ 121 LIBXSMM_API unsigned char libxsmm_diff_16(const void* a, const void* b, ...); 122 /** Compare two data blocks of 32 Byte each. */ 123 LIBXSMM_API unsigned char libxsmm_diff_32(const void* a, const void* b, ...); 124 /** Compare two data blocks of 48 Byte each. */ 125 LIBXSMM_API unsigned char libxsmm_diff_48(const void* a, const void* b, ...); 126 /** Compare two data blocks of 64 Byte each. */ 127 LIBXSMM_API unsigned char libxsmm_diff_64(const void* a, const void* b, ...); 128 129 #endif /*LIBXSMM_DIFF_H*/ 130 131