1*7d565a4fSMatthew Dillon /* 2*7d565a4fSMatthew Dillon xxHash - Extremely Fast Hash algorithm 3*7d565a4fSMatthew Dillon Header File 4*7d565a4fSMatthew Dillon Copyright (C) 2012-2016, Yann Collet. 5*7d565a4fSMatthew Dillon 6*7d565a4fSMatthew Dillon BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7*7d565a4fSMatthew Dillon 8*7d565a4fSMatthew Dillon Redistribution and use in source and binary forms, with or without 9*7d565a4fSMatthew Dillon modification, are permitted provided that the following conditions are 10*7d565a4fSMatthew Dillon met: 11*7d565a4fSMatthew Dillon 12*7d565a4fSMatthew Dillon * Redistributions of source code must retain the above copyright 13*7d565a4fSMatthew Dillon notice, this list of conditions and the following disclaimer. 14*7d565a4fSMatthew Dillon * Redistributions in binary form must reproduce the above 15*7d565a4fSMatthew Dillon copyright notice, this list of conditions and the following disclaimer 16*7d565a4fSMatthew Dillon in the documentation and/or other materials provided with the 17*7d565a4fSMatthew Dillon distribution. 18*7d565a4fSMatthew Dillon 19*7d565a4fSMatthew Dillon THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20*7d565a4fSMatthew Dillon "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21*7d565a4fSMatthew Dillon LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22*7d565a4fSMatthew Dillon A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23*7d565a4fSMatthew Dillon OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24*7d565a4fSMatthew Dillon SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25*7d565a4fSMatthew Dillon LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26*7d565a4fSMatthew Dillon DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27*7d565a4fSMatthew Dillon THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28*7d565a4fSMatthew Dillon (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29*7d565a4fSMatthew Dillon OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30*7d565a4fSMatthew Dillon 31*7d565a4fSMatthew Dillon You can contact the author at : 32*7d565a4fSMatthew Dillon - xxHash source repository : https://github.com/Cyan4973/xxHash 33*7d565a4fSMatthew Dillon */ 34*7d565a4fSMatthew Dillon 35*7d565a4fSMatthew Dillon /* Notice extracted from xxHash homepage : 36*7d565a4fSMatthew Dillon 37*7d565a4fSMatthew Dillon xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 38*7d565a4fSMatthew Dillon It also successfully passes all tests from the SMHasher suite. 39*7d565a4fSMatthew Dillon 40*7d565a4fSMatthew Dillon Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 41*7d565a4fSMatthew Dillon 42*7d565a4fSMatthew Dillon Name Speed Q.Score Author 43*7d565a4fSMatthew Dillon xxHash 5.4 GB/s 10 44*7d565a4fSMatthew Dillon CrapWow 3.2 GB/s 2 Andrew 45*7d565a4fSMatthew Dillon MumurHash 3a 2.7 GB/s 10 Austin Appleby 46*7d565a4fSMatthew Dillon SpookyHash 2.0 GB/s 10 Bob Jenkins 47*7d565a4fSMatthew Dillon SBox 1.4 GB/s 9 Bret Mulvey 48*7d565a4fSMatthew Dillon Lookup3 1.2 GB/s 9 Bob Jenkins 49*7d565a4fSMatthew Dillon SuperFastHash 1.2 GB/s 1 Paul Hsieh 50*7d565a4fSMatthew Dillon CityHash64 1.05 GB/s 10 Pike & Alakuijala 51*7d565a4fSMatthew Dillon FNV 0.55 GB/s 5 Fowler, Noll, Vo 52*7d565a4fSMatthew Dillon CRC32 0.43 GB/s 9 53*7d565a4fSMatthew Dillon MD5-32 0.33 GB/s 10 Ronald L. Rivest 54*7d565a4fSMatthew Dillon SHA1-32 0.28 GB/s 10 55*7d565a4fSMatthew Dillon 56*7d565a4fSMatthew Dillon Q.Score is a measure of quality of the hash function. 57*7d565a4fSMatthew Dillon It depends on successfully passing SMHasher test set. 58*7d565a4fSMatthew Dillon 10 is a perfect score. 59*7d565a4fSMatthew Dillon 60*7d565a4fSMatthew Dillon A 64-bits version, named XXH64, is available since r35. 61*7d565a4fSMatthew Dillon It offers much better speed, but for 64-bits applications only. 62*7d565a4fSMatthew Dillon Name Speed on 64 bits Speed on 32 bits 63*7d565a4fSMatthew Dillon XXH64 13.8 GB/s 1.9 GB/s 64*7d565a4fSMatthew Dillon XXH32 6.8 GB/s 6.0 GB/s 65*7d565a4fSMatthew Dillon */ 66*7d565a4fSMatthew Dillon 67*7d565a4fSMatthew Dillon #ifndef XXHASH_H_5627135585666179 68*7d565a4fSMatthew Dillon #define XXHASH_H_5627135585666179 1 69*7d565a4fSMatthew Dillon 70*7d565a4fSMatthew Dillon #if defined (__cplusplus) 71*7d565a4fSMatthew Dillon extern "C" { 72*7d565a4fSMatthew Dillon #endif 73*7d565a4fSMatthew Dillon 74*7d565a4fSMatthew Dillon 75*7d565a4fSMatthew Dillon /* **************************** 76*7d565a4fSMatthew Dillon * Definitions 77*7d565a4fSMatthew Dillon ******************************/ 78*7d565a4fSMatthew Dillon #if !defined(_KERNEL) 79*7d565a4fSMatthew Dillon #include <stddef.h> /* size_t */ 80*7d565a4fSMatthew Dillon #endif 81*7d565a4fSMatthew Dillon typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 82*7d565a4fSMatthew Dillon 83*7d565a4fSMatthew Dillon 84*7d565a4fSMatthew Dillon /* **************************** 85*7d565a4fSMatthew Dillon * API modifier 86*7d565a4fSMatthew Dillon ******************************/ 87*7d565a4fSMatthew Dillon /*!XXH_PRIVATE_API 88*7d565a4fSMatthew Dillon * Transforms all publics symbols within `xxhash.c` into private ones. 89*7d565a4fSMatthew Dillon * Methodology : 90*7d565a4fSMatthew Dillon * instead of : #include "xxhash.h" 91*7d565a4fSMatthew Dillon * do : 92*7d565a4fSMatthew Dillon * #define XXH_PRIVATE_API 93*7d565a4fSMatthew Dillon * #include "xxhash.c" // note the .c , instead of .h 94*7d565a4fSMatthew Dillon * also : don't compile and link xxhash.c separately 95*7d565a4fSMatthew Dillon */ 96*7d565a4fSMatthew Dillon #ifdef XXH_PRIVATE_API 97*7d565a4fSMatthew Dillon # if defined(__GNUC__) 98*7d565a4fSMatthew Dillon # define XXH_PUBLIC_API static __attribute__((unused)) 99*7d565a4fSMatthew Dillon # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 100*7d565a4fSMatthew Dillon # define XXH_PUBLIC_API static inline 101*7d565a4fSMatthew Dillon # elif defined(_MSC_VER) 102*7d565a4fSMatthew Dillon # define XXH_PUBLIC_API static __inline 103*7d565a4fSMatthew Dillon # else 104*7d565a4fSMatthew Dillon # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ 105*7d565a4fSMatthew Dillon # endif 106*7d565a4fSMatthew Dillon #else 107*7d565a4fSMatthew Dillon # define XXH_PUBLIC_API /* do nothing */ 108*7d565a4fSMatthew Dillon #endif 109*7d565a4fSMatthew Dillon 110*7d565a4fSMatthew Dillon /*!XXH_NAMESPACE, aka Namespace Emulation : 111*7d565a4fSMatthew Dillon 112*7d565a4fSMatthew Dillon If you want to include _and expose_ xxHash functions from within your own library, 113*7d565a4fSMatthew Dillon but also want to avoid symbol collisions with another library which also includes xxHash, 114*7d565a4fSMatthew Dillon 115*7d565a4fSMatthew Dillon you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c` 116*7d565a4fSMatthew Dillon with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 117*7d565a4fSMatthew Dillon 118*7d565a4fSMatthew Dillon Note that no change is required within the calling program as long as it also includes `xxhash.h` : 119*7d565a4fSMatthew Dillon regular symbol name will be automatically translated by this header. 120*7d565a4fSMatthew Dillon */ 121*7d565a4fSMatthew Dillon #ifdef XXH_NAMESPACE 122*7d565a4fSMatthew Dillon # define XXH_CAT(A,B) A##B 123*7d565a4fSMatthew Dillon # define XXH_NAME2(A,B) XXH_CAT(A,B) 124*7d565a4fSMatthew Dillon # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) 125*7d565a4fSMatthew Dillon # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 126*7d565a4fSMatthew Dillon # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) 127*7d565a4fSMatthew Dillon # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) 128*7d565a4fSMatthew Dillon # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 129*7d565a4fSMatthew Dillon # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) 130*7d565a4fSMatthew Dillon # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 131*7d565a4fSMatthew Dillon # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) 132*7d565a4fSMatthew Dillon # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 133*7d565a4fSMatthew Dillon # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) 134*7d565a4fSMatthew Dillon # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 135*7d565a4fSMatthew Dillon # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) 136*7d565a4fSMatthew Dillon # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 137*7d565a4fSMatthew Dillon #endif 138*7d565a4fSMatthew Dillon 139*7d565a4fSMatthew Dillon 140*7d565a4fSMatthew Dillon /* ************************************* 141*7d565a4fSMatthew Dillon * Version 142*7d565a4fSMatthew Dillon ***************************************/ 143*7d565a4fSMatthew Dillon #define XXH_VERSION_MAJOR 0 144*7d565a4fSMatthew Dillon #define XXH_VERSION_MINOR 6 145*7d565a4fSMatthew Dillon #define XXH_VERSION_RELEASE 0 146*7d565a4fSMatthew Dillon #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 147*7d565a4fSMatthew Dillon XXH_PUBLIC_API unsigned XXH_versionNumber (void); 148*7d565a4fSMatthew Dillon 149*7d565a4fSMatthew Dillon 150*7d565a4fSMatthew Dillon /* **************************** 151*7d565a4fSMatthew Dillon * Simple Hash Functions 152*7d565a4fSMatthew Dillon ******************************/ 153*7d565a4fSMatthew Dillon typedef unsigned int XXH32_hash_t; 154*7d565a4fSMatthew Dillon typedef unsigned long long XXH64_hash_t; 155*7d565a4fSMatthew Dillon 156*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 157*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 158*7d565a4fSMatthew Dillon 159*7d565a4fSMatthew Dillon /*! 160*7d565a4fSMatthew Dillon XXH32() : 161*7d565a4fSMatthew Dillon Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 162*7d565a4fSMatthew Dillon The memory between input & input+length must be valid (allocated and read-accessible). 163*7d565a4fSMatthew Dillon "seed" can be used to alter the result predictably. 164*7d565a4fSMatthew Dillon Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 165*7d565a4fSMatthew Dillon XXH64() : 166*7d565a4fSMatthew Dillon Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 167*7d565a4fSMatthew Dillon "seed" can be used to alter the result predictably. 168*7d565a4fSMatthew Dillon This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 169*7d565a4fSMatthew Dillon */ 170*7d565a4fSMatthew Dillon 171*7d565a4fSMatthew Dillon 172*7d565a4fSMatthew Dillon /* **************************** 173*7d565a4fSMatthew Dillon * Streaming Hash Functions 174*7d565a4fSMatthew Dillon ******************************/ 175*7d565a4fSMatthew Dillon typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ 176*7d565a4fSMatthew Dillon typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 177*7d565a4fSMatthew Dillon 178*7d565a4fSMatthew Dillon /*! Dynamic allocation of states 179*7d565a4fSMatthew Dillon Compatible with dynamic libraries */ 180*7d565a4fSMatthew Dillon 181*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); 182*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 183*7d565a4fSMatthew Dillon 184*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 185*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 186*7d565a4fSMatthew Dillon 187*7d565a4fSMatthew Dillon 188*7d565a4fSMatthew Dillon /* hash streaming */ 189*7d565a4fSMatthew Dillon 190*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); 191*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 192*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); 193*7d565a4fSMatthew Dillon 194*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 195*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 196*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 197*7d565a4fSMatthew Dillon 198*7d565a4fSMatthew Dillon /*! 199*7d565a4fSMatthew Dillon These functions generate the xxHash of an input provided in multiple segments, 200*7d565a4fSMatthew Dillon as opposed to provided as a single block. 201*7d565a4fSMatthew Dillon 202*7d565a4fSMatthew Dillon XXH state must first be allocated, using either static or dynamic method provided above. 203*7d565a4fSMatthew Dillon 204*7d565a4fSMatthew Dillon Start a new hash by initializing state with a seed, using XXHnn_reset(). 205*7d565a4fSMatthew Dillon 206*7d565a4fSMatthew Dillon Then, feed the hash state by calling XXHnn_update() as many times as necessary. 207*7d565a4fSMatthew Dillon Obviously, input must be valid, hence allocated and read accessible. 208*7d565a4fSMatthew Dillon The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 209*7d565a4fSMatthew Dillon 210*7d565a4fSMatthew Dillon Finally, a hash value can be produced anytime, by using XXHnn_digest(). 211*7d565a4fSMatthew Dillon This function returns the nn-bits hash as an int or long long. 212*7d565a4fSMatthew Dillon 213*7d565a4fSMatthew Dillon It's still possible to continue inserting input into the hash state after a digest, 214*7d565a4fSMatthew Dillon and later on generate some new hashes, by calling again XXHnn_digest(). 215*7d565a4fSMatthew Dillon 216*7d565a4fSMatthew Dillon When done, free XXH state space if it was allocated dynamically. 217*7d565a4fSMatthew Dillon */ 218*7d565a4fSMatthew Dillon 219*7d565a4fSMatthew Dillon 220*7d565a4fSMatthew Dillon /* ************************** 221*7d565a4fSMatthew Dillon * Canonical representation 222*7d565a4fSMatthew Dillon ****************************/ 223*7d565a4fSMatthew Dillon typedef struct { unsigned char digest[4]; } XXH32_canonical_t; 224*7d565a4fSMatthew Dillon typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 225*7d565a4fSMatthew Dillon 226*7d565a4fSMatthew Dillon XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); 227*7d565a4fSMatthew Dillon XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 228*7d565a4fSMatthew Dillon 229*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); 230*7d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 231*7d565a4fSMatthew Dillon 232*7d565a4fSMatthew Dillon /*! Default result type for XXH functions are primitive unsigned 32 and 64 bits. 233*7d565a4fSMatthew Dillon * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 234*7d565a4fSMatthew Dillon * These functions allow transformation of hash result into and from its canonical format. 235*7d565a4fSMatthew Dillon * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 236*7d565a4fSMatthew Dillon */ 237*7d565a4fSMatthew Dillon 238*7d565a4fSMatthew Dillon 239*7d565a4fSMatthew Dillon #ifdef XXH_STATIC_LINKING_ONLY 240*7d565a4fSMatthew Dillon 241*7d565a4fSMatthew Dillon /* This part contains definition which shall only be used with static linking. 242*7d565a4fSMatthew Dillon The prototypes / types defined here are not guaranteed to remain stable. 243*7d565a4fSMatthew Dillon They could change in a future version, becoming incompatible with a different version of the library */ 244*7d565a4fSMatthew Dillon 245*7d565a4fSMatthew Dillon struct XXH32_state_s { 246*7d565a4fSMatthew Dillon unsigned long long total_len; 247*7d565a4fSMatthew Dillon unsigned seed; 248*7d565a4fSMatthew Dillon unsigned v1; 249*7d565a4fSMatthew Dillon unsigned v2; 250*7d565a4fSMatthew Dillon unsigned v3; 251*7d565a4fSMatthew Dillon unsigned v4; 252*7d565a4fSMatthew Dillon unsigned mem32[4]; /* buffer defined as U32 for alignment */ 253*7d565a4fSMatthew Dillon unsigned memsize; 254*7d565a4fSMatthew Dillon }; /* typedef'd to XXH32_state_t */ 255*7d565a4fSMatthew Dillon 256*7d565a4fSMatthew Dillon struct XXH64_state_s { 257*7d565a4fSMatthew Dillon unsigned long long total_len; 258*7d565a4fSMatthew Dillon unsigned long long seed; 259*7d565a4fSMatthew Dillon unsigned long long v1; 260*7d565a4fSMatthew Dillon unsigned long long v2; 261*7d565a4fSMatthew Dillon unsigned long long v3; 262*7d565a4fSMatthew Dillon unsigned long long v4; 263*7d565a4fSMatthew Dillon unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 264*7d565a4fSMatthew Dillon unsigned memsize; 265*7d565a4fSMatthew Dillon }; /* typedef'd to XXH64_state_t */ 266*7d565a4fSMatthew Dillon 267*7d565a4fSMatthew Dillon 268*7d565a4fSMatthew Dillon #endif 269*7d565a4fSMatthew Dillon 270*7d565a4fSMatthew Dillon 271*7d565a4fSMatthew Dillon #if defined (__cplusplus) 272*7d565a4fSMatthew Dillon } 273*7d565a4fSMatthew Dillon #endif 274*7d565a4fSMatthew Dillon 275*7d565a4fSMatthew Dillon #endif /* XXHASH_H_5627135585666179 */ 276