17d565a4fSMatthew Dillon /* 27d565a4fSMatthew Dillon xxHash - Extremely Fast Hash algorithm 37d565a4fSMatthew Dillon Header File 47d565a4fSMatthew Dillon Copyright (C) 2012-2016, Yann Collet. 57d565a4fSMatthew Dillon 67d565a4fSMatthew Dillon BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 77d565a4fSMatthew Dillon 87d565a4fSMatthew Dillon Redistribution and use in source and binary forms, with or without 97d565a4fSMatthew Dillon modification, are permitted provided that the following conditions are 107d565a4fSMatthew Dillon met: 117d565a4fSMatthew Dillon 127d565a4fSMatthew Dillon * Redistributions of source code must retain the above copyright 137d565a4fSMatthew Dillon notice, this list of conditions and the following disclaimer. 147d565a4fSMatthew Dillon * Redistributions in binary form must reproduce the above 157d565a4fSMatthew Dillon copyright notice, this list of conditions and the following disclaimer 167d565a4fSMatthew Dillon in the documentation and/or other materials provided with the 177d565a4fSMatthew Dillon distribution. 187d565a4fSMatthew Dillon 197d565a4fSMatthew Dillon THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 207d565a4fSMatthew Dillon "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 217d565a4fSMatthew Dillon LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 227d565a4fSMatthew Dillon A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 237d565a4fSMatthew Dillon OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 247d565a4fSMatthew Dillon SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 257d565a4fSMatthew Dillon LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 267d565a4fSMatthew Dillon DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 277d565a4fSMatthew Dillon THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 287d565a4fSMatthew Dillon (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 297d565a4fSMatthew Dillon OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 307d565a4fSMatthew Dillon 317d565a4fSMatthew Dillon You can contact the author at : 327d565a4fSMatthew Dillon - xxHash source repository : https://github.com/Cyan4973/xxHash 337d565a4fSMatthew Dillon */ 347d565a4fSMatthew Dillon 35*30ce27d4SMatthew Dillon /* DRAGONFLY ADDITION - allows inclusion in conf/files */ 36*30ce27d4SMatthew Dillon #define XXH_NAMESPACE h2_ 37*30ce27d4SMatthew Dillon 387d565a4fSMatthew Dillon /* Notice extracted from xxHash homepage : 397d565a4fSMatthew Dillon 407d565a4fSMatthew Dillon xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 417d565a4fSMatthew Dillon It also successfully passes all tests from the SMHasher suite. 427d565a4fSMatthew Dillon 437d565a4fSMatthew Dillon Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 447d565a4fSMatthew Dillon 457d565a4fSMatthew Dillon Name Speed Q.Score Author 467d565a4fSMatthew Dillon xxHash 5.4 GB/s 10 477d565a4fSMatthew Dillon CrapWow 3.2 GB/s 2 Andrew 487d565a4fSMatthew Dillon MumurHash 3a 2.7 GB/s 10 Austin Appleby 497d565a4fSMatthew Dillon SpookyHash 2.0 GB/s 10 Bob Jenkins 507d565a4fSMatthew Dillon SBox 1.4 GB/s 9 Bret Mulvey 517d565a4fSMatthew Dillon Lookup3 1.2 GB/s 9 Bob Jenkins 527d565a4fSMatthew Dillon SuperFastHash 1.2 GB/s 1 Paul Hsieh 537d565a4fSMatthew Dillon CityHash64 1.05 GB/s 10 Pike & Alakuijala 547d565a4fSMatthew Dillon FNV 0.55 GB/s 5 Fowler, Noll, Vo 557d565a4fSMatthew Dillon CRC32 0.43 GB/s 9 567d565a4fSMatthew Dillon MD5-32 0.33 GB/s 10 Ronald L. Rivest 577d565a4fSMatthew Dillon SHA1-32 0.28 GB/s 10 587d565a4fSMatthew Dillon 597d565a4fSMatthew Dillon Q.Score is a measure of quality of the hash function. 607d565a4fSMatthew Dillon It depends on successfully passing SMHasher test set. 617d565a4fSMatthew Dillon 10 is a perfect score. 627d565a4fSMatthew Dillon 637d565a4fSMatthew Dillon A 64-bits version, named XXH64, is available since r35. 647d565a4fSMatthew Dillon It offers much better speed, but for 64-bits applications only. 657d565a4fSMatthew Dillon Name Speed on 64 bits Speed on 32 bits 667d565a4fSMatthew Dillon XXH64 13.8 GB/s 1.9 GB/s 677d565a4fSMatthew Dillon XXH32 6.8 GB/s 6.0 GB/s 687d565a4fSMatthew Dillon */ 697d565a4fSMatthew Dillon 707d565a4fSMatthew Dillon #ifndef XXHASH_H_5627135585666179 717d565a4fSMatthew Dillon #define XXHASH_H_5627135585666179 1 727d565a4fSMatthew Dillon 737d565a4fSMatthew Dillon #if defined (__cplusplus) 747d565a4fSMatthew Dillon extern "C" { 757d565a4fSMatthew Dillon #endif 767d565a4fSMatthew Dillon 777d565a4fSMatthew Dillon 787d565a4fSMatthew Dillon /* **************************** 797d565a4fSMatthew Dillon * Definitions 807d565a4fSMatthew Dillon ******************************/ 817d565a4fSMatthew Dillon #if !defined(_KERNEL) 827d565a4fSMatthew Dillon #include <stddef.h> /* size_t */ 837d565a4fSMatthew Dillon #endif 847d565a4fSMatthew Dillon typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 857d565a4fSMatthew Dillon 867d565a4fSMatthew Dillon 877d565a4fSMatthew Dillon /* **************************** 887d565a4fSMatthew Dillon * API modifier 897d565a4fSMatthew Dillon ******************************/ 907d565a4fSMatthew Dillon /*!XXH_PRIVATE_API 917d565a4fSMatthew Dillon * Transforms all publics symbols within `xxhash.c` into private ones. 927d565a4fSMatthew Dillon * Methodology : 937d565a4fSMatthew Dillon * instead of : #include "xxhash.h" 947d565a4fSMatthew Dillon * do : 957d565a4fSMatthew Dillon * #define XXH_PRIVATE_API 967d565a4fSMatthew Dillon * #include "xxhash.c" // note the .c , instead of .h 977d565a4fSMatthew Dillon * also : don't compile and link xxhash.c separately 987d565a4fSMatthew Dillon */ 997d565a4fSMatthew Dillon #ifdef XXH_PRIVATE_API 1007d565a4fSMatthew Dillon # if defined(__GNUC__) 1017d565a4fSMatthew Dillon # define XXH_PUBLIC_API static __attribute__((unused)) 1027d565a4fSMatthew Dillon # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 1037d565a4fSMatthew Dillon # define XXH_PUBLIC_API static inline 1047d565a4fSMatthew Dillon # elif defined(_MSC_VER) 1057d565a4fSMatthew Dillon # define XXH_PUBLIC_API static __inline 1067d565a4fSMatthew Dillon # else 1077d565a4fSMatthew Dillon # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ 1087d565a4fSMatthew Dillon # endif 1097d565a4fSMatthew Dillon #else 1107d565a4fSMatthew Dillon # define XXH_PUBLIC_API /* do nothing */ 1117d565a4fSMatthew Dillon #endif 1127d565a4fSMatthew Dillon 1137d565a4fSMatthew Dillon /*!XXH_NAMESPACE, aka Namespace Emulation : 1147d565a4fSMatthew Dillon 1157d565a4fSMatthew Dillon If you want to include _and expose_ xxHash functions from within your own library, 1167d565a4fSMatthew Dillon but also want to avoid symbol collisions with another library which also includes xxHash, 1177d565a4fSMatthew Dillon 1187d565a4fSMatthew Dillon you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c` 1197d565a4fSMatthew Dillon with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 1207d565a4fSMatthew Dillon 1217d565a4fSMatthew Dillon Note that no change is required within the calling program as long as it also includes `xxhash.h` : 1227d565a4fSMatthew Dillon regular symbol name will be automatically translated by this header. 1237d565a4fSMatthew Dillon */ 1247d565a4fSMatthew Dillon #ifdef XXH_NAMESPACE 1257d565a4fSMatthew Dillon # define XXH_CAT(A,B) A##B 1267d565a4fSMatthew Dillon # define XXH_NAME2(A,B) XXH_CAT(A,B) 1277d565a4fSMatthew Dillon # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) 1287d565a4fSMatthew Dillon # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 1297d565a4fSMatthew Dillon # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) 1307d565a4fSMatthew Dillon # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) 1317d565a4fSMatthew Dillon # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 1327d565a4fSMatthew Dillon # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) 1337d565a4fSMatthew Dillon # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 1347d565a4fSMatthew Dillon # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) 1357d565a4fSMatthew Dillon # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 1367d565a4fSMatthew Dillon # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) 1377d565a4fSMatthew Dillon # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 1387d565a4fSMatthew Dillon # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) 1397d565a4fSMatthew Dillon # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 1407d565a4fSMatthew Dillon #endif 1417d565a4fSMatthew Dillon 1427d565a4fSMatthew Dillon 1437d565a4fSMatthew Dillon /* ************************************* 1447d565a4fSMatthew Dillon * Version 1457d565a4fSMatthew Dillon ***************************************/ 1467d565a4fSMatthew Dillon #define XXH_VERSION_MAJOR 0 1477d565a4fSMatthew Dillon #define XXH_VERSION_MINOR 6 1487d565a4fSMatthew Dillon #define XXH_VERSION_RELEASE 0 1497d565a4fSMatthew Dillon #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 1507d565a4fSMatthew Dillon XXH_PUBLIC_API unsigned XXH_versionNumber (void); 1517d565a4fSMatthew Dillon 1527d565a4fSMatthew Dillon 1537d565a4fSMatthew Dillon /* **************************** 1547d565a4fSMatthew Dillon * Simple Hash Functions 1557d565a4fSMatthew Dillon ******************************/ 1567d565a4fSMatthew Dillon typedef unsigned int XXH32_hash_t; 1577d565a4fSMatthew Dillon typedef unsigned long long XXH64_hash_t; 1587d565a4fSMatthew Dillon 1597d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 1607d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 1617d565a4fSMatthew Dillon 1627d565a4fSMatthew Dillon /*! 1637d565a4fSMatthew Dillon XXH32() : 1647d565a4fSMatthew Dillon Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 1657d565a4fSMatthew Dillon The memory between input & input+length must be valid (allocated and read-accessible). 1667d565a4fSMatthew Dillon "seed" can be used to alter the result predictably. 1677d565a4fSMatthew Dillon Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 1687d565a4fSMatthew Dillon XXH64() : 1697d565a4fSMatthew Dillon Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 1707d565a4fSMatthew Dillon "seed" can be used to alter the result predictably. 1717d565a4fSMatthew Dillon This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 1727d565a4fSMatthew Dillon */ 1737d565a4fSMatthew Dillon 1747d565a4fSMatthew Dillon 1757d565a4fSMatthew Dillon /* **************************** 1767d565a4fSMatthew Dillon * Streaming Hash Functions 1777d565a4fSMatthew Dillon ******************************/ 1787d565a4fSMatthew Dillon typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ 1797d565a4fSMatthew Dillon typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 1807d565a4fSMatthew Dillon 1817d565a4fSMatthew Dillon /*! Dynamic allocation of states 1827d565a4fSMatthew Dillon Compatible with dynamic libraries */ 1837d565a4fSMatthew Dillon 1847d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); 1857d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 1867d565a4fSMatthew Dillon 1877d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 1887d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 1897d565a4fSMatthew Dillon 1907d565a4fSMatthew Dillon 1917d565a4fSMatthew Dillon /* hash streaming */ 1927d565a4fSMatthew Dillon 1937d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); 1947d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 1957d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); 1967d565a4fSMatthew Dillon 1977d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 1987d565a4fSMatthew Dillon XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 1997d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 2007d565a4fSMatthew Dillon 2017d565a4fSMatthew Dillon /*! 2027d565a4fSMatthew Dillon These functions generate the xxHash of an input provided in multiple segments, 2037d565a4fSMatthew Dillon as opposed to provided as a single block. 2047d565a4fSMatthew Dillon 2057d565a4fSMatthew Dillon XXH state must first be allocated, using either static or dynamic method provided above. 2067d565a4fSMatthew Dillon 2077d565a4fSMatthew Dillon Start a new hash by initializing state with a seed, using XXHnn_reset(). 2087d565a4fSMatthew Dillon 2097d565a4fSMatthew Dillon Then, feed the hash state by calling XXHnn_update() as many times as necessary. 2107d565a4fSMatthew Dillon Obviously, input must be valid, hence allocated and read accessible. 2117d565a4fSMatthew Dillon The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 2127d565a4fSMatthew Dillon 2137d565a4fSMatthew Dillon Finally, a hash value can be produced anytime, by using XXHnn_digest(). 2147d565a4fSMatthew Dillon This function returns the nn-bits hash as an int or long long. 2157d565a4fSMatthew Dillon 2167d565a4fSMatthew Dillon It's still possible to continue inserting input into the hash state after a digest, 2177d565a4fSMatthew Dillon and later on generate some new hashes, by calling again XXHnn_digest(). 2187d565a4fSMatthew Dillon 2197d565a4fSMatthew Dillon When done, free XXH state space if it was allocated dynamically. 2207d565a4fSMatthew Dillon */ 2217d565a4fSMatthew Dillon 2227d565a4fSMatthew Dillon 2237d565a4fSMatthew Dillon /* ************************** 2247d565a4fSMatthew Dillon * Canonical representation 2257d565a4fSMatthew Dillon ****************************/ 2267d565a4fSMatthew Dillon typedef struct { unsigned char digest[4]; } XXH32_canonical_t; 2277d565a4fSMatthew Dillon typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 2287d565a4fSMatthew Dillon 2297d565a4fSMatthew Dillon XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); 2307d565a4fSMatthew Dillon XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 2317d565a4fSMatthew Dillon 2327d565a4fSMatthew Dillon XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); 2337d565a4fSMatthew Dillon XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 2347d565a4fSMatthew Dillon 2357d565a4fSMatthew Dillon /*! Default result type for XXH functions are primitive unsigned 32 and 64 bits. 2367d565a4fSMatthew Dillon * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 2377d565a4fSMatthew Dillon * These functions allow transformation of hash result into and from its canonical format. 2387d565a4fSMatthew Dillon * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 2397d565a4fSMatthew Dillon */ 2407d565a4fSMatthew Dillon 2417d565a4fSMatthew Dillon 2427d565a4fSMatthew Dillon #ifdef XXH_STATIC_LINKING_ONLY 2437d565a4fSMatthew Dillon 2447d565a4fSMatthew Dillon /* This part contains definition which shall only be used with static linking. 2457d565a4fSMatthew Dillon The prototypes / types defined here are not guaranteed to remain stable. 2467d565a4fSMatthew Dillon They could change in a future version, becoming incompatible with a different version of the library */ 2477d565a4fSMatthew Dillon 2487d565a4fSMatthew Dillon struct XXH32_state_s { 2497d565a4fSMatthew Dillon unsigned long long total_len; 2507d565a4fSMatthew Dillon unsigned seed; 2517d565a4fSMatthew Dillon unsigned v1; 2527d565a4fSMatthew Dillon unsigned v2; 2537d565a4fSMatthew Dillon unsigned v3; 2547d565a4fSMatthew Dillon unsigned v4; 2557d565a4fSMatthew Dillon unsigned mem32[4]; /* buffer defined as U32 for alignment */ 2567d565a4fSMatthew Dillon unsigned memsize; 2577d565a4fSMatthew Dillon }; /* typedef'd to XXH32_state_t */ 2587d565a4fSMatthew Dillon 2597d565a4fSMatthew Dillon struct XXH64_state_s { 2607d565a4fSMatthew Dillon unsigned long long total_len; 2617d565a4fSMatthew Dillon unsigned long long seed; 2627d565a4fSMatthew Dillon unsigned long long v1; 2637d565a4fSMatthew Dillon unsigned long long v2; 2647d565a4fSMatthew Dillon unsigned long long v3; 2657d565a4fSMatthew Dillon unsigned long long v4; 2667d565a4fSMatthew Dillon unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 2677d565a4fSMatthew Dillon unsigned memsize; 2687d565a4fSMatthew Dillon }; /* typedef'd to XXH64_state_t */ 2697d565a4fSMatthew Dillon 2707d565a4fSMatthew Dillon 2717d565a4fSMatthew Dillon #endif 2727d565a4fSMatthew Dillon 2737d565a4fSMatthew Dillon 2747d565a4fSMatthew Dillon #if defined (__cplusplus) 2757d565a4fSMatthew Dillon } 2767d565a4fSMatthew Dillon #endif 2777d565a4fSMatthew Dillon 2787d565a4fSMatthew Dillon #endif /* XXHASH_H_5627135585666179 */ 279