1 /*
2    xxHash - Extremely Fast Hash algorithm
3    Header File
4    Copyright (C) 2012-2016, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31    You can contact the author at :
32    - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34 
35 /* Notice extracted from xxHash homepage :
36 
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39 
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41 
42 Name            Speed       Q.Score   Author
43 xxHash          5.4 GB/s     10
44 CrapWow         3.2 GB/s      2       Andrew
45 MurmurHash 3a   2.7 GB/s     10       Austin Appleby
46 SpookyHash      2.0 GB/s     10       Bob Jenkins
47 SBox            1.4 GB/s      9       Bret Mulvey
48 Lookup3         1.2 GB/s      9       Bob Jenkins
49 SuperFastHash   1.2 GB/s      1       Paul Hsieh
50 CityHash64      1.05 GB/s    10       Pike & Alakuijala
51 FNV             0.55 GB/s     5       Fowler, Noll, Vo
52 CRC32           0.43 GB/s     9
53 MD5-32          0.33 GB/s    10       Ronald L. Rivest
54 SHA1-32         0.28 GB/s    10
55 
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing the SMHasher test set.
58 10 is a perfect score.
59 
60 Note : SMHasher's CRC32 implementation is not the fastest one.
61 Other speed-oriented implementations can be faster,
62 especially in combination with PCLMUL instruction :
63 http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
64 
65 A 64-bit version, named XXH64, is available since r35.
66 It offers much better speed, but for 64-bit applications only.
67 Name     Speed on 64 bits    Speed on 32 bits
68 XXH64       13.8 GB/s            1.9 GB/s
69 XXH32        6.8 GB/s            6.0 GB/s
70 */
71 
72 #if defined (__cplusplus)
73 extern "C" {
74 #endif
75 
76 
77 #ifndef XXHASH_H_5627135585666179
78 #define XXHASH_H_5627135585666179 1
79 
80 /* ****************************
81  *  API modifier
82  ******************************/
83 /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
84  *  This build macro includes xxhash functions in `static` mode
85  *  in order to inline them, and removes their symbols from the public list.
86  *  Inlining offers great performance improvement on small keys,
87  *  and dramatic ones when length is expressed as a compile-time constant.
88  *  See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
89  *  Methodology :
90  *     #define XXH_INLINE_ALL
91  *     #include "xxhash.h"
92  * `xxhash.c` is automatically included.
93  *  It's not useful to compile and link it as a separate object.
94  */
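/* Illustrative sketch (not part of the library) : a translation unit built in
 * XXH_INLINE_ALL mode, hashing a small key whose length is a compile-time constant.
 * The function and buffer names below are hypothetical.
 *
 *     #define XXH_INLINE_ALL
 *     #include "xxhash.h"
 *
 *     unsigned hash_small_key(void)
 *     {
 *         char key[16] = "0123456789abcde";            // 16 bytes, known at compile time
 *         return (unsigned)XXH32(key, sizeof(key), 0);
 *     }
 */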
95 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
96 #  ifndef XXH_STATIC_LINKING_ONLY
97 #    define XXH_STATIC_LINKING_ONLY
98 #  endif
99 #  if defined(__GNUC__)
100 #    define XXH_PUBLIC_API static __inline __attribute__((unused))
101 #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
102 #    define XXH_PUBLIC_API static inline
103 #  elif defined(_MSC_VER)
104 #    define XXH_PUBLIC_API static __inline
105 #  else
106      /* this version may generate warnings for unused static functions */
107 #    define XXH_PUBLIC_API static
108 #  endif
109 #else
110 #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
111 #    ifdef XXH_EXPORT
112 #      define XXH_PUBLIC_API __declspec(dllexport)
113 #    elif XXH_IMPORT
114 #      define XXH_PUBLIC_API __declspec(dllimport)
115 #    endif
116 #  else
117 #    define XXH_PUBLIC_API   /* do nothing */
118 #  endif
119 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
120 
121 /*! XXH_NAMESPACE, aka Namespace Emulation :
122  *
123  * If you want to include _and expose_ xxHash functions from within your own library,
124  * but also want to avoid symbol collisions with other libraries which may also include xxHash,
125  *
126  * you can use XXH_NAMESPACE to automatically prefix every public symbol from the xxHash library
127  * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
128  *
129  * Note that no change is required within the calling program as long as it includes `xxhash.h` :
130  * regular symbol names are automatically translated by this header.
131  */
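/* Illustrative sketch (not part of the library) : the prefix MYLIB_ below is hypothetical.
 * Build the library with a namespace :
 *
 *     cc -c xxhash.c -DXXH_NAMESPACE=MYLIB_
 *
 * The object file then exports MYLIB_XXH32, MYLIB_XXH64, etc.
 * Calling code keeps using the regular names, provided it sees the same definition :
 *
 *     #define XXH_NAMESPACE MYLIB_
 *     #include "xxhash.h"
 *     XXH32_hash_t h = XXH32(buffer, size, 0);   // resolves to MYLIB_XXH32
 */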
132 #ifdef XXH_NAMESPACE
133 #  define XXH_CAT(A,B) A##B
134 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
135 #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
136 #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
137 #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
138 #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
139 #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
140 #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
141 #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
142 #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
143 #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
144 #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
145 #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
146 #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
147 #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
148 #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
149 #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
150 #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
151 #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
152 #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
153 #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
154 #endif
155 
156 
157 /* *************************************
158 *  Version
159 ***************************************/
160 #define XXH_VERSION_MAJOR    0
161 #define XXH_VERSION_MINOR    7
162 #define XXH_VERSION_RELEASE  2
163 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
164 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
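/* For example, with the values above :
 *     XXH_VERSION_NUMBER == 0*100*100 + 7*100 + 2 == 702
 * so a runtime check such as `XXH_versionNumber() >= 702` accepts v0.7.2 and later. */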
165 
166 
167 /* ****************************
168 *  Definitions
169 ******************************/
170 #include <stddef.h>   /* size_t */
171 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
172 
173 
174 /*-**********************************************************************
175 *  32-bit hash
176 ************************************************************************/
177 #if !defined (__VMS) \
178   && (defined (__cplusplus) \
179   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
180 #   include <stdint.h>
181     typedef uint32_t XXH32_hash_t;
182 #else
183 #   include <limits.h>
184 #   if UINT_MAX == 0xFFFFFFFFUL
185       typedef unsigned int XXH32_hash_t;
186 #   else
187 #     if ULONG_MAX == 0xFFFFFFFFUL
188         typedef unsigned long XXH32_hash_t;
189 #     else
190 #       error "unsupported platform : need a 32-bit type"
191 #     endif
192 #   endif
193 #endif
194 
195 /*! XXH32() :
196     Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
197     The memory between input & input+length must be valid (allocated and read-accessible).
198     "seed" can be used to alter the result predictably.
199     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
200 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
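/* Illustrative sketch (not part of the library) : one-shot hashing of a memory buffer.
 * The buffer contents and the seed value 0 are arbitrary.
 *
 *     #include <stdio.h>
 *     #include "xxhash.h"
 *
 *     int main(void)
 *     {
 *         const char data[] = "hello world";
 *         XXH32_hash_t const h = XXH32(data, sizeof(data)-1, 0);   // seed = 0
 *         printf("%08x\n", (unsigned)h);
 *         return 0;
 *     }
 */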
201 
202 /*******   Streaming   *******/
203 
204 /*
205  * Streaming functions generate the xxHash value from an incremental input.
206  * This method is slower than single-call functions, due to state management.
207  * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
208  *
209  * XXH state must first be allocated, using XXH*_createState() .
210  *
211  * Start a new hash by initializing state with a seed, using XXH*_reset().
212  *
213  * Then, feed the hash state by calling XXH*_update() as many times as necessary.
214  * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
215  *
216  * Finally, a hash value can be produced anytime, by using XXH*_digest().
217  * This function returns the nn-bit hash value (an unsigned 32-bit or 64-bit integer).
218  *
219  * It's still possible to continue inserting input into the hash state after a digest,
220  * and generate new hash values later on by invoking XXH*_digest() again.
221  *
222  * When done, release the state, using XXH*_freeState().
223  */
224 
225 typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
226 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
227 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
228 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
229 
230 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
231 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
232 XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
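/* Illustrative sketch (not part of the library) : hashing a stream chunk by chunk.
 * read_chunk() is a hypothetical helper standing in for any data source.
 *
 *     #include "xxhash.h"
 *
 *     XXH32_hash_t hash_stream(XXH32_hash_t seed)
 *     {
 *         char buf[4096];
 *         size_t n;
 *         XXH32_hash_t h;
 *         XXH32_state_t* const state = XXH32_createState();
 *         if (state == NULL) return 0;                       // allocation failure
 *         XXH32_reset(state, seed);
 *         while ((n = read_chunk(buf, sizeof(buf))) > 0)     // hypothetical data source
 *             XXH32_update(state, buf, n);
 *         h = XXH32_digest(state);
 *         XXH32_freeState(state);
 *         return h;
 *     }
 */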
233 
234 /*******   Canonical representation   *******/
235 
236 /* Default return values from XXH functions are basic unsigned 32 and 64 bits.
237  * This is the simplest and fastest format for further post-processing.
238  * However, this leaves open the question of byte order,
239  * since little-endian and big-endian conventions will write the same number differently.
240  *
241  * The canonical representation settles this issue,
242  * by mandating big-endian convention,
243  * aka, the same convention as human-readable numbers (large digits first).
244  * When writing hash values to storage, sending them over a network, or printing them,
245  * it's highly recommended to use the canonical representation,
246  * to ensure portability across a wider range of systems, present and future.
247  *
248  * The following functions allow transformation of hash values into and from canonical format.
249  */
250 
251 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
252 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
253 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
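/* Illustrative sketch (not part of the library) : writing and reading a hash in canonical
 * (big-endian) form, so the stored bytes are identical on every platform.
 * `f` is assumed to be an open FILE* with the appropriate mode.
 *
 *     #include <stdio.h>
 *     #include "xxhash.h"
 *
 *     void store_hash(FILE* f, XXH32_hash_t h)
 *     {
 *         XXH32_canonical_t c;
 *         XXH32_canonicalFromHash(&c, h);                  // big-endian on disk
 *         fwrite(c.digest, 1, sizeof(c.digest), f);
 *     }
 *
 *     XXH32_hash_t load_hash(FILE* f)
 *     {
 *         XXH32_canonical_t c;
 *         if (fread(c.digest, 1, sizeof(c.digest), f) != sizeof(c.digest)) return 0;
 *         return XXH32_hashFromCanonical(&c);
 *     }
 */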
254 
255 
256 #ifndef XXH_NO_LONG_LONG
257 /*-**********************************************************************
258 *  64-bit hash
259 ************************************************************************/
260 #if !defined (__VMS) \
261   && (defined (__cplusplus) \
262   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
263 #   include <stdint.h>
264     typedef uint64_t XXH64_hash_t;
265 #else
266     /* the following type must have a width of 64 bits */
267     typedef unsigned long long XXH64_hash_t;
268 #endif
269 
270 /*! XXH64() :
271  *  Returns the 64-bit hash of the sequence of @length bytes stored at memory address @input.
272  *  @seed can be used to alter the result predictably.
273  *  This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
274  */
275 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
276 
277 /*******   Streaming   *******/
278 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
279 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
280 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
281 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
282 
283 XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
284 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
285 XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
286 
287 /*******   Canonical representation   *******/
288 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
289 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
290 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
291 
292 
293 #endif  /* XXH_NO_LONG_LONG */
294 
295 #endif /* XXHASH_H_5627135585666179 */
296 
297 
298 
299 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
300 #define XXHASH_H_STATIC_13879238742
301 /* ************************************************************************************************
302    This section contains declarations which are not guaranteed to remain stable.
303    They may change in future versions, becoming incompatible with a different version of the library.
304    These declarations should only be used with static linking.
305    Never use them in association with dynamic linking !
306 *************************************************************************************************** */
307 
308 /* These definitions are only present to allow
309  * static allocation of XXH state, on stack or in a struct for example.
310  * Never **ever** access their members directly. */
311 
312 struct XXH32_state_s {
313    XXH32_hash_t total_len_32;
314    XXH32_hash_t large_len;
315    XXH32_hash_t v1;
316    XXH32_hash_t v2;
317    XXH32_hash_t v3;
318    XXH32_hash_t v4;
319    XXH32_hash_t mem32[4];
320    XXH32_hash_t memsize;
321    XXH32_hash_t reserved;   /* never read nor write, might be removed in a future version */
322 };   /* typedef'd to XXH32_state_t */
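/* Illustrative sketch (not part of the library) : the exposed structure makes stack
 * allocation possible, while the state is still driven only through the public API.
 *
 *     #define XXH_STATIC_LINKING_ONLY
 *     #include "xxhash.h"
 *
 *     XXH32_hash_t hash_no_malloc(const void* data, size_t len)
 *     {
 *         XXH32_state_t state;                   // on the stack : no XXH32_createState()
 *         XXH32_reset(&state, 0);
 *         XXH32_update(&state, data, len);
 *         return XXH32_digest(&state);
 *     }
 */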
323 
324 
325 #ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
326 
327 struct XXH64_state_s {
328    XXH64_hash_t total_len;
329    XXH64_hash_t v1;
330    XXH64_hash_t v2;
331    XXH64_hash_t v3;
332    XXH64_hash_t v4;
333    XXH64_hash_t mem64[4];
334    XXH32_hash_t memsize;
335    XXH32_hash_t reserved32;  /* required for padding anyway */
336    XXH64_hash_t reserved64;  /* never read nor write, might be removed in a future version */
337 };   /* typedef'd to XXH64_state_t */
338 
339 #endif  /* XXH_NO_LONG_LONG */
340 
341 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
342 #  define XXH_IMPLEMENTATION
343 #endif
344 
345 #endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
346 
347 
348 
349 /*-**********************************************************************
350 *  xxHash implementation
351 *  The function implementations used to be hosted within xxhash.c .
352 *  However, code inlining requires the implementation to be placed in the header file.
353 *  As a consequence, xxhash.c used to be included within xxhash.h .
354 *  But some build systems don't like *.c inclusions.
355 *  So the implementation is now directly integrated within xxhash.h .
356 *  Another small advantage is that xxhash.c is no longer required in /includes .
357 ************************************************************************/
358 
359 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
360    || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
361 #  define XXH_IMPLEM_13a8737387
362 
363 /* *************************************
364 *  Tuning parameters
365 ***************************************/
366 /*!XXH_FORCE_MEMORY_ACCESS :
367  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
368  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
369  * The switch below allows selecting a different access method for improved performance.
370  * Method 0 (default) : use `memcpy()`. Safe and portable.
371  * Method 1 : `__packed` statement. It relies on a compiler extension (ie, not portable).
372  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
373  * Method 2 : direct access. This method doesn't depend on the compiler, but violates the C standard.
374  *            It can generate buggy code on targets which do not support unaligned memory accesses.
375  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
376  * See http://stackoverflow.com/a/32095106/646947 for details.
377  * Prefer these methods in priority order (0 > 1 > 2)
378  */
379 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
380 #  if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
381 #    define XXH_FORCE_MEMORY_ACCESS 2
382 #  elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
383   (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
384 #    define XXH_FORCE_MEMORY_ACCESS 1
385 #  endif
386 #endif
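/* Illustrative sketch (not part of the library) : the access method is typically forced
 * from the build command line, for example :
 *
 *     cc -O3 -DXXH_FORCE_MEMORY_ACCESS=1 -c xxhash.c
 *
 * Only the values 0, 1 and 2 described above are meaningful. */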
387 
388 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
389  * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
390  * When this macro is enabled, xxHash actively checks the input for a null pointer.
391  * If it is NULL, the result is the same as for a zero-length input.
392  */
393 #ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
394 #  define XXH_ACCEPT_NULL_INPUT_POINTER 0
395 #endif
396 
397 /*!XXH_FORCE_ALIGN_CHECK :
398  * This is a minor performance trick, only useful with lots of very small keys.
399  * It means : check for aligned/unaligned input.
400  * The check costs one initial branch per hash;
401  * set it to 0 when the input is guaranteed to be aligned,
402  * or when alignment doesn't matter for performance.
403  */
404 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
405 #  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
406 #    define XXH_FORCE_ALIGN_CHECK 0
407 #  else
408 #    define XXH_FORCE_ALIGN_CHECK 1
409 #  endif
410 #endif
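/* Illustrative sketch (not part of the library) : a build whose inputs are known to be
 * suitably aligned may skip the initial branch by defining, for example :
 *
 *     cc -DXXH_FORCE_ALIGN_CHECK=0 -c xxhash.c
 */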
411 
412 /*!XXH_REROLL:
413  * Whether to reroll XXH32_finalize, and XXH64_finalize,
414  * instead of using an unrolled jump table/if statement loop.
415  *
416  * This is automatically defined on -Os/-Oz on GCC and Clang. */
417 #ifndef XXH_REROLL
418 #  if defined(__OPTIMIZE_SIZE__)
419 #    define XXH_REROLL 1
420 #  else
421 #    define XXH_REROLL 0
422 #  endif
423 #endif
424 
425 
426 /* *************************************
427 *  Includes & Memory related functions
428 ***************************************/
429 /*! Modify the local functions below should you wish to use some other memory routines
430 *   for malloc(), free() */
431 #include <stdlib.h>
432 static void* XXH_malloc(size_t s) { return malloc(s); }
433 static void  XXH_free  (void* p)  { free(p); }
434 /*! and for memcpy() */
435 #include <string.h>
436 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
437 
438 #include <limits.h>   /* ULLONG_MAX */
439 
440 
441 /* *************************************
442 *  Compiler Specific Options
443 ***************************************/
444 #ifdef _MSC_VER    /* Visual Studio */
445 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
446 #  define XXH_FORCE_INLINE static __forceinline
447 #  define XXH_NO_INLINE static __declspec(noinline)
448 #else
449 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
450 #    ifdef __GNUC__
451 #      define XXH_FORCE_INLINE static inline __attribute__((always_inline))
452 #      define XXH_NO_INLINE static __attribute__((noinline))
453 #    else
454 #      define XXH_FORCE_INLINE static inline
455 #      define XXH_NO_INLINE static
456 #    endif
457 #  else
458 #    define XXH_FORCE_INLINE static
459 #    define XXH_NO_INLINE static
460 #  endif /* __STDC_VERSION__ */
461 #endif
462 
463 
464 
465 /* *************************************
466 *  Debug
467 ***************************************/
468 /* DEBUGLEVEL is expected to be defined externally,
469  * typically through compiler command line.
470  * Value must be a number. */
471 #ifndef DEBUGLEVEL
472 #  define DEBUGLEVEL 0
473 #endif
474 
475 #if (DEBUGLEVEL>=1)
476 #  include <assert.h>   /* note : can still be disabled with NDEBUG */
477 #  define XXH_ASSERT(c)   assert(c)
478 #else
479 #  define XXH_ASSERT(c)   ((void)0)
480 #endif
481 
482 /* note : use after variable declarations */
483 #define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }
484 
485 
486 /* *************************************
487 *  Basic Types
488 ***************************************/
489 #if !defined (__VMS) \
490  && (defined (__cplusplus) \
491  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
492 # include <stdint.h>
493   typedef uint8_t  xxh_u8;
494 #else
495   typedef unsigned char      xxh_u8;
496 #endif
497 typedef XXH32_hash_t xxh_u32;
498 
499 
500 /* ***   Memory access   *** */
501 
502 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
503 
504 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
505 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
506 
507 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
508 
509 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
510 /* currently only defined for gcc and icc */
511 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
512 static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
513 
514 #else
515 
516 /* portable and safe solution. Generally efficient.
517  * see : http://stackoverflow.com/a/32095106/646947
518  */
519 static xxh_u32 XXH_read32(const void* memPtr)
520 {
521     xxh_u32 val;
522     memcpy(&val, memPtr, sizeof(val));
523     return val;
524 }
525 
526 #endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
527 
528 
529 /* ***   Endianness   *** */
530 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
531 
532 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
533 #ifndef XXH_CPU_LITTLE_ENDIAN
534 #  if defined(_WIN32) /* Windows is always little endian */ \
535      || defined(__LITTLE_ENDIAN__) \
536      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
537 #    define XXH_CPU_LITTLE_ENDIAN 1
538 #  elif defined(__BIG_ENDIAN__) \
539      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
540 #    define XXH_CPU_LITTLE_ENDIAN 0
541 #  else
542 static int XXH_isLittleEndian(void)
543 {
544     const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
545     return one.c[0];
546 }
547 #   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
548 #  endif
549 #endif
550 
551 
552 
553 
554 /* ****************************************
555 *  Compiler-specific Functions and Macros
556 ******************************************/
557 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
558 
559 #ifndef __has_builtin
560 #  define __has_builtin(x) 0
561 #endif
562 
563 #if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
564 #  define XXH_rotl32 __builtin_rotateleft32
565 #  define XXH_rotl64 __builtin_rotateleft64
566 /* Note : although _rotl exists for MinGW (GCC under Windows), performance seems poor */
567 #elif defined(_MSC_VER)
568 #  define XXH_rotl32(x,r) _rotl(x,r)
569 #  define XXH_rotl64(x,r) _rotl64(x,r)
570 #else
571 #  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
572 #  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
573 #endif
574 
575 #if defined(_MSC_VER)     /* Visual Studio */
576 #  define XXH_swap32 _byteswap_ulong
577 #elif XXH_GCC_VERSION >= 403
578 #  define XXH_swap32 __builtin_bswap32
579 #else
580 static xxh_u32 XXH_swap32 (xxh_u32 x)
581 {
582     return  ((x << 24) & 0xff000000 ) |
583             ((x <<  8) & 0x00ff0000 ) |
584             ((x >>  8) & 0x0000ff00 ) |
585             ((x >> 24) & 0x000000ff );
586 }
587 #endif
588 
589 
590 /* ***************************
591 *  Memory reads
592 *****************************/
593 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
594 
595 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
596 {
597     return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
598 }
599 
600 static xxh_u32 XXH_readBE32(const void* ptr)
601 {
602     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
603 }
604 
605 XXH_FORCE_INLINE xxh_u32
606 XXH_readLE32_align(const void* ptr, XXH_alignment align)
607 {
608     if (align==XXH_unaligned) {
609         return XXH_readLE32(ptr);
610     } else {
611         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
612     }
613 }
614 
615 
616 /* *************************************
617 *  Misc
618 ***************************************/
619 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
620 
621 
622 /* *******************************************************************
623 *  32-bit hash functions
624 *********************************************************************/
625 static const xxh_u32 PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
626 static const xxh_u32 PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
627 static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
628 static const xxh_u32 PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
629 static const xxh_u32 PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
630 
631 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
632 {
633     acc += input * PRIME32_2;
634     acc  = XXH_rotl32(acc, 13);
635     acc *= PRIME32_1;
636 #if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
637     /* UGLY HACK:
638      * This inline assembly hack forces acc into a normal register. This is the
639      * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
640      * (pragmas and attributes don't work for some reason) without globally
641      * disabling SSE4.1.
642      *
643      * The reason we want to avoid vectorization is because despite working on
644      * 4 integers at a time, there are multiple factors slowing XXH32 down on
645      * SSE4:
646      * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
647      *   making it slightly slower to multiply four integers at once compared to four
648      *   integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
649      *   still not worth it to go into SSE just to multiply unless doing a long operation.
650      *
651      * - Four instructions are required to rotate,
652      *      movdqa tmp,  v // not required with VEX encoding
653      *      pslld  tmp, 13 // tmp <<= 13
654      *      psrld  v,   19 // x >>= 19
655      *      por    v,  tmp // x |= tmp
656      *   compared to one for scalar:
657      *      roll   v, 13    // reliably fast across the board
658      *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
659      *
660      * - Instruction level parallelism is actually more beneficial here because the
661      *   SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
662      *   while v3 can multiply. SSE forces them to operate together.
663      *
664      * How this hack works:
665      * __asm__(""       // Declare an assembly block but don't declare any instructions
666      *          :       // However, as an Input/Output Operand,
667      *          "+r"    // constrain a read/write operand (+) as a general purpose register (r).
668      *          (acc)   // and set acc as the operand
669      * );
670      *
671      * Because of the 'r', the compiler has promised that acc will be in a
672      * general purpose register and the '+' says that it will be 'read/write',
673      * so it has to assume it has changed. It is like volatile without all the
674      * loads and stores.
675      *
676      * Since the argument has to be in a normal register (not an SSE register),
677      * each time XXH32_round is called, it is impossible to vectorize. */
678     __asm__("" : "+r" (acc));
679 #endif
680     return acc;
681 }
682 
683 /* mix all bits */
684 static xxh_u32 XXH32_avalanche(xxh_u32 h32)
685 {
686     h32 ^= h32 >> 15;
687     h32 *= PRIME32_2;
688     h32 ^= h32 >> 13;
689     h32 *= PRIME32_3;
690     h32 ^= h32 >> 16;
691     return(h32);
692 }
693 
694 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
695 
696 static xxh_u32
697 XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
698 {
699 #define PROCESS1               \
700     h32 += (*ptr++) * PRIME32_5; \
701     h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
702 
703 #define PROCESS4                         \
704     h32 += XXH_get32bits(ptr) * PRIME32_3; \
705     ptr+=4;                                \
706     h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
707 
708     /* Compact rerolled version */
709     if (XXH_REROLL) {
710         len &= 15;
711         while (len >= 4) {
712             PROCESS4;
713             len -= 4;
714         }
715         while (len > 0) {
716             PROCESS1;
717             --len;
718         }
719         return XXH32_avalanche(h32);
720     } else {
721          switch(len&15) /* or switch(bEnd - p) */ {
722            case 12:      PROCESS4;
723                          /* fallthrough */
724            case 8:       PROCESS4;
725                          /* fallthrough */
726            case 4:       PROCESS4;
727                          return XXH32_avalanche(h32);
728 
729            case 13:      PROCESS4;
730                          /* fallthrough */
731            case 9:       PROCESS4;
732                          /* fallthrough */
733            case 5:       PROCESS4;
734                          PROCESS1;
735                          return XXH32_avalanche(h32);
736 
737            case 14:      PROCESS4;
738                          /* fallthrough */
739            case 10:      PROCESS4;
740                          /* fallthrough */
741            case 6:       PROCESS4;
742                          PROCESS1;
743                          PROCESS1;
744                          return XXH32_avalanche(h32);
745 
746            case 15:      PROCESS4;
747                          /* fallthrough */
748            case 11:      PROCESS4;
749                          /* fallthrough */
750            case 7:       PROCESS4;
751                          /* fallthrough */
752            case 3:       PROCESS1;
753                          /* fallthrough */
754            case 2:       PROCESS1;
755                          /* fallthrough */
756            case 1:       PROCESS1;
757                          /* fallthrough */
758            case 0:       return XXH32_avalanche(h32);
759         }
760         XXH_ASSERT(0);
761         return h32;   /* reaching this point is deemed impossible */
762     }
763 }
764 
765 XXH_FORCE_INLINE xxh_u32
766 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
767 {
768     const xxh_u8* bEnd = input + len;
769     xxh_u32 h32;
770 
771 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
772     if (input==NULL) {
773         len=0;
774         bEnd=input=(const xxh_u8*)(size_t)16;
775     }
776 #endif
777 
778     if (len>=16) {
779         const xxh_u8* const limit = bEnd - 15;
780         xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
781         xxh_u32 v2 = seed + PRIME32_2;
782         xxh_u32 v3 = seed + 0;
783         xxh_u32 v4 = seed - PRIME32_1;
784 
785         do {
786             v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
787             v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
788             v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
789             v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
790         } while (input < limit);
791 
792         h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
793             + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
794     } else {
795         h32  = seed + PRIME32_5;
796     }
797 
798     h32 += (xxh_u32)len;
799 
800     return XXH32_finalize(h32, input, len&15, align);
801 }
802 
803 
804 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
805 {
806 #if 0
807     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
808     XXH32_state_t state;
809     XXH32_reset(&state, seed);
810     XXH32_update(&state, (const xxh_u8*)input, len);
811     return XXH32_digest(&state);
812 
813 #else
814 
815     if (XXH_FORCE_ALIGN_CHECK) {
816         if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
817             return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
818     }   }
819 
820     return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
821 #endif
822 }
823 
824 
825 
826 /*******   Hash streaming   *******/
827 
828 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
829 {
830     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
831 }
832 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
833 {
834     XXH_free(statePtr);
835     return XXH_OK;
836 }
837 
838 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
839 {
840     memcpy(dstState, srcState, sizeof(*dstState));
841 }
842 
843 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
844 {
845     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
846     memset(&state, 0, sizeof(state));
847     state.v1 = seed + PRIME32_1 + PRIME32_2;
848     state.v2 = seed + PRIME32_2;
849     state.v3 = seed + 0;
850     state.v4 = seed - PRIME32_1;
851     /* do not write into reserved, planned to be removed in a future version */
852     memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
853     return XXH_OK;
854 }
855 
856 
857 XXH_PUBLIC_API XXH_errorcode
858 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
859 {
860     if (input==NULL)
861 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
862         return XXH_OK;
863 #else
864         return XXH_ERROR;
865 #endif
866 
867     {   const xxh_u8* p = (const xxh_u8*)input;
868         const xxh_u8* const bEnd = p + len;
869 
870         state->total_len_32 += (XXH32_hash_t)len;
871         state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
872 
873         if (state->memsize + len < 16)  {   /* fill in tmp buffer */
874             XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
875             state->memsize += (XXH32_hash_t)len;
876             return XXH_OK;
877         }
878 
879         if (state->memsize) {   /* some data left from previous update */
880             XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
881             {   const xxh_u32* p32 = state->mem32;
882                 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
883                 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
884                 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
885                 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
886             }
887             p += 16-state->memsize;
888             state->memsize = 0;
889         }
890 
891         if (p <= bEnd-16) {
892             const xxh_u8* const limit = bEnd - 16;
893             xxh_u32 v1 = state->v1;
894             xxh_u32 v2 = state->v2;
895             xxh_u32 v3 = state->v3;
896             xxh_u32 v4 = state->v4;
897 
898             do {
899                 v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
900                 v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
901                 v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
902                 v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
903             } while (p<=limit);
904 
905             state->v1 = v1;
906             state->v2 = v2;
907             state->v3 = v3;
908             state->v4 = v4;
909         }
910 
911         if (p < bEnd) {
912             XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
913             state->memsize = (unsigned)(bEnd-p);
914         }
915     }
916 
917     return XXH_OK;
918 }
919 
920 
921 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
922 {
923     xxh_u32 h32;
924 
925     if (state->large_len) {
926         h32 = XXH_rotl32(state->v1, 1)
927             + XXH_rotl32(state->v2, 7)
928             + XXH_rotl32(state->v3, 12)
929             + XXH_rotl32(state->v4, 18);
930     } else {
931         h32 = state->v3 /* == seed */ + PRIME32_5;
932     }
933 
934     h32 += state->total_len_32;
935 
936     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
937 }
938 
939 
940 /*******   Canonical representation   *******/
941 
942 /*! Default XXH result types are basic unsigned 32 and 64 bits.
943 *   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
944 *   These functions allow transformation of hash result into and from its canonical format.
945 *   This way, hash values can be written into a file or buffer, remaining comparable across different systems.
946 */
947 
948 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
949 {
950     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
951     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
952     memcpy(dst, &hash, sizeof(*dst));
953 }
954 
955 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
956 {
957     return XXH_readBE32(src);
958 }
959 
960 
961 #ifndef XXH_NO_LONG_LONG
962 
963 /* *******************************************************************
964 *  64-bit hash functions
965 *********************************************************************/
966 
967 /*******   Memory access   *******/
968 
969 typedef XXH64_hash_t xxh_u64;
970 
971 
972 /*! XXH_REROLL_XXH64:
973  * Whether to reroll the XXH64_finalize() loop.
974  *
975  * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain
976  * on 64-bit hosts, as only one jump is required.
977  *
978  * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers,
979  * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes
980  * ridiculously large (the largest function in the binary on i386!), and rerolling it saves
981  * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better
982  * and is more likely to be inlined by the compiler.
983  *
984  * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */
985 #ifndef XXH_REROLL_XXH64
986 #  if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
987    || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
988      || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
989      || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
990      || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
991    || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
992 #    define XXH_REROLL_XXH64 1
993 #  else
994 #    define XXH_REROLL_XXH64 0
995 #  endif
996 #endif /* !defined(XXH_REROLL_XXH64) */
997 
998 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
999 
1000 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
1001 static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
1002 
1003 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1004 
1005 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
1006 /* currently only defined for gcc and icc */
1007 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
1008 static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
1009 
1010 #else
1011 
1012 /* portable and safe solution. Generally efficient.
1013  * see : http://stackoverflow.com/a/32095106/646947
1014  */
1015 
1016 static xxh_u64 XXH_read64(const void* memPtr)
1017 {
1018     xxh_u64 val;
1019     memcpy(&val, memPtr, sizeof(val));
1020     return val;
1021 }
1022 
1023 #endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
1024 
1025 #if defined(_MSC_VER)     /* Visual Studio */
1026 #  define XXH_swap64 _byteswap_uint64
1027 #elif XXH_GCC_VERSION >= 403
1028 #  define XXH_swap64 __builtin_bswap64
1029 #else
1030 static xxh_u64 XXH_swap64 (xxh_u64 x)
1031 {
1032     return  ((x << 56) & 0xff00000000000000ULL) |
1033             ((x << 40) & 0x00ff000000000000ULL) |
1034             ((x << 24) & 0x0000ff0000000000ULL) |
1035             ((x << 8)  & 0x000000ff00000000ULL) |
1036             ((x >> 8)  & 0x00000000ff000000ULL) |
1037             ((x >> 24) & 0x0000000000ff0000ULL) |
1038             ((x >> 40) & 0x000000000000ff00ULL) |
1039             ((x >> 56) & 0x00000000000000ffULL);
1040 }
1041 #endif
1042 
1043 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
1044 {
1045     return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
1046 }
1047 
1048 static xxh_u64 XXH_readBE64(const void* ptr)
1049 {
1050     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
1051 }
1052 
1053 XXH_FORCE_INLINE xxh_u64
1054 XXH_readLE64_align(const void* ptr, XXH_alignment align)
1055 {
1056     if (align==XXH_unaligned)
1057         return XXH_readLE64(ptr);
1058     else
1059         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
1060 }
1061 
1062 
1063 /*******   xxh64   *******/
1064 
1065 static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
1066 static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
1067 static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
1068 static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
1069 static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
1070 
1071 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
1072 {
1073     acc += input * PRIME64_2;
1074     acc  = XXH_rotl64(acc, 31);
1075     acc *= PRIME64_1;
1076     return acc;
1077 }
1078 
1079 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
1080 {
1081     val  = XXH64_round(0, val);
1082     acc ^= val;
1083     acc  = acc * PRIME64_1 + PRIME64_4;
1084     return acc;
1085 }
1086 
1087 static xxh_u64 XXH64_avalanche(xxh_u64 h64)
1088 {
1089     h64 ^= h64 >> 33;
1090     h64 *= PRIME64_2;
1091     h64 ^= h64 >> 29;
1092     h64 *= PRIME64_3;
1093     h64 ^= h64 >> 32;
1094     return h64;
1095 }
1096 
1097 
1098 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
1099 
1100 static xxh_u64
1101 XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
1102 {
1103 #define PROCESS1_64            \
1104     h64 ^= (*ptr++) * PRIME64_5; \
1105     h64 = XXH_rotl64(h64, 11) * PRIME64_1;
1106 
1107 #define PROCESS4_64          \
1108     h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
1109     ptr+=4;                    \
1110     h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
1111 
1112 #define PROCESS8_64 {        \
1113     xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
1114     ptr+=8;                    \
1115     h64 ^= k1;               \
1116     h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
1117 }
1118 
1119     /* Rerolled version for 32-bit targets is faster and much smaller. */
1120     if (XXH_REROLL || XXH_REROLL_XXH64) {
1121         len &= 31;
1122         while (len >= 8) {
1123             PROCESS8_64;
1124             len -= 8;
1125         }
1126         if (len >= 4) {
1127             PROCESS4_64;
1128             len -= 4;
1129         }
1130         while (len > 0) {
1131             PROCESS1_64;
1132             --len;
1133         }
1134          return  XXH64_avalanche(h64);
1135     } else {
1136         switch(len & 31) {
1137            case 24: PROCESS8_64;
1138                          /* fallthrough */
1139            case 16: PROCESS8_64;
1140                          /* fallthrough */
1141            case  8: PROCESS8_64;
1142                     return XXH64_avalanche(h64);
1143 
1144            case 28: PROCESS8_64;
1145                          /* fallthrough */
1146            case 20: PROCESS8_64;
1147                          /* fallthrough */
1148            case 12: PROCESS8_64;
1149                          /* fallthrough */
1150            case  4: PROCESS4_64;
1151                     return XXH64_avalanche(h64);
1152 
1153            case 25: PROCESS8_64;
1154                          /* fallthrough */
1155            case 17: PROCESS8_64;
1156                          /* fallthrough */
1157            case  9: PROCESS8_64;
1158                     PROCESS1_64;
1159                     return XXH64_avalanche(h64);
1160 
1161            case 29: PROCESS8_64;
1162                          /* fallthrough */
1163            case 21: PROCESS8_64;
1164                          /* fallthrough */
1165            case 13: PROCESS8_64;
1166                          /* fallthrough */
1167            case  5: PROCESS4_64;
1168                     PROCESS1_64;
1169                     return XXH64_avalanche(h64);
1170 
1171            case 26: PROCESS8_64;
1172                          /* fallthrough */
1173            case 18: PROCESS8_64;
1174                          /* fallthrough */
1175            case 10: PROCESS8_64;
1176                     PROCESS1_64;
1177                     PROCESS1_64;
1178                     return XXH64_avalanche(h64);
1179 
1180            case 30: PROCESS8_64;
1181                          /* fallthrough */
1182            case 22: PROCESS8_64;
1183                          /* fallthrough */
1184            case 14: PROCESS8_64;
1185                          /* fallthrough */
1186            case  6: PROCESS4_64;
1187                     PROCESS1_64;
1188                     PROCESS1_64;
1189                     return XXH64_avalanche(h64);
1190 
1191            case 27: PROCESS8_64;
1192                          /* fallthrough */
1193            case 19: PROCESS8_64;
1194                          /* fallthrough */
1195            case 11: PROCESS8_64;
1196                     PROCESS1_64;
1197                     PROCESS1_64;
1198                     PROCESS1_64;
1199                     return XXH64_avalanche(h64);
1200 
1201            case 31: PROCESS8_64;
1202                          /* fallthrough */
1203            case 23: PROCESS8_64;
1204                          /* fallthrough */
1205            case 15: PROCESS8_64;
1206                          /* fallthrough */
1207            case  7: PROCESS4_64;
1208                          /* fallthrough */
1209            case  3: PROCESS1_64;
1210                          /* fallthrough */
1211            case  2: PROCESS1_64;
1212                          /* fallthrough */
1213            case  1: PROCESS1_64;
1214                          /* fallthrough */
1215            case  0: return XXH64_avalanche(h64);
1216         }
1217     }
1218     /* impossible to reach */
1219     XXH_ASSERT(0);
1220     return 0;  /* unreachable, but some compilers complain without it */
1221 }
1222 
1223 XXH_FORCE_INLINE xxh_u64
1224 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
1225 {
1226     const xxh_u8* bEnd = input + len;
1227     xxh_u64 h64;
1228 
1229 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1230     if (input==NULL) {
1231         len=0;
1232         bEnd=input=(const xxh_u8*)(size_t)32;
1233     }
1234 #endif
1235 
1236     if (len>=32) {
1237         const xxh_u8* const limit = bEnd - 32;
1238         xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
1239         xxh_u64 v2 = seed + PRIME64_2;
1240         xxh_u64 v3 = seed + 0;
1241         xxh_u64 v4 = seed - PRIME64_1;
1242 
1243         do {
1244             v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
1245             v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
1246             v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
1247             v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
1248         } while (input<=limit);
1249 
1250         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1251         h64 = XXH64_mergeRound(h64, v1);
1252         h64 = XXH64_mergeRound(h64, v2);
1253         h64 = XXH64_mergeRound(h64, v3);
1254         h64 = XXH64_mergeRound(h64, v4);
1255 
1256     } else {
1257         h64  = seed + PRIME64_5;
1258     }
1259 
1260     h64 += (xxh_u64) len;
1261 
1262     return XXH64_finalize(h64, input, len, align);
1263 }
1264 
1265 
1266 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
1267 {
1268 #if 0
1269     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
1270     XXH64_state_t state;
1271     XXH64_reset(&state, seed);
1272     XXH64_update(&state, (const xxh_u8*)input, len);
1273     return XXH64_digest(&state);
1274 
1275 #else
1276 
1277     if (XXH_FORCE_ALIGN_CHECK) {
1278         if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
1279             return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
1280     }   }
1281 
1282     return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
1283 
1284 #endif
1285 }
1286 
1287 /*******   Hash Streaming   *******/
1288 
1289 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
1290 {
1291     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
1292 }
1293 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
1294 {
1295     XXH_free(statePtr);
1296     return XXH_OK;
1297 }
1298 
1299 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
1300 {
1301     memcpy(dstState, srcState, sizeof(*dstState));
1302 }
1303 
1304 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
1305 {
1306     XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
1307     memset(&state, 0, sizeof(state));
1308     state.v1 = seed + PRIME64_1 + PRIME64_2;
1309     state.v2 = seed + PRIME64_2;
1310     state.v3 = seed + 0;
1311     state.v4 = seed - PRIME64_1;
1312      /* do not write into reserved64, might be removed in a future version */
1313     memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
1314     return XXH_OK;
1315 }
1316 
1317 XXH_PUBLIC_API XXH_errorcode
1318 XXH64_update (XXH64_state_t* state, const void* input, size_t len)
1319 {
1320     if (input==NULL)
1321 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1322         return XXH_OK;
1323 #else
1324         return XXH_ERROR;
1325 #endif
1326 
1327     {   const xxh_u8* p = (const xxh_u8*)input;
1328         const xxh_u8* const bEnd = p + len;
1329 
1330         state->total_len += len;
1331 
1332         if (state->memsize + len < 32) {  /* fill in tmp buffer */
1333             XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
1334             state->memsize += (xxh_u32)len;
1335             return XXH_OK;
1336         }
1337 
1338         if (state->memsize) {   /* tmp buffer is full */
1339             XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
1340             state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
1341             state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
1342             state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
1343             state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
1344             p += 32-state->memsize;
1345             state->memsize = 0;
1346         }
1347 
1348         if (p+32 <= bEnd) {
1349             const xxh_u8* const limit = bEnd - 32;
1350             xxh_u64 v1 = state->v1;
1351             xxh_u64 v2 = state->v2;
1352             xxh_u64 v3 = state->v3;
1353             xxh_u64 v4 = state->v4;
1354 
1355             do {
1356                 v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
1357                 v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
1358                 v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
1359                 v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
1360             } while (p<=limit);
1361 
1362             state->v1 = v1;
1363             state->v2 = v2;
1364             state->v3 = v3;
1365             state->v4 = v4;
1366         }
1367 
1368         if (p < bEnd) {
1369             XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
1370             state->memsize = (unsigned)(bEnd-p);
1371         }
1372     }
1373 
1374     return XXH_OK;
1375 }
1376 
1377 
1378 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
1379 {
1380     xxh_u64 h64;
1381 
1382     if (state->total_len >= 32) {
1383         xxh_u64 const v1 = state->v1;
1384         xxh_u64 const v2 = state->v2;
1385         xxh_u64 const v3 = state->v3;
1386         xxh_u64 const v4 = state->v4;
1387 
1388         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1389         h64 = XXH64_mergeRound(h64, v1);
1390         h64 = XXH64_mergeRound(h64, v2);
1391         h64 = XXH64_mergeRound(h64, v3);
1392         h64 = XXH64_mergeRound(h64, v4);
1393     } else {
1394         h64  = state->v3 /*seed*/ + PRIME64_5;
1395     }
1396 
1397     h64 += (xxh_u64) state->total_len;
1398 
1399     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
1400 }
1401 
1402 
1403 /******* Canonical representation   *******/
1404 
1405 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
1406 {
1407     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
1408     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
1409     memcpy(dst, &hash, sizeof(*dst));
1410 }
1411 
1412 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
1413 {
1414     return XXH_readBE64(src);
1415 }
1416 
1417 
1418 
1419 /* *********************************************************************
1420 *  XXH3
1421 *  New generation hash designed for speed on small keys and vectorization
1422 ************************************************************************ */
1423 
1424 /* #include "xxh3.h" */
1425 
1426 
1427 #endif  /* XXH_NO_LONG_LONG */
1428 
1429 
1430 #endif  /* XXH_IMPLEMENTATION */
1431 
1432 
1433 #if defined (__cplusplus)
1434 }
1435 #endif
1436