1 /*
2    xxHash - Extremely Fast Hash algorithm
3    Header File
4    Copyright (C) 2012-2016, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31    You can contact the author at :
32    - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34 
35 /* Notice extracted from xxHash homepage :
36 
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39 
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41 
42 Name            Speed       Q.Score   Author
43 xxHash          5.4 GB/s     10
44 CrapWow         3.2 GB/s      2       Andrew
MurmurHash 3a   2.7 GB/s     10       Austin Appleby
46 SpookyHash      2.0 GB/s     10       Bob Jenkins
47 SBox            1.4 GB/s      9       Bret Mulvey
48 Lookup3         1.2 GB/s      9       Bob Jenkins
49 SuperFastHash   1.2 GB/s      1       Paul Hsieh
50 CityHash64      1.05 GB/s    10       Pike & Alakuijala
51 FNV             0.55 GB/s     5       Fowler, Noll, Vo
52 CRC32           0.43 GB/s     9
53 MD5-32          0.33 GB/s    10       Ronald L. Rivest
54 SHA1-32         0.28 GB/s    10
55 
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing SMHasher test set.
58 10 is a perfect score.
59 
60 Note : SMHasher's CRC32 implementation is not the fastest one.
61 Other speed-oriented implementations can be faster,
62 especially in combination with PCLMUL instruction :
63 http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
64 
65 A 64-bit version, named XXH64, is available since r35.
66 It offers much better speed, but for 64-bit applications only.
67 Name     Speed on 64 bits    Speed on 32 bits
68 XXH64       13.8 GB/s            1.9 GB/s
69 XXH32        6.8 GB/s            6.0 GB/s
70 */
71 
72 /* Mesa leaves strict aliasing on in the compiler, and this code likes to
73  * dereference the passed in data as u32*, which means that the compiler is
74  * free to move the u32 read before the write of the struct members being
75  * hashed, and in practice it did in freedreno.  Forcing these two things
76  * prevents it.
77  */
78 #define XXH_FORCE_ALIGN_CHECK 0
79 #define XXH_FORCE_MEMORY_ACCESS 0
80 
81 #include "util/compiler.h" /* for FALLTHROUGH */
82 
83 #if defined (__cplusplus)
84 extern "C" {
85 #endif
86 
87 
88 #ifndef XXHASH_H_5627135585666179
89 #define XXHASH_H_5627135585666179 1
90 
91 /* ****************************
92  *  API modifier
93  ******************************/
94 /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
95  *  This build macro includes xxhash functions in `static` mode
96  *  in order to inline them, and remove their symbol from the public list.
97  *  Inlining offers great performance improvement on small keys,
98  *  and dramatic ones when length is expressed as a compile-time constant.
99  *  See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
100  *  Methodology :
101  *     #define XXH_INLINE_ALL
102  *     #include "xxhash.h"
103  * `xxhash.c` is automatically included.
104  *  It's not useful to compile and link it as a separate object.
105  */
106 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
107 #  ifndef XXH_STATIC_LINKING_ONLY
108 #    define XXH_STATIC_LINKING_ONLY
109 #  endif
110 #  if defined(__GNUC__)
111 #    define XXH_PUBLIC_API static __inline __attribute__((unused))
112 #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
113 #    define XXH_PUBLIC_API static inline
114 #  elif defined(_MSC_VER)
115 #    define XXH_PUBLIC_API static __inline
116 #  else
117      /* this version may generate warnings for unused static functions */
118 #    define XXH_PUBLIC_API static
119 #  endif
120 #else
121 #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
122 #    ifdef XXH_EXPORT
123 #      define XXH_PUBLIC_API __declspec(dllexport)
124 #    elif XXH_IMPORT
125 #      define XXH_PUBLIC_API __declspec(dllimport)
126 #    endif
127 #  else
128 #    define XXH_PUBLIC_API   /* do nothing */
129 #  endif
130 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
131 
132 /*! XXH_NAMESPACE, aka Namespace Emulation :
133  *
134  * If you want to include _and expose_ xxHash functions from within your own library,
135  * but also want to avoid symbol collisions with other libraries which may also include xxHash,
136  *
137  * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
138  * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
139  *
140  * Note that no change is required within the calling program as long as it includes `xxhash.h` :
141  * regular symbol name will be automatically translated by this header.
142  */
143 #ifdef XXH_NAMESPACE
144 #  define XXH_CAT(A,B) A##B
145 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
146 #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
147 #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
148 #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
149 #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
150 #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
151 #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
152 #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
153 #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
154 #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
155 #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
156 #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
157 #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
158 #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
159 #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
160 #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
161 #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
162 #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
163 #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
164 #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
165 #endif
166 
167 
168 /* *************************************
169 *  Version
170 ***************************************/
171 #define XXH_VERSION_MAJOR    0
172 #define XXH_VERSION_MINOR    7
173 #define XXH_VERSION_RELEASE  2
174 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
175 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
176 
177 
178 /* ****************************
179 *  Definitions
180 ******************************/
181 #include <stddef.h>   /* size_t */
182 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
183 
184 
185 /*-**********************************************************************
186 *  32-bit hash
187 ************************************************************************/
188 #if !defined (__VMS) \
189   && (defined (__cplusplus) \
190   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
191 #   include <stdint.h>
192     typedef uint32_t XXH32_hash_t;
193 #else
194 #   include <limits.h>
195 #   if UINT_MAX == 0xFFFFFFFFUL
196       typedef unsigned int XXH32_hash_t;
197 #   else
198 #     if ULONG_MAX == 0xFFFFFFFFUL
199         typedef unsigned long XXH32_hash_t;
200 #     else
201 #       error "unsupported platform : need a 32-bit type"
202 #     endif
203 #   endif
204 #endif
205 
206 /*! XXH32() :
207     Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
208     The memory between input & input+length must be valid (allocated and read-accessible).
209     "seed" can be used to alter the result predictably.
210     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
211 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
212 
213 /*******   Streaming   *******/
214 
215 /*
 * Streaming functions generate the xxHash value from an incremental input.
217  * This method is slower than single-call functions, due to state management.
218  * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
219  *
220  * XXH state must first be allocated, using XXH*_createState() .
221  *
222  * Start a new hash by initializing state with a seed, using XXH*_reset().
223  *
224  * Then, feed the hash state by calling XXH*_update() as many times as necessary.
225  * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
226  *
227  * Finally, a hash value can be produced anytime, by using XXH*_digest().
228  * This function returns the nn-bits hash as an int or long long.
229  *
230  * It's still possible to continue inserting input into the hash state after a digest,
231  * and generate some new hash values later on, by invoking again XXH*_digest().
232  *
233  * When done, release the state, using XXH*_freeState().
234  */
235 
236 typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
237 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
238 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
239 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
240 
241 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
242 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
243 XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
244 
245 /*******   Canonical representation   *******/
246 
247 /* Default return values from XXH functions are basic unsigned 32 and 64 bits.
 * This is the simplest and fastest format for further post-processing.
249  * However, this leaves open the question of what is the order of bytes,
250  * since little and big endian conventions will write the same number differently.
251  *
252  * The canonical representation settles this issue,
253  * by mandating big-endian convention,
254  * aka, the same convention as human-readable numbers (large digits first).
255  * When writing hash values to storage, sending them over a network, or printing them,
256  * it's highly recommended to use the canonical representation,
257  * to ensure portability across a wider range of systems, present and future.
258  *
259  * The following functions allow transformation of hash values into and from canonical format.
260  */
261 
262 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
263 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
264 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
265 
266 
267 #ifndef XXH_NO_LONG_LONG
268 /*-**********************************************************************
269 *  64-bit hash
270 ************************************************************************/
271 #if !defined (__VMS) \
272   && (defined (__cplusplus) \
273   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
274 #   include <stdint.h>
275     typedef uint64_t XXH64_hash_t;
276 #else
277     /* the following type must have a width of 64-bit */
278     typedef unsigned long long XXH64_hash_t;
279 #endif
280 
281 /*! XXH64() :
282  *  Returns the 64-bit hash of sequence of length @length stored at memory address @input.
283  *  @seed can be used to alter the result predictably.
284  *  This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
285  */
286 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
287 
288 /*******   Streaming   *******/
289 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
290 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
291 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
292 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
293 
294 XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
295 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
296 XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
297 
298 /*******   Canonical representation   *******/
299 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
300 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
301 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
302 
303 
304 #endif  /* XXH_NO_LONG_LONG */
305 
306 #endif /* XXHASH_H_5627135585666179 */
307 
308 
309 
310 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
311 #define XXHASH_H_STATIC_13879238742
312 /* ************************************************************************************************
313    This section contains declarations which are not guaranteed to remain stable.
314    They may change in future versions, becoming incompatible with a different version of the library.
315    These declarations should only be used with static linking.
316    Never use them in association with dynamic linking !
317 *************************************************************************************************** */
318 
319 /* These definitions are only present to allow
320  * static allocation of XXH state, on stack or in a struct for example.
321  * Never **ever** use members directly. */
322 
323 struct XXH32_state_s {
324    XXH32_hash_t total_len_32;
325    XXH32_hash_t large_len;
326    XXH32_hash_t v1;
327    XXH32_hash_t v2;
328    XXH32_hash_t v3;
329    XXH32_hash_t v4;
330    XXH32_hash_t mem32[4];
331    XXH32_hash_t memsize;
332    XXH32_hash_t reserved;   /* never read nor write, might be removed in a future version */
333 };   /* typedef'd to XXH32_state_t */
334 
335 
336 #ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
337 
338 struct XXH64_state_s {
339    XXH64_hash_t total_len;
340    XXH64_hash_t v1;
341    XXH64_hash_t v2;
342    XXH64_hash_t v3;
343    XXH64_hash_t v4;
344    XXH64_hash_t mem64[4];
345    XXH32_hash_t memsize;
346    XXH32_hash_t reserved32;  /* required for padding anyway */
347    XXH64_hash_t reserved64;  /* never read nor write, might be removed in a future version */
348 };   /* typedef'd to XXH64_state_t */
349 
350 #endif  /* XXH_NO_LONG_LONG */
351 
352 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
353 #  define XXH_IMPLEMENTATION
354 #endif
355 
356 #endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
357 
358 
359 
360 /*-**********************************************************************
361 *  xxHash implementation
362 *  Functions implementation used to be hosted within xxhash.c .
363 *  However, code inlining requires to place implementation in the header file.
364 *  As a consequence, xxhash.c used to be included within xxhash.h .
365 *  But some build systems don't like *.c inclusions.
366 *  So the implementation is now directly integrated within xxhash.h .
367 *  Another small advantage is that xxhash.c is no longer required in /includes .
368 ************************************************************************/
369 
370 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
371    || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
372 #  define XXH_IMPLEM_13a8737387
373 
374 /* *************************************
375 *  Tuning parameters
376 ***************************************/
377 /*!XXH_FORCE_MEMORY_ACCESS :
378  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
379  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
381  * Method 0 (default) : use `memcpy()`. Safe and portable.
382  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
383  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
384  * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
385  *            It can generate buggy code on targets which do not support unaligned memory accesses.
386  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
387  * See http://stackoverflow.com/a/32095106/646947 for details.
388  * Prefer these methods in priority order (0 > 1 > 2)
389  */
390 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
391 #  if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
392 #    define XXH_FORCE_MEMORY_ACCESS 2
393 #  elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
394   (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
395 #    define XXH_FORCE_MEMORY_ACCESS 1
396 #  endif
397 #endif
398 
399 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
400  * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
401  * When this macro is enabled, xxHash actively checks input for null pointer.
 * If it is, the result for null input pointers is the same as a null-length input.
403  */
404 #ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
405 #  define XXH_ACCEPT_NULL_INPUT_POINTER 0
406 #endif
407 
408 /*!XXH_FORCE_ALIGN_CHECK :
409  * This is a minor performance trick, only useful with lots of very small keys.
410  * It means : check for aligned/unaligned input.
411  * The check costs one initial branch per hash;
412  * set it to 0 when the input is guaranteed to be aligned,
413  * or when alignment doesn't matter for performance.
414  */
415 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
416 #  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
417 #    define XXH_FORCE_ALIGN_CHECK 0
418 #  else
419 #    define XXH_FORCE_ALIGN_CHECK 1
420 #  endif
421 #endif
422 
423 /*!XXH_REROLL:
424  * Whether to reroll XXH32_finalize, and XXH64_finalize,
425  * instead of using an unrolled jump table/if statement loop.
426  *
427  * This is automatically defined on -Os/-Oz on GCC and Clang. */
428 #ifndef XXH_REROLL
429 #  if defined(__OPTIMIZE_SIZE__)
430 #    define XXH_REROLL 1
431 #  else
432 #    define XXH_REROLL 0
433 #  endif
434 #endif
435 
436 
437 /* *************************************
438 *  Includes & Memory related functions
439 ***************************************/
440 /*! Modify the local functions below should you wish to use some other memory routines
441 *   for malloc(), free() */
442 #include <stdlib.h>
/*! Allocation shim: swap the body to plug in a custom allocator. */
static void* XXH_malloc(size_t s)
{
    return malloc(s);
}
/*! Deallocation shim matching XXH_malloc(). */
static void XXH_free(void* p)
{
    free(p);
}
445 /*! and for memcpy() */
446 #include <string.h>
/*! Copy shim; returns dest, exactly like memcpy(). */
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    return memcpy(dest, src, size);
}
448 
449 #include <limits.h>   /* ULLONG_MAX */
450 
451 
452 /* *************************************
453 *  Compiler Specific Options
454 ***************************************/
455 #ifdef _MSC_VER    /* Visual Studio */
456 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
457 #  define XXH_FORCE_INLINE static __forceinline
458 #  define XXH_NO_INLINE static __declspec(noinline)
459 #else
460 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
461 #    ifdef __GNUC__
462 #      define XXH_FORCE_INLINE static inline __attribute__((always_inline))
463 #      define XXH_NO_INLINE static __attribute__((noinline))
464 #    else
465 #      define XXH_FORCE_INLINE static inline
466 #      define XXH_NO_INLINE static
467 #    endif
468 #  else
469 #    define XXH_FORCE_INLINE static
470 #    define XXH_NO_INLINE static
471 #  endif /* __STDC_VERSION__ */
472 #endif
473 
474 
475 
476 /* *************************************
477 *  Debug
478 ***************************************/
479 /* DEBUGLEVEL is expected to be defined externally,
480  * typically through compiler command line.
481  * Value must be a number. */
482 #ifndef DEBUGLEVEL
483 #  define DEBUGLEVEL 0
484 #endif
485 
486 #if (DEBUGLEVEL>=1)
487 #  include <assert.h>   /* note : can still be disabled with NDEBUG */
488 #  define XXH_ASSERT(c)   assert(c)
489 #else
490 #  define XXH_ASSERT(c)   ((void)0)
491 #endif
492 
493 /* note : use after variable declarations */
494 #define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }
495 
496 
497 /* *************************************
498 *  Basic Types
499 ***************************************/
500 #if !defined (__VMS) \
501  && (defined (__cplusplus) \
502  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
503 # include <stdint.h>
504   typedef uint8_t  xxh_u8;
505 #else
506   typedef unsigned char      xxh_u8;
507 #endif
508 typedef XXH32_hash_t xxh_u32;
509 
510 
511 /* ***   Memory access   *** */
512 
513 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
514 
515 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
XXH_read32(const void * memPtr)516 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
517 
518 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
519 
520 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
521 /* currently only defined for gcc and icc */
522 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
XXH_read32(const void * ptr)523 static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
524 
525 #else
526 
527 /* portable and safe solution. Generally efficient.
528  * see : http://stackoverflow.com/a/32095106/646947
529  */
XXH_read32(const void * memPtr)530 static xxh_u32 XXH_read32(const void* memPtr)
531 {
532     xxh_u32 val;
533     memcpy(&val, memPtr, sizeof(val));
534     return val;
535 }
536 
537 #endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
538 
539 
540 /* ***   Endianess   *** */
541 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
542 
543 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
544 #ifndef XXH_CPU_LITTLE_ENDIAN
545 #  if defined(_WIN32) /* Windows is always little endian */ \
546      || defined(__LITTLE_ENDIAN__) \
547      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
548 #    define XXH_CPU_LITTLE_ENDIAN 1
549 #  elif defined(__BIG_ENDIAN__) \
550      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
551 #    define XXH_CPU_LITTLE_ENDIAN 0
552 #  else
XXH_isLittleEndian(void)553 static int XXH_isLittleEndian(void)
554 {
555     const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
556     return one.c[0];
557 }
558 #   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
559 #  endif
560 #endif
561 
562 
563 
564 
565 /* ****************************************
566 *  Compiler-specific Functions and Macros
567 ******************************************/
568 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
569 
570 #ifndef __has_builtin
571 #  define __has_builtin(x) 0
572 #endif
573 
574 #if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
575 #  define XXH_rotl32 __builtin_rotateleft32
576 #  define XXH_rotl64 __builtin_rotateleft64
577 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
578 #elif defined(_MSC_VER)
579 #  define XXH_rotl32(x,r) _rotl(x,r)
580 #  define XXH_rotl64(x,r) _rotl64(x,r)
581 #else
582 #  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
583 #  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
584 #endif
585 
586 #if defined(_MSC_VER)     /* Visual Studio */
587 #  define XXH_swap32 _byteswap_ulong
588 #elif XXH_GCC_VERSION >= 403
589 #  define XXH_swap32 __builtin_bswap32
590 #else
/* Portable 32-bit byte reversal, used when no compiler builtin is available. */
static xxh_u32 XXH_swap32 (xxh_u32 x)
{
    return ((x << 24) & 0xff000000)
         | ((x <<  8) & 0x00ff0000)
         | ((x >>  8) & 0x0000ff00)
         | ((x >> 24) & 0x000000ff);
}
598 #endif
599 
600 
601 /* ***************************
602 *  Memory reads
603 *****************************/
604 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
605 
XXH_readLE32(const void * ptr)606 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
607 {
608     return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
609 }
610 
XXH_readBE32(const void * ptr)611 static xxh_u32 XXH_readBE32(const void* ptr)
612 {
613     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
614 }
615 
616 XXH_FORCE_INLINE xxh_u32
XXH_readLE32_align(const void * ptr,XXH_alignment align)617 XXH_readLE32_align(const void* ptr, XXH_alignment align)
618 {
619     if (align==XXH_unaligned) {
620         return XXH_readLE32(ptr);
621     } else {
622         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
623     }
624 }
625 
626 
627 /* *************************************
628 *  Misc
629 ***************************************/
XXH_versionNumber(void)630 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
631 
632 
633 /* *******************************************************************
634 *  32-bit hash functions
635 *********************************************************************/
/* 32-bit odd primes used as multiplicative mixing constants;
 * binary representations shown alongside for reference. */
static const xxh_u32 PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
static const xxh_u32 PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
static const xxh_u32 PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
static const xxh_u32 PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
641 
/* One mixing step of the XXH32 main loop: fold `input` into accumulator
 * `acc` via multiply, rotate-left 13, multiply. */
static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
{
    acc = acc + input * PRIME32_2;
    acc = XXH_rotl32(acc, 13);
    acc = acc * PRIME32_1;
#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
    /* UGLY HACK:
     * The empty asm below, with its "+r" (read/write, general-purpose
     * register) constraint, forces `acc` into a normal register.  This is
     * the only known way to keep GCC and Clang from autovectorizing the
     * XXH32 loop (pragmas and attributes don't work for some reason)
     * without globally disabling SSE4.1.
     *
     * Why vectorization hurts XXH32 despite processing 4 lanes at once:
     * - pmulld has heavy latency (10 cycles on newer chips!), so four
     *   packed multiplies end up slightly slower than four independent
     *   scalar ones.  Even on Sandy/Ivy Bridge, where pmulld was fastest,
     *   entering SSE just to multiply isn't worth it for short operations.
     * - Rotation takes four SSE instructions,
     *      movqda tmp,  v // not required with VEX encoding
     *      pslld  tmp, 13 // tmp <<= 13
     *      psrld  v,   19 // x >>= 19
     *      por    v,  tmp // x |= tmp
     *   versus a single scalar one:
     *      roll   v, 13    // reliably fast across the board
     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
     * - Scalar code benefits from instruction-level parallelism: while v1
     *   is rotating, v2 can load data and v3 can multiply.  SIMD forces
     *   all lanes to march in lockstep, serializing the operation.
     *
     * Because of the 'r' constraint the compiler promises the value is in
     * a general-purpose (non-SSE) register, and the '+' makes it assume
     * the value may have changed — like volatile, but without the extra
     * loads and stores.  Since the operand can never live in an SSE
     * register, each call to XXH32_round is impossible to vectorize. */
    __asm__("" : "+r" (acc));
#endif
    return acc;
}
693 
694 /* mix all bits */
/* Final avalanche: alternate xorshifts and prime multiplies so that every
 * input bit influences every bit of the 32-bit result. */
static xxh_u32 XXH32_avalanche(xxh_u32 hash)
{
    hash ^= hash >> 15;
    hash *= PRIME32_2;
    hash ^= hash >> 13;
    hash *= PRIME32_3;
    hash ^= hash >> 16;
    return hash;
}
704 
705 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
706 
/* Consume the last (len & 15) bytes of input that the 16-byte main loop
 * could not process, then apply the final avalanche.
 *   h32   : hash accumulated so far
 *   ptr   : start of the remaining bytes
 *   len   : total input length (only its low 4 bits are used here)
 *   align : whether ptr is known to be 4-byte aligned (see XXH_get32bits)
 */
static xxh_u32
XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
/* Mix in one trailing byte. */
#define PROCESS1               \
    h32 += (*ptr++) * PRIME32_5; \
    h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;

/* Mix in one trailing 32-bit lane. */
#define PROCESS4                         \
    h32 += XXH_get32bits(ptr) * PRIME32_3; \
    ptr+=4;                                \
    h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;

    /* Compact rerolled version */
    if (XXH_REROLL) {
        len &= 15;
        while (len >= 4) {
            PROCESS4;
            len -= 4;
        }
        while (len > 0) {
            PROCESS1;
            --len;
        }
        return XXH32_avalanche(h32);
    } else {
         /* Unrolled: jump to the case matching the remainder length; the
          * deliberate fallthroughs chain the required PROCESS4/PROCESS1
          * steps (e.g. 13 -> 9 -> 5 consumes 4+4+4 bytes then 1 byte). */
         switch(len&15) /* or switch(bEnd - p) */ {
           case 12:      PROCESS4;
                         FALLTHROUGH;
           case 8:       PROCESS4;
                         FALLTHROUGH;
           case 4:       PROCESS4;
                         return XXH32_avalanche(h32);

           case 13:      PROCESS4;
                         FALLTHROUGH;
           case 9:       PROCESS4;
                         FALLTHROUGH;
           case 5:       PROCESS4;
                         PROCESS1;
                         return XXH32_avalanche(h32);

           case 14:      PROCESS4;
                         FALLTHROUGH;
           case 10:      PROCESS4;
                         FALLTHROUGH;
           case 6:       PROCESS4;
                         PROCESS1;
                         PROCESS1;
                         return XXH32_avalanche(h32);

           case 15:      PROCESS4;
                         FALLTHROUGH;
           case 11:      PROCESS4;
                         FALLTHROUGH;
           case 7:       PROCESS4;
                         FALLTHROUGH;
           case 3:       PROCESS1;
                         FALLTHROUGH;
           case 2:       PROCESS1;
                         FALLTHROUGH;
           case 1:       PROCESS1;
                         FALLTHROUGH;
           case 0:       return XXH32_avalanche(h32);
        }
        XXH_ASSERT(0);
        return h32;   /* reaching this point is deemed impossible */
    }
}
775 
/* Single-shot XXH32 core: consumes 'input' in 16-byte stripes across
 * four accumulator lanes, then hands the 0-15 byte remainder to
 * XXH32_finalize(). 'align' selects aligned vs unaligned reads. */
XXH_FORCE_INLINE xxh_u32
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
{
    const xxh_u8* bEnd = input + len;
    xxh_u32 h32;

#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
    /* Substitute a harmless non-NULL pointer so the arithmetic below is
     * well-defined; with len forced to 0 it is never dereferenced. */
    if (input==NULL) {
        len=0;
        bEnd=input=(const xxh_u8*)(size_t)16;
    }
#endif

    if (len>=16) {
        const xxh_u8* const limit = bEnd - 15;   /* loop while >= 16 bytes remain */
        xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
        xxh_u32 v2 = seed + PRIME32_2;
        xxh_u32 v3 = seed + 0;
        xxh_u32 v4 = seed - PRIME32_1;

        do {
            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
        } while (input < limit);

        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    } else {
        /* Fewer than 16 bytes: skip the striped loop entirely. */
        h32  = seed + PRIME32_5;
    }

    h32 += (xxh_u32)len;

    /* 'input' now points at the 0-15 byte tail. */
    return XXH32_finalize(h32, input, len&15, align);
}
813 
814 
XXH32(const void * input,size_t len,XXH32_hash_t seed)815 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
816 {
817 #if 0
818     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
819     XXH32_state_t state;
820     XXH32_reset(&state, seed);
821     XXH32_update(&state, (const xxh_u8*)input, len);
822     return XXH32_digest(&state);
823 
824 #else
825 
826     if (XXH_FORCE_ALIGN_CHECK) {
827         if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
828             return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
829     }   }
830 
831     return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
832 #endif
833 }
834 
835 
836 
837 /*******   Hash streaming   *******/
838 
XXH32_createState(void)839 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
840 {
841     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
842 }
XXH32_freeState(XXH32_state_t * statePtr)843 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
844 {
845     XXH_free(statePtr);
846     return XXH_OK;
847 }
848 
XXH32_copyState(XXH32_state_t * dstState,const XXH32_state_t * srcState)849 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
850 {
851     memcpy(dstState, srcState, sizeof(*dstState));
852 }
853 
XXH32_reset(XXH32_state_t * statePtr,XXH32_hash_t seed)854 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
855 {
856     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
857     memset(&state, 0, sizeof(state));
858     state.v1 = seed + PRIME32_1 + PRIME32_2;
859     state.v2 = seed + PRIME32_2;
860     state.v3 = seed + 0;
861     state.v4 = seed - PRIME32_1;
862     /* do not write into reserved, planned to be removed in a future version */
863     memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
864     return XXH_OK;
865 }
866 
867 
/* Streaming update: buffers partial stripes in state->mem32 and
 * consumes complete 16-byte stripes as they become available. */
XXH_PUBLIC_API XXH_errorcode
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
{
    if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
        return XXH_OK;
#else
        return XXH_ERROR;
#endif

    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;

        /* total_len_32 may wrap; large_len records whether >= 16 bytes
         * were ever seen, which selects the digest's initial value. */
        state->total_len_32 += (XXH32_hash_t)len;
        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));

        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
            state->memsize += (XXH32_hash_t)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* some data left from previous update */
            /* Top up the buffer to a full 16-byte stripe and consume it. */
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
            {   const xxh_u32* p32 = state->mem32;
                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
            }
            p += 16-state->memsize;
            state->memsize = 0;
        }

        if (p <= bEnd-16) {
            /* Bulk loop: consume every remaining complete 16-byte stripe. */
            const xxh_u8* const limit = bEnd - 16;
            xxh_u32 v1 = state->v1;
            xxh_u32 v2 = state->v2;
            xxh_u32 v3 = state->v3;
            xxh_u32 v4 = state->v4;

            do {
                v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
                v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
                v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
                v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
            } while (p<=limit);

            state->v1 = v1;
            state->v2 = v2;
            state->v3 = v3;
            state->v4 = v4;
        }

        if (p < bEnd) {
            /* Stash the 1-15 byte remainder for the next call. */
            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}
930 
931 
XXH32_digest(const XXH32_state_t * state)932 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
933 {
934     xxh_u32 h32;
935 
936     if (state->large_len) {
937         h32 = XXH_rotl32(state->v1, 1)
938             + XXH_rotl32(state->v2, 7)
939             + XXH_rotl32(state->v3, 12)
940             + XXH_rotl32(state->v4, 18);
941     } else {
942         h32 = state->v3 /* == seed */ + PRIME32_5;
943     }
944 
945     h32 += state->total_len_32;
946 
947     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
948 }
949 
950 
951 /*******   Canonical representation   *******/
952 
953 /*! Default XXH result types are basic unsigned 32 and 64 bits.
954 *   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
955 *   These functions allow transformation of hash result into and from its canonical format.
956 *   This way, hash values can be written into a file or buffer, remaining comparable across different systems.
957 */
958 
XXH32_canonicalFromHash(XXH32_canonical_t * dst,XXH32_hash_t hash)959 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
960 {
961     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
962     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
963     memcpy(dst, &hash, sizeof(*dst));
964 }
965 
/* Decodes a canonical (big-endian) representation back into a native hash value. */
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
{
    return XXH_readBE32(src);
}
970 
971 
972 #ifndef XXH_NO_LONG_LONG
973 
974 /* *******************************************************************
975 *  64-bit hash functions
976 *********************************************************************/
977 
978 /*******   Memory access   *******/
979 
980 typedef XXH64_hash_t xxh_u64;
981 
982 
983 /*! XXH_REROLL_XXH64:
984  * Whether to reroll the XXH64_finalize() loop.
985  *
986  * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain
987  * on 64-bit hosts, as only one jump is required.
988  *
989  * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers,
990  * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes
991  * ridiculously large (the largest function in the binary on i386!), and rerolling it saves
992  * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better
993  * and is more likely to be inlined by the compiler.
994  *
995  * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */
996 #ifndef XXH_REROLL_XXH64
997 #  if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
998    || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
999      || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
1000      || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
1001      || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
1002    || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
1003 #    define XXH_REROLL_XXH64 1
1004 #  else
1005 #    define XXH_REROLL_XXH64 0
1006 #  endif
1007 #endif /* !defined(XXH_REROLL_XXH64) */
1008 
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }

#else

/* portable and safe solution. Generally efficient.
 * see : http://stackoverflow.com/a/32095106/646947
 */

/* memcpy() is the strict-aliasing-safe way to express an unaligned load;
 * compilers typically lower it to a single 64-bit read. */
static xxh_u64 XXH_read64(const void* memPtr)
{
    xxh_u64 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
1035 
1036 #if defined(_MSC_VER)     /* Visual Studio */
1037 #  define XXH_swap64 _byteswap_uint64
1038 #elif XXH_GCC_VERSION >= 403
1039 #  define XXH_swap64 __builtin_bswap64
1040 #else
/* Fallback byteswap: reverses the byte order of a 64-bit value with
 * three pairwise exchanges — adjacent bytes, then 16-bit fields, then
 * the two 32-bit halves. */
static xxh_u64 XXH_swap64 (xxh_u64 x)
{
    x = ((x & 0x00FF00FF00FF00FFULL) << 8)  | ((x >> 8)  & 0x00FF00FF00FF00FFULL);
    x = ((x & 0x0000FFFF0000FFFFULL) << 16) | ((x >> 16) & 0x0000FFFF0000FFFFULL);
    return (x << 32) | (x >> 32);
}
1052 #endif
1053 
XXH_readLE64(const void * ptr)1054 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
1055 {
1056     return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
1057 }
1058 
XXH_readBE64(const void * ptr)1059 static xxh_u64 XXH_readBE64(const void* ptr)
1060 {
1061     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
1062 }
1063 
1064 XXH_FORCE_INLINE xxh_u64
XXH_readLE64_align(const void * ptr,XXH_alignment align)1065 XXH_readLE64_align(const void* ptr, XXH_alignment align)
1066 {
1067     if (align==XXH_unaligned)
1068         return XXH_readLE64(ptr);
1069     else
1070         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
1071 }
1072 
1073 
1074 /*******   xxh64   *******/
1075 
/* Mixing constants for the 64-bit variant: large odd primes whose bit
 * patterns (shown in the binary expansions below) are spread out. */
static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
1081 
/* Core lane step: multiply-accumulate the input, rotate, then scale. */
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
{
    return XXH_rotl64(acc + (input * PRIME64_2), 31) * PRIME64_1;
}
1089 
/* Folds one accumulator lane into the merged hash value. */
static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
{
    acc ^= XXH64_round(0, val);
    return acc * PRIME64_1 + PRIME64_4;
}
1097 
/* Final mixing stage: diffuses every input bit across all 64 output bits. */
static xxh_u64 XXH64_avalanche(xxh_u64 h64)
{
    h64 = (h64 ^ (h64 >> 33)) * PRIME64_2;
    h64 = (h64 ^ (h64 >> 29)) * PRIME64_3;
    h64 ^= h64 >> 32;
    return h64;
}
1107 
1108 
#define XXH_get64bits(p) XXH_readLE64_align(p, align)

/* Consumes the last 0-31 bytes of the input and applies the final
 * avalanche. Only the low 5 bits of 'len' are used ('len & 31'). */
static xxh_u64
XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
/* Mix one tail byte into the accumulator. */
#define PROCESS1_64            \
    h64 ^= (*ptr++) * PRIME64_5; \
    h64 = XXH_rotl64(h64, 11) * PRIME64_1;

/* Mix one 32-bit tail word into the accumulator. */
#define PROCESS4_64          \
    h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
    ptr+=4;                    \
    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;

/* Mix one 64-bit tail word into the accumulator via a full round. */
#define PROCESS8_64 {        \
    xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
    ptr+=8;                    \
    h64 ^= k1;               \
    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
}

    /* Rerolled version for 32-bit targets is faster and much smaller. */
    if (XXH_REROLL || XXH_REROLL_XXH64) {
        len &= 31;
        while (len >= 8) {
            PROCESS8_64;
            len -= 8;
        }
        if (len >= 4) {
            PROCESS4_64;
            len -= 4;
        }
        while (len > 0) {
            PROCESS1_64;
            --len;
        }
         return  XXH64_avalanche(h64);
    } else {
        /* Unrolled version: one switch dispatch, then straight-line code.
         * Each entry point chains exactly the PROCESS8/4/1 steps needed
         * for that residual length. */
        switch(len & 31) {
           case 24: PROCESS8_64;
                    FALLTHROUGH;
           case 16: PROCESS8_64;
                    FALLTHROUGH;
           case  8: PROCESS8_64;
                    return XXH64_avalanche(h64);

           case 28: PROCESS8_64;
                    FALLTHROUGH;
           case 20: PROCESS8_64;
                    FALLTHROUGH;
           case 12: PROCESS8_64;
                    FALLTHROUGH;
           case  4: PROCESS4_64;
                    return XXH64_avalanche(h64);

           case 25: PROCESS8_64;
                    FALLTHROUGH;
           case 17: PROCESS8_64;
                    FALLTHROUGH;
           case  9: PROCESS8_64;
                    PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 29: PROCESS8_64;
                    FALLTHROUGH;
           case 21: PROCESS8_64;
                    FALLTHROUGH;
           case 13: PROCESS8_64;
                    FALLTHROUGH;
           case  5: PROCESS4_64;
                    PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 26: PROCESS8_64;
                    FALLTHROUGH;
           case 18: PROCESS8_64;
                    FALLTHROUGH;
           case 10: PROCESS8_64;
                    PROCESS1_64;
                    PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 30: PROCESS8_64;
                    FALLTHROUGH;
           case 22: PROCESS8_64;
                    FALLTHROUGH;
           case 14: PROCESS8_64;
                    FALLTHROUGH;
           case  6: PROCESS4_64;
                    PROCESS1_64;
                    PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 27: PROCESS8_64;
                    FALLTHROUGH;
           case 19: PROCESS8_64;
                    FALLTHROUGH;
           case 11: PROCESS8_64;
                    PROCESS1_64;
                    PROCESS1_64;
                    PROCESS1_64;
                    return XXH64_avalanche(h64);

           case 31: PROCESS8_64;
                    FALLTHROUGH;
           case 23: PROCESS8_64;
                    FALLTHROUGH;
           case 15: PROCESS8_64;
                    FALLTHROUGH;
           case  7: PROCESS4_64;
                    FALLTHROUGH;
           case  3: PROCESS1_64;
                    FALLTHROUGH;
           case  2: PROCESS1_64;
                    FALLTHROUGH;
           case  1: PROCESS1_64;
                    FALLTHROUGH;
           case  0: return XXH64_avalanche(h64);
        }
    }
    /* impossible to reach */
    XXH_ASSERT(0);
    return 0;  /* unreachable, but some compilers complain without it */
}
1233 
/* Single-shot XXH64 core: consumes 'input' in 32-byte stripes across
 * four lanes, merges the lanes back into the hash, then finalizes the
 * 0-31 byte tail. 'align' selects aligned vs unaligned reads. */
XXH_FORCE_INLINE xxh_u64
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
{
    const xxh_u8* bEnd = input + len;
    xxh_u64 h64;

#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
    /* Substitute a harmless non-NULL pointer; with len forced to 0 it
     * is never dereferenced. */
    if (input==NULL) {
        len=0;
        bEnd=input=(const xxh_u8*)(size_t)32;
    }
#endif

    if (len>=32) {
        const xxh_u8* const limit = bEnd - 32;   /* loop while >= 32 bytes remain */
        xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
        xxh_u64 v2 = seed + PRIME64_2;
        xxh_u64 v3 = seed + 0;
        xxh_u64 v4 = seed - PRIME64_1;

        do {
            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
        } while (input<=limit);

        /* Unlike XXH32, the 64-bit variant also folds each lane back in. */
        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);

    } else {
        /* Fewer than 32 bytes: skip the striped loop entirely. */
        h64  = seed + PRIME64_5;
    }

    h64 += (xxh_u64) len;

    /* XXH64_finalize() masks len to its low 5 bits itself. */
    return XXH64_finalize(h64, input, len, align);
}
1275 
1276 
XXH64(const void * input,size_t len,XXH64_hash_t seed)1277 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
1278 {
1279 #if 0
1280     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
1281     XXH64_state_t state;
1282     XXH64_reset(&state, seed);
1283     XXH64_update(&state, (const xxh_u8*)input, len);
1284     return XXH64_digest(&state);
1285 
1286 #else
1287 
1288     if (XXH_FORCE_ALIGN_CHECK) {
1289         if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
1290             return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
1291     }   }
1292 
1293     return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
1294 
1295 #endif
1296 }
1297 
1298 /*******   Hash Streaming   *******/
1299 
XXH64_createState(void)1300 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
1301 {
1302     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
1303 }
XXH64_freeState(XXH64_state_t * statePtr)1304 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
1305 {
1306     XXH_free(statePtr);
1307     return XXH_OK;
1308 }
1309 
XXH64_copyState(XXH64_state_t * dstState,const XXH64_state_t * srcState)1310 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
1311 {
1312     memcpy(dstState, srcState, sizeof(*dstState));
1313 }
1314 
XXH64_reset(XXH64_state_t * statePtr,XXH64_hash_t seed)1315 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
1316 {
1317     XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
1318     memset(&state, 0, sizeof(state));
1319     state.v1 = seed + PRIME64_1 + PRIME64_2;
1320     state.v2 = seed + PRIME64_2;
1321     state.v3 = seed + 0;
1322     state.v4 = seed - PRIME64_1;
1323      /* do not write into reserved64, might be removed in a future version */
1324     memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
1325     return XXH_OK;
1326 }
1327 
/* Streaming update: buffers partial stripes in state->mem64 and
 * consumes complete 32-byte stripes as they become available. */
XXH_PUBLIC_API XXH_errorcode
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
{
    if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
        return XXH_OK;
#else
        return XXH_ERROR;
#endif

    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;

        state->total_len += len;

        if (state->memsize + len < 32) {  /* fill in tmp buffer */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
            state->memsize += (xxh_u32)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* tmp buffer is full */
            /* Top up the buffer to a full 32-byte stripe and consume it. */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
            p += 32-state->memsize;
            state->memsize = 0;
        }

        if (p+32 <= bEnd) {
            /* Bulk loop: consume every remaining complete 32-byte stripe. */
            const xxh_u8* const limit = bEnd - 32;
            xxh_u64 v1 = state->v1;
            xxh_u64 v2 = state->v2;
            xxh_u64 v3 = state->v3;
            xxh_u64 v4 = state->v4;

            do {
                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
            } while (p<=limit);

            state->v1 = v1;
            state->v2 = v2;
            state->v3 = v3;
            state->v4 = v4;
        }

        if (p < bEnd) {
            /* Stash the 1-31 byte remainder for the next call. */
            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}
1387 
1388 
XXH64_digest(const XXH64_state_t * state)1389 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
1390 {
1391     xxh_u64 h64;
1392 
1393     if (state->total_len >= 32) {
1394         xxh_u64 const v1 = state->v1;
1395         xxh_u64 const v2 = state->v2;
1396         xxh_u64 const v3 = state->v3;
1397         xxh_u64 const v4 = state->v4;
1398 
1399         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1400         h64 = XXH64_mergeRound(h64, v1);
1401         h64 = XXH64_mergeRound(h64, v2);
1402         h64 = XXH64_mergeRound(h64, v3);
1403         h64 = XXH64_mergeRound(h64, v4);
1404     } else {
1405         h64  = state->v3 /*seed*/ + PRIME64_5;
1406     }
1407 
1408     h64 += (xxh_u64) state->total_len;
1409 
1410     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
1411 }
1412 
1413 
1414 /******* Canonical representation   *******/
1415 
XXH64_canonicalFromHash(XXH64_canonical_t * dst,XXH64_hash_t hash)1416 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
1417 {
1418     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
1419     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
1420     memcpy(dst, &hash, sizeof(*dst));
1421 }
1422 
/* Decodes a canonical (big-endian) representation back into a native hash value. */
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
{
    return XXH_readBE64(src);
}
1427 
1428 
1429 
1430 /* *********************************************************************
1431 *  XXH3
1432 *  New generation hash designed for speed on small keys and vectorization
1433 ************************************************************************ */
1434 
1435 /* #include "xxh3.h" */
1436 
1437 
1438 #endif  /* XXH_NO_LONG_LONG */
1439 
1440 
1441 #endif  /* XXH_IMPLEMENTATION */
1442 
1443 
1444 #if defined (__cplusplus)
1445 }
1446 #endif
1447