1 /*
2 xxHash - Fast Hash algorithm
3 Copyright (C) 2012-2014, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9 
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 You can contact the author at :
30 - xxHash source repository : http://code.google.com/p/xxhash/
31 */
32 #include "archive_platform.h"
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "archive_xxhash.h"
38 
39 #ifdef HAVE_LIBLZ4
40 
41 /***************************************
42 ** Tuning parameters
43 ****************************************/
44 /* Unaligned memory access is automatically enabled for "common" CPU, such as x86.
45 ** For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
46 ** If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
47 ** You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
48 */
49 #if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
50 #  define XXH_USE_UNALIGNED_ACCESS 1
51 #endif
52 
53 /* XXH_ACCEPT_NULL_INPUT_POINTER :
54 ** If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
55 ** When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
56 ** This option has a very small performance cost (only measurable on small inputs).
57 ** By default, this option is disabled. To enable it, uncomment below define :
58 ** #define XXH_ACCEPT_NULL_INPUT_POINTER 1
59 
60 ** XXH_FORCE_NATIVE_FORMAT :
61 ** By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
62 ** Results are therefore identical for little-endian and big-endian CPU.
63 ** This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
64 ** Should endian-independence be of no importance for your application, you may set the #define below to 1.
65 ** It will improve speed for Big-endian CPU.
66 ** This option has no impact on Little_Endian CPU.
67 */
68 #define XXH_FORCE_NATIVE_FORMAT 0
69 
70 /***************************************
71 ** Compiler Specific Options
72 ****************************************/
73 /* Disable some Visual warning messages */
74 #ifdef _MSC_VER  /* Visual Studio */
75 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
76 #endif
77 
78 #ifdef _MSC_VER    /* Visual Studio */
79 #  define FORCE_INLINE __forceinline
80 #else
81 #  ifdef __GNUC__
82 #    define FORCE_INLINE inline __attribute__((always_inline))
83 #  else
84 #    define FORCE_INLINE inline
85 #  endif
86 #endif
87 
88 /***************************************
89 ** Includes & Memory related functions
90 ****************************************/
91 #define XXH_malloc malloc
92 #define XXH_free free
93 #define XXH_memcpy memcpy
94 
95 
96 static unsigned int	  XXH32 (const void*, unsigned int, unsigned int);
97 static void*		  XXH32_init   (unsigned int);
98 static XXH_errorcode	  XXH32_update (void*, const void*, unsigned int);
99 static unsigned int	  XXH32_digest (void*);
100 /*static int		  XXH32_sizeofState(void);*/
101 static XXH_errorcode	  XXH32_resetState(void*, unsigned int);
102 #define       XXH32_SIZEOFSTATE 48
103 typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
104 static unsigned int	  XXH32_intermediateDigest (void*);
105 
106 /***************************************
107 ** Basic Types
108 ****************************************/
109 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
110 # include <stdint.h>
111   typedef uint8_t  BYTE;
112   typedef uint16_t U16;
113   typedef uint32_t U32;
114   typedef  int32_t S32;
115   typedef uint64_t U64;
116 #else
117   typedef unsigned char      BYTE;
118   typedef unsigned short     U16;
119   typedef unsigned int       U32;
120   typedef   signed int       S32;
121   typedef unsigned long long U64;
122 #endif
123 
124 #if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
125 #  define _PACKED __attribute__ ((packed))
126 #else
127 #  define _PACKED
128 #endif
129 
130 #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
131 #  ifdef __IBMC__
132 #    pragma pack(1)
133 #  else
134 #    pragma pack(push, 1)
135 #  endif
136 #endif
137 
138 typedef struct _U32_S { U32 v; } _PACKED U32_S;
139 
140 #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
141 #  pragma pack(pop)
142 #endif
143 
144 
145 /****************************************
146 ** Compiler-specific Functions and Macros
147 *****************************************/
148 #define GCC_VERSION ((__GNUC__-0) * 100 + (__GNUC_MINOR__ - 0))
149 
150 #if GCC_VERSION >= 409
151 __attribute__((__no_sanitize_undefined__))
152 #else
153 #  if defined(__clang__)
154 __attribute__((no_sanitize("undefined")))
155 #  endif
156 #endif
157 #if defined(_MSC_VER)
158 static __inline U32 A32(const void * x)
159 #else
160 static inline U32 A32(const void* x)
161 #endif
162 {
163     return (((const U32_S *)(x))->v);
164 }
165 
166 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
167 #if defined(_MSC_VER)
168 #  define XXH_rotl32(x,r) _rotl(x,r)
169 #else
170 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
171 #endif
172 
173 #if defined(_MSC_VER)     /* Visual Studio */
174 #  define XXH_swap32 _byteswap_ulong
175 #elif GCC_VERSION >= 403
176 #  define XXH_swap32 __builtin_bswap32
177 #else
178 static inline U32 XXH_swap32 (U32 x) {
179     return  ((x << 24) & 0xff000000 ) |
180 			((x <<  8) & 0x00ff0000 ) |
181 			((x >>  8) & 0x0000ff00 ) |
182 			((x >> 24) & 0x000000ff );}
183 #endif
184 
185 
186 /***************************************
187 ** Constants
188 ****************************************/
189 #define PRIME32_1   2654435761U
190 #define PRIME32_2   2246822519U
191 #define PRIME32_3   3266489917U
192 #define PRIME32_4    668265263U
193 #define PRIME32_5    374761393U
194 
195 
196 /***************************************
197 ** Architecture Macros
198 ****************************************/
199 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
200 #ifndef XXH_CPU_LITTLE_ENDIAN   /* It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch */
201     static const int one = 1;
202 #   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
203 #endif
204 
205 
206 /***************************************
207 ** Macros
208 ****************************************/
209 #define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
210 
211 
212 /*****************************
213 ** Memory reads
214 ******************************/
215 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
216 
217 static
218 FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
219 {
220     if (align==XXH_unaligned)
221         return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
222     else
223         return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
224 }
225 
226 static
227 FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
228 
229 
230 /*****************************
231 ** Simple Hash Functions
232 ******************************/
233 static
234 FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align)
235 {
236     const BYTE* p = (const BYTE*)input;
237     const BYTE* bEnd = p + len;
238     U32 h32;
239 #define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align)
240 
241 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
242     if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; }
243 #endif
244 
245     if (len>=16)
246     {
247         const BYTE* const limit = bEnd - 16;
248         U32 v1 = seed + PRIME32_1 + PRIME32_2;
249         U32 v2 = seed + PRIME32_2;
250         U32 v3 = seed + 0;
251         U32 v4 = seed - PRIME32_1;
252 
253         do
254         {
255             v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
256             v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
257             v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
258             v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
259         } while (p<=limit);
260 
261         h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
262     }
263     else
264     {
265         h32  = seed + PRIME32_5;
266     }
267 
268     h32 += (U32) len;
269 
270     while (p<=bEnd-4)
271     {
272         h32 += XXH_get32bits(p) * PRIME32_3;
273         h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
274         p+=4;
275     }
276 
277     while (p<bEnd)
278     {
279         h32 += (*p) * PRIME32_5;
280         h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
281         p++;
282     }
283 
284     h32 ^= h32 >> 15;
285     h32 *= PRIME32_2;
286     h32 ^= h32 >> 13;
287     h32 *= PRIME32_3;
288     h32 ^= h32 >> 16;
289 
290     return h32;
291 }
292 
293 
294 U32 XXH32(const void* input, unsigned int len, U32 seed)
295 {
296 #if 0
297     // Simple version, good for code maintenance, but unfortunately slow for small inputs
298     void* state = XXH32_init(seed);
299     XXH32_update(state, input, len);
300     return XXH32_digest(state);
301 #else
302     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
303 
304 #  if !defined(XXH_USE_UNALIGNED_ACCESS)
305     if ((((size_t)input) & 3) == 0)   /* Input is aligned, let's leverage the speed advantage */
306     {
307         if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
308             return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
309         else
310             return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
311     }
312 #  endif
313 
314     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
315         return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
316     else
317         return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
318 #endif
319 }
320 
321 /*****************************
322 ** Advanced Hash Functions
323 ******************************/
324 
325 struct XXH_state32_t
326 {
327     U64 total_len;
328     U32 seed;
329     U32 v1;
330     U32 v2;
331     U32 v3;
332     U32 v4;
333     int memsize;
334     char memory[16];
335 };
336 
337 #if 0
338 static
339 int XXH32_sizeofState(void)
340 {
341     XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   /* A compilation error here means XXH32_SIZEOFSTATE is not large enough */
342     return sizeof(struct XXH_state32_t);
343 }
344 #endif
345 
346 static
347 XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
348 {
349     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
350     state->seed = seed;
351     state->v1 = seed + PRIME32_1 + PRIME32_2;
352     state->v2 = seed + PRIME32_2;
353     state->v3 = seed + 0;
354     state->v4 = seed - PRIME32_1;
355     state->total_len = 0;
356     state->memsize = 0;
357     return XXH_OK;
358 }
359 
360 static
361 void* XXH32_init (U32 seed)
362 {
363     void* state = XXH_malloc (sizeof(struct XXH_state32_t));
364     XXH32_resetState(state, seed);
365     return state;
366 }
367 
368 static
369 FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
370 {
371     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
372     const BYTE* p = (const BYTE*)input;
373     const BYTE* const bEnd = p + len;
374 
375 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
376     if (input==NULL) return XXH_ERROR;
377 #endif
378 
379     state->total_len += len;
380 
381     if (state->memsize + len < 16)   /* fill in tmp buffer */
382     {
383         XXH_memcpy(state->memory + state->memsize, input, len);
384         state->memsize +=  len;
385         return XXH_OK;
386     }
387 
388     if (state->memsize)   /* some data left from previous update */
389     {
390         XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
391         {
392             const U32* p32 = (const U32*)state->memory;
393             state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
394             state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
395             state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
396             state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
397         }
398         p += 16-state->memsize;
399         state->memsize = 0;
400     }
401 
402     if (p <= bEnd-16)
403     {
404         const BYTE* const limit = bEnd - 16;
405         U32 v1 = state->v1;
406         U32 v2 = state->v2;
407         U32 v3 = state->v3;
408         U32 v4 = state->v4;
409 
410         do
411         {
412             v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
413             v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
414             v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
415             v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
416         } while (p<=limit);
417 
418         state->v1 = v1;
419         state->v2 = v2;
420         state->v3 = v3;
421         state->v4 = v4;
422     }
423 
424     if (p < bEnd)
425     {
426         XXH_memcpy(state->memory, p, bEnd-p);
427         state->memsize = (int)(bEnd-p);
428     }
429 
430     return XXH_OK;
431 }
432 
433 static
434 XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len)
435 {
436     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
437 
438     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
439         return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
440     else
441         return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
442 }
443 
444 
445 
446 static
447 FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
448 {
449     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
450     const BYTE * p = (const BYTE*)state->memory;
451     BYTE* bEnd = (BYTE*)state->memory + state->memsize;
452     U32 h32;
453 
454     if (state->total_len >= 16)
455     {
456         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
457     }
458     else
459     {
460         h32  = state->seed + PRIME32_5;
461     }
462 
463     h32 += (U32) state->total_len;
464 
465     while (p<=bEnd-4)
466     {
467         h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
468         h32  = XXH_rotl32(h32, 17) * PRIME32_4;
469         p+=4;
470     }
471 
472     while (p<bEnd)
473     {
474         h32 += (*p) * PRIME32_5;
475         h32 = XXH_rotl32(h32, 11) * PRIME32_1;
476         p++;
477     }
478 
479     h32 ^= h32 >> 15;
480     h32 *= PRIME32_2;
481     h32 ^= h32 >> 13;
482     h32 *= PRIME32_3;
483     h32 ^= h32 >> 16;
484 
485     return h32;
486 }
487 
488 static
489 U32 XXH32_intermediateDigest (void* state_in)
490 {
491     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
492 
493     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
494         return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
495     else
496         return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
497 }
498 
499 static
500 U32 XXH32_digest (void* state_in)
501 {
502     U32 h32 = XXH32_intermediateDigest(state_in);
503 
504     XXH_free(state_in);
505 
506     return h32;
507 }
508 
509 const
510 struct archive_xxhash __archive_xxhash = {
511 	XXH32,
512 	XXH32_init,
513 	XXH32_update,
514 	XXH32_digest
515 };
516 #else
517 
518 /*
519  * Define an empty version of the struct if we aren't using the LZ4 library.
520  */
521 const
522 struct archive_xxhash __archive_xxhash = {
523 	NULL,
524 	NULL,
525 	NULL,
526 	NULL
527 };
528 
529 #endif /* HAVE_LIBLZ4 */
530