1 /*
2 xxHash - Fast Hash algorithm
3 Copyright (C) 2012-2014, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9 
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 You can contact the author at :
30 - xxHash source repository : http://code.google.com/p/xxhash/
31 */
32 #include "archive_platform.h"
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "archive_xxhash.h"
38 
39 #ifdef HAVE_LIBLZ4
40 
41 /***************************************
42 ** Tuning parameters
43 ****************************************/
44 /* Unaligned memory access is automatically enabled for "common" CPU, such as x86.
45 ** For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
46 ** If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
47 ** You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
48 */
49 #if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
50 #  define XXH_USE_UNALIGNED_ACCESS 1
51 #endif
52 
53 /* XXH_ACCEPT_NULL_INPUT_POINTER :
54 ** If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
55 ** When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
56 ** This option has a very small performance cost (only measurable on small inputs).
57 ** By default, this option is disabled. To enable it, uncomment below define :
58 ** #define XXH_ACCEPT_NULL_INPUT_POINTER 1
59 
60 ** XXH_FORCE_NATIVE_FORMAT :
61 ** By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
62 ** Results are therefore identical for little-endian and big-endian CPU.
63 ** This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
64 ** Should endian-independence be of no importance for your application, you may set the #define below to 1.
65 ** It will improve speed for Big-endian CPU.
66 ** This option has no impact on Little_Endian CPU.
67 */
68 #define XXH_FORCE_NATIVE_FORMAT 0
69 
70 /***************************************
71 ** Compiler Specific Options
72 ****************************************/
73 /* Disable some Visual warning messages */
74 #ifdef _MSC_VER  /* Visual Studio */
75 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
76 #endif
77 
78 #ifdef _MSC_VER    /* Visual Studio */
79 #  define FORCE_INLINE __forceinline
80 #else
81 #  ifdef __GNUC__
82 #    define FORCE_INLINE inline __attribute__((always_inline))
83 #  else
84 #    define FORCE_INLINE inline
85 #  endif
86 #endif
87 
88 /***************************************
89 ** Includes & Memory related functions
90 ****************************************/
91 #define XXH_malloc malloc
92 #define XXH_free free
93 #define XXH_memcpy memcpy
94 
95 
96 static unsigned int	  XXH32 (const void*, unsigned int, unsigned int);
97 static void*		  XXH32_init   (unsigned int);
98 static XXH_errorcode	  XXH32_update (void*, const void*, unsigned int);
99 static unsigned int	  XXH32_digest (void*);
100 /*static int		  XXH32_sizeofState(void);*/
101 static XXH_errorcode	  XXH32_resetState(void*, unsigned int);
102 #define       XXH32_SIZEOFSTATE 48
103 typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
104 static unsigned int	  XXH32_intermediateDigest (void*);
105 
106 /***************************************
107 ** Basic Types
108 ****************************************/
109 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
110 # include <stdint.h>
111   typedef uint8_t  BYTE;
112   typedef uint16_t U16;
113   typedef uint32_t U32;
114   typedef  int32_t S32;
115   typedef uint64_t U64;
116 #else
117   typedef unsigned char      BYTE;
118   typedef unsigned short     U16;
119   typedef unsigned int       U32;
120   typedef   signed int       S32;
121   typedef unsigned long long U64;
122 #endif
123 
124 #if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
125 #  define _PACKED __attribute__ ((packed))
126 #else
127 #  define _PACKED
128 #endif
129 
130 #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
131 #  ifdef __IBMC__
132 #    pragma pack(1)
133 #  else
134 #    pragma pack(push, 1)
135 #  endif
136 #endif
137 
138 typedef struct _U32_S { U32 v; } _PACKED U32_S;
139 
140 #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
141 #  pragma pack(pop)
142 #endif
143 
144 
145 /****************************************
146 ** Compiler-specific Functions and Macros
147 *****************************************/
148 #define GCC_VERSION ((__GNUC__-0) * 100 + (__GNUC_MINOR__ - 0))
149 
150 #if GCC_VERSION >= 409
151 __attribute__((__no_sanitize_undefined__))
152 #endif
153 #if defined(_MSC_VER)
154 static __inline U32 A32(const void * x)
155 #else
156 static inline U32 A32(const void* x)
157 #endif
158 {
159     return (((const U32_S *)(x))->v);
160 }
161 
162 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
163 #if defined(_MSC_VER)
164 #  define XXH_rotl32(x,r) _rotl(x,r)
165 #else
166 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
167 #endif
168 
169 #if defined(_MSC_VER)     /* Visual Studio */
170 #  define XXH_swap32 _byteswap_ulong
171 #elif GCC_VERSION >= 403
172 #  define XXH_swap32 __builtin_bswap32
173 #else
174 static inline U32 XXH_swap32 (U32 x) {
175     return  ((x << 24) & 0xff000000 ) |
176 			((x <<  8) & 0x00ff0000 ) |
177 			((x >>  8) & 0x0000ff00 ) |
178 			((x >> 24) & 0x000000ff );}
179 #endif
180 
181 
182 /***************************************
183 ** Constants
184 ****************************************/
185 #define PRIME32_1   2654435761U
186 #define PRIME32_2   2246822519U
187 #define PRIME32_3   3266489917U
188 #define PRIME32_4    668265263U
189 #define PRIME32_5    374761393U
190 
191 
192 /***************************************
193 ** Architecture Macros
194 ****************************************/
195 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
196 #ifndef XXH_CPU_LITTLE_ENDIAN   /* It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch */
197     static const int one = 1;
198 #   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
199 #endif
200 
201 
202 /***************************************
203 ** Macros
204 ****************************************/
205 #define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
206 
207 
208 /*****************************
209 ** Memory reads
210 ******************************/
211 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
212 
213 static
214 FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
215 {
216     if (align==XXH_unaligned)
217         return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
218     else
219         return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
220 }
221 
222 static
223 FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
224 
225 
226 /*****************************
227 ** Simple Hash Functions
228 ******************************/
229 static
230 FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align)
231 {
232     const BYTE* p = (const BYTE*)input;
233     const BYTE* bEnd = p + len;
234     U32 h32;
235 #define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align)
236 
237 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
238     if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; }
239 #endif
240 
241     if (len>=16)
242     {
243         const BYTE* const limit = bEnd - 16;
244         U32 v1 = seed + PRIME32_1 + PRIME32_2;
245         U32 v2 = seed + PRIME32_2;
246         U32 v3 = seed + 0;
247         U32 v4 = seed - PRIME32_1;
248 
249         do
250         {
251             v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
252             v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
253             v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
254             v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
255         } while (p<=limit);
256 
257         h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
258     }
259     else
260     {
261         h32  = seed + PRIME32_5;
262     }
263 
264     h32 += (U32) len;
265 
266     while (p<=bEnd-4)
267     {
268         h32 += XXH_get32bits(p) * PRIME32_3;
269         h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
270         p+=4;
271     }
272 
273     while (p<bEnd)
274     {
275         h32 += (*p) * PRIME32_5;
276         h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
277         p++;
278     }
279 
280     h32 ^= h32 >> 15;
281     h32 *= PRIME32_2;
282     h32 ^= h32 >> 13;
283     h32 *= PRIME32_3;
284     h32 ^= h32 >> 16;
285 
286     return h32;
287 }
288 
289 
290 U32 XXH32(const void* input, unsigned int len, U32 seed)
291 {
292 #if 0
293     // Simple version, good for code maintenance, but unfortunately slow for small inputs
294     void* state = XXH32_init(seed);
295     XXH32_update(state, input, len);
296     return XXH32_digest(state);
297 #else
298     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
299 
300 #  if !defined(XXH_USE_UNALIGNED_ACCESS)
301     if ((((size_t)input) & 3) == 0)   /* Input is aligned, let's leverage the speed advantage */
302     {
303         if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
304             return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
305         else
306             return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
307     }
308 #  endif
309 
310     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
311         return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
312     else
313         return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
314 #endif
315 }
316 
317 /*****************************
318 ** Advanced Hash Functions
319 ******************************/
320 
321 struct XXH_state32_t
322 {
323     U64 total_len;
324     U32 seed;
325     U32 v1;
326     U32 v2;
327     U32 v3;
328     U32 v4;
329     int memsize;
330     char memory[16];
331 };
332 
333 #if 0
334 static
335 int XXH32_sizeofState(void)
336 {
337     XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   /* A compilation error here means XXH32_SIZEOFSTATE is not large enough */
338     return sizeof(struct XXH_state32_t);
339 }
340 #endif
341 
342 static
343 XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
344 {
345     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
346     state->seed = seed;
347     state->v1 = seed + PRIME32_1 + PRIME32_2;
348     state->v2 = seed + PRIME32_2;
349     state->v3 = seed + 0;
350     state->v4 = seed - PRIME32_1;
351     state->total_len = 0;
352     state->memsize = 0;
353     return XXH_OK;
354 }
355 
356 static
357 void* XXH32_init (U32 seed)
358 {
359     void* state = XXH_malloc (sizeof(struct XXH_state32_t));
360     XXH32_resetState(state, seed);
361     return state;
362 }
363 
364 static
365 FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
366 {
367     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
368     const BYTE* p = (const BYTE*)input;
369     const BYTE* const bEnd = p + len;
370 
371 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
372     if (input==NULL) return XXH_ERROR;
373 #endif
374 
375     state->total_len += len;
376 
377     if (state->memsize + len < 16)   /* fill in tmp buffer */
378     {
379         XXH_memcpy(state->memory + state->memsize, input, len);
380         state->memsize +=  len;
381         return XXH_OK;
382     }
383 
384     if (state->memsize)   /* some data left from previous update */
385     {
386         XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
387         {
388             const U32* p32 = (const U32*)state->memory;
389             state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
390             state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
391             state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
392             state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
393         }
394         p += 16-state->memsize;
395         state->memsize = 0;
396     }
397 
398     if (p <= bEnd-16)
399     {
400         const BYTE* const limit = bEnd - 16;
401         U32 v1 = state->v1;
402         U32 v2 = state->v2;
403         U32 v3 = state->v3;
404         U32 v4 = state->v4;
405 
406         do
407         {
408             v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
409             v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
410             v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
411             v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
412         } while (p<=limit);
413 
414         state->v1 = v1;
415         state->v2 = v2;
416         state->v3 = v3;
417         state->v4 = v4;
418     }
419 
420     if (p < bEnd)
421     {
422         XXH_memcpy(state->memory, p, bEnd-p);
423         state->memsize = (int)(bEnd-p);
424     }
425 
426     return XXH_OK;
427 }
428 
429 static
430 XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len)
431 {
432     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
433 
434     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
435         return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
436     else
437         return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
438 }
439 
440 
441 
442 static
443 FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
444 {
445     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
446     const BYTE * p = (const BYTE*)state->memory;
447     BYTE* bEnd = (BYTE*)state->memory + state->memsize;
448     U32 h32;
449 
450     if (state->total_len >= 16)
451     {
452         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
453     }
454     else
455     {
456         h32  = state->seed + PRIME32_5;
457     }
458 
459     h32 += (U32) state->total_len;
460 
461     while (p<=bEnd-4)
462     {
463         h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
464         h32  = XXH_rotl32(h32, 17) * PRIME32_4;
465         p+=4;
466     }
467 
468     while (p<bEnd)
469     {
470         h32 += (*p) * PRIME32_5;
471         h32 = XXH_rotl32(h32, 11) * PRIME32_1;
472         p++;
473     }
474 
475     h32 ^= h32 >> 15;
476     h32 *= PRIME32_2;
477     h32 ^= h32 >> 13;
478     h32 *= PRIME32_3;
479     h32 ^= h32 >> 16;
480 
481     return h32;
482 }
483 
484 static
485 U32 XXH32_intermediateDigest (void* state_in)
486 {
487     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
488 
489     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
490         return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
491     else
492         return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
493 }
494 
495 static
496 U32 XXH32_digest (void* state_in)
497 {
498     U32 h32 = XXH32_intermediateDigest(state_in);
499 
500     XXH_free(state_in);
501 
502     return h32;
503 }
504 
505 const
506 struct archive_xxhash __archive_xxhash = {
507 	XXH32,
508 	XXH32_init,
509 	XXH32_update,
510 	XXH32_digest
511 };
512 #else
513 
514 /*
515  * Define an empty version of the struct if we aren't using the LZ4 library.
516  */
517 const
518 struct archive_xxhash __archive_xxhash = {
519 	NULL,
520 	NULL,
521 	NULL,
522 	NULL
523 };
524 
525 #endif /* HAVE_LIBLZ4 */
526