1 /*
2  * xxhsum - Command line interface for xxhash algorithms
3  * Copyright (C) 2013-2020 Yann Collet
4  *
5  * GPL v2 License
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * You can contact the author at:
22  *   - xxHash homepage: https://www.xxhash.com
23  *   - xxHash source repository: https://github.com/Cyan4973/xxHash
24  */
25 
26 /*
27  * xxhsum:
28  * Provides hash value of a file content, or a list of files, or stdin
29  * Display convention is Big Endian, for both 32 and 64 bits algorithms
30  */
31 
32 
33 /* ************************************
34  *  Compiler Options
35  **************************************/
36 /* MS Visual */
37 #if defined(_MSC_VER) || defined(_WIN32)
38 #  ifndef _CRT_SECURE_NO_WARNINGS
39 #    define _CRT_SECURE_NO_WARNINGS   /* removes visual warnings */
40 #  endif
41 #endif
42 
43 /* Under Linux at least, pull in the *64 commands */
44 #ifndef _LARGEFILE64_SOURCE
45 #  define _LARGEFILE64_SOURCE
46 #endif
47 
48 /* ************************************
49  *  Includes
50  **************************************/
51 #include <limits.h>
52 #include <stdlib.h>     /* malloc, calloc, free, exit */
53 #include <string.h>     /* strcmp, memcpy */
54 #include <stdio.h>      /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
55 #include <sys/types.h>  /* stat, stat64, _stat64 */
56 #include <sys/stat.h>   /* stat, stat64, _stat64 */
57 #include <time.h>       /* clock_t, clock, CLOCKS_PER_SEC */
58 #include <assert.h>     /* assert */
59 #include <errno.h>      /* errno */
60 
61 #define XXH_STATIC_LINKING_ONLY   /* *_state_t */
62 #include "xxhash.h"
63 
64 #ifdef XXHSUM_DISPATCH
65 #  include "xxh_x86dispatch.h"
66 #endif
67 
68 
69 /* ************************************
70  *  OS-Specific Includes
71  **************************************/
72 #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
73    || defined(__midipix__) || defined(__VMS))
74 #  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
75      || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  /* BSD distros */
76 #    define PLATFORM_POSIX_VERSION 200112L
77 #  else
78 #    if defined(__linux__) || defined(__linux)
79 #      ifndef _POSIX_C_SOURCE
80 #        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
81 #      endif
82 #    endif
83 #    include <unistd.h>  /* declares _POSIX_VERSION */
84 #    if defined(_POSIX_VERSION)  /* POSIX compliant */
85 #      define PLATFORM_POSIX_VERSION _POSIX_VERSION
86 #    else
87 #      define PLATFORM_POSIX_VERSION 0
88 #    endif
89 #  endif
90 #endif
91 #if !defined(PLATFORM_POSIX_VERSION)
92 #  define PLATFORM_POSIX_VERSION -1
93 #endif
94 
95 #if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) \
96  || (PLATFORM_POSIX_VERSION >= 200112L) \
97  || defined(__DJGPP__) \
98  || defined(__MSYS__)
99 #  include <unistd.h>   /* isatty */
100 #  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
101 #elif defined(MSDOS) || defined(OS2)
102 #  include <io.h>       /* _isatty */
103 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
104 #elif defined(WIN32) || defined(_WIN32)
105 #  include <io.h>      /* _isatty */
106 #  include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
107 #  include <stdio.h>   /* FILE */
IS_CONSOLE(FILE * stdStream)108 static __inline int IS_CONSOLE(FILE* stdStream) {
109     DWORD dummy;
110     return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
111 }
112 #else
113 #  define IS_CONSOLE(stdStream) 0
114 #endif
115 
116 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
117 #  include <fcntl.h>   /* _O_BINARY */
118 #  include <io.h>      /* _setmode, _fileno, _get_osfhandle */
119 #  if !defined(__DJGPP__)
120 #    include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
121 #    include <winioctl.h> /* FSCTL_SET_SPARSE */
122 #    define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
123 #  else
124 #    define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
125 #  endif
126 #else
127 #  define SET_BINARY_MODE(file)
128 #endif
129 
130 #if !defined(S_ISREG)
131 #  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
132 #endif
133 
134 /* Unicode helpers for Windows to make UTF-8 act as it should. */
135 #ifdef _WIN32
136 /*
137  * Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards.
138  * This version allows keeping the output length.
139  */
utf8_to_utf16_len(const char * str,int * lenOut)140 static wchar_t* utf8_to_utf16_len(const char* str, int* lenOut)
141 {
142     int const len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
143     if (lenOut != NULL) *lenOut = len;
144     if (len == 0) return NULL;
145     {   wchar_t* buf = (wchar_t*)malloc((size_t)len * sizeof(wchar_t));
146         if (buf != NULL) {
147             if (MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len) == 0) {
148                 free(buf);
149                 return NULL;
150        }    }
151        return buf;
152     }
153 }
154 
155 /* Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards. */
static wchar_t* utf8_to_utf16(const char *str)
{
    /* Same conversion as utf8_to_utf16_len(); the length output is not requested. */
    int* const noLengthOut = NULL;
    return utf8_to_utf16_len(str, noLengthOut);
}
160 
161 /*
162  * Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards.
163  * This version allows keeping the output length.
164  */
utf16_to_utf8_len(const wchar_t * str,int * lenOut)165 static char* utf16_to_utf8_len(const wchar_t *str, int *lenOut)
166 {
167     int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
168     if (lenOut != NULL) *lenOut = len;
169     if (len == 0) return NULL;
170     {   char* const buf = (char*)malloc((size_t)len * sizeof(char));
171         if (buf != NULL) {
172             if (WideCharToMultiByte(CP_UTF8, 0, str, -1, buf, len, NULL, NULL) == 0) {
173                 free(buf);
174                 return NULL;
175         }    }
176         return buf;
177     }
178 }
179 
180 /* Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards. */
static char *utf16_to_utf8(const wchar_t *str)
{
    /* Same conversion as utf16_to_utf8_len(); the length output is not requested. */
    int* const noLengthOut = NULL;
    return utf16_to_utf8_len(str, noLengthOut);
}
185 
186 /*
187  * fopen wrapper that supports UTF-8
188  *
189  * fopen will only accept ANSI filenames, which means that we can't open Unicode filenames.
190  *
191  * In order to open a Unicode filename, we need to convert filenames to UTF-16 and use _wfopen.
192  */
static FILE* XXH_fopen_wrapped(const char *filename, const wchar_t *mode)
{
    FILE* stream = NULL;
    wchar_t* const widePath = utf8_to_utf16(filename);

    /* A failed conversion is reported the same way as a failed open: NULL. */
    if (widePath != NULL) {
        stream = _wfopen(widePath, mode);
        free(widePath);   /* the temporary UTF-16 path is only needed for the call */
    }
    return stream;
}
202 
203 /*
204  * In case it isn't available, this is what MSVC 2019 defines in stdarg.h.
205  */
206 #if defined(_MSC_VER) && !defined(__clang__) && !defined(va_copy)
207 #  define va_copy(destination, source) ((destination) = (source))
208 #endif
209 
210 /*
211  * fprintf wrapper that supports UTF-8.
212  *
213  * fprintf doesn't properly handle Unicode on Windows.
214  *
215  * Additionally, it is codepage sensitive on console and may crash the program.
216  *
217  * Instead, we use vsnprintf, and either print with fwrite or convert to UTF-16
218  * for console output and use the codepage-independent WriteConsoleW.
219  *
220  * Credit to t-mat: https://github.com/t-mat/xxHash/commit/5691423
221  */
fprintf_utf8(FILE * stream,const char * format,...)222 static int fprintf_utf8(FILE *stream, const char *format, ...)
223 {
224     int result;
225     va_list args;
226     va_list copy;
227 
228     va_start(args, format);
229 
230     /*
231      * To be safe, make a va_copy.
232      *
233      * Note that Microsoft doesn't use va_copy in its sample code:
234      *   https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/vsprintf-vsprintf-l-vswprintf-vswprintf-l-vswprintf-l?view=vs-2019
235      */
236     va_copy(copy, args);
237     /* Counts the number of characters needed for vsnprintf. */
238     result = _vscprintf(format, copy);
239     va_end(copy);
240 
241     if (result > 0) {
242         /* Create a buffer for vsnprintf */
243         const size_t nchar = (size_t)result + 1;
244         char* u8_str = (char*)malloc(nchar * sizeof(u8_str[0]));
245 
246         if (u8_str == NULL) {
247             result = -1;
248         } else {
249             /* Generate the UTF-8 string with vsnprintf. */
250             result = _vsnprintf(u8_str, nchar - 1, format, args);
251             u8_str[nchar - 1] = '\0';
252             if (result > 0) {
253                 /*
254                  * Check if we are outputting to a console. Don't use IS_CONSOLE
255                  * directly -- we don't need to call _get_osfhandle twice.
256                  */
257                 int fileNb = _fileno(stream);
258                 intptr_t handle_raw = _get_osfhandle(fileNb);
259                 HANDLE handle = (HANDLE)handle_raw;
260                 DWORD dwTemp;
261 
262                 if (handle_raw < 0) {
263                      result = -1;
264                 } else if (_isatty(fileNb) && GetConsoleMode(handle, &dwTemp)) {
265                     /*
266                      * Convert to UTF-16 and output with WriteConsoleW.
267                      *
268                      * This is codepage independent and works on Windows XP's
269                      * default msvcrt.dll.
270                      */
271                     int len;
272                     wchar_t *const u16_buf = utf8_to_utf16_len(u8_str, &len);
273                     if (u16_buf == NULL) {
274                         result = -1;
275                     } else {
276                         if (WriteConsoleW(handle, u16_buf, (DWORD)len - 1, &dwTemp, NULL)) {
277                             result = (int)dwTemp;
278                         } else {
279                             result = -1;
280                         }
281                         free(u16_buf);
282                     }
283                 } else {
284                     /* fwrite the UTF-8 string if we are printing to a file */
285                     result = (int)fwrite(u8_str, 1, nchar - 1, stream);
286                     if (result == 0) {
287                         result = -1;
288                     }
289                 }
290             }
291             free(u8_str);
292         }
293     }
294     va_end(args);
295     return result;
296 }
297 /*
298  * Since we always use literals in the "mode" argument, it is just easier to append "L" to
299  * the string to make it UTF-16 and avoid the hassle of a second manual conversion.
300  */
301 #  define XXH_fopen(filename, mode) XXH_fopen_wrapped(filename, L##mode)
302 #else
303 #  define XXH_fopen(filename, mode) fopen(filename, mode)
304 #endif
305 
306 /* ************************************
307 *  Basic Types
308 **************************************/
309 #if defined(__cplusplus) /* C++ */ \
310  || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)  /* C99 */
311 #  include <stdint.h>
312     typedef uint8_t  U8;
313     typedef uint32_t U32;
314     typedef uint64_t U64;
315 # else
316 #   include <limits.h>
317     typedef unsigned char      U8;
318 #   if UINT_MAX == 0xFFFFFFFFUL
319       typedef unsigned int     U32;
320 #   else
321       typedef unsigned long    U32;
322 #   endif
323     typedef unsigned long long U64;
324 #endif /* not C++/C99 */
325 
BMK_isLittleEndian(void)326 static unsigned BMK_isLittleEndian(void)
327 {
328     const union { U32 u; U8 c[4]; } one = { 1 };   /* don't use static: performance detrimental  */
329     return one.c[0];
330 }
331 
332 
333 /* *************************************
334  *  Constants
335  ***************************************/
336 #define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE
337 #define QUOTE(str) #str
338 #define EXPAND_AND_QUOTE(str) QUOTE(str)
339 #define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION)
340 
/* Show the compiler version in FULL_WELCOME_MESSAGE. CC_VERSION_FMT holds the printf specifiers,
 * and CC_VERSION holds the comma-separated list of arguments matching the CC_VERSION_FMT string. */
343 #if defined(__clang_version__)
344 /* Clang does its own thing. */
345 #  ifdef __apple_build_version__
346 #    define CC_VERSION_FMT "Apple Clang %s"
347 #  else
348 #    define CC_VERSION_FMT "Clang %s"
349 #  endif
350 #  define CC_VERSION  __clang_version__
351 #elif defined(__VERSION__)
352 /* GCC and ICC */
353 #  define CC_VERSION_FMT "%s"
354 #  ifdef __INTEL_COMPILER /* icc adds its prefix */
355 #    define CC_VERSION __VERSION__
356 #  else /* assume GCC */
357 #    define CC_VERSION "GCC " __VERSION__
358 #  endif
359 #elif defined(_MSC_FULL_VER) && defined(_MSC_BUILD)
360 /*
361  * MSVC
362  *  "For example, if the version number of the Visual C++ compiler is
363  *   15.00.20706.01, the _MSC_FULL_VER macro evaluates to 150020706."
364  *
365  *   https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=vs-2017
366  */
367 #  define CC_VERSION_FMT "MSVC %02i.%02i.%05i.%02i"
368 #  define CC_VERSION  _MSC_FULL_VER / 10000000 % 100, _MSC_FULL_VER / 100000 % 100, _MSC_FULL_VER % 100000, _MSC_BUILD
369 #elif defined(__TINYC__)
370 /* tcc stores its version in the __TINYC__ macro. */
371 #  define CC_VERSION_FMT "tcc %i.%i.%i"
372 #  define CC_VERSION __TINYC__ / 10000 % 100, __TINYC__ / 100 % 100, __TINYC__ % 100
373 #else
374 #  define CC_VERSION_FMT "%s"
375 #  define CC_VERSION "unknown compiler"
376 #endif
377 
378 /* makes the next part easier */
379 #if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
380 #   define ARCH_X64 1
381 #   define ARCH_X86 "x86_64"
382 #elif defined(__i386__) || defined(_M_IX86) || defined(_M_IX86_FP)
383 #   define ARCH_X86 "i386"
384 #endif
385 
386 /* Try to detect the architecture. */
387 #if defined(ARCH_X86)
388 #  if defined(XXHSUM_DISPATCH)
389 #    define ARCH ARCH_X86 " autoVec"
390 #  elif defined(__AVX512F__)
391 #    define ARCH ARCH_X86 " + AVX512"
392 #  elif defined(__AVX2__)
393 #    define ARCH ARCH_X86 " + AVX2"
394 #  elif defined(__AVX__)
395 #    define ARCH ARCH_X86 " + AVX"
396 #  elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) \
397       || defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
398 #     define ARCH ARCH_X86 " + SSE2"
399 #  else
400 #     define ARCH ARCH_X86
401 #  endif
402 #elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
403 #  define ARCH "aarch64 + NEON"
404 #elif defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(_M_ARM)
405 /* ARM has a lot of different features that can change xxHash significantly. */
406 #  if defined(__thumb2__) || (defined(__thumb__) && (__thumb__ == 2 || __ARM_ARCH >= 7))
407 #    define ARCH_THUMB " Thumb-2"
408 #  elif defined(__thumb__)
409 #    define ARCH_THUMB " Thumb-1"
410 #  else
411 #    define ARCH_THUMB ""
412 #  endif
413 /* ARMv7 has unaligned by default */
414 #  if defined(__ARM_FEATURE_UNALIGNED) || __ARM_ARCH >= 7 || defined(_M_ARMV7VE)
415 #    define ARCH_UNALIGNED " + unaligned"
416 #  else
417 #    define ARCH_UNALIGNED ""
418 #  endif
419 #  if defined(__ARM_NEON) || defined(__ARM_NEON__)
420 #    define ARCH_NEON " + NEON"
421 #  else
422 #    define ARCH_NEON ""
423 #  endif
424 #  define ARCH "ARMv" EXPAND_AND_QUOTE(__ARM_ARCH) ARCH_THUMB ARCH_NEON ARCH_UNALIGNED
425 #elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
426 #  if defined(__GNUC__) && defined(__POWER9_VECTOR__)
427 #    define ARCH "ppc64 + POWER9 vector"
428 #  elif defined(__GNUC__) && defined(__POWER8_VECTOR__)
429 #    define ARCH "ppc64 + POWER8 vector"
430 #  else
431 #    define ARCH "ppc64"
432 #  endif
433 #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
434 #  define ARCH "ppc"
435 #elif defined(__AVR)
436 #  define ARCH "AVR"
437 #elif defined(__mips64)
438 #  define ARCH "mips64"
439 #elif defined(__mips)
440 #  define ARCH "mips"
441 #elif defined(__s390x__)
442 #  define ARCH "s390x"
443 #elif defined(__s390__)
444 #  define ARCH "s390"
445 #else
446 #  define ARCH "unknown"
447 #endif
448 
449 static const int g_nbBits = (int)(sizeof(void*)*8);
450 static const char g_lename[] = "little endian";
451 static const char g_bename[] = "big endian";
452 #define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename)
453 static const char author[] = "Yann Collet";
454 #define WELCOME_MESSAGE(exename) "%s %s by %s \n", exename, PROGRAM_VERSION, author
455 #define FULL_WELCOME_MESSAGE(exename) "%s %s by %s \n" \
456                     "compiled as %i-bit %s %s with " CC_VERSION_FMT " \n", \
457                     exename, PROGRAM_VERSION, author, \
458                     g_nbBits, ARCH, ENDIAN_NAME, CC_VERSION
459 
/*
 * Postfix size multipliers: written after a number, e.g. `100 KB` expands to
 * `100 *( 1<<10)`. The expansions are not parenthesized, so an expression
 * using them must be wrapped in parentheses when it is part of a larger
 * expression (see MAX_MEM below).
 * GB uses an unsigned constant -- presumably so that `2 GB` does not overflow
 * a 32-bit int.
 */
#define KB *( 1<<10)
#define MB *( 1<<20)
#define GB *(1U<<30)

/* Default sample size: 100 KB (its users are outside this part of the file). */
static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB;
#define NBLOOPS    3                              /* Default number of benchmark iterations */
#define TIMELOOP_S 1                              /* multiplier applied to CLOCKS_PER_SEC in TIMELOOP, i.e. seconds */
#define TIMELOOP  (TIMELOOP_S * CLOCKS_PER_SEC)   /* target timing per iteration */
#define TIMELOOP_MIN (TIMELOOP / 2)               /* minimum timing to validate a result */
#define XXHSUM32_DEFAULT_SEED 0                   /* Default seed for algo_xxh32 */
#define XXHSUM64_DEFAULT_SEED 0                   /* Default seed for algo_xxh64 */

/* Upper bound applied by BMK_findMaxMem() to the amount of memory it probes. */
#define MAX_MEM    (2 GB - 64 MB)
473 
474 static const char stdinName[] = "-";
475 typedef enum { algo_xxh32=0, algo_xxh64=1, algo_xxh128=2 } AlgoSelected;
476 static AlgoSelected g_defaultAlgo = algo_xxh64;    /* required within main() & usage() */
477 
478 /* <16 hex char> <SPC> <SPC> <filename> <'\0'>
479  * '4096' is typical Linux PATH_MAX configuration. */
480 #define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1)
481 
482 /* Maximum acceptable line length. */
483 #define MAX_LINE_LENGTH (32 KB)
484 
485 
486 /* ************************************
487  *  Display macros
488  **************************************/
489 #ifdef _WIN32
490 #define DISPLAY(...)         fprintf_utf8(stderr, __VA_ARGS__)
491 #define DISPLAYRESULT(...)   fprintf_utf8(stdout, __VA_ARGS__)
492 #else
493 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
494 #define DISPLAYRESULT(...)   fprintf(stdout, __VA_ARGS__)
495 #endif
496 
497 #define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0)
498 static int g_displayLevel = 2;
499 
500 
501 /* ************************************
502  *  Local variables
503  **************************************/
504 static U32 g_nbIterations = NBLOOPS;
505 
506 
507 /* ************************************
508  *  Benchmark Functions
509  **************************************/
/*
 * Returns the number of clock() ticks elapsed since `start`
 * (a value previously obtained from clock()).
 */
static clock_t BMK_clockSpan( clock_t start )
{
    clock_t const now = clock();
    return now - start;   /* works even if overflow; Typical max span ~ 30 mn */
}
514 
515 
/*
 * Estimates how much memory can be used for a buffer of `requiredMem` bytes.
 *
 * requiredMem is first rounded up past the next 64 MB boundary, padded with
 * two extra 64 MB steps and capped at MAX_MEM. The candidate size is then
 * lowered (by 64 MB while it is above 64 MB, halved afterwards) until a trial
 * malloc() of that size succeeds. The trial block is released, and the size is
 * lowered one more time so that some memory stays available.
 *
 * Returns the resulting size in bytes, or 0 when the candidate shrinks to
 * zero without any trial allocation succeeding.
 */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    void* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;

    while (!testmem) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /*
         * Nothing left to try. malloc(0) is allowed to return NULL, so probing
         * a zero size could otherwise keep this loop spinning forever.
         */
        if (requiredMem == 0) return 0;
        testmem = malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
538 
539 
BMK_GetFileSize(const char * infilename)540 static U64 BMK_GetFileSize(const char* infilename)
541 {
542     int r;
543 #if defined(_MSC_VER)
544     struct _stat64 statbuf;
545     r = _stat64(infilename, &statbuf);
546 #else
547     struct stat statbuf;
548     r = stat(infilename, &statbuf);
549 #endif
550     if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
551     return (U64)statbuf.st_size;
552 }
553 
554 /*
555  * Allocates a string containing s1 and s2 concatenated. Acts like strdup.
556  * The result must be freed.
557  */
static char* XXH_strcatDup(const char* s1, const char* s2)
{
    size_t headLen;
    size_t tailLen;
    char* joined;

    assert(s1 != NULL);
    assert(s2 != NULL);

    headLen = strlen(s1);
    tailLen = strlen(s2);

    /* room for both strings plus a single terminator */
    joined = (char*)malloc(headLen + tailLen + 1);
    if (joined == NULL) return NULL;

    memcpy(joined, s1, headLen);                  /* s1, without its terminator */
    memcpy(joined + headLen, s2, tailLen + 1);    /* s2, terminator included */
    return joined;
}
574 
575 
576 /* use #define to make them constant, required for initialization */
577 #define PRIME32 2654435761U
578 #define PRIME64 11400714785074694797ULL
579 
580 /*
581  * Fills a test buffer with pseudorandom data.
582  *
583  * This is used in the sanity check - its values must not be changed.
584  */
static void BMK_fillTestBuffer(U8* buffer, size_t len)
{
    /* 64-bit generator state; each output byte is its top 8 bits. */
    U64 generator = PRIME32;
    size_t pos = 0;

    assert(buffer != NULL);

    while (pos < len) {
        buffer[pos++] = (U8)(generator >> 56);
        generator *= PRIME64;   /* advance the generator */
    }
}
597 
598 /*
599  * A secret buffer used for benchmarking XXH3's withSecret variants.
600  *
601  * In order for the bench to be realistic, the secret buffer would need to be
602  * pre-generated.
603  *
604  * Adding a pointer to the parameter list would be messy.
605  */
606 static U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN];
607 
608 /*
609  * Wrappers for the benchmark.
610  *
611  * If you would like to add other hashes to the bench, create a wrapper and add
612  * it to the g_hashesToBench table. It will automatically be added.
613  */
614 typedef U32 (*hashFunction)(const void* buffer, size_t bufferSize, U32 seed);
615 
/* Benchmark wrapper: one-shot XXH32 of the buffer, using `seed`. */
static U32 localXXH32(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH32_hash_t const h32 = XXH32(buffer, bufferSize, seed);
    return h32;
}
/* Benchmark wrapper: one-shot XXH64 of the buffer, using `seed`; result truncated to 32 bits. */
static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const h64 = XXH64(buffer, bufferSize, seed);
    return (U32)h64;
}
/* Benchmark wrapper: one-shot XXH3_64bits; `seed` is ignored, result truncated to 32 bits. */
static U32 localXXH3_64b(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const h64 = XXH3_64bits(buffer, bufferSize);
    (void)seed;
    return (U32)h64;
}
/* Benchmark wrapper: one-shot XXH3_64bits_withSeed; result truncated to 32 bits. */
static U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const h64 = XXH3_64bits_withSeed(buffer, bufferSize, seed);
    return (U32)h64;
}
/*
 * Benchmark wrapper: one-shot XXH3_64bits_withSecret using g_benchSecretBuf;
 * `seed` is ignored, result truncated to 32 bits.
 */
static U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const h64 = XXH3_64bits_withSecret(buffer, bufferSize,
                                                    g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (U32)h64;
}
/* Benchmark wrapper: one-shot XXH3_128bits; `seed` is ignored, returns the low 64 bits truncated to 32. */
static U32 localXXH3_128b(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH128_hash_t const h128 = XXH3_128bits(buffer, bufferSize);
    (void)seed;
    return (U32)(h128.low64);
}
/* Benchmark wrapper: one-shot XXH3_128bits_withSeed; returns the low 64 bits truncated to 32. */
static U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH128_hash_t const h128 = XXH3_128bits_withSeed(buffer, bufferSize, seed);
    return (U32)(h128.low64);
}
/*
 * Benchmark wrapper: one-shot XXH3_128bits_withSecret using g_benchSecretBuf;
 * `seed` is ignored, returns the low 64 bits truncated to 32.
 */
static U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH128_hash_t const h128 = XXH3_128bits_withSecret(buffer, bufferSize,
                                                       g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (U32)(h128.low64);
}
/*
 * Benchmark wrapper: XXH3 64-bit streaming API (reset, one update with the
 * whole buffer, digest). `seed` is ignored, result truncated to 32 bits.
 */
static U32 localXXH3_stream(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH64_hash_t digest;
    (void)seed;
    XXH3_64bits_reset(&state);
    XXH3_64bits_update(&state, buffer, bufferSize);
    digest = XXH3_64bits_digest(&state);
    return (U32)digest;
}
/*
 * Benchmark wrapper: XXH3 64-bit streaming API with a seeded reset
 * (init state, reset with seed, one update with the whole buffer, digest).
 * Result truncated to 32 bits.
 */
static U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH64_hash_t digest;
    XXH3_INITSTATE(&state);
    XXH3_64bits_reset_withSeed(&state, (XXH64_hash_t)seed);
    XXH3_64bits_update(&state, buffer, bufferSize);
    digest = XXH3_64bits_digest(&state);
    return (U32)digest;
}
/*
 * Benchmark wrapper: XXH3 128-bit streaming API (reset, one update with the
 * whole buffer, digest). `seed` is ignored; returns the low 64 bits of the
 * digest truncated to 32.
 */
static U32 localXXH128_stream(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH128_hash_t digest;
    (void)seed;
    XXH3_128bits_reset(&state);
    XXH3_128bits_update(&state, buffer, bufferSize);
    digest = XXH3_128bits_digest(&state);
    return (U32)(digest.low64);
}
/*
 * Benchmark wrapper: XXH3 128-bit streaming API with a seeded reset
 * (init state, reset with seed, one update with the whole buffer, digest).
 * Returns the low 64 bits of the digest truncated to 32.
 */
static U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH128_hash_t digest;
    XXH3_INITSTATE(&state);
    XXH3_128bits_reset_withSeed(&state, (XXH64_hash_t)seed);
    XXH3_128bits_update(&state, buffer, bufferSize);
    digest = XXH3_128bits_digest(&state);
    return (U32)(digest.low64);
}
684 
685 
686 typedef struct {
687     const char*  name;
688     hashFunction func;
689 } hashInfo;
690 
691 #define NB_HASHFUNC 12
692 static const hashInfo g_hashesToBench[NB_HASHFUNC] = {
693     { "XXH32",             &localXXH32 },
694     { "XXH64",             &localXXH64 },
695     { "XXH3_64b",          &localXXH3_64b },
696     { "XXH3_64b w/seed",   &localXXH3_64b_seeded },
697     { "XXH3_64b w/secret", &localXXH3_64b_secret },
698     { "XXH128",            &localXXH3_128b },
699     { "XXH128 w/seed",     &localXXH3_128b_seeded },
700     { "XXH128 w/secret",   &localXXH3_128b_secret },
701     { "XXH3_stream",       &localXXH3_stream },
702     { "XXH3_stream w/seed",&localXXH3_stream_seeded },
703     { "XXH128_stream",     &localXXH128_stream },
704     { "XXH128_stream w/seed",&localXXH128_stream_seeded },
705 };
706 
707 #define NB_TESTFUNC (1 + 2 * NB_HASHFUNC)
708 static char g_testIDs[NB_TESTFUNC] = { 0 };
709 static const char k_testIDs_default[NB_TESTFUNC] = { 0,
710         1 /*XXH32*/, 0,
711         1 /*XXH64*/, 0,
712         1 /*XXH3*/, 0, 0, 0, 0, 0,
713         1 /*XXH128*/ };
714 
715 #define HASHNAME_MAX 29
BMK_benchHash(hashFunction h,const char * hName,int testID,const void * buffer,size_t bufferSize)716 static void BMK_benchHash(hashFunction h, const char* hName, int testID,
717                           const void* buffer, size_t bufferSize)
718 {
719     U32 nbh_perIteration = (U32)((300 MB) / (bufferSize+1)) + 1;  /* first iteration conservatively aims for 300 MB/s */
720     unsigned iterationNb, nbIterations = g_nbIterations + !g_nbIterations /* min 1 */;
721     double fastestH = 100000000.;
722     assert(HASHNAME_MAX > 2);
723     DISPLAYLEVEL(2, "\r%80s\r", "");       /* Clean display line */
724 
725     for (iterationNb = 1; iterationNb <= nbIterations; iterationNb++) {
726         U32 r=0;
727         clock_t cStart;
728 
729         DISPLAYLEVEL(2, "%2u-%-*.*s : %10u ->\r",
730                         iterationNb,
731                         HASHNAME_MAX, HASHNAME_MAX, hName,
732                         (unsigned)bufferSize);
733         cStart = clock();
734         while (clock() == cStart);   /* starts clock() at its exact beginning */
735         cStart = clock();
736 
737         {   U32 u;
738             for (u=0; u<nbh_perIteration; u++)
739                 r += h(buffer, bufferSize, u);
740         }
741         if (r==0) DISPLAYLEVEL(3,".\r");  /* do something with r to defeat compiler "optimizing" hash away */
742 
743         {   clock_t const nbTicks = BMK_clockSpan(cStart);
744             double const ticksPerHash = ((double)nbTicks / TIMELOOP) / nbh_perIteration;
745             /*
746              * clock() is the only decent portable timer, but it isn't very
747              * precise.
748              *
749              * Sometimes, this lack of precision is enough that the benchmark
750              * finishes before there are enough ticks to get a meaningful result.
751              *
752              * For example, on a Core 2 Duo (without any sort of Turbo Boost),
753              * the imprecise timer caused peculiar results like so:
754              *
755              *    XXH3_64b                   4800.0 MB/s // conveniently even
756              *    XXH3_64b unaligned         4800.0 MB/s
757              *    XXH3_64b seeded            9600.0 MB/s // magical 2x speedup?!
758              *    XXH3_64b seeded unaligned  4800.0 MB/s
759              *
760              * If we sense a suspiciously low number of ticks, we increase the
761              * iterations until we can get something meaningful.
762              */
763             if (nbTicks < TIMELOOP_MIN) {
764                 /* Not enough time spent in benchmarking, risk of rounding bias */
765                 if (nbTicks == 0) { /* faster than resolution timer */
766                     nbh_perIteration *= 100;
767                 } else {
768                     /*
769                      * update nbh_perIteration so that the next round lasts
770                      * approximately 1 second.
771                      */
772                     double nbh_perSecond = (1 / ticksPerHash) + 1;
773                     if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);   /* avoid overflow */
774                     nbh_perIteration = (U32)nbh_perSecond;
775                 }
776                 /* g_nbIterations==0 => quick evaluation, no claim of accuracy */
777                 if (g_nbIterations>0) {
778                     iterationNb--;   /* new round for a more accurate speed evaluation */
779                     continue;
780                 }
781             }
782             if (ticksPerHash < fastestH) fastestH = ticksPerHash;
783             if (fastestH>0.) { /* avoid div by zero */
784                 DISPLAYLEVEL(2, "%2u-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
785                             iterationNb,
786                             HASHNAME_MAX, HASHNAME_MAX, hName,
787                             (unsigned)bufferSize,
788                             (double)1 / fastestH,
789                             ((double)bufferSize / (1 MB)) / fastestH);
790         }   }
791         {   double nbh_perSecond = (1 / fastestH) + 1;
792             if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);   /* avoid overflow */
793             nbh_perIteration = (U32)nbh_perSecond;
794         }
795     }
796     DISPLAYLEVEL(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n",
797                     testID,
798                     HASHNAME_MAX, HASHNAME_MAX, hName,
799                     (unsigned)bufferSize,
800                     (double)1 / fastestH,
801                     ((double)bufferSize / (1 MB)) / fastestH);
802     if (g_displayLevel<1)
803         DISPLAYLEVEL(0, "%u, ", (unsigned)((double)1 / fastestH));
804 }
805 
806 
807 /*!
808  * BMK_benchMem():
809  * buffer: Must be 16-byte aligned.
810  * The real allocated size of buffer is supposed to be >= (bufferSize+3).
811  * returns: 0 on success, 1 if error (invalid mode selected)
812  */
BMK_benchMem(const void * buffer,size_t bufferSize)813 static void BMK_benchMem(const void* buffer, size_t bufferSize)
814 {
815     assert((((size_t)buffer) & 15) == 0);  /* ensure alignment */
816     BMK_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf));
817     {   int i;
818         for (i = 1; i < NB_TESTFUNC; i++) {
819             int const hashFuncID = (i-1) / 2;
820             assert(g_hashesToBench[hashFuncID].name != NULL);
821             if (g_testIDs[i] == 0) continue;
822             /* aligned */
823             if ((i % 2) == 1) {
824                 BMK_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize);
825             }
826             /* unaligned */
827             if ((i % 2) == 0) {
828                 /* Append "unaligned". */
829                 char* const hashNameBuf = XXH_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned");
830                 assert(hashNameBuf != NULL);
831                 BMK_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize);
832                 free(hashNameBuf);
833             }
834     }   }
835 }
836 
BMK_selectBenchedSize(const char * fileName)837 static size_t BMK_selectBenchedSize(const char* fileName)
838 {
839     U64 const inFileSize = BMK_GetFileSize(fileName);
840     size_t benchedSize = (size_t) BMK_findMaxMem(inFileSize);
841     if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
842     if (benchedSize < inFileSize) {
843         DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
844     }
845     return benchedSize;
846 }
847 
848 
/*
 * BMK_benchFiles():
 * For each of the `nbFiles` names in `fileNamesTable`, loads the file content
 * (up to the size chosen by BMK_selectBenchedSize()) into a 16-byte aligned
 * buffer and runs BMK_benchMem() on it.
 *
 * Returns 0. Any failure terminates the process:
 *   exit(11) the file could not be opened,
 *   exit(12) the buffer could not be allocated,
 *   exit(13) fewer bytes than expected could be read.
 */
static int BMK_benchFiles(const char*const* fileNamesTable, int nbFiles)
{
    int fileIdx;
    for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
        const char* const inFileName = fileNamesTable[fileIdx];
        assert(inFileName != NULL);

        {   FILE* const inFile = XXH_fopen( inFileName, "rb" );
            size_t benchedSize;
            char* buffer;
            void* alignedBuffer;

            /* Report an open failure right away, before any other call can overwrite errno */
            if (inFile==NULL) {
                DISPLAY("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
                exit(11);
            }

            /* +16 leaves room for alignment, +3 for the unaligned benchmarks */
            benchedSize = BMK_selectBenchedSize(inFileName);
            buffer = (char*)calloc(benchedSize+16+3, 1);
            if (buffer==NULL) {
                DISPLAY("\nError: Out of memory.\n");
                fclose(inFile);
                exit(12);
            }

            /* Only derive the aligned pointer once the allocation is known to be valid */
            alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF);  /* align on next 16 bytes */

            /* Fill input buffer */
            {   size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
                int const readErrno = errno;   /* captured before fclose() can change it */
                fclose(inFile);
                if (readSize != benchedSize) {
                    DISPLAY("\nError: Could not read '%s': %s.\n", inFileName, strerror(readErrno));
                    free(buffer);
                    exit(13);
            }   }

            /* bench */
            BMK_benchMem(alignedBuffer, benchedSize);

            free(buffer);
    }   }
    return 0;
}
889 
890 
BMK_benchInternal(size_t keySize)891 static int BMK_benchInternal(size_t keySize)
892 {
893     void* const buffer = calloc(keySize+16+3, 1);
894     if (buffer == NULL) {
895         DISPLAY("\nError: Out of memory.\n");
896         exit(12);
897     }
898 
899     {   const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF);  /* align on next 16 bytes */
900 
901         /* bench */
902         DISPLAYLEVEL(1, "Sample of ");
903         if (keySize > 10 KB) {
904             DISPLAYLEVEL(1, "%u KB", (unsigned)(keySize >> 10));
905         } else {
906             DISPLAYLEVEL(1, "%u bytes", (unsigned)keySize);
907         }
908         DISPLAYLEVEL(1, "...        \n");
909 
910         BMK_benchMem(alignedBuffer, keySize);
911         free(buffer);
912     }
913     return 0;
914 }
915 
916 
/* ************************************************
 * Self-test:
 * ensures result consistency across platforms
 *********************************************** */
921 
/*
 * BMK_checkResult32():
 * Compares a computed 32-bit hash `r1` with its reference value `r2`.
 * On mismatch, reports the index of the failing comparison and exits with code 1.
 */
static void BMK_checkResult32(XXH32_hash_t r1, XXH32_hash_t r2)
{
    /* index (starting at 1) of the comparison in progress, reported on failure */
    static int nbTests = 1;

    if (r1 == r2) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: 32-bit hash test %i: Internal sanity check failed!\n", nbTests);
    DISPLAY("\rGot 0x%08X, expected 0x%08X.\n", (unsigned)r1, (unsigned)r2);
    DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
              "or temporarily comment out the tests in BMK_sanityCheck.\n");
    exit(1);
}
934 
/*
 * BMK_checkResult64():
 * Compares a computed 64-bit hash `r1` with its reference value `r2`.
 * On mismatch, prints both values as two 32-bit halves and exits with code 1.
 */
static void BMK_checkResult64(XXH64_hash_t r1, XXH64_hash_t r2)
{
    /* index (starting at 1) of the comparison in progress, reported on failure */
    static int nbTests = 1;

    if (r1 == r2) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: 64-bit hash test %i: Internal sanity check failed!\n", nbTests);
    {   unsigned const gotHigh  = (unsigned)(r1>>32);
        unsigned const gotLow   = (unsigned)r1;
        unsigned const wantHigh = (unsigned)(r2>>32);
        unsigned const wantLow  = (unsigned)r2;
        DISPLAY("\rGot 0x%08X%08XULL, expected 0x%08X%08XULL.\n",
                gotHigh, gotLow, wantHigh, wantLow);
    }
    DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
              "or temporarily comment out the tests in BMK_sanityCheck.\n");
    exit(1);
}
948 
/*
 * BMK_checkResult128():
 * Compares a computed 128-bit hash `r1` with its reference value `r2`,
 * field by field (low64, then high64).
 * On mismatch, prints both values and exits with code 1.
 */
static void BMK_checkResult128(XXH128_hash_t r1, XXH128_hash_t r2)
{
    /* index (starting at 1) of the comparison in progress, reported on failure */
    static int nbTests = 1;

    if ((r1.low64 == r2.low64) && (r1.high64 == r2.high64)) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: 128-bit hash test %i: Internal sanity check failed.\n", nbTests);
    DISPLAY("\rGot { 0x%08X%08XULL, 0x%08X%08XULL }, expected { 0x%08X%08XULL, 0x%08X%08XULL } \n",
            (unsigned)(r1.low64>>32),  (unsigned)r1.low64,
            (unsigned)(r1.high64>>32), (unsigned)r1.high64,
            (unsigned)(r2.low64>>32),  (unsigned)r2.low64,
            (unsigned)(r2.high64>>32), (unsigned)r2.high64 );
    DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
              "or temporarily comment out the tests in BMK_sanityCheck.\n");
    exit(1);
}
963 
964 
/*
 * BMK_testXXH32():
 * Checks that hashing `len` bytes of `data` with `seed` yields `Nresult`
 * through three paths: the one-shot XXH32(), a single streaming update,
 * and a byte-by-byte streaming update.
 * Any mismatch is handled by BMK_checkResult32().
 */
static void BMK_testXXH32(const void* data, size_t len, U32 seed, U32 Nresult)
{
    const char* const bytes = (const char*)data;
    XXH32_state_t* const state = XXH32_createState();
    size_t consumed = 0;

    assert(state != NULL);
    if (len>0) assert(data != NULL);

    /* one-shot */
    BMK_checkResult32(XXH32(data, len, seed), Nresult);

    /* streaming, whole input in one update */
    (void)XXH32_reset(state, seed);
    (void)XXH32_update(state, data, len);
    BMK_checkResult32(XXH32_digest(state), Nresult);

    /* streaming, one byte per update */
    (void)XXH32_reset(state, seed);
    while (consumed < len) {
        (void)XXH32_update(state, bytes+consumed, 1);
        consumed++;
    }
    BMK_checkResult32(XXH32_digest(state), Nresult);

    XXH32_freeState(state);
}
985 
/*
 * BMK_testXXH64():
 * Checks that hashing `len` bytes of `data` with `seed` yields `Nresult`
 * through three paths: the one-shot XXH64(), a single streaming update,
 * and a byte-by-byte streaming update.
 * Any mismatch is handled by BMK_checkResult64().
 */
static void BMK_testXXH64(const void* data, size_t len, U64 seed, U64 Nresult)
{
    const char* const bytes = (const char*)data;
    XXH64_state_t* const state = XXH64_createState();
    size_t consumed = 0;

    assert(state != NULL);
    if (len>0) assert(data != NULL);

    /* one-shot */
    BMK_checkResult64(XXH64(data, len, seed), Nresult);

    /* streaming, whole input in one update */
    (void)XXH64_reset(state, seed);
    (void)XXH64_update(state, data, len);
    BMK_checkResult64(XXH64_digest(state), Nresult);

    /* streaming, one byte per update */
    (void)XXH64_reset(state, seed);
    while (consumed < len) {
        (void)XXH64_update(state, bytes+consumed, 1);
        consumed++;
    }
    BMK_checkResult64(XXH64_digest(state), Nresult);

    XXH64_freeState(state);
}
1006 
BMK_rand(void)1007 static U32 BMK_rand(void)
1008 {
1009     static U64 seed = PRIME32;
1010     seed *= PRIME64;
1011     return (U32)(seed >> 40);
1012 }
1013 
1014 
/*
 * BMK_testXXH3():
 * Checks that the 64-bit XXH3 hash of `len` bytes of `data` with `seed`
 * equals `Nresult`, through the one-shot API (seeded, plus unseeded when
 * seed==0) and the streaming API (single update, pseudo-random chunk sizes,
 * byte by byte).
 * Any mismatch is handled by BMK_checkResult64().
 */
void BMK_testXXH3(const void* data, size_t len, U64 seed, U64 Nresult)
{
    const char* const input = (const char*)data;

    if (len>0) assert(data != NULL);

    /* one-shot, seeded */
    BMK_checkResult64(XXH3_64bits_withSeed(data, len, seed), Nresult);

    /* the no-seed variant must produce the same result as seed==0 */
    if (seed == 0) {
        BMK_checkResult64(XXH3_64bits(data, len), Nresult);
    }

    /* streaming API test */
    {   XXH3_state_t* const state = XXH3_createState();
        size_t const modulo = (len > 2) ? len : 2;   /* chunk sizes are drawn in [0, modulo) */
        size_t offset;
        assert(state != NULL);

        /* single ingestion */
        (void)XXH3_64bits_reset_withSeed(state, seed);
        (void)XXH3_64bits_update(state, data, len);
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

        /* random ingestion */
        (void)XXH3_64bits_reset_withSeed(state, seed);
        offset = 0;
        while (offset < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (offset + chunk > len) chunk = len - offset;   /* do not read past the input */
            (void)XXH3_64bits_update(state, input+offset, chunk);
            offset += chunk;
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

        /* byte by byte ingestion */
        (void)XXH3_64bits_reset_withSeed(state, seed);
        for (offset = 0; offset < len; offset++) {
            (void)XXH3_64bits_update(state, input+offset, 1);
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

        XXH3_freeState(state);
    }
}
1060 
/*
 * BMK_testXXH3_withSecret():
 * Checks that the 64-bit XXH3 hash of `len` bytes of `data`, computed with
 * the custom `secret` of `secretSize` bytes, equals `Nresult`, through the
 * one-shot API and the streaming API (single update, pseudo-random chunk
 * sizes, byte by byte).
 * Any mismatch is handled by BMK_checkResult64().
 */
void BMK_testXXH3_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, U64 Nresult)
{
    const char* const input = (const char*)data;

    if (len>0) assert(data != NULL);

    /* one-shot */
    BMK_checkResult64(XXH3_64bits_withSecret(data, len, secret, secretSize), Nresult);

    /* streaming API test */
    {   XXH3_state_t* const state = XXH3_createState();
        size_t const modulo = (len > 2) ? len : 2;   /* chunk sizes are drawn in [0, modulo) */
        size_t offset;
        assert(state != NULL);

        /* single ingestion */
        (void)XXH3_64bits_reset_withSecret(state, secret, secretSize);
        (void)XXH3_64bits_update(state, data, len);
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

        /* random ingestion */
        (void)XXH3_64bits_reset_withSecret(state, secret, secretSize);
        offset = 0;
        while (offset < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (offset + chunk > len) chunk = len - offset;   /* do not read past the input */
            (void)XXH3_64bits_update(state, input+offset, chunk);
            offset += chunk;
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

        /* byte by byte ingestion */
        (void)XXH3_64bits_reset_withSecret(state, secret, secretSize);
        for (offset = 0; offset < len; offset++) {
            (void)XXH3_64bits_update(state, input+offset, 1);
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

        XXH3_freeState(state);
    }
}
1099 
/*
 * BMK_testXXH128():
 * Checks that the 128-bit XXH3 hash of `len` bytes of `data` with `seed`
 * equals `Nresult`, through:
 *  - XXH3_128bits_withSeed() and XXH128() (both called with the same seed),
 *  - XXH3_128bits() when seed==0,
 *  - the streaming API (single update, pseudo-random chunk sizes, byte by byte).
 * `data` may be NULL only when len==0.
 * Any mismatch is handled by BMK_checkResult128().
 */
void BMK_testXXH128(const void* data, size_t len, U64 seed, XXH128_hash_t Nresult)
{
    /* same precondition as the other BMK_test*() helpers */
    if (len>0) assert(data != NULL);

    {   XXH128_hash_t const Dresult = XXH3_128bits_withSeed(data, len, seed);
        BMK_checkResult128(Dresult, Nresult);
    }

    /* check that XXH128() is identical to XXH3_128bits_withSeed() */
    {   XXH128_hash_t const Dresult2 = XXH128(data, len, seed);
        BMK_checkResult128(Dresult2, Nresult);
    }

    /* check that the no-seed variant produces same result as seed==0 */
    if (seed == 0) {
        XXH128_hash_t const Dresult = XXH3_128bits(data, len);
        BMK_checkResult128(Dresult, Nresult);
    }

    /* streaming API test */
    {   XXH3_state_t *state = XXH3_createState();
        assert(state != NULL);

        /* single ingestion */
        (void)XXH3_128bits_reset_withSeed(state, seed);
        (void)XXH3_128bits_update(state, data, len);
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);

        /* random ingestion: chunk sizes come from BMK_rand(), clamped to the remaining input */
        {   size_t p = 0;
            (void)XXH3_128bits_reset_withSeed(state, seed);
            while (p < len) {
                size_t const modulo = len > 2 ? len : 2;
                size_t l = (size_t)(BMK_rand()) % modulo;
                if (p + l > len) l = len - p;
                (void)XXH3_128bits_update(state, (const char*)data+p, l);
                p += l;
            }
            BMK_checkResult128(XXH3_128bits_digest(state), Nresult);
        }

        /* byte by byte ingestion */
        {   size_t pos;
            (void)XXH3_128bits_reset_withSeed(state, seed);
            for (pos=0; pos<len; pos++)
                (void)XXH3_128bits_update(state, ((const char*)data)+pos, 1);
            BMK_checkResult128(XXH3_128bits_digest(state), Nresult);
        }
        XXH3_freeState(state);
    }
}
1149 
/*
 * BMK_testXXH128_withSecret():
 * Checks that the 128-bit XXH3 hash of `len` bytes of `data`, computed with
 * the custom `secret` of `secretSize` bytes, equals `Nresult`, through the
 * one-shot API and the streaming API (single update, pseudo-random chunk
 * sizes, byte by byte).
 * Any mismatch is handled by BMK_checkResult128().
 */
void BMK_testXXH128_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, XXH128_hash_t Nresult)
{
    const char* const input = (const char*)data;

    if (len>0) assert(data != NULL);

    /* one-shot */
    BMK_checkResult128(XXH3_128bits_withSecret(data, len, secret, secretSize), Nresult);

    /* streaming API test */
    {   XXH3_state_t* const state = XXH3_createState();
        size_t const modulo = (len > 2) ? len : 2;   /* chunk sizes are drawn in [0, modulo) */
        size_t offset;
        assert(state != NULL);

        /* single ingestion */
        (void)XXH3_128bits_reset_withSecret(state, secret, secretSize);
        (void)XXH3_128bits_update(state, data, len);
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);

        /* random ingestion */
        (void)XXH3_128bits_reset_withSecret(state, secret, secretSize);
        offset = 0;
        while (offset < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (offset + chunk > len) chunk = len - offset;   /* do not read past the input */
            (void)XXH3_128bits_update(state, input+offset, chunk);
            offset += chunk;
        }
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);

        /* byte by byte ingestion */
        (void)XXH3_128bits_reset_withSecret(state, secret, secretSize);
        for (offset = 0; offset < len; offset++) {
            (void)XXH3_128bits_update(state, input+offset, 1);
        }
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);

        XXH3_freeState(state);
    }
}
1188 
/* Number of bytes sampled from a generated secret to verify it */
#define SECRET_SAMPLE_NBBYTES 4
/* Fixed-size sample of a generated secret; compared against expected bytes by BMK_testSecretGenerator() */
typedef struct { U8 byte[SECRET_SAMPLE_NBBYTES]; } verifSample_t;
1191 
/*
 * BMK_testSecretGenerator():
 * Generates a secret from `customSeed` (`len` bytes) with
 * XXH3_generateSecret(), samples it at 4 fixed offsets (0, 62, 131, 191)
 * and compares the sampled bytes with `result`.
 * On mismatch, prints both samples and exits with code 1.
 */
void BMK_testSecretGenerator(const void* customSeed, size_t len, verifSample_t result)
{
    /* index (starting at 1) of the comparison in progress, reported on failure */
    static int nbTests = 1;
    const int probeAt[SECRET_SAMPLE_NBBYTES] = { 0, 62, 131, 191};
    U8 generated[XXH3_SECRET_DEFAULT_SIZE] = {0};
    verifSample_t picked;
    int n = 0;

    XXH3_generateSecret(generated, customSeed, len);

    while (n < SECRET_SAMPLE_NBBYTES) {
        picked.byte[n] = generated[probeAt[n]];
        n++;
    }

    if (memcmp(&picked, &result, sizeof(result)) == 0) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: Secret generation test %i: Internal sanity check failed. \n", nbTests);
    DISPLAY("\rGot { 0x%02X, 0x%02X, 0x%02X, 0x%02X }, expected { 0x%02X, 0x%02X, 0x%02X, 0x%02X } \n",
            picked.byte[0], picked.byte[1], picked.byte[2], picked.byte[3],
            result.byte[0], result.byte[1], result.byte[2], result.byte[3] );
    exit(1);
}
1213 
1214 
/*!
 * BMK_sanityCheck():
 * Runs a sanity check before the benchmark.
 *
 * Fills a local buffer with BMK_fillTestBuffer(), then passes prefixes of it
 * (and the empty input) to the BMK_test*() helpers together with hard-coded
 * reference values, for several input lengths and seeds.
 *
 * Exits on an incorrect output.
 */
static void BMK_sanityCheck(void)
{
    /* size of the test buffer; equals the longest input length used below */
#define SANITY_BUFFER_SIZE 2367
    U8 sanityBuffer[SANITY_BUFFER_SIZE];
    BMK_fillTestBuffer(sanityBuffer, sizeof(sanityBuffer));

    /* XXH32: arguments are (input, length, seed, expected hash) */
    BMK_testXXH32(NULL,          0, 0,       0x02CC5D05);
    BMK_testXXH32(NULL,          0, PRIME32, 0x36B78AE7);
    BMK_testXXH32(sanityBuffer,  1, 0,       0xCF65B03E);
    BMK_testXXH32(sanityBuffer,  1, PRIME32, 0xB4545AA4);
    BMK_testXXH32(sanityBuffer, 14, 0,       0x1208E7E2);
    BMK_testXXH32(sanityBuffer, 14, PRIME32, 0x6AF1D1FE);
    BMK_testXXH32(sanityBuffer,222, 0,       0x5BD11DBD);
    BMK_testXXH32(sanityBuffer,222, PRIME32, 0x58803C5F);

    /* XXH64: arguments are (input, length, seed, expected hash) */
    BMK_testXXH64(NULL        ,  0, 0,       0xEF46DB3751D8E999ULL);
    BMK_testXXH64(NULL        ,  0, PRIME32, 0xAC75FDA2929B17EFULL);
    BMK_testXXH64(sanityBuffer,  1, 0,       0xE934A84ADB052768ULL);
    BMK_testXXH64(sanityBuffer,  1, PRIME32, 0x5014607643A9B4C3ULL);
    BMK_testXXH64(sanityBuffer,  4, 0,       0x9136A0DCA57457EEULL);
    BMK_testXXH64(sanityBuffer, 14, 0,       0x8282DCC4994E35C8ULL);
    BMK_testXXH64(sanityBuffer, 14, PRIME32, 0xC3BD6BF63DEB6DF0ULL);
    BMK_testXXH64(sanityBuffer,222, 0,       0xB641AE8CB691C174ULL);
    BMK_testXXH64(sanityBuffer,222, PRIME32, 0x20CB8AB7AE10C14AULL);

    /* XXH3 (64-bit): the trailing comment on each line names the input length range being covered */
    BMK_testXXH3(NULL,           0, 0,       0x2D06800538D394C2ULL);  /* empty string */
    BMK_testXXH3(NULL,           0, PRIME64, 0xA8A6B918B2F0364AULL);
    BMK_testXXH3(sanityBuffer,   1, 0,       0xC44BDFF4074EECDBULL);  /*  1 -  3 */
    BMK_testXXH3(sanityBuffer,   1, PRIME64, 0x032BE332DD766EF8ULL);  /*  1 -  3 */
    BMK_testXXH3(sanityBuffer,   6, 0,       0x27B56A84CD2D7325ULL);  /*  4 -  8 */
    BMK_testXXH3(sanityBuffer,   6, PRIME64, 0x84589C116AB59AB9ULL);  /*  4 -  8 */
    BMK_testXXH3(sanityBuffer,  12, 0,       0xA713DAF0DFBB77E7ULL);  /*  9 - 16 */
    BMK_testXXH3(sanityBuffer,  12, PRIME64, 0xE7303E1B2336DE0EULL);  /*  9 - 16 */
    BMK_testXXH3(sanityBuffer,  24, 0,       0xA3FE70BF9D3510EBULL);  /* 17 - 32 */
    BMK_testXXH3(sanityBuffer,  24, PRIME64, 0x850E80FC35BDD690ULL);  /* 17 - 32 */
    BMK_testXXH3(sanityBuffer,  48, 0,       0x397DA259ECBA1F11ULL);  /* 33 - 64 */
    BMK_testXXH3(sanityBuffer,  48, PRIME64, 0xADC2CBAA44ACC616ULL);  /* 33 - 64 */
    BMK_testXXH3(sanityBuffer,  80, 0,       0xBCDEFBBB2C47C90AULL);  /* 65 - 96 */
    BMK_testXXH3(sanityBuffer,  80, PRIME64, 0xC6DD0CB699532E73ULL);  /* 65 - 96 */
    BMK_testXXH3(sanityBuffer, 195, 0,       0xCD94217EE362EC3AULL);  /* 129-240 */
    BMK_testXXH3(sanityBuffer, 195, PRIME64, 0xBA68003D370CB3D9ULL);  /* 129-240 */

    BMK_testXXH3(sanityBuffer, 403, 0,       0xCDEB804D65C6DEA4ULL);  /* one block, last stripe is overlapping */
    BMK_testXXH3(sanityBuffer, 403, PRIME64, 0x6259F6ECFD6443FDULL);  /* one block, last stripe is overlapping */
    BMK_testXXH3(sanityBuffer, 512, 0,       0x617E49599013CB6BULL);  /* one block, finishing at stripe boundary */
    BMK_testXXH3(sanityBuffer, 512, PRIME64, 0x3CE457DE14C27708ULL);  /* one block, finishing at stripe boundary */
    BMK_testXXH3(sanityBuffer,2048, 0,       0xDD59E2C3A5F038E0ULL);  /* 2 blocks, finishing at block boundary */
    BMK_testXXH3(sanityBuffer,2048, PRIME64, 0x66F81670669ABABCULL);  /* 2 blocks, finishing at block boundary */
    BMK_testXXH3(sanityBuffer,2240, 0,       0x6E73A90539CF2948ULL);  /* 3 blocks, finishing at stripe boundary */
    BMK_testXXH3(sanityBuffer,2240, PRIME64, 0x757BA8487D1B5247ULL);  /* 3 blocks, finishing at stripe boundary */
    BMK_testXXH3(sanityBuffer,2367, 0,       0xCB37AEB9E5D361EDULL);  /* 3 blocks, last stripe is overlapping */
    BMK_testXXH3(sanityBuffer,2367, PRIME64, 0xD2DB3415B942B42AULL);  /* 3 blocks, last stripe is overlapping */

    /* XXH3 with Custom Secret */
    /* the secret is a window of sanityBuffer starting at offset 7 */
    {   const void* const secret = sanityBuffer + 7;
        const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11;
        assert(sizeof(sanityBuffer) >= 7 + secretSize);
        BMK_testXXH3_withSecret(NULL,           0, secret, secretSize, 0x3559D64878C5C66CULL);  /* empty string */
        BMK_testXXH3_withSecret(sanityBuffer,   1, secret, secretSize, 0x8A52451418B2DA4DULL);  /*  1 -  3 */
        BMK_testXXH3_withSecret(sanityBuffer,   6, secret, secretSize, 0x82C90AB0519369ADULL);  /*  4 -  8 */
        BMK_testXXH3_withSecret(sanityBuffer,  12, secret, secretSize, 0x14631E773B78EC57ULL);  /*  9 - 16 */
        BMK_testXXH3_withSecret(sanityBuffer,  24, secret, secretSize, 0xCDD5542E4A9D9FE8ULL);  /* 17 - 32 */
        BMK_testXXH3_withSecret(sanityBuffer,  48, secret, secretSize, 0x33ABD54D094B2534ULL);  /* 33 - 64 */
        BMK_testXXH3_withSecret(sanityBuffer,  80, secret, secretSize, 0xE687BA1684965297ULL);  /* 65 - 96 */
        BMK_testXXH3_withSecret(sanityBuffer, 195, secret, secretSize, 0xA057273F5EECFB20ULL);  /* 129-240 */

        BMK_testXXH3_withSecret(sanityBuffer, 403, secret, secretSize, 0x14546019124D43B8ULL);  /* one block, last stripe is overlapping */
        BMK_testXXH3_withSecret(sanityBuffer, 512, secret, secretSize, 0x7564693DD526E28DULL);  /* one block, finishing at stripe boundary */
        BMK_testXXH3_withSecret(sanityBuffer,2048, secret, secretSize, 0xD32E975821D6519FULL);  /* >= 2 blocks, at least one scrambling */
        BMK_testXXH3_withSecret(sanityBuffer,2367, secret, secretSize, 0x293FA8E5173BB5E7ULL);  /* >= 2 blocks, at least one scrambling, last stripe unaligned */

        BMK_testXXH3_withSecret(sanityBuffer,64*10*3, secret, secretSize, 0x751D2EC54BC6038BULL);  /* exactly 3 full blocks, not a multiple of 256 */
    }

    /* XXH128 */
    /* each expected value is a two-field XXH128_hash_t initializer, in the struct's declaration order */
    {   XXH128_hash_t const expected = { 0x6001C324468D497FULL, 0x99AA06D3014798D8ULL };
        BMK_testXXH128(NULL,           0, 0,     expected);         /* empty string */
    }
    {   XXH128_hash_t const expected = { 0x5444F7869C671AB0ULL, 0x92220AE55E14AB50ULL };
        BMK_testXXH128(NULL,           0, PRIME32, expected);
    }
    {   XXH128_hash_t const expected = { 0xC44BDFF4074EECDBULL, 0xA6CD5E9392000F6AULL };
        BMK_testXXH128(sanityBuffer,   1, 0,       expected);       /* 1-3 */
    }
    {   XXH128_hash_t const expected = { 0xB53D5557E7F76F8DULL, 0x89B99554BA22467CULL };
        BMK_testXXH128(sanityBuffer,   1, PRIME32, expected);       /* 1-3 */
    }
    {   XXH128_hash_t const expected = { 0x3E7039BDDA43CFC6ULL, 0x082AFE0B8162D12AULL };
        BMK_testXXH128(sanityBuffer,   6, 0,       expected);       /* 4-8 */
    }
    {   XXH128_hash_t const expected = { 0x269D8F70BE98856EULL, 0x5A865B5389ABD2B1ULL };
        BMK_testXXH128(sanityBuffer,   6, PRIME32, expected);       /* 4-8 */
    }
    {   XXH128_hash_t const expected = { 0x061A192713F69AD9ULL, 0x6E3EFD8FC7802B18ULL };
        BMK_testXXH128(sanityBuffer,  12, 0,       expected);       /* 9-16 */
    }
    {   XXH128_hash_t const expected = { 0x9BE9F9A67F3C7DFBULL, 0xD7E09D518A3405D3ULL };
        BMK_testXXH128(sanityBuffer,  12, PRIME32, expected);       /* 9-16 */
    }
    {   XXH128_hash_t const expected = { 0x1E7044D28B1B901DULL, 0x0CE966E4678D3761ULL };
        BMK_testXXH128(sanityBuffer,  24, 0,       expected);       /* 17-32 */
    }
    {   XXH128_hash_t const expected = { 0xD7304C54EBAD40A9ULL, 0x3162026714A6A243ULL };
        BMK_testXXH128(sanityBuffer,  24, PRIME32, expected);       /* 17-32 */
    }
    {   XXH128_hash_t const expected = { 0xF942219AED80F67BULL, 0xA002AC4E5478227EULL };
        BMK_testXXH128(sanityBuffer,  48, 0,       expected);       /* 33-64 */
    }
    {   XXH128_hash_t const expected = { 0x7BA3C3E453A1934EULL, 0x163ADDE36C072295ULL };
        BMK_testXXH128(sanityBuffer,  48, PRIME32, expected);       /* 33-64 */
    }
    {   XXH128_hash_t const expected = { 0x5E8BAFB9F95FB803ULL, 0x4952F58181AB0042ULL };
        BMK_testXXH128(sanityBuffer,  81, 0,       expected);       /* 65-96 */
    }
    {   XXH128_hash_t const expected = { 0x703FBB3D7A5F755CULL, 0x2724EC7ADC750FB6ULL };
        BMK_testXXH128(sanityBuffer,  81, PRIME32, expected);       /* 65-96 */
    }
    {   XXH128_hash_t const expected = { 0xF1AEBD597CEC6B3AULL, 0x337E09641B948717ULL };
        BMK_testXXH128(sanityBuffer, 222, 0,       expected);       /* 129-240 */
    }
    {   XXH128_hash_t const expected = { 0xAE995BB8AF917A8DULL, 0x91820016621E97F1ULL };
        BMK_testXXH128(sanityBuffer, 222, PRIME32, expected);       /* 129-240 */
    }
    {   XXH128_hash_t const expected = { 0xCDEB804D65C6DEA4ULL, 0x1B6DE21E332DD73DULL };
        BMK_testXXH128(sanityBuffer, 403, 0,       expected);       /* one block, last stripe is overlapping */
    }
    {   XXH128_hash_t const expected = { 0x6259F6ECFD6443FDULL, 0xBED311971E0BE8F2ULL };
        BMK_testXXH128(sanityBuffer, 403, PRIME64, expected);       /* one block, last stripe is overlapping */
    }
    {   XXH128_hash_t const expected = { 0x617E49599013CB6BULL, 0x18D2D110DCC9BCA1ULL };
        BMK_testXXH128(sanityBuffer, 512, 0,       expected);       /* one block, finishing at stripe boundary */
    }
    {   XXH128_hash_t const expected = { 0x3CE457DE14C27708ULL, 0x925D06B8EC5B8040ULL };
        BMK_testXXH128(sanityBuffer, 512, PRIME64, expected);       /* one block, finishing at stripe boundary */
    }
    {   XXH128_hash_t const expected = { 0xDD59E2C3A5F038E0ULL, 0xF736557FD47073A5ULL };
        BMK_testXXH128(sanityBuffer,2048, 0,       expected);       /* two blocks, finishing at block boundary */
    }
    {   XXH128_hash_t const expected = { 0x230D43F30206260BULL, 0x7FB03F7E7186C3EAULL };
        BMK_testXXH128(sanityBuffer,2048, PRIME32, expected);       /* two blocks, finishing at block boundary */
    }
    {   XXH128_hash_t const expected = { 0x6E73A90539CF2948ULL, 0xCCB134FBFA7CE49DULL };
        BMK_testXXH128(sanityBuffer,2240, 0,       expected);      /* two blocks, ends at stripe boundary */
    }
    {   XXH128_hash_t const expected = { 0xED385111126FBA6FULL, 0x50A1FE17B338995FULL };
        BMK_testXXH128(sanityBuffer,2240, PRIME32, expected);       /* two blocks, ends at stripe boundary */
    }
    {   XXH128_hash_t const expected = { 0xCB37AEB9E5D361EDULL, 0xE89C0F6FF369B427ULL };
        BMK_testXXH128(sanityBuffer,2367, 0,       expected);       /* two blocks, last stripe is overlapping */
    }
    {   XXH128_hash_t const expected = { 0x6F5360AE69C2F406ULL, 0xD23AAE4B76C31ECBULL };
        BMK_testXXH128(sanityBuffer,2367, PRIME32, expected);       /* two blocks, last stripe is overlapping */
    }

    /* XXH128 with custom Secret */
    /* same secret window as the 64-bit custom-secret tests: sanityBuffer + 7 */
    {   const void* const secret = sanityBuffer + 7;
        const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11;
        assert(sizeof(sanityBuffer) >= 7 + secretSize);

        {   XXH128_hash_t const expected = { 0x005923CCEECBE8AEULL, 0x5F70F4EA232F1D38ULL };
            BMK_testXXH128_withSecret(NULL,           0, secret, secretSize,     expected);         /* empty string */
        }
        {   XXH128_hash_t const expected = { 0x8A52451418B2DA4DULL, 0x3A66AF5A9819198EULL };
            BMK_testXXH128_withSecret(sanityBuffer,   1, secret, secretSize,       expected);       /* 1-3 */
        }
        {   XXH128_hash_t const expected = { 0x0B61C8ACA7D4778FULL, 0x376BD91B6432F36DULL };
            BMK_testXXH128_withSecret(sanityBuffer,   6, secret, secretSize,       expected);       /* 4-8 */
        }
        {   XXH128_hash_t const expected = { 0xAF82F6EBA263D7D8ULL, 0x90A3C2D839F57D0FULL };
            BMK_testXXH128_withSecret(sanityBuffer,  12, secret, secretSize,       expected);       /* 9-16 */
        }
    }

    /* secret generator */
    /* each case passes a custom seed (pointer, length) and the 4 expected sampled bytes */
    {   verifSample_t const expected = { { 0xB8, 0x26, 0x83, 0x7E } };
        BMK_testSecretGenerator(NULL, 0, expected);
    }

    {   verifSample_t const expected = { { 0xA6, 0x16, 0x06, 0x7B } };
        BMK_testSecretGenerator(sanityBuffer, 1, expected);
    }

    {   verifSample_t const expected = { { 0xDA, 0x2A, 0x12, 0x11 } };
        BMK_testSecretGenerator(sanityBuffer, XXH3_SECRET_SIZE_MIN - 1, expected);
    }

    {   verifSample_t const expected = { { 0x7E, 0x48, 0x0C, 0xA7 } };
        BMK_testSecretGenerator(sanityBuffer, XXH3_SECRET_DEFAULT_SIZE + 500, expected);
    }

    DISPLAYLEVEL(3, "\r%70s\r", "");       /* Clean display line */
    DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");
}
1414 
1415 
1416 /* ********************************************************
1417 *  File Hashing
1418 **********************************************************/
/* stat_t: the file-status structure filled by _stat64() (MSVC) or stat() (elsewhere) */
#if defined(_MSC_VER)
    typedef struct __stat64 stat_t;
    typedef int mode_t;
#else
    typedef struct stat stat_t;
#endif

#include <sys/types.h>  /* struct stat / __stat64 */
#include <sys/stat.h>   /* stat() / _stat64() */
1428 
XSUM_isDirectory(const char * infilename)1429 int XSUM_isDirectory(const char* infilename)
1430 {
1431     stat_t statbuf;
1432 #if defined(_MSC_VER)
1433     int const r = _stat64(infilename, &statbuf);
1434     if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
1435 #else
1436     int const r = stat(infilename, &statbuf);
1437     if (!r && S_ISDIR(statbuf.st_mode)) return 1;
1438 #endif
1439     return 0;
1440 }
1441 
1442 /* for support of --little-endian display mode */
BMK_display_LittleEndian(const void * ptr,size_t length)1443 static void BMK_display_LittleEndian(const void* ptr, size_t length)
1444 {
1445     const U8* const p = (const U8*)ptr;
1446     size_t idx;
1447     for (idx=length-1; idx<length; idx--)    /* intentional underflow to negative to detect end */
1448         DISPLAYRESULT("%02x", p[idx]);
1449 }
1450 
BMK_display_BigEndian(const void * ptr,size_t length)1451 static void BMK_display_BigEndian(const void* ptr, size_t length)
1452 {
1453     const U8* const p = (const U8*)ptr;
1454     size_t idx;
1455     for (idx=0; idx<length; idx++)
1456         DISPLAYRESULT("%02x", p[idx]);
1457 }
1458 
/*
 * Multihash:
 * Holds the native (non-canonical) result of exactly one algorithm.
 * XSUM_hashStream() fills the member matching the requested hash type;
 * callers must read back that same member.
 */
typedef union {
    XXH32_hash_t   xxh32;    /* set when hashing with algo_xxh32 */
    XXH64_hash_t   xxh64;    /* set when hashing with algo_xxh64 */
    XXH128_hash_t xxh128;    /* set when hashing with algo_xxh128 */
} Multihash;
1464 
1465 /*
1466  * XSUM_hashStream:
1467  * Reads data from `inFile`, generating an incremental hash of type hashType,
1468  * using `buffer` of size `blockSize` for temporary storage.
1469  */
1470 static Multihash
XSUM_hashStream(FILE * inFile,AlgoSelected hashType,void * buffer,size_t blockSize)1471 XSUM_hashStream(FILE* inFile,
1472                 AlgoSelected hashType,
1473                 void* buffer, size_t blockSize)
1474 {
1475     XXH32_state_t state32;
1476     XXH64_state_t state64;
1477     XXH3_state_t state128;
1478 
1479     /* Init */
1480     (void)XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
1481     (void)XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
1482     (void)XXH3_128bits_reset(&state128);
1483 
1484     /* Load file & update hash */
1485     {   size_t readSize;
1486         while ((readSize = fread(buffer, 1, blockSize, inFile)) > 0) {
1487             switch(hashType)
1488             {
1489             case algo_xxh32:
1490                 (void)XXH32_update(&state32, buffer, readSize);
1491                 break;
1492             case algo_xxh64:
1493                 (void)XXH64_update(&state64, buffer, readSize);
1494                 break;
1495             case algo_xxh128:
1496                 (void)XXH3_128bits_update(&state128, buffer, readSize);
1497                 break;
1498             default:
1499                 assert(0);
1500             }
1501         }
1502         if (ferror(inFile)) {
1503             DISPLAY("Error: a failure occurred reading the input file.\n");
1504             exit(1);
1505     }   }
1506 
1507     {   Multihash finalHash = {0};
1508         switch(hashType)
1509         {
1510         case algo_xxh32:
1511             finalHash.xxh32 = XXH32_digest(&state32);
1512             break;
1513         case algo_xxh64:
1514             finalHash.xxh64 = XXH64_digest(&state64);
1515             break;
1516         case algo_xxh128:
1517             finalHash.xxh128 = XXH3_128bits_digest(&state128);
1518             break;
1519         default:
1520             assert(0);
1521         }
1522         return finalHash;
1523     }
1524 }
1525 
                                       /* algo_xxh32, algo_xxh64, algo_xxh128 */
/* The three tables below are parallel and indexed by an AlgoSelected value,
 * in the column order given by the comment above. */
static const char* XSUM_algoName[] =    { "XXH32",    "XXH64",    "XXH128" };        /* tag printed by XSUM_printLine_BSD */
static const char* XSUM_algoLE_name[] = { "XXH32_LE", "XXH64_LE", "XXH128_LE" };     /* tag printed by XSUM_printLine_BSD_LE */
static const size_t XSUM_algoLength[] = { 4,          8,          16 };              /* digest size in bytes */

/* Number of elements in `table`. Only meaningful when `table` is a true
 * array (not a pointer); the argument is expanded without parentheses. */
#define XSUM_TABLE_ELT_SIZE(table)   (sizeof(table) / sizeof(*table))

/* Receives a pointer to the canonical hash bytes and their count */
typedef void (*XSUM_displayHash_f)(const void*, size_t);  /* display function signature */
1534 
/*
 * XSUM_printLine_BSD_internal:
 * Prints one BSD-style ("tagged") result line:
 *
 *      <algorithm> (<filename>) = <hex digest>\n
 *
 *  filename      : name printed between the parentheses
 *  canonicalHash : canonical digest bytes; XSUM_algoLength[hashType] of them
 *                  are handed to `f_displayHash`
 *  hashType      : index into `algoString` and XSUM_algoLength
 *  algoString    : table of algorithm tags, indexed by hashType
 *  f_displayHash : routine that prints the digest bytes
 */
static void XSUM_printLine_BSD_internal(const char* filename,
                                        const void* canonicalHash, const AlgoSelected hashType,
                                        const char* algoString[],
                                        XSUM_displayHash_f f_displayHash)
{
    /* Valid indices are 0 .. count-1: the upper bound must be strict,
     * otherwise hashType == count would pass and read past both tables. */
    assert(0 <= hashType && (size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const char* const typeString = algoString[hashType];
        const size_t hashLength = XSUM_algoLength[hashType];
        DISPLAYRESULT("%s (%s) = ", typeString, filename);
        f_displayHash(canonicalHash, hashLength);
        DISPLAYRESULT("\n");
}   }
1547 
/* BSD-style line using the "_LE" algorithm tags (XSUM_algoLE_name) and
 * BMK_display_LittleEndian to print the digest bytes. */
static void XSUM_printLine_BSD_LE(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoLE_name, BMK_display_LittleEndian);
}
1552 
/* BSD-style line using the plain algorithm tags (XSUM_algoName) and
 * BMK_display_BigEndian to print the digest bytes. */
static void XSUM_printLine_BSD(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoName, BMK_display_BigEndian);
}
1557 
/*
 * XSUM_printLine_GNU_internal:
 * Prints one GNU-style result line:
 *
 *      <hex digest> <space> <space> <filename>\n
 *
 *  filename      : name printed after the two spaces
 *  canonicalHash : canonical digest bytes; XSUM_algoLength[hashType] of them
 *                  are handed to `f_displayHash`
 *  hashType      : index into XSUM_algoLength
 *  f_displayHash : routine that prints the digest bytes
 */
static void XSUM_printLine_GNU_internal(const char* filename,
                               const void* canonicalHash, const AlgoSelected hashType,
                               XSUM_displayHash_f f_displayHash)
{
    /* Valid indices are 0 .. count-1: the upper bound must be strict,
     * otherwise hashType == count would pass and read past the table. */
    assert(0 <= hashType && (size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const size_t hashLength = XSUM_algoLength[hashType];
        f_displayHash(canonicalHash, hashLength);
        DISPLAYRESULT("  %s\n", filename);
}   }
1567 
/* GNU-style line; digest bytes are printed by BMK_display_BigEndian. */
static void XSUM_printLine_GNU(const char* filename,
                               const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, BMK_display_BigEndian);
}
1573 
/* GNU-style line; digest bytes are printed by BMK_display_LittleEndian. */
static void XSUM_printLine_GNU_LE(const char* filename,
                                  const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, BMK_display_LittleEndian);
}
1579 
/* Byte order used when printing a digest; also the second index of
 * XSUM_kDisplayLine_fTable (big_endian == 0, little_endian == 1). */
typedef enum { big_endian, little_endian} Display_endianess;

/* Output line layout; also the first index of XSUM_kDisplayLine_fTable
 * (display_gnu == 0, display_bsd == 1). */
typedef enum { display_gnu, display_bsd } Display_convention;

/* Arguments: file name, pointer to the canonical digest, algorithm */
typedef void (*XSUM_displayLine_f)(const char*, const void*, AlgoSelected);  /* line display signature */

/* Line printers, indexed as [Display_convention][Display_endianess] */
static XSUM_displayLine_f XSUM_kDisplayLine_fTable[2][2] = {
    { XSUM_printLine_GNU, XSUM_printLine_GNU_LE },
    { XSUM_printLine_BSD, XSUM_printLine_BSD_LE }
};
1590 
XSUM_hashFile(const char * fileName,const AlgoSelected hashType,const Display_endianess displayEndianess,const Display_convention convention)1591 static int XSUM_hashFile(const char* fileName,
1592                          const AlgoSelected hashType,
1593                          const Display_endianess displayEndianess,
1594                          const Display_convention convention)
1595 {
1596     size_t const blockSize = 64 KB;
1597     XSUM_displayLine_f const f_displayLine = XSUM_kDisplayLine_fTable[convention][displayEndianess];
1598     FILE* inFile;
1599     Multihash hashValue;
1600     assert(displayEndianess==big_endian || displayEndianess==little_endian);
1601     assert(convention==display_gnu || convention==display_bsd);
1602 
1603     /* Check file existence */
1604     if (fileName == stdinName) {
1605         inFile = stdin;
1606         fileName = "stdin";
1607         SET_BINARY_MODE(stdin);
1608     } else {
1609         if (XSUM_isDirectory(fileName)) {
1610             DISPLAY("xxhsum: %s: Is a directory \n", fileName);
1611             return 1;
1612         }
1613         inFile = XXH_fopen( fileName, "rb" );
1614         if (inFile==NULL) {
1615             DISPLAY("Error: Could not open '%s': %s. \n", fileName, strerror(errno));
1616             return 1;
1617     }   }
1618 
1619     /* Memory allocation & streaming */
1620     {   void* const buffer = malloc(blockSize);
1621         if (buffer == NULL) {
1622             DISPLAY("\nError: Out of memory.\n");
1623             fclose(inFile);
1624             return 1;
1625         }
1626 
1627         /* Stream file & update hash */
1628         hashValue = XSUM_hashStream(inFile, hashType, buffer, blockSize);
1629 
1630         fclose(inFile);
1631         free(buffer);
1632     }
1633 
1634     /* display Hash value in selected format */
1635     switch(hashType)
1636     {
1637     case algo_xxh32:
1638         {   XXH32_canonical_t hcbe32;
1639             (void)XXH32_canonicalFromHash(&hcbe32, hashValue.xxh32);
1640             f_displayLine(fileName, &hcbe32, hashType);
1641             break;
1642         }
1643     case algo_xxh64:
1644         {   XXH64_canonical_t hcbe64;
1645             (void)XXH64_canonicalFromHash(&hcbe64, hashValue.xxh64);
1646             f_displayLine(fileName, &hcbe64, hashType);
1647             break;
1648         }
1649     case algo_xxh128:
1650         {   XXH128_canonical_t hcbe128;
1651             (void)XXH128_canonicalFromHash(&hcbe128, hashValue.xxh128);
1652             f_displayLine(fileName, &hcbe128, hashType);
1653             break;
1654         }
1655     default:
1656         assert(0);  /* not possible */
1657     }
1658 
1659     return 0;
1660 }
1661 
1662 
1663 /*
1664  * XSUM_hashFiles:
1665  * If fnTotal==0, read from stdin instead.
1666  */
XSUM_hashFiles(const char * const * fnList,int fnTotal,AlgoSelected hashType,Display_endianess displayEndianess,Display_convention convention)1667 static int XSUM_hashFiles(const char*const * fnList, int fnTotal,
1668                           AlgoSelected hashType,
1669                           Display_endianess displayEndianess,
1670                           Display_convention convention)
1671 {
1672     int fnNb;
1673     int result = 0;
1674 
1675     if (fnTotal==0)
1676         return XSUM_hashFile(stdinName, hashType, displayEndianess, convention);
1677 
1678     for (fnNb=0; fnNb<fnTotal; fnNb++)
1679         result |= XSUM_hashFile(fnList[fnNb], hashType, displayEndianess, convention);
1680     DISPLAYLEVEL(2, "\r%70s\r", "");
1681     return result;
1682 }
1683 
1684 
/* Outcome of getLine() */
typedef enum {
    GetLine_ok,                     /* a line was read (possibly ended by EOF) */
    GetLine_eof,                    /* EOF met before any character */
    GetLine_exceedMaxLineLength,    /* line would not fit within MAX_LINE_LENGTH */
    GetLine_outOfMemory             /* line buffer (re)allocation failed */
} GetLineResult;

/* Outcome of canonicalFromString() */
typedef enum {
    CanonicalFromString_ok,
    CanonicalFromString_invalidFormat   /* a non-hexadecimal character was met */
} CanonicalFromStringResult;

/* Outcome of parseLine() */
typedef enum {
    ParseLine_ok,
    ParseLine_invalidFormat
} ParseLineResult;

/* Verification status of one checksum line, as computed by parseFile1() */
typedef enum {
    LineStatus_hashOk,          /* computed hash equals the listed one */
    LineStatus_hashFailed,      /* computed hash differs (also the initial value) */
    LineStatus_failedToOpen     /* the listed file could not be opened */
} LineStatus;

/* Canonical digest parsed from a checksum line; the valid member is the
 * one designated by ParsedLine.xxhBits. */
typedef union {
    XXH32_canonical_t xxh32;
    XXH64_canonical_t xxh64;
    XXH128_canonical_t xxh128;
} Canonical;

/* Result of parsing one checksum line (see parseLine()) */
typedef struct {
    Canonical   canonical;  /* expected digest */
    const char* filename;   /* points inside the parsed line buffer; NULL until parsing succeeds */
    int         xxhBits;    /* canonical type: 32:xxh32, 64:xxh64, 128:xxh128 */
} ParsedLine;

/* Counters accumulated by parseFile1() over one checksum file */
typedef struct {
    unsigned long   nProperlyFormattedLines;    /* lines accepted by parseLine() */
    unsigned long   nImproperlyFormattedLines;  /* lines rejected by parseLine() */
    unsigned long   nMismatchedChecksums;       /* lines whose status is LineStatus_hashFailed */
    unsigned long   nOpenOrReadFailures;        /* lines whose file could not be opened */
    unsigned long   nMixedFormatLines;          /* NOTE(review): not updated by any code visible in this section */
    int             quit;                       /* set to 1 when parsing stops on an error */
} ParseFileReport;

/* Inputs, working buffers and report for one parseFile1() run */
typedef struct {
    const char*     inFileName;     /* name used in messages */
    FILE*           inFile;         /* checksum list being read */
    int             lineMax;        /* current capacity of lineBuf, in bytes */
    char*           lineBuf;        /* line buffer; getLine() may reallocate it */
    size_t          blockSize;      /* capacity of blockBuf, in bytes */
    char*           blockBuf;       /* read buffer handed to XSUM_hashStream() */
    U32             strictMode;     /* stored by checkFile(); not read by parseFile1() */
    U32             statusOnly;     /* non-zero: suppress per-line result messages */
    U32             warn;           /* non-zero: report improperly formatted lines */
    U32             quiet;          /* non-zero: do not print "OK" lines */
    ParseFileReport report;         /* zeroed, then filled, by parseFile1() */
} ParseFileArg;
1742 
1743 
1744 /*
1745  * Reads a line from stream `inFile`.
1746  * Returns GetLine_ok, if it reads line successfully.
1747  * Returns GetLine_eof, if stream reaches EOF.
1748  * Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH.
1749  * Returns GetLine_outOfMemory, if line buffer memory allocation failed.
1750  */
getLine(char ** lineBuf,int * lineMax,FILE * inFile)1751 static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile)
1752 {
1753     GetLineResult result = GetLine_ok;
1754     size_t len = 0;
1755 
1756     if ((*lineBuf == NULL) || (*lineMax<1)) {
1757         free(*lineBuf);  /* in case it's != NULL */
1758         *lineMax = 0;
1759         *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH);
1760         if(*lineBuf == NULL) return GetLine_outOfMemory;
1761         *lineMax = DEFAULT_LINE_LENGTH;
1762     }
1763 
1764     for (;;) {
1765         const int c = fgetc(inFile);
1766         if (c == EOF) {
1767             /*
1768              * If we meet EOF before first character, returns GetLine_eof,
1769              * otherwise GetLine_ok.
1770              */
1771             if (len == 0) result = GetLine_eof;
1772             break;
1773         }
1774 
1775         /* Make enough space for len+1 (for final NUL) bytes. */
1776         if (len+1 >= (size_t)*lineMax) {
1777             char* newLineBuf = NULL;
1778             size_t newBufSize = (size_t)*lineMax;
1779 
1780             newBufSize += (newBufSize/2) + 1; /* x 1.5 */
1781             if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
1782             if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength;
1783 
1784             newLineBuf = (char*) realloc(*lineBuf, newBufSize);
1785             if (newLineBuf == NULL) return GetLine_outOfMemory;
1786 
1787             *lineBuf = newLineBuf;
1788             *lineMax = (int)newBufSize;
1789         }
1790 
1791         if (c == '\n') break;
1792         (*lineBuf)[len++] = (char) c;
1793     }
1794 
1795     (*lineBuf)[len] = '\0';
1796     return result;
1797 }
1798 
1799 
/*
 * charToHex:
 * Returns the value (0-15) of hexadecimal digit `c`, upper or lower case.
 * Returns -1 when `c` is not a hexadecimal digit.
 */
static int charToHex(char c)
{
    if ('0' <= c && c <= '9') return (int) (c - '0');
    if ('a' <= c && c <= 'f') return (int) (c - 'a') + 0x0a;
    if ('A' <= c && c <= 'F') return (int) (c - 'A') + 0x0a;
    return -1;
}
1816 
1817 
1818 /*
1819  * Converts canonical ASCII hexadecimal string `hashStr`
1820  * to the big endian binary representation in unsigned char array `dst`.
1821  *
1822  * Returns CanonicalFromString_invalidFormat if hashStr is not well formatted.
1823  * Returns CanonicalFromString_ok if hashStr is parsed successfully.
1824  */
canonicalFromString(unsigned char * dst,size_t dstSize,const char * hashStr,int reverseBytes)1825 static CanonicalFromStringResult canonicalFromString(unsigned char* dst,
1826                                                      size_t dstSize,
1827                                                      const char* hashStr,
1828                                                      int reverseBytes)
1829 {
1830     size_t i;
1831     for (i = 0; i < dstSize; ++i) {
1832         int h0, h1;
1833         size_t j = reverseBytes ? dstSize - i - 1 : i;
1834 
1835         h0 = charToHex(hashStr[j*2 + 0]);
1836         if (h0 < 0) return CanonicalFromString_invalidFormat;
1837 
1838         h1 = charToHex(hashStr[j*2 + 1]);
1839         if (h1 < 0) return CanonicalFromString_invalidFormat;
1840 
1841         dst[i] = (unsigned char) ((h0 << 4) | h1);
1842     }
1843     return CanonicalFromString_ok;
1844 }
1845 
1846 
1847 /*
1848  * Parse single line of xxHash checksum file.
1849  * Returns ParseLine_invalidFormat if the line is not well formatted.
1850  * Returns ParseLine_ok if the line is parsed successfully.
1851  * And members of parseLine will be filled by parsed values.
1852  *
1853  *  - line must be terminated with '\0' without a trailing newline.
1854  *  - Since parsedLine.filename will point within given argument `line`,
1855  *    users must keep `line`s content when they are using parsedLine.
1856  *  - The line may be modified to carve up the information it contains.
1857  *
1858  * xxHash checksum lines should have the following format:
1859  *
1860  *      <8, 16, or 32 hexadecimal char> <space> <space> <filename...> <'\0'>
1861  *
1862  * or:
1863  *
1864  *      <algorithm> <' ('> <filename> <') = '> <hexstring> <'\0'>
1865  */
parseLine(ParsedLine * parsedLine,char * line,int rev)1866 static ParseLineResult parseLine(ParsedLine* parsedLine, char* line, int rev)
1867 {
1868     char* const firstSpace = strchr(line, ' ');
1869     const char* hash_ptr;
1870     size_t hash_len;
1871 
1872     parsedLine->filename = NULL;
1873     parsedLine->xxhBits = 0;
1874 
1875     if (firstSpace == NULL || !firstSpace[1]) return ParseLine_invalidFormat;
1876 
1877     if (firstSpace[1] == '(') {
1878         char* lastSpace = strrchr(line, ' ');
1879         if (lastSpace - firstSpace < 5) return ParseLine_invalidFormat;
1880         if (lastSpace[-1] != '=' || lastSpace[-2] != ' ' || lastSpace[-3] != ')') return ParseLine_invalidFormat;
1881         lastSpace[-3] = '\0'; /* Terminate the filename */
1882         *firstSpace = '\0';
1883         rev = strstr(line, "_LE") != NULL; /* was output little-endian */
1884         hash_ptr = lastSpace + 1;
1885         hash_len = strlen(hash_ptr);
1886         /* NOTE: This currently ignores the hash description at the start of the string.
1887          * In the future we should parse it and verify that it matches the hash length.
1888          * It could also be used to allow both XXH64 & XXH3_64bits to be differentiated. */
1889     } else {
1890         hash_ptr = line;
1891         hash_len = (size_t)(firstSpace - line);
1892     }
1893 
1894     switch (hash_len)
1895     {
1896     case 8:
1897         {   XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
1898             if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), hash_ptr, rev)
1899                 != CanonicalFromString_ok) {
1900                 return ParseLine_invalidFormat;
1901             }
1902             parsedLine->xxhBits = 32;
1903             break;
1904         }
1905 
1906     case 16:
1907         {   XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
1908             if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), hash_ptr, rev)
1909                 != CanonicalFromString_ok) {
1910                 return ParseLine_invalidFormat;
1911             }
1912             parsedLine->xxhBits = 64;
1913             break;
1914         }
1915 
1916     case 32:
1917         {   XXH128_canonical_t* xxh128c = &parsedLine->canonical.xxh128;
1918             if (canonicalFromString(xxh128c->digest, sizeof(xxh128c->digest), hash_ptr, rev)
1919                 != CanonicalFromString_ok) {
1920                 return ParseLine_invalidFormat;
1921             }
1922             parsedLine->xxhBits = 128;
1923             break;
1924         }
1925 
1926     default:
1927             return ParseLine_invalidFormat;
1928             break;
1929     }
1930 
1931     /* note : skipping second separation character, which can be anything,
1932      * allowing insertion of custom markers such as '*' */
1933     parsedLine->filename = firstSpace + 2;
1934     return ParseLine_ok;
1935 }
1936 
1937 
/*!
 * Parse xxHash checksum file.
 *
 * Reads parseFileArg->inFile line by line with getLine(). Each line is
 * parsed with parseLine(); the file it names is opened, hashed with
 * XSUM_hashStream() using the algorithm implied by the digest width, and
 * the result is compared with the listed digest.
 *
 * parseFileArg->report is zeroed on entry, then updated for every line.
 * Processing stops at end of file, or as soon as report->quit is set
 * (line counter wrap-around, getLine() failure, unknown line status).
 *
 * `rev` is forwarded to parseLine() as the byte order of listed digests.
 *
 * Note: XSUM_hashStream() terminates the process on a read error, so
 * nOpenOrReadFailures only counts files that could not be opened.
 */
static void parseFile1(ParseFileArg* parseFileArg, int rev)
{
    const char* const inFileName = parseFileArg->inFileName;
    ParseFileReport* const report = &parseFileArg->report;

    unsigned long lineNumber = 0;
    memset(report, 0, sizeof(*report));

    while (!report->quit) {
        LineStatus lineStatus = LineStatus_hashFailed;
        ParsedLine parsedLine;
        memset(&parsedLine, 0, sizeof(parsedLine));

        lineNumber++;
        /* unsigned counter wrapped around to 0 */
        if (lineNumber == 0) {
            /* This is unlikely happen, but md5sum.c has this error check. */
            DISPLAY("%s: Error: Too many checksum lines\n", inFileName);
            report->quit = 1;
            break;
        }

        /* Fetch next line; EOF ends the loop silently, any other failure
         * is reported and flagged through report->quit */
        {   GetLineResult const getLineResult = getLine(&parseFileArg->lineBuf,
                                                        &parseFileArg->lineMax,
                                                         parseFileArg->inFile);
            if (getLineResult != GetLine_ok) {
                if (getLineResult == GetLine_eof) break;

                switch (getLineResult)
                {
                case GetLine_ok:
                case GetLine_eof:
                    /* These cases never happen.  See above getLineResult related "if"s.
                       They exist just for make gcc's -Wswitch-enum happy. */
                    assert(0);
                    break;

                default:
                    DISPLAY("%s:%lu: Error: Unknown error.\n", inFileName, lineNumber);
                    break;

                case GetLine_exceedMaxLineLength:
                    DISPLAY("%s:%lu: Error: Line too long.\n", inFileName, lineNumber);
                    break;

                case GetLine_outOfMemory:
                    DISPLAY("%s:%lu: Error: Out of memory.\n", inFileName, lineNumber);
                    break;
                }
                report->quit = 1;
                break;
        }   }

        /* A malformed line is counted (and reported if `warn` is set),
         * then skipped */
        if (parseLine(&parsedLine, parseFileArg->lineBuf, rev) != ParseLine_ok) {
            report->nImproperlyFormattedLines++;
            if (parseFileArg->warn) {
                DISPLAY("%s:%lu: Error: Improperly formatted checksum line.\n",
                        inFileName, lineNumber);
            }
            continue;
        }

        report->nProperlyFormattedLines++;

        /* Single-pass loop: `break` skips hashing when the file cannot be opened */
        do {
            FILE* const fp = XXH_fopen(parsedLine.filename, "rb");
            if (fp == NULL) {
                lineStatus = LineStatus_failedToOpen;
                break;
            }
            lineStatus = LineStatus_hashFailed;
            /* Hash with the algorithm matching the digest width,
             * then compare with the listed value */
            switch (parsedLine.xxhBits)
            {
            case 32:
                {   Multihash const xxh = XSUM_hashStream(fp, algo_xxh32, parseFileArg->blockBuf, parseFileArg->blockSize);
                    if (xxh.xxh32 == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) {
                        lineStatus = LineStatus_hashOk;
                }   }
                break;

            case 64:
                {   Multihash const xxh = XSUM_hashStream(fp, algo_xxh64, parseFileArg->blockBuf, parseFileArg->blockSize);
                    if (xxh.xxh64 == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) {
                        lineStatus = LineStatus_hashOk;
                }   }
                break;

            case 128:
                {   Multihash const xxh = XSUM_hashStream(fp, algo_xxh128, parseFileArg->blockBuf, parseFileArg->blockSize);
                    if (XXH128_isEqual(xxh.xxh128, XXH128_hashFromCanonical(&parsedLine.canonical.xxh128))) {
                        lineStatus = LineStatus_hashOk;
                }   }
                break;

            default:
                /* unexpected width: status remains LineStatus_hashFailed */
                break;
            }
            fclose(fp);
        } while (0);

        /* Update counters and print the per-line result */
        switch (lineStatus)
        {
        default:
            DISPLAY("%s: Error: Unknown error.\n", inFileName);
            report->quit = 1;
            break;

        case LineStatus_failedToOpen:
            report->nOpenOrReadFailures++;
            if (!parseFileArg->statusOnly) {
                DISPLAYRESULT("%s:%lu: Could not open or read '%s': %s.\n",
                    inFileName, lineNumber, parsedLine.filename, strerror(errno));
            }
            break;

        case LineStatus_hashOk:
        case LineStatus_hashFailed:
            {   int b = 1;   /* 1: print the "OK" / "FAILED" line */
                if (lineStatus == LineStatus_hashOk) {
                    /* If --quiet is specified, don't display "OK" */
                    if (parseFileArg->quiet) b = 0;
                } else {
                    report->nMismatchedChecksums++;
                }

                if (b && !parseFileArg->statusOnly) {
                    DISPLAYRESULT("%s: %s\n", parsedLine.filename
                        , lineStatus == LineStatus_hashOk ? "OK" : "FAILED");
            }   }
            break;
        }
    }   /* while (!report->quit) */
}
2073 
2074 
2075 /*  Parse xxHash checksum file.
2076  *  Returns 1, if all procedures were succeeded.
2077  *  Returns 0, if any procedures was failed.
2078  *
2079  *  If strictMode != 0, return error code if any line is invalid.
2080  *  If statusOnly != 0, don't generate any output.
2081  *  If warn != 0, print a warning message to stderr.
2082  *  If quiet != 0, suppress "OK" line.
2083  *
2084  *  "All procedures are succeeded" means:
2085  *    - Checksum file contains at least one line and less than SIZE_T_MAX lines.
2086  *    - All files are properly opened and read.
2087  *    - All hash values match with its content.
2088  *    - (strict mode) All lines in checksum file are consistent and well formatted.
2089  */
checkFile(const char * inFileName,const Display_endianess displayEndianess,U32 strictMode,U32 statusOnly,U32 warn,U32 quiet)2090 static int checkFile(const char* inFileName,
2091                      const Display_endianess displayEndianess,
2092                      U32 strictMode,
2093                      U32 statusOnly,
2094                      U32 warn,
2095                      U32 quiet)
2096 {
2097     int result = 0;
2098     FILE* inFile = NULL;
2099     ParseFileArg parseFileArgBody;
2100     ParseFileArg* const parseFileArg = &parseFileArgBody;
2101     ParseFileReport* const report = &parseFileArg->report;
2102 
2103     /* note: stdinName is special constant pointer.  It is not a string. */
2104     if (inFileName == stdinName) {
2105         /*
2106          * Note: Since we expect text input for xxhash -c mode,
2107          * we don't set binary mode for stdin.
2108          */
2109         inFileName = "stdin";
2110         inFile = stdin;
2111     } else {
2112         inFile = XXH_fopen( inFileName, "rt" );
2113     }
2114 
2115     if (inFile == NULL) {
2116         DISPLAY("Error: Could not open '%s': %s\n", inFileName, strerror(errno));
2117         return 0;
2118     }
2119 
2120     parseFileArg->inFileName  = inFileName;
2121     parseFileArg->inFile      = inFile;
2122     parseFileArg->lineMax     = DEFAULT_LINE_LENGTH;
2123     parseFileArg->lineBuf     = (char*) malloc((size_t)parseFileArg->lineMax);
2124     parseFileArg->blockSize   = 64 * 1024;
2125     parseFileArg->blockBuf    = (char*) malloc(parseFileArg->blockSize);
2126     parseFileArg->strictMode  = strictMode;
2127     parseFileArg->statusOnly  = statusOnly;
2128     parseFileArg->warn        = warn;
2129     parseFileArg->quiet       = quiet;
2130 
2131     if ( (parseFileArg->lineBuf == NULL)
2132       || (parseFileArg->blockBuf == NULL) ) {
2133         DISPLAY("Error: : memory allocation failed \n");
2134         exit(1);
2135     }
2136     parseFile1(parseFileArg, displayEndianess != big_endian);
2137 
2138     free(parseFileArg->blockBuf);
2139     free(parseFileArg->lineBuf);
2140 
2141     if (inFile != stdin) fclose(inFile);
2142 
2143     /* Show error/warning messages.  All messages are copied from md5sum.c
2144      */
2145     if (report->nProperlyFormattedLines == 0) {
2146         DISPLAY("%s: no properly formatted xxHash checksum lines found\n", inFileName);
2147     } else if (!statusOnly) {
2148         if (report->nImproperlyFormattedLines) {
2149             DISPLAYRESULT("%lu %s improperly formatted\n"
2150                 , report->nImproperlyFormattedLines
2151                 , report->nImproperlyFormattedLines == 1 ? "line is" : "lines are");
2152         }
2153         if (report->nOpenOrReadFailures) {
2154             DISPLAYRESULT("%lu listed %s could not be read\n"
2155                 , report->nOpenOrReadFailures
2156                 , report->nOpenOrReadFailures == 1 ? "file" : "files");
2157         }
2158         if (report->nMismatchedChecksums) {
2159             DISPLAYRESULT("%lu computed %s did NOT match\n"
2160                 , report->nMismatchedChecksums
2161                 , report->nMismatchedChecksums == 1 ? "checksum" : "checksums");
2162     }   }
2163 
2164     /* Result (exit) code logic is copied from
2165      * gnu coreutils/src/md5sum.c digest_check() */
2166     result =   report->nProperlyFormattedLines != 0
2167             && report->nMismatchedChecksums == 0
2168             && report->nOpenOrReadFailures == 0
2169             && (!strictMode || report->nImproperlyFormattedLines == 0)
2170             && report->quit == 0;
2171     return result;
2172 }
2173 
2174 
/*
 * checkFiles:
 * Runs checkFile() on each of the `fnTotal` checksum files of `fnList`,
 * or on stdin when fnTotal==0. Every file is processed, even after a failure.
 * Returns 0 if all of them verified successfully, 1 otherwise.
 */
static int checkFiles(const char*const* fnList, int fnTotal,
                      const Display_endianess displayEndianess,
                      U32 strictMode,
                      U32 statusOnly,
                      U32 warn,
                      U32 quiet)
{
    int allOk = 1;
    int idx;

    /* note: stdinName is a special pointer, not a string */
    if (fnTotal == 0) {
        if (!checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet))
            allOk = 0;
        return allOk ? 0 : 1;
    }

    for (idx = 0; idx < fnTotal; idx++) {
        if (!checkFile(fnList[idx], displayEndianess, strictMode, statusOnly, warn, quiet))
            allOk = 0;
    }
    return allOk ? 0 : 1;
}
2195 
2196 
2197 /* ********************************************************
2198 *  Main
2199 **********************************************************/
2200 
/*
 * usage:
 * Displays the welcome message followed by the short help text
 * (basic options only). `exename` is the program name shown in the text.
 * Always returns 0.
 */
static int usage(const char* exename)
{
    DISPLAY( WELCOME_MESSAGE(exename) );
    DISPLAY( "Print or verify checksums using fast non-cryptographic algorithm xxHash \n\n" );
    DISPLAY( "Usage: %s [options] [files] \n\n", exename);
    DISPLAY( "When no filename provided or when '-' is provided, uses stdin as input. \n");
    DISPLAY( "Options: \n");
    DISPLAY( "  -H#         algorithm selection: 0,1,2 or 32,64,128 (default: %i) \n", (int)g_defaultAlgo);
    DISPLAY( "  -c, --check read xxHash checksum from [files] and check them \n");
    DISPLAY( "  -h, --help  display a long help page about advanced options \n");
    return 0;
}
2213 
2214 
usage_advanced(const char * exename)2215 static int usage_advanced(const char* exename)
2216 {
2217     usage(exename);
2218     DISPLAY( "Advanced :\n");
2219     DISPLAY( "  -V, --version        Display version information \n");
2220     DISPLAY( "      --tag            Produce BSD-style checksum lines \n");
2221     DISPLAY( "      --little-endian  Checksum values use little endian convention (default: big endian) \n");
2222     DISPLAY( "  -b                   Run benchmark \n");
2223     DISPLAY( "  -b#                  Bench only algorithm variant # \n");
2224     DISPLAY( "  -i#                  Number of times to run the benchmark (default: %u) \n", (unsigned)g_nbIterations);
2225     DISPLAY( "  -q, --quiet          Don't display version header in benchmark mode \n");
2226     DISPLAY( "\n");
2227     DISPLAY( "The following four options are useful only when verifying checksums (-c): \n");
2228     DISPLAY( "  -q, --quiet          Don't print OK for each successfully verified file \n");
2229     DISPLAY( "      --status         Don't output anything, status code shows success \n");
2230     DISPLAY( "      --strict         Exit non-zero for improperly formatted checksum lines \n");
2231     DISPLAY( "      --warn           Warn about improperly formatted checksum lines \n");
2232     return 0;
2233 }
2234 
/*
 * badusage():
 * Report an invalid command line: print "Wrong parameters",
 * then the short help page of usage().
 * @return always 1, so that XXH_main() can `return badusage(exename);`
 *         to exit with a failure status.
 */
static int badusage(const char* exename)
{
    DISPLAY("Wrong parameters\n\n");
    (void)usage(exename);   /* usage() returns 0; the failure status comes from the line below */
    return 1;
}
2241 
/*
 * errorOut():
 * Print `msg` (followed by a space and a newline) through DISPLAY(),
 * then terminate the program with exit status 1.
 * This function does not return.
 */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
2246 
/*
 * lastNameFromPath():
 * Strip the directory part of `path`.
 * The name starts after the last '/' if there is one; then, within that
 * remainder, after the last '\' if there is one (Windows separator).
 * @return a pointer inside `path` (no allocation); `path` itself when it
 *         contains no separator, an empty string when it ends with one.
 */
static const char* lastNameFromPath(const char* path)
{
    const char* const lastSlash = strrchr(path, '/');
    const char* name = (lastSlash != NULL) ? lastSlash + 1 : path;
    const char* const lastBackslash = strrchr(name, '\\');   /* windows */
    if (lastBackslash != NULL) name = lastBackslash + 1;
    return name;
}
2254 
2255 /*!
2256  * readU32FromCharChecked():
2257  * @return 0 if success, and store the result in *value.
2258  * Allows and interprets K, KB, KiB, M, MB and MiB suffix.
2259  * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
2260  * @return 1 if an overflow error occurs
2261  */
readU32FromCharChecked(const char ** stringPtr,U32 * value)2262 static int readU32FromCharChecked(const char** stringPtr, U32* value)
2263 {
2264     static const U32 max = (((U32)(-1)) / 10) - 1;
2265     U32 result = 0;
2266     while ((**stringPtr >='0') && (**stringPtr <='9')) {
2267         if (result > max) return 1; /* overflow error */
2268         result *= 10;
2269         result += (U32)(**stringPtr - '0');
2270         (*stringPtr)++ ;
2271     }
2272     if ((**stringPtr=='K') || (**stringPtr=='M')) {
2273         U32 const maxK = ((U32)(-1)) >> 10;
2274         if (result > maxK) return 1; /* overflow error */
2275         result <<= 10;
2276         if (**stringPtr=='M') {
2277             if (result > maxK) return 1; /* overflow error */
2278             result <<= 10;
2279         }
2280         (*stringPtr)++;  /* skip `K` or `M` */
2281         if (**stringPtr=='i') (*stringPtr)++;
2282         if (**stringPtr=='B') (*stringPtr)++;
2283     }
2284     *value = result;
2285     return 0;
2286 }
2287 
2288 /*!
2289  * readU32FromChar():
2290  * @return: unsigned integer value read from input in `char` format.
2291  *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
2292  *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
2293  *  Note: function will exit() program if digit sequence overflows
2294  */
readU32FromChar(const char ** stringPtr)2295 static U32 readU32FromChar(const char** stringPtr) {
2296     U32 result;
2297     if (readU32FromCharChecked(stringPtr, &result)) {
2298         static const char errorMsg[] = "Error: numeric value too large";
2299         errorOut(errorMsg);
2300     }
2301     return result;
2302 }
2303 
/*
 * XXH_main():
 * Platform-independent body of the program.
 * Parses the command line, then runs exactly one of:
 *  - the benchmark (-b, --bench-all), on internal data or on the listed files,
 *  - checksum verification (-c, --check) of the listed files or stdin,
 *  - hashing of the listed files or stdin (default).
 *
 * Options must come first: parsing stops at the first argument that does not
 * start with '-', or at "--"; everything from there on is treated as file names.
 * Several short options may be packed into a single argument.
 *
 * @param argc  number of entries in `argv`
 * @param argv  argument vector; argv[0] is only used to derive the program name
 * @return value to use as process exit status:
 *         0 after printing help or version, 1 on invalid usage (see badusage()),
 *         otherwise whatever the selected routine returns.
 */
static int XXH_main(int argc, const char* const* argv)
{
    int i, filenamesStart = 0;   /* filenamesStart: index in argv of the first file name; 0 == none found */
    const char* const exename = lastNameFromPath(argv[0]);
    U32 benchmarkMode = 0;
    U32 fileCheckMode = 0;
    U32 strictMode    = 0;
    U32 statusOnly    = 0;
    U32 warn          = 0;
    int explicitStdin = 0;  /* set when a lone "-" argument is present */
    U32 selectBenchIDs= 0;  /* 0 == use default k_testIDs_default, kBenchAll == bench all */
    static const U32 kBenchAll = 99;
    size_t keySize    = XXH_DEFAULT_SAMPLE_SIZE;
    AlgoSelected algo     = g_defaultAlgo;
    Display_endianess displayEndianess = big_endian;
    Display_convention convention = display_gnu;

    /* special case: xxhNNsum default to NN bits checksum.
     * g_defaultAlgo is updated too, since usage() prints it as the default. */
    if (strstr(exename,  "xxh32sum") != NULL) algo = g_defaultAlgo = algo_xxh32;
    if (strstr(exename,  "xxh64sum") != NULL) algo = g_defaultAlgo = algo_xxh64;
    if (strstr(exename, "xxh128sum") != NULL) algo = g_defaultAlgo = algo_xxh128;

    for (i=1; i<argc; i++) {
        const char* argument = argv[i];
        assert(argument != NULL);

        /* long options: exact match only */
        if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; }
        if (!strcmp(argument, "--benchmark-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
        if (!strcmp(argument, "--bench-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
        if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
        if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; }
        if (!strcmp(argument, "--strict")) { strictMode = 1; continue; }
        if (!strcmp(argument, "--status")) { statusOnly = 1; continue; }
        if (!strcmp(argument, "--warn")) { warn = 1; continue; }
        if (!strcmp(argument, "--help")) { return usage_advanced(exename); }
        if (!strcmp(argument, "--version")) { DISPLAY(FULL_WELCOME_MESSAGE(exename)); BMK_sanityCheck(); return 0; }
        if (!strcmp(argument, "--tag")) { convention = display_bsd; continue; }

        /* "--" : end of options. When it is the last argument, no file name follows
         * and filenamesStart stays 0. */
        if (!strcmp(argument, "--")) {
            if (filenamesStart==0 && i!=argc-1) filenamesStart=i+1; /* only supports a continuous list of filenames */
            break;  /* treat rest of arguments as strictly file names */
        }
        if (*argument != '-') {
            if (filenamesStart==0) filenamesStart=i;   /* only supports a continuous list of filenames */
            break;  /* treat rest of arguments as strictly file names */
        }

        /* command selection */
        argument++;   /* note: *argument=='-' */
        if (*argument == 0) explicitStdin = 1;   /* lone "-" */

        /* short options: each case consumes its own characters,
         * so several of them can be packed into one argument */
        while (*argument != 0) {
            switch(*argument)
            {
            /* Display version */
            case 'V':
                DISPLAY(FULL_WELCOME_MESSAGE(exename)); return 0;

            /* Display help on usage */
            case 'h':
                return usage_advanced(exename);

            /* select hash algorithm
             * note: a missing number reads as 0, which selects XXH32 */
            case 'H': argument++;
                switch(readU32FromChar(&argument)) {
                    case 0 :
                    case 32: algo = algo_xxh32; break;
                    case 1 :
                    case 64: algo = algo_xxh64; break;
                    case 2 :
                    case 128: algo = algo_xxh128; break;
                    default:
                        return badusage(exename);
                }
                break;

            /* File check mode */
            case 'c':
                fileCheckMode=1;
                argument++;
                break;

            /* Warning mode (file check mode only, alias of "--warn") */
            case 'w':
                warn=1;
                argument++;
                break;

            /* Trigger benchmark mode.
             * Accepts an optional comma-separated list of test IDs.
             * An ID >= NB_TESTFUNC selects all tests (kBenchAll).
             * A missing number reads as ID 0, which is later interpreted
             * as "use the default test list" when it is the last ID read. */
            case 'b':
                argument++;
                benchmarkMode = 1;
                do {
                    if (*argument == ',') argument++;
                    selectBenchIDs = readU32FromChar(&argument); /* select one specific test */
                    if (selectBenchIDs < NB_TESTFUNC) {
                        g_testIDs[selectBenchIDs] = 1;
                    } else
                        selectBenchIDs = kBenchAll;
                } while (*argument == ',');
                break;

            /* Modify Nb Iterations (benchmark only) */
            case 'i':
                argument++;
                g_nbIterations = readU32FromChar(&argument);
                break;

            /* Modify Block size (benchmark only) */
            case 'B':
                argument++;
                keySize = readU32FromChar(&argument);
                break;

            /* Lower verbosity by one level (same effect as "--quiet") */
            case 'q':
                argument++;
                g_displayLevel--;
                break;

            default:
                return badusage(exename);
            }
        }
    }   /* for(i=1; i<argc; i++) */

    /* Check benchmark mode */
    if (benchmarkMode) {
        DISPLAYLEVEL(2, FULL_WELCOME_MESSAGE(exename) );
        BMK_sanityCheck();
        if (selectBenchIDs == 0) memcpy(g_testIDs, k_testIDs_default, sizeof(g_testIDs));
        if (selectBenchIDs == kBenchAll) memset(g_testIDs, 1, sizeof(g_testIDs));
        /* no file name: benchmark on an internal buffer of keySize bytes */
        if (filenamesStart==0) return BMK_benchInternal(keySize);
        return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart);
    }

    /* Check if input is defined as console; trigger an error in this case */
    if ( (filenamesStart==0) && IS_CONSOLE(stdin) && !explicitStdin)
        return badusage(exename);

    /* No file name: pass an empty list (count == 0).
     * checkFiles() then works on stdin; XSUM_hashFiles() presumably does the same (defined elsewhere). */
    if (filenamesStart==0) filenamesStart = argc;
    if (fileCheckMode) {
        return checkFiles(argv+filenamesStart, argc-filenamesStart,
                          displayEndianess, strictMode, statusOnly, warn, (g_displayLevel < 2) /*quiet*/);
    } else {
        return XSUM_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess, convention);
    }
}
2452 
2453 /* Windows main wrapper which properly handles UTF-8 command line arguments. */
2454 #ifdef _WIN32
/*
 * convert_argv():
 * Converts a UTF-16 argv to UTF-8.
 * Each of the `argc` entries of `utf16_argv` is converted with utf16_to_utf8().
 *
 * @return a newly allocated array of `argc` UTF-8 strings followed by a
 *         terminating NULL entry; release it with free_argv().
 *         NULL if the array cannot be allocated, or if any single argument
 *         fails to convert. In that case everything allocated so far is
 *         released, so the caller has nothing to clean up.
 */
static char** convert_argv(int argc, const wchar_t* const utf16_argv[])
{
    char** const utf8_argv = (char**)malloc((size_t)(argc + 1) * sizeof(char*));
    if (utf8_argv != NULL) {
        int i;
        for (i = 0; i < argc; i++) {
            utf8_argv[i] = utf16_to_utf8(utf16_argv[i]);
            if (utf8_argv[i] == NULL) {
                /* Conversion failed (assumes utf16_to_utf8() signals failure
                 * by returning NULL - confirm against its definition).
                 * Never hand out an argv with a NULL hole in it: the argument
                 * parser passes every entry to strcmp().
                 * Unwind the entries already converted and report the error. */
                while (i > 0) {
                    i--;
                    free(utf8_argv[i]);
                }
                free(utf8_argv);
                return NULL;
            }
        }
        utf8_argv[argc] = NULL;
    }
    return utf8_argv;
}
/*
 * free_argv():
 * Frees arguments returned by convert_argv:
 * the first `argc` strings of `argv`, then the array itself.
 * A NULL `argv` is accepted and ignored.
 */
static void free_argv(int argc, char** argv)
{
    if (argv != NULL) {
        int remaining;
        /* release order is irrelevant: walk the array backwards */
        for (remaining = argc; remaining > 0; remaining--) {
            free(argv[remaining - 1]);
        }
        free(argv);
    }
}
2480 
2481 
/*
 * XXH_wmain():
 * Windows entry point working on UTF-16 arguments.
 *
 * On Windows, main's argv parameter is useless: instead of UTF-8, it is
 * ANSI-encoded, and any unknown character shows up as mojibake.
 * Most programs are not affected, but for us it means that files with
 * Unicode names cannot be opened.
 *
 * So wmain's arguments are converted to UTF-8 instead, which preserves
 * Unicode arguments, and the result is handed to XXH_main().
 *
 * Whenever possible, this function is called straight from `wmain()`.
 * On MinGW with Unicode disabled, it is reached through `main()`, which
 * retrieves the UTF-16 arguments with `__wgetmainargs()` (see below).
 *
 * @return the value returned by XXH_main(),
 *         or 1 when the arguments could not be converted.
 */
static int XXH_wmain(int argc, const wchar_t* const utf16_argv[])
{
    int ret;
    /* Convert the UTF-16 arguments to UTF-8. */
    char** const utf8_argv = convert_argv(argc, utf16_argv);

    if (utf8_argv == NULL) {
        /* An unfortunate but incredibly unlikely error. */
        fprintf(stderr, "Error converting command line arguments!\n");
        return 1;
    }

    /*
     * MinGW's terminal fully block-buffers stderr. This is nonstandard, and
     * text would not show up until the buffer fills.
     * Switching stderr to unbuffered with `setvbuf()` makes it display instantly.
     */
    setvbuf(stderr, NULL, _IONBF, 0);

    /* Run the real main function, then release the converted arguments. */
    ret = XXH_main(argc, (const char* const *) utf8_argv);
    free_argv(argc, utf8_argv);
    return ret;
}
2524 
2525 #if defined(_MSC_VER)                     /* MSVC always accepts wmain */ \
2526  || defined(_UNICODE) || defined(UNICODE) /* defined with -municode on MinGW-w64 */
2527 
2528 /* Preferred: Use the real `wmain()`. */
2529 #if defined(__cplusplus)
2530 extern "C"
2531 #endif
int wmain(int argc, const wchar_t* utf16_argv[])
{
    /* Conversion to UTF-8, the call to XXH_main() and the cleanup
     * are all handled by XXH_wmain(). */
    int const exitCode = XXH_wmain(argc, utf16_argv);
    return exitCode;
}
2536 
2537 #else /* Non-Unicode MinGW */
2538 
2539 /*
2540  * Wrap `XXH_wmain()` using `main()` and `__wgetmainargs()` on MinGW without
2541  * Unicode support.
2542  *
2543  * `__wgetmainargs()` is used in the CRT startup to retrieve the arguments for
2544  * `wmain()`, so we use it on MinGW to emulate `wmain()`.
2545  *
2546  * It is an internal function and not declared in any public headers, so we
2547  * have to declare it manually.
2548  *
2549  * An alternative that doesn't mess with internal APIs is `GetCommandLineW()`
2550  * with `CommandLineToArgvW()`, but the former doesn't expand wildcards and the
2551  * latter requires linking to Shell32.dll and its numerous dependencies.
2552  *
2553  * This method keeps our dependencies to kernel32.dll and the CRT.
2554  *
2555  * https://docs.microsoft.com/en-us/cpp/c-runtime-library/getmainargs-wgetmainargs?view=vs-2019
2556  */
/* Manually declared type of the last argument of __wgetmainargs(). */
typedef struct {
    int newmode;   /* main() below passes 0, meaning "don't change new mode" */
} _startupinfo;

/*
 * Manual declaration of the CRT-internal __wgetmainargs().
 * main() below treats a negative return value as an error.
 */
#ifdef __cplusplus
extern "C"
#endif
int __cdecl __wgetmainargs(
    int*          Argc,         /* receives the number of arguments */
    wchar_t***    Argv,         /* receives the UTF-16 argument vector */
    wchar_t***    Env,          /* required, but its result is not used by main() below */
    int           DoWildCard,   /* main() below passes 1 to get wildcards expanded */
    _startupinfo* StartInfo     /* see _startupinfo */
);
2571 
main(int ansi_argc,const char * ansi_argv[])2572 int main(int ansi_argc, const char* ansi_argv[])
2573 {
2574     int       utf16_argc;
2575     wchar_t** utf16_argv;
2576     wchar_t** utf16_envp;         /* Unused but required */
2577     _startupinfo startinfo = {0}; /* 0 == don't change new mode */
2578 
2579     /* Get wmain's UTF-16 arguments. Make sure we expand wildcards. */
2580     if (__wgetmainargs(&utf16_argc, &utf16_argv, &utf16_envp, 1, &startinfo) < 0)
2581         /* In the very unlikely case of an error, use the ANSI arguments. */
2582         return XXH_main(ansi_argc, ansi_argv);
2583 
2584     /* Call XXH_wmain with our UTF-16 arguments */
2585     return XXH_wmain(utf16_argc, (const wchar_t* const *)utf16_argv);
2586 }
2587 
2588 #endif /* Non-Unicode MinGW */
2589 
2590 #else /* Not Windows */
2591 
/* Non-Windows platforms: argv needs no conversion, so main() simply forwards to XXH_main(). */
int main(int argc, const char* argv[])
{
    int const exitCode = XXH_main(argc, argv);
    return exitCode;
}
2597 #endif /* !Windows */
2598