1 /*
2 * xxhsum - Command line interface for xxhash algorithms
3 * Copyright (C) 2013-2020 Yann Collet
4 *
5 * GPL v2 License
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * You can contact the author at:
22 * - xxHash homepage: https://www.xxhash.com
23 * - xxHash source repository: https://github.com/Cyan4973/xxHash
24 */
25
26 /*
27 * xxhsum:
28 * Provides hash value of a file content, or a list of files, or stdin
29 * Display convention is Big Endian, for both 32 and 64 bits algorithms
30 */
31
32
33 /* ************************************
34 * Compiler Options
35 **************************************/
36 /* MS Visual */
/*
 * Windows toolchains: request that the CRT "secure" deprecation warnings be
 * silenced. Placed ahead of the #include section so that it is already
 * defined when the C runtime headers are read.
 */
#if defined(_MSC_VER) || defined(_WIN32)
#  ifndef _CRT_SECURE_NO_WARNINGS
#    define _CRT_SECURE_NO_WARNINGS   /* removes visual warnings */
#  endif
#endif

/* Under Linux at least, pull in the *64 functions (e.g. stat64, ftello64) */
#ifndef _LARGEFILE64_SOURCE
#  define _LARGEFILE64_SOURCE
#endif
47
48 /* ************************************
49 * Includes
50 **************************************/
51 #include <limits.h>
52 #include <stdlib.h> /* malloc, calloc, free, exit */
53 #include <string.h> /* strcmp, memcpy */
54 #include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
55 #include <sys/types.h> /* stat, stat64, _stat64 */
56 #include <sys/stat.h> /* stat, stat64, _stat64 */
57 #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
58 #include <assert.h> /* assert */
59 #include <errno.h> /* errno */
60
61 #define XXH_STATIC_LINKING_ONLY /* *_state_t */
62 #include "xxhash.h"
63
64 #ifdef XXHSUM_DISPATCH
65 # include "xxh_x86dispatch.h"
66 #endif
67
68
69 /* ************************************
70 * OS-Specific Includes
71 **************************************/
/*
 * Computes PLATFORM_POSIX_VERSION:
 *   - 200112L on the explicitly listed Apple / SVR4 / AIX / HP-UX / BSD targets,
 *   - the value of _POSIX_VERSION from <unistd.h> on other UNIX-like targets,
 *   - 0 on a UNIX-like target whose <unistd.h> does not define _POSIX_VERSION,
 *   - -1 when the target is not recognized as UNIX-like at all.
 *
 * NOTE(review): _POSIX_C_SOURCE is defined here, after the standard headers
 * above have already been included, so it can only affect headers included
 * from this point on -- confirm this is intended.
 */
#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
   || defined(__midipix__) || defined(__VMS))
#  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)  /* BSD distros */
#    define PLATFORM_POSIX_VERSION 200112L
#  else
#    if defined(__linux__) || defined(__linux)
#      ifndef _POSIX_C_SOURCE
#        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
#      endif
#    endif
#    include <unistd.h>  /* declares _POSIX_VERSION */
#    if defined(_POSIX_VERSION)  /* POSIX compliant */
#      define PLATFORM_POSIX_VERSION _POSIX_VERSION
#    else
#      define PLATFORM_POSIX_VERSION 0
#    endif
#  endif
#endif
/* Fallback: not a recognized UNIX-like platform. */
#if !defined(PLATFORM_POSIX_VERSION)
#  define PLATFORM_POSIX_VERSION -1
#endif
94
/*
 * IS_CONSOLE(stdStream): non-zero when the given stdio stream is attached to
 * an interactive console.
 *   - POSIX-like / DJGPP / MSYS : isatty() on the stream's descriptor.
 *   - MSDOS / OS2               : _isatty() on the stream's descriptor.
 *   - Windows                   : _isatty() AND a successful GetConsoleMode()
 *                                 on the underlying OS handle.
 *   - anything else             : always 0.
 */
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) \
 || (PLATFORM_POSIX_VERSION >= 200112L) \
 || defined(__DJGPP__) \
 || defined(__MSYS__)
#  include <unistd.h>   /* isatty */
#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#elif defined(MSDOS) || defined(OS2)
#  include <io.h>       /* _isatty */
#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#elif defined(WIN32) || defined(_WIN32)
#  include <io.h>      /* _isatty */
#  include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
#  include <stdio.h>   /* FILE */
static __inline int IS_CONSOLE(FILE* stdStream) {
    DWORD dummy;   /* receives the console mode; the value itself is not used */
    return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
}
#else
#  define IS_CONSOLE(stdStream) 0
#endif
115
/*
 * SET_BINARY_MODE(file): switches a stdio stream to binary mode on
 * DOS / OS2 / Windows targets; expands to nothing everywhere else.
 * The non-DJGPP variant expands to a braced block (not an expression) and
 * discards the return value of _setmode().
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
#  include <fcntl.h>   /* _O_BINARY */
#  include <io.h>      /* _setmode, _fileno, _get_osfhandle */
#  if !defined(__DJGPP__)
#    include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
#    include <winioctl.h> /* FSCTL_SET_SPARSE */
#    define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
#  else
#    define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#  endif
#else
#  define SET_BINARY_MODE(file)
#endif
129
/* Fallback for platforms whose <sys/stat.h> lacks S_ISREG:
 * true when the st_mode value `x` has the regular-file type bits. */
#if !defined(S_ISREG)
#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#endif
133
134 /* Unicode helpers for Windows to make UTF-8 act as it should. */
135 #ifdef _WIN32
136 /*
137 * Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards.
138 * This version allows keeping the output length.
139 */
utf8_to_utf16_len(const char * str,int * lenOut)140 static wchar_t* utf8_to_utf16_len(const char* str, int* lenOut)
141 {
142 int const len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
143 if (lenOut != NULL) *lenOut = len;
144 if (len == 0) return NULL;
145 { wchar_t* buf = (wchar_t*)malloc((size_t)len * sizeof(wchar_t));
146 if (buf != NULL) {
147 if (MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len) == 0) {
148 free(buf);
149 return NULL;
150 } }
151 return buf;
152 }
153 }
154
155 /* Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards. */
static wchar_t* utf8_to_utf16(const char *str)
{
    /* Same conversion as utf8_to_utf16_len(), without reporting the length. */
    int* const noLengthWanted = NULL;
    return utf8_to_utf16_len(str, noLengthWanted);
}
160
161 /*
162 * Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards.
163 * This version allows keeping the output length.
164 */
utf16_to_utf8_len(const wchar_t * str,int * lenOut)165 static char* utf16_to_utf8_len(const wchar_t *str, int *lenOut)
166 {
167 int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
168 if (lenOut != NULL) *lenOut = len;
169 if (len == 0) return NULL;
170 { char* const buf = (char*)malloc((size_t)len * sizeof(char));
171 if (buf != NULL) {
172 if (WideCharToMultiByte(CP_UTF8, 0, str, -1, buf, len, NULL, NULL) == 0) {
173 free(buf);
174 return NULL;
175 } }
176 return buf;
177 }
178 }
179
180 /* Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards. */
static char *utf16_to_utf8(const wchar_t *str)
{
    /* Same conversion as utf16_to_utf8_len(), without reporting the length. */
    int* const noLengthWanted = NULL;
    return utf16_to_utf8_len(str, noLengthWanted);
}
185
186 /*
187 * fopen wrapper that supports UTF-8
188 *
189 * fopen will only accept ANSI filenames, which means that we can't open Unicode filenames.
190 *
191 * In order to open a Unicode filename, we need to convert filenames to UTF-16 and use _wfopen.
192 */
static FILE* XXH_fopen_wrapped(const char *filename, const wchar_t *mode)
{
    FILE* stream = NULL;
    /* _wfopen needs a UTF-16 path: convert, open, then drop the temporary copy. */
    wchar_t* const widePath = utf8_to_utf16(filename);

    if (widePath != NULL) {
        stream = _wfopen(widePath, mode);
        free(widePath);
    }
    /* NULL when either the conversion or the open itself failed. */
    return stream;
}
202
203 /*
204 * In case it isn't available, this is what MSVC 2019 defines in stdarg.h.
205 */
/* Only for MSVC proper (not clang-cl) when no va_copy macro exists yet:
 * emulate it with a plain assignment of the va_list. */
#if defined(_MSC_VER) && !defined(__clang__) && !defined(va_copy)
#  define va_copy(destination, source) ((destination) = (source))
#endif
209
210 /*
211 * fprintf wrapper that supports UTF-8.
212 *
213 * fprintf doesn't properly handle Unicode on Windows.
214 *
215 * Additionally, it is codepage sensitive on console and may crash the program.
216 *
217 * Instead, we use vsnprintf, and either print with fwrite or convert to UTF-16
218 * for console output and use the codepage-independent WriteConsoleW.
219 *
220 * Credit to t-mat: https://github.com/t-mat/xxHash/commit/5691423
221 */
/*
 * Parameters:
 *   stream : destination stdio stream.
 *   format : printf-style format string; the formatted result is treated as UTF-8.
 * Returns:
 *   - the value of _vscprintf() unchanged when it is <= 0 (nothing is written),
 *   - the number of UTF-16 units reported by WriteConsoleW() on the console path,
 *   - the number of bytes written by fwrite() on the file/pipe path,
 *   - -1 on allocation failure, invalid OS handle, conversion failure or write failure.
 */
static int fprintf_utf8(FILE *stream, const char *format, ...)
{
    int result;
    va_list args;
    va_list copy;

    va_start(args, format);

    /*
     * To be safe, make a va_copy.
     *
     * Note that Microsoft doesn't use va_copy in its sample code:
     *   https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/vsprintf-vsprintf-l-vswprintf-vswprintf-l-vswprintf-l?view=vs-2019
     */
    va_copy(copy, args);
    /* Counts the number of characters needed for vsnprintf. */
    result = _vscprintf(format, copy);
    va_end(copy);

    if (result > 0) {
        /* Create a buffer for vsnprintf */
        const size_t nchar = (size_t)result + 1;   /* +1 for the terminating '\0' */
        char* u8_str = (char*)malloc(nchar * sizeof(u8_str[0]));

        if (u8_str == NULL) {
            result = -1;
        } else {
            /* Generate the UTF-8 string with vsnprintf. */
            result = _vsnprintf(u8_str, nchar - 1, format, args);
            /* The count passed above excludes the terminator, so add it explicitly. */
            u8_str[nchar - 1] = '\0';
            if (result > 0) {
                /*
                 * Check if we are outputting to a console. Don't use IS_CONSOLE
                 * directly -- we don't need to call _get_osfhandle twice.
                 */
                int fileNb = _fileno(stream);
                intptr_t handle_raw = _get_osfhandle(fileNb);
                HANDLE handle = (HANDLE)handle_raw;
                DWORD dwTemp;

                if (handle_raw < 0) {
                    /* No usable OS handle behind this stream. */
                    result = -1;
                } else if (_isatty(fileNb) && GetConsoleMode(handle, &dwTemp)) {
                    /*
                     * Convert to UTF-16 and output with WriteConsoleW.
                     *
                     * This is codepage independent and works on Windows XP's
                     * default msvcrt.dll.
                     */
                    int len;
                    wchar_t *const u16_buf = utf8_to_utf16_len(u8_str, &len);
                    if (u16_buf == NULL) {
                        result = -1;
                    } else {
                        /* len counts the terminating L'\0', which must not be printed. */
                        if (WriteConsoleW(handle, u16_buf, (DWORD)len - 1, &dwTemp, NULL)) {
                            result = (int)dwTemp;
                        } else {
                            result = -1;
                        }
                        free(u16_buf);
                    }
                } else {
                    /* fwrite the UTF-8 string if we are printing to a file */
                    result = (int)fwrite(u8_str, 1, nchar - 1, stream);
                    if (result == 0) {
                        result = -1;
                    }
                }
            }
            free(u8_str);
        }
    }
    va_end(args);
    return result;
}
297 /*
298 * Since we always use literals in the "mode" argument, it is just easier to append "L" to
299 * the string to make it UTF-16 and avoid the hassle of a second manual conversion.
300 */
/* Windows: `mode` must be a string literal, because L is pasted onto it to form a wide literal. */
#  define XXH_fopen(filename, mode) XXH_fopen_wrapped(filename, L##mode)
#else
/* Everywhere else: plain fopen(). */
#  define XXH_fopen(filename, mode) fopen(filename, mode)
#endif
305
306 /* ************************************
307 * Basic Types
308 **************************************/
/*
 * U8 / U32 / U64: unsigned integer aliases.
 * With C++ or C99 they map onto the <stdint.h> fixed-width types; otherwise
 * they are picked from the basic types, using UINT_MAX to decide whether
 * `unsigned int` or `unsigned long` serves as U32.
 */
#if defined(__cplusplus) /* C++ */ \
 || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)  /* C99 */
#  include <stdint.h>
    typedef uint8_t  U8;
    typedef uint32_t U32;
    typedef uint64_t U64;
# else
#  include <limits.h>
    typedef unsigned char      U8;
#  if UINT_MAX == 0xFFFFFFFFUL
    typedef unsigned int       U32;
#  else
    typedef unsigned long      U32;
#  endif
    typedef unsigned long long U64;
#endif /* not C++/C99 */
325
BMK_isLittleEndian(void)326 static unsigned BMK_isLittleEndian(void)
327 {
328 const union { U32 u; U8 c[4]; } one = { 1 }; /* don't use static: performance detrimental */
329 return one.c[0];
330 }
331
332
333 /* *************************************
334 * Constants
335 ***************************************/
/* PROGRAM_VERSION: the string "MAJOR.MINOR.RELEASE", built from the XXH_VERSION_* macros.
 * The two-level QUOTE / EXPAND_AND_QUOTE pair makes sure the macros are
 * expanded before being turned into a string literal. */
#define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE
#define QUOTE(str) #str
#define EXPAND_AND_QUOTE(str) QUOTE(str)
#define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION)
340
/* Show the compiler version in FULL_WELCOME_MESSAGE. CC_VERSION_FMT holds the printf format specifiers,
 * and CC_VERSION holds the comma-separated list of arguments matching CC_VERSION_FMT. */
/*
 * Per compiler, define the pair:
 *   CC_VERSION_FMT : a printf format fragment,
 *   CC_VERSION     : the matching argument(s) -- a single string for
 *                    Clang / GCC / ICC / unknown, several integers for MSVC and tcc.
 * Clang is tested first, ahead of the generic __VERSION__ branch.
 */
#if defined(__clang_version__)
/* Clang does its own thing. */
#  ifdef __apple_build_version__
#    define CC_VERSION_FMT "Apple Clang %s"
#  else
#    define CC_VERSION_FMT "Clang %s"
#  endif
#  define CC_VERSION __clang_version__
#elif defined(__VERSION__)
/* GCC and ICC */
#  define CC_VERSION_FMT "%s"
#  ifdef __INTEL_COMPILER /* icc adds its prefix */
#    define CC_VERSION __VERSION__
#  else /* assume GCC */
#    define CC_VERSION "GCC " __VERSION__
#  endif
#elif defined(_MSC_FULL_VER) && defined(_MSC_BUILD)
/*
 * MSVC
 *  "For example, if the version number of the Visual C++ compiler is
 *   15.00.20706.01, the _MSC_FULL_VER macro evaluates to 150020706."
 *
 *   https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=vs-2017
 */
#  define CC_VERSION_FMT "MSVC %02i.%02i.%05i.%02i"
/* major, minor, build extracted from _MSC_FULL_VER; revision from _MSC_BUILD */
#  define CC_VERSION _MSC_FULL_VER / 10000000 % 100, _MSC_FULL_VER / 100000 % 100, _MSC_FULL_VER % 100000, _MSC_BUILD
#elif defined(__TINYC__)
/* tcc stores its version in the __TINYC__ macro. */
#  define CC_VERSION_FMT "tcc %i.%i.%i"
#  define CC_VERSION __TINYC__ / 10000 % 100, __TINYC__ / 100 % 100, __TINYC__ % 100
#else
#  define CC_VERSION_FMT "%s"
#  define CC_VERSION "unknown compiler"
#endif
377
378 /* makes the next part easier */
/*
 * ARCH: a string literal describing the target architecture and, where it
 * can be told from predefined macros, the vector / alignment features in use.
 * ARCH_X86 is a helper holding the base x86 name; ARCH_X64 is additionally
 * defined on 64-bit x86.
 */
#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
#  define ARCH_X64 1
#  define ARCH_X86 "x86_64"
#elif defined(__i386__) || defined(_M_IX86) || defined(_M_IX86_FP)
#  define ARCH_X86 "i386"
#endif

/* Try to detect the architecture. */
#if defined(ARCH_X86)
   /* Widest instruction set first; a dispatch build reports "autoVec" instead. */
#  if defined(XXHSUM_DISPATCH)
#    define ARCH ARCH_X86 " autoVec"
#  elif defined(__AVX512F__)
#    define ARCH ARCH_X86 " + AVX512"
#  elif defined(__AVX2__)
#    define ARCH ARCH_X86 " + AVX2"
#  elif defined(__AVX__)
#    define ARCH ARCH_X86 " + AVX"
#  elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) \
      || defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
#     define ARCH ARCH_X86 " + SSE2"
#  else
#     define ARCH ARCH_X86
#  endif
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
#  define ARCH "aarch64 + NEON"
#elif defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(_M_ARM)
/* ARM has a lot of different features that can change xxHash significantly. */
#  if defined(__thumb2__) || (defined(__thumb__) && (__thumb__ == 2 || __ARM_ARCH >= 7))
#    define ARCH_THUMB " Thumb-2"
#  elif defined(__thumb__)
#    define ARCH_THUMB " Thumb-1"
#  else
#    define ARCH_THUMB ""
#  endif
/* ARMv7 has unaligned by default */
#  if defined(__ARM_FEATURE_UNALIGNED) || __ARM_ARCH >= 7 || defined(_M_ARMV7VE)
#    define ARCH_UNALIGNED " + unaligned"
#  else
#    define ARCH_UNALIGNED ""
#  endif
#  if defined(__ARM_NEON) || defined(__ARM_NEON__)
#    define ARCH_NEON " + NEON"
#  else
#    define ARCH_NEON ""
#  endif
   /* Adjacent string literals concatenate into e.g. "ARMv<n> Thumb-2 + NEON + unaligned". */
#  define ARCH "ARMv" EXPAND_AND_QUOTE(__ARM_ARCH) ARCH_THUMB ARCH_NEON ARCH_UNALIGNED
#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
#  if defined(__GNUC__) && defined(__POWER9_VECTOR__)
#    define ARCH "ppc64 + POWER9 vector"
#  elif defined(__GNUC__) && defined(__POWER8_VECTOR__)
#    define ARCH "ppc64 + POWER8 vector"
#  else
#    define ARCH "ppc64"
#  endif
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
#  define ARCH "ppc"
#elif defined(__AVR)
#  define ARCH "AVR"
#elif defined(__mips64)
#  define ARCH "mips64"
#elif defined(__mips)
#  define ARCH "mips"
#elif defined(__s390x__)
#  define ARCH "s390x"
#elif defined(__s390__)
#  define ARCH "s390"
#else
#  define ARCH "unknown"
#endif
448
/* Pointer width of this build, in bits. */
static const int g_nbBits = (int)(sizeof(void*)*8);
static const char g_lename[] = "little endian";
static const char g_bename[] = "big endian";
/* Endianness name, decided at run time by BMK_isLittleEndian(). */
#define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename)
static const char author[] = "Yann Collet";
/*
 * Both macros expand to "a format string followed by its arguments", meant
 * to be dropped directly into a printf-style argument list.
 * FULL_WELCOME_MESSAGE additionally reports pointer width, architecture,
 * endianness and compiler version.
 */
#define WELCOME_MESSAGE(exename) "%s %s by %s \n", exename, PROGRAM_VERSION, author
#define FULL_WELCOME_MESSAGE(exename) "%s %s by %s \n" \
                    "compiled as %i-bit %s %s with " CC_VERSION_FMT " \n", \
                    exename, PROGRAM_VERSION, author, \
                    g_nbBits, ARCH, ENDIAN_NAME, CC_VERSION
459
/*
 * Postfix size units: `64 MB` expands to `64 *( 1<<20)`.
 * Because the expansion starts with `*`, each macro is only usable directly
 * after a number or expression. GB uses an unsigned constant (1U<<30).
 */
#define KB *( 1<<10)
#define MB *( 1<<20)
#define GB *(1U<<30)

/* Default size of the sample used by the benchmark: 100 KB. */
static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB;
#define NBLOOPS    3                              /* Default number of benchmark iterations */
#define TIMELOOP_S 1
#define TIMELOOP  (TIMELOOP_S * CLOCKS_PER_SEC)   /* target timing per iteration */
#define TIMELOOP_MIN (TIMELOOP / 2)               /* minimum timing to validate a result */
#define XXHSUM32_DEFAULT_SEED 0                   /* Default seed for algo_xxh32 */
#define XXHSUM64_DEFAULT_SEED 0                   /* Default seed for algo_xxh64 */

/* Upper bound applied by BMK_findMaxMem() to the amount of memory it probes. */
#define MAX_MEM    (2 GB - 64 MB)
473
/* NOTE(review): presumably the file name that stands for standard input -- confirm at the use site. */
static const char stdinName[] = "-";
/* Selectable hash algorithms; the numeric values are fixed explicitly. */
typedef enum { algo_xxh32=0, algo_xxh64=1, algo_xxh128=2 } AlgoSelected;
static AlgoSelected g_defaultAlgo = algo_xxh64;    /* required within main() & usage() */

/* <16 hex char> <SPC> <SPC> <filename> <'\0'>
 * '4096' is typical Linux PATH_MAX configuration. */
#define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1)

/* Maximum acceptable line length. */
#define MAX_LINE_LENGTH (32 KB)
484
485
486 /* ************************************
487 * Display macros
488 **************************************/
/*
 * DISPLAY       : printf-style output to stderr.
 * DISPLAYRESULT : printf-style output to stdout.
 * On Windows both go through fprintf_utf8().
 */
#ifdef _WIN32
#define DISPLAY(...)         fprintf_utf8(stderr, __VA_ARGS__)
#define DISPLAYRESULT(...)   fprintf_utf8(stdout, __VA_ARGS__)
#else
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
#define DISPLAYRESULT(...)   fprintf(stdout, __VA_ARGS__)
#endif

/* DISPLAY only when the current verbosity (g_displayLevel) is at least `l`. */
#define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0)
static int g_displayLevel = 2;   /* current verbosity; starts at 2 */


/* ************************************
 *  Local variables
 **************************************/
/* Number of timed benchmark rounds; BMK_benchHash() treats 0 as a single quick round. */
static U32 g_nbIterations = NBLOOPS;
505
506
507 /* ************************************
508 * Benchmark Functions
509 **************************************/
/* Number of clock() ticks elapsed since `start`. */
static clock_t BMK_clockSpan( clock_t start )
{
    clock_t const now = clock();
    /* works even if overflow; Typical max span ~ 30 mn */
    return now - start;
}
514
515
/*
 * Probes how much memory can actually be allocated for a benchmark.
 *
 * requiredMem : amount the caller would like, in bytes.
 * returns     : a size known to be allocatable, with some headroom left
 *               for the rest of the program; 0 when nothing at all could
 *               be allocated.
 *
 * The request is rounded up to the next 64 MB boundary, padded with two
 * extra steps and capped at MAX_MEM. It is then lowered (by 64 MB, or
 * halved once at or below 64 MB) until a trial malloc() succeeds.
 */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    void* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;

    while (!testmem) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /*
         * malloc(0) is allowed to return NULL. Without this exit the loop
         * would never end on a system where every probe fails.
         */
        if (requiredMem == 0) return 0;
        testmem = malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
538
539
BMK_GetFileSize(const char * infilename)540 static U64 BMK_GetFileSize(const char* infilename)
541 {
542 int r;
543 #if defined(_MSC_VER)
544 struct _stat64 statbuf;
545 r = _stat64(infilename, &statbuf);
546 #else
547 struct stat statbuf;
548 r = stat(infilename, &statbuf);
549 #endif
550 if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
551 return (U64)statbuf.st_size;
552 }
553
554 /*
555 * Allocates a string containing s1 and s2 concatenated. Acts like strdup.
556 * The result must be freed.
557 */
static char* XXH_strcatDup(const char* s1, const char* s2)
{
    size_t headLen;
    size_t tailLen;
    char* joined;

    assert(s1 != NULL);
    assert(s2 != NULL);

    headLen = strlen(s1);
    tailLen = strlen(s2);

    /* room for both strings plus a single terminator */
    joined = (char*)malloc(headLen + tailLen + 1);
    if (joined == NULL) return NULL;

    /* s1 goes in without its terminator, s2 follows together with its own */
    memcpy(joined, s1, headLen);
    memcpy(joined + headLen, s2, tailLen + 1);
    return joined;
}
574
575
576 /* use #define to make them constant, required for initialization */
/* Constants driving the byte generator in BMK_fillTestBuffer():
 * PRIME32 is its initial state, PRIME64 its per-byte multiplier. */
#define PRIME32 2654435761U
#define PRIME64 11400714785074694797ULL
579
580 /*
581 * Fills a test buffer with pseudorandom data.
582 *
583 * This is used in the sanity check - its values must not be changed.
584 */
static void BMK_fillTestBuffer(U8* buffer, size_t len)
{
    U64 generator = PRIME32;    /* 64-bit multiplicative generator state */
    U8* out = buffer;
    size_t remaining = len;

    assert(buffer != NULL);

    while (remaining-- > 0) {
        /* each output byte is the top 8 bits of the current state */
        *out++ = (U8)(generator >> 56);
        generator *= PRIME64;
    }
}
597
598 /*
599 * A secret buffer used for benchmarking XXH3's withSecret variants.
600 *
601 * In order for the bench to be realistic, the secret buffer would need to be
602 * pre-generated.
603 *
604 * Adding a pointer to the parameter list would be messy.
605 */
/* Filled by BMK_benchMem() through BMK_fillTestBuffer() before any benchmark runs. */
static U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN];

/*
 * Wrappers for the benchmark.
 *
 * If you would like to add other hashes to the bench, create a wrapper and add
 * it to the g_hashesToBench table. It will automatically be added.
 */
/* Common signature of every benchmarked wrapper: input, length, 32-bit seed -> 32-bit result. */
typedef U32 (*hashFunction)(const void* buffer, size_t bufferSize, U32 seed);
615
/* Benchmark wrapper: one-shot XXH32 with the given seed. */
static U32 localXXH32(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH32_hash_t const digest = XXH32(buffer, bufferSize, seed);
    return digest;
}
/* Benchmark wrapper: one-shot XXH64 with the given seed, narrowed to U32. */
static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const digest = XXH64(buffer, bufferSize, seed);
    return (U32)digest;
}
/* Benchmark wrapper: one-shot XXH3 64-bit, unseeded (seed is ignored), narrowed to U32. */
static U32 localXXH3_64b(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const digest = XXH3_64bits(buffer, bufferSize);
    (void)seed;
    return (U32)digest;
}
/* Benchmark wrapper: one-shot XXH3 64-bit with the given seed, narrowed to U32. */
static U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const digest = XXH3_64bits_withSeed(buffer, bufferSize, seed);
    return (U32)digest;
}
/* Benchmark wrapper: one-shot XXH3 64-bit keyed with g_benchSecretBuf
 * (seed is ignored), narrowed to U32. */
static U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH64_hash_t const digest =
        XXH3_64bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (U32)digest;
}
/* Benchmark wrapper: one-shot XXH3 128-bit, unseeded (seed is ignored);
 * returns the low64 half narrowed to U32. */
static U32 localXXH3_128b(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH128_hash_t const digest = XXH3_128bits(buffer, bufferSize);
    (void)seed;
    return (U32)(digest.low64);
}
/* Benchmark wrapper: one-shot XXH3 128-bit with the given seed;
 * returns the low64 half narrowed to U32. */
static U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH128_hash_t const digest = XXH3_128bits_withSeed(buffer, bufferSize, seed);
    return (U32)(digest.low64);
}
/* Benchmark wrapper: one-shot XXH3 128-bit keyed with g_benchSecretBuf
 * (seed is ignored); returns the low64 half narrowed to U32. */
static U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH128_hash_t const digest =
        XXH3_128bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (U32)(digest.low64);
}
/* Benchmark wrapper: XXH3 64-bit through the streaming API
 * (reset / single update / digest), unseeded; seed is ignored. */
static U32 localXXH3_stream(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH64_hash_t digest;
    (void)seed;
    XXH3_64bits_reset(&state);
    XXH3_64bits_update(&state, buffer, bufferSize);
    digest = XXH3_64bits_digest(&state);
    return (U32)digest;
}
/* Benchmark wrapper: XXH3 64-bit through the streaming API
 * (init / seeded reset / single update / digest). */
static U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH64_hash_t digest;
    XXH3_INITSTATE(&state);
    XXH3_64bits_reset_withSeed(&state, (XXH64_hash_t)seed);
    XXH3_64bits_update(&state, buffer, bufferSize);
    digest = XXH3_64bits_digest(&state);
    return (U32)digest;
}
/* Benchmark wrapper: XXH3 128-bit through the streaming API
 * (reset / single update / digest), unseeded; seed is ignored. */
static U32 localXXH128_stream(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH128_hash_t digest;
    (void)seed;
    XXH3_128bits_reset(&state);
    XXH3_128bits_update(&state, buffer, bufferSize);
    digest = XXH3_128bits_digest(&state);
    return (U32)(digest.low64);
}
/* Benchmark wrapper: XXH3 128-bit through the streaming API
 * (init / seeded reset / single update / digest). */
static U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, U32 seed)
{
    XXH3_state_t state;
    XXH128_hash_t digest;
    XXH3_INITSTATE(&state);
    XXH3_128bits_reset_withSeed(&state, (XXH64_hash_t)seed);
    XXH3_128bits_update(&state, buffer, bufferSize);
    digest = XXH3_128bits_digest(&state);
    return (U32)(digest.low64);
}
684
685
/* One benchmarkable hash: its display name and the wrapper to call. */
typedef struct {
    const char*  name;   /* label printed in benchmark results */
    hashFunction func;   /* wrapper invoked by the benchmark loop */
} hashInfo;
690
/*
 * Table of benchmarked hashes. Order matters: BMK_benchMem() maps test ID i
 * to entry (i-1)/2, so entry k is addressed by test IDs 2k+1 (aligned input)
 * and 2k+2 (unaligned input).
 */
#define NB_HASHFUNC 12
static const hashInfo g_hashesToBench[NB_HASHFUNC] = {
    { "XXH32",             &localXXH32 },
    { "XXH64",             &localXXH64 },
    { "XXH3_64b",          &localXXH3_64b },
    { "XXH3_64b w/seed",   &localXXH3_64b_seeded },
    { "XXH3_64b w/secret", &localXXH3_64b_secret },
    { "XXH128",            &localXXH3_128b },
    { "XXH128 w/seed",     &localXXH3_128b_seeded },
    { "XXH128 w/secret",   &localXXH3_128b_secret },
    { "XXH3_stream",       &localXXH3_stream },
    { "XXH3_stream w/seed",&localXXH3_stream_seeded },
    { "XXH128_stream",     &localXXH128_stream },
    { "XXH128_stream w/seed",&localXXH128_stream_seeded },
};
706
/*
 * Test IDs run from 1 to 2*NB_HASHFUNC; slot 0 is unused.
 * g_testIDs[i] != 0 enables test i in BMK_benchMem().
 * k_testIDs_default enables IDs 1, 3, 5 and 11, i.e. the aligned runs of
 * XXH32, XXH64, XXH3_64b and XXH128; remaining entries are implicitly 0.
 */
#define NB_TESTFUNC (1 + 2 * NB_HASHFUNC)
static char g_testIDs[NB_TESTFUNC] = { 0 };
static const char k_testIDs_default[NB_TESTFUNC] = { 0,
        1 /*XXH32*/, 0,
        1 /*XXH64*/, 0,
        1 /*XXH3*/, 0, 0, 0, 0, 0,
        1 /*XXH128*/ };
714
/* Width of the hash-name column in benchmark output; longer names are truncated. */
#define HASHNAME_MAX 29
/*
 * Benchmarks one hash wrapper and prints its best measured speed.
 *
 *   h          : wrapper to time.
 *   hName      : label shown in the output.
 *   testID     : numeric ID shown in the final result line.
 *   buffer     : input data.
 *   bufferSize : number of input bytes hashed per call.
 *
 * Runs g_nbIterations timed rounds (a single one when g_nbIterations is 0).
 * In each round, h is called nbh_perIteration times with seeds 0, 1, 2, ...
 * A round that lasts less than TIMELOOP_MIN ticks is redone with a larger
 * call count, unless g_nbIterations is 0. The fastest round is reported.
 */
static void BMK_benchHash(hashFunction h, const char* hName, int testID,
                          const void* buffer, size_t bufferSize)
{
    U32 nbh_perIteration = (U32)((300 MB) / (bufferSize+1)) + 1;  /* first iteration conservatively aims for 300 MB/s */
    unsigned iterationNb, nbIterations = g_nbIterations + !g_nbIterations /* min 1 */;
    double fastestH = 100000000.;   /* best time per hash so far, in seconds; starts at a huge sentinel */
    assert(HASHNAME_MAX > 2);
    DISPLAYLEVEL(2, "\r%80s\r", "");       /* Clean display line */

    for (iterationNb = 1; iterationNb <= nbIterations; iterationNb++) {
        U32 r=0;
        clock_t cStart;

        DISPLAYLEVEL(2, "%2u-%-*.*s : %10u ->\r",
                        iterationNb,
                        HASHNAME_MAX, HASHNAME_MAX, hName,
                        (unsigned)bufferSize);
        cStart = clock();
        while (clock() == cStart);   /* starts clock() at its exact beginning */
        cStart = clock();

        {   U32 u;
            for (u=0; u<nbh_perIteration; u++)
                r += h(buffer, bufferSize, u);
        }
        if (r==0) DISPLAYLEVEL(3,".\r");  /* do something with r to defeat compiler "optimizing" hash away */

        {   clock_t const nbTicks = BMK_clockSpan(cStart);
            /* TIMELOOP is one second worth of ticks, so this value is seconds per hash call. */
            double const ticksPerHash = ((double)nbTicks / TIMELOOP) / nbh_perIteration;
            /*
             * clock() is the only decent portable timer, but it isn't very
             * precise.
             *
             * Sometimes, this lack of precision is enough that the benchmark
             * finishes before there are enough ticks to get a meaningful result.
             *
             * For example, on a Core 2 Duo (without any sort of Turbo Boost),
             * the imprecise timer caused peculiar results like so:
             *
             *    XXH3_64b                   4800.0 MB/s // conveniently even
             *    XXH3_64b unaligned         4800.0 MB/s
             *    XXH3_64b seeded            9600.0 MB/s // magical 2x speedup?!
             *    XXH3_64b seeded unaligned  4800.0 MB/s
             *
             * If we sense a suspiciously low number of ticks, we increase the
             * iterations until we can get something meaningful.
             */
            if (nbTicks < TIMELOOP_MIN) {
                /* Not enough time spent in benchmarking, risk of rounding bias */
                if (nbTicks == 0) { /* faster than resolution timer */
                    nbh_perIteration *= 100;
                } else {
                    /*
                     * update nbh_perIteration so that the next round lasts
                     * approximately 1 second.
                     */
                    double nbh_perSecond = (1 / ticksPerHash) + 1;
                    if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);   /* avoid overflow */
                    nbh_perIteration = (U32)nbh_perSecond;
                }
                /* g_nbIterations==0 => quick evaluation, no claim of accuracy */
                if (g_nbIterations>0) {
                    iterationNb--;   /* new round for a more accurate speed evaluation */
                    continue;
                }
            }
            if (ticksPerHash < fastestH) fastestH = ticksPerHash;
            if (fastestH>0.) { /* avoid div by zero */
                DISPLAYLEVEL(2, "%2u-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
                            iterationNb,
                            HASHNAME_MAX, HASHNAME_MAX, hName,
                            (unsigned)bufferSize,
                            (double)1 / fastestH,
                            ((double)bufferSize / (1 MB)) / fastestH);
        }   }
        /* Re-target the next round at about one second, based on the best speed so far. */
        {   double nbh_perSecond = (1 / fastestH) + 1;
            if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);   /* avoid overflow */
            nbh_perIteration = (U32)nbh_perSecond;
        }
    }
    /* Final result line, printed at display level 1 and above. */
    DISPLAYLEVEL(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n",
                    testID,
                    HASHNAME_MAX, HASHNAME_MAX, hName,
                    (unsigned)bufferSize,
                    (double)1 / fastestH,
                    ((double)bufferSize / (1 MB)) / fastestH);
    /* Below level 1, print only the iterations-per-second figure, comma separated. */
    if (g_displayLevel<1)
        DISPLAYLEVEL(0, "%u, ", (unsigned)((double)1 / fastestH));
}
805
806
/*!
 * BMK_benchMem():
 * buffer: Must be 16-byte aligned.
 * The real allocated size of buffer is supposed to be >= (bufferSize+3),
 * because the unaligned variants hash bufferSize bytes starting at buffer+3.
 * Runs every benchmark variant enabled in g_testIDs; returns nothing.
 */
BMK_benchMem(const void * buffer,size_t bufferSize)813 static void BMK_benchMem(const void* buffer, size_t bufferSize)
814 {
815 assert((((size_t)buffer) & 15) == 0); /* ensure alignment */
816 BMK_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf));
817 { int i;
818 for (i = 1; i < NB_TESTFUNC; i++) {
819 int const hashFuncID = (i-1) / 2;
820 assert(g_hashesToBench[hashFuncID].name != NULL);
821 if (g_testIDs[i] == 0) continue;
822 /* aligned */
823 if ((i % 2) == 1) {
824 BMK_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize);
825 }
826 /* unaligned */
827 if ((i % 2) == 0) {
828 /* Append "unaligned". */
829 char* const hashNameBuf = XXH_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned");
830 assert(hashNameBuf != NULL);
831 BMK_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize);
832 free(hashNameBuf);
833 }
834 } }
835 }
836
BMK_selectBenchedSize(const char * fileName)837 static size_t BMK_selectBenchedSize(const char* fileName)
838 {
839 U64 const inFileSize = BMK_GetFileSize(fileName);
840 size_t benchedSize = (size_t) BMK_findMaxMem(inFileSize);
841 if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
842 if (benchedSize < inFileSize) {
843 DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
844 }
845 return benchedSize;
846 }
847
848
/*
 * Benchmarks every file named in fileNamesTable[0..nbFiles-1].
 *
 * Each file is opened, loaded (fully, or partially when memory is short)
 * into a 16-byte aligned buffer, and passed to BMK_benchMem().
 *
 * Returns 0. Any failure terminates the program:
 *   exit(11) file cannot be opened,
 *   exit(12) out of memory,
 *   exit(13) file cannot be read.
 */
static int BMK_benchFiles(const char*const* fileNamesTable, int nbFiles)
{
    int fileIdx;
    for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
        const char* const inFileName = fileNamesTable[fileIdx];
        assert(inFileName != NULL);

        {   FILE* const inFile = XXH_fopen( inFileName, "rb" );
            /*
             * Check right away: any later library call (stat, malloc, ...)
             * may overwrite errno and make the message misleading.
             */
            if (inFile==NULL){
                DISPLAY("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
                exit(11);
            }

            {   size_t const benchedSize = BMK_selectBenchedSize(inFileName);
                /* +16 for alignment slack, +3 for the unaligned benchmark offset */
                char* const buffer = (char*)calloc(benchedSize+16+3, 1);
                if(!buffer) {
                    DISPLAY("\nError: Out of memory.\n");
                    fclose(inFile);
                    exit(12);
                }

                /* Only derive the aligned pointer once buffer is known to be valid. */
                {   void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF);  /* align on next 16 bytes */

                    /* Fill input buffer */
                    size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
                    int const readErrno = errno;   /* fclose() below may overwrite errno */
                    fclose(inFile);
                    if(readSize != benchedSize) {
                        DISPLAY("\nError: Could not read '%s': %s.\n", inFileName, strerror(readErrno));
                        free(buffer);
                        exit(13);
                    }

                    /* bench */
                    BMK_benchMem(alignedBuffer, benchedSize);
                }

                free(buffer);
    }   }   }
    return 0;
}
889
890
BMK_benchInternal(size_t keySize)891 static int BMK_benchInternal(size_t keySize)
892 {
893 void* const buffer = calloc(keySize+16+3, 1);
894 if (buffer == NULL) {
895 DISPLAY("\nError: Out of memory.\n");
896 exit(12);
897 }
898
899 { const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
900
901 /* bench */
902 DISPLAYLEVEL(1, "Sample of ");
903 if (keySize > 10 KB) {
904 DISPLAYLEVEL(1, "%u KB", (unsigned)(keySize >> 10));
905 } else {
906 DISPLAYLEVEL(1, "%u bytes", (unsigned)keySize);
907 }
908 DISPLAYLEVEL(1, "... \n");
909
910 BMK_benchMem(alignedBuffer, keySize);
911 free(buffer);
912 }
913 return 0;
914 }
915
916
/* ************************************************
 * Self-test:
 * ensure results consistency across platforms
 *********************************************** */
921
/*
 * BMK_checkResult32():
 * Compares a computed 32-bit hash `r1` with its expected value `r2`.
 * On mismatch, displays the number of the failing check (counted from 1
 * across all calls) together with both values, then exits with code 1.
 */
static void BMK_checkResult32(XXH32_hash_t r1, XXH32_hash_t r2)
{
    static int nbTests = 1;   /* number of the check currently performed */

    if (r1 == r2) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: 32-bit hash test %i: Internal sanity check failed!\n", nbTests);
    DISPLAY("\rGot 0x%08X, expected 0x%08X.\n", (unsigned)r1, (unsigned)r2);
    DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
            "or temporarily comment out the tests in BMK_sanityCheck.\n");
    exit(1);
}
934
/*
 * BMK_checkResult64():
 * Compares a computed 64-bit hash `r1` with its expected value `r2`.
 * On mismatch, displays the number of the failing check (counted from 1
 * across all calls) together with both values, each printed as two
 * 32-bit halves, then exits with code 1.
 */
static void BMK_checkResult64(XXH64_hash_t r1, XXH64_hash_t r2)
{
    static int nbTests = 1;   /* number of the check currently performed */

    if (r1 == r2) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: 64-bit hash test %i: Internal sanity check failed!\n", nbTests);
    DISPLAY("\rGot 0x%08X%08XULL, expected 0x%08X%08XULL.\n",
            (unsigned)(r1>>32), (unsigned)r1,
            (unsigned)(r2>>32), (unsigned)r2);
    DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
            "or temporarily comment out the tests in BMK_sanityCheck.\n");
    exit(1);
}
948
/*
 * BMK_checkResult128():
 * Compares a computed 128-bit hash `r1` with its expected value `r2`;
 * both the low64 and high64 fields must match.
 * On mismatch, displays the number of the failing check (counted from 1
 * across all calls) together with both values, then exits with code 1.
 */
static void BMK_checkResult128(XXH128_hash_t r1, XXH128_hash_t r2)
{
    static int nbTests = 1;   /* number of the check currently performed */

    if ((r1.low64 == r2.low64) && (r1.high64 == r2.high64)) {
        nbTests++;
        return;
    }

    DISPLAY("\rError: 128-bit hash test %i: Internal sanity check failed.\n", nbTests);
    DISPLAY("\rGot { 0x%08X%08XULL, 0x%08X%08XULL }, expected { 0x%08X%08XULL, 0x%08X%08XULL } \n",
            (unsigned)(r1.low64>>32), (unsigned)r1.low64,
            (unsigned)(r1.high64>>32), (unsigned)r1.high64,
            (unsigned)(r2.low64>>32), (unsigned)r2.low64,
            (unsigned)(r2.high64>>32), (unsigned)r2.high64 );
    DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
            "or temporarily comment out the tests in BMK_sanityCheck.\n");
    exit(1);
}
963
964
/*
 * BMK_testXXH32():
 * Verifies that hashing `len` bytes of `data` with `seed` yields `Nresult`
 * through three routes: the one-shot function, the streaming API fed in a
 * single update, and the streaming API fed one byte at a time.
 * `data` may be NULL only when `len` is 0.
 */
static void BMK_testXXH32(const void* data, size_t len, U32 seed, U32 Nresult)
{
    const char* const bytes = (const char*)data;
    XXH32_state_t* const state = XXH32_createState();
    size_t n;

    assert(state != NULL);
    assert(len == 0 || data != NULL);

    /* one-shot */
    BMK_checkResult32(XXH32(data, len, seed), Nresult);

    /* streaming, whole input in one update */
    (void)XXH32_reset(state, seed);
    (void)XXH32_update(state, data, len);
    BMK_checkResult32(XXH32_digest(state), Nresult);

    /* streaming, one byte per update */
    (void)XXH32_reset(state, seed);
    for (n = 0; n < len; n++) {
        (void)XXH32_update(state, bytes + n, 1);
    }
    BMK_checkResult32(XXH32_digest(state), Nresult);

    XXH32_freeState(state);
}
985
/*
 * BMK_testXXH64():
 * Verifies that hashing `len` bytes of `data` with `seed` yields `Nresult`
 * through three routes: the one-shot function, the streaming API fed in a
 * single update, and the streaming API fed one byte at a time.
 * `data` may be NULL only when `len` is 0.
 */
static void BMK_testXXH64(const void* data, size_t len, U64 seed, U64 Nresult)
{
    const char* const bytes = (const char*)data;
    XXH64_state_t* const state = XXH64_createState();
    size_t n;

    assert(state != NULL);
    assert(len == 0 || data != NULL);

    /* one-shot */
    BMK_checkResult64(XXH64(data, len, seed), Nresult);

    /* streaming, whole input in one update */
    (void)XXH64_reset(state, seed);
    (void)XXH64_update(state, data, len);
    BMK_checkResult64(XXH64_digest(state), Nresult);

    /* streaming, one byte per update */
    (void)XXH64_reset(state, seed);
    for (n = 0; n < len; n++) {
        (void)XXH64_update(state, bytes + n, 1);
    }
    BMK_checkResult64(XXH64_digest(state), Nresult);

    XXH64_freeState(state);
}
1006
BMK_rand(void)1007 static U32 BMK_rand(void)
1008 {
1009 static U64 seed = PRIME32;
1010 seed *= PRIME64;
1011 return (U32)(seed >> 40);
1012 }
1013
1014
/*
 * BMK_testXXH3():
 * Verifies that the 64-bit XXH3 hash of `len` bytes of `data` with `seed`
 * equals `Nresult`, using:
 *  - the one-shot seeded function (plus the unseeded one when seed is 0),
 *  - the streaming API, fed in one update, in chunks of pseudo-random
 *    length, and one byte at a time.
 * `data` may be NULL only when `len` is 0.
 */
void BMK_testXXH3(const void* data, size_t len, U64 seed, U64 Nresult)
{
    const char* const input = (const char*)data;
    XXH3_state_t* state;

    assert(len == 0 || data != NULL);

    /* one-shot, seeded */
    BMK_checkResult64(XXH3_64bits_withSeed(data, len, seed), Nresult);

    /* the no-seed variant must produce the same result as seed==0 */
    if (seed == 0) {
        BMK_checkResult64(XXH3_64bits(data, len), Nresult);
    }

    /* streaming API */
    state = XXH3_createState();
    assert(state != NULL);

    /* single ingestion */
    (void)XXH3_64bits_reset_withSeed(state, seed);
    (void)XXH3_64bits_update(state, data, len);
    BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

    /* random ingestion : chunk lengths come from BMK_rand(), and may be 0 */
    {   size_t const modulo = (len > 2) ? len : 2;
        size_t consumed = 0;
        (void)XXH3_64bits_reset_withSeed(state, seed);
        while (consumed < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (consumed + chunk > len) chunk = len - consumed;
            (void)XXH3_64bits_update(state, input + consumed, chunk);
            consumed += chunk;
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);
    }

    /* byte by byte ingestion */
    {   size_t n;
        (void)XXH3_64bits_reset_withSeed(state, seed);
        for (n = 0; n < len; n++) {
            (void)XXH3_64bits_update(state, input + n, 1);
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);
    }

    XXH3_freeState(state);
}
1060
/*
 * BMK_testXXH3_withSecret():
 * Verifies that the 64-bit XXH3 hash of `len` bytes of `data`, keyed by
 * the `secretSize` bytes at `secret`, equals `Nresult`, using the one-shot
 * function and the streaming API (one update, chunks of pseudo-random
 * length, one byte at a time).
 * `data` may be NULL only when `len` is 0.
 */
void BMK_testXXH3_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, U64 Nresult)
{
    const char* const input = (const char*)data;
    XXH3_state_t* state;

    assert(len == 0 || data != NULL);

    /* one-shot */
    BMK_checkResult64(XXH3_64bits_withSecret(data, len, secret, secretSize), Nresult);

    /* streaming API */
    state = XXH3_createState();
    assert(state != NULL);

    /* single ingestion */
    (void)XXH3_64bits_reset_withSecret(state, secret, secretSize);
    (void)XXH3_64bits_update(state, data, len);
    BMK_checkResult64(XXH3_64bits_digest(state), Nresult);

    /* random ingestion : chunk lengths come from BMK_rand(), and may be 0 */
    {   size_t const modulo = (len > 2) ? len : 2;
        size_t consumed = 0;
        (void)XXH3_64bits_reset_withSecret(state, secret, secretSize);
        while (consumed < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (consumed + chunk > len) chunk = len - consumed;
            (void)XXH3_64bits_update(state, input + consumed, chunk);
            consumed += chunk;
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);
    }

    /* byte by byte ingestion */
    {   size_t n;
        (void)XXH3_64bits_reset_withSecret(state, secret, secretSize);
        for (n = 0; n < len; n++) {
            (void)XXH3_64bits_update(state, input + n, 1);
        }
        BMK_checkResult64(XXH3_64bits_digest(state), Nresult);
    }

    XXH3_freeState(state);
}
1099
/*
 * BMK_testXXH128():
 * Verifies that the 128-bit XXH3 hash of `len` bytes of `data` with `seed`
 * equals `Nresult`, using:
 *  - XXH3_128bits_withSeed() and XXH128() (plus XXH3_128bits() when seed is 0),
 *  - the streaming API, fed in one update, in chunks of pseudo-random
 *    length, and one byte at a time.
 */
void BMK_testXXH128(const void* data, size_t len, U64 seed, XXH128_hash_t Nresult)
{
    const char* const input = (const char*)data;
    XXH3_state_t* state;

    /* one-shot, seeded */
    BMK_checkResult128(XXH3_128bits_withSeed(data, len, seed), Nresult);

    /* XXH128() must be identical to XXH3_128bits_withSeed() */
    BMK_checkResult128(XXH128(data, len, seed), Nresult);

    /* the no-seed variant must produce the same result as seed==0 */
    if (seed == 0) {
        BMK_checkResult128(XXH3_128bits(data, len), Nresult);
    }

    /* streaming API */
    state = XXH3_createState();
    assert(state != NULL);

    /* single ingestion */
    (void)XXH3_128bits_reset_withSeed(state, seed);
    (void)XXH3_128bits_update(state, data, len);
    BMK_checkResult128(XXH3_128bits_digest(state), Nresult);

    /* random ingestion : chunk lengths come from BMK_rand(), and may be 0 */
    {   size_t const modulo = (len > 2) ? len : 2;
        size_t consumed = 0;
        (void)XXH3_128bits_reset_withSeed(state, seed);
        while (consumed < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (consumed + chunk > len) chunk = len - consumed;
            (void)XXH3_128bits_update(state, input + consumed, chunk);
            consumed += chunk;
        }
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);
    }

    /* byte by byte ingestion */
    {   size_t n;
        (void)XXH3_128bits_reset_withSeed(state, seed);
        for (n = 0; n < len; n++) {
            (void)XXH3_128bits_update(state, input + n, 1);
        }
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);
    }

    XXH3_freeState(state);
}
1149
/*
 * BMK_testXXH128_withSecret():
 * Verifies that the 128-bit XXH3 hash of `len` bytes of `data`, keyed by
 * the `secretSize` bytes at `secret`, equals `Nresult`, using the one-shot
 * function and the streaming API (one update, chunks of pseudo-random
 * length, one byte at a time).
 * `data` may be NULL only when `len` is 0.
 */
void BMK_testXXH128_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, XXH128_hash_t Nresult)
{
    const char* const input = (const char*)data;
    XXH3_state_t* state;

    assert(len == 0 || data != NULL);

    /* one-shot */
    BMK_checkResult128(XXH3_128bits_withSecret(data, len, secret, secretSize), Nresult);

    /* streaming API */
    state = XXH3_createState();
    assert(state != NULL);

    /* single ingestion */
    (void)XXH3_128bits_reset_withSecret(state, secret, secretSize);
    (void)XXH3_128bits_update(state, data, len);
    BMK_checkResult128(XXH3_128bits_digest(state), Nresult);

    /* random ingestion : chunk lengths come from BMK_rand(), and may be 0 */
    {   size_t const modulo = (len > 2) ? len : 2;
        size_t consumed = 0;
        (void)XXH3_128bits_reset_withSecret(state, secret, secretSize);
        while (consumed < len) {
            size_t chunk = (size_t)(BMK_rand()) % modulo;
            if (consumed + chunk > len) chunk = len - consumed;
            (void)XXH3_128bits_update(state, input + consumed, chunk);
            consumed += chunk;
        }
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);
    }

    /* byte by byte ingestion */
    {   size_t n;
        (void)XXH3_128bits_reset_withSecret(state, secret, secretSize);
        for (n = 0; n < len; n++) {
            (void)XXH3_128bits_update(state, input + n, 1);
        }
        BMK_checkResult128(XXH3_128bits_digest(state), Nresult);
    }

    XXH3_freeState(state);
}
1188
1189 #define SECRET_SAMPLE_NBBYTES 4
1190 typedef struct { U8 byte[SECRET_SAMPLE_NBBYTES]; } verifSample_t;
1191
BMK_testSecretGenerator(const void * customSeed,size_t len,verifSample_t result)1192 void BMK_testSecretGenerator(const void* customSeed, size_t len, verifSample_t result)
1193 {
1194 static int nbTests = 1;
1195 const int sampleIndex[SECRET_SAMPLE_NBBYTES] = { 0, 62, 131, 191};
1196 U8 secretBuffer[XXH3_SECRET_DEFAULT_SIZE] = {0};
1197 verifSample_t samples;
1198 int i;
1199
1200 XXH3_generateSecret(secretBuffer, customSeed, len);
1201 for (i=0; i<SECRET_SAMPLE_NBBYTES; i++) {
1202 samples.byte[i] = secretBuffer[sampleIndex[i]];
1203 }
1204 if (memcmp(&samples, &result, sizeof(result))) {
1205 DISPLAY("\rError: Secret generation test %i: Internal sanity check failed. \n", nbTests);
1206 DISPLAY("\rGot { 0x%02X, 0x%02X, 0x%02X, 0x%02X }, expected { 0x%02X, 0x%02X, 0x%02X, 0x%02X } \n",
1207 samples.byte[0], samples.byte[1], samples.byte[2], samples.byte[3],
1208 result.byte[0], result.byte[1], result.byte[2], result.byte[3] );
1209 exit(1);
1210 }
1211 nbTests++;
1212 }
1213
1214
1215 /*!
1216 * BMK_sanityCheck():
1217 * Runs a sanity check before the benchmark.
1218 *
1219 * Exits on an incorrect output.
1220 */
BMK_sanityCheck(void)1221 static void BMK_sanityCheck(void)
1222 {
1223 #define SANITY_BUFFER_SIZE 2367
1224 U8 sanityBuffer[SANITY_BUFFER_SIZE];
1225 BMK_fillTestBuffer(sanityBuffer, sizeof(sanityBuffer));
1226
1227 BMK_testXXH32(NULL, 0, 0, 0x02CC5D05);
1228 BMK_testXXH32(NULL, 0, PRIME32, 0x36B78AE7);
1229 BMK_testXXH32(sanityBuffer, 1, 0, 0xCF65B03E);
1230 BMK_testXXH32(sanityBuffer, 1, PRIME32, 0xB4545AA4);
1231 BMK_testXXH32(sanityBuffer, 14, 0, 0x1208E7E2);
1232 BMK_testXXH32(sanityBuffer, 14, PRIME32, 0x6AF1D1FE);
1233 BMK_testXXH32(sanityBuffer,222, 0, 0x5BD11DBD);
1234 BMK_testXXH32(sanityBuffer,222, PRIME32, 0x58803C5F);
1235
1236 BMK_testXXH64(NULL , 0, 0, 0xEF46DB3751D8E999ULL);
1237 BMK_testXXH64(NULL , 0, PRIME32, 0xAC75FDA2929B17EFULL);
1238 BMK_testXXH64(sanityBuffer, 1, 0, 0xE934A84ADB052768ULL);
1239 BMK_testXXH64(sanityBuffer, 1, PRIME32, 0x5014607643A9B4C3ULL);
1240 BMK_testXXH64(sanityBuffer, 4, 0, 0x9136A0DCA57457EEULL);
1241 BMK_testXXH64(sanityBuffer, 14, 0, 0x8282DCC4994E35C8ULL);
1242 BMK_testXXH64(sanityBuffer, 14, PRIME32, 0xC3BD6BF63DEB6DF0ULL);
1243 BMK_testXXH64(sanityBuffer,222, 0, 0xB641AE8CB691C174ULL);
1244 BMK_testXXH64(sanityBuffer,222, PRIME32, 0x20CB8AB7AE10C14AULL);
1245
1246 BMK_testXXH3(NULL, 0, 0, 0x2D06800538D394C2ULL); /* empty string */
1247 BMK_testXXH3(NULL, 0, PRIME64, 0xA8A6B918B2F0364AULL);
1248 BMK_testXXH3(sanityBuffer, 1, 0, 0xC44BDFF4074EECDBULL); /* 1 - 3 */
1249 BMK_testXXH3(sanityBuffer, 1, PRIME64, 0x032BE332DD766EF8ULL); /* 1 - 3 */
1250 BMK_testXXH3(sanityBuffer, 6, 0, 0x27B56A84CD2D7325ULL); /* 4 - 8 */
1251 BMK_testXXH3(sanityBuffer, 6, PRIME64, 0x84589C116AB59AB9ULL); /* 4 - 8 */
1252 BMK_testXXH3(sanityBuffer, 12, 0, 0xA713DAF0DFBB77E7ULL); /* 9 - 16 */
1253 BMK_testXXH3(sanityBuffer, 12, PRIME64, 0xE7303E1B2336DE0EULL); /* 9 - 16 */
1254 BMK_testXXH3(sanityBuffer, 24, 0, 0xA3FE70BF9D3510EBULL); /* 17 - 32 */
1255 BMK_testXXH3(sanityBuffer, 24, PRIME64, 0x850E80FC35BDD690ULL); /* 17 - 32 */
1256 BMK_testXXH3(sanityBuffer, 48, 0, 0x397DA259ECBA1F11ULL); /* 33 - 64 */
1257 BMK_testXXH3(sanityBuffer, 48, PRIME64, 0xADC2CBAA44ACC616ULL); /* 33 - 64 */
1258 BMK_testXXH3(sanityBuffer, 80, 0, 0xBCDEFBBB2C47C90AULL); /* 65 - 96 */
1259 BMK_testXXH3(sanityBuffer, 80, PRIME64, 0xC6DD0CB699532E73ULL); /* 65 - 96 */
1260 BMK_testXXH3(sanityBuffer, 195, 0, 0xCD94217EE362EC3AULL); /* 129-240 */
1261 BMK_testXXH3(sanityBuffer, 195, PRIME64, 0xBA68003D370CB3D9ULL); /* 129-240 */
1262
1263 BMK_testXXH3(sanityBuffer, 403, 0, 0xCDEB804D65C6DEA4ULL); /* one block, last stripe is overlapping */
1264 BMK_testXXH3(sanityBuffer, 403, PRIME64, 0x6259F6ECFD6443FDULL); /* one block, last stripe is overlapping */
1265 BMK_testXXH3(sanityBuffer, 512, 0, 0x617E49599013CB6BULL); /* one block, finishing at stripe boundary */
1266 BMK_testXXH3(sanityBuffer, 512, PRIME64, 0x3CE457DE14C27708ULL); /* one block, finishing at stripe boundary */
1267 BMK_testXXH3(sanityBuffer,2048, 0, 0xDD59E2C3A5F038E0ULL); /* 2 blocks, finishing at block boundary */
1268 BMK_testXXH3(sanityBuffer,2048, PRIME64, 0x66F81670669ABABCULL); /* 2 blocks, finishing at block boundary */
1269 BMK_testXXH3(sanityBuffer,2240, 0, 0x6E73A90539CF2948ULL); /* 3 blocks, finishing at stripe boundary */
1270 BMK_testXXH3(sanityBuffer,2240, PRIME64, 0x757BA8487D1B5247ULL); /* 3 blocks, finishing at stripe boundary */
1271 BMK_testXXH3(sanityBuffer,2367, 0, 0xCB37AEB9E5D361EDULL); /* 3 blocks, last stripe is overlapping */
1272 BMK_testXXH3(sanityBuffer,2367, PRIME64, 0xD2DB3415B942B42AULL); /* 3 blocks, last stripe is overlapping */
1273
1274 /* XXH3 with Custom Secret */
1275 { const void* const secret = sanityBuffer + 7;
1276 const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11;
1277 assert(sizeof(sanityBuffer) >= 7 + secretSize);
1278 BMK_testXXH3_withSecret(NULL, 0, secret, secretSize, 0x3559D64878C5C66CULL); /* empty string */
1279 BMK_testXXH3_withSecret(sanityBuffer, 1, secret, secretSize, 0x8A52451418B2DA4DULL); /* 1 - 3 */
1280 BMK_testXXH3_withSecret(sanityBuffer, 6, secret, secretSize, 0x82C90AB0519369ADULL); /* 4 - 8 */
1281 BMK_testXXH3_withSecret(sanityBuffer, 12, secret, secretSize, 0x14631E773B78EC57ULL); /* 9 - 16 */
1282 BMK_testXXH3_withSecret(sanityBuffer, 24, secret, secretSize, 0xCDD5542E4A9D9FE8ULL); /* 17 - 32 */
1283 BMK_testXXH3_withSecret(sanityBuffer, 48, secret, secretSize, 0x33ABD54D094B2534ULL); /* 33 - 64 */
1284 BMK_testXXH3_withSecret(sanityBuffer, 80, secret, secretSize, 0xE687BA1684965297ULL); /* 65 - 96 */
1285 BMK_testXXH3_withSecret(sanityBuffer, 195, secret, secretSize, 0xA057273F5EECFB20ULL); /* 129-240 */
1286
1287 BMK_testXXH3_withSecret(sanityBuffer, 403, secret, secretSize, 0x14546019124D43B8ULL); /* one block, last stripe is overlapping */
1288 BMK_testXXH3_withSecret(sanityBuffer, 512, secret, secretSize, 0x7564693DD526E28DULL); /* one block, finishing at stripe boundary */
1289 BMK_testXXH3_withSecret(sanityBuffer,2048, secret, secretSize, 0xD32E975821D6519FULL); /* >= 2 blocks, at least one scrambling */
1290 BMK_testXXH3_withSecret(sanityBuffer,2367, secret, secretSize, 0x293FA8E5173BB5E7ULL); /* >= 2 blocks, at least one scrambling, last stripe unaligned */
1291
1292 BMK_testXXH3_withSecret(sanityBuffer,64*10*3, secret, secretSize, 0x751D2EC54BC6038BULL); /* exactly 3 full blocks, not a multiple of 256 */
1293 }
1294
1295 /* XXH128 */
1296 { XXH128_hash_t const expected = { 0x6001C324468D497FULL, 0x99AA06D3014798D8ULL };
1297 BMK_testXXH128(NULL, 0, 0, expected); /* empty string */
1298 }
1299 { XXH128_hash_t const expected = { 0x5444F7869C671AB0ULL, 0x92220AE55E14AB50ULL };
1300 BMK_testXXH128(NULL, 0, PRIME32, expected);
1301 }
1302 { XXH128_hash_t const expected = { 0xC44BDFF4074EECDBULL, 0xA6CD5E9392000F6AULL };
1303 BMK_testXXH128(sanityBuffer, 1, 0, expected); /* 1-3 */
1304 }
1305 { XXH128_hash_t const expected = { 0xB53D5557E7F76F8DULL, 0x89B99554BA22467CULL };
1306 BMK_testXXH128(sanityBuffer, 1, PRIME32, expected); /* 1-3 */
1307 }
1308 { XXH128_hash_t const expected = { 0x3E7039BDDA43CFC6ULL, 0x082AFE0B8162D12AULL };
1309 BMK_testXXH128(sanityBuffer, 6, 0, expected); /* 4-8 */
1310 }
1311 { XXH128_hash_t const expected = { 0x269D8F70BE98856EULL, 0x5A865B5389ABD2B1ULL };
1312 BMK_testXXH128(sanityBuffer, 6, PRIME32, expected); /* 4-8 */
1313 }
1314 { XXH128_hash_t const expected = { 0x061A192713F69AD9ULL, 0x6E3EFD8FC7802B18ULL };
1315 BMK_testXXH128(sanityBuffer, 12, 0, expected); /* 9-16 */
1316 }
1317 { XXH128_hash_t const expected = { 0x9BE9F9A67F3C7DFBULL, 0xD7E09D518A3405D3ULL };
1318 BMK_testXXH128(sanityBuffer, 12, PRIME32, expected); /* 9-16 */
1319 }
1320 { XXH128_hash_t const expected = { 0x1E7044D28B1B901DULL, 0x0CE966E4678D3761ULL };
1321 BMK_testXXH128(sanityBuffer, 24, 0, expected); /* 17-32 */
1322 }
1323 { XXH128_hash_t const expected = { 0xD7304C54EBAD40A9ULL, 0x3162026714A6A243ULL };
1324 BMK_testXXH128(sanityBuffer, 24, PRIME32, expected); /* 17-32 */
1325 }
1326 { XXH128_hash_t const expected = { 0xF942219AED80F67BULL, 0xA002AC4E5478227EULL };
1327 BMK_testXXH128(sanityBuffer, 48, 0, expected); /* 33-64 */
1328 }
1329 { XXH128_hash_t const expected = { 0x7BA3C3E453A1934EULL, 0x163ADDE36C072295ULL };
1330 BMK_testXXH128(sanityBuffer, 48, PRIME32, expected); /* 33-64 */
1331 }
1332 { XXH128_hash_t const expected = { 0x5E8BAFB9F95FB803ULL, 0x4952F58181AB0042ULL };
1333 BMK_testXXH128(sanityBuffer, 81, 0, expected); /* 65-96 */
1334 }
1335 { XXH128_hash_t const expected = { 0x703FBB3D7A5F755CULL, 0x2724EC7ADC750FB6ULL };
1336 BMK_testXXH128(sanityBuffer, 81, PRIME32, expected); /* 65-96 */
1337 }
1338 { XXH128_hash_t const expected = { 0xF1AEBD597CEC6B3AULL, 0x337E09641B948717ULL };
1339 BMK_testXXH128(sanityBuffer, 222, 0, expected); /* 129-240 */
1340 }
1341 { XXH128_hash_t const expected = { 0xAE995BB8AF917A8DULL, 0x91820016621E97F1ULL };
1342 BMK_testXXH128(sanityBuffer, 222, PRIME32, expected); /* 129-240 */
1343 }
1344 { XXH128_hash_t const expected = { 0xCDEB804D65C6DEA4ULL, 0x1B6DE21E332DD73DULL };
1345 BMK_testXXH128(sanityBuffer, 403, 0, expected); /* one block, last stripe is overlapping */
1346 }
1347 { XXH128_hash_t const expected = { 0x6259F6ECFD6443FDULL, 0xBED311971E0BE8F2ULL };
1348 BMK_testXXH128(sanityBuffer, 403, PRIME64, expected); /* one block, last stripe is overlapping */
1349 }
1350 { XXH128_hash_t const expected = { 0x617E49599013CB6BULL, 0x18D2D110DCC9BCA1ULL };
1351 BMK_testXXH128(sanityBuffer, 512, 0, expected); /* one block, finishing at stripe boundary */
1352 }
1353 { XXH128_hash_t const expected = { 0x3CE457DE14C27708ULL, 0x925D06B8EC5B8040ULL };
1354 BMK_testXXH128(sanityBuffer, 512, PRIME64, expected); /* one block, finishing at stripe boundary */
1355 }
1356 { XXH128_hash_t const expected = { 0xDD59E2C3A5F038E0ULL, 0xF736557FD47073A5ULL };
1357 BMK_testXXH128(sanityBuffer,2048, 0, expected); /* two blocks, finishing at block boundary */
1358 }
1359 { XXH128_hash_t const expected = { 0x230D43F30206260BULL, 0x7FB03F7E7186C3EAULL };
1360 BMK_testXXH128(sanityBuffer,2048, PRIME32, expected); /* two blocks, finishing at block boundary */
1361 }
1362 { XXH128_hash_t const expected = { 0x6E73A90539CF2948ULL, 0xCCB134FBFA7CE49DULL };
1363 BMK_testXXH128(sanityBuffer,2240, 0, expected); /* two blocks, ends at stripe boundary */
1364 }
1365 { XXH128_hash_t const expected = { 0xED385111126FBA6FULL, 0x50A1FE17B338995FULL };
1366 BMK_testXXH128(sanityBuffer,2240, PRIME32, expected); /* two blocks, ends at stripe boundary */
1367 }
1368 { XXH128_hash_t const expected = { 0xCB37AEB9E5D361EDULL, 0xE89C0F6FF369B427ULL };
1369 BMK_testXXH128(sanityBuffer,2367, 0, expected); /* two blocks, last stripe is overlapping */
1370 }
1371 { XXH128_hash_t const expected = { 0x6F5360AE69C2F406ULL, 0xD23AAE4B76C31ECBULL };
1372 BMK_testXXH128(sanityBuffer,2367, PRIME32, expected); /* two blocks, last stripe is overlapping */
1373 }
1374
1375 /* XXH128 with custom Secret */
1376 { const void* const secret = sanityBuffer + 7;
1377 const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11;
1378 assert(sizeof(sanityBuffer) >= 7 + secretSize);
1379
1380 { XXH128_hash_t const expected = { 0x005923CCEECBE8AEULL, 0x5F70F4EA232F1D38ULL };
1381 BMK_testXXH128_withSecret(NULL, 0, secret, secretSize, expected); /* empty string */
1382 }
1383 { XXH128_hash_t const expected = { 0x8A52451418B2DA4DULL, 0x3A66AF5A9819198EULL };
1384 BMK_testXXH128_withSecret(sanityBuffer, 1, secret, secretSize, expected); /* 1-3 */
1385 }
1386 { XXH128_hash_t const expected = { 0x0B61C8ACA7D4778FULL, 0x376BD91B6432F36DULL };
1387 BMK_testXXH128_withSecret(sanityBuffer, 6, secret, secretSize, expected); /* 4-8 */
1388 }
1389 { XXH128_hash_t const expected = { 0xAF82F6EBA263D7D8ULL, 0x90A3C2D839F57D0FULL };
1390 BMK_testXXH128_withSecret(sanityBuffer, 12, secret, secretSize, expected); /* 9-16 */
1391 }
1392 }
1393
1394 /* secret generator */
1395 { verifSample_t const expected = { { 0xB8, 0x26, 0x83, 0x7E } };
1396 BMK_testSecretGenerator(NULL, 0, expected);
1397 }
1398
1399 { verifSample_t const expected = { { 0xA6, 0x16, 0x06, 0x7B } };
1400 BMK_testSecretGenerator(sanityBuffer, 1, expected);
1401 }
1402
1403 { verifSample_t const expected = { { 0xDA, 0x2A, 0x12, 0x11 } };
1404 BMK_testSecretGenerator(sanityBuffer, XXH3_SECRET_SIZE_MIN - 1, expected);
1405 }
1406
1407 { verifSample_t const expected = { { 0x7E, 0x48, 0x0C, 0xA7 } };
1408 BMK_testSecretGenerator(sanityBuffer, XXH3_SECRET_DEFAULT_SIZE + 500, expected);
1409 }
1410
1411 DISPLAYLEVEL(3, "\r%70s\r", ""); /* Clean display line */
1412 DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");
1413 }
1414
1415
1416 /* ********************************************************
1417 * File Hashing
1418 **********************************************************/
/* stat_t : the buffer type handed to _stat64() (MSVC) or stat() (elsewhere)
 * by XSUM_isDirectory() */
#if defined(_MSC_VER)
typedef struct __stat64 stat_t;
typedef int mode_t;
#else
typedef struct stat stat_t;
#endif

#include <sys/types.h> /* struct stat / __stat64 */
#include <sys/stat.h> /* stat() / _stat64() */
1428
XSUM_isDirectory(const char * infilename)1429 int XSUM_isDirectory(const char* infilename)
1430 {
1431 stat_t statbuf;
1432 #if defined(_MSC_VER)
1433 int const r = _stat64(infilename, &statbuf);
1434 if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
1435 #else
1436 int const r = stat(infilename, &statbuf);
1437 if (!r && S_ISDIR(statbuf.st_mode)) return 1;
1438 #endif
1439 return 0;
1440 }
1441
1442 /* for support of --little-endian display mode */
BMK_display_LittleEndian(const void * ptr,size_t length)1443 static void BMK_display_LittleEndian(const void* ptr, size_t length)
1444 {
1445 const U8* const p = (const U8*)ptr;
1446 size_t idx;
1447 for (idx=length-1; idx<length; idx--) /* intentional underflow to negative to detect end */
1448 DISPLAYRESULT("%02x", p[idx]);
1449 }
1450
BMK_display_BigEndian(const void * ptr,size_t length)1451 static void BMK_display_BigEndian(const void* ptr, size_t length)
1452 {
1453 const U8* const p = (const U8*)ptr;
1454 size_t idx;
1455 for (idx=0; idx<length; idx++)
1456 DISPLAYRESULT("%02x", p[idx]);
1457 }
1458
/*
 * Multihash:
 * Result of one hashing operation.
 * XSUM_hashStream() fills only the member matching the selected algorithm.
 */
typedef union {
    XXH32_hash_t xxh32;
    XXH64_hash_t xxh64;
    XXH128_hash_t xxh128;
} Multihash;
1464
1465 /*
1466 * XSUM_hashStream:
1467 * Reads data from `inFile`, generating an incremental hash of type hashType,
1468 * using `buffer` of size `blockSize` for temporary storage.
1469 */
1470 static Multihash
XSUM_hashStream(FILE * inFile,AlgoSelected hashType,void * buffer,size_t blockSize)1471 XSUM_hashStream(FILE* inFile,
1472 AlgoSelected hashType,
1473 void* buffer, size_t blockSize)
1474 {
1475 XXH32_state_t state32;
1476 XXH64_state_t state64;
1477 XXH3_state_t state128;
1478
1479 /* Init */
1480 (void)XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
1481 (void)XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
1482 (void)XXH3_128bits_reset(&state128);
1483
1484 /* Load file & update hash */
1485 { size_t readSize;
1486 while ((readSize = fread(buffer, 1, blockSize, inFile)) > 0) {
1487 switch(hashType)
1488 {
1489 case algo_xxh32:
1490 (void)XXH32_update(&state32, buffer, readSize);
1491 break;
1492 case algo_xxh64:
1493 (void)XXH64_update(&state64, buffer, readSize);
1494 break;
1495 case algo_xxh128:
1496 (void)XXH3_128bits_update(&state128, buffer, readSize);
1497 break;
1498 default:
1499 assert(0);
1500 }
1501 }
1502 if (ferror(inFile)) {
1503 DISPLAY("Error: a failure occurred reading the input file.\n");
1504 exit(1);
1505 } }
1506
1507 { Multihash finalHash = {0};
1508 switch(hashType)
1509 {
1510 case algo_xxh32:
1511 finalHash.xxh32 = XXH32_digest(&state32);
1512 break;
1513 case algo_xxh64:
1514 finalHash.xxh64 = XXH64_digest(&state64);
1515 break;
1516 case algo_xxh128:
1517 finalHash.xxh128 = XXH3_128bits_digest(&state128);
1518 break;
1519 default:
1520 assert(0);
1521 }
1522 return finalHash;
1523 }
1524 }
1525
/*
 * Per-algorithm lookup tables.
 * Entries are listed in the order : algo_xxh32, algo_xxh64, algo_xxh128
 *  - XSUM_algoName    : label printed in BSD-style lines
 *  - XSUM_algoLE_name : label printed in BSD-style little-endian lines
 *  - XSUM_algoLength  : size of the hash, in bytes
 */
static const char* XSUM_algoName[] = { "XXH32", "XXH64", "XXH128" };
static const char* XSUM_algoLE_name[] = { "XXH32_LE", "XXH64_LE", "XXH128_LE" };
static const size_t XSUM_algoLength[] = { 4, 8, 16 };

/* Number of elements of `table`, which must be a true array (not a pointer).
 * The argument is parenthesized so any array expression can be passed. */
#define XSUM_TABLE_ELT_SIZE(table)   (sizeof(table) / sizeof((table)[0]))

typedef void (*XSUM_displayHash_f)(const void*, size_t);  /* display function signature */
1534
/*
 * XSUM_printLine_BSD_internal():
 * Prints one BSD-style result line : `<algo> (<filename>) = <hash>`.
 *  - `canonicalHash` : hash bytes to display; their count is taken from
 *                      XSUM_algoLength[hashType]
 *  - `algoString`    : table of labels, indexed by `hashType`
 *  - `f_displayHash` : function printing the hash bytes
 */
static void XSUM_printLine_BSD_internal(const char* filename,
                                        const void* canonicalHash, const AlgoSelected hashType,
                                        const char* algoString[],
                                        XSUM_displayHash_f f_displayHash)
{
    /* hashType indexes the tables below : it must be strictly below their size */
    assert(0 <= hashType && (size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const char* const typeString = algoString[hashType];
        const size_t hashLength = XSUM_algoLength[hashType];
        DISPLAYRESULT("%s (%s) = ", typeString, filename);
        f_displayHash(canonicalHash, hashLength);
        DISPLAYRESULT("\n");
}   }
1547
/* BSD-style line using the "_LE" algorithm labels;
 * hash bytes are printed by BMK_display_LittleEndian() */
static void XSUM_printLine_BSD_LE(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoLE_name, BMK_display_LittleEndian);
}
1552
/* BSD-style line using the plain algorithm labels;
 * hash bytes are printed by BMK_display_BigEndian() */
static void XSUM_printLine_BSD(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoName, BMK_display_BigEndian);
}
1557
/*
 * XSUM_printLine_GNU_internal():
 * Prints one GNU-style result line : the hash, followed by `filename`.
 *  - `canonicalHash` : hash bytes to display; their count is taken from
 *                      XSUM_algoLength[hashType]
 *  - `f_displayHash` : function printing the hash bytes
 */
static void XSUM_printLine_GNU_internal(const char* filename,
                                        const void* canonicalHash, const AlgoSelected hashType,
                                        XSUM_displayHash_f f_displayHash)
{
    /* hashType indexes XSUM_algoLength : it must be strictly below the table size */
    assert(0 <= hashType && (size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const size_t hashLength = XSUM_algoLength[hashType];
        f_displayHash(canonicalHash, hashLength);
        DISPLAYRESULT(" %s\n", filename);
}   }
1567
/* GNU-style line; hash bytes are printed by BMK_display_BigEndian() */
static void XSUM_printLine_GNU(const char* filename,
                               const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, BMK_display_BigEndian);
}
1573
/* GNU-style line; hash bytes are printed by BMK_display_LittleEndian() */
static void XSUM_printLine_GNU_LE(const char* filename,
                                  const void* canonicalHash, const AlgoSelected hashType)
{
    XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, BMK_display_LittleEndian);
}
1579
/* Byte order used when printing a hash */
typedef enum { big_endian, little_endian} Display_endianess;

/* Layout of a result line : GNU-style or BSD-style */
typedef enum { display_gnu, display_bsd } Display_convention;

typedef void (*XSUM_displayLine_f)(const char*, const void*, AlgoSelected); /* line display signature */

/* Line printers, looked up as [Display_convention][Display_endianess] */
static XSUM_displayLine_f XSUM_kDisplayLine_fTable[2][2] = {
    { XSUM_printLine_GNU, XSUM_printLine_GNU_LE },
    { XSUM_printLine_BSD, XSUM_printLine_BSD_LE }
};
1590
XSUM_hashFile(const char * fileName,const AlgoSelected hashType,const Display_endianess displayEndianess,const Display_convention convention)1591 static int XSUM_hashFile(const char* fileName,
1592 const AlgoSelected hashType,
1593 const Display_endianess displayEndianess,
1594 const Display_convention convention)
1595 {
1596 size_t const blockSize = 64 KB;
1597 XSUM_displayLine_f const f_displayLine = XSUM_kDisplayLine_fTable[convention][displayEndianess];
1598 FILE* inFile;
1599 Multihash hashValue;
1600 assert(displayEndianess==big_endian || displayEndianess==little_endian);
1601 assert(convention==display_gnu || convention==display_bsd);
1602
1603 /* Check file existence */
1604 if (fileName == stdinName) {
1605 inFile = stdin;
1606 fileName = "stdin";
1607 SET_BINARY_MODE(stdin);
1608 } else {
1609 if (XSUM_isDirectory(fileName)) {
1610 DISPLAY("xxhsum: %s: Is a directory \n", fileName);
1611 return 1;
1612 }
1613 inFile = XXH_fopen( fileName, "rb" );
1614 if (inFile==NULL) {
1615 DISPLAY("Error: Could not open '%s': %s. \n", fileName, strerror(errno));
1616 return 1;
1617 } }
1618
1619 /* Memory allocation & streaming */
1620 { void* const buffer = malloc(blockSize);
1621 if (buffer == NULL) {
1622 DISPLAY("\nError: Out of memory.\n");
1623 fclose(inFile);
1624 return 1;
1625 }
1626
1627 /* Stream file & update hash */
1628 hashValue = XSUM_hashStream(inFile, hashType, buffer, blockSize);
1629
1630 fclose(inFile);
1631 free(buffer);
1632 }
1633
1634 /* display Hash value in selected format */
1635 switch(hashType)
1636 {
1637 case algo_xxh32:
1638 { XXH32_canonical_t hcbe32;
1639 (void)XXH32_canonicalFromHash(&hcbe32, hashValue.xxh32);
1640 f_displayLine(fileName, &hcbe32, hashType);
1641 break;
1642 }
1643 case algo_xxh64:
1644 { XXH64_canonical_t hcbe64;
1645 (void)XXH64_canonicalFromHash(&hcbe64, hashValue.xxh64);
1646 f_displayLine(fileName, &hcbe64, hashType);
1647 break;
1648 }
1649 case algo_xxh128:
1650 { XXH128_canonical_t hcbe128;
1651 (void)XXH128_canonicalFromHash(&hcbe128, hashValue.xxh128);
1652 f_displayLine(fileName, &hcbe128, hashType);
1653 break;
1654 }
1655 default:
1656 assert(0); /* not possible */
1657 }
1658
1659 return 0;
1660 }
1661
1662
1663 /*
1664 * XSUM_hashFiles:
1665 * If fnTotal==0, read from stdin instead.
1666 */
XSUM_hashFiles(const char * const * fnList,int fnTotal,AlgoSelected hashType,Display_endianess displayEndianess,Display_convention convention)1667 static int XSUM_hashFiles(const char*const * fnList, int fnTotal,
1668 AlgoSelected hashType,
1669 Display_endianess displayEndianess,
1670 Display_convention convention)
1671 {
1672 int fnNb;
1673 int result = 0;
1674
1675 if (fnTotal==0)
1676 return XSUM_hashFile(stdinName, hashType, displayEndianess, convention);
1677
1678 for (fnNb=0; fnNb<fnTotal; fnNb++)
1679 result |= XSUM_hashFile(fnList[fnNb], hashType, displayEndianess, convention);
1680 DISPLAYLEVEL(2, "\r%70s\r", "");
1681 return result;
1682 }
1683
1684
/* Outcome of getLine(). */
typedef enum {
    GetLine_ok,                   /* a line was stored in the buffer */
    GetLine_eof,                  /* end of stream met before any character was read */
    GetLine_exceedMaxLineLength,  /* the line does not fit within MAX_LINE_LENGTH */
    GetLine_outOfMemory           /* the line buffer could not be allocated or grown */
} GetLineResult;
1691
/* Outcome of canonicalFromString(). */
typedef enum {
    CanonicalFromString_ok,            /* every character pair was valid hexadecimal */
    CanonicalFromString_invalidFormat  /* a non-hexadecimal character was met */
} CanonicalFromStringResult;
1696
/* Outcome of parseLine(). */
typedef enum {
    ParseLine_ok,            /* the line was recognized and the ParsedLine filled */
    ParseLine_invalidFormat  /* the line matches neither supported checksum line format */
} ParseLineResult;
1701
/* Verification status of one checksum line, as computed by parseFile1(). */
typedef enum {
    LineStatus_hashOk,        /* the computed hash equals the listed one */
    LineStatus_hashFailed,    /* the hashes differ (also the initial status of every line) */
    LineStatus_failedToOpen   /* the listed file could not be opened */
} LineStatus;
1707
/*
 * Storage for the canonical form of a parsed hash.
 * Only one member is meaningful at a time; parseLine() records which one
 * in ParsedLine.xxhBits.
 */
typedef union {
    XXH32_canonical_t xxh32;
    XXH64_canonical_t xxh64;
    XXH128_canonical_t xxh128;
} Canonical;
1713
/* Result of parsing one checksum line (see parseLine()). */
typedef struct {
    Canonical   canonical;  /* hash listed on the line, in canonical form */
    const char* filename;   /* points inside the parsed line buffer; not owned */
    int         xxhBits;    /* canonical type: 32:xxh32, 64:xxh64, 128:xxh128 */
} ParsedLine;
1719
/* Counters filled by parseFile1() while verifying one checksum file. */
typedef struct {
    unsigned long   nProperlyFormattedLines;    /* lines accepted by parseLine() */
    unsigned long   nImproperlyFormattedLines;  /* lines rejected by parseLine() */
    unsigned long   nMismatchedChecksums;       /* files whose computed hash differs from the listed one */
    unsigned long   nOpenOrReadFailures;        /* listed files that could not be opened */
    unsigned long   nMixedFormatLines;          /* NOTE(review): no code visible here updates this counter */
    int             quit;                       /* set to 1 when parsing was aborted on an error */
} ParseFileReport;
1728
/* Working state handed by checkFile() to parseFile1(). */
typedef struct {
    const char*     inFileName;  /* name of the checksum file, used in messages */
    FILE*           inFile;      /* stream the checksum lines are read from */
    int             lineMax;     /* current capacity of lineBuf, updated by getLine() */
    char*           lineBuf;     /* line buffer, possibly reallocated by getLine() */
    size_t          blockSize;   /* size of blockBuf */
    char*           blockBuf;    /* read buffer passed to XSUM_hashStream() */
    U32             strictMode;  /* stored by checkFile(); not read by parseFile1() */
    U32             statusOnly;  /* non-zero: no per-file result lines */
    U32             warn;        /* non-zero: report improperly formatted lines */
    U32             quiet;       /* non-zero: no "OK" lines */
    ParseFileReport report;      /* output: counters of the run */
} ParseFileArg;
1742
1743
1744 /*
1745 * Reads a line from stream `inFile`.
1746 * Returns GetLine_ok, if it reads line successfully.
1747 * Returns GetLine_eof, if stream reaches EOF.
1748 * Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH.
1749 * Returns GetLine_outOfMemory, if line buffer memory allocation failed.
1750 */
getLine(char ** lineBuf,int * lineMax,FILE * inFile)1751 static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile)
1752 {
1753 GetLineResult result = GetLine_ok;
1754 size_t len = 0;
1755
1756 if ((*lineBuf == NULL) || (*lineMax<1)) {
1757 free(*lineBuf); /* in case it's != NULL */
1758 *lineMax = 0;
1759 *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH);
1760 if(*lineBuf == NULL) return GetLine_outOfMemory;
1761 *lineMax = DEFAULT_LINE_LENGTH;
1762 }
1763
1764 for (;;) {
1765 const int c = fgetc(inFile);
1766 if (c == EOF) {
1767 /*
1768 * If we meet EOF before first character, returns GetLine_eof,
1769 * otherwise GetLine_ok.
1770 */
1771 if (len == 0) result = GetLine_eof;
1772 break;
1773 }
1774
1775 /* Make enough space for len+1 (for final NUL) bytes. */
1776 if (len+1 >= (size_t)*lineMax) {
1777 char* newLineBuf = NULL;
1778 size_t newBufSize = (size_t)*lineMax;
1779
1780 newBufSize += (newBufSize/2) + 1; /* x 1.5 */
1781 if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
1782 if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength;
1783
1784 newLineBuf = (char*) realloc(*lineBuf, newBufSize);
1785 if (newLineBuf == NULL) return GetLine_outOfMemory;
1786
1787 *lineBuf = newLineBuf;
1788 *lineMax = (int)newBufSize;
1789 }
1790
1791 if (c == '\n') break;
1792 (*lineBuf)[len++] = (char) c;
1793 }
1794
1795 (*lineBuf)[len] = '\0';
1796 return result;
1797 }
1798
1799
/*
 * Gives the numeric value (0..15) of one hexadecimal digit.
 * Both upper and lower case letters are accepted.
 * Returns -1 if the given character is not hexadecimal.
 */
static int charToHex(char c)
{
    if (c >= '0' && c <= '9') return (int)(c - '0');
    if (c >= 'a' && c <= 'f') return 0x0a + (int)(c - 'a');
    if (c >= 'A' && c <= 'F') return 0x0a + (int)(c - 'A');
    return -1;
}
1816
1817
1818 /*
1819 * Converts canonical ASCII hexadecimal string `hashStr`
1820 * to the big endian binary representation in unsigned char array `dst`.
1821 *
1822 * Returns CanonicalFromString_invalidFormat if hashStr is not well formatted.
1823 * Returns CanonicalFromString_ok if hashStr is parsed successfully.
1824 */
canonicalFromString(unsigned char * dst,size_t dstSize,const char * hashStr,int reverseBytes)1825 static CanonicalFromStringResult canonicalFromString(unsigned char* dst,
1826 size_t dstSize,
1827 const char* hashStr,
1828 int reverseBytes)
1829 {
1830 size_t i;
1831 for (i = 0; i < dstSize; ++i) {
1832 int h0, h1;
1833 size_t j = reverseBytes ? dstSize - i - 1 : i;
1834
1835 h0 = charToHex(hashStr[j*2 + 0]);
1836 if (h0 < 0) return CanonicalFromString_invalidFormat;
1837
1838 h1 = charToHex(hashStr[j*2 + 1]);
1839 if (h1 < 0) return CanonicalFromString_invalidFormat;
1840
1841 dst[i] = (unsigned char) ((h0 << 4) | h1);
1842 }
1843 return CanonicalFromString_ok;
1844 }
1845
1846
1847 /*
1848 * Parse single line of xxHash checksum file.
1849 * Returns ParseLine_invalidFormat if the line is not well formatted.
1850 * Returns ParseLine_ok if the line is parsed successfully.
1851 * And members of parseLine will be filled by parsed values.
1852 *
1853 * - line must be terminated with '\0' without a trailing newline.
1854 * - Since parsedLine.filename will point within given argument `line`,
1855 * users must keep `line`s content when they are using parsedLine.
1856 * - The line may be modified to carve up the information it contains.
1857 *
1858 * xxHash checksum lines should have the following format:
1859 *
1860 * <8, 16, or 32 hexadecimal char> <space> <space> <filename...> <'\0'>
1861 *
1862 * or:
1863 *
1864 * <algorithm> <' ('> <filename> <') = '> <hexstring> <'\0'>
1865 */
parseLine(ParsedLine * parsedLine,char * line,int rev)1866 static ParseLineResult parseLine(ParsedLine* parsedLine, char* line, int rev)
1867 {
1868 char* const firstSpace = strchr(line, ' ');
1869 const char* hash_ptr;
1870 size_t hash_len;
1871
1872 parsedLine->filename = NULL;
1873 parsedLine->xxhBits = 0;
1874
1875 if (firstSpace == NULL || !firstSpace[1]) return ParseLine_invalidFormat;
1876
1877 if (firstSpace[1] == '(') {
1878 char* lastSpace = strrchr(line, ' ');
1879 if (lastSpace - firstSpace < 5) return ParseLine_invalidFormat;
1880 if (lastSpace[-1] != '=' || lastSpace[-2] != ' ' || lastSpace[-3] != ')') return ParseLine_invalidFormat;
1881 lastSpace[-3] = '\0'; /* Terminate the filename */
1882 *firstSpace = '\0';
1883 rev = strstr(line, "_LE") != NULL; /* was output little-endian */
1884 hash_ptr = lastSpace + 1;
1885 hash_len = strlen(hash_ptr);
1886 /* NOTE: This currently ignores the hash description at the start of the string.
1887 * In the future we should parse it and verify that it matches the hash length.
1888 * It could also be used to allow both XXH64 & XXH3_64bits to be differentiated. */
1889 } else {
1890 hash_ptr = line;
1891 hash_len = (size_t)(firstSpace - line);
1892 }
1893
1894 switch (hash_len)
1895 {
1896 case 8:
1897 { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
1898 if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), hash_ptr, rev)
1899 != CanonicalFromString_ok) {
1900 return ParseLine_invalidFormat;
1901 }
1902 parsedLine->xxhBits = 32;
1903 break;
1904 }
1905
1906 case 16:
1907 { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
1908 if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), hash_ptr, rev)
1909 != CanonicalFromString_ok) {
1910 return ParseLine_invalidFormat;
1911 }
1912 parsedLine->xxhBits = 64;
1913 break;
1914 }
1915
1916 case 32:
1917 { XXH128_canonical_t* xxh128c = &parsedLine->canonical.xxh128;
1918 if (canonicalFromString(xxh128c->digest, sizeof(xxh128c->digest), hash_ptr, rev)
1919 != CanonicalFromString_ok) {
1920 return ParseLine_invalidFormat;
1921 }
1922 parsedLine->xxhBits = 128;
1923 break;
1924 }
1925
1926 default:
1927 return ParseLine_invalidFormat;
1928 break;
1929 }
1930
1931 /* note : skipping second separation character, which can be anything,
1932 * allowing insertion of custom markers such as '*' */
1933 parsedLine->filename = firstSpace + 2;
1934 return ParseLine_ok;
1935 }
1936
1937
1938 /*!
1939 * Parse xxHash checksum file.
1940 */
parseFile1(ParseFileArg * parseFileArg,int rev)1941 static void parseFile1(ParseFileArg* parseFileArg, int rev)
1942 {
1943 const char* const inFileName = parseFileArg->inFileName;
1944 ParseFileReport* const report = &parseFileArg->report;
1945
1946 unsigned long lineNumber = 0;
1947 memset(report, 0, sizeof(*report));
1948
1949 while (!report->quit) {
1950 LineStatus lineStatus = LineStatus_hashFailed;
1951 ParsedLine parsedLine;
1952 memset(&parsedLine, 0, sizeof(parsedLine));
1953
1954 lineNumber++;
1955 if (lineNumber == 0) {
1956 /* This is unlikely happen, but md5sum.c has this error check. */
1957 DISPLAY("%s: Error: Too many checksum lines\n", inFileName);
1958 report->quit = 1;
1959 break;
1960 }
1961
1962 { GetLineResult const getLineResult = getLine(&parseFileArg->lineBuf,
1963 &parseFileArg->lineMax,
1964 parseFileArg->inFile);
1965 if (getLineResult != GetLine_ok) {
1966 if (getLineResult == GetLine_eof) break;
1967
1968 switch (getLineResult)
1969 {
1970 case GetLine_ok:
1971 case GetLine_eof:
1972 /* These cases never happen. See above getLineResult related "if"s.
1973 They exist just for make gcc's -Wswitch-enum happy. */
1974 assert(0);
1975 break;
1976
1977 default:
1978 DISPLAY("%s:%lu: Error: Unknown error.\n", inFileName, lineNumber);
1979 break;
1980
1981 case GetLine_exceedMaxLineLength:
1982 DISPLAY("%s:%lu: Error: Line too long.\n", inFileName, lineNumber);
1983 break;
1984
1985 case GetLine_outOfMemory:
1986 DISPLAY("%s:%lu: Error: Out of memory.\n", inFileName, lineNumber);
1987 break;
1988 }
1989 report->quit = 1;
1990 break;
1991 } }
1992
1993 if (parseLine(&parsedLine, parseFileArg->lineBuf, rev) != ParseLine_ok) {
1994 report->nImproperlyFormattedLines++;
1995 if (parseFileArg->warn) {
1996 DISPLAY("%s:%lu: Error: Improperly formatted checksum line.\n",
1997 inFileName, lineNumber);
1998 }
1999 continue;
2000 }
2001
2002 report->nProperlyFormattedLines++;
2003
2004 do {
2005 FILE* const fp = XXH_fopen(parsedLine.filename, "rb");
2006 if (fp == NULL) {
2007 lineStatus = LineStatus_failedToOpen;
2008 break;
2009 }
2010 lineStatus = LineStatus_hashFailed;
2011 switch (parsedLine.xxhBits)
2012 {
2013 case 32:
2014 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh32, parseFileArg->blockBuf, parseFileArg->blockSize);
2015 if (xxh.xxh32 == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) {
2016 lineStatus = LineStatus_hashOk;
2017 } }
2018 break;
2019
2020 case 64:
2021 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh64, parseFileArg->blockBuf, parseFileArg->blockSize);
2022 if (xxh.xxh64 == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) {
2023 lineStatus = LineStatus_hashOk;
2024 } }
2025 break;
2026
2027 case 128:
2028 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh128, parseFileArg->blockBuf, parseFileArg->blockSize);
2029 if (XXH128_isEqual(xxh.xxh128, XXH128_hashFromCanonical(&parsedLine.canonical.xxh128))) {
2030 lineStatus = LineStatus_hashOk;
2031 } }
2032 break;
2033
2034 default:
2035 break;
2036 }
2037 fclose(fp);
2038 } while (0);
2039
2040 switch (lineStatus)
2041 {
2042 default:
2043 DISPLAY("%s: Error: Unknown error.\n", inFileName);
2044 report->quit = 1;
2045 break;
2046
2047 case LineStatus_failedToOpen:
2048 report->nOpenOrReadFailures++;
2049 if (!parseFileArg->statusOnly) {
2050 DISPLAYRESULT("%s:%lu: Could not open or read '%s': %s.\n",
2051 inFileName, lineNumber, parsedLine.filename, strerror(errno));
2052 }
2053 break;
2054
2055 case LineStatus_hashOk:
2056 case LineStatus_hashFailed:
2057 { int b = 1;
2058 if (lineStatus == LineStatus_hashOk) {
2059 /* If --quiet is specified, don't display "OK" */
2060 if (parseFileArg->quiet) b = 0;
2061 } else {
2062 report->nMismatchedChecksums++;
2063 }
2064
2065 if (b && !parseFileArg->statusOnly) {
2066 DISPLAYRESULT("%s: %s\n", parsedLine.filename
2067 , lineStatus == LineStatus_hashOk ? "OK" : "FAILED");
2068 } }
2069 break;
2070 }
2071 } /* while (!report->quit) */
2072 }
2073
2074
2075 /* Parse xxHash checksum file.
2076 * Returns 1, if all procedures were succeeded.
2077 * Returns 0, if any procedures was failed.
2078 *
2079 * If strictMode != 0, return error code if any line is invalid.
2080 * If statusOnly != 0, don't generate any output.
2081 * If warn != 0, print a warning message to stderr.
2082 * If quiet != 0, suppress "OK" line.
2083 *
2084 * "All procedures are succeeded" means:
2085 * - Checksum file contains at least one line and less than SIZE_T_MAX lines.
2086 * - All files are properly opened and read.
2087 * - All hash values match with its content.
2088 * - (strict mode) All lines in checksum file are consistent and well formatted.
2089 */
checkFile(const char * inFileName,const Display_endianess displayEndianess,U32 strictMode,U32 statusOnly,U32 warn,U32 quiet)2090 static int checkFile(const char* inFileName,
2091 const Display_endianess displayEndianess,
2092 U32 strictMode,
2093 U32 statusOnly,
2094 U32 warn,
2095 U32 quiet)
2096 {
2097 int result = 0;
2098 FILE* inFile = NULL;
2099 ParseFileArg parseFileArgBody;
2100 ParseFileArg* const parseFileArg = &parseFileArgBody;
2101 ParseFileReport* const report = &parseFileArg->report;
2102
2103 /* note: stdinName is special constant pointer. It is not a string. */
2104 if (inFileName == stdinName) {
2105 /*
2106 * Note: Since we expect text input for xxhash -c mode,
2107 * we don't set binary mode for stdin.
2108 */
2109 inFileName = "stdin";
2110 inFile = stdin;
2111 } else {
2112 inFile = XXH_fopen( inFileName, "rt" );
2113 }
2114
2115 if (inFile == NULL) {
2116 DISPLAY("Error: Could not open '%s': %s\n", inFileName, strerror(errno));
2117 return 0;
2118 }
2119
2120 parseFileArg->inFileName = inFileName;
2121 parseFileArg->inFile = inFile;
2122 parseFileArg->lineMax = DEFAULT_LINE_LENGTH;
2123 parseFileArg->lineBuf = (char*) malloc((size_t)parseFileArg->lineMax);
2124 parseFileArg->blockSize = 64 * 1024;
2125 parseFileArg->blockBuf = (char*) malloc(parseFileArg->blockSize);
2126 parseFileArg->strictMode = strictMode;
2127 parseFileArg->statusOnly = statusOnly;
2128 parseFileArg->warn = warn;
2129 parseFileArg->quiet = quiet;
2130
2131 if ( (parseFileArg->lineBuf == NULL)
2132 || (parseFileArg->blockBuf == NULL) ) {
2133 DISPLAY("Error: : memory allocation failed \n");
2134 exit(1);
2135 }
2136 parseFile1(parseFileArg, displayEndianess != big_endian);
2137
2138 free(parseFileArg->blockBuf);
2139 free(parseFileArg->lineBuf);
2140
2141 if (inFile != stdin) fclose(inFile);
2142
2143 /* Show error/warning messages. All messages are copied from md5sum.c
2144 */
2145 if (report->nProperlyFormattedLines == 0) {
2146 DISPLAY("%s: no properly formatted xxHash checksum lines found\n", inFileName);
2147 } else if (!statusOnly) {
2148 if (report->nImproperlyFormattedLines) {
2149 DISPLAYRESULT("%lu %s improperly formatted\n"
2150 , report->nImproperlyFormattedLines
2151 , report->nImproperlyFormattedLines == 1 ? "line is" : "lines are");
2152 }
2153 if (report->nOpenOrReadFailures) {
2154 DISPLAYRESULT("%lu listed %s could not be read\n"
2155 , report->nOpenOrReadFailures
2156 , report->nOpenOrReadFailures == 1 ? "file" : "files");
2157 }
2158 if (report->nMismatchedChecksums) {
2159 DISPLAYRESULT("%lu computed %s did NOT match\n"
2160 , report->nMismatchedChecksums
2161 , report->nMismatchedChecksums == 1 ? "checksum" : "checksums");
2162 } }
2163
2164 /* Result (exit) code logic is copied from
2165 * gnu coreutils/src/md5sum.c digest_check() */
2166 result = report->nProperlyFormattedLines != 0
2167 && report->nMismatchedChecksums == 0
2168 && report->nOpenOrReadFailures == 0
2169 && (!strictMode || report->nImproperlyFormattedLines == 0)
2170 && report->quit == 0;
2171 return result;
2172 }
2173
2174
/*
 * Verifies every checksum file of `fnList` (`fnTotal` entries) with checkFile();
 * when fnTotal==0, the checksum list is read from stdin instead.
 * The remaining arguments are forwarded unchanged to checkFile().
 * @return 0 if every checkFile() call succeeded, 1 otherwise.
 */
static int checkFiles(const char*const* fnList, int fnTotal,
                      const Display_endianess displayEndianess,
                      U32 strictMode,
                      U32 statusOnly,
                      U32 warn,
                      U32 quiet)
{
    int allOk = 1;

    if (fnTotal == 0) {
        /* note: stdinName is not a string. It's a special pointer. */
        allOk &= checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet);
    } else {
        int idx = 0;
        while (idx < fnTotal) {
            allOk &= checkFile(fnList[idx], displayEndianess, strictMode, statusOnly, warn, quiet);
            idx++;
        }
    }
    return !allOk;
}
2195
2196
2197 /* ********************************************************
2198 * Main
2199 **********************************************************/
2200
/*
 * usage:
 * Prints the short help text through DISPLAY().
 * `exename` is inserted into the welcome banner and into the "Usage:" line.
 * Always returns 0.
 */
static int usage(const char* exename)
{
    DISPLAY( WELCOME_MESSAGE(exename) );
    DISPLAY( "Print or verify checksums using fast non-cryptographic algorithm xxHash \n\n" );
    DISPLAY( "Usage: %s [options] [files] \n\n", exename);
    DISPLAY( "When no filename provided or when '-' is provided, uses stdin as input. \n");
    DISPLAY( "Options: \n");
    DISPLAY( " -H# algorithm selection: 0,1,2 or 32,64,128 (default: %i) \n", (int)g_defaultAlgo);
    DISPLAY( " -c, --check read xxHash checksum from [files] and check them \n");
    DISPLAY( " -h, --help display a long help page about advanced options \n");
    return 0;
}
2213
2214
/*
 * usage_advanced:
 * Prints the short help of usage(), followed by the advanced options
 * (version, output format, benchmark, and checksum verification modifiers).
 * Always returns 0.
 */
static int usage_advanced(const char* exename)
{
    usage(exename);
    DISPLAY( "Advanced :\n");
    DISPLAY( " -V, --version Display version information \n");
    DISPLAY( " --tag Produce BSD-style checksum lines \n");
    DISPLAY( " --little-endian Checksum values use little endian convention (default: big endian) \n");
    DISPLAY( " -b Run benchmark \n");
    DISPLAY( " -b# Bench only algorithm variant # \n");
    DISPLAY( " -i# Number of times to run the benchmark (default: %u) \n", (unsigned)g_nbIterations);
    DISPLAY( " -q, --quiet Don't display version header in benchmark mode \n");
    DISPLAY( "\n");
    /* -q/--quiet is listed twice on purpose: its effect depends on the mode */
    DISPLAY( "The following four options are useful only when verifying checksums (-c): \n");
    DISPLAY( " -q, --quiet Don't print OK for each successfully verified file \n");
    DISPLAY( " --status Don't output anything, status code shows success \n");
    DISPLAY( " --strict Exit non-zero for improperly formatted checksum lines \n");
    DISPLAY( " --warn Warn about improperly formatted checksum lines \n");
    return 0;
}
2234
/*
 * badusage:
 * Reports a command line error, then prints the short help.
 * Always returns 1, suitable as a failing exit code.
 */
static int badusage(const char* exename)
{
    DISPLAY("Wrong parameters\n\n");
    (void)usage(exename);
    return 1;
}
2241
/*
 * errorOut:
 * Prints `msg` through DISPLAY(), then terminates the program with exit code 1.
 * Does not return.
 */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
2246
/*
 * lastNameFromPath:
 * Returns a pointer, inside `path`, to the part that follows the last '/';
 * within that part, anything up to and including the last '\\' (windows
 * separator) is skipped too.
 * Returns `path` itself when it contains no separator.
 */
static const char* lastNameFromPath(const char* path)
{
    const char* base = path;
    const char* sep = strrchr(base, '/');
    if (sep != NULL) base = sep + 1;
    sep = strrchr(base, '\\');   /* windows */
    if (sep != NULL) base = sep + 1;
    return base;
}
2254
2255 /*!
2256 * readU32FromCharChecked():
2257 * @return 0 if success, and store the result in *value.
2258 * Allows and interprets K, KB, KiB, M, MB and MiB suffix.
2259 * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
2260 * @return 1 if an overflow error occurs
2261 */
readU32FromCharChecked(const char ** stringPtr,U32 * value)2262 static int readU32FromCharChecked(const char** stringPtr, U32* value)
2263 {
2264 static const U32 max = (((U32)(-1)) / 10) - 1;
2265 U32 result = 0;
2266 while ((**stringPtr >='0') && (**stringPtr <='9')) {
2267 if (result > max) return 1; /* overflow error */
2268 result *= 10;
2269 result += (U32)(**stringPtr - '0');
2270 (*stringPtr)++ ;
2271 }
2272 if ((**stringPtr=='K') || (**stringPtr=='M')) {
2273 U32 const maxK = ((U32)(-1)) >> 10;
2274 if (result > maxK) return 1; /* overflow error */
2275 result <<= 10;
2276 if (**stringPtr=='M') {
2277 if (result > maxK) return 1; /* overflow error */
2278 result <<= 10;
2279 }
2280 (*stringPtr)++; /* skip `K` or `M` */
2281 if (**stringPtr=='i') (*stringPtr)++;
2282 if (**stringPtr=='B') (*stringPtr)++;
2283 }
2284 *value = result;
2285 return 0;
2286 }
2287
2288 /*!
2289 * readU32FromChar():
2290 * @return: unsigned integer value read from input in `char` format.
2291 * allows and interprets K, KB, KiB, M, MB and MiB suffix.
2292 * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
2293 * Note: function will exit() program if digit sequence overflows
2294 */
readU32FromChar(const char ** stringPtr)2295 static U32 readU32FromChar(const char** stringPtr) {
2296 U32 result;
2297 if (readU32FromCharChecked(stringPtr, &result)) {
2298 static const char errorMsg[] = "Error: numeric value too large";
2299 errorOut(errorMsg);
2300 }
2301 return result;
2302 }
2303
/*
 * XXH_main:
 * Parses the command line, then runs exactly one mode:
 *  - help / version display (returns right away from the parsing loop),
 *  - benchmark (-b, --bench-all, --benchmark-all),
 *  - checksum verification (-c, --check),
 *  - hashing of the listed files, or of stdin when none is listed (default).
 * Parsing stops at the first argument that does not start with '-', or at "--":
 * everything from there on is treated as a contiguous list of file names.
 * @return the result of the selected mode; badusage() (1) on invalid options.
 */
static int XXH_main(int argc, const char* const* argv)
{
    int i, filenamesStart = 0;
    const char* const exename = lastNameFromPath(argv[0]);
    U32 benchmarkMode = 0;
    U32 fileCheckMode = 0;
    U32 strictMode = 0;
    U32 statusOnly = 0;
    U32 warn = 0;
    int explicitStdin = 0;
    U32 selectBenchIDs= 0;  /* 0 == use default k_testIDs_default, kBenchAll == bench all */
    static const U32 kBenchAll = 99;
    size_t keySize = XXH_DEFAULT_SAMPLE_SIZE;
    AlgoSelected algo = g_defaultAlgo;
    Display_endianess displayEndianess = big_endian;
    Display_convention convention = display_gnu;

    /* special case: xxhNNsum default to NN bits checksum */
    if (strstr(exename, "xxh32sum") != NULL) algo = g_defaultAlgo = algo_xxh32;
    if (strstr(exename, "xxh64sum") != NULL) algo = g_defaultAlgo = algo_xxh64;
    if (strstr(exename, "xxh128sum") != NULL) algo = g_defaultAlgo = algo_xxh128;

    for (i=1; i<argc; i++) {
        const char* argument = argv[i];
        assert(argument != NULL);

        /* long options: matched as whole arguments */
        if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; }
        if (!strcmp(argument, "--benchmark-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
        if (!strcmp(argument, "--bench-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
        if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
        if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; }
        if (!strcmp(argument, "--strict")) { strictMode = 1; continue; }
        if (!strcmp(argument, "--status")) { statusOnly = 1; continue; }
        if (!strcmp(argument, "--warn")) { warn = 1; continue; }
        if (!strcmp(argument, "--help")) { return usage_advanced(exename); }
        if (!strcmp(argument, "--version")) { DISPLAY(FULL_WELCOME_MESSAGE(exename)); BMK_sanityCheck(); return 0; }
        if (!strcmp(argument, "--tag")) { convention = display_bsd; continue; }

        if (!strcmp(argument, "--")) {
            if (filenamesStart==0 && i!=argc-1) filenamesStart=i+1; /* only supports a continuous list of filenames */
            break;  /* treat rest of arguments as strictly file names */
        }
        if (*argument != '-') {
            if (filenamesStart==0) filenamesStart=i;   /* only supports a continuous list of filenames */
            break;  /* treat rest of arguments as strictly file names */
        }

        /* command selection */
        argument++;   /* note: *argument=='-' */
        if (*argument == 0) explicitStdin = 1;   /* a lone "-" explicitly requests stdin */

        /* short options: several can be aggregated within one argument */
        while (*argument != 0) {
            switch(*argument)
            {
            /* Display version (unlike --version, BMK_sanityCheck() is not run here) */
            case 'V':
                DISPLAY(FULL_WELCOME_MESSAGE(exename)); return 0;

            /* Display help on usage */
            case 'h':
                return usage_advanced(exename);

            /* select hash algorithm */
            case 'H': argument++;
                switch(readU32FromChar(&argument)) {
                    case 0 :
                    case 32: algo = algo_xxh32; break;
                    case 1 :
                    case 64: algo = algo_xxh64; break;
                    case 2 :
                    case 128: algo = algo_xxh128; break;
                    default:
                        return badusage(exename);
                }
                break;

            /* File check mode */
            case 'c':
                fileCheckMode=1;
                argument++;
                break;

            /* Warning mode (file check mode only, alias of "--warn") */
            case 'w':
                warn=1;
                argument++;
                break;

            /* Trigger benchmark mode; accepts a comma-separated list of test IDs */
            case 'b':
                argument++;
                benchmarkMode = 1;
                do {
                    if (*argument == ',') argument++;
                    selectBenchIDs = readU32FromChar(&argument); /* select one specific test */
                    if (selectBenchIDs < NB_TESTFUNC) {
                        g_testIDs[selectBenchIDs] = 1;
                    } else
                        selectBenchIDs = kBenchAll;   /* out of range ID: bench everything */
                } while (*argument == ',');
                break;

            /* Modify Nb Iterations (benchmark only) */
            case 'i':
                argument++;
                g_nbIterations = readU32FromChar(&argument);
                break;

            /* Modify Block size (benchmark only) */
            case 'B':
                argument++;
                keySize = readU32FromChar(&argument);
                break;

            /* Modify verbosity of benchmark output (hidden option) */
            case 'q':
                argument++;
                g_displayLevel--;
                break;

            default:
                return badusage(exename);
            }
        }
    }   /* for(i=1; i<argc; i++) */

    /* Check benchmark mode */
    if (benchmarkMode) {
        DISPLAYLEVEL(2, FULL_WELCOME_MESSAGE(exename) );
        BMK_sanityCheck();
        if (selectBenchIDs == 0) memcpy(g_testIDs, k_testIDs_default, sizeof(g_testIDs));
        if (selectBenchIDs == kBenchAll) memset(g_testIDs, 1, sizeof(g_testIDs));
        if (filenamesStart==0) return BMK_benchInternal(keySize);
        return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart);
    }

    /* Check if input is defined as console; trigger an error in this case */
    if ( (filenamesStart==0) && IS_CONSOLE(stdin) && !explicitStdin)
        return badusage(exename);

    /* no file name listed: pass an empty list, which selects stdin in both modes */
    if (filenamesStart==0) filenamesStart = argc;
    if (fileCheckMode) {
        return checkFiles(argv+filenamesStart, argc-filenamesStart,
                          displayEndianess, strictMode, statusOnly, warn, (g_displayLevel < 2) /*quiet*/);
    } else {
        return XSUM_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess, convention);
    }
}
2452
2453 /* Windows main wrapper which properly handles UTF-8 command line arguments. */
2454 #ifdef _WIN32
/*
 * Converts a UTF-16 argv to UTF-8.
 *
 * Returns a newly allocated, NULL-terminated array of `argc` converted strings,
 * to be released with free_argv(), or NULL if the array could not be
 * allocated or if any argument could not be converted.
 *
 * XXH_main() requires every argument to be non-NULL, so a partially converted
 * array is never returned.
 * NOTE(review): assumes utf16_to_utf8() reports failure by returning NULL,
 * and that its result can be released with free(), as free_argv() does.
 */
static char** convert_argv(int argc, const wchar_t* const utf16_argv[])
{
    char** const utf8_argv = (char**)malloc((size_t)(argc + 1) * sizeof(char*));
    if (utf8_argv != NULL) {
        int i;
        for (i = 0; i < argc; i++) {
            utf8_argv[i] = utf16_to_utf8(utf16_argv[i]);
            if (utf8_argv[i] == NULL) {
                /* conversion failed: release what was converted so far */
                while (i > 0) {
                    i--;
                    free(utf8_argv[i]);
                }
                free(utf8_argv);
                return NULL;
            }
        }
        utf8_argv[argc] = NULL;
    }
    return utf8_argv;
}
/*
 * Frees arguments returned by convert_argv:
 * the first `argc` strings of `argv`, then the array itself.
 * A NULL `argv` is accepted and ignored.
 */
static void free_argv(int argc, char** argv)
{
    if (argv != NULL) {
        int idx = 0;
        while (idx < argc) {
            free(argv[idx]);
            idx++;
        }
        free(argv);
    }
}
2480
2481
/*
 * On Windows, main's argv parameter is useless. Instead of UTF-8, you get ANSI
 * encoding, and any unknown characters will show up as mojibake.
 *
 * Most programs are unaffected, but for us it means that files with Unicode
 * names cannot be opened.
 *
 * So the UTF-16 arguments of wmain are converted to UTF-8 here, preserving
 * Unicode, and handed to XXH_main().
 *
 * This function is called by `wmain()` when available, or by the
 * `__wgetmainargs()` based `main()` below on MinGW with Unicode disabled.
 *
 * Returns the result of XXH_main(), or 1 if the arguments cannot be converted.
 */
static int XXH_wmain(int argc, const wchar_t* const utf16_argv[])
{
    int exitCode;
    /* Convert the UTF-16 arguments to UTF-8. */
    char** const utf8_argv = convert_argv(argc, utf16_argv);

    if (utf8_argv == NULL) {
        /* An unfortunate but incredibly unlikely error. */
        fprintf(stderr, "Error converting command line arguments!\n");
        return 1;
    }

    /*
     * MinGW's terminal uses full block buffering for stderr.
     * This nonstandard behavior delays the display of text until the buffer
     * fills; disabling buffering makes it appear instantly.
     */
    setvbuf(stderr, NULL, _IONBF, 0);

    /* Call our real main function, then clean up */
    exitCode = XXH_main(argc, (const char* const *) utf8_argv);
    free_argv(argc, utf8_argv);
    return exitCode;
}
2524
2525 #if defined(_MSC_VER) /* MSVC always accepts wmain */ \
2526 || defined(_UNICODE) || defined(UNICODE) /* defined with -municode on MinGW-w64 */
2527
/* Preferred entry point: the real `wmain()`, which receives UTF-16 arguments
 * directly and forwards them to XXH_wmain(). */
#if defined(__cplusplus)
extern "C"
#endif
int wmain(int argc, const wchar_t* utf16_argv[])
{
    int const exitCode = XXH_wmain(argc, utf16_argv);
    return exitCode;
}
2536
2537 #else /* Non-Unicode MinGW */
2538
2539 /*
2540 * Wrap `XXH_wmain()` using `main()` and `__wgetmainargs()` on MinGW without
2541 * Unicode support.
2542 *
2543 * `__wgetmainargs()` is used in the CRT startup to retrieve the arguments for
2544 * `wmain()`, so we use it on MinGW to emulate `wmain()`.
2545 *
2546 * It is an internal function and not declared in any public headers, so we
2547 * have to declare it manually.
2548 *
2549 * An alternative that doesn't mess with internal APIs is `GetCommandLineW()`
2550 * with `CommandLineToArgvW()`, but the former doesn't expand wildcards and the
2551 * latter requires linking to Shell32.dll and its numerous dependencies.
2552 *
2553 * This method keeps our dependencies to kernel32.dll and the CRT.
2554 *
2555 * https://docs.microsoft.com/en-us/cpp/c-runtime-library/getmainargs-wgetmainargs?view=vs-2019
2556 */
/* Manual declaration of the structure expected as the last argument of
 * `__wgetmainargs()` (declared manually for the reasons given above). */
typedef struct {
    int newmode;   /* main() below passes 0, meaning "don't change new mode" */
} _startupinfo;
2560
/*
 * Manual prototype of the CRT internal function.
 * main() below treats a negative return value as failure and passes
 * DoWildCard=1 to get wildcards expanded.
 */
#ifdef __cplusplus
extern "C"
#endif
int __cdecl __wgetmainargs(
    int*          Argc,       /* out: number of arguments */
    wchar_t***    Argv,       /* out: UTF-16 arguments */
    wchar_t***    Env,        /* out: environment; unused by this program but required */
    int           DoWildCard,
    _startupinfo* StartInfo
    );
2571
main(int ansi_argc,const char * ansi_argv[])2572 int main(int ansi_argc, const char* ansi_argv[])
2573 {
2574 int utf16_argc;
2575 wchar_t** utf16_argv;
2576 wchar_t** utf16_envp; /* Unused but required */
2577 _startupinfo startinfo = {0}; /* 0 == don't change new mode */
2578
2579 /* Get wmain's UTF-16 arguments. Make sure we expand wildcards. */
2580 if (__wgetmainargs(&utf16_argc, &utf16_argv, &utf16_envp, 1, &startinfo) < 0)
2581 /* In the very unlikely case of an error, use the ANSI arguments. */
2582 return XXH_main(ansi_argc, ansi_argv);
2583
2584 /* Call XXH_wmain with our UTF-16 arguments */
2585 return XXH_wmain(utf16_argc, (const wchar_t* const *)utf16_argv);
2586 }
2587
2588 #endif /* Non-Unicode MinGW */
2589
2590 #else /* Not Windows */
2591
/* Non-Windows entry point: argv needs no conversion, so it is forwarded
 * as is to XXH_main(), whose result becomes the exit code. */
int main(int argc, const char* argv[])
{
    int const exitCode = XXH_main(argc, argv);
    return exitCode;
}
2597 #endif /* !Windows */
2598