1 /* 2 * xxhsum - Command line interface for xxhash algorithms 3 * Copyright (C) 2013-2020 Yann Collet 4 * 5 * GPL v2 License 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * You can contact the author at: 22 * - xxHash homepage: https://www.xxhash.com 23 * - xxHash source repository: https://github.com/Cyan4973/xxHash 24 */ 25 26 /* 27 * xxhsum: 28 * Provides hash value of a file content, or a list of files, or stdin 29 * Display convention is Big Endian, for both 32 and 64 bits algorithms 30 */ 31 32 33 /* ************************************ 34 * Compiler Options 35 **************************************/ 36 /* MS Visual */ 37 #if defined(_MSC_VER) || defined(_WIN32) 38 # ifndef _CRT_SECURE_NO_WARNINGS 39 # define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */ 40 # endif 41 #endif 42 43 /* Under Linux at least, pull in the *64 commands */ 44 #ifndef _LARGEFILE64_SOURCE 45 # define _LARGEFILE64_SOURCE 46 #endif 47 48 /* ************************************ 49 * Includes 50 **************************************/ 51 #include <limits.h> 52 #include <stdlib.h> /* malloc, calloc, free, exit */ 53 #include <string.h> /* strcmp, memcpy */ 54 #include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */ 55 #include <sys/types.h> /* stat, stat64, _stat64 */ 56 #include <sys/stat.h> /* stat, stat64, _stat64 */ 57 #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ 58 #include <assert.h> /* assert */ 59 #include <errno.h> /* errno */ 60 61 #define XXH_STATIC_LINKING_ONLY /* *_state_t */ 62 #include "xxhash.h" 63 64 #ifdef XXHSUM_DISPATCH 65 # include "xxh_x86dispatch.h" 66 #endif 67 68 69 /* ************************************ 70 * OS-Specific Includes 71 **************************************/ 72 #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ 73 || defined(__midipix__) || defined(__VMS)) 74 # if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \ 75 || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ 76 # define PLATFORM_POSIX_VERSION 200112L 77 # else 78 # if defined(__linux__) || defined(__linux) 79 # ifndef _POSIX_C_SOURCE 80 # define _POSIX_C_SOURCE 200112L /* use feature test macro */ 81 # endif 82 # endif 83 # include <unistd.h> /* declares _POSIX_VERSION */ 84 # if defined(_POSIX_VERSION) /* POSIX compliant */ 85 # define PLATFORM_POSIX_VERSION _POSIX_VERSION 86 # else 87 # define PLATFORM_POSIX_VERSION 0 88 # endif 89 # endif 90 #endif 91 #if !defined(PLATFORM_POSIX_VERSION) 92 # define PLATFORM_POSIX_VERSION -1 93 #endif 94 95 #if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) \ 96 || (PLATFORM_POSIX_VERSION >= 200112L) \ 97 || defined(__DJGPP__) \ 98 || defined(__MSYS__) 99 # include <unistd.h> /* isatty */ 100 # define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) 101 #elif defined(MSDOS) || defined(OS2) 102 # include <io.h> /* _isatty */ 103 # define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) 104 #elif defined(WIN32) || defined(_WIN32) 105 # include <io.h> /* _isatty */ 106 # include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ 107 # include <stdio.h> /* FILE */ 108 static __inline int IS_CONSOLE(FILE* stdStream) { 109 DWORD dummy; 110 return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy); 111 } 112 #else 113 # define IS_CONSOLE(stdStream) 0 114 #endif 115 116 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) 117 # include <fcntl.h> /* _O_BINARY */ 118 # include <io.h> /* _setmode, _fileno, _get_osfhandle */ 119 # if !defined(__DJGPP__) 120 # include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ 121 # include <winioctl.h> /* FSCTL_SET_SPARSE */ 122 # define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; } 123 # else 124 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) 125 # endif 126 #else 127 # define SET_BINARY_MODE(file) 128 #endif 129 130 #if !defined(S_ISREG) 131 # define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) 132 #endif 133 134 /* Unicode helpers for Windows to make UTF-8 act as it should. */ 135 #ifdef _WIN32 136 /* 137 * Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards. 138 * This version allows keeping the output length. 139 */ 140 static wchar_t* utf8_to_utf16_len(const char* str, int* lenOut) 141 { 142 int const len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); 143 if (lenOut != NULL) *lenOut = len; 144 if (len == 0) return NULL; 145 { wchar_t* buf = (wchar_t*)malloc((size_t)len * sizeof(wchar_t)); 146 if (buf != NULL) { 147 if (MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len) == 0) { 148 free(buf); 149 return NULL; 150 } } 151 return buf; 152 } 153 } 154 155 /* Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards. */ 156 static wchar_t* utf8_to_utf16(const char *str) 157 { 158 return utf8_to_utf16_len(str, NULL); 159 } 160 161 /* 162 * Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards. 163 * This version allows keeping the output length. 164 */ 165 static char* utf16_to_utf8_len(const wchar_t *str, int *lenOut) 166 { 167 int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); 168 if (lenOut != NULL) *lenOut = len; 169 if (len == 0) return NULL; 170 { char* const buf = (char*)malloc((size_t)len * sizeof(char)); 171 if (buf != NULL) { 172 if (WideCharToMultiByte(CP_UTF8, 0, str, -1, buf, len, NULL, NULL) == 0) { 173 free(buf); 174 return NULL; 175 } } 176 return buf; 177 } 178 } 179 180 /* Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards. */ 181 static char *utf16_to_utf8(const wchar_t *str) 182 { 183 return utf16_to_utf8_len(str, NULL); 184 } 185 186 /* 187 * fopen wrapper that supports UTF-8 188 * 189 * fopen will only accept ANSI filenames, which means that we can't open Unicode filenames. 190 * 191 * In order to open a Unicode filename, we need to convert filenames to UTF-16 and use _wfopen. 192 */ 193 static FILE* XXH_fopen_wrapped(const char *filename, const wchar_t *mode) 194 { 195 wchar_t* const wide_filename = utf8_to_utf16(filename); 196 if (wide_filename == NULL) return NULL; 197 { FILE* const f = _wfopen(wide_filename, mode); 198 free(wide_filename); 199 return f; 200 } 201 } 202 203 /* 204 * In case it isn't available, this is what MSVC 2019 defines in stdarg.h. 205 */ 206 #if defined(_MSC_VER) && !defined(__clang__) && !defined(va_copy) 207 # define va_copy(destination, source) ((destination) = (source)) 208 #endif 209 210 /* 211 * fprintf wrapper that supports UTF-8. 212 * 213 * fprintf doesn't properly handle Unicode on Windows. 214 * 215 * Additionally, it is codepage sensitive on console and may crash the program. 216 * 217 * Instead, we use vsnprintf, and either print with fwrite or convert to UTF-16 218 * for console output and use the codepage-independent WriteConsoleW. 219 * 220 * Credit to t-mat: https://github.com/t-mat/xxHash/commit/5691423 221 */ 222 static int fprintf_utf8(FILE *stream, const char *format, ...) 223 { 224 int result; 225 va_list args; 226 va_list copy; 227 228 va_start(args, format); 229 230 /* 231 * To be safe, make a va_copy. 232 * 233 * Note that Microsoft doesn't use va_copy in its sample code: 234 * https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/vsprintf-vsprintf-l-vswprintf-vswprintf-l-vswprintf-l?view=vs-2019 235 */ 236 va_copy(copy, args); 237 /* Counts the number of characters needed for vsnprintf. */ 238 result = _vscprintf(format, copy); 239 va_end(copy); 240 241 if (result > 0) { 242 /* Create a buffer for vsnprintf */ 243 const size_t nchar = (size_t)result + 1; 244 char* u8_str = (char*)malloc(nchar * sizeof(u8_str[0])); 245 246 if (u8_str == NULL) { 247 result = -1; 248 } else { 249 /* Generate the UTF-8 string with vsnprintf. */ 250 result = _vsnprintf(u8_str, nchar - 1, format, args); 251 u8_str[nchar - 1] = '\0'; 252 if (result > 0) { 253 /* 254 * Check if we are outputting to a console. Don't use IS_CONSOLE 255 * directly -- we don't need to call _get_osfhandle twice. 256 */ 257 int fileNb = _fileno(stream); 258 intptr_t handle_raw = _get_osfhandle(fileNb); 259 HANDLE handle = (HANDLE)handle_raw; 260 DWORD dwTemp; 261 262 if (handle_raw < 0) { 263 result = -1; 264 } else if (_isatty(fileNb) && GetConsoleMode(handle, &dwTemp)) { 265 /* 266 * Convert to UTF-16 and output with WriteConsoleW. 267 * 268 * This is codepage independent and works on Windows XP's 269 * default msvcrt.dll. 270 */ 271 int len; 272 wchar_t *const u16_buf = utf8_to_utf16_len(u8_str, &len); 273 if (u16_buf == NULL) { 274 result = -1; 275 } else { 276 if (WriteConsoleW(handle, u16_buf, (DWORD)len - 1, &dwTemp, NULL)) { 277 result = (int)dwTemp; 278 } else { 279 result = -1; 280 } 281 free(u16_buf); 282 } 283 } else { 284 /* fwrite the UTF-8 string if we are printing to a file */ 285 result = (int)fwrite(u8_str, 1, nchar - 1, stream); 286 if (result == 0) { 287 result = -1; 288 } 289 } 290 } 291 free(u8_str); 292 } 293 } 294 va_end(args); 295 return result; 296 } 297 /* 298 * Since we always use literals in the "mode" argument, it is just easier to append "L" to 299 * the string to make it UTF-16 and avoid the hassle of a second manual conversion. 300 */ 301 # define XXH_fopen(filename, mode) XXH_fopen_wrapped(filename, L##mode) 302 #else 303 # define XXH_fopen(filename, mode) fopen(filename, mode) 304 #endif 305 306 /* ************************************ 307 * Basic Types 308 **************************************/ 309 #if defined(__cplusplus) /* C++ */ \ 310 || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) /* C99 */ 311 # include <stdint.h> 312 typedef uint8_t U8; 313 typedef uint32_t U32; 314 typedef uint64_t U64; 315 # else 316 # include <limits.h> 317 typedef unsigned char U8; 318 # if UINT_MAX == 0xFFFFFFFFUL 319 typedef unsigned int U32; 320 # else 321 typedef unsigned long U32; 322 # endif 323 typedef unsigned long long U64; 324 #endif /* not C++/C99 */ 325 326 static unsigned BMK_isLittleEndian(void) 327 { 328 const union { U32 u; U8 c[4]; } one = { 1 }; /* don't use static: performance detrimental */ 329 return one.c[0]; 330 } 331 332 333 /* ************************************* 334 * Constants 335 ***************************************/ 336 #define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE 337 #define QUOTE(str) #str 338 #define EXPAND_AND_QUOTE(str) QUOTE(str) 339 #define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION) 340 341 /* Show compiler versions in WELCOME_MESSAGE. CC_VERSION_FMT will return the printf specifiers, 342 * and VERSION will contain the comma separated list of arguments to the CC_VERSION_FMT string. */ 343 #if defined(__clang_version__) 344 /* Clang does its own thing. */ 345 # ifdef __apple_build_version__ 346 # define CC_VERSION_FMT "Apple Clang %s" 347 # else 348 # define CC_VERSION_FMT "Clang %s" 349 # endif 350 # define CC_VERSION __clang_version__ 351 #elif defined(__VERSION__) 352 /* GCC and ICC */ 353 # define CC_VERSION_FMT "%s" 354 # ifdef __INTEL_COMPILER /* icc adds its prefix */ 355 # define CC_VERSION __VERSION__ 356 # else /* assume GCC */ 357 # define CC_VERSION "GCC " __VERSION__ 358 # endif 359 #elif defined(_MSC_FULL_VER) && defined(_MSC_BUILD) 360 /* 361 * MSVC 362 * "For example, if the version number of the Visual C++ compiler is 363 * 15.00.20706.01, the _MSC_FULL_VER macro evaluates to 150020706." 364 * 365 * https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=vs-2017 366 */ 367 # define CC_VERSION_FMT "MSVC %02i.%02i.%05i.%02i" 368 # define CC_VERSION _MSC_FULL_VER / 10000000 % 100, _MSC_FULL_VER / 100000 % 100, _MSC_FULL_VER % 100000, _MSC_BUILD 369 #elif defined(__TINYC__) 370 /* tcc stores its version in the __TINYC__ macro. */ 371 # define CC_VERSION_FMT "tcc %i.%i.%i" 372 # define CC_VERSION __TINYC__ / 10000 % 100, __TINYC__ / 100 % 100, __TINYC__ % 100 373 #else 374 # define CC_VERSION_FMT "%s" 375 # define CC_VERSION "unknown compiler" 376 #endif 377 378 /* makes the next part easier */ 379 #if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) 380 # define ARCH_X64 1 381 # define ARCH_X86 "x86_64" 382 #elif defined(__i386__) || defined(_M_IX86) || defined(_M_IX86_FP) 383 # define ARCH_X86 "i386" 384 #endif 385 386 /* Try to detect the architecture. */ 387 #if defined(ARCH_X86) 388 # if defined(XXHSUM_DISPATCH) 389 # define ARCH ARCH_X86 " autoVec" 390 # elif defined(__AVX512F__) 391 # define ARCH ARCH_X86 " + AVX512" 392 # elif defined(__AVX2__) 393 # define ARCH ARCH_X86 " + AVX2" 394 # elif defined(__AVX__) 395 # define ARCH ARCH_X86 " + AVX" 396 # elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) \ 397 || defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP == 2) 398 # define ARCH ARCH_X86 " + SSE2" 399 # else 400 # define ARCH ARCH_X86 401 # endif 402 #elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) 403 # define ARCH "aarch64 + NEON" 404 #elif defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(_M_ARM) 405 /* ARM has a lot of different features that can change xxHash significantly. */ 406 # if defined(__thumb2__) || (defined(__thumb__) && (__thumb__ == 2 || __ARM_ARCH >= 7)) 407 # define ARCH_THUMB " Thumb-2" 408 # elif defined(__thumb__) 409 # define ARCH_THUMB " Thumb-1" 410 # else 411 # define ARCH_THUMB "" 412 # endif 413 /* ARMv7 has unaligned by default */ 414 # if defined(__ARM_FEATURE_UNALIGNED) || __ARM_ARCH >= 7 || defined(_M_ARMV7VE) 415 # define ARCH_UNALIGNED " + unaligned" 416 # else 417 # define ARCH_UNALIGNED "" 418 # endif 419 # if defined(__ARM_NEON) || defined(__ARM_NEON__) 420 # define ARCH_NEON " + NEON" 421 # else 422 # define ARCH_NEON "" 423 # endif 424 # define ARCH "ARMv" EXPAND_AND_QUOTE(__ARM_ARCH) ARCH_THUMB ARCH_NEON ARCH_UNALIGNED 425 #elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) 426 # if defined(__GNUC__) && defined(__POWER9_VECTOR__) 427 # define ARCH "ppc64 + POWER9 vector" 428 # elif defined(__GNUC__) && defined(__POWER8_VECTOR__) 429 # define ARCH "ppc64 + POWER8 vector" 430 # else 431 # define ARCH "ppc64" 432 # endif 433 #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) 434 # define ARCH "ppc" 435 #elif defined(__AVR) 436 # define ARCH "AVR" 437 #elif defined(__mips64) 438 # define ARCH "mips64" 439 #elif defined(__mips) 440 # define ARCH "mips" 441 #elif defined(__s390x__) 442 # define ARCH "s390x" 443 #elif defined(__s390__) 444 # define ARCH "s390" 445 #else 446 # define ARCH "unknown" 447 #endif 448 449 static const int g_nbBits = (int)(sizeof(void*)*8); 450 static const char g_lename[] = "little endian"; 451 static const char g_bename[] = "big endian"; 452 #define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename) 453 static const char author[] = "Yann Collet"; 454 #define WELCOME_MESSAGE(exename) "%s %s by %s \n", exename, PROGRAM_VERSION, author 455 #define FULL_WELCOME_MESSAGE(exename) "%s %s by %s \n" \ 456 "compiled as %i-bit %s %s with " CC_VERSION_FMT " \n", \ 457 exename, PROGRAM_VERSION, author, \ 458 g_nbBits, ARCH, ENDIAN_NAME, CC_VERSION 459 460 #define KB *( 1<<10) 461 #define MB *( 1<<20) 462 #define GB *(1U<<30) 463 464 static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB; 465 #define NBLOOPS 3 /* Default number of benchmark iterations */ 466 #define TIMELOOP_S 1 467 #define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* target timing per iteration */ 468 #define TIMELOOP_MIN (TIMELOOP / 2) /* minimum timing to validate a result */ 469 #define XXHSUM32_DEFAULT_SEED 0 /* Default seed for algo_xxh32 */ 470 #define XXHSUM64_DEFAULT_SEED 0 /* Default seed for algo_xxh64 */ 471 472 #define MAX_MEM (2 GB - 64 MB) 473 474 static const char stdinName[] = "-"; 475 typedef enum { algo_xxh32=0, algo_xxh64=1, algo_xxh128=2 } AlgoSelected; 476 static AlgoSelected g_defaultAlgo = algo_xxh64; /* required within main() & usage() */ 477 478 /* <16 hex char> <SPC> <SPC> <filename> <'\0'> 479 * '4096' is typical Linux PATH_MAX configuration. */ 480 #define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1) 481 482 /* Maximum acceptable line length. */ 483 #define MAX_LINE_LENGTH (32 KB) 484 485 486 /* ************************************ 487 * Display macros 488 **************************************/ 489 #ifdef _WIN32 490 #define DISPLAY(...) fprintf_utf8(stderr, __VA_ARGS__) 491 #define DISPLAYRESULT(...) fprintf_utf8(stdout, __VA_ARGS__) 492 #else 493 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 494 #define DISPLAYRESULT(...) fprintf(stdout, __VA_ARGS__) 495 #endif 496 497 #define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0) 498 static int g_displayLevel = 2; 499 500 501 /* ************************************ 502 * Local variables 503 **************************************/ 504 static U32 g_nbIterations = NBLOOPS; 505 506 507 /* ************************************ 508 * Benchmark Functions 509 **************************************/ 510 static clock_t BMK_clockSpan( clock_t start ) 511 { 512 return clock() - start; /* works even if overflow; Typical max span ~ 30 mn */ 513 } 514 515 516 static size_t BMK_findMaxMem(U64 requiredMem) 517 { 518 size_t const step = 64 MB; 519 void* testmem = NULL; 520 521 requiredMem = (((requiredMem >> 26) + 1) << 26); 522 requiredMem += 2*step; 523 if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; 524 525 while (!testmem) { 526 if (requiredMem > step) requiredMem -= step; 527 else requiredMem >>= 1; 528 testmem = malloc ((size_t)requiredMem); 529 } 530 free (testmem); 531 532 /* keep some space available */ 533 if (requiredMem > step) requiredMem -= step; 534 else requiredMem >>= 1; 535 536 return (size_t)requiredMem; 537 } 538 539 540 static U64 BMK_GetFileSize(const char* infilename) 541 { 542 int r; 543 #if defined(_MSC_VER) 544 struct _stat64 statbuf; 545 r = _stat64(infilename, &statbuf); 546 #else 547 struct stat statbuf; 548 r = stat(infilename, &statbuf); 549 #endif 550 if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ 551 return (U64)statbuf.st_size; 552 } 553 554 /* 555 * Allocates a string containing s1 and s2 concatenated. Acts like strdup. 556 * The result must be freed. 557 */ 558 static char* XXH_strcatDup(const char* s1, const char* s2) 559 { 560 assert(s1 != NULL); 561 assert(s2 != NULL); 562 { size_t len1 = strlen(s1); 563 size_t len2 = strlen(s2); 564 char* buf = (char*)malloc(len1 + len2 + 1); 565 if (buf != NULL) { 566 /* strcpy(buf, s1) */ 567 memcpy(buf, s1, len1); 568 /* strcat(buf, s2) */ 569 memcpy(buf + len1, s2, len2 + 1); 570 } 571 return buf; 572 } 573 } 574 575 576 /* use #define to make them constant, required for initialization */ 577 #define PRIME32 2654435761U 578 #define PRIME64 11400714785074694797ULL 579 580 /* 581 * Fills a test buffer with pseudorandom data. 582 * 583 * This is used in the sanity check - its values must not be changed. 584 */ 585 static void BMK_fillTestBuffer(U8* buffer, size_t len) 586 { 587 U64 byteGen = PRIME32; 588 size_t i; 589 590 assert(buffer != NULL); 591 592 for (i=0; i<len; i++) { 593 buffer[i] = (U8)(byteGen>>56); 594 byteGen *= PRIME64; 595 } 596 } 597 598 /* 599 * A secret buffer used for benchmarking XXH3's withSecret variants. 600 * 601 * In order for the bench to be realistic, the secret buffer would need to be 602 * pre-generated. 603 * 604 * Adding a pointer to the parameter list would be messy. 605 */ 606 static U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN]; 607 608 /* 609 * Wrappers for the benchmark. 610 * 611 * If you would like to add other hashes to the bench, create a wrapper and add 612 * it to the g_hashesToBench table. It will automatically be added. 613 */ 614 typedef U32 (*hashFunction)(const void* buffer, size_t bufferSize, U32 seed); 615 616 static U32 localXXH32(const void* buffer, size_t bufferSize, U32 seed) 617 { 618 return XXH32(buffer, bufferSize, seed); 619 } 620 static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) 621 { 622 return (U32)XXH64(buffer, bufferSize, seed); 623 } 624 static U32 localXXH3_64b(const void* buffer, size_t bufferSize, U32 seed) 625 { 626 (void)seed; 627 return (U32)XXH3_64bits(buffer, bufferSize); 628 } 629 static U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, U32 seed) 630 { 631 return (U32)XXH3_64bits_withSeed(buffer, bufferSize, seed); 632 } 633 static U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, U32 seed) 634 { 635 (void)seed; 636 return (U32)XXH3_64bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf)); 637 } 638 static U32 localXXH3_128b(const void* buffer, size_t bufferSize, U32 seed) 639 { 640 (void)seed; 641 return (U32)(XXH3_128bits(buffer, bufferSize).low64); 642 } 643 static U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, U32 seed) 644 { 645 return (U32)(XXH3_128bits_withSeed(buffer, bufferSize, seed).low64); 646 } 647 static U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, U32 seed) 648 { 649 (void)seed; 650 return (U32)(XXH3_128bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf)).low64); 651 } 652 static U32 localXXH3_stream(const void* buffer, size_t bufferSize, U32 seed) 653 { 654 XXH3_state_t state; 655 (void)seed; 656 XXH3_64bits_reset(&state); 657 XXH3_64bits_update(&state, buffer, bufferSize); 658 return (U32)XXH3_64bits_digest(&state); 659 } 660 static U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, U32 seed) 661 { 662 XXH3_state_t state; 663 XXH3_INITSTATE(&state); 664 XXH3_64bits_reset_withSeed(&state, (XXH64_hash_t)seed); 665 XXH3_64bits_update(&state, buffer, bufferSize); 666 return (U32)XXH3_64bits_digest(&state); 667 } 668 static U32 localXXH128_stream(const void* buffer, size_t bufferSize, U32 seed) 669 { 670 XXH3_state_t state; 671 (void)seed; 672 XXH3_128bits_reset(&state); 673 XXH3_128bits_update(&state, buffer, bufferSize); 674 return (U32)(XXH3_128bits_digest(&state).low64); 675 } 676 static U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, U32 seed) 677 { 678 XXH3_state_t state; 679 XXH3_INITSTATE(&state); 680 XXH3_128bits_reset_withSeed(&state, (XXH64_hash_t)seed); 681 XXH3_128bits_update(&state, buffer, bufferSize); 682 return (U32)(XXH3_128bits_digest(&state).low64); 683 } 684 685 686 typedef struct { 687 const char* name; 688 hashFunction func; 689 } hashInfo; 690 691 #define NB_HASHFUNC 12 692 static const hashInfo g_hashesToBench[NB_HASHFUNC] = { 693 { "XXH32", &localXXH32 }, 694 { "XXH64", &localXXH64 }, 695 { "XXH3_64b", &localXXH3_64b }, 696 { "XXH3_64b w/seed", &localXXH3_64b_seeded }, 697 { "XXH3_64b w/secret", &localXXH3_64b_secret }, 698 { "XXH128", &localXXH3_128b }, 699 { "XXH128 w/seed", &localXXH3_128b_seeded }, 700 { "XXH128 w/secret", &localXXH3_128b_secret }, 701 { "XXH3_stream", &localXXH3_stream }, 702 { "XXH3_stream w/seed",&localXXH3_stream_seeded }, 703 { "XXH128_stream", &localXXH128_stream }, 704 { "XXH128_stream w/seed",&localXXH128_stream_seeded }, 705 }; 706 707 #define NB_TESTFUNC (1 + 2 * NB_HASHFUNC) 708 static char g_testIDs[NB_TESTFUNC] = { 0 }; 709 static const char k_testIDs_default[NB_TESTFUNC] = { 0, 710 1 /*XXH32*/, 0, 711 1 /*XXH64*/, 0, 712 1 /*XXH3*/, 0, 0, 0, 0, 0, 713 1 /*XXH128*/ }; 714 715 #define HASHNAME_MAX 29 716 static void BMK_benchHash(hashFunction h, const char* hName, int testID, 717 const void* buffer, size_t bufferSize) 718 { 719 U32 nbh_perIteration = (U32)((300 MB) / (bufferSize+1)) + 1; /* first iteration conservatively aims for 300 MB/s */ 720 unsigned iterationNb, nbIterations = g_nbIterations + !g_nbIterations /* min 1 */; 721 double fastestH = 100000000.; 722 assert(HASHNAME_MAX > 2); 723 DISPLAYLEVEL(2, "\r%80s\r", ""); /* Clean display line */ 724 725 for (iterationNb = 1; iterationNb <= nbIterations; iterationNb++) { 726 U32 r=0; 727 clock_t cStart; 728 729 DISPLAYLEVEL(2, "%2u-%-*.*s : %10u ->\r", 730 iterationNb, 731 HASHNAME_MAX, HASHNAME_MAX, hName, 732 (unsigned)bufferSize); 733 cStart = clock(); 734 while (clock() == cStart); /* starts clock() at its exact beginning */ 735 cStart = clock(); 736 737 { U32 u; 738 for (u=0; u<nbh_perIteration; u++) 739 r += h(buffer, bufferSize, u); 740 } 741 if (r==0) DISPLAYLEVEL(3,".\r"); /* do something with r to defeat compiler "optimizing" hash away */ 742 743 { clock_t const nbTicks = BMK_clockSpan(cStart); 744 double const ticksPerHash = ((double)nbTicks / TIMELOOP) / nbh_perIteration; 745 /* 746 * clock() is the only decent portable timer, but it isn't very 747 * precise. 748 * 749 * Sometimes, this lack of precision is enough that the benchmark 750 * finishes before there are enough ticks to get a meaningful result. 751 * 752 * For example, on a Core 2 Duo (without any sort of Turbo Boost), 753 * the imprecise timer caused peculiar results like so: 754 * 755 * XXH3_64b 4800.0 MB/s // conveniently even 756 * XXH3_64b unaligned 4800.0 MB/s 757 * XXH3_64b seeded 9600.0 MB/s // magical 2x speedup?! 758 * XXH3_64b seeded unaligned 4800.0 MB/s 759 * 760 * If we sense a suspiciously low number of ticks, we increase the 761 * iterations until we can get something meaningful. 762 */ 763 if (nbTicks < TIMELOOP_MIN) { 764 /* Not enough time spent in benchmarking, risk of rounding bias */ 765 if (nbTicks == 0) { /* faster than resolution timer */ 766 nbh_perIteration *= 100; 767 } else { 768 /* 769 * update nbh_perIteration so that the next round lasts 770 * approximately 1 second. 771 */ 772 double nbh_perSecond = (1 / ticksPerHash) + 1; 773 if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20); /* avoid overflow */ 774 nbh_perIteration = (U32)nbh_perSecond; 775 } 776 /* g_nbIterations==0 => quick evaluation, no claim of accuracy */ 777 if (g_nbIterations>0) { 778 iterationNb--; /* new round for a more accurate speed evaluation */ 779 continue; 780 } 781 } 782 if (ticksPerHash < fastestH) fastestH = ticksPerHash; 783 if (fastestH>0.) { /* avoid div by zero */ 784 DISPLAYLEVEL(2, "%2u-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r", 785 iterationNb, 786 HASHNAME_MAX, HASHNAME_MAX, hName, 787 (unsigned)bufferSize, 788 (double)1 / fastestH, 789 ((double)bufferSize / (1 MB)) / fastestH); 790 } } 791 { double nbh_perSecond = (1 / fastestH) + 1; 792 if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20); /* avoid overflow */ 793 nbh_perIteration = (U32)nbh_perSecond; 794 } 795 } 796 DISPLAYLEVEL(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n", 797 testID, 798 HASHNAME_MAX, HASHNAME_MAX, hName, 799 (unsigned)bufferSize, 800 (double)1 / fastestH, 801 ((double)bufferSize / (1 MB)) / fastestH); 802 if (g_displayLevel<1) 803 DISPLAYLEVEL(0, "%u, ", (unsigned)((double)1 / fastestH)); 804 } 805 806 807 /*! 808 * BMK_benchMem(): 809 * buffer: Must be 16-byte aligned. 810 * The real allocated size of buffer is supposed to be >= (bufferSize+3). 811 * returns: 0 on success, 1 if error (invalid mode selected) 812 */ 813 static void BMK_benchMem(const void* buffer, size_t bufferSize) 814 { 815 assert((((size_t)buffer) & 15) == 0); /* ensure alignment */ 816 BMK_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf)); 817 { int i; 818 for (i = 1; i < NB_TESTFUNC; i++) { 819 int const hashFuncID = (i-1) / 2; 820 assert(g_hashesToBench[hashFuncID].name != NULL); 821 if (g_testIDs[i] == 0) continue; 822 /* aligned */ 823 if ((i % 2) == 1) { 824 BMK_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize); 825 } 826 /* unaligned */ 827 if ((i % 2) == 0) { 828 /* Append "unaligned". */ 829 char* const hashNameBuf = XXH_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned"); 830 assert(hashNameBuf != NULL); 831 BMK_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize); 832 free(hashNameBuf); 833 } 834 } } 835 } 836 837 static size_t BMK_selectBenchedSize(const char* fileName) 838 { 839 U64 const inFileSize = BMK_GetFileSize(fileName); 840 size_t benchedSize = (size_t) BMK_findMaxMem(inFileSize); 841 if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; 842 if (benchedSize < inFileSize) { 843 DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20)); 844 } 845 return benchedSize; 846 } 847 848 849 static int BMK_benchFiles(const char*const* fileNamesTable, int nbFiles) 850 { 851 int fileIdx; 852 for (fileIdx=0; fileIdx<nbFiles; fileIdx++) { 853 const char* const inFileName = fileNamesTable[fileIdx]; 854 assert(inFileName != NULL); 855 856 { FILE* const inFile = XXH_fopen( inFileName, "rb" ); 857 size_t const benchedSize = BMK_selectBenchedSize(inFileName); 858 char* const buffer = (char*)calloc(benchedSize+16+3, 1); 859 void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */ 860 861 /* Checks */ 862 if (inFile==NULL){ 863 DISPLAY("Error: Could not open '%s': %s.\n", inFileName, strerror(errno)); 864 free(buffer); 865 exit(11); 866 } 867 if(!buffer) { 868 DISPLAY("\nError: Out of memory.\n"); 869 fclose(inFile); 870 exit(12); 871 } 872 873 /* Fill input buffer */ 874 { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile); 875 fclose(inFile); 876 if(readSize != benchedSize) { 877 DISPLAY("\nError: Could not read '%s': %s.\n", inFileName, strerror(errno)); 878 free(buffer); 879 exit(13); 880 } } 881 882 /* bench */ 883 BMK_benchMem(alignedBuffer, benchedSize); 884 885 free(buffer); 886 } } 887 return 0; 888 } 889 890 891 static int BMK_benchInternal(size_t keySize) 892 { 893 void* const buffer = calloc(keySize+16+3, 1); 894 if (buffer == NULL) { 895 DISPLAY("\nError: Out of memory.\n"); 896 exit(12); 897 } 898 899 { const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */ 900 901 /* bench */ 902 DISPLAYLEVEL(1, "Sample of "); 903 if (keySize > 10 KB) { 904 DISPLAYLEVEL(1, "%u KB", (unsigned)(keySize >> 10)); 905 } else { 906 DISPLAYLEVEL(1, "%u bytes", (unsigned)keySize); 907 } 908 DISPLAYLEVEL(1, "... \n"); 909 910 BMK_benchMem(alignedBuffer, keySize); 911 free(buffer); 912 } 913 return 0; 914 } 915 916 917 /* ************************************************ 918 * Self-test: 919 * ensure results consistency accross platforms 920 *********************************************** */ 921 922 static void BMK_checkResult32(XXH32_hash_t r1, XXH32_hash_t r2) 923 { 924 static int nbTests = 1; 925 if (r1!=r2) { 926 DISPLAY("\rError: 32-bit hash test %i: Internal sanity check failed!\n", nbTests); 927 DISPLAY("\rGot 0x%08X, expected 0x%08X.\n", (unsigned)r1, (unsigned)r2); 928 DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n" 929 "or temporarily comment out the tests in BMK_sanityCheck.\n"); 930 exit(1); 931 } 932 nbTests++; 933 } 934 935 static void BMK_checkResult64(XXH64_hash_t r1, XXH64_hash_t r2) 936 { 937 static int nbTests = 1; 938 if (r1!=r2) { 939 DISPLAY("\rError: 64-bit hash test %i: Internal sanity check failed!\n", nbTests); 940 DISPLAY("\rGot 0x%08X%08XULL, expected 0x%08X%08XULL.\n", 941 (unsigned)(r1>>32), (unsigned)r1, (unsigned)(r2>>32), (unsigned)r2); 942 DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n" 943 "or temporarily comment out the tests in BMK_sanityCheck.\n"); 944 exit(1); 945 } 946 nbTests++; 947 } 948 949 static void BMK_checkResult128(XXH128_hash_t r1, XXH128_hash_t r2) 950 { 951 static int nbTests = 1; 952 if ((r1.low64 != r2.low64) || (r1.high64 != r2.high64)) { 953 DISPLAY("\rError: 128-bit hash test %i: Internal sanity check failed.\n", nbTests); 954 DISPLAY("\rGot { 0x%08X%08XULL, 0x%08X%08XULL }, expected { 0x%08X%08XULL, 0x%08X%08XULL } \n", 955 (unsigned)(r1.low64>>32), (unsigned)r1.low64, (unsigned)(r1.high64>>32), (unsigned)r1.high64, 956 (unsigned)(r2.low64>>32), (unsigned)r2.low64, (unsigned)(r2.high64>>32), (unsigned)r2.high64 ); 957 DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n" 958 "or temporarily comment out the tests in BMK_sanityCheck.\n"); 959 exit(1); 960 } 961 nbTests++; 962 } 963 964 965 static void BMK_testXXH32(const void* data, size_t len, U32 seed, U32 Nresult) 966 { 967 XXH32_state_t *state = XXH32_createState(); 968 size_t pos; 969 970 assert(state != NULL); 971 if (len>0) assert(data != NULL); 972 973 BMK_checkResult32(XXH32(data, len, seed), Nresult); 974 975 (void)XXH32_reset(state, seed); 976 (void)XXH32_update(state, data, len); 977 BMK_checkResult32(XXH32_digest(state), Nresult); 978 979 (void)XXH32_reset(state, seed); 980 for (pos=0; pos<len; pos++) 981 (void)XXH32_update(state, ((const char*)data)+pos, 1); 982 BMK_checkResult32(XXH32_digest(state), Nresult); 983 XXH32_freeState(state); 984 } 985 986 static void BMK_testXXH64(const void* data, size_t len, U64 seed, U64 Nresult) 987 { 988 XXH64_state_t *state = XXH64_createState(); 989 size_t pos; 990 991 assert(state != NULL); 992 if (len>0) assert(data != NULL); 993 994 BMK_checkResult64(XXH64(data, len, seed), Nresult); 995 996 (void)XXH64_reset(state, seed); 997 (void)XXH64_update(state, data, len); 998 BMK_checkResult64(XXH64_digest(state), Nresult); 999 1000 (void)XXH64_reset(state, seed); 1001 for (pos=0; pos<len; pos++) 1002 (void)XXH64_update(state, ((const char*)data)+pos, 1); 1003 BMK_checkResult64(XXH64_digest(state), Nresult); 1004 XXH64_freeState(state); 1005 } 1006 1007 static U32 BMK_rand(void) 1008 { 1009 static U64 seed = PRIME32; 1010 seed *= PRIME64; 1011 return (U32)(seed >> 40); 1012 } 1013 1014 1015 void BMK_testXXH3(const void* data, size_t len, U64 seed, U64 Nresult) 1016 { 1017 if (len>0) assert(data != NULL); 1018 1019 { U64 const Dresult = XXH3_64bits_withSeed(data, len, seed); 1020 BMK_checkResult64(Dresult, Nresult); 1021 } 1022 1023 /* check that the no-seed variant produces same result as seed==0 */ 1024 if (seed == 0) { 1025 U64 const Dresult = XXH3_64bits(data, len); 1026 BMK_checkResult64(Dresult, Nresult); 1027 } 1028 1029 /* streaming API test */ 1030 { XXH3_state_t* const state = XXH3_createState(); 1031 assert(state != NULL); 1032 /* single ingestion */ 1033 (void)XXH3_64bits_reset_withSeed(state, seed); 1034 (void)XXH3_64bits_update(state, data, len); 1035 BMK_checkResult64(XXH3_64bits_digest(state), Nresult); 1036 1037 /* random ingestion */ 1038 { size_t p = 0; 1039 (void)XXH3_64bits_reset_withSeed(state, seed); 1040 while (p < len) { 1041 size_t const modulo = len > 2 ? len : 2; 1042 size_t l = (size_t)(BMK_rand()) % modulo; 1043 if (p + l > len) l = len - p; 1044 (void)XXH3_64bits_update(state, (const char*)data+p, l); 1045 p += l; 1046 } 1047 BMK_checkResult64(XXH3_64bits_digest(state), Nresult); 1048 } 1049 1050 /* byte by byte ingestion */ 1051 { size_t pos; 1052 (void)XXH3_64bits_reset_withSeed(state, seed); 1053 for (pos=0; pos<len; pos++) 1054 (void)XXH3_64bits_update(state, ((const char*)data)+pos, 1); 1055 BMK_checkResult64(XXH3_64bits_digest(state), Nresult); 1056 } 1057 XXH3_freeState(state); 1058 } 1059 } 1060 1061 void BMK_testXXH3_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, U64 Nresult) 1062 { 1063 if (len>0) assert(data != NULL); 1064 1065 { U64 const Dresult = XXH3_64bits_withSecret(data, len, secret, secretSize); 1066 BMK_checkResult64(Dresult, Nresult); 1067 } 1068 1069 /* streaming API test */ 1070 { XXH3_state_t *state = XXH3_createState(); 1071 assert(state != NULL); 1072 (void)XXH3_64bits_reset_withSecret(state, secret, secretSize); 1073 (void)XXH3_64bits_update(state, data, len); 1074 BMK_checkResult64(XXH3_64bits_digest(state), Nresult); 1075 1076 /* random ingestion */ 1077 { size_t p = 0; 1078 (void)XXH3_64bits_reset_withSecret(state, secret, secretSize); 1079 while (p < len) { 1080 size_t const modulo = len > 2 ? len : 2; 1081 size_t l = (size_t)(BMK_rand()) % modulo; 1082 if (p + l > len) l = len - p; 1083 (void)XXH3_64bits_update(state, (const char*)data+p, l); 1084 p += l; 1085 } 1086 BMK_checkResult64(XXH3_64bits_digest(state), Nresult); 1087 } 1088 1089 /* byte by byte ingestion */ 1090 { size_t pos; 1091 (void)XXH3_64bits_reset_withSecret(state, secret, secretSize); 1092 for (pos=0; pos<len; pos++) 1093 (void)XXH3_64bits_update(state, ((const char*)data)+pos, 1); 1094 BMK_checkResult64(XXH3_64bits_digest(state), Nresult); 1095 } 1096 XXH3_freeState(state); 1097 } 1098 } 1099 1100 void BMK_testXXH128(const void* data, size_t len, U64 seed, XXH128_hash_t Nresult) 1101 { 1102 { XXH128_hash_t const Dresult = XXH3_128bits_withSeed(data, len, seed); 1103 BMK_checkResult128(Dresult, Nresult); 1104 } 1105 1106 /* check that XXH128() is identical to XXH3_128bits_withSeed() */ 1107 { XXH128_hash_t const Dresult2 = XXH128(data, len, seed); 1108 BMK_checkResult128(Dresult2, Nresult); 1109 } 1110 1111 /* check that the no-seed variant produces same result as seed==0 */ 1112 if (seed == 0) { 1113 XXH128_hash_t const Dresult = XXH3_128bits(data, len); 1114 BMK_checkResult128(Dresult, Nresult); 1115 } 1116 1117 /* streaming API test */ 1118 { XXH3_state_t *state = XXH3_createState(); 1119 assert(state != NULL); 1120 1121 /* single ingestion */ 1122 (void)XXH3_128bits_reset_withSeed(state, seed); 1123 (void)XXH3_128bits_update(state, data, len); 1124 BMK_checkResult128(XXH3_128bits_digest(state), Nresult); 1125 1126 /* random ingestion */ 1127 { size_t p = 0; 1128 (void)XXH3_128bits_reset_withSeed(state, seed); 1129 while (p < len) { 1130 size_t const modulo = len > 2 ? len : 2; 1131 size_t l = (size_t)(BMK_rand()) % modulo; 1132 if (p + l > len) l = len - p; 1133 (void)XXH3_128bits_update(state, (const char*)data+p, l); 1134 p += l; 1135 } 1136 BMK_checkResult128(XXH3_128bits_digest(state), Nresult); 1137 } 1138 1139 /* byte by byte ingestion */ 1140 { size_t pos; 1141 (void)XXH3_128bits_reset_withSeed(state, seed); 1142 for (pos=0; pos<len; pos++) 1143 (void)XXH3_128bits_update(state, ((const char*)data)+pos, 1); 1144 BMK_checkResult128(XXH3_128bits_digest(state), Nresult); 1145 } 1146 XXH3_freeState(state); 1147 } 1148 } 1149 1150 void BMK_testXXH128_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, XXH128_hash_t Nresult) 1151 { 1152 if (len>0) assert(data != NULL); 1153 1154 { XXH128_hash_t const Dresult = XXH3_128bits_withSecret(data, len, secret, secretSize); 1155 BMK_checkResult128(Dresult, Nresult); 1156 } 1157 1158 /* streaming API test */ 1159 { XXH3_state_t* const state = XXH3_createState(); 1160 assert(state != NULL); 1161 (void)XXH3_128bits_reset_withSecret(state, secret, secretSize); 1162 (void)XXH3_128bits_update(state, data, len); 1163 BMK_checkResult128(XXH3_128bits_digest(state), Nresult); 1164 1165 /* random ingestion */ 1166 { size_t p = 0; 1167 (void)XXH3_128bits_reset_withSecret(state, secret, secretSize); 1168 while (p < len) { 1169 size_t const modulo = len > 2 ? len : 2; 1170 size_t l = (size_t)(BMK_rand()) % modulo; 1171 if (p + l > len) l = len - p; 1172 (void)XXH3_128bits_update(state, (const char*)data+p, l); 1173 p += l; 1174 } 1175 BMK_checkResult128(XXH3_128bits_digest(state), Nresult); 1176 } 1177 1178 /* byte by byte ingestion */ 1179 { size_t pos; 1180 (void)XXH3_128bits_reset_withSecret(state, secret, secretSize); 1181 for (pos=0; pos<len; pos++) 1182 (void)XXH3_128bits_update(state, ((const char*)data)+pos, 1); 1183 BMK_checkResult128(XXH3_128bits_digest(state), Nresult); 1184 } 1185 XXH3_freeState(state); 1186 } 1187 } 1188 1189 #define SECRET_SAMPLE_NBBYTES 4 1190 typedef struct { U8 byte[SECRET_SAMPLE_NBBYTES]; } verifSample_t; 1191 1192 void BMK_testSecretGenerator(const void* customSeed, size_t len, verifSample_t result) 1193 { 1194 static int nbTests = 1; 1195 const int sampleIndex[SECRET_SAMPLE_NBBYTES] = { 0, 62, 131, 191}; 1196 U8 secretBuffer[XXH3_SECRET_DEFAULT_SIZE] = {0}; 1197 verifSample_t samples; 1198 int i; 1199 1200 XXH3_generateSecret(secretBuffer, customSeed, len); 1201 for (i=0; i<SECRET_SAMPLE_NBBYTES; i++) { 1202 samples.byte[i] = secretBuffer[sampleIndex[i]]; 1203 } 1204 if (memcmp(&samples, &result, sizeof(result))) { 1205 DISPLAY("\rError: Secret generation test %i: Internal sanity check failed. \n", nbTests); 1206 DISPLAY("\rGot { 0x%02X, 0x%02X, 0x%02X, 0x%02X }, expected { 0x%02X, 0x%02X, 0x%02X, 0x%02X } \n", 1207 samples.byte[0], samples.byte[1], samples.byte[2], samples.byte[3], 1208 result.byte[0], result.byte[1], result.byte[2], result.byte[3] ); 1209 exit(1); 1210 } 1211 nbTests++; 1212 } 1213 1214 1215 /*! 1216 * BMK_sanityCheck(): 1217 * Runs a sanity check before the benchmark. 1218 * 1219 * Exits on an incorrect output. 1220 */ 1221 static void BMK_sanityCheck(void) 1222 { 1223 #define SANITY_BUFFER_SIZE 2367 1224 U8 sanityBuffer[SANITY_BUFFER_SIZE]; 1225 BMK_fillTestBuffer(sanityBuffer, sizeof(sanityBuffer)); 1226 1227 BMK_testXXH32(NULL, 0, 0, 0x02CC5D05); 1228 BMK_testXXH32(NULL, 0, PRIME32, 0x36B78AE7); 1229 BMK_testXXH32(sanityBuffer, 1, 0, 0xCF65B03E); 1230 BMK_testXXH32(sanityBuffer, 1, PRIME32, 0xB4545AA4); 1231 BMK_testXXH32(sanityBuffer, 14, 0, 0x1208E7E2); 1232 BMK_testXXH32(sanityBuffer, 14, PRIME32, 0x6AF1D1FE); 1233 BMK_testXXH32(sanityBuffer,222, 0, 0x5BD11DBD); 1234 BMK_testXXH32(sanityBuffer,222, PRIME32, 0x58803C5F); 1235 1236 BMK_testXXH64(NULL , 0, 0, 0xEF46DB3751D8E999ULL); 1237 BMK_testXXH64(NULL , 0, PRIME32, 0xAC75FDA2929B17EFULL); 1238 BMK_testXXH64(sanityBuffer, 1, 0, 0xE934A84ADB052768ULL); 1239 BMK_testXXH64(sanityBuffer, 1, PRIME32, 0x5014607643A9B4C3ULL); 1240 BMK_testXXH64(sanityBuffer, 4, 0, 0x9136A0DCA57457EEULL); 1241 BMK_testXXH64(sanityBuffer, 14, 0, 0x8282DCC4994E35C8ULL); 1242 BMK_testXXH64(sanityBuffer, 14, PRIME32, 0xC3BD6BF63DEB6DF0ULL); 1243 BMK_testXXH64(sanityBuffer,222, 0, 0xB641AE8CB691C174ULL); 1244 BMK_testXXH64(sanityBuffer,222, PRIME32, 0x20CB8AB7AE10C14AULL); 1245 1246 BMK_testXXH3(NULL, 0, 0, 0x2D06800538D394C2ULL); /* empty string */ 1247 BMK_testXXH3(NULL, 0, PRIME64, 0xA8A6B918B2F0364AULL); 1248 BMK_testXXH3(sanityBuffer, 1, 0, 0xC44BDFF4074EECDBULL); /* 1 - 3 */ 1249 BMK_testXXH3(sanityBuffer, 1, PRIME64, 0x032BE332DD766EF8ULL); /* 1 - 3 */ 1250 BMK_testXXH3(sanityBuffer, 6, 0, 0x27B56A84CD2D7325ULL); /* 4 - 8 */ 1251 BMK_testXXH3(sanityBuffer, 6, PRIME64, 0x84589C116AB59AB9ULL); /* 4 - 8 */ 1252 BMK_testXXH3(sanityBuffer, 12, 0, 0xA713DAF0DFBB77E7ULL); /* 9 - 16 */ 1253 BMK_testXXH3(sanityBuffer, 12, PRIME64, 0xE7303E1B2336DE0EULL); /* 9 - 16 */ 1254 BMK_testXXH3(sanityBuffer, 24, 0, 0xA3FE70BF9D3510EBULL); /* 17 - 32 */ 1255 BMK_testXXH3(sanityBuffer, 24, PRIME64, 0x850E80FC35BDD690ULL); /* 17 - 32 */ 1256 BMK_testXXH3(sanityBuffer, 48, 0, 0x397DA259ECBA1F11ULL); /* 33 - 64 */ 1257 BMK_testXXH3(sanityBuffer, 48, PRIME64, 0xADC2CBAA44ACC616ULL); /* 33 - 64 */ 1258 BMK_testXXH3(sanityBuffer, 80, 0, 0xBCDEFBBB2C47C90AULL); /* 65 - 96 */ 1259 BMK_testXXH3(sanityBuffer, 80, PRIME64, 0xC6DD0CB699532E73ULL); /* 65 - 96 */ 1260 BMK_testXXH3(sanityBuffer, 195, 0, 0xCD94217EE362EC3AULL); /* 129-240 */ 1261 BMK_testXXH3(sanityBuffer, 195, PRIME64, 0xBA68003D370CB3D9ULL); /* 129-240 */ 1262 1263 BMK_testXXH3(sanityBuffer, 403, 0, 0xCDEB804D65C6DEA4ULL); /* one block, last stripe is overlapping */ 1264 BMK_testXXH3(sanityBuffer, 403, PRIME64, 0x6259F6ECFD6443FDULL); /* one block, last stripe is overlapping */ 1265 BMK_testXXH3(sanityBuffer, 512, 0, 0x617E49599013CB6BULL); /* one block, finishing at stripe boundary */ 1266 BMK_testXXH3(sanityBuffer, 512, PRIME64, 0x3CE457DE14C27708ULL); /* one block, finishing at stripe boundary */ 1267 BMK_testXXH3(sanityBuffer,2048, 0, 0xDD59E2C3A5F038E0ULL); /* 2 blocks, finishing at block boundary */ 1268 BMK_testXXH3(sanityBuffer,2048, PRIME64, 0x66F81670669ABABCULL); /* 2 blocks, finishing at block boundary */ 1269 BMK_testXXH3(sanityBuffer,2240, 0, 0x6E73A90539CF2948ULL); /* 3 blocks, finishing at stripe boundary */ 1270 BMK_testXXH3(sanityBuffer,2240, PRIME64, 0x757BA8487D1B5247ULL); /* 3 blocks, finishing at stripe boundary */ 1271 BMK_testXXH3(sanityBuffer,2367, 0, 0xCB37AEB9E5D361EDULL); /* 3 blocks, last stripe is overlapping */ 1272 BMK_testXXH3(sanityBuffer,2367, PRIME64, 0xD2DB3415B942B42AULL); /* 3 blocks, last stripe is overlapping */ 1273 1274 /* XXH3 with Custom Secret */ 1275 { const void* const secret = sanityBuffer + 7; 1276 const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11; 1277 assert(sizeof(sanityBuffer) >= 7 + secretSize); 1278 BMK_testXXH3_withSecret(NULL, 0, secret, secretSize, 0x3559D64878C5C66CULL); /* empty string */ 1279 BMK_testXXH3_withSecret(sanityBuffer, 1, secret, secretSize, 0x8A52451418B2DA4DULL); /* 1 - 3 */ 1280 BMK_testXXH3_withSecret(sanityBuffer, 6, secret, secretSize, 0x82C90AB0519369ADULL); /* 4 - 8 */ 1281 BMK_testXXH3_withSecret(sanityBuffer, 12, secret, secretSize, 0x14631E773B78EC57ULL); /* 9 - 16 */ 1282 BMK_testXXH3_withSecret(sanityBuffer, 24, secret, secretSize, 0xCDD5542E4A9D9FE8ULL); /* 17 - 32 */ 1283 BMK_testXXH3_withSecret(sanityBuffer, 48, secret, secretSize, 0x33ABD54D094B2534ULL); /* 33 - 64 */ 1284 BMK_testXXH3_withSecret(sanityBuffer, 80, secret, secretSize, 0xE687BA1684965297ULL); /* 65 - 96 */ 1285 BMK_testXXH3_withSecret(sanityBuffer, 195, secret, secretSize, 0xA057273F5EECFB20ULL); /* 129-240 */ 1286 1287 BMK_testXXH3_withSecret(sanityBuffer, 403, secret, secretSize, 0x14546019124D43B8ULL); /* one block, last stripe is overlapping */ 1288 BMK_testXXH3_withSecret(sanityBuffer, 512, secret, secretSize, 0x7564693DD526E28DULL); /* one block, finishing at stripe boundary */ 1289 BMK_testXXH3_withSecret(sanityBuffer,2048, secret, secretSize, 0xD32E975821D6519FULL); /* >= 2 blocks, at least one scrambling */ 1290 BMK_testXXH3_withSecret(sanityBuffer,2367, secret, secretSize, 0x293FA8E5173BB5E7ULL); /* >= 2 blocks, at least one scrambling, last stripe unaligned */ 1291 1292 BMK_testXXH3_withSecret(sanityBuffer,64*10*3, secret, secretSize, 0x751D2EC54BC6038BULL); /* exactly 3 full blocks, not a multiple of 256 */ 1293 } 1294 1295 /* XXH128 */ 1296 { XXH128_hash_t const expected = { 0x6001C324468D497FULL, 0x99AA06D3014798D8ULL }; 1297 BMK_testXXH128(NULL, 0, 0, expected); /* empty string */ 1298 } 1299 { XXH128_hash_t const expected = { 0x5444F7869C671AB0ULL, 0x92220AE55E14AB50ULL }; 1300 BMK_testXXH128(NULL, 0, PRIME32, expected); 1301 } 1302 { XXH128_hash_t const expected = { 0xC44BDFF4074EECDBULL, 0xA6CD5E9392000F6AULL }; 1303 BMK_testXXH128(sanityBuffer, 1, 0, expected); /* 1-3 */ 1304 } 1305 { XXH128_hash_t const expected = { 0xB53D5557E7F76F8DULL, 0x89B99554BA22467CULL }; 1306 BMK_testXXH128(sanityBuffer, 1, PRIME32, expected); /* 1-3 */ 1307 } 1308 { XXH128_hash_t const expected = { 0x3E7039BDDA43CFC6ULL, 0x082AFE0B8162D12AULL }; 1309 BMK_testXXH128(sanityBuffer, 6, 0, expected); /* 4-8 */ 1310 } 1311 { XXH128_hash_t const expected = { 0x269D8F70BE98856EULL, 0x5A865B5389ABD2B1ULL }; 1312 BMK_testXXH128(sanityBuffer, 6, PRIME32, expected); /* 4-8 */ 1313 } 1314 { XXH128_hash_t const expected = { 0x061A192713F69AD9ULL, 0x6E3EFD8FC7802B18ULL }; 1315 BMK_testXXH128(sanityBuffer, 12, 0, expected); /* 9-16 */ 1316 } 1317 { XXH128_hash_t const expected = { 0x9BE9F9A67F3C7DFBULL, 0xD7E09D518A3405D3ULL }; 1318 BMK_testXXH128(sanityBuffer, 12, PRIME32, expected); /* 9-16 */ 1319 } 1320 { XXH128_hash_t const expected = { 0x1E7044D28B1B901DULL, 0x0CE966E4678D3761ULL }; 1321 BMK_testXXH128(sanityBuffer, 24, 0, expected); /* 17-32 */ 1322 } 1323 { XXH128_hash_t const expected = { 0xD7304C54EBAD40A9ULL, 0x3162026714A6A243ULL }; 1324 BMK_testXXH128(sanityBuffer, 24, PRIME32, expected); /* 17-32 */ 1325 } 1326 { XXH128_hash_t const expected = { 0xF942219AED80F67BULL, 0xA002AC4E5478227EULL }; 1327 BMK_testXXH128(sanityBuffer, 48, 0, expected); /* 33-64 */ 1328 } 1329 { XXH128_hash_t const expected = { 0x7BA3C3E453A1934EULL, 0x163ADDE36C072295ULL }; 1330 BMK_testXXH128(sanityBuffer, 48, PRIME32, expected); /* 33-64 */ 1331 } 1332 { XXH128_hash_t const expected = { 0x5E8BAFB9F95FB803ULL, 0x4952F58181AB0042ULL }; 1333 BMK_testXXH128(sanityBuffer, 81, 0, expected); /* 65-96 */ 1334 } 1335 { XXH128_hash_t const expected = { 0x703FBB3D7A5F755CULL, 0x2724EC7ADC750FB6ULL }; 1336 BMK_testXXH128(sanityBuffer, 81, PRIME32, expected); /* 65-96 */ 1337 } 1338 { XXH128_hash_t const expected = { 0xF1AEBD597CEC6B3AULL, 0x337E09641B948717ULL }; 1339 BMK_testXXH128(sanityBuffer, 222, 0, expected); /* 129-240 */ 1340 } 1341 { XXH128_hash_t const expected = { 0xAE995BB8AF917A8DULL, 0x91820016621E97F1ULL }; 1342 BMK_testXXH128(sanityBuffer, 222, PRIME32, expected); /* 129-240 */ 1343 } 1344 { XXH128_hash_t const expected = { 0xCDEB804D65C6DEA4ULL, 0x1B6DE21E332DD73DULL }; 1345 BMK_testXXH128(sanityBuffer, 403, 0, expected); /* one block, last stripe is overlapping */ 1346 } 1347 { XXH128_hash_t const expected = { 0x6259F6ECFD6443FDULL, 0xBED311971E0BE8F2ULL }; 1348 BMK_testXXH128(sanityBuffer, 403, PRIME64, expected); /* one block, last stripe is overlapping */ 1349 } 1350 { XXH128_hash_t const expected = { 0x617E49599013CB6BULL, 0x18D2D110DCC9BCA1ULL }; 1351 BMK_testXXH128(sanityBuffer, 512, 0, expected); /* one block, finishing at stripe boundary */ 1352 } 1353 { XXH128_hash_t const expected = { 0x3CE457DE14C27708ULL, 0x925D06B8EC5B8040ULL }; 1354 BMK_testXXH128(sanityBuffer, 512, PRIME64, expected); /* one block, finishing at stripe boundary */ 1355 } 1356 { XXH128_hash_t const expected = { 0xDD59E2C3A5F038E0ULL, 0xF736557FD47073A5ULL }; 1357 BMK_testXXH128(sanityBuffer,2048, 0, expected); /* two blocks, finishing at block boundary */ 1358 } 1359 { XXH128_hash_t const expected = { 0x230D43F30206260BULL, 0x7FB03F7E7186C3EAULL }; 1360 BMK_testXXH128(sanityBuffer,2048, PRIME32, expected); /* two blocks, finishing at block boundary */ 1361 } 1362 { XXH128_hash_t const expected = { 0x6E73A90539CF2948ULL, 0xCCB134FBFA7CE49DULL }; 1363 BMK_testXXH128(sanityBuffer,2240, 0, expected); /* two blocks, ends at stripe boundary */ 1364 } 1365 { XXH128_hash_t const expected = { 0xED385111126FBA6FULL, 0x50A1FE17B338995FULL }; 1366 BMK_testXXH128(sanityBuffer,2240, PRIME32, expected); /* two blocks, ends at stripe boundary */ 1367 } 1368 { XXH128_hash_t const expected = { 0xCB37AEB9E5D361EDULL, 0xE89C0F6FF369B427ULL }; 1369 BMK_testXXH128(sanityBuffer,2367, 0, expected); /* two blocks, last stripe is overlapping */ 1370 } 1371 { XXH128_hash_t const expected = { 0x6F5360AE69C2F406ULL, 0xD23AAE4B76C31ECBULL }; 1372 BMK_testXXH128(sanityBuffer,2367, PRIME32, expected); /* two blocks, last stripe is overlapping */ 1373 } 1374 1375 /* XXH128 with custom Secret */ 1376 { const void* const secret = sanityBuffer + 7; 1377 const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11; 1378 assert(sizeof(sanityBuffer) >= 7 + secretSize); 1379 1380 { XXH128_hash_t const expected = { 0x005923CCEECBE8AEULL, 0x5F70F4EA232F1D38ULL }; 1381 BMK_testXXH128_withSecret(NULL, 0, secret, secretSize, expected); /* empty string */ 1382 } 1383 { XXH128_hash_t const expected = { 0x8A52451418B2DA4DULL, 0x3A66AF5A9819198EULL }; 1384 BMK_testXXH128_withSecret(sanityBuffer, 1, secret, secretSize, expected); /* 1-3 */ 1385 } 1386 { XXH128_hash_t const expected = { 0x0B61C8ACA7D4778FULL, 0x376BD91B6432F36DULL }; 1387 BMK_testXXH128_withSecret(sanityBuffer, 6, secret, secretSize, expected); /* 4-8 */ 1388 } 1389 { XXH128_hash_t const expected = { 0xAF82F6EBA263D7D8ULL, 0x90A3C2D839F57D0FULL }; 1390 BMK_testXXH128_withSecret(sanityBuffer, 12, secret, secretSize, expected); /* 9-16 */ 1391 } 1392 } 1393 1394 /* secret generator */ 1395 { verifSample_t const expected = { { 0xB8, 0x26, 0x83, 0x7E } }; 1396 BMK_testSecretGenerator(NULL, 0, expected); 1397 } 1398 1399 { verifSample_t const expected = { { 0xA6, 0x16, 0x06, 0x7B } }; 1400 BMK_testSecretGenerator(sanityBuffer, 1, expected); 1401 } 1402 1403 { verifSample_t const expected = { { 0xDA, 0x2A, 0x12, 0x11 } }; 1404 BMK_testSecretGenerator(sanityBuffer, XXH3_SECRET_SIZE_MIN - 1, expected); 1405 } 1406 1407 { verifSample_t const expected = { { 0x7E, 0x48, 0x0C, 0xA7 } }; 1408 BMK_testSecretGenerator(sanityBuffer, XXH3_SECRET_DEFAULT_SIZE + 500, expected); 1409 } 1410 1411 DISPLAYLEVEL(3, "\r%70s\r", ""); /* Clean display line */ 1412 DISPLAYLEVEL(3, "Sanity check -- all tests ok\n"); 1413 } 1414 1415 1416 /* ******************************************************** 1417 * File Hashing 1418 **********************************************************/ 1419 #if defined(_MSC_VER) 1420 typedef struct __stat64 stat_t; 1421 typedef int mode_t; 1422 #else 1423 typedef struct stat stat_t; 1424 #endif 1425 1426 #include <sys/types.h> /* struct stat / __start64 */ 1427 #include <sys/stat.h> /* stat() / _stat64() */ 1428 1429 int XSUM_isDirectory(const char* infilename) 1430 { 1431 stat_t statbuf; 1432 #if defined(_MSC_VER) 1433 int const r = _stat64(infilename, &statbuf); 1434 if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; 1435 #else 1436 int const r = stat(infilename, &statbuf); 1437 if (!r && S_ISDIR(statbuf.st_mode)) return 1; 1438 #endif 1439 return 0; 1440 } 1441 1442 /* for support of --little-endian display mode */ 1443 static void BMK_display_LittleEndian(const void* ptr, size_t length) 1444 { 1445 const U8* const p = (const U8*)ptr; 1446 size_t idx; 1447 for (idx=length-1; idx<length; idx--) /* intentional underflow to negative to detect end */ 1448 DISPLAYRESULT("%02x", p[idx]); 1449 } 1450 1451 static void BMK_display_BigEndian(const void* ptr, size_t length) 1452 { 1453 const U8* const p = (const U8*)ptr; 1454 size_t idx; 1455 for (idx=0; idx<length; idx++) 1456 DISPLAYRESULT("%02x", p[idx]); 1457 } 1458 1459 typedef union { 1460 XXH32_hash_t xxh32; 1461 XXH64_hash_t xxh64; 1462 XXH128_hash_t xxh128; 1463 } Multihash; 1464 1465 /* 1466 * XSUM_hashStream: 1467 * Reads data from `inFile`, generating an incremental hash of type hashType, 1468 * using `buffer` of size `blockSize` for temporary storage. 1469 */ 1470 static Multihash 1471 XSUM_hashStream(FILE* inFile, 1472 AlgoSelected hashType, 1473 void* buffer, size_t blockSize) 1474 { 1475 XXH32_state_t state32; 1476 XXH64_state_t state64; 1477 XXH3_state_t state128; 1478 1479 /* Init */ 1480 (void)XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED); 1481 (void)XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED); 1482 (void)XXH3_128bits_reset(&state128); 1483 1484 /* Load file & update hash */ 1485 { size_t readSize; 1486 while ((readSize = fread(buffer, 1, blockSize, inFile)) > 0) { 1487 switch(hashType) 1488 { 1489 case algo_xxh32: 1490 (void)XXH32_update(&state32, buffer, readSize); 1491 break; 1492 case algo_xxh64: 1493 (void)XXH64_update(&state64, buffer, readSize); 1494 break; 1495 case algo_xxh128: 1496 (void)XXH3_128bits_update(&state128, buffer, readSize); 1497 break; 1498 default: 1499 assert(0); 1500 } 1501 } 1502 if (ferror(inFile)) { 1503 DISPLAY("Error: a failure occurred reading the input file.\n"); 1504 exit(1); 1505 } } 1506 1507 { Multihash finalHash = {0}; 1508 switch(hashType) 1509 { 1510 case algo_xxh32: 1511 finalHash.xxh32 = XXH32_digest(&state32); 1512 break; 1513 case algo_xxh64: 1514 finalHash.xxh64 = XXH64_digest(&state64); 1515 break; 1516 case algo_xxh128: 1517 finalHash.xxh128 = XXH3_128bits_digest(&state128); 1518 break; 1519 default: 1520 assert(0); 1521 } 1522 return finalHash; 1523 } 1524 } 1525 1526 /* algo_xxh32, algo_xxh64, algo_xxh128 */ 1527 static const char* XSUM_algoName[] = { "XXH32", "XXH64", "XXH128" }; 1528 static const char* XSUM_algoLE_name[] = { "XXH32_LE", "XXH64_LE", "XXH128_LE" }; 1529 static const size_t XSUM_algoLength[] = { 4, 8, 16 }; 1530 1531 #define XSUM_TABLE_ELT_SIZE(table) (sizeof(table) / sizeof(*table)) 1532 1533 typedef void (*XSUM_displayHash_f)(const void*, size_t); /* display function signature */ 1534 1535 static void XSUM_printLine_BSD_internal(const char* filename, 1536 const void* canonicalHash, const AlgoSelected hashType, 1537 const char* algoString[], 1538 XSUM_displayHash_f f_displayHash) 1539 { 1540 assert(0 <= hashType && hashType <= XSUM_TABLE_ELT_SIZE(XSUM_algoName)); 1541 { const char* const typeString = algoString[hashType]; 1542 const size_t hashLength = XSUM_algoLength[hashType]; 1543 DISPLAYRESULT("%s (%s) = ", typeString, filename); 1544 f_displayHash(canonicalHash, hashLength); 1545 DISPLAYRESULT("\n"); 1546 } } 1547 1548 static void XSUM_printLine_BSD_LE(const char* filename, const void* canonicalHash, const AlgoSelected hashType) 1549 { 1550 XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoLE_name, BMK_display_LittleEndian); 1551 } 1552 1553 static void XSUM_printLine_BSD(const char* filename, const void* canonicalHash, const AlgoSelected hashType) 1554 { 1555 XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoName, BMK_display_BigEndian); 1556 } 1557 1558 static void XSUM_printLine_GNU_internal(const char* filename, 1559 const void* canonicalHash, const AlgoSelected hashType, 1560 XSUM_displayHash_f f_displayHash) 1561 { 1562 assert(0 <= hashType && hashType <= XSUM_TABLE_ELT_SIZE(XSUM_algoName)); 1563 { const size_t hashLength = XSUM_algoLength[hashType]; 1564 f_displayHash(canonicalHash, hashLength); 1565 DISPLAYRESULT(" %s\n", filename); 1566 } } 1567 1568 static void XSUM_printLine_GNU(const char* filename, 1569 const void* canonicalHash, const AlgoSelected hashType) 1570 { 1571 XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, BMK_display_BigEndian); 1572 } 1573 1574 static void XSUM_printLine_GNU_LE(const char* filename, 1575 const void* canonicalHash, const AlgoSelected hashType) 1576 { 1577 XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, BMK_display_LittleEndian); 1578 } 1579 1580 typedef enum { big_endian, little_endian} Display_endianess; 1581 1582 typedef enum { display_gnu, display_bsd } Display_convention; 1583 1584 typedef void (*XSUM_displayLine_f)(const char*, const void*, AlgoSelected); /* line display signature */ 1585 1586 static XSUM_displayLine_f XSUM_kDisplayLine_fTable[2][2] = { 1587 { XSUM_printLine_GNU, XSUM_printLine_GNU_LE }, 1588 { XSUM_printLine_BSD, XSUM_printLine_BSD_LE } 1589 }; 1590 1591 static int XSUM_hashFile(const char* fileName, 1592 const AlgoSelected hashType, 1593 const Display_endianess displayEndianess, 1594 const Display_convention convention) 1595 { 1596 size_t const blockSize = 64 KB; 1597 XSUM_displayLine_f const f_displayLine = XSUM_kDisplayLine_fTable[convention][displayEndianess]; 1598 FILE* inFile; 1599 Multihash hashValue; 1600 assert(displayEndianess==big_endian || displayEndianess==little_endian); 1601 assert(convention==display_gnu || convention==display_bsd); 1602 1603 /* Check file existence */ 1604 if (fileName == stdinName) { 1605 inFile = stdin; 1606 fileName = "stdin"; 1607 SET_BINARY_MODE(stdin); 1608 } else { 1609 if (XSUM_isDirectory(fileName)) { 1610 DISPLAY("xxhsum: %s: Is a directory \n", fileName); 1611 return 1; 1612 } 1613 inFile = XXH_fopen( fileName, "rb" ); 1614 if (inFile==NULL) { 1615 DISPLAY("Error: Could not open '%s': %s. \n", fileName, strerror(errno)); 1616 return 1; 1617 } } 1618 1619 /* Memory allocation & streaming */ 1620 { void* const buffer = malloc(blockSize); 1621 if (buffer == NULL) { 1622 DISPLAY("\nError: Out of memory.\n"); 1623 fclose(inFile); 1624 return 1; 1625 } 1626 1627 /* Stream file & update hash */ 1628 hashValue = XSUM_hashStream(inFile, hashType, buffer, blockSize); 1629 1630 fclose(inFile); 1631 free(buffer); 1632 } 1633 1634 /* display Hash value in selected format */ 1635 switch(hashType) 1636 { 1637 case algo_xxh32: 1638 { XXH32_canonical_t hcbe32; 1639 (void)XXH32_canonicalFromHash(&hcbe32, hashValue.xxh32); 1640 f_displayLine(fileName, &hcbe32, hashType); 1641 break; 1642 } 1643 case algo_xxh64: 1644 { XXH64_canonical_t hcbe64; 1645 (void)XXH64_canonicalFromHash(&hcbe64, hashValue.xxh64); 1646 f_displayLine(fileName, &hcbe64, hashType); 1647 break; 1648 } 1649 case algo_xxh128: 1650 { XXH128_canonical_t hcbe128; 1651 (void)XXH128_canonicalFromHash(&hcbe128, hashValue.xxh128); 1652 f_displayLine(fileName, &hcbe128, hashType); 1653 break; 1654 } 1655 default: 1656 assert(0); /* not possible */ 1657 } 1658 1659 return 0; 1660 } 1661 1662 1663 /* 1664 * XSUM_hashFiles: 1665 * If fnTotal==0, read from stdin instead. 1666 */ 1667 static int XSUM_hashFiles(const char*const * fnList, int fnTotal, 1668 AlgoSelected hashType, 1669 Display_endianess displayEndianess, 1670 Display_convention convention) 1671 { 1672 int fnNb; 1673 int result = 0; 1674 1675 if (fnTotal==0) 1676 return XSUM_hashFile(stdinName, hashType, displayEndianess, convention); 1677 1678 for (fnNb=0; fnNb<fnTotal; fnNb++) 1679 result |= XSUM_hashFile(fnList[fnNb], hashType, displayEndianess, convention); 1680 DISPLAYLEVEL(2, "\r%70s\r", ""); 1681 return result; 1682 } 1683 1684 1685 typedef enum { 1686 GetLine_ok, 1687 GetLine_eof, 1688 GetLine_exceedMaxLineLength, 1689 GetLine_outOfMemory 1690 } GetLineResult; 1691 1692 typedef enum { 1693 CanonicalFromString_ok, 1694 CanonicalFromString_invalidFormat 1695 } CanonicalFromStringResult; 1696 1697 typedef enum { 1698 ParseLine_ok, 1699 ParseLine_invalidFormat 1700 } ParseLineResult; 1701 1702 typedef enum { 1703 LineStatus_hashOk, 1704 LineStatus_hashFailed, 1705 LineStatus_failedToOpen 1706 } LineStatus; 1707 1708 typedef union { 1709 XXH32_canonical_t xxh32; 1710 XXH64_canonical_t xxh64; 1711 XXH128_canonical_t xxh128; 1712 } Canonical; 1713 1714 typedef struct { 1715 Canonical canonical; 1716 const char* filename; 1717 int xxhBits; /* canonical type: 32:xxh32, 64:xxh64, 128:xxh128 */ 1718 } ParsedLine; 1719 1720 typedef struct { 1721 unsigned long nProperlyFormattedLines; 1722 unsigned long nImproperlyFormattedLines; 1723 unsigned long nMismatchedChecksums; 1724 unsigned long nOpenOrReadFailures; 1725 unsigned long nMixedFormatLines; 1726 int quit; 1727 } ParseFileReport; 1728 1729 typedef struct { 1730 const char* inFileName; 1731 FILE* inFile; 1732 int lineMax; 1733 char* lineBuf; 1734 size_t blockSize; 1735 char* blockBuf; 1736 U32 strictMode; 1737 U32 statusOnly; 1738 U32 warn; 1739 U32 quiet; 1740 ParseFileReport report; 1741 } ParseFileArg; 1742 1743 1744 /* 1745 * Reads a line from stream `inFile`. 1746 * Returns GetLine_ok, if it reads line successfully. 1747 * Returns GetLine_eof, if stream reaches EOF. 1748 * Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH. 1749 * Returns GetLine_outOfMemory, if line buffer memory allocation failed. 1750 */ 1751 static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile) 1752 { 1753 GetLineResult result = GetLine_ok; 1754 size_t len = 0; 1755 1756 if ((*lineBuf == NULL) || (*lineMax<1)) { 1757 free(*lineBuf); /* in case it's != NULL */ 1758 *lineMax = 0; 1759 *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH); 1760 if(*lineBuf == NULL) return GetLine_outOfMemory; 1761 *lineMax = DEFAULT_LINE_LENGTH; 1762 } 1763 1764 for (;;) { 1765 const int c = fgetc(inFile); 1766 if (c == EOF) { 1767 /* 1768 * If we meet EOF before first character, returns GetLine_eof, 1769 * otherwise GetLine_ok. 1770 */ 1771 if (len == 0) result = GetLine_eof; 1772 break; 1773 } 1774 1775 /* Make enough space for len+1 (for final NUL) bytes. */ 1776 if (len+1 >= (size_t)*lineMax) { 1777 char* newLineBuf = NULL; 1778 size_t newBufSize = (size_t)*lineMax; 1779 1780 newBufSize += (newBufSize/2) + 1; /* x 1.5 */ 1781 if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH; 1782 if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength; 1783 1784 newLineBuf = (char*) realloc(*lineBuf, newBufSize); 1785 if (newLineBuf == NULL) return GetLine_outOfMemory; 1786 1787 *lineBuf = newLineBuf; 1788 *lineMax = (int)newBufSize; 1789 } 1790 1791 if (c == '\n') break; 1792 (*lineBuf)[len++] = (char) c; 1793 } 1794 1795 (*lineBuf)[len] = '\0'; 1796 return result; 1797 } 1798 1799 1800 /* 1801 * Converts one hexadecimal character to integer. 1802 * Returns -1 if the given character is not hexadecimal. 1803 */ 1804 static int charToHex(char c) 1805 { 1806 int result = -1; 1807 if (c >= '0' && c <= '9') { 1808 result = (int) (c - '0'); 1809 } else if (c >= 'A' && c <= 'F') { 1810 result = (int) (c - 'A') + 0x0a; 1811 } else if (c >= 'a' && c <= 'f') { 1812 result = (int) (c - 'a') + 0x0a; 1813 } 1814 return result; 1815 } 1816 1817 1818 /* 1819 * Converts canonical ASCII hexadecimal string `hashStr` 1820 * to the big endian binary representation in unsigned char array `dst`. 1821 * 1822 * Returns CanonicalFromString_invalidFormat if hashStr is not well formatted. 1823 * Returns CanonicalFromString_ok if hashStr is parsed successfully. 1824 */ 1825 static CanonicalFromStringResult canonicalFromString(unsigned char* dst, 1826 size_t dstSize, 1827 const char* hashStr, 1828 int reverseBytes) 1829 { 1830 size_t i; 1831 for (i = 0; i < dstSize; ++i) { 1832 int h0, h1; 1833 size_t j = reverseBytes ? dstSize - i - 1 : i; 1834 1835 h0 = charToHex(hashStr[j*2 + 0]); 1836 if (h0 < 0) return CanonicalFromString_invalidFormat; 1837 1838 h1 = charToHex(hashStr[j*2 + 1]); 1839 if (h1 < 0) return CanonicalFromString_invalidFormat; 1840 1841 dst[i] = (unsigned char) ((h0 << 4) | h1); 1842 } 1843 return CanonicalFromString_ok; 1844 } 1845 1846 1847 /* 1848 * Parse single line of xxHash checksum file. 1849 * Returns ParseLine_invalidFormat if the line is not well formatted. 1850 * Returns ParseLine_ok if the line is parsed successfully. 1851 * And members of parseLine will be filled by parsed values. 1852 * 1853 * - line must be terminated with '\0' without a trailing newline. 1854 * - Since parsedLine.filename will point within given argument `line`, 1855 * users must keep `line`s content when they are using parsedLine. 1856 * - The line may be modified to carve up the information it contains. 1857 * 1858 * xxHash checksum lines should have the following format: 1859 * 1860 * <8, 16, or 32 hexadecimal char> <space> <space> <filename...> <'\0'> 1861 * 1862 * or: 1863 * 1864 * <algorithm> <' ('> <filename> <') = '> <hexstring> <'\0'> 1865 */ 1866 static ParseLineResult parseLine(ParsedLine* parsedLine, char* line, int rev) 1867 { 1868 char* const firstSpace = strchr(line, ' '); 1869 const char* hash_ptr; 1870 size_t hash_len; 1871 1872 parsedLine->filename = NULL; 1873 parsedLine->xxhBits = 0; 1874 1875 if (firstSpace == NULL || !firstSpace[1]) return ParseLine_invalidFormat; 1876 1877 if (firstSpace[1] == '(') { 1878 char* lastSpace = strrchr(line, ' '); 1879 if (lastSpace - firstSpace < 5) return ParseLine_invalidFormat; 1880 if (lastSpace[-1] != '=' || lastSpace[-2] != ' ' || lastSpace[-3] != ')') return ParseLine_invalidFormat; 1881 lastSpace[-3] = '\0'; /* Terminate the filename */ 1882 *firstSpace = '\0'; 1883 rev = strstr(line, "_LE") != NULL; /* was output little-endian */ 1884 hash_ptr = lastSpace + 1; 1885 hash_len = strlen(hash_ptr); 1886 /* NOTE: This currently ignores the hash description at the start of the string. 1887 * In the future we should parse it and verify that it matches the hash length. 1888 * It could also be used to allow both XXH64 & XXH3_64bits to be differentiated. */ 1889 } else { 1890 hash_ptr = line; 1891 hash_len = (size_t)(firstSpace - line); 1892 } 1893 1894 switch (hash_len) 1895 { 1896 case 8: 1897 { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32; 1898 if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), hash_ptr, rev) 1899 != CanonicalFromString_ok) { 1900 return ParseLine_invalidFormat; 1901 } 1902 parsedLine->xxhBits = 32; 1903 break; 1904 } 1905 1906 case 16: 1907 { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64; 1908 if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), hash_ptr, rev) 1909 != CanonicalFromString_ok) { 1910 return ParseLine_invalidFormat; 1911 } 1912 parsedLine->xxhBits = 64; 1913 break; 1914 } 1915 1916 case 32: 1917 { XXH128_canonical_t* xxh128c = &parsedLine->canonical.xxh128; 1918 if (canonicalFromString(xxh128c->digest, sizeof(xxh128c->digest), hash_ptr, rev) 1919 != CanonicalFromString_ok) { 1920 return ParseLine_invalidFormat; 1921 } 1922 parsedLine->xxhBits = 128; 1923 break; 1924 } 1925 1926 default: 1927 return ParseLine_invalidFormat; 1928 break; 1929 } 1930 1931 /* note : skipping second separation character, which can be anything, 1932 * allowing insertion of custom markers such as '*' */ 1933 parsedLine->filename = firstSpace + 2; 1934 return ParseLine_ok; 1935 } 1936 1937 1938 /*! 1939 * Parse xxHash checksum file. 1940 */ 1941 static void parseFile1(ParseFileArg* parseFileArg, int rev) 1942 { 1943 const char* const inFileName = parseFileArg->inFileName; 1944 ParseFileReport* const report = &parseFileArg->report; 1945 1946 unsigned long lineNumber = 0; 1947 memset(report, 0, sizeof(*report)); 1948 1949 while (!report->quit) { 1950 LineStatus lineStatus = LineStatus_hashFailed; 1951 ParsedLine parsedLine; 1952 memset(&parsedLine, 0, sizeof(parsedLine)); 1953 1954 lineNumber++; 1955 if (lineNumber == 0) { 1956 /* This is unlikely happen, but md5sum.c has this error check. */ 1957 DISPLAY("%s: Error: Too many checksum lines\n", inFileName); 1958 report->quit = 1; 1959 break; 1960 } 1961 1962 { GetLineResult const getLineResult = getLine(&parseFileArg->lineBuf, 1963 &parseFileArg->lineMax, 1964 parseFileArg->inFile); 1965 if (getLineResult != GetLine_ok) { 1966 if (getLineResult == GetLine_eof) break; 1967 1968 switch (getLineResult) 1969 { 1970 case GetLine_ok: 1971 case GetLine_eof: 1972 /* These cases never happen. See above getLineResult related "if"s. 1973 They exist just for make gcc's -Wswitch-enum happy. */ 1974 assert(0); 1975 break; 1976 1977 default: 1978 DISPLAY("%s:%lu: Error: Unknown error.\n", inFileName, lineNumber); 1979 break; 1980 1981 case GetLine_exceedMaxLineLength: 1982 DISPLAY("%s:%lu: Error: Line too long.\n", inFileName, lineNumber); 1983 break; 1984 1985 case GetLine_outOfMemory: 1986 DISPLAY("%s:%lu: Error: Out of memory.\n", inFileName, lineNumber); 1987 break; 1988 } 1989 report->quit = 1; 1990 break; 1991 } } 1992 1993 if (parseLine(&parsedLine, parseFileArg->lineBuf, rev) != ParseLine_ok) { 1994 report->nImproperlyFormattedLines++; 1995 if (parseFileArg->warn) { 1996 DISPLAY("%s:%lu: Error: Improperly formatted checksum line.\n", 1997 inFileName, lineNumber); 1998 } 1999 continue; 2000 } 2001 2002 report->nProperlyFormattedLines++; 2003 2004 do { 2005 FILE* const fp = XXH_fopen(parsedLine.filename, "rb"); 2006 if (fp == NULL) { 2007 lineStatus = LineStatus_failedToOpen; 2008 break; 2009 } 2010 lineStatus = LineStatus_hashFailed; 2011 switch (parsedLine.xxhBits) 2012 { 2013 case 32: 2014 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh32, parseFileArg->blockBuf, parseFileArg->blockSize); 2015 if (xxh.xxh32 == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) { 2016 lineStatus = LineStatus_hashOk; 2017 } } 2018 break; 2019 2020 case 64: 2021 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh64, parseFileArg->blockBuf, parseFileArg->blockSize); 2022 if (xxh.xxh64 == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) { 2023 lineStatus = LineStatus_hashOk; 2024 } } 2025 break; 2026 2027 case 128: 2028 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh128, parseFileArg->blockBuf, parseFileArg->blockSize); 2029 if (XXH128_isEqual(xxh.xxh128, XXH128_hashFromCanonical(&parsedLine.canonical.xxh128))) { 2030 lineStatus = LineStatus_hashOk; 2031 } } 2032 break; 2033 2034 default: 2035 break; 2036 } 2037 fclose(fp); 2038 } while (0); 2039 2040 switch (lineStatus) 2041 { 2042 default: 2043 DISPLAY("%s: Error: Unknown error.\n", inFileName); 2044 report->quit = 1; 2045 break; 2046 2047 case LineStatus_failedToOpen: 2048 report->nOpenOrReadFailures++; 2049 if (!parseFileArg->statusOnly) { 2050 DISPLAYRESULT("%s:%lu: Could not open or read '%s': %s.\n", 2051 inFileName, lineNumber, parsedLine.filename, strerror(errno)); 2052 } 2053 break; 2054 2055 case LineStatus_hashOk: 2056 case LineStatus_hashFailed: 2057 { int b = 1; 2058 if (lineStatus == LineStatus_hashOk) { 2059 /* If --quiet is specified, don't display "OK" */ 2060 if (parseFileArg->quiet) b = 0; 2061 } else { 2062 report->nMismatchedChecksums++; 2063 } 2064 2065 if (b && !parseFileArg->statusOnly) { 2066 DISPLAYRESULT("%s: %s\n", parsedLine.filename 2067 , lineStatus == LineStatus_hashOk ? "OK" : "FAILED"); 2068 } } 2069 break; 2070 } 2071 } /* while (!report->quit) */ 2072 } 2073 2074 2075 /* Parse xxHash checksum file. 2076 * Returns 1, if all procedures were succeeded. 2077 * Returns 0, if any procedures was failed. 2078 * 2079 * If strictMode != 0, return error code if any line is invalid. 2080 * If statusOnly != 0, don't generate any output. 2081 * If warn != 0, print a warning message to stderr. 2082 * If quiet != 0, suppress "OK" line. 2083 * 2084 * "All procedures are succeeded" means: 2085 * - Checksum file contains at least one line and less than SIZE_T_MAX lines. 2086 * - All files are properly opened and read. 2087 * - All hash values match with its content. 2088 * - (strict mode) All lines in checksum file are consistent and well formatted. 2089 */ 2090 static int checkFile(const char* inFileName, 2091 const Display_endianess displayEndianess, 2092 U32 strictMode, 2093 U32 statusOnly, 2094 U32 warn, 2095 U32 quiet) 2096 { 2097 int result = 0; 2098 FILE* inFile = NULL; 2099 ParseFileArg parseFileArgBody; 2100 ParseFileArg* const parseFileArg = &parseFileArgBody; 2101 ParseFileReport* const report = &parseFileArg->report; 2102 2103 /* note: stdinName is special constant pointer. It is not a string. */ 2104 if (inFileName == stdinName) { 2105 /* 2106 * Note: Since we expect text input for xxhash -c mode, 2107 * we don't set binary mode for stdin. 2108 */ 2109 inFileName = "stdin"; 2110 inFile = stdin; 2111 } else { 2112 inFile = XXH_fopen( inFileName, "rt" ); 2113 } 2114 2115 if (inFile == NULL) { 2116 DISPLAY("Error: Could not open '%s': %s\n", inFileName, strerror(errno)); 2117 return 0; 2118 } 2119 2120 parseFileArg->inFileName = inFileName; 2121 parseFileArg->inFile = inFile; 2122 parseFileArg->lineMax = DEFAULT_LINE_LENGTH; 2123 parseFileArg->lineBuf = (char*) malloc((size_t)parseFileArg->lineMax); 2124 parseFileArg->blockSize = 64 * 1024; 2125 parseFileArg->blockBuf = (char*) malloc(parseFileArg->blockSize); 2126 parseFileArg->strictMode = strictMode; 2127 parseFileArg->statusOnly = statusOnly; 2128 parseFileArg->warn = warn; 2129 parseFileArg->quiet = quiet; 2130 2131 if ( (parseFileArg->lineBuf == NULL) 2132 || (parseFileArg->blockBuf == NULL) ) { 2133 DISPLAY("Error: : memory allocation failed \n"); 2134 exit(1); 2135 } 2136 parseFile1(parseFileArg, displayEndianess != big_endian); 2137 2138 free(parseFileArg->blockBuf); 2139 free(parseFileArg->lineBuf); 2140 2141 if (inFile != stdin) fclose(inFile); 2142 2143 /* Show error/warning messages. All messages are copied from md5sum.c 2144 */ 2145 if (report->nProperlyFormattedLines == 0) { 2146 DISPLAY("%s: no properly formatted xxHash checksum lines found\n", inFileName); 2147 } else if (!statusOnly) { 2148 if (report->nImproperlyFormattedLines) { 2149 DISPLAYRESULT("%lu %s improperly formatted\n" 2150 , report->nImproperlyFormattedLines 2151 , report->nImproperlyFormattedLines == 1 ? "line is" : "lines are"); 2152 } 2153 if (report->nOpenOrReadFailures) { 2154 DISPLAYRESULT("%lu listed %s could not be read\n" 2155 , report->nOpenOrReadFailures 2156 , report->nOpenOrReadFailures == 1 ? "file" : "files"); 2157 } 2158 if (report->nMismatchedChecksums) { 2159 DISPLAYRESULT("%lu computed %s did NOT match\n" 2160 , report->nMismatchedChecksums 2161 , report->nMismatchedChecksums == 1 ? "checksum" : "checksums"); 2162 } } 2163 2164 /* Result (exit) code logic is copied from 2165 * gnu coreutils/src/md5sum.c digest_check() */ 2166 result = report->nProperlyFormattedLines != 0 2167 && report->nMismatchedChecksums == 0 2168 && report->nOpenOrReadFailures == 0 2169 && (!strictMode || report->nImproperlyFormattedLines == 0) 2170 && report->quit == 0; 2171 return result; 2172 } 2173 2174 2175 static int checkFiles(const char*const* fnList, int fnTotal, 2176 const Display_endianess displayEndianess, 2177 U32 strictMode, 2178 U32 statusOnly, 2179 U32 warn, 2180 U32 quiet) 2181 { 2182 int ok = 1; 2183 2184 /* Special case for stdinName "-", 2185 * note: stdinName is not a string. It's special pointer. */ 2186 if (fnTotal==0) { 2187 ok &= checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet); 2188 } else { 2189 int fnNb; 2190 for (fnNb=0; fnNb<fnTotal; fnNb++) 2191 ok &= checkFile(fnList[fnNb], displayEndianess, strictMode, statusOnly, warn, quiet); 2192 } 2193 return ok ? 0 : 1; 2194 } 2195 2196 2197 /* ******************************************************** 2198 * Main 2199 **********************************************************/ 2200 2201 static int usage(const char* exename) 2202 { 2203 DISPLAY( WELCOME_MESSAGE(exename) ); 2204 DISPLAY( "Print or verify checksums using fast non-cryptographic algorithm xxHash \n\n" ); 2205 DISPLAY( "Usage: %s [options] [files] \n\n", exename); 2206 DISPLAY( "When no filename provided or when '-' is provided, uses stdin as input. \n"); 2207 DISPLAY( "Options: \n"); 2208 DISPLAY( " -H# algorithm selection: 0,1,2 or 32,64,128 (default: %i) \n", (int)g_defaultAlgo); 2209 DISPLAY( " -c, --check read xxHash checksum from [files] and check them \n"); 2210 DISPLAY( " -h, --help display a long help page about advanced options \n"); 2211 return 0; 2212 } 2213 2214 2215 static int usage_advanced(const char* exename) 2216 { 2217 usage(exename); 2218 DISPLAY( "Advanced :\n"); 2219 DISPLAY( " -V, --version Display version information \n"); 2220 DISPLAY( " --tag Produce BSD-style checksum lines \n"); 2221 DISPLAY( " --little-endian Checksum values use little endian convention (default: big endian) \n"); 2222 DISPLAY( " -b Run benchmark \n"); 2223 DISPLAY( " -b# Bench only algorithm variant # \n"); 2224 DISPLAY( " -i# Number of times to run the benchmark (default: %u) \n", (unsigned)g_nbIterations); 2225 DISPLAY( " -q, --quiet Don't display version header in benchmark mode \n"); 2226 DISPLAY( "\n"); 2227 DISPLAY( "The following four options are useful only when verifying checksums (-c): \n"); 2228 DISPLAY( " -q, --quiet Don't print OK for each successfully verified file \n"); 2229 DISPLAY( " --status Don't output anything, status code shows success \n"); 2230 DISPLAY( " --strict Exit non-zero for improperly formatted checksum lines \n"); 2231 DISPLAY( " --warn Warn about improperly formatted checksum lines \n"); 2232 return 0; 2233 } 2234 2235 static int badusage(const char* exename) 2236 { 2237 DISPLAY("Wrong parameters\n\n"); 2238 usage(exename); 2239 return 1; 2240 } 2241 2242 static void errorOut(const char* msg) 2243 { 2244 DISPLAY("%s \n", msg); exit(1); 2245 } 2246 2247 static const char* lastNameFromPath(const char* path) 2248 { 2249 const char* name = path; 2250 if (strrchr(name, '/')) name = strrchr(name, '/') + 1; 2251 if (strrchr(name, '\\')) name = strrchr(name, '\\') + 1; /* windows */ 2252 return name; 2253 } 2254 2255 /*! 2256 * readU32FromCharChecked(): 2257 * @return 0 if success, and store the result in *value. 2258 * Allows and interprets K, KB, KiB, M, MB and MiB suffix. 2259 * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 2260 * @return 1 if an overflow error occurs 2261 */ 2262 static int readU32FromCharChecked(const char** stringPtr, U32* value) 2263 { 2264 static const U32 max = (((U32)(-1)) / 10) - 1; 2265 U32 result = 0; 2266 while ((**stringPtr >='0') && (**stringPtr <='9')) { 2267 if (result > max) return 1; /* overflow error */ 2268 result *= 10; 2269 result += (U32)(**stringPtr - '0'); 2270 (*stringPtr)++ ; 2271 } 2272 if ((**stringPtr=='K') || (**stringPtr=='M')) { 2273 U32 const maxK = ((U32)(-1)) >> 10; 2274 if (result > maxK) return 1; /* overflow error */ 2275 result <<= 10; 2276 if (**stringPtr=='M') { 2277 if (result > maxK) return 1; /* overflow error */ 2278 result <<= 10; 2279 } 2280 (*stringPtr)++; /* skip `K` or `M` */ 2281 if (**stringPtr=='i') (*stringPtr)++; 2282 if (**stringPtr=='B') (*stringPtr)++; 2283 } 2284 *value = result; 2285 return 0; 2286 } 2287 2288 /*! 2289 * readU32FromChar(): 2290 * @return: unsigned integer value read from input in `char` format. 2291 * allows and interprets K, KB, KiB, M, MB and MiB suffix. 2292 * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 2293 * Note: function will exit() program if digit sequence overflows 2294 */ 2295 static U32 readU32FromChar(const char** stringPtr) { 2296 U32 result; 2297 if (readU32FromCharChecked(stringPtr, &result)) { 2298 static const char errorMsg[] = "Error: numeric value too large"; 2299 errorOut(errorMsg); 2300 } 2301 return result; 2302 } 2303 2304 static int XXH_main(int argc, const char* const* argv) 2305 { 2306 int i, filenamesStart = 0; 2307 const char* const exename = lastNameFromPath(argv[0]); 2308 U32 benchmarkMode = 0; 2309 U32 fileCheckMode = 0; 2310 U32 strictMode = 0; 2311 U32 statusOnly = 0; 2312 U32 warn = 0; 2313 int explicitStdin = 0; 2314 U32 selectBenchIDs= 0; /* 0 == use default k_testIDs_default, kBenchAll == bench all */ 2315 static const U32 kBenchAll = 99; 2316 size_t keySize = XXH_DEFAULT_SAMPLE_SIZE; 2317 AlgoSelected algo = g_defaultAlgo; 2318 Display_endianess displayEndianess = big_endian; 2319 Display_convention convention = display_gnu; 2320 2321 /* special case: xxhNNsum default to NN bits checksum */ 2322 if (strstr(exename, "xxh32sum") != NULL) algo = g_defaultAlgo = algo_xxh32; 2323 if (strstr(exename, "xxh64sum") != NULL) algo = g_defaultAlgo = algo_xxh64; 2324 if (strstr(exename, "xxh128sum") != NULL) algo = g_defaultAlgo = algo_xxh128; 2325 2326 for (i=1; i<argc; i++) { 2327 const char* argument = argv[i]; 2328 assert(argument != NULL); 2329 2330 if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; } 2331 if (!strcmp(argument, "--benchmark-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; } 2332 if (!strcmp(argument, "--bench-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; } 2333 if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } 2334 if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; } 2335 if (!strcmp(argument, "--strict")) { strictMode = 1; continue; } 2336 if (!strcmp(argument, "--status")) { statusOnly = 1; continue; } 2337 if (!strcmp(argument, "--warn")) { warn = 1; continue; } 2338 if (!strcmp(argument, "--help")) { return usage_advanced(exename); } 2339 if (!strcmp(argument, "--version")) { DISPLAY(FULL_WELCOME_MESSAGE(exename)); BMK_sanityCheck(); return 0; } 2340 if (!strcmp(argument, "--tag")) { convention = display_bsd; continue; } 2341 2342 if (!strcmp(argument, "--")) { 2343 if (filenamesStart==0 && i!=argc-1) filenamesStart=i+1; /* only supports a continuous list of filenames */ 2344 break; /* treat rest of arguments as strictly file names */ 2345 } 2346 if (*argument != '-') { 2347 if (filenamesStart==0) filenamesStart=i; /* only supports a continuous list of filenames */ 2348 break; /* treat rest of arguments as strictly file names */ 2349 } 2350 2351 /* command selection */ 2352 argument++; /* note: *argument=='-' */ 2353 if (*argument == 0) explicitStdin = 1; 2354 2355 while (*argument != 0) { 2356 switch(*argument) 2357 { 2358 /* Display version */ 2359 case 'V': 2360 DISPLAY(FULL_WELCOME_MESSAGE(exename)); return 0; 2361 2362 /* Display help on usage */ 2363 case 'h': 2364 return usage_advanced(exename); 2365 2366 /* select hash algorithm */ 2367 case 'H': argument++; 2368 switch(readU32FromChar(&argument)) { 2369 case 0 : 2370 case 32: algo = algo_xxh32; break; 2371 case 1 : 2372 case 64: algo = algo_xxh64; break; 2373 case 2 : 2374 case 128: algo = algo_xxh128; break; 2375 default: 2376 return badusage(exename); 2377 } 2378 break; 2379 2380 /* File check mode */ 2381 case 'c': 2382 fileCheckMode=1; 2383 argument++; 2384 break; 2385 2386 /* Warning mode (file check mode only, alias of "--warning") */ 2387 case 'w': 2388 warn=1; 2389 argument++; 2390 break; 2391 2392 /* Trigger benchmark mode */ 2393 case 'b': 2394 argument++; 2395 benchmarkMode = 1; 2396 do { 2397 if (*argument == ',') argument++; 2398 selectBenchIDs = readU32FromChar(&argument); /* select one specific test */ 2399 if (selectBenchIDs < NB_TESTFUNC) { 2400 g_testIDs[selectBenchIDs] = 1; 2401 } else 2402 selectBenchIDs = kBenchAll; 2403 } while (*argument == ','); 2404 break; 2405 2406 /* Modify Nb Iterations (benchmark only) */ 2407 case 'i': 2408 argument++; 2409 g_nbIterations = readU32FromChar(&argument); 2410 break; 2411 2412 /* Modify Block size (benchmark only) */ 2413 case 'B': 2414 argument++; 2415 keySize = readU32FromChar(&argument); 2416 break; 2417 2418 /* Modify verbosity of benchmark output (hidden option) */ 2419 case 'q': 2420 argument++; 2421 g_displayLevel--; 2422 break; 2423 2424 default: 2425 return badusage(exename); 2426 } 2427 } 2428 } /* for(i=1; i<argc; i++) */ 2429 2430 /* Check benchmark mode */ 2431 if (benchmarkMode) { 2432 DISPLAYLEVEL(2, FULL_WELCOME_MESSAGE(exename) ); 2433 BMK_sanityCheck(); 2434 if (selectBenchIDs == 0) memcpy(g_testIDs, k_testIDs_default, sizeof(g_testIDs)); 2435 if (selectBenchIDs == kBenchAll) memset(g_testIDs, 1, sizeof(g_testIDs)); 2436 if (filenamesStart==0) return BMK_benchInternal(keySize); 2437 return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart); 2438 } 2439 2440 /* Check if input is defined as console; trigger an error in this case */ 2441 if ( (filenamesStart==0) && IS_CONSOLE(stdin) && !explicitStdin) 2442 return badusage(exename); 2443 2444 if (filenamesStart==0) filenamesStart = argc; 2445 if (fileCheckMode) { 2446 return checkFiles(argv+filenamesStart, argc-filenamesStart, 2447 displayEndianess, strictMode, statusOnly, warn, (g_displayLevel < 2) /*quiet*/); 2448 } else { 2449 return XSUM_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess, convention); 2450 } 2451 } 2452 2453 /* Windows main wrapper which properly handles UTF-8 command line arguments. */ 2454 #ifdef _WIN32 2455 /* Converts a UTF-16 argv to UTF-8. */ 2456 static char** convert_argv(int argc, const wchar_t* const utf16_argv[]) 2457 { 2458 char** const utf8_argv = (char**)malloc((size_t)(argc + 1) * sizeof(char*)); 2459 if (utf8_argv != NULL) { 2460 int i; 2461 for (i = 0; i < argc; i++) { 2462 utf8_argv[i] = utf16_to_utf8(utf16_argv[i]); 2463 } 2464 utf8_argv[argc] = NULL; 2465 } 2466 return utf8_argv; 2467 } 2468 /* Frees arguments returned by convert_argv */ 2469 static void free_argv(int argc, char** argv) 2470 { 2471 int i; 2472 if (argv == NULL) { 2473 return; 2474 } 2475 for (i = 0; i < argc; i++) { 2476 free(argv[i]); 2477 } 2478 free(argv); 2479 } 2480 2481 2482 /* 2483 * On Windows, main's argv parameter is useless. Instead of UTF-8, you get ANSI 2484 * encoding, and any unknown characters will show up as mojibake. 2485 * 2486 * While this doesn't affect most programs, what does happen is that we can't 2487 * open any files with Unicode filenames. 2488 * 2489 * We instead convert wmain's arguments to UTF-8, preserving Unicode arguments. 2490 * 2491 * This function is wrapped by `__wgetmainargs()` and `main()` below on MinGW 2492 * with Unicode disabled, but if possible, we try to use `wmain()`. 2493 */ 2494 static int XXH_wmain(int argc, const wchar_t* const utf16_argv[]) 2495 { 2496 /* Convert the UTF-16 arguments to UTF-8. */ 2497 char** utf8_argv = convert_argv(argc, utf16_argv); 2498 2499 if (utf8_argv == NULL) { 2500 /* An unfortunate but incredibly unlikely error, */ 2501 fprintf(stderr, "Error converting command line arguments!\n"); 2502 return 1; 2503 } else { 2504 int ret; 2505 2506 /* 2507 * MinGW's terminal uses full block buffering for stderr. 2508 * 2509 * This is nonstandard behavior and causes text to not display until 2510 * the buffer fills. 2511 * 2512 * `setvbuf()` can easily correct this to make text display instantly. 2513 */ 2514 setvbuf(stderr, NULL, _IONBF, 0); 2515 2516 /* Call our real main function */ 2517 ret = XXH_main(argc, (const char* const *) utf8_argv); 2518 2519 /* Cleanup */ 2520 free_argv(argc, utf8_argv); 2521 return ret; 2522 } 2523 } 2524 2525 #if defined(_MSC_VER) /* MSVC always accepts wmain */ \ 2526 || defined(_UNICODE) || defined(UNICODE) /* defined with -municode on MinGW-w64 */ 2527 2528 /* Preferred: Use the real `wmain()`. */ 2529 #if defined(__cplusplus) 2530 extern "C" 2531 #endif 2532 int wmain(int argc, const wchar_t* utf16_argv[]) 2533 { 2534 return XXH_wmain(argc, utf16_argv); 2535 } 2536 2537 #else /* Non-Unicode MinGW */ 2538 2539 /* 2540 * Wrap `XXH_wmain()` using `main()` and `__wgetmainargs()` on MinGW without 2541 * Unicode support. 2542 * 2543 * `__wgetmainargs()` is used in the CRT startup to retrieve the arguments for 2544 * `wmain()`, so we use it on MinGW to emulate `wmain()`. 2545 * 2546 * It is an internal function and not declared in any public headers, so we 2547 * have to declare it manually. 2548 * 2549 * An alternative that doesn't mess with internal APIs is `GetCommandLineW()` 2550 * with `CommandLineToArgvW()`, but the former doesn't expand wildcards and the 2551 * latter requires linking to Shell32.dll and its numerous dependencies. 2552 * 2553 * This method keeps our dependencies to kernel32.dll and the CRT. 2554 * 2555 * https://docs.microsoft.com/en-us/cpp/c-runtime-library/getmainargs-wgetmainargs?view=vs-2019 2556 */ 2557 typedef struct { 2558 int newmode; 2559 } _startupinfo; 2560 2561 #ifdef __cplusplus 2562 extern "C" 2563 #endif 2564 int __cdecl __wgetmainargs( 2565 int* Argc, 2566 wchar_t*** Argv, 2567 wchar_t*** Env, 2568 int DoWildCard, 2569 _startupinfo* StartInfo 2570 ); 2571 2572 int main(int ansi_argc, const char* ansi_argv[]) 2573 { 2574 int utf16_argc; 2575 wchar_t** utf16_argv; 2576 wchar_t** utf16_envp; /* Unused but required */ 2577 _startupinfo startinfo = {0}; /* 0 == don't change new mode */ 2578 2579 /* Get wmain's UTF-16 arguments. Make sure we expand wildcards. */ 2580 if (__wgetmainargs(&utf16_argc, &utf16_argv, &utf16_envp, 1, &startinfo) < 0) 2581 /* In the very unlikely case of an error, use the ANSI arguments. */ 2582 return XXH_main(ansi_argc, ansi_argv); 2583 2584 /* Call XXH_wmain with our UTF-16 arguments */ 2585 return XXH_wmain(utf16_argc, (const wchar_t* const *)utf16_argv); 2586 } 2587 2588 #endif /* Non-Unicode MinGW */ 2589 2590 #else /* Not Windows */ 2591 2592 /* Wrap main normally on non-Windows platforms. */ 2593 int main(int argc, const char* argv[]) 2594 { 2595 return XXH_main(argc, argv); 2596 } 2597 #endif /* !Windows */ 2598