1 /* 2 * main.h: 3 * 4 * This is the main file included by all other modules in md5deep/hashdeep/etc. 5 * 6 * It includes: 7 * common.h - the common system include files 8 * xml.h - the C++ XML system. 9 * hash function headers 10 * 11 * C++ STL stuff. 12 * 13 * It then creates all the C++ classes and structures used. 14 * 15 * $Id$ 16 */ 17 18 19 #ifndef __MAIN_H 20 #define __MAIN_H 21 22 #include "common.h" 23 #include "xml.h" 24 25 #ifdef HAVE_PTHREAD 26 #include "threadpool.h" 27 #endif 28 29 #include <map> 30 #include <vector> 31 32 #if !defined(VERSION) && defined(PACKAGE_VERSION) 33 #define VERSION PACKAGE_VERSION 34 #endif 35 36 #define VERBOSE 1 37 #define MORE_VERBOSE 2 38 #define INSANELY_VERBOSE 3 39 40 /* These describe the version of the file format being used, not 41 * the version of the program. 42 */ 43 #define HASHDEEP_PREFIX "%%%% " 44 #define HASHDEEP_HEADER_10 "%%%% HASHDEEP-1.0" 45 46 /* HOW TO ADD A NEW HASHING ALGORITHM 47 * Add a value for the algorithm to the hashid_t enumeration 48 * Add the functions to compute the hashes. There should be three functions, 49 an initialization route, an update routine, and a finalize routine. 50 The convention, for an algorithm "foo", is 51 foo_init, foo_update, and foo_final. 52 * Add your new code to Makefile.am under hashdeep_SOURCES 53 * Add a call to insert the algorithm in state::load_hashing_algorithms 54 * See if you need to increase MAX_ALGORITHM_NAME_LENGTH or 55 MAX_ALGORITHM_CONTEXT_SIZE for your algorithm in common.h 56 * Update the usage function and man page to include the function 57 */ 58 59 typedef enum { 60 alg_md5=0, 61 alg_sha1, 62 alg_sha256, 63 alg_tiger, 64 alg_whirlpool, 65 alg_sha3, 66 67 // alg_unknown must always be last in this list. It's used 68 // as a loop terminator in many functions. 69 alg_unknown 70 } hashid_t; 71 72 inline std::ostream & operator << (std::ostream &os,const hashid_t &h) 73 { 74 switch (h) 75 { 76 case alg_md5: os << "alg_md5" ; break ; 77 case alg_sha1: os << "alg_sha1" ; break ; 78 case alg_sha256: os << "alg_sha256" ; break ; 79 case alg_tiger: os << "alg_tiger" ; break ; 80 case alg_whirlpool: os << "alg_whirlpool" ; break ; 81 case alg_sha3: os << "alg_sha3" ; break ; 82 case alg_unknown: os << "alg_unknown" ; break ; 83 } 84 85 return os; 86 } 87 88 #define NUM_ALGORITHMS alg_unknown 89 90 /* Which ones are enabled by default */ 91 #define DEFAULT_ENABLE_MD5 TRUE 92 #define DEFAULT_ENABLE_SHA1 FALSE 93 #define DEFAULT_ENABLE_SHA256 TRUE 94 #define DEFAULT_ENABLE_TIGER FALSE 95 #define DEFAULT_ENABLE_WHIRLPOOL FALSE 96 #define DEFAULT_ENABLE_SHA3 FALSE 97 98 class iomode { 99 public:; 100 static const int buffered=0; // use fopen, fread, fclose 101 static const int unbuffered=1; // use open, read, close 102 static const int mmapped=2; // use open, mmap, close toiomode(const std::string & str)103 static int toiomode(const std::string &str){ 104 if(str=="0" || str[0]=='b') return iomode::buffered; 105 if(str=="1" || str[0]=='u') return iomode::unbuffered; 106 if(str=="2" || str[0]=='m') return iomode::mmapped; 107 std::cerr << "Invalid iomode '" << str << "'"; 108 assert(0); 109 return iomode::unbuffered; // default 110 } 111 }; 112 113 /* This class holds the information known about each hash algorithm. 114 * It's sort of like the EVP system in OpenSSL. 115 * 116 * In version 3 the list of known hashes was stored here as well. 117 * That has been moved to the hashlist database (further down). 118 * 119 * Right now we are using some global variables; the better way to do this 120 * would be with a C++ singleton. 121 * 122 * Perhaps the correct way to do this would be a global C++ vector of objects? 123 */ 124 class algorithm_t { 125 public: 126 bool inuse; // true if we are using this algorithm 127 std::string name; // name of algorithm 128 size_t bit_length; // 128 for MD5 129 hashid_t id; // usually the position in the array... 130 131 /* The hashing functions */ 132 void ( *f_init)(void *ctx); 133 void ( *f_update)(void *ctx, const unsigned char *buf, size_t len ); 134 void ( *f_finalize)(void *ctx, unsigned char *); 135 136 /* The methods */ 137 static void add_algorithm(hashid_t pos, const char *name, uint16_t bits, 138 void ( *func_init)(void *ctx), 139 void ( *func_update)(void *ctx, const unsigned char *buf, size_t len ), 140 void ( *func_finalize)(void *ctx, unsigned char *), 141 int inuse); 142 static void load_hashing_algorithms(); 143 static void clear_algorithms_inuse(); 144 static void enable_hashing_algorithms(std::string var); // enable the algorithms in 'var'; var can be 'all' 145 static hashid_t get_hashid_for_name(std::string name); // return the hashid_t for 'name' 146 static bool valid_hex(const std::string &buf); // returns true if buf contains only hex characters 147 static bool valid_hash(hashid_t alg,const std::string &buf); // returns true if buf is a valid hash for hashid_t a 148 static int algorithms_in_use_count(); // returns count of algorithms in use 149 }; 150 151 extern algorithm_t hashes[NUM_ALGORITHMS]; // which hash algorithms are available and in use 152 153 154 155 /** status_t describes exit codes for the program 156 * 157 */ 158 class status_t { 159 private: 160 int32_t code; 161 public:; status_t()162 status_t():code(0){}; 163 static const int32_t status_ok = EXIT_SUCCESS; // 0 164 static const int32_t status_EXIT_FAILURE = EXIT_FAILURE; 165 static const int32_t status_out_of_memory = -2; 166 static const int32_t status_invalid_hash = -3; 167 static const int32_t status_unknown_error = -4; 168 static const int32_t status_omg_ponies = -5; 169 170 /* 171 * Return values for the program 172 * RBF - Document these return values for hashdeep 173 * A successful run has these or'ed together 174 */ 175 static const int32_t STATUS_UNUSED_HASHES = 1; 176 static const int32_t STATUS_INPUT_DID_NOT_MATCH = 2; 177 static const int32_t STATUS_USER_ERROR = 64; 178 static const int32_t STATUS_INTERNAL_ERROR = 128; add(int32_t val)179 void add(int32_t val){ code |= val; } set(int32_t val)180 void set(int32_t val){ code = val; } get_status()181 int32_t get_status(){ return code; } 182 bool operator==(int32_t v){ return this->code==v; } 183 bool operator!=(int32_t v){ return this->code!=v; } 184 }; 185 186 187 #ifdef _WIN32 188 typedef __time64_t timestamp_t; 189 typedef std::wstring filename_t; 190 #else 191 typedef time_t timestamp_t; 192 typedef std::string filename_t; 193 #endif 194 195 /** 196 * file_metadata_t contains metadata information about a file. 197 * It also includes a stat call that returns the inode information 198 * and link count even on windows, where the API is different than stat. 199 * Note that we only include information we care about in this program 200 * 201 * this is in dig.cpp. 202 */ 203 204 205 /* strangely, we define our own file types */ 206 typedef enum { 207 stat_regular=0, 208 stat_directory, 209 stat_door, 210 stat_block, 211 stat_character, 212 stat_pipe, 213 stat_socket, 214 stat_symlink, 215 stat_unknown=254 216 } file_types; 217 218 class file_metadata_t { 219 public: 220 static file_types decode_file_type(const struct __stat64 &sb); 221 222 // stat a file, print an error and return -1 if it fails, otherwise return 0 223 static int stat(const filename_t &path,file_metadata_t *m,class display &ocb); 224 class fileid_t { // uniquely defines a file on this system 225 public: fileid_t()226 fileid_t():dev(0),ino(0){}; fileid_t(uint64_t dev_,uint64_t ino_)227 fileid_t(uint64_t dev_,uint64_t ino_):dev(dev_),ino(ino_){}; 228 uint64_t dev; // device number 229 uint64_t ino; // inode number 230 }; file_metadata_t()231 file_metadata_t():fileid(),nlink(0),size(0),ctime(0),mtime(0),atime(0){}; file_metadata_t(fileid_t fileid_,uint64_t nlink_,uint64_t size_,timestamp_t ctime_,timestamp_t mtime_,timestamp_t atime_)232 file_metadata_t(fileid_t fileid_,uint64_t nlink_,uint64_t size_,timestamp_t ctime_,timestamp_t mtime_, 233 timestamp_t atime_):fileid(fileid_),nlink(nlink_),size(size_),ctime(ctime_),mtime(mtime_),atime(atime_){}; 234 fileid_t fileid; 235 uint64_t nlink; 236 uint64_t size; 237 timestamp_t ctime; 238 timestamp_t mtime; 239 timestamp_t atime; 240 241 }; 242 243 /** file_data_t contains information about a file. 244 * It can be created by hashing an actual file, or by reading a hash file a file of hashes. 245 * The object is simple so that the built in C++ shallow copy will make a proper copy of it. 246 * Note that all hashes are currently stored as a hex string. That incurs a 2x memory overhead. 247 * This will be changed. 248 */ 249 class file_data_t { 250 public: file_data_t()251 file_data_t():file_bytes(0),matched_file_number(0){ 252 }; ~file_data_t()253 virtual ~file_data_t(){} // required because we subclass 254 255 std::string hash_hex[NUM_ALGORITHMS]; // the hash in hex of the entire file 256 std::string hash512_hex[NUM_ALGORITHMS]; // hash of the first 512 bytes, for triage mode 257 std::string file_name; // just the file_name; native on POSIX; UTF-8 on Windows. 258 259 uint64_t file_bytes; // how many bytes were actually read 260 uint64_t matched_file_number; // file number that we matched.; 0 if no match 261 262 }; 263 264 /** 265 * hash_context stores information for a specific hash. 266 * which may for a piece of a file or an entire file 267 */ 268 class hash_context_obj { 269 public:; hash_context_obj()270 hash_context_obj():read_offset(0),read_len(0){} 271 272 /* Information for the hashing underway */ 273 uint8_t hash_context[NUM_ALGORITHMS][MAX_ALGORITHM_CONTEXT_SIZE]; 274 275 /* The actual hashing */ 276 void multihash_initialize(); 277 void multihash_update(const unsigned char *buffer,size_t bufsize); 278 void multihash_finalize(std::string dest[]); 279 280 // for piecewise hashing: where this segment was actually read 281 uint64_t read_offset; // where the segment we read started 282 uint64_t read_len; // how many bytes were read and hashed 283 }; 284 285 286 /** file_data_hasher_t is a subclass of file_data_t. 287 * It contains additional information necessary to actually hash a file. 288 */ 289 class file_data_hasher_t : public file_data_t { 290 private: 291 static uint64_t next_file_number; 292 static mutex_t fdh_lock; 293 public: stat_megs()294 uint64_t stat_megs() const { // return how many megabytes is the file in MB? 295 return stat_bytes / ONE_MEGABYTE; 296 } 297 static const size_t MD5DEEP_IDEAL_BLOCK_SIZE = 8192; file_data_hasher_t(class display * ocb_)298 file_data_hasher_t(class display *ocb_): 299 ocb(ocb_), // where we put results 300 handle(0), 301 fd(-1), 302 base(0),bounds(0), // for mmap 303 file_number(0),ctime(0),mtime(0),atime(0),stat_bytes(0), 304 start_time(0),last_time(0),eof(false),workerid(-1){ 305 file_number = ++next_file_number; 306 }; ~file_data_hasher_t()307 virtual ~file_data_hasher_t(){ 308 if(handle){ 309 fclose(handle); 310 handle = 0; 311 } 312 if(fd){ 313 #ifdef HAVE_MMAP 314 if(base) munmap((void *)base,bounds); 315 #endif 316 close(fd); 317 fd = 0; 318 } 319 } 320 is_stdin()321 bool is_stdin(){ return handle==stdin; } 322 323 /* The actual file to hash */ 324 filename_t file_name_to_hash; 325 326 /* Where the results go */ 327 class display *ocb; 328 329 /* How we read the data */ 330 FILE *handle; // the file we are reading 331 int fd; // fd used for unbuffered and mmap 332 const unsigned char *base; // base of mapped file 333 size_t bounds; // size of the mapped file 334 335 std::string triage_info; // if true, must print on output 336 std::stringstream dfxml_hash; // the DFXML hash digest for the piece just hashed; 337 // used to build piecewise 338 uint64_t file_number; 339 void append_dfxml_for_byterun(); 340 void compute_dfxml(bool known_hash,const hash_context_obj *hc); 341 342 timestamp_t ctime; // ctime; previously 'timestamp' 343 timestamp_t mtime; 344 timestamp_t atime; 345 346 // How many bytes (and megs) we think are in the file, via stat(2) 347 // and how many bytes we've actually read in the file 348 uint64_t stat_bytes; // how much stat returned 349 350 /* When we started the hashing, and when was the last time a display was printed, 351 * for printing status updates. 352 */ 353 time_t start_time, last_time; // of hashing 354 bool eof; // end of file encountered while reading 355 int workerid; // my worker id, or -1 if there is none set_workerid(int id)356 void set_workerid(int id){workerid=id;} 357 358 /* multithreaded hash implementation is these functions in hash.cpp. 359 * hash() is called to hash each file and record the results. 360 * Return codes are both stored in display return_code and returned 361 * 0 - for success, -1 for error 362 */ 363 // called to actually do the computation; returns true if successful 364 // and fills in the read_offset and read_len 365 void dfxml_timeout(const std::string &tag,const timestamp_t &val); 366 void dfxml_write_hashes(std::string hex_hashes[],int indent); 367 bool compute_hash(uint64_t request_start,uint64_t request_len,hash_context_obj *segment,hash_context_obj *file); 368 void hash(); // called to hash each file and record results 369 }; 370 371 372 /** The hashlist holds a list of file_data_t objects. 373 * state->known is used to hold the audit file that is loaded. 374 * state->seen is used to hold the hashes seen on the current run. 375 * We store multiple maps for each algorithm number which map the hash hex code 376 * to the pointer as well. 377 * 378 * the hashlist.cpp file contains the implementation. It's largely taken 379 * from the v3 audit.cpp and match.cpp files. 380 */ 381 class hashlist : public std::vector<file_data_t *> { 382 /** 383 * The largest number of columns we can expect in a file of hashes 384 * (knowns). Normally this should be the number of hash 385 * algorithms plus a column for file size, file name, and, well, 386 * some fudge factors. Any values after this number will be 387 * ignored. For example, if the user invokes the program as: 388 * 389 * hashdeep -c md5,md5,md5,md5,...,md5,md5,md5,md5,md5,md5,md5,whirlpool 390 * 391 * the whirlpool will not be registered. 392 */ 393 394 public:; 395 static const int MAX_KNOWN_COLUMNS= NUM_ALGORITHMS+ 6; 396 typedef enum { 397 /* return codes from loading a hash list */ 398 loadstatus_ok = 0, 399 status_unknown_filetype, 400 status_contains_bad_hashes, 401 status_contains_no_hashes, 402 status_file_error 403 } loadstatus_t; 404 405 typedef enum { 406 searchstatus_ok = 0, 407 408 /* Matching hashes */ 409 status_match, // all hashes match 410 status_partial_match, /* One or more hashes match, but not all */ 411 status_file_size_mismatch, /* Implies all hashes match */ 412 status_file_name_mismatch, /* Implies all hashes and file size match */ 413 status_no_match /* Implies none of the hashes match */ 414 } searchstatus_t; 415 static const char *searchstatus_to_str(searchstatus_t val); 416 417 // Types of files that contain known hashes 418 typedef enum { 419 file_plain, 420 file_bsd, 421 file_hashkeeper, 422 file_nsrl_15, 423 file_nsrl_20, 424 file_encase3, 425 file_encase4, 426 file_ilook, 427 428 // Files generated by md5deep with the ten digit filesize at the start 429 // of each line 430 file_md5deep_size, 431 file_hashdeep_10, 432 file_unknown 433 } hashfile_format; 434 435 class hashmap : public std::multimap<std::string,file_data_t *> { 436 public:; 437 void add_file(file_data_t *fi,int alg_num); 438 }; 439 hashmap hashmaps[NUM_ALGORITHMS]; 440 441 /**************************************************************** 442 ** Search functions follow 443 ** It's not entirely clear why we have two search functions, but we do. 444 ** Perhaps one is from md5deep and the other is from hashdeep 445 ****************************************************************/ 446 447 /** 448 * hashlist.cpp 449 * find_hash finds the 'best match', which ideally is a match for both the hash and the filename. 450 */ 451 file_data_t *find_hash(hashid_t alg,const std::string &hash_hex, 452 const std::string &file_name, 453 uint64_t file_number); 454 455 /** 456 * look up a fdt by hash code(s) and return if it is present or not. 457 * optionally return a pointer to it as well. 458 */ 459 searchstatus_t search(const file_data_hasher_t *fdht, file_data_t ** matched, bool case_sensitive) ; 460 uint64_t total_matched(); // return the total matched from all calls to search() 461 462 /****************************************************************/ 463 464 /** 465 * Figure out the format of a hashlist file and load it. 466 * Both of these functions take the file name and the open handle. 467 * They read from the handle and just use the filename for printing error messages. 468 */ 469 void enable_hashing_algorithms_from_hashdeep_file(class display *ocb, 470 const std::string &fn,std::string val); 471 472 std::string last_enabled_algorithms; // a string with the algorithms that were enabled last 473 hashid_t hash_column[NUM_ALGORITHMS]; // maps a column number to a hashid; 474 // the order columns appear in the file being loaded. 475 uint8_t filename_column; // Column number which should contain the filename 476 hashfile_format identify_format(class display *ocb,const std::string &fn,FILE *handle); 477 loadstatus_t load_hash_file(class display *ocb,const std::string &fn); // not tstring! always ASCII 478 479 void dump_hashlist(); // send contents to stdout 480 481 /** 482 * add_fdt adds a file_data_t record to the hashlist, and its hashes to all the hashmaps. 483 * @param fi - a file_data_t to add. Don't erase it; we're going to use it (and modify it) 484 */ 485 void add_fdt(file_data_t *fi); 486 }; 487 488 /* Primary modes of operation (primary_function) */ 489 typedef enum { 490 primary_compute=0, 491 primary_match=1, 492 primary_match_neg=2, 493 primary_audit=3 494 } primary_t; 495 496 497 // These are the types of files that we can match against 498 #define TYPE_PLAIN 0 499 #define TYPE_BSD 1 500 #define TYPE_HASHKEEPER 2 501 #define TYPE_NSRL_15 3 502 #define TYPE_NSRL_20 4 503 #define TYPE_ILOOK 5 504 #define TYPE_ILOOK3 6 505 #define TYPE_ILOOK4 7 506 #define TYPE_MD5DEEP_SIZE 8 507 #define TYPE_ENCASE 9 508 #define TYPE_UNKNOWN 254 509 510 /* audit mode stats */ 511 class audit_stats { 512 public: audit_stats()513 audit_stats():exact(0), expect(0), partial(0), moved(0), unused(0), unknown(0), total(0){ 514 }; 515 /* For audit mode, the number of each type of file */ 516 uint64_t exact, expect, partial; // 517 uint64_t moved, unused, unknown, total; // clear()518 void clear(){ 519 exact = 0; 520 expect = 0; 521 partial = 0; 522 moved = 0; 523 unused = 0; 524 unknown = 0; 525 total = 0; 526 } 527 }; 528 529 /** display describes how information is output. 530 * There is only one OCB (it is a singleton). 531 * It needs to be mutex protected. 532 * 533 * The hashing happens in lots of threads and then calls the output 534 * classes in output_control_block to actually do the outputing. The 535 * problem here is that one of the things that is done is looking up, 536 * so the searches into "known" and "seen" also need to be 537 * protected. Hence "known" and "seen" appear in the 538 * output_control_block, and not elsewhere, and all of the access to 539 * them needs to be mediated. 540 * 541 * It also needs to maintain all of the state for audit mode. 542 * Finally, it maintains options for reading 543 * (e.g. buffered, unbuffered, or memory-mapped I/O) 544 * 545 * It is a class because it is protected and is passed around. 546 */ 547 class display { 548 private: 549 mutable mutex_t M; // lock for anything in output section lock()550 void lock() const { M.lock(); } unlock()551 void unlock() const { M.unlock(); } 552 553 /* all display state variables are protected by M and must be private */ 554 std::ostream *out; // where things get sent 555 std::ofstream myoutstream; // if we open it 556 std::string utf8_banner; // banner to be displayed 557 bool banner_displayed; // has the header been shown (text output) 558 XML *dfxml; /* output in DFXML */ 559 560 /* The set of known values; typically read from the audit file */ 561 hashlist known; // hashes read from the -k file 562 hashlist seen; // hashes seen on this hashing run; from the command line 563 class audit_stats match; // for the audit mode 564 status_t return_code; // prevously returned by hash() and dig(). 565 566 public: display()567 display(): 568 out(&std::cout), 569 banner_displayed(0),dfxml(0), 570 mode_triage(false), 571 mode_not_matched(false),mode_quiet(false),mode_timestamp(false), 572 mode_barename(false), 573 mode_size(false),mode_size_all(false), 574 opt_silent(false), 575 opt_verbose(0), 576 opt_estimate(false), 577 opt_relative(false), 578 opt_unicode_escape(false), 579 opt_mode_match(false), 580 opt_mode_match_neg(false), 581 opt_csv(false), 582 opt_asterisk(false), 583 opt_zero(false), 584 opt_display_size(false), 585 opt_display_hash(false), 586 opt_show_matched(false), 587 opt_case_sensitive(true), 588 opt_iomode(iomode::buffered), // by default, use buffered 589 #ifdef HAVE_PTHREAD 590 opt_threadcount(threadpool::numCPU()), 591 tp(0), 592 #else 593 opt_threadcount(0), 594 #endif 595 size_threshold(0), 596 piecewise_size(0), 597 primary_function(primary_compute){ 598 } 599 600 /* These variables are read-only after threading starts */ 601 bool mode_triage; 602 bool mode_not_matched; 603 bool mode_quiet; 604 bool mode_timestamp; 605 bool mode_barename; 606 bool mode_size; 607 bool mode_size_all; 608 std::string opt_outfilename; 609 bool opt_silent; 610 int opt_verbose; 611 bool opt_estimate; 612 bool opt_relative; 613 bool opt_unicode_escape; 614 bool opt_mode_match; 615 bool opt_mode_match_neg; 616 bool opt_csv; 617 bool opt_asterisk; 618 bool opt_zero; 619 bool opt_display_size; 620 bool opt_display_hash; 621 bool opt_show_matched; 622 bool opt_case_sensitive; 623 int opt_iomode; 624 int opt_threadcount; 625 626 #ifdef HAVE_PTHREAD 627 threadpool *tp; 628 #endif 629 630 // When only hashing files larger/smaller than a given threshold 631 uint64_t size_threshold; 632 uint64_t piecewise_size; // non-zero for piecewise mode 633 primary_t primary_function; /* what do we want to do? */ 634 635 636 /* Functions for working */ 637 638 void set_outfilename(std::string outfilename); 639 640 /* Return code support */ get_return_code()641 int32_t get_return_code(){ lock(); int ret = return_code.get_status(); unlock(); return ret; } set_return_code(status_t code)642 void set_return_code(status_t code){ lock(); return_code = code; unlock(); } set_return_code(int32_t code)643 void set_return_code(int32_t code){ lock(); return_code.set(code); unlock(); } set_return_code_if_not_ok(status_t code)644 void set_return_code_if_not_ok(status_t code){ 645 lock(); 646 if(code!=status_t::status_ok) return_code = code; 647 unlock(); 648 } 649 650 /* DFXML support */ 651 xml_open(FILE * out_)652 void xml_open(FILE *out_){ 653 lock(); 654 dfxml = new XML(out_); 655 unlock(); 656 } 657 void dfxml_startup(int argc,char **argv); 658 void dfxml_shutdown(); 659 void dfxml_timeout(const std::string &tag,const timestamp_t &val); 660 void dfxml_write(file_data_hasher_t *fdht); 661 662 663 /* Known hash database interface */ 664 /* Display the unused files and return the count */ 665 uint64_t compute_unused(bool show_display,std::string annotation); set_utf8_banner(std::string utf8_banner_)666 void set_utf8_banner(std::string utf8_banner_){ 667 utf8_banner = utf8_banner_; 668 } 669 670 671 static mutex_t portable_gmtime_mutex; 672 struct tm *portable_gmtime(struct tm *my_time,const timestamp_t *t); 673 void try_msg(void); 674 675 void display_banner_if_needed(); 676 void display_match_result(file_data_hasher_t *fdht,const hash_context_obj *hc); 677 678 void md5deep_display_match_result(file_data_hasher_t *fdht,const hash_context_obj *hc); 679 void md5deep_display_hash(file_data_hasher_t *fdht,const hash_context_obj *hc); 680 681 void display_hash(file_data_hasher_t *fdht,const hash_context_obj *hc); 682 void display_hash_simple(file_data_hasher_t *fdt,const hash_context_obj *hc); 683 684 /* The following routines are for printing and outputing filenames. 685 * 686 * fmt_filename formats the filename. 687 * On Windows this version outputs as UTF-8 unless unicode quoting is requested, 688 * in which case Unicode characters are emited as U+xxxx. 689 * For example, the Unicode smiley character ☺ is output as U+263A. 690 * 691 */ 692 std::string fmt_size(const file_data_t *fdh) const; 693 std::string fmt_filename(const std::string &fn) const; 694 #ifdef _WIN32 695 std::string fmt_filename(const std::wstring &fn) const; 696 #endif fmt_filename(const file_data_t * fdt)697 std::string fmt_filename(const file_data_t *fdt) const { 698 return fmt_filename(fdt->file_name); 699 } 700 void writeln(std::ostream *s,const std::string &str); // writes a line with NEWLINE and locking 701 702 // Display an ordinary message with newline added 703 void status(const char *fmt, ...) __attribute__((format(printf, 2, 0))); // note that 1 is 'self' 704 705 // Display an error message if not in silent mode 706 void error(const char *fmt, ...) __attribute__((format(printf, 2, 0))); 707 708 // Display an error message if not in silent mode and exit 709 void fatal_error(const char *fmt, ...) __attribute__((format(printf, 2, 0))) __attribute__ ((__noreturn__)); 710 // Display an error message, ask user to contact the developer, 711 void internal_error(const char *fmt, ...) __attribute__((format(printf, 2, 0))) __attribute__ ((__noreturn__)); 712 void print_debug(const char *fmt, ...) __attribute__((format(printf, 2, 0))); 713 void error_filename(const std::string &fn, const char *fmt, ...) __attribute__((format(printf, 3, 0))) ; 714 #ifdef _WIN32 715 void error_filename(const std::wstring &fn, const char *fmt, ...) __attribute__((format(printf, 3, 0))); 716 #endif 717 718 /* these versions extract the filename and the annotation if it is present. 719 */ 720 721 /* known hash database and realtime stats. 722 * Note that this is not locked() and unlocked(). 723 * It can only be run from the main thread before fork. 724 */ load_hash_file(const std::string & fn)725 hashlist::loadstatus_t load_hash_file(const std::string &fn){ 726 hashlist::loadstatus_t ret = known.load_hash_file(this,fn); 727 return ret; 728 } 729 730 /** These are multi-threaded */ 731 known_size()732 uint64_t known_size() const { 733 lock(); 734 uint64_t ret= known.size(); 735 unlock(); 736 return ret; 737 } find_hash(hashid_t alg,const std::string & hash_hex,const std::string & file_name,uint64_t file_number)738 const file_data_t *find_hash(hashid_t alg,const std::string &hash_hex, 739 const std::string &file_name, 740 uint64_t file_number){ 741 lock(); 742 const file_data_t *ret = known.find_hash(alg,hash_hex,file_name,file_number); 743 unlock(); 744 return ret; 745 } 746 void clear_realtime_stats(); 747 void display_realtime_stats(const file_data_hasher_t *fdht,const hash_context_obj *hc,time_t elapsed); hashes_loaded()748 bool hashes_loaded() const{ lock(); bool ret = known.size()>0; unlock(); return ret; } add_fdt(file_data_t * fdt)749 void add_fdt(file_data_t *fdt){ lock(); known.add_fdt(fdt); unlock(); } 750 751 /* audit mode */ 752 int audit_update(file_data_hasher_t *fdt); 753 int audit_check(); // performs an audit; return 0 if pass, -1 if fail 754 void display_audit_results(); // sets return code if fails 755 void finalize_matching(); 756 757 /* hash.cpp: Actually trigger the hashing. */ 758 void hash_file(const tstring &file_name); 759 void hash_stdin(); dump_hashlist()760 void dump_hashlist(){ lock(); known.dump_hashlist(); unlock(); } 761 }; 762 763 /** 764 * The 'state' class holds the state of the hashdeep/md5deep program. 765 * This includes: 766 * startup parameters 767 * known - the list of hashes in the hash database. 768 * seen - the list of hashes that have been seen this time through. 769 */ 770 771 772 class global { 773 public: 774 static tstring getcwd(); // returns the current directory 775 static tstring get_realpath(const tstring &fn); // returns the full path 776 static std::string get_realpath8(const tstring &fn); // returns the full path in UTF-8 777 static std::string escape_utf8(const std::string &fn); // turns "⦿" to "U+29BF" 778 #ifdef _WIN32 779 static std::string make_utf8(const std::wstring &tfn) ; 780 #endif make_utf8(const std::string & tfn)781 static std::string make_utf8(const std::string &tfn){return tfn;} 782 }; 783 784 /* On Win32, allow output of wstr's by converting them to UTF-8 */ 785 #ifdef _WIN32 786 inline std::ostream & operator <<(std::ostream &os,const std::wstring &wstr) { 787 os << global::make_utf8(wstr); 788 return os; 789 } 790 #endif 791 792 class state { 793 public:; 794 state()795 state():mode_recursive(false), // do we recurse? 796 mode_warn_only(false), // for loading hash files 797 798 // these determine which files get hashed 799 mode_expert(false), 800 mode_regular(false), 801 mode_directory(false), 802 mode_door(false), 803 mode_block(false), 804 mode_character(false), 805 mode_pipe(false), 806 mode_socket(false), 807 mode_symlink(false), 808 mode_winpe(false), 809 810 // command line argument 811 argc(0),argv(0), 812 813 // these have something to do with hash files that are loaded 814 h_field(0), 815 h_plain(0),h_bsd(0), 816 h_md5deep_size(0), 817 h_hashkeeper(0),h_ilook(0),h_ilook3(0),h_ilook4(0), h_nsrl20(0), h_encase(0), 818 usage_count(0) // allows -hh to print extra help 819 {}; 820 821 bool mode_recursive; 822 bool mode_warn_only; 823 824 // which files do we hash. 825 bool mode_expert; 826 bool mode_regular; 827 bool mode_directory; 828 bool mode_door; 829 bool mode_block; 830 bool mode_character; 831 bool mode_pipe; 832 bool mode_socket; 833 bool mode_symlink; 834 bool mode_winpe; 835 836 837 /* Command line arguments */ 838 std::string opt_input_list; // file with a list of files to read 839 int argc; 840 #ifdef _WIN32 841 wchar_t **argv; // never allocated, never freed 842 #else 843 char **argv; 844 #endif 845 846 // configuration and output 847 display ocb; // output control block 848 849 // Which filetypes this algorithm supports and their position in the file 850 uint8_t h_field; // which field to extract from a hash file. 851 uint8_t h_plain, h_bsd, h_md5deep_size, h_hashkeeper; 852 uint8_t h_ilook, h_ilook3, h_ilook4, h_nsrl20, h_encase; 853 854 void md5deep_add_hash(char *h, char *fn); // explicitly add a hash 855 void setup_expert_mode(char *arg); 856 857 /* main.cpp */ 858 uint64_t find_block_size(std::string input_str); 859 int usage_count; 860 bool opt_enable_mac_cc; 861 tstring generate_filename(const tstring &input); 862 void hashdeep_usage(); 863 std::string make_banner(); 864 void md5deep_usage(); 865 void hashdeep_check_flags_okay(); 866 void check_wow64(); 867 void md5deep_check_flags_okay(); 868 int hashdeep_process_command_line(int argc,char **argv); 869 void md5deep_check_matching_modes(); 870 void hashdeep_check_matching_modes(); 871 int md5deep_process_command_line(int argc,char **argv); 872 #ifdef _WIN32 873 int prepare_windows_command_line(); 874 #endif 875 876 /* files.cpp 877 * Not quite sure what to do with this stuff yet... 878 */ 879 880 void md5deep_load_match_file(const char *fn); 881 int find_hash_in_line(char *buf, int fileType, char *filename); 882 int parse_encase_file(const char *fn,FILE *f,uint32_t num_expected_hashes); 883 int find_plain_hash(char *buf,char *known_fn); // returns FALSE if error 884 int find_md5deep_size_hash(char *buf, char *known_fn); 885 int find_bsd_hash(char *buf, char *fn); 886 int find_rigid_hash(char *buf, char *fn, unsigned int fn_location, unsigned int hash_location); 887 int find_ilook_hash(char *buf, char *known_fn); 888 int check_for_encase(FILE *f,uint32_t *expected_hashes); 889 890 /* dig.cpp 891 * 892 * Note the file typing system needs to be able to display errors... 893 */ 894 895 class dir_table_t : public std::set<tstring>{ 896 }; 897 dir_table_t dir_table; 898 void done_processing_dir(const tstring &fn_); 899 void processing_dir(const tstring &fn_); 900 bool have_processed_dir(const tstring &fn_); 901 902 903 int identify_hash_file_type(FILE *f,uint32_t *expected_hashes); // identify the hash file type 904 bool should_hash_symlink(const tstring &fn,file_types *link_type); 905 bool should_hash_winpe(const tstring &fn); 906 bool should_hash_expert(const tstring &fn, file_types type); 907 bool should_hash(const tstring &fn); 908 909 /* file_type returns the file type of a string. 910 * If an error is found and ocb is provided, send the error to ocb. 911 * If filesize and timestamp are provided, give them. 912 */ 913 static file_types file_type(const filename_t &fn,class display *ocb,uint64_t *filesize, 914 timestamp_t *ctime,timestamp_t *mtime,timestamp_t *atime); 915 #ifdef _WIN32 916 bool is_junction_point(const std::wstring &fn); 917 #endif 918 void clean_name_posix(std::string &fn); 919 void process_dir(const tstring &path); 920 void dig_normal(const tstring &path); // posix & win32 921 void dig_win32(const tstring &path); // win32 only; calls dig_normal 922 static void dig_self_test(); 923 hashes_loaded()924 bool hashes_loaded(){ 925 return ocb.hashes_loaded(); 926 } 927 928 int main(int argc,char **argv); // main 929 void sanity_check(int condition,const char *msg); 930 931 }; 932 933 /** 934 * the files class knows how to read various hash file types 935 */ 936 937 /* Due to an inadvertant code fork several years ago, this program has different usage 938 * and output when run as 'md5deep' then when run as 'hashdeep'. We call this the 939 * 'md5deep_mode' and track it with the variables below. 940 */ 941 942 /* main.cpp */ 943 extern bool md5deep_mode; // if true, then we were run as md5deep, sha1deep, etc. 944 extern int opt_debug; // for debugging 945 extern hashid_t opt_md5deep_mode_algorithm; // for when we are in MD5DEEP mode 946 947 948 std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems); 949 std::vector<std::string> split(const std::string &s, char delim); 950 void lowercase(std::string &s); 951 extern std::string progname; // formerly const char *__progname 952 953 // ------------------------------------------------------------------ 954 // HELPER FUNCTIONS 955 // 956 // helper.cpp 957 // ------------------------------------------------------------------ 958 959 void chop_line(char *s); 960 off_t find_file_size(FILE *f,class display *ocb); // Return the size, in bytes of an open file stream. On error, return -1 961 962 // ------------------------------------------------------------------ 963 // MAIN PROCESSING 964 // ------------------------------------------------------------------ 965 /* dig.cpp */ 966 void dig_self_test(); // check the string-processing 967 968 969 970 971 #endif /* ifndef __MAIN_H */ 972