1 /*
2  *
3  * This is the main() function and support functions for hashdeep and md5deep.
4  *
5  * This is a work of the US Government. In accordance with 17 USC 105,
6  * copyright protection is not available for any work of the US Government.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Original program by Jesse Kornblum.
13  * Significantly modified by Simson Garfinkel.
14  */
15 
16 #include "main.h"
17 #include <string>
18 #include <algorithm>
19 #include <iostream>
20 #include <vector>
21 #include "utf8.h"
22 
23 #include "md5.h"
24 #include "sha1.h"
25 #include "sha256.h"
26 //#include "sha3.h"
27 #include "tiger.h"
28 #include "whirlpool.h"
29 
30 using namespace std;
31 
32 std::string progname;
33 
34 #define AUTHOR      "Jesse Kornblum and Simson Garfinkel"
35 #define COPYRIGHT   "This program is a work of the US Government. "\
36 "In accordance with 17 USC 105,\n"\
37 "copyright protection is not available for any work of the US Government.\n"\
38 "This is free software; see the source for copying conditions. There is NO\n"\
39 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
40 
41 
42 #ifdef _WIN32
43 // This can't go in main.h or we get multiple definitions of it
44 // Allows us to open standard input in binary mode by default
45 // See http://gnuwin32.sourceforge.net/compile.html for more
46 int _CRT_fmode = _O_BINARY;
47 #endif
48 
49 
50 /* The only remaining global options */
51 bool	md5deep_mode = false;
52 int	opt_debug = 0;			// debug mode; 1 is self-test
53 hashid_t  opt_md5deep_mode_algorithm = alg_unknown;
54 
55 
56 /****************************************************************
57  ** Various helper functions.
58  ****************************************************************/
59 
60 uint64_t file_data_hasher_t::next_file_number = 0; // needs to live somewhere
61 
62 /* This is the one place we allow a printf, becuase we are about to exit, and we call it before we multithread */
try_msg(void)63 static void try_msg(void)
64 {
65     std::cerr << "Try `" << progname << " -h` for more information." << std::endl;
66 }
67 
68 
sanity_check(int condition,const char * msg)69 void state::sanity_check(int condition, const char *msg)
70 {
71   if (condition)
72   {
73     if (!ocb.opt_silent)
74     {
75       ocb.error("%s",msg);
76       try_msg();
77     }
78     exit (status_t::STATUS_USER_ERROR);
79   }
80 }
81 
is_absolute_path(const tstring & fn)82 static int is_absolute_path(const tstring &fn)
83 {
84 #ifdef _WIN32
85   return FALSE;
86 #endif
87   return (fn.size()>0 && fn[0] == DIR_SEPARATOR);
88 }
89 
90 
91 /**
92  * return the full pathname for a filename.
93  */
94 
generate_filename(const tstring & input)95 tstring state::generate_filename(const tstring &input)
96 {
97     if ((ocb.opt_relative) || is_absolute_path(input)){
98 	return tstring(input);
99     }
100     // Windows systems don't have symbolic links, so we don't
101     // have to worry about carefully preserving the paths
102     // they follow. Just use the system command to resolve the paths
103     //
104     // Actually, they can have symbolic links...
105 #ifdef _WIN32
106     wchar_t fn[PATH_MAX];
107     memset(fn,0,sizeof(fn));
108     _wfullpath(fn,input.c_str(),PATH_MAX);
109     return tstring(fn);
110 #else
111     char buf[PATH_MAX+1];
112     std::string cwd = global::getcwd();
113     if (cwd=="") {
114 	// If we can't get the current working directory, we're not
115 	// going to be able to build the relative path to this file anyway.
116 	// So we just call realpath and make the best of things
117 	if (realpath(input.c_str(),buf)==0){
118 	    ocb.internal_error("Error calling realpath in generate_filename");
119 	}
120 	return string(buf);
121     }
122     return cwd + DIR_SEPARATOR + input;
123 #endif
124 }
125 
126 
127 
128 // So that the usage message fits in a standard DOS window, this
129 // function should produce no more than 22 lines of text.
hashdeep_usage()130 void state::hashdeep_usage()
131 {
132   if (1 == usage_count)
133   {
134     ocb.status("%s version %s by %s.",progname.c_str(),VERSION,AUTHOR);
135     ocb.status("%s %s [OPTION]... [FILES]...",CMD_PROMPT,progname.c_str());
136 
137     // Make a list of the hashes
138     ocb.status("-c <alg1,[alg2]> - Compute hashes only. Defaults are MD5 and SHA-256");
139     fprintf(stdout,"                   legal values: ");
140     for (int i = 0 ; i < NUM_ALGORITHMS ; i++)
141     {
142       fprintf(stdout,"%s%s",hashes[i].name.c_str(),(i+1<NUM_ALGORITHMS) ? "," : NEWLINE);
143     }
144 
145     ocb.status("-p <size> - piecewise mode. Files are broken into blocks for hashing");
146     ocb.status("-r        - recursive mode. All subdirectories are traversed");
147     ocb.status("-d        - output in DFXML (Digital Forensics XML)");
148     ocb.status("-k <file> - add a file of known hashes");
149     ocb.status("-a        - audit mode. Validates FILES against known hashes. Requires -k");
150     ocb.status("-m        - matching mode. Requires -k");
151     ocb.status("-x        - negative matching mode. Requires -k");
152     ocb.status("-w        - in -m mode, displays which known file was matched");
153     ocb.status("-M and -X act like -m and -x, but display hashes of matching files");
154     ocb.status("-e        - compute estimated time remaining for each file");
155     ocb.status("-s        - silent mode. Suppress all error messages");
156     ocb.status("-b        - prints only the bare name of files; all path information is omitted");
157     ocb.status("-l        - print relative paths for filenames");
158     ocb.status("-i/-I     - only process files smaller than the given threshold");
159     ocb.status("-o        - only process certain types of files. See README/manpage");
160     ocb.status("-v        - verbose mode. Use again to be more verbose");
161     ocb.status("-d        - output in DFXML; -W FILE - write to FILE.");
162 #ifdef HAVE_PTHREAD
163     ocb.status("-j <num>  - use num threads (default %d)",threadpool::numCPU());
164 #else
165     ocb.status("-j <num>  - ignored (compiled without pthreads)");
166 #endif
167   }
168 
169   // -hh makes us more verbose
170   if (2 == usage_count)
171   {
172     ocb.status("-f <file> - Use file as a list of files to process.");
173     ocb.status("-V        - display version number and exit");
174     ocb.status("-0        - use a NUL (\\0) for newline.");
175     ocb.status("-u        - escape Unicode");
176     ocb.status("-E        - Use case insensitive matching for filenames in audit mode");
177     ocb.status("-B        - verbose mode; repeat for more verbosity");
178     ocb.status("-C        - OS X only --- use Common Crypto hash functions");
179     ocb.status("-Fb       - I/O mode buffered; -Fu unbuffered; -Fm memory-mapped");
180     ocb.status("-o[bcpflsde] - Expert mode. only process certain types of files:");
181     ocb.status("               b=block dev; c=character dev; p=named pipe");
182     ocb.status("               f=regular file; l=symlink; s=socket; d=door e=Windows PE");
183     ocb.status("-D <num>  - set debug level");
184   }
185 
186   /// -hhh mode includes debugging information.
187   if (3 == usage_count)
188   {
189     ocb.status("sizeof(off_t)= %d",sizeof(off_t));
190 #ifdef HAVE_PTHREAD
191     ocb.status("HAVE_PTHREAD");
192 #endif
193 #ifdef HAVE_PTHREAD_H
194     ocb.status("HAVE_PTHREAD_H");
195 #endif
196 #ifdef HAVE_PTHREAD_WIN32_PROCESS_ATTACH_NP
197     ocb.status("HAVE_PTHREAD_WIN32_PROCESS_ATTACH_NP");
198 #endif
199   }
200 }
201 
202 
203 // So that the usage message fits in a standard DOS window, this
204 // function should produce no more than 22 lines of text.
md5deep_usage(void)205 void state::md5deep_usage(void)
206 {
207     if(usage_count==1){
208 	ocb.status("%s version %s by %s.",progname.c_str(),VERSION,AUTHOR);
209 	ocb.status("%s %s [OPTION]... [FILES]...",CMD_PROMPT,progname.c_str());
210 	ocb.status("See the man page or README.txt file or use -hh for the full list of options");
211 	ocb.status("-p <size> - piecewise mode. Files are broken into blocks for hashing");
212 	ocb.status("-r        - recursive mode. All subdirectories are traversed");
213 	ocb.status("-e        - show estimated time remaining for each file");
214 	ocb.status("-s        - silent mode. Suppress all error messages");
215 	ocb.status("-z        - display file size before hash");
216 	ocb.status("-m <file> - enables matching mode. See README/man page");
217 	ocb.status("-x <file> - enables negative matching mode. See README/man page");
218 	ocb.status("-M and -X are the same as -m and -x but also print hashes of each file");
219 	ocb.status("-w        - displays which known file generated a match");
220 	ocb.status("-n        - displays known hashes that did not match any input files");
221 	ocb.status("-a and -A add a single hash to the positive or negative matching set");
222 	ocb.status("-b        - prints only the bare name of files; all path information is omitted");
223 	ocb.status("-l        - print relative paths for filenames");
224 	ocb.status("-t        - print GMT timestamp (ctime)");
225 	ocb.status("-i/I <size> - only process files smaller/larger than SIZE");
226 	ocb.status("-v        - display version number and exit");
227 	ocb.status("-d        - output in DFXML; -u - Escape Unicode; -W FILE - write to FILE.");
228 #ifdef HAVE_PTHREAD
229 	ocb.status("-j <num>  - use num threads (default %d)",threadpool::numCPU());
230 #else
231 	ocb.status("-j <num>  - ignored (compiled without pthreads)");
232 #endif
233 	ocb.status("-Z - triage mode;   -h - help;   -hh - full help");
234     }
235     if(usage_count==2){			// -hh
236 	ocb.status("-S        - Silent mode, but warn on bad hashes");
237 	ocb.status("-0        - use a NUL (\\0) for newline.");
238 	ocb.status("-k        - print asterisk before filename");
239 	ocb.status("-u        - escape Unicode characters in filenames");
240 	ocb.status("-B        - verbose mode; repeat for more verbosity");
241 	ocb.status("-C        - OS X only --- use Common Crypto hash functions");
242 	ocb.status("-Fb       - I/O mode buffered; -Fu unbuffered; -Fm memory-mapped");
243 	ocb.status("-f <file> - take list of files to hash from filename");
244 	ocb.status("-o[bcpflsde] - expert mode. Only process certain types of files:");
245 	ocb.status("               b=block dev; c=character dev; p=named pipe");
246 	ocb.status("               f=regular file; l=symlink; s=socket; d=door e=Windows PE");
247 	ocb.status("-D <num>  - set debug level to nn");
248     }
249     if (usage_count==3){			// -hhh
250 	ocb.status("sizeof(off_t)= %d",sizeof(off_t));
251 #ifdef HAVE_PTHREAD
252 	ocb.status("HAVE_PTHREAD");
253 #endif
254 #ifdef HAVE_PTHREAD_H
255 	ocb.status("HAVE_PTHREAD_H");
256 #endif
257 #ifdef HAVE_PTHREAD_WIN32_PROCESS_ATTACH_NP
258 	ocb.status("HAVE_PTHREAD_WIN32_PROCESS_ATTACH_NP");
259 #endif
260     }
261 }
262 
263 
hashdeep_check_flags_okay()264 void state::hashdeep_check_flags_okay()
265 {
266   sanity_check(
267 	       (((ocb.primary_function & primary_match) ||
268 		 (ocb.primary_function & primary_match_neg) ||
269 		 (ocb.primary_function & primary_audit)) &&
270 		!hashes_loaded()),
271 	       "Unable to load any matching files.");
272 
273   sanity_check(
274 	       (ocb.opt_relative) && (ocb.mode_barename),
275 	       "Relative paths and bare filenames are mutally exclusive.");
276 
277   /* Additional sanity checks will go here as needed... */
278 }
279 
280 
281 
282 /****************************************************************
283  ** Hash algorithms database.
284  ****************************************************************/
285 
286 algorithm_t     hashes[NUM_ALGORITHMS];		// which hash algorithms are available and in use
287 /**
288  * Add a hash algorithm. This could be table driven, but it isn't.
289  */
add_algorithm(hashid_t pos,const char * name,uint16_t bits,void (* func_init)(void * ctx),void (* func_update)(void * ctx,const unsigned char * buf,size_t buflen),void (* func_finalize)(void * ctx,unsigned char *),int inuse)290 void algorithm_t::add_algorithm(
291 	      hashid_t pos,
292 	      const char *name,
293 	      uint16_t bits,
294 	      void ( *func_init)(void *ctx),
295 	      void ( *func_update)(void *ctx, const unsigned char *buf, size_t buflen),
296 	      void ( *func_finalize)(void *ctx, unsigned char *),
297 	      int inuse)
298 {
299     hashes[pos].name		= name;
300     hashes[pos].f_init      = func_init;
301     hashes[pos].f_update    = func_update;
302     hashes[pos].f_finalize  = func_finalize;
303     hashes[pos].bit_length  = bits;
304     hashes[pos].inuse       = inuse;
305     hashes[pos].id          = pos;
306 }
307 
308 
/* C-linkage prototypes for sha1_init / sha1_process / sha1_done. */
extern "C" {
    int sha1_init(void *md);
    int sha1_process(void *md, const unsigned char *buf, uint64_t);
    int sha1_done(void *md, unsigned char *out);
}
314 
315 
316 #ifdef POLARSSL_SHA1_H
hash_init_sha1(void * ctx)317 void hash_init_sha1(void * ctx)
318 {
319     assert(sizeof(sha1_context) < MAX_ALGORITHM_CONTEXT_SIZE);
320     sha1_starts((sha1_context *)ctx);
321 }
322 
hash_update_sha1(void * ctx,const unsigned char * buf,size_t len)323 void hash_update_sha1(void * ctx, const unsigned char *buf, size_t len)
324 {
325     sha1_update((sha1_context *)ctx,buf,len);
326 }
327 
hash_final_sha1(void * ctx,unsigned char * sum)328 void hash_final_sha1(void * ctx, unsigned char *sum)
329 {
330     sha1_finish((sha1_context *)ctx,sum);
331 }
332 #endif
333 
334 #if defined(HAVE_COMMONCRYPTO_COMMONDIGEST_H)
335 #include <CommonCrypto/CommonDigest.h>
336 #endif
337 
338 bool opt_enable_mac_cc=false;	// enable mac common crypto
339 
340 #ifdef HAVE_CC_SHA1_INIT
341 /* These are to overcome C++ cast issues */
cc_md5_init(void * ctx)342 void cc_md5_init(void * ctx)
343 {
344     if(opt_enable_mac_cc){
345 	CC_MD5_Init((CC_MD5_CTX *)ctx);
346     } else {
347 	hash_init_md5(ctx);
348     }
349 }
350 
cc_sha1_init(void * ctx)351 void cc_sha1_init(void * ctx)
352 {
353     if(opt_enable_mac_cc){
354 	CC_SHA1_Init((CC_SHA1_CTX *)ctx);
355     } else {
356 	hash_init_sha1(ctx);
357     }
358 }
359 
cc_sha256_init(void * ctx)360 void cc_sha256_init(void * ctx)
361 {
362     if(opt_enable_mac_cc){
363 	CC_SHA256_Init((CC_SHA256_CTX *)ctx);
364     } else {
365 	hash_init_sha256(ctx);
366     }
367 }
368 
cc_md5_update(void * ctx,const unsigned char * buf,size_t len)369 void cc_md5_update(void *ctx, const unsigned char *buf, size_t len)
370 {
371     if(opt_enable_mac_cc){
372 	CC_MD5_Update((CC_MD5_CTX *)ctx,buf,len);
373     } else {
374 	hash_update_md5(ctx,buf,len);
375     }
376 }
377 
cc_sha1_update(void * ctx,const unsigned char * buf,size_t len)378 void cc_sha1_update(void *ctx, const unsigned char *buf, size_t len)
379 {
380     if(opt_enable_mac_cc){
381 	CC_SHA1_Update((CC_SHA1_CTX *)ctx,buf,len);
382     } else {
383 	hash_update_sha1(ctx,buf,len);
384     }
385 }
386 
cc_sha256_update(void * ctx,const unsigned char * buf,size_t len)387 void cc_sha256_update(void *ctx, const unsigned char *buf, size_t len)
388 {
389     if(opt_enable_mac_cc){
390 	CC_SHA256_Update((CC_SHA256_CTX *)ctx,buf,len);
391     } else {
392 	hash_update_sha256(ctx,buf,len);
393     }
394 }
395 
396 /* These swap argument orders, which are different for Apple and our implementation */
cc_md5_final(void * ctx,unsigned char * digest)397 void cc_md5_final(void *ctx, unsigned char *digest)
398 {
399     if(opt_enable_mac_cc){
400 	CC_MD5_Final(digest,(CC_MD5_CTX *)ctx);
401     } else {
402 	hash_final_md5(ctx,digest);
403     }
404 }
405 
cc_sha1_final(void * ctx,unsigned char * digest)406 void cc_sha1_final(void *ctx, unsigned char *digest)
407 {
408     if(opt_enable_mac_cc){
409 	CC_SHA1_Final(digest,(CC_SHA1_CTX *)ctx);
410     } else {
411 	hash_final_sha1(ctx,digest);
412     }
413 }
414 
cc_sha256_final(void * ctx,unsigned char * digest)415 void cc_sha256_final(void *ctx, unsigned char *digest)
416 {
417     if(opt_enable_mac_cc){
418 	CC_SHA256_Final(digest,(CC_SHA256_CTX *)ctx);
419     } else {
420 	hash_final_sha256(ctx,digest);
421     }
422 }
423 #endif
424 
425 
426 
427 /*
428  * Load the hashing algorithms array.
429  */
load_hashing_algorithms()430 void algorithm_t::load_hashing_algorithms()
431 {
432     /* The DEFAULT_ENABLE variables are in main.h */
433 #if defined(HAVE_CC_SHA1_INIT)
434     /* Use the Apple's validated Common Crypto for SHA1 and SHA256 */
435     assert(sizeof(struct CC_MD5state_st)<MAX_ALGORITHM_CONTEXT_SIZE);
436     assert(sizeof(struct CC_SHA1state_st)<MAX_ALGORITHM_CONTEXT_SIZE);
437     assert(sizeof(struct CC_SHA256state_st)<MAX_ALGORITHM_CONTEXT_SIZE);
438     add_algorithm(alg_md5,       "md5",       128, cc_md5_init,         cc_md5_update,         cc_md5_final,         DEFAULT_ENABLE_MD5);
439     add_algorithm(alg_sha1,      "sha1",      160, cc_sha1_init,        cc_sha1_update,        cc_sha1_final,        DEFAULT_ENABLE_SHA1);
440     add_algorithm(alg_sha256,    "sha256",    256, cc_sha256_init,      cc_sha256_update,      cc_sha256_final,      DEFAULT_ENABLE_SHA256);
441 #else
442     add_algorithm(alg_md5,       "md5",       128, hash_init_md5,       hash_update_md5,       hash_final_md5,       DEFAULT_ENABLE_MD5);
443     add_algorithm(alg_sha1,      "sha1",      160, hash_init_sha1,      hash_update_sha1,      hash_final_sha1,      DEFAULT_ENABLE_SHA1);
444     add_algorithm(alg_sha256,    "sha256",    256, hash_init_sha256,    hash_update_sha256,    hash_final_sha256,    DEFAULT_ENABLE_SHA256);
445 #endif
446     add_algorithm(alg_tiger,     "tiger",     192, hash_init_tiger,     hash_update_tiger,     hash_final_tiger,     DEFAULT_ENABLE_TIGER);
447     add_algorithm(alg_whirlpool, "whirlpool", 512, hash_init_whirlpool, hash_update_whirlpool, hash_final_whirlpool, DEFAULT_ENABLE_WHIRLPOOL);
448 
449     //add_algorithm(alg_sha3,
450     //		  "sha3",
451     //256,
452     //hash_init_sha3,
453     //hash_update_sha3,
454     //hash_final_sha3,
455     //	  DEFAULT_ENABLE_SHA3);
456 }
457 
458 
459 /**
460  * Given an algorithm name, convert it to a hashid_t
461  * returns alg_unknown if the name is not valid.
462  */
get_hashid_for_name(string name)463 hashid_t algorithm_t::get_hashid_for_name(string name)
464 {
465     /* convert name to lowercase and remove any dashes */
466     lowercase(name);
467     size_t dash;
468     while((dash=name.find("-")) != string::npos){
469 	name.replace(dash,1,"");
470     }
471     for(int i=0;i<NUM_ALGORITHMS;i++){
472 	if(hashes[i].name==name) return hashes[i].id;
473     }
474     return alg_unknown;
475 }
476 
clear_algorithms_inuse()477 void algorithm_t::clear_algorithms_inuse()
478 {
479   for (int i = 0 ; i < NUM_ALGORITHMS ; ++i)  {
480       hashes[i].inuse = false;
481   }
482 }
483 
484 
valid_hex(const std::string & buf)485 bool algorithm_t::valid_hex(const std::string &buf)
486 {
487     for(std::string::const_iterator it = buf.begin(); it!=buf.end(); it++){
488 	if(!isxdigit(*it)) return false;
489     }
490     return true;
491 }
492 
valid_hash(hashid_t alg,const std::string & buf)493 bool algorithm_t::valid_hash(hashid_t alg, const std::string &buf)
494 {
495     for (size_t pos = 0 ; pos < hashes[alg].bit_length/4 ; pos++)  {
496 	if (!isxdigit(buf[pos])) return false; // invalid character
497 	if (pos==(hashes[alg].bit_length/4)-1) return true; // we found them all
498     }
499     return false;				// too short or too long
500 }
501 
502 
algorithms_in_use_count()503 int algorithm_t::algorithms_in_use_count()
504 {
505     int count = 0;
506     for (int i = 0 ; i < NUM_ALGORITHMS ; ++i)  {
507 	if(hashes[i].inuse) count++;
508     }
509     return count;
510 }
511 
512 
// String splitting helpers. The behaviour follows the std::getline-based
// recipe from
// http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c
//
// Tokens are the runs between occurrences of `delim`. Empty tokens are
// kept, except that nothing is produced after a trailing delimiter, and
// an empty input produces no tokens at all.

// Append the tokens of `s` to `elems` and return `elems`.
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::string::size_type begin = 0;
    while (begin < s.size()) {
        const std::string::size_type hit = s.find(delim, begin);
        if (hit == std::string::npos) {
            elems.push_back(s.substr(begin));   // last token, no delimiter after it
            break;
        }
        elems.push_back(s.substr(begin, hit - begin));
        begin = hit + 1;                        // continue after the delimiter
    }
    return elems;
}


// Convenience overload: return the tokens of `s` in a fresh vector.
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> tokens;
    split(s, delim, tokens);
    return tokens;
}
529 
530 
/*
 * Convert `s` to lower case in place, one byte at a time, using the
 * C library's tolower().
 */
void lowercase(std::string &s)
{
    for (std::string::iterator it = s.begin(); it != s.end(); ++it) {
	// tolower() is only defined for EOF and unsigned char values; a
	// plain char may be negative for bytes >= 0x80, so convert first.
	*it = static_cast<char>(::tolower(static_cast<unsigned char>(*it)));
    }
}
535 
536 
537 //
538 // Set inuse for each of the algorithms in the argument.
539 //
enable_hashing_algorithms(std::string var)540 void algorithm_t::enable_hashing_algorithms(std::string var)
541 {
542   // convert name to lowercase and remove any dashes
543   std::transform(var.begin(), var.end(), var.begin(), ::tolower);
544 
545   // Split on the commas
546   std::vector<std::string>algs = split(var,',');
547 
548   for (std::vector<std::string>::const_iterator it = algs.begin();it!=algs.end();it++)
549   {
550     hashid_t id = get_hashid_for_name(*it);
551     if (id==alg_unknown)
552     {
553       // Did the user specify to compute all hash algorithms?
554       if (*it == "all")
555       {
556 	for (int j=0 ; j<NUM_ALGORITHMS ; j++)
557 	{
558 	  hashes[j].inuse = TRUE;
559 	}
560 	return;
561       }
562 
563       // No idea what this algorithm is.
564       fprintf(stderr,
565 	      "%s: Unknown algorithm: %s%s",
566 	      progname.c_str(),
567 	      (*it).c_str(),
568 	      NEWLINE);
569       try_msg();
570       exit(EXIT_FAILURE);
571     }
572 
573     hashes[id].inuse = TRUE;
574   }
575 }
576 
577 
setup_expert_mode(char * arg)578 void state::setup_expert_mode(char *arg)
579 {
580     for(unsigned int i=0;i<strlen(arg);i++){
581 	switch(arg[i]){
582 	case 'e': // Windows PE executables
583 	  mode_winpe = true;     break;
584 	case 'b': // Block Device
585 	  mode_block = true;     break;
586 	case 'c': // Character Device
587 	  mode_character = true; break;
588 	case 'p': // Named Pipe
589 	  mode_pipe=true;        break;
590 	case 'f': // Regular File
591 	  mode_regular=true;     break;
592 	case 'l': // Symbolic Link
593 	  mode_symlink=true;     break;
594 	case 's': // Socket
595 	  mode_socket=true;      break;
596 	case 'd': // Door (Solaris)
597 	  mode_door=true;        break;
598 	default:
599 	  ocb.error("%s: Unrecognized file type: %c", progname.c_str(),arg[i]);
600 	}
601     }
602 }
603 
hashdeep_check_matching_modes()604 void state::hashdeep_check_matching_modes()
605 {
606   sanity_check((not (primary_compute == ocb.primary_function)),
607 	       "Multiple processing modes specified.");
608 }
609 
610 
611 
hashdeep_process_command_line(int argc_,char ** argv_)612 int state::hashdeep_process_command_line(int argc_, char **argv_)
613 {
614     bool did_usage = false;
615   int i;
616 
617   while ((i=getopt(argc_,argv_,"abc:CdeEF:f:o:I:i:MmXxtlk:rsp:wvVhW:0D:uj:")) != -1)  {
618     switch (i)
619     {
620     case 'a':
621       hashdeep_check_matching_modes();
622       ocb.primary_function = primary_audit;
623       break;
624 
625     case 'C':
626       opt_enable_mac_cc = true;
627       break;
628 
629     case 'd':
630       ocb.xml_open(stdout);
631       break;
632 
633     case 'f':
634       opt_input_list = optarg;
635       break;
636 
637     case 'o':
638       mode_expert=true;
639       setup_expert_mode(optarg);
640       break;
641 
642     case 'I':
643       ocb.mode_size_all=true;
644       // falls through
645     case 'i':
646       ocb.mode_size = true;
647       ocb.size_threshold = find_block_size(optarg);
648       if (ocb.size_threshold==0)
649       {
650 	ocb.error("Requested size threshold implies not hashing anything");
651 	exit(status_t::STATUS_USER_ERROR);
652       }
653       break;
654 
655     case 'c':
656       ocb.primary_function = primary_compute;
657       /* Before we parse which algorithms we're using now, we have
658        * to erase the default (or previously entered) values
659        */
660       algorithm_t::clear_algorithms_inuse();
661       algorithm_t::enable_hashing_algorithms(optarg);
662       break;
663 
664     case 'M':
665       ocb.opt_display_hash = true;
666       // falls through
667     case 'm':
668       hashdeep_check_matching_modes();
669       ocb.primary_function = primary_match;
670       break;
671 
672     case 'X':
673       ocb.opt_display_hash=true;
674       // falls through
675     case 'x':
676       hashdeep_check_matching_modes();
677       ocb.primary_function = primary_match_neg;
678       break;
679 
680       // TODO: Add -t mode to hashdeep
681       //    case 't': mode |= mode_timestamp;    break;
682 
683     case 'b': ocb.mode_barename=true;   break;
684     case 'l': ocb.opt_relative=true;    break;
685     case 'e': ocb.opt_estimate = true;	break;
686     case 'r': mode_recursive=true;	break;
687     case 's': ocb.opt_silent = true;	break;
688 
689 
690     case 'p':
691 	ocb.piecewise_size = find_block_size(optarg);
692       if (ocb.piecewise_size==0)
693 	  ocb.fatal_error("Piecewise blocks of zero bytes are impossible");
694 
695       break;
696 
697     case 'w': ocb.opt_show_matched = true;    break; // displays which known hash generated a match
698 
699     case 'k':
700 	switch (ocb.load_hash_file(optarg)) {
701 	case hashlist::loadstatus_ok:
702 	    if(opt_debug){
703 		ocb.error("%s: Match file loaded %d known hash values.",
704 				optarg,ocb.known_size());
705 	    }
706 	    break;
707 
708       case hashlist::status_contains_no_hashes:
709 	  /* Trying to load an empty file is fine, but we shouldn't
710 	     change hashes_loaded */
711 	  break;
712 
713       case hashlist::status_contains_bad_hashes:
714 	  ocb.error("%s: contains some bad hashes, using anyway",optarg);
715 	  break;
716 
717       case hashlist::status_unknown_filetype:
718       case hashlist::status_file_error:
719 	  /* The loading code has already printed an error */
720 	    break;
721 
722 	default:
723 	    ocb.error("%s: unknown error, skipping%s", optarg, NEWLINE);
724 	  break;
725 	}
726       break;
727 
728     case 'v':
729       ++ocb.opt_verbose;
730       if (ocb.opt_verbose > INSANELY_VERBOSE)
731 	ocb.error("User request for insane verbosity denied");
732       break;
733 
734     case 'V':
735       ocb.status("%s", VERSION);
736       exit(EXIT_SUCCESS);
737 
738     case 'W': ocb.set_outfilename(optarg); break;
739     case '0': ocb.opt_zero = true; break;
740     case 'u': ocb.opt_unicode_escape = true;break;
741     case 'j': ocb.opt_threadcount = atoi(optarg); break;
742     case 'F': ocb.opt_iomode = iomode::toiomode(optarg);break;
743     case 'E': ocb.opt_case_sensitive = false; break;
744 
745     case 'h':
746 	usage_count++;
747 	hashdeep_usage();
748 	did_usage = true;
749 	break;
750 
751     case 'D': opt_debug = atoi(optarg); break;
752     default:
753       try_msg();
754       exit(EXIT_FAILURE);
755     }
756   }
757 
758   if(did_usage ) exit(EXIT_SUCCESS);
759 
760   hashdeep_check_flags_okay();
761   return FALSE;
762 }
763 
764 #ifdef _WIN32
765 /**
766  * WIN32 requires the argv in wchar_t format to allow the program to get UTF16
767  * filenames resulting from star expansion.
768  */
prepare_windows_command_line()769 int state::prepare_windows_command_line()
770 {
771     this->argv = CommandLineToArgvW(GetCommandLineW(),&this->argc);
772     return FALSE;
773 }
774 #endif
775 
776 class uni32str:public vector<uint32_t> {};
777 
escape_utf8(const std::string & utf8)778 std::string global::escape_utf8(const std::string &utf8)
779 {
780     uni32str utf32_line;
781     std::string ret;
782     utf8::utf8to32(utf8.begin(),utf8.end(),back_inserter(utf32_line));
783     for(uni32str::const_iterator it = utf32_line.begin(); it!=utf32_line.end(); it++){
784 	if((*it) < 256){
785 	    ret.push_back(*it);
786 	} else {
787 	    char buf[16];
788 	    snprintf(buf,sizeof(buf),"U+%04X",*it);
789 	    ret.append(buf);
790 	}
791     }
792     return ret;
793 }
794 
795 #ifdef _WIN32
796 /**
797  * We only need make_utf8 on windows because on POSIX systems
798  * all filenames are assumed to be UTF8.
799  */
make_utf8(const tstring & str)800 std::string global::make_utf8(const tstring &str)
801 {
802     if(str.size()==0) return std::string(); // nothing to convert
803 
804     /* Figure out how many bytes req required */
805     size_t len = WideCharToMultiByte(CP_UTF8,0,str.c_str(),str.size(),0,0,0,0);
806     if(len==0){
807 	switch(GetLastError()){
808 	case ERROR_INSUFFICIENT_BUFFER: std::cerr << "ERROR_INSUFFICIENT_BUFFER\n";break;
809 	case ERROR_INVALID_FLAGS: std::cerr << "ERROR_INVALID_FLAGS\n";break;
810 	case ERROR_INVALID_PARAMETER: std::cerr << "ERROR_INVALID_PARAMETER\n";break;
811 	case ERROR_NO_UNICODE_TRANSLATION: std::cerr << "ERROR_NO_UNICODE_TRANSLATION\n";break;
812 	}
813 	std::cerr << "WideCharToMultiByte failed\n";
814 	return std::string("");
815     }
816     /* allocate the space we need (plus one for null-termination */
817     char *buf = new char[len+1];
818 
819     /* Perform the conversion */
820     len = WideCharToMultiByte(CP_UTF8,0,str.c_str(),str.size(),buf,len,0,0);
821     if(len==0){
822 	return std::string("");		// nothing to return
823     }
824     buf[len] = 0;			// be sure it is null-terminated
825     std::string s2(buf);		// Make a STL string
826     delete [] buf;			// Delete the buffern
827     return s2;				// return the string
828 }
829 #endif
830 
831 
getcwd()832 tstring global::getcwd()
833 {
834 #ifdef _WIN32
835     wchar_t buf[MAX_PATH];
836     memset(buf,0,sizeof(buf));
837     _wgetcwd(buf,MAX_PATH);
838     tstring ret = tstring(buf);
839     return ret;
840 #else
841     char buf[PATH_MAX+1];
842     memset(buf,0,sizeof(buf));
843     ::getcwd(buf,sizeof(buf));
844     return std::string(buf);
845 #endif
846 }
847 
848 
#if 0
/*
 * Disabled alternative to global::getcwd() that does not depend on PATH_MAX.
 * See http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html
 *
 * Starting at ".", the code repeatedly does chdir("..") and scans the parent
 * directory for the entry whose (device, inode) pair matches the directory
 * that was just left. The walk stops when the (device, inode) pair of "/"
 * is reached or when a level cannot be resolved.
 *
 * Returns the rebuilt path, which always ends with a slash, or an empty
 * string if the walk did not reach the root. The starting directory is
 * restored with fchdir() before returning; the process exits if "." cannot
 * be opened or if the starting directory cannot be restored.
 */
tstring global::getcwd()
{
    typedef std::pair<dev_t, ino_t> file_id;

    std::string path;                   // stays empty unless the walk reaches the root

    // Keep track of the start directory, so we can jump back to it later
    int start_fd = open(".", O_RDONLY);
    if (start_fd == -1) {
        fprintf(stderr,"global::getcwd(): Cannot open '.': %s\n",strerror(errno));
        exit(1);
    }

    struct stat sb;
    if (fstat(start_fd, &sb)==0) {
        file_id current_id(sb.st_dev, sb.st_ino);

        // Get info for the root directory, so we can determine when we hit it
        if (stat("/", &sb)==0) {
            std::vector<std::string> path_components;
            file_id root_id(sb.st_dev, sb.st_ino);

            // While we are not at the root, keep going up...
            while (current_id != root_id) {
                bool pushed = false;
                if (chdir("..")==0) {
                    DIR *dir = opendir(".");
                    if (dir) {
                        dirent *entry;
                        while ((entry = readdir(dir))) {
                            // Loop through each entry trying to find where we came from
                            if (strcmp(entry->d_name,".")==0) continue;
                            if (strcmp(entry->d_name,"..")==0) continue;
                            if (lstat(entry->d_name, &sb)==0) {
                                file_id child_id(sb.st_dev, sb.st_ino);
                                if (child_id == current_id) {
                                    // Found where we came from; remember its name
                                    path_components.push_back(entry->d_name);
                                    pushed = true;
                                    break;
                                }
                            }
                        }
                        closedir(dir);
                        if (pushed && stat(".", &sb)==0) {
                            // We have a reason to continue: update the current dir id
                            current_id = file_id(sb.st_dev, sb.st_ino);
                        }
                    } // Else we cannot read information at this level
                }
                // If we didn't obtain any info this pass, there is no reason to continue
                if (!pushed) break;
            }

            if (current_id == root_id) {
                // Unless they are equal we failed above.
                // Build the path; it will always end with a slash.
                path = "/";
                for (std::vector<std::string>::reverse_iterator i = path_components.rbegin();
                     i != path_components.rend();
                     ++i) {
                    path += *i+"/";
                }
            }

            // Return to where we started. Continuing from some ancestor
            // directory would silently break every relative path.
            if (fchdir(start_fd) != 0) {
                fprintf(stderr,"global::getcwd(): Cannot return to starting directory: %s\n",
                        strerror(errno));
                exit(1);
            }
        }
    }
    close(start_fd);
    return path;
}
#endif
918 
919 
920 
921 /* Return the canonicalized absolute pathname in UTF-8 on Windows and POSIX systems */
get_realpath(const tstring & fn)922 tstring global::get_realpath(const tstring &fn)
923 {
924 #ifdef _WIN32
925     /*
926      * expand a relative path to the full path.
927      * http://msdn.microsoft.com/en-us/library/506720ff(v=vs.80).aspx
928      */
929     wchar_t absPath[PATH_MAX];
930     if(_wfullpath(absPath,fn.c_str(),PATH_MAX)==0) tstring(); // fullpath failed...
931     return tstring(absPath);
932 #else
933     char resolved_name[PATH_MAX];	//
934     if(realpath(fn.c_str(),resolved_name)==0) return "";
935     if(opt_debug) std::cout << "global::get_realpath(" << fn << ")=" << resolved_name << "\n";
936     return tstring(resolved_name);
937 #endif
938 }
939 
get_realpath8(const tstring & fn)940 std::string global::get_realpath8(const tstring &fn)
941 {
942     return global::make_utf8(global::get_realpath(fn));
943 }
944 
945 
946 #ifdef _WIN32
947 /**
948  * Detect if we are a 32-bit program running on a 64-bit system.
949  *
950  * Running a 32-bit program on a 64-bit system is problematic because WoW64
951  * changes the program's view of critical directories. An affected
952  * program does not see the true %WINDIR%, but instead gets a mapped
953  * version. Thus the user cannot get an accurate picture of their system.
954  * See http://jessekornblum.livejournal.com/273084.html for an example.
955  *
956  * The following is adapted from
957  * http://msdn.microsoft.com/en-us/library/ms684139(v=VS.85).aspx
958  */
959 
960 typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
961 LPFN_ISWOW64PROCESS fnIsWow64Process;
962 
check_wow64()963 void state::check_wow64()
964 {
965     BOOL result;
966 
967     fnIsWow64Process = (LPFN_ISWOW64PROCESS) GetProcAddress(GetModuleHandle(TEXT("kernel32")),
968 							  "IsWow64Process");
969     // If this system doesn't have the function IsWow64Process then
970     // it's definitely not running under WoW64.
971     if (NULL == fnIsWow64Process) return;
972 
973     if (! fnIsWow64Process(GetCurrentProcess(), &result))  {
974 	// The function failed? WTF? Well, let's not worry about it.
975 	return;
976     }
977 
978     if (result) {
979 	ocb.error("WARNING: You are running a 32-bit program on a 64-bit system.");
980 	ocb.error("You probably want to use the 64-bit version of this program.");
981     }
982 }
983 #endif   // ifdef _WIN32
984 
985 
986 /****************************************************************
987  * Legacy code from md5deep follows....
988  *
989  ****************************************************************/
990 
991 
md5deep_check_flags_okay()992 void state::md5deep_check_flags_okay()
993 {
994   sanity_check(((ocb.opt_mode_match) || (ocb.opt_mode_match_neg)) &&
995 	       hashes_loaded()==0,
996 	       "Unable to load any matching files.");
997 
998   sanity_check((ocb.opt_relative) && (ocb.mode_barename),
999 	       "Relative paths and bare filenames are mutally exclusive.");
1000 
1001   sanity_check((ocb.piecewise_size>0) && (ocb.opt_display_size),
1002 	       "Piecewise mode and file size display is just plain silly.");
1003 
1004 
1005   /* If we try to display non-matching files but haven't initialized the
1006      list of matching files in the first place, bad things will happen. */
1007   sanity_check((ocb.mode_not_matched) &&
1008 	       ! ((ocb.opt_mode_match) || (ocb.opt_mode_match_neg)),
1009 	       "Matching or negative matching must be enabled to display non-matching files.");
1010 
1011   sanity_check(ocb.opt_show_matched &&
1012 	       ! ((ocb.opt_mode_match) || (ocb.opt_mode_match_neg)),
1013 	       "Matching or negative matching must be enabled to display which file matched.");
1014 }
1015 
1016 
md5deep_check_matching_modes()1017 void state::md5deep_check_matching_modes()
1018 {
1019     sanity_check((ocb.opt_mode_match) && (ocb.opt_mode_match_neg),
1020 		 "Regular and negative matching are mutually exclusive.");
1021 }
1022 
1023 
md5deep_process_command_line(int argc_,char ** argv_)1024 int state::md5deep_process_command_line(int argc_, char **argv_)
1025 {
1026     bool did_usage = false;
1027     int i;
1028 
1029     while ((i = getopt(argc_,
1030 		       argv_,
1031 		       "A:a:bcCdeF:f:I:i:M:X:x:m:o:tnwzsSp:rhvV0lkqZW:D:uj:")) != -1) {
1032 	switch (i) {
1033 	case 'C': opt_enable_mac_cc = true; break;
1034 	case 'D': opt_debug = atoi(optarg);	break;
1035 	case 'd': ocb.xml_open(stdout);		break;
1036 	case 'f': opt_input_list = optarg;	break;
1037 	case 'I':
1038 	  // RBF - Document -I mode for hashdeep man page
1039 	    ocb.mode_size_all=true;
1040 	    // falls through
1041 	case 'i':
1042 	    ocb.mode_size=true;
1043 	    ocb.size_threshold = find_block_size(optarg);
1044 	    if (ocb.size_threshold==0) {
1045 		ocb.error("Requested size threshold implies not hashing anything.");
1046 		exit(status_t::STATUS_USER_ERROR);
1047 	    }
1048 	    break;
1049 
1050 	case 'p':
1051 	    ocb.piecewise_size = find_block_size(optarg);
1052 	    if (ocb.piecewise_size==0) {
1053 		ocb.error("Illegal size value for piecewise mode.");
1054 		exit(status_t::STATUS_USER_ERROR);
1055 	    }
1056 
1057 	    break;
1058 
1059 	case 'Z': ocb.mode_triage	= true;		break;
1060 	case 't': ocb.mode_timestamp	= true;		break;
1061 	case 'n': ocb.mode_not_matched	= true;		break;
1062 	case 'w': ocb.opt_show_matched	= true;		break; 	// display which known hash generated match
1063 	case 'j': ocb.opt_threadcount	= atoi(optarg);	break;
1064 	case 'F': ocb.opt_iomode	= iomode::toiomode(optarg);break;
1065 
1066 	case 'a':
1067 	    ocb.opt_mode_match=true;
1068 	    md5deep_check_matching_modes();
1069 	    md5deep_add_hash(optarg,optarg);
1070 	    break;
1071 
1072 	case 'A':
1073 	    ocb.opt_mode_match_neg=true;
1074 	    md5deep_check_matching_modes();
1075 	    md5deep_add_hash(optarg,optarg);
1076 	    break;
1077 
1078 	case 'o':
1079 	    mode_expert=true;
1080 	    setup_expert_mode(optarg);
1081 	    break;
1082 
1083 	case 'M':			// match mode
1084 	    ocb.opt_display_hash=true;
1085 	    /* Intentional fall through */
1086 	case 'm':
1087 	    ocb.opt_mode_match=true;
1088 	    md5deep_check_matching_modes();
1089 	    md5deep_load_match_file(optarg);
1090 	    break;
1091 
1092 	case 'X':
1093 	    ocb.opt_display_hash=true;
1094 	case 'x':
1095 	    ocb.opt_mode_match_neg=true;
1096 	    md5deep_check_matching_modes();
1097 	    md5deep_load_match_file(optarg);
1098 	    break;
1099 
1100 	case 'c': ocb.opt_csv = true;		break;
1101 	case 'z': ocb.opt_display_size = true;	break;
1102 	case '0': ocb.opt_zero = true;		break;
1103 
1104 	case 'S':
1105 	    mode_warn_only = true;
1106 	    ocb.opt_silent = true;
1107 	    break;
1108 
1109 	case 's': ocb.opt_silent = true;	break;
1110 	case 'e': ocb.opt_estimate = true;	break;
1111 	case 'r': mode_recursive = true;	break;
1112 	case 'k': ocb.opt_asterisk = true;      break;
1113 	case 'b': ocb.mode_barename=true;	break;
1114 
1115 	case 'l': ocb.opt_relative = true;      break;
1116 	case 'q': ocb.mode_quiet = true;	break;
1117 	case 'W': ocb.set_outfilename(optarg);	break;
1118 	case 'u': ocb.opt_unicode_escape = true;break;
1119 
1120 	case 'h':
1121 	usage_count++;
1122 	    md5deep_usage();
1123 	    did_usage = true;
1124 	    break;
1125 
1126 	case 'v':
1127 	    ocb.status("%s",VERSION);
1128 	    exit (EXIT_SUCCESS);
1129 
1130 	case 'V':
1131 	    // COPYRIGHT is a format string, complete with newlines
1132 	    ocb.status(COPYRIGHT);
1133 	    exit (EXIT_SUCCESS);
1134 
1135 	default:
1136 	    try_msg();
1137 	    exit (status_t::STATUS_USER_ERROR);
1138 	}
1139     }
1140     if(did_usage) exit (EXIT_SUCCESS);
1141 
1142     md5deep_check_flags_okay();
1143     return EXIT_SUCCESS;
1144 }
1145 
1146 
1147 /****************************************************************/
1148 /* Make the UTF8 banner in case we need it
1149  * Only hashdeep has a header.
1150  */
make_banner()1151 std::string state::make_banner()
1152 {
1153     std::string utf8_banner;
1154     utf8_banner = HASHDEEP_HEADER_10 + std::string(NEWLINE);
1155     utf8_banner += HASHDEEP_PREFIX;
1156     utf8_banner += "size,";
1157     for (int i = 0 ; i < NUM_ALGORITHMS ; ++i) {
1158 	if (hashes[i].inuse){
1159 	    utf8_banner += hashes[i].name + std::string(",");
1160 	}
1161     }
1162     utf8_banner += std::string("filename") + NEWLINE;
1163     utf8_banner += "## Invoked from: " + global::make_utf8(global::getcwd()) + NEWLINE;
1164     utf8_banner += "## ";
1165 #ifdef _WIN32
1166     std::wstring cwd = global::getcwd();
1167     std::string  cwd8 = global::make_utf8(cwd);
1168 
1169     utf8_banner += cwd8 + ">";
1170 #else
1171     utf8_banner += (geteuid()==0) ? "#" : "$";
1172 #endif
1173 
1174     // Accounts for '## ', command prompt, and space before first argument
1175     size_t bytes_written = 8;
1176 
1177     for (int largc = 0 ; largc < this->argc ; ++largc) {
1178 	utf8_banner += " ";
1179 	bytes_written++;
1180 
1181 	// We are going to print the string. It's either ASCII or UTF16
1182 	// convert it to a tstring and then to UTF8 string.
1183 	tstring arg_t = tstring(this->argv[largc]);
1184 	std::string arg_utf8 = global::make_utf8(arg_t);
1185 	size_t current_bytes = arg_utf8.size();
1186 
1187 	// The extra 32 bytes is a fudge factor
1188 	if (current_bytes + bytes_written + 32 > MAX_STRING_LENGTH) {
1189 	    utf8_banner += std::string(NEWLINE) + "## ";
1190 	    bytes_written = 3;
1191 	}
1192 	utf8_banner += arg_utf8;
1193 	bytes_written += current_bytes;
1194     }
1195     utf8_banner += std::string(NEWLINE) + "## " + NEWLINE;
1196     return utf8_banner;
1197     /****************************************************************/
1198 
1199 }
1200 
find_block_size(std::string input_str)1201 uint64_t state::find_block_size(std::string input_str)
1202 {
1203     if(input_str.size()==0) return 0;	// no input???
1204     uint64_t multiplier = 1;
1205     char last_char = input_str[input_str.size()-1];
1206 
1207     // All cases fall through in this switch statement
1208     switch (tolower(last_char)) {
1209     case 'e':
1210 	multiplier *= 1024;
1211     case 'p':
1212 	multiplier *= 1024;
1213     case 't':
1214 	multiplier *= 1024;
1215     case 'g':
1216 	multiplier *= 1024;
1217     case 'm':
1218 	multiplier *= 1024;
1219     case 'k':
1220 	multiplier *= 1024;
1221     case 'b':
1222 	input_str.erase(input_str.size()-1,1); // erase the last character
1223 	break;
1224     default:
1225 	ocb.error("Improper piecewise multiplier ignored.");
1226 	break;
1227     case '0':case '1':case '2':case '3':case '4':
1228     case '5':case '6':case '7':case '8':case '9':
1229 	break;
1230     }
1231 
1232 #ifdef __HPUX
1233     return (strtoumax ( input_str.c_str(), (char**)0, 10) * multiplier);
1234 #else
1235     return (atoll(input_str.c_str()) * multiplier);
1236 #endif
1237 }
1238 
1239 
1240 
main(int argc,char ** argv)1241 int main(int argc, char **argv)
1242 {
1243   // Because the main() function can handle wchar_t arguments on Win32,
1244   // we need a way to reference those values. Thus we make a duplciate
1245   // of the argc and argv values.
1246 
1247   // Initialize the plugable algorithm system and create the state object!
1248 
1249   // Be sure that we were compiled correctly
1250   assert(sizeof(off_t)==8);
1251 
1252   algorithm_t::load_hashing_algorithms();
1253 
1254   state *s = new state();
1255   exit(s->main(argc,argv));
1256 }
1257 
1258 
main(int _argc,char ** _argv)1259 int state::main(int _argc,char **_argv)
1260 {
1261     /**
1262      * Originally this program was two sets of progarms:
1263      * 'hashdeep' with the new interface, and 'md5deep', 'sha1deep', etc
1264      * with the old interface. Now we are a single program and we figure out
1265      * which interface to use based on how we are started.
1266      */
1267 
1268     /* Get the program name */
1269     progname = _argv[0];		// default
1270 #ifdef HAVE_GETPROGNAME
1271     progname = getprogname();		// possibly better
1272 #endif
1273 #ifdef HAVE_PROGRAM_INVOCATION_NAME
1274     progname = program_invocation_name;	// possibly better
1275 #endif
1276 
1277 #ifdef HAVE_PTHREAD
1278     threadpool::win32_init();			//
1279     ocb.opt_threadcount = threadpool::numCPU(); // be sure it's set
1280 #endif
1281 
1282     /* There are two versions of basename, so use our own */
1283     size_t delim = progname.rfind(DIR_SEPARATOR);
1284     if(delim!=std::string::npos) progname.erase(0,delim+1);
1285 
1286     // Convert progname to lower case
1287     std::transform(progname.begin(), progname.end(), progname.begin(), ::tolower);
1288     std::string algname = progname.substr(0,progname.find("deep"));
1289 
1290     if (algname=="hash")
1291     {
1292       // We were called as "hashdeep"
1293       hashdeep_process_command_line(_argc,_argv);
1294     }
1295     else
1296     {
1297       // We were called as "[somethingelse]deep". Figure out which
1298       // algorithm and if we support that something else
1299 
1300       algorithm_t::clear_algorithms_inuse();
1301       char buf[256];
1302       strcpy(buf,algname.c_str());
1303       algorithm_t::enable_hashing_algorithms(buf);
1304       for (int i=0;i<NUM_ALGORITHMS;++i)
1305       {
1306 	if (hashes[i].inuse)
1307 	{
1308 	  md5deep_mode = true;
1309 	  opt_md5deep_mode_algorithm = hashes[i].id;
1310 	  break;
1311 	}
1312       }
1313 
1314       if (not md5deep_mode)
1315       {
1316 	cerr << progname << ": unknown hash: " <<algname << "\n";
1317 	exit(1);
1318       }
1319       md5deep_process_command_line(_argc,_argv);
1320     }
1321 
1322     if (opt_debug==1)
1323     {
1324       printf("self-test...\n");
1325       state::dig_self_test();
1326     }
1327 
1328 
1329     // See if we can open a regular file output, if requested
1330     // Set up the DFXML output if requested
1331     ocb.dfxml_startup(_argc,_argv);
1332 
1333 #ifdef _WIN32
1334     if (prepare_windows_command_line()){
1335 	ocb.fatal_error("Unable to process command line arguments");
1336     }
1337     check_wow64();
1338 #else
1339     argc = _argc;
1340     argv = _argv;
1341 #endif
1342 
1343     /* Verify that we can get the current working directory. */
1344     if(global::getcwd().size()==0){
1345 	ocb.fatal_error("%s", strerror(errno));
1346     }
1347 
1348     /* Make the banner if we are not in md5deep mode */
1349     if (!md5deep_mode){
1350 	ocb.set_utf8_banner( make_banner());
1351     }
1352 
1353 #ifdef HAVE_PTHREAD
1354     /* set up the threadpool */
1355     if(ocb.opt_threadcount>0){
1356 	ocb.tp = new threadpool(ocb.opt_threadcount);
1357     }
1358 #endif
1359 
1360     if(opt_debug>2){
1361 	std::cout << "dump hashlist before matching:\n";
1362 	ocb.dump_hashlist();
1363     }
1364 
1365     /* If we were given an input list, process it */
1366     if(opt_input_list!=""){
1367 	std::ifstream in;
1368 	in.open(opt_input_list.c_str());
1369 	if(!in.is_open()){
1370 	    std::cerr << "Cannot open " << opt_input_list << ": " << strerror(errno) << "\n";
1371 	    exit(1);
1372 	}
1373 	while(!in.eof()){
1374 	    std::string line;
1375 	    std::getline(in,line);
1376 	    /* Remove any possible \r\n or \n */
1377 	    if(line.size()>0 && line[line.size()-1]=='\n') line.erase(line.size()-1);
1378 	    if(line.size()>0 && line[line.size()-1]=='\r') line.erase(line.size()-1);
1379 	    if(line.size()==0) continue;
1380 	    /* If we are running on Windows, turn it into a UTF-16 filename */
1381 #ifdef _WIN32
1382 	    /* I think that this will work, but it needs to be tested */
1383 	    std::wstring wstr;
1384 	    utf8::utf8to16(line.begin(),line.end(),back_inserter(wstr));
1385 	    dig_win32(wstr);
1386 #else
1387 	    dig_normal(line.c_str());
1388 #endif
1389 	}
1390 	in.close();
1391     }
1392 
1393     /*
1394      * Anything left on the command line at this point is a file
1395      * or directory we're supposed to process. If there's nothing
1396      * specified, we should hash standard input
1397      */
1398 
1399     if (optind == argc && opt_input_list==""){
1400 	if(ocb.mode_triage){
1401 	    ocb.fatal_error("Processing stdin not supported in Triage mode");
1402 	}
1403 	ocb.hash_stdin();
1404     } else {
1405 	for(int i=optind;i<argc;i++){
1406 	    tstring fn = generate_filename(this->argv[i]);
1407 #ifdef _WIN32
1408 	    dig_win32(fn);
1409 #else
1410 	    dig_normal(fn);
1411 #endif
1412 	}
1413     }
1414 
1415     /* If we are multi-threading, wait for all threads to finish */
1416 #ifdef HAVE_PTHREAD
1417     if(ocb.tp) ocb.tp->wait_till_all_free();
1418 #endif
1419 
1420     if (opt_debug>2)
1421     {
1422       std::cout << "\ndump hashlist after matching:\n";
1423       ocb.dump_hashlist();
1424     }
1425 
1426     // If we were auditing, display the audit results
1427     if (ocb.primary_function == primary_audit)
1428     {
1429       ocb.display_audit_results();
1430     }
1431 
1432     /* We only have to worry about checking for unused hashes if one
1433      * of the matching modes was enabled. We let the display_not_matched
1434      * function determine if it needs to display anything. The function
1435      * also sets our return values in terms of inputs not being matched
1436      * or known hashes not being used
1437      */
1438     if (ocb.opt_mode_match or
1439 	ocb.opt_mode_match_neg or
1440 	(primary_match == ocb.primary_function) or
1441 	(primary_match_neg == ocb.primary_function))
1442     {
1443       ocb.finalize_matching();
1444     }
1445 
1446     /* If we were generating DFXML, finish the job */
1447     if(opt_debug>1) std::cerr << "*** main calling dfxml_shutdown\n";
1448     ocb.dfxml_shutdown();
1449 
1450     /* On windows, do a hard exit
1451      *
1452      * "If one of the terminated threads in the process holds a lock
1453      * and the DLL detach code in one of the loaded DLLs attempts to
1454      * acquire the same lock, then calling ExitProcess results in a
1455      * deadlock. In contrast, if a process terminates by calling
1456      * TerminateProcess, the DLLs that the process is attached to are
1457      * not notified of the process termination. Therefore, if you do
1458      * not know the state of all threads in your process, it is better
1459      * to call TerminateProcess than ExitProcess. Note that returning
1460      * from the main function of an application results in a call to
1461      * ExitProcess."
1462      *
1463      * http://msdn.microsoft.com/en-us/library/ms682658(v=vs.85).aspx
1464      */
1465 #if defined(_WIN32)
1466     TerminateProcess(GetCurrentProcess(),ocb.get_return_code());
1467 #endif
1468     return ocb.get_return_code();
1469 }
1470 
1471