1 // Fuzzy Hashing by Jesse Kornblum
2 // Copyright (C) 2013 Facebook
3 // Copyright (C) 2012 Kyrus
4 // Copyright (C) 2010 ManTech International Corporation
5 //
6 // $Id$
7 //
8 // This program is licensed under version 2 of the GNU Public License.
9 // See the file COPYING for details.
10 
11 #include "ssdeep.h"
12 #include "match.h"
13 
#ifdef _WIN32
// Allows us to open standard input in binary mode by default.
// This definition can't go in main.h or we get multiple definitions of it.
// See http://gnuwin32.sourceforge.net/compile.html for more
int _CRT_fmode = _O_BINARY;
#endif
20 
21 
initialize_state(state * s)22 static bool initialize_state(state *s)
23 {
24   if (NULL == s)
25     return true;
26 
27   s->mode                  = mode_none;
28   s->first_file_processed  = true;
29   s->found_meaningful_file = false;
30   s->processed_file        = false;
31 
32   s->threshold = 0;
33 
34   return false;
35 }
36 
37 
38 // In order to fit on one Win32 screen this function should produce
39 // no more than 22 lines of output.
usage(void)40 static void usage(void)
41 {
42   print_status ("%s version %s by Jesse Kornblum and the ssdeep Project", __progname, VERSION);
43   print_status ("For copyright information, see man page or README.TXT.");
44   print_status ("");
45   print_status ("Usage: %s [-m file] [-k file] [-dpgvrsblcxa] [-t val] [-h|-V] [FILES]",
46 	  __progname);
47 
48   print_status ("-m - Match FILES against known hashes in file");
49   print_status ("-k - Match signatures in FILES against signatures in file");
50   print_status ("-d - Directory mode, compare all files in a directory");
51   print_status ("-p - Pretty matching mode. Similar to -d but includes all matches");
52   print_status ("-g - Cluster matches together");
53   print_status ("-v - Verbose mode. Displays filename as its being processed");
54   print_status ("-r - Recursive mode");
55 
56   print_status ("-s - Silent mode; all errors are suppressed");
57   print_status ("-b - Uses only the bare name of files; all path information omitted");
58   print_status ("-l - Uses relative paths for filenames");
59   print_status ("-c - Prints output in CSV format");
60   print_status ("-x - Compare FILES as signature files");
61   print_status ("-a - Display all matches, regardless of score");
62 
63   print_status ("-t - Only displays matches above the given threshold");
64 
65   print_status ("-h - Display this help message");
66   print_status ("-V - Display version number and exit");
67 }
68 
69 
process_cmd_line(state * s,int argc,char ** argv)70 static void process_cmd_line(state *s, int argc, char **argv)
71 {
72   int i, match_files_loaded = FALSE;
73 
74   while ((i=getopt(argc,argv,"gavhVpdsblcxt:rm:k:")) != -1) {
75     switch(i) {
76 
77     case 'g':
78       s->mode |= mode_cluster;
79       break;
80 
81     case 'a':
82       s->mode |= mode_display_all;
83       break;
84 
85     case 'v':
86       if (MODE(mode_verbose))
87       {
88 	print_error(s,"%s: Already at maximum verbosity", __progname);
89 	print_error(s,
90 		    "%s: Error message displayed to user correctly",
91 		    __progname);
92       }
93       else
94 	s->mode |= mode_verbose;
95       break;
96 
97     case 'p':
98       s->mode |= mode_match_pretty;
99       break;
100 
101     case 'd':
102       s->mode |= mode_directory;
103       break;
104 
105     case 's':
106       s->mode |= mode_silent; break;
107 
108     case 'b':
109       s->mode |= mode_barename; break;
110 
111     case 'l':
112       s->mode |= mode_relative; break;
113 
114     case 'c':
115       s->mode |= mode_csv; break;
116 
117     case 'x':
118       s->mode |= mode_sigcompare; break;
119 
120     case 'r':
121       s->mode |= mode_recursive; break;
122 
123     case 't':
124       s->threshold = (uint8_t)atol(optarg);
125       if (s->threshold > 100)
126 	fatal_error("%s: Illegal threshold", __progname);
127       s->mode |= mode_threshold;
128       break;
129 
130     case 'm':
131       if (MODE(mode_compare_unknown) || MODE(mode_sigcompare))
132 	fatal_error("Positive matching cannot be combined with other matching modes");
133       s->mode |= mode_match;
134       if (not match_load(s,optarg))
135 	match_files_loaded = TRUE;
136       break;
137 
138     case 'k':
139       if (MODE(mode_match) || MODE(mode_sigcompare))
140 	fatal_error("Signature matching cannot be combined with other matching modes");
141       s->mode |= mode_compare_unknown;
142       if (not match_load(s,optarg))
143 	match_files_loaded = TRUE;
144       break;
145 
146     case 'h':
147       usage();
148       exit (EXIT_SUCCESS);
149 
150     case 'V':
151       print_status ("%s", VERSION);
152       exit (EXIT_SUCCESS);
153 
154     default:
155       try_msg();
156       exit (EXIT_FAILURE);
157     }
158   }
159 
160   // We don't include mode_sigcompare in this list as we haven't loaded
161   // the matching files yet. In that mode the matching files are in fact
162   // the command line arguments.
163   sanity_check(s,
164 	       ((MODE(mode_match) || MODE(mode_compare_unknown))
165 		&& not match_files_loaded),
166 	       "No matching files loaded");
167 
168   sanity_check(s,
169 	       ((s->mode & mode_barename) && (s->mode & mode_relative)),
170 	       "Relative paths and bare names are mutually exclusive");
171 
172   sanity_check(s,
173 	       ((s->mode & mode_match_pretty) && (s->mode & mode_directory)),
174 	       "Directory mode and pretty matching are mutually exclusive");
175 
176   sanity_check(s,
177 	       MODE(mode_csv) and MODE(mode_cluster),
178 	       "CSV and clustering modes cannot be combined");
179 
180   // -m, -p, and -d are incompatible with -k and -x
181   // The former treat FILES as raw files. The latter require them to be sigs
182   sanity_check(s,
183 	       ((MODE(mode_match) or MODE(mode_match_pretty) or MODE(mode_directory))
184 		and
185 		(MODE(mode_compare_unknown) or MODE(mode_sigcompare))),
186 	       "Incompatible matching modes");
187 
188 
189 }
190 
191 
192 
193 
194 
195 #ifdef _WIN32
prepare_windows_command_line(state * s)196 static int prepare_windows_command_line(state *s)
197 {
198   int argc;
199   TCHAR **argv;
200 
201   argv = CommandLineToArgvW(GetCommandLineW(),&argc);
202 
203   s->argc = argc;
204   s->argv = argv;
205 
206   return FALSE;
207 }
208 #endif
209 
210 
is_absolute_path(TCHAR * fn)211 static int is_absolute_path(TCHAR *fn)
212 {
213   if (NULL == fn)
214     internal_error("Unknown error in is_absolute_path");
215 
216 #ifdef _WIN32
217   return (isalpha(fn[0]) and _TEXT(':') == fn[1]);
218 # else
219   return (DIR_SEPARATOR == fn[0]);
220 #endif
221 }
222 
223 
generate_filename(state * s,TCHAR * fn,TCHAR * cwd,TCHAR * input)224 static void generate_filename(state *s, TCHAR *fn, TCHAR *cwd, TCHAR *input)
225 {
226   if (NULL == fn || NULL == input)
227     internal_error("Error calling generate_filename");
228 
229   if ((s->mode & mode_relative) || is_absolute_path(input))
230     _tcsncpy(fn, input, SSDEEP_PATH_MAX);
231   else {
232     // Windows systems don't have symbolic links, so we don't
233     // have to worry about carefully preserving the paths
234     // they follow. Just use the system command to resolve the paths
235 #ifdef _WIN32
236     _wfullpath(fn, input, SSDEEP_PATH_MAX);
237 #else
238     if (NULL == cwd)
239       // If we can't get the current working directory, we're not
240       // going to be able to build the relative path to this file anyway.
241       // So we just call realpath and make the best of things
242       realpath(input, fn);
243     else
244       snprintf(fn, SSDEEP_PATH_MAX, "%s%c%s", cwd, DIR_SEPARATOR, input);
245 #endif
246   }
247 }
248 
249 
main(int argc,char ** argv)250 int main(int argc, char **argv)
251 {
252   int count, status, goal = argc;
253   state *s;
254   TCHAR *fn, *cwd;
255 
256 #ifndef __GLIBC__
257   //  __progname  = basename(argv[0]);
258 #endif
259 
260   s = new state;
261   if (initialize_state(s))
262     fatal_error("%s: Unable to initialize state variable", __progname);
263 
264   process_cmd_line(s,argc,argv);
265 
266 #ifdef _WIN32
267   if (prepare_windows_command_line(s))
268     fatal_error("%s: Unable to process command line arguments", __progname);
269 #else
270   s->argc = argc;
271   s->argv = argv;
272 #endif
273 
274   // Anything left on the command line at this point is a file
275   // or directory we're supposed to process. If there's nothing
276   // specified, we should tackle standard input
277   if (optind == argc) {
278     status = process_stdin(s);
279   }
280   else {
281     MD5DEEP_ALLOC(TCHAR, fn, SSDEEP_PATH_MAX);
282     MD5DEEP_ALLOC(TCHAR, cwd, SSDEEP_PATH_MAX);
283 
284     cwd = _tgetcwd(cwd, SSDEEP_PATH_MAX);
285     if (NULL == cwd)
286       fatal_error("%s: %s", __progname, strerror(errno));
287 
288     count = optind;
289 
290     // The signature comparsion mode needs to use the command line
291     // arguments and argument count. We don't do wildcard expansion
292     // on it on Win32 (i.e. where it matters). The setting of 'goal'
293     // to the original argc occured at the start of main(), so we just
294     // need to update it if we're *not* in signature compare mode.
295     if (not (s->mode & mode_sigcompare)) {
296       goal = s->argc;
297     }
298 
299     while (count < goal)
300     {
301       if (MODE(mode_sigcompare))
302 	match_load(s,argv[count]);
303       else if (MODE(mode_compare_unknown))
304 	match_compare_unknown(s,argv[count]);
305       else {
306 	generate_filename(s, fn, cwd, s->argv[count]);
307 
308 #ifdef _WIN32
309 	status = process_win32(s, fn);
310 #else
311 	status = process_normal(s, fn);
312 #endif
313       }
314 
315       ++count;
316     }
317 
318     // If we processed files, but didn't find anything large enough
319     // to be meaningful, we should display a warning message to the user.
320     // This happens mostly when people are testing very small files
321     // e.g. $ echo "hello world" > foo && ssdeep foo
322     if ((not s->found_meaningful_file) and s->processed_file)
323     {
324       print_error(s,"%s: Did not process files large enough to produce meaningful results", __progname);
325     }
326   }
327 
328 
329   // If the user has requested us to compare signature files, use
330   // our existng code to pretty-print directory matching to do the
331   // work for us.
332   if (MODE(mode_sigcompare))
333     s->mode |= mode_match_pretty;
334   if (MODE(mode_match_pretty) or MODE(mode_sigcompare) or MODE(mode_cluster))
335     find_matches_in_known(s);
336   if (MODE(mode_cluster))
337     display_clusters(s);
338 
339   return (EXIT_SUCCESS);
340 }
341