1 // Fuzzy Hashing by Jesse Kornblum
2 // Copyright (C) 2013 Facebook
3 // Copyright (C) 2012 Kyrus
4 // Copyright (C) 2010 ManTech International Corporation
5 //
6 // $Id$
7 //
8 // This program is licensed under version 2 of the GNU Public License.
9 // See the file COPYING for details.
10
11 #include "ssdeep.h"
12 #include "match.h"
13
#ifdef _WIN32
// This definition can't go in main.h or we get multiple definitions of it.
// Setting it to _O_BINARY allows us to open standard input in binary mode
// by default.
// See http://gnuwin32.sourceforge.net/compile.html for more information.
int _CRT_fmode = _O_BINARY;
#endif
20
21
initialize_state(state * s)22 static bool initialize_state(state *s)
23 {
24 if (NULL == s)
25 return true;
26
27 s->mode = mode_none;
28 s->first_file_processed = true;
29 s->found_meaningful_file = false;
30 s->processed_file = false;
31
32 s->threshold = 0;
33
34 return false;
35 }
36
37
38 // In order to fit on one Win32 screen this function should produce
39 // no more than 22 lines of output.
usage(void)40 static void usage(void)
41 {
42 print_status ("%s version %s by Jesse Kornblum and the ssdeep Project", __progname, VERSION);
43 print_status ("For copyright information, see man page or README.TXT.");
44 print_status ("");
45 print_status ("Usage: %s [-m file] [-k file] [-dpgvrsblcxa] [-t val] [-h|-V] [FILES]",
46 __progname);
47
48 print_status ("-m - Match FILES against known hashes in file");
49 print_status ("-k - Match signatures in FILES against signatures in file");
50 print_status ("-d - Directory mode, compare all files in a directory");
51 print_status ("-p - Pretty matching mode. Similar to -d but includes all matches");
52 print_status ("-g - Cluster matches together");
53 print_status ("-v - Verbose mode. Displays filename as its being processed");
54 print_status ("-r - Recursive mode");
55
56 print_status ("-s - Silent mode; all errors are suppressed");
57 print_status ("-b - Uses only the bare name of files; all path information omitted");
58 print_status ("-l - Uses relative paths for filenames");
59 print_status ("-c - Prints output in CSV format");
60 print_status ("-x - Compare FILES as signature files");
61 print_status ("-a - Display all matches, regardless of score");
62
63 print_status ("-t - Only displays matches above the given threshold");
64
65 print_status ("-h - Display this help message");
66 print_status ("-V - Display version number and exit");
67 }
68
69
process_cmd_line(state * s,int argc,char ** argv)70 static void process_cmd_line(state *s, int argc, char **argv)
71 {
72 int i, match_files_loaded = FALSE;
73
74 while ((i=getopt(argc,argv,"gavhVpdsblcxt:rm:k:")) != -1) {
75 switch(i) {
76
77 case 'g':
78 s->mode |= mode_cluster;
79 break;
80
81 case 'a':
82 s->mode |= mode_display_all;
83 break;
84
85 case 'v':
86 if (MODE(mode_verbose))
87 {
88 print_error(s,"%s: Already at maximum verbosity", __progname);
89 print_error(s,
90 "%s: Error message displayed to user correctly",
91 __progname);
92 }
93 else
94 s->mode |= mode_verbose;
95 break;
96
97 case 'p':
98 s->mode |= mode_match_pretty;
99 break;
100
101 case 'd':
102 s->mode |= mode_directory;
103 break;
104
105 case 's':
106 s->mode |= mode_silent; break;
107
108 case 'b':
109 s->mode |= mode_barename; break;
110
111 case 'l':
112 s->mode |= mode_relative; break;
113
114 case 'c':
115 s->mode |= mode_csv; break;
116
117 case 'x':
118 s->mode |= mode_sigcompare; break;
119
120 case 'r':
121 s->mode |= mode_recursive; break;
122
123 case 't':
124 s->threshold = (uint8_t)atol(optarg);
125 if (s->threshold > 100)
126 fatal_error("%s: Illegal threshold", __progname);
127 s->mode |= mode_threshold;
128 break;
129
130 case 'm':
131 if (MODE(mode_compare_unknown) || MODE(mode_sigcompare))
132 fatal_error("Positive matching cannot be combined with other matching modes");
133 s->mode |= mode_match;
134 if (not match_load(s,optarg))
135 match_files_loaded = TRUE;
136 break;
137
138 case 'k':
139 if (MODE(mode_match) || MODE(mode_sigcompare))
140 fatal_error("Signature matching cannot be combined with other matching modes");
141 s->mode |= mode_compare_unknown;
142 if (not match_load(s,optarg))
143 match_files_loaded = TRUE;
144 break;
145
146 case 'h':
147 usage();
148 exit (EXIT_SUCCESS);
149
150 case 'V':
151 print_status ("%s", VERSION);
152 exit (EXIT_SUCCESS);
153
154 default:
155 try_msg();
156 exit (EXIT_FAILURE);
157 }
158 }
159
160 // We don't include mode_sigcompare in this list as we haven't loaded
161 // the matching files yet. In that mode the matching files are in fact
162 // the command line arguments.
163 sanity_check(s,
164 ((MODE(mode_match) || MODE(mode_compare_unknown))
165 && not match_files_loaded),
166 "No matching files loaded");
167
168 sanity_check(s,
169 ((s->mode & mode_barename) && (s->mode & mode_relative)),
170 "Relative paths and bare names are mutually exclusive");
171
172 sanity_check(s,
173 ((s->mode & mode_match_pretty) && (s->mode & mode_directory)),
174 "Directory mode and pretty matching are mutually exclusive");
175
176 sanity_check(s,
177 MODE(mode_csv) and MODE(mode_cluster),
178 "CSV and clustering modes cannot be combined");
179
180 // -m, -p, and -d are incompatible with -k and -x
181 // The former treat FILES as raw files. The latter require them to be sigs
182 sanity_check(s,
183 ((MODE(mode_match) or MODE(mode_match_pretty) or MODE(mode_directory))
184 and
185 (MODE(mode_compare_unknown) or MODE(mode_sigcompare))),
186 "Incompatible matching modes");
187
188
189 }
190
191
192
193
194
#ifdef _WIN32
// Re-reads the process command line as wide-character arguments and stores
// the resulting count and vector in s->argc and s->argv.
//
// Returns FALSE on success and TRUE if the command line could not be
// converted, in which case s is left untouched.
static int prepare_windows_command_line(state *s)
{
  int argc = 0;
  TCHAR **argv = CommandLineToArgvW(GetCommandLineW(), &argc);

  // CommandLineToArgvW reports failure by returning NULL. Surface that to
  // the caller rather than storing a NULL argument vector.
  if (NULL == argv)
    return TRUE;

  s->argc = argc;
  s->argv = argv;

  return FALSE;
}
#endif
209
210
is_absolute_path(TCHAR * fn)211 static int is_absolute_path(TCHAR *fn)
212 {
213 if (NULL == fn)
214 internal_error("Unknown error in is_absolute_path");
215
216 #ifdef _WIN32
217 return (isalpha(fn[0]) and _TEXT(':') == fn[1]);
218 # else
219 return (DIR_SEPARATOR == fn[0]);
220 #endif
221 }
222
223
generate_filename(state * s,TCHAR * fn,TCHAR * cwd,TCHAR * input)224 static void generate_filename(state *s, TCHAR *fn, TCHAR *cwd, TCHAR *input)
225 {
226 if (NULL == fn || NULL == input)
227 internal_error("Error calling generate_filename");
228
229 if ((s->mode & mode_relative) || is_absolute_path(input))
230 _tcsncpy(fn, input, SSDEEP_PATH_MAX);
231 else {
232 // Windows systems don't have symbolic links, so we don't
233 // have to worry about carefully preserving the paths
234 // they follow. Just use the system command to resolve the paths
235 #ifdef _WIN32
236 _wfullpath(fn, input, SSDEEP_PATH_MAX);
237 #else
238 if (NULL == cwd)
239 // If we can't get the current working directory, we're not
240 // going to be able to build the relative path to this file anyway.
241 // So we just call realpath and make the best of things
242 realpath(input, fn);
243 else
244 snprintf(fn, SSDEEP_PATH_MAX, "%s%c%s", cwd, DIR_SEPARATOR, input);
245 #endif
246 }
247 }
248
249
main(int argc,char ** argv)250 int main(int argc, char **argv)
251 {
252 int count, status, goal = argc;
253 state *s;
254 TCHAR *fn, *cwd;
255
256 #ifndef __GLIBC__
257 // __progname = basename(argv[0]);
258 #endif
259
260 s = new state;
261 if (initialize_state(s))
262 fatal_error("%s: Unable to initialize state variable", __progname);
263
264 process_cmd_line(s,argc,argv);
265
266 #ifdef _WIN32
267 if (prepare_windows_command_line(s))
268 fatal_error("%s: Unable to process command line arguments", __progname);
269 #else
270 s->argc = argc;
271 s->argv = argv;
272 #endif
273
274 // Anything left on the command line at this point is a file
275 // or directory we're supposed to process. If there's nothing
276 // specified, we should tackle standard input
277 if (optind == argc) {
278 status = process_stdin(s);
279 }
280 else {
281 MD5DEEP_ALLOC(TCHAR, fn, SSDEEP_PATH_MAX);
282 MD5DEEP_ALLOC(TCHAR, cwd, SSDEEP_PATH_MAX);
283
284 cwd = _tgetcwd(cwd, SSDEEP_PATH_MAX);
285 if (NULL == cwd)
286 fatal_error("%s: %s", __progname, strerror(errno));
287
288 count = optind;
289
290 // The signature comparsion mode needs to use the command line
291 // arguments and argument count. We don't do wildcard expansion
292 // on it on Win32 (i.e. where it matters). The setting of 'goal'
293 // to the original argc occured at the start of main(), so we just
294 // need to update it if we're *not* in signature compare mode.
295 if (not (s->mode & mode_sigcompare)) {
296 goal = s->argc;
297 }
298
299 while (count < goal)
300 {
301 if (MODE(mode_sigcompare))
302 match_load(s,argv[count]);
303 else if (MODE(mode_compare_unknown))
304 match_compare_unknown(s,argv[count]);
305 else {
306 generate_filename(s, fn, cwd, s->argv[count]);
307
308 #ifdef _WIN32
309 status = process_win32(s, fn);
310 #else
311 status = process_normal(s, fn);
312 #endif
313 }
314
315 ++count;
316 }
317
318 // If we processed files, but didn't find anything large enough
319 // to be meaningful, we should display a warning message to the user.
320 // This happens mostly when people are testing very small files
321 // e.g. $ echo "hello world" > foo && ssdeep foo
322 if ((not s->found_meaningful_file) and s->processed_file)
323 {
324 print_error(s,"%s: Did not process files large enough to produce meaningful results", __progname);
325 }
326 }
327
328
329 // If the user has requested us to compare signature files, use
330 // our existng code to pretty-print directory matching to do the
331 // work for us.
332 if (MODE(mode_sigcompare))
333 s->mode |= mode_match_pretty;
334 if (MODE(mode_match_pretty) or MODE(mode_sigcompare) or MODE(mode_cluster))
335 find_matches_in_known(s);
336 if (MODE(mode_cluster))
337 display_clusters(s);
338
339 return (EXIT_SUCCESS);
340 }
341