1 // MD5DEEP - dig.c
2 //
3 // By Jesse Kornblum
4 //
5 // This is a work of the US Government. In accordance with 17 USC 105,
6 // copyright protection is not available for any work of the US Government.
7 //
8 // This program is distributed in the hope that it will be useful, but
9 // WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 //
12 // $Id$
13
14 #include "ssdeep.h"
15
16 #define STATUS_OK FALSE
17
// Returns nonzero when d is exactly "." or "..", and zero for every
// other name. Callers in this file skip such entries.
static int is_special_dir(TCHAR *d)
{
  const size_t len = _tcslen(d);

  if (1 == len)
    return !_tcsncmp(d, _TEXT("."), 1);

  if (2 == len)
    return !_tcsncmp(d, _TEXT(".."), 2);

  return 0;
}
23
24 #ifndef _WIN32
25
26 static TCHAR DOUBLE_DIR[4] =
27 { (TCHAR)DIR_SEPARATOR,
28 (TCHAR)DIR_SEPARATOR,
29 0
30 };
31
remove_double_slash(TCHAR * fn)32 static void remove_double_slash(TCHAR *fn)
33 {
34 size_t tsize = sizeof(TCHAR);
35 TCHAR *tmp = fn, *new_str;
36
37 new_str = _tcsstr(tmp,DOUBLE_DIR);
38 while (NULL != new_str)
39 {
40 /*
41 #ifdef _WIN32
42 // On Windows, we have to allow the first two characters to be slashes
43 // to account for UNC paths. e.g. \\SERVER\dir\path
44 if (tmp == fn)
45 {
46 ++tmp;
47 }
48 else
49 {
50 #endif // ifdef _WIN32
51 */
52 _tmemmove(new_str,new_str+tsize,_tcslen(new_str));
53
54 /*
55 #ifdef _WIN32
56 }
57 #endif // ifdef _WIN32
58 */
59
60 new_str = _tcsstr(tmp,DOUBLE_DIR);
61 }
62 }
63
64
// Removes every "./" path component that directly follows a directory
// separator, and trims a trailing "." that follows a separator (for
// example "/foo/." becomes "/foo/"). A "./" at the very start of fn is
// left alone. The string is edited in place and can only shrink.
static void remove_single_dirs(TCHAR *fn)
{
  size_t len = _tcslen(fn);      // current length, kept up to date
  size_t pos = 0;
  unsigned int chars_found = 0;  // positions seen that did not start a "./"

  while (pos < len)
  {
    // Catch strings that end with /. (e.g. /foo/.)
    if (pos > 0 &&
        fn[pos-1] == _TEXT(DIR_SEPARATOR) &&
        fn[pos]   == _TEXT('.') &&
        fn[pos+1] == 0)
    {
      // This is the last character of the string; nothing is left to scan.
      fn[pos] = 0;
      return;
    }

    if (fn[pos] == _TEXT('.') && fn[pos+1] == _TEXT(DIR_SEPARATOR))
    {
      // chars_found is only nonzero once pos > 0, which keeps the
      // fn[pos-1] read in bounds.
      if (chars_found && fn[pos-1] == _TEXT(DIR_SEPARATOR))
      {
        // Slide everything after the "./" (terminator included) down
        // over it. The tail runs from index pos+2 through index len,
        // which is len - pos - 1 elements; copying more would read
        // past the terminator.
        // NOTE(review): the count keeps the sizeof(TCHAR) factor the
        // original applied at this call site; confirm the unit that
        // _tmemmove expects.
        _tmemmove(fn + pos, fn + pos + 2, (len - pos - 1) * sizeof(TCHAR));
        len -= 2;

        // In case we have ././ we look at the same position again.
        continue;
      }
    }
    else
      ++chars_found;

    ++pos;
  }
}
94
95 // Removes all "../" references from the absolute path fn
remove_double_dirs(TCHAR * fn)96 void remove_double_dirs(TCHAR *fn)
97 {
98 size_t pos, next_dir, sz = _tcslen(fn), tsize = sizeof(TCHAR);
99
100 for (pos = 0; pos < _tcslen(fn) ; pos++)
101 {
102 if (fn[pos] == _TEXT('.') && fn[pos+1] == _TEXT('.'))
103 {
104 if (pos > 0)
105 {
106
107 /* We have to keep this next if statement and the one above separate.
108 If not, we can't tell later on if the pos <= 0 or
109 that the previous character was a DIR_SEPARATOR.
110 This matters when we're looking at ..foo/ as an input */
111
112 if (fn[pos-1] == _TEXT(DIR_SEPARATOR))
113 {
114
115 next_dir = pos + 2;
116
117 /* Back up to just before the previous DIR_SEPARATOR
118 unless we're already at the start of the string */
119 if (pos > 1)
120 pos -= 2;
121 else
122 pos = 0;
123
124 while (fn[pos] != _TEXT(DIR_SEPARATOR) && pos > 0)
125 --pos;
126
127 switch(fn[next_dir])
128 {
129 case DIR_SEPARATOR:
130 _tmemmove(fn+pos,fn+next_dir,((sz - next_dir) + 1) * tsize);
131 break;
132
133 case 0:
134 /* If we have /.. ending the filename */
135 fn[pos+1] = 0;
136 break;
137
138 /* If we have ..foo, we should do nothing, but skip
139 over these double dots */
140 default:
141 pos = next_dir;
142 }
143 }
144 }
145
146 /* If we have two dots starting off the string, we should prepend
147 a DIR_SEPARATOR and ignore the two dots. That is:
148 from the root directory the path ../foo is really just /foo */
149
150 else
151 {
152 fn[pos] = _TEXT(DIR_SEPARATOR);
153 _tmemmove(fn+pos+1,fn+pos+3,sz-(pos+3));
154
155
156 }
157 }
158 }
159 }
160
161
162 // We don't need to call these functions when running in Windows
163 // as we've already called real_path() on them in main.c. These
164 // functions are necessary in *nix so that we can clean up the
165 // path names without removing the names of symbolic links. They
166 // are also called when the user has specified an absolute path
167 // but has included extra double dots or such.
168
clean_name(state * s,TCHAR * fn)169 static void clean_name(state *s, TCHAR *fn)
170 {
171 if (not (s->mode & mode_relative)) {
172 remove_double_slash(fn);
173 remove_single_dirs(fn);
174 remove_double_dirs(fn);
175 }
176 }
177
178
// Walks the directory fn and hands every entry other than "." and ".."
// to process_normal() as "<fn><DIR_SEPARATOR><entry name>".
//
// The directory is registered with the cycle checker for the duration
// of the walk. A directory that is already registered is reported as a
// symlink cycle and skipped.
//
// Returns STATUS_OK when the directory is skipped or cannot be opened;
// otherwise returns whatever process_normal() returned for the last
// entry processed (STATUS_OK if there were no entries).
static int process_dir(state *s, TCHAR *fn)
{
  int return_value = STATUS_OK;
  TCHAR *new_file;
  _TDIR *current_dir;
  struct _tdirent *entry;

  if (have_processed_dir(fn))
  {
    print_error_unicode(s,fn,"symlink creates cycle");
    return STATUS_OK;
  }

  if (!processing_dir(fn))
    internal_error("%s: Cycle checking failed to register directory.", fn);

  current_dir = _topendir(fn);
  if (NULL == current_dir)
  {
    // errno is consumed before anything else gets a chance to change it.
    print_error_unicode(s,fn,"%s", strerror(errno));

    // The directory was registered above. Unregister it on this early
    // exit too, otherwise a later visit to the same directory would be
    // misreported as a symlink cycle.
    if (!done_processing_dir(fn))
      internal_error("%s: Cycle checking failed to unregister directory.", fn);

    return STATUS_OK;
  }

  new_file = (TCHAR *)malloc(sizeof(TCHAR) * SSDEEP_PATH_MAX);
  if (NULL == new_file)
    internal_error("%s: Out of memory", __progname);

  while ((entry = _treaddir(current_dir)) != NULL)
  {
    if (is_special_dir(entry->d_name))
      continue;

    _sntprintf(new_file,SSDEEP_PATH_MAX,_TEXT("%s%c%s"),
               fn,DIR_SEPARATOR,entry->d_name);

    return_value = process_normal(s,new_file);
  }

  free(new_file);
  _tclosedir(current_dir);

  if (!done_processing_dir(fn))
    internal_error("%s: Cycle checking failed to unregister directory.", fn);

  return return_value;
}
224
225
// Maps the st_mode field of a stat result onto one of this program's
// file_* type codes. Anything that matches none of the tests is
// reported as file_unknown.
static int file_type_helper(_tstat_t sb)
{
  int kind = file_unknown;

  if (S_ISREG(sb.st_mode))
    kind = file_regular;
  else if (S_ISDIR(sb.st_mode))
    kind = file_directory;
  else if (S_ISBLK(sb.st_mode))
    kind = file_block;
  else if (S_ISCHR(sb.st_mode))
    kind = file_character;
  else if (S_ISFIFO(sb.st_mode))
    kind = file_pipe;

  /* These file types do not exist in Win32 */
#ifndef _WIN32
  else if (S_ISSOCK(sb.st_mode))
    kind = file_socket;
  else if (S_ISLNK(sb.st_mode))
    kind = file_symlink;
#endif /* ifndef _WIN32 */

  /* Used to detect Solaris doors */
#if defined(S_IFDOOR) && defined(S_ISDOOR)
  else if (S_ISDOOR(sb.st_mode))
    kind = file_door;
#endif

  return kind;
}
264
265
// Classifies fn using _lstat(). Returns file_unknown when either
// argument is NULL, or when _lstat() fails (in which case the errno
// text is reported against fn).
static int file_type(state *s, TCHAR *fn)
{
  _tstat_t sb;

  if (s != NULL && fn != NULL)
  {
    if (0 == _lstat(fn,&sb))
      return file_type_helper(sb);

    print_error_unicode(s,fn,"%s", strerror(errno));
  }

  return file_unknown;
}
281
282
// Decides whether the symbolic link fn should be hashed, based on what
// it points to.
//
// The normal file_type function uses lstat to examine the file; here
// _sstat is used instead so that the target of the link is examined.
//
// Returns TRUE when the target is not a directory, storing the
// target's type in *link_type if link_type is not NULL. Returns FALSE
// when _sstat fails or the target is a directory. A directory target
// is walked with process_dir() in recursive mode and reported as an
// error otherwise.
static int should_hash_symlink(state *s, TCHAR *fn, int *link_type)
{
  _tstat_t target;
  int target_type;

  if (NULL == s || NULL == fn)
    fatal_error("%s: Null state passed into should_hash_symlink", __progname);

  if (_sstat(fn,&target) != 0)
  {
    print_error_unicode(s,fn,"%s",strerror(errno));
    return FALSE;
  }

  target_type = file_type_helper(target);

  if (target_type != file_directory)
  {
    if (NULL != link_type)
      *link_type = target_type;
    return TRUE;
  }

  // The link points at a directory: never hashed directly.
  if (s->mode & mode_recursive)
    process_dir(s,fn);
  else
    print_error_unicode(s,fn,"Is a directory");

  return FALSE;
}
317
318
// For use as the body of a switch case in a function that has a
// variable named s in scope: returns TRUE from the enclosing function
// when any bit of A is set in s->mode, and otherwise breaks out of the
// enclosing switch. The argument is parenthesized so that an expression
// such as (x | y) is tested as a whole.
#define RETURN_IF_MODE(A)       \
  if (s->mode & (A))            \
    return TRUE;                \
  break;
323
324
// Decides whether fn should be hashed.
//
// Directories are never hashed directly: in recursive mode they are
// walked with process_dir(), otherwise an error is reported. Symbolic
// links are delegated to should_hash_symlink(). Files whose type could
// not be determined are skipped. Everything else is hashed.
static int should_hash(state *s, TCHAR *fn)
{
  int type = file_type(s, fn);

  if (NULL == s || NULL == fn)
    fatal_error("%s: Null state passed into should_hash", __progname);

  if (file_directory == type)
  {
    if (s->mode & mode_recursive)
      process_dir(s,fn);
    else
      print_error_unicode(s,fn,"Is a directory");

    return FALSE;
  }

  if (file_symlink == type)
    return should_hash_symlink(s,fn,NULL);

  // By default we hash anything we can't identify as a "bad thing"
  return (file_unknown == type) ? FALSE : TRUE;
}
352
353
// Cleans up the path fn in place and, if should_hash() approves it,
// hashes it. Returns the result of hash_file() for hashed files and
// FALSE for everything else.
int process_normal(state *s, TCHAR *fn)
{
  clean_name(s,fn);

  if (!should_hash(s,fn))
    return FALSE;

  return hash_file(s,fn);
}
363 #endif // ifndef _WIN32
364
365
// Computes the fuzzy hash of standard input and displays it under the
// name "stdin". Returns FALSE on success, and TRUE when s is NULL or
// the stream could not be hashed.
int process_stdin(state *s)
{
  char sum[FUZZY_MAX_RESULT];

  if (NULL == s)
    return TRUE;

  if (fuzzy_hash_stream(stdin, sum) != 0)
  {
    print_error_unicode(s,_TEXT("stdin"),"Error processing stdin");
    return TRUE;
  }

  display_result(s,_TEXT("stdin"),sum);
  return FALSE;
}
384
385
386
387
388 #ifdef _WIN32
// Returns TRUE when fn names a Win32 device file, FALSE otherwise.
//
// Specifications for device files came from
// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/fileio/base/createfile.asp
//
//  -- Physical devices (like hard drives) are
//     \\.\PhysicalDriveX where X is a digit from 0 to 9
//  -- Tape devices are \\.\tapeX where X is a digit from 0 to 9
//  -- Logical volumes are \\.\X: where X is a letter
//
// The prefixes are matched without regard to case. The digit and letter
// positions are tested with explicit ASCII ranges rather than
// isdigit()/isalpha(): those functions are only defined for values
// representable as unsigned char (or EOF), which a TCHAR is not
// guaranteed to be.
static int is_win32_device_file(TCHAR *fn)
{
  const size_t len = _tcslen(fn);

  if (18 == len &&
      !_tcsnicmp(fn, _TEXT("\\\\.\\physicaldrive"),17) &&
      fn[17] >= _TEXT('0') && fn[17] <= _TEXT('9'))
    return TRUE;

  if (9 == len &&
      !_tcsnicmp(fn, _TEXT("\\\\.\\tape"),8) &&
      fn[8] >= _TEXT('0') && fn[8] <= _TEXT('9'))
    return TRUE;

  if (6 == len &&
      !_tcsnicmp(fn,_TEXT("\\\\.\\"),4) &&
      ((fn[4] >= _TEXT('a') && fn[4] <= _TEXT('z')) ||
       (fn[4] >= _TEXT('A') && fn[4] <= _TEXT('Z'))) &&
      fn[5] == _TEXT(':'))
    return TRUE;

  return FALSE;
}
417
418
// Processes every entry of the directory fn by handing the search
// pattern "<fn>\*" to process_win32(). The directory is registered with
// the cycle checker while it is being walked.
//
// Returns true when the directory was skipped (it is already being
// processed, or the search pattern does not fit in SSDEEP_PATH_MAX
// characters), false otherwise.
bool process_dir_win32(state *s, TCHAR *fn) {
  TCHAR new_fn[SSDEEP_PATH_MAX];
  int written;

  if (have_processed_dir(fn)) {
    print_error_unicode(s, fn, "Cycle detected");
    return true;
  }

  processing_dir(fn);

  written = _sntprintf(new_fn,
                       SSDEEP_PATH_MAX,
                       _TEXT("%s\\*"),
                       fn);

  // The Microsoft _snprintf family leaves the buffer unterminated when
  // the output fills it and returns a negative value when it does not
  // fit; C99 snprintf returns the untruncated length instead. Either
  // way the pattern is unusable, so report it rather than searching
  // with a clipped path.
  if (written < 0 || (size_t)written >= (size_t)SSDEEP_PATH_MAX) {
    print_error_unicode(s, fn, "Path too long");
    done_processing_dir(fn);
    return true;
  }

  process_win32(s, new_fn);

  done_processing_dir(fn);
  return false;
}
439
440
// Processes the file, directory or wildcard pattern fn on Windows.
//
// Device files are hashed directly and "." / ".." are ignored. Anything
// else is looked up with FindFirstFile/FindNextFile: plain files are
// hashed, and directories are walked with process_dir_win32() when the
// state is in recursive mode.
//
// fn may be modified in place: a bare drive such as "c:" becomes "c:\",
// and in recursive mode a trailing separator gets a "*" appended.
// NOTE(review): this writes up to two characters beyond the original
// terminator; callers presumably pass a buffer with room to spare --
// confirm against the callers outside this file.
//
// Returns true when s or fn is NULL or when the directory traversal
// ended with an unexpected error, the result of hash_file() for device
// files, and false otherwise.
bool process_win32(state *s, TCHAR *fn)
{
  int rc;
  size_t len;
  HANDLE hFind;
  DWORD last_error;
  TCHAR dirname[SSDEEP_PATH_MAX], new_fn[SSDEEP_PATH_MAX], expanded_fn[SSDEEP_PATH_MAX];
  WIN32_FIND_DATAW FindFileData;

  if (NULL == s or NULL == fn)
    return true;

  //print_status("process_win32 got %S", fn);

  if (is_win32_device_file(fn))
    return hash_file(s, fn);
  if (is_special_dir(fn))
    return false;

  len = _tcslen(fn);

  // Most Win32 programs reject 'c:'
  // as an error or use it to alias the current working directory on c:.
  // As a convenience to users, we're going to accept 'c:'. To do this
  // we change it into 'c:\'.
  // The drive letter is tested with explicit ASCII ranges because
  // isalpha() is only defined for values representable as unsigned
  // char, which a TCHAR is not guaranteed to be.
  if (2 == len and
      ((fn[0] >= _TEXT('a') and fn[0] <= _TEXT('z')) or
       (fn[0] >= _TEXT('A') and fn[0] <= _TEXT('Z'))) and
      fn[1] == _TEXT(':')) {
    fn[2] = _TEXT(DIR_SEPARATOR);
    fn[3] = 0;
    len = 3;
  }

  // FindFirstFile doesn't accept '\' as the trailing character.
  // If we get '\' as a trailing character, we assume this is a directory
  // and handle that accordingly. In recursive mode, go through the
  // directory entries. Otherwise, return an error.
  // The len > 0 test keeps an empty name from reading fn[-1].
  if (len > 0 and fn[len-1] == _TEXT(DIR_SEPARATOR)) {
    if (s->mode & mode_recursive) {
      fn[len] = _TEXT('*');
      fn[len+1] = 0;
    } else {
      print_error_unicode(s, fn, "Is a directory");
      return false;
    }
  }

  //print_status("cleaned name %S", fn);

  // If we don't have it already, create the expanded filename.
  // "C:\foo\bar.txt" --> "\\?\C:\foo\bar.txt"
  if (not expanded_path(fn) and
      not (s->mode & mode_relative)) {
    _sntprintf(expanded_fn,
               SSDEEP_PATH_MAX,
               _TEXT("\\\\?\\%s"),
               fn);
  }
  else {
    _tcsncpy(expanded_fn, fn, SSDEEP_PATH_MAX);
  }
  // Neither call above terminates the buffer when the input fills it.
  expanded_fn[SSDEEP_PATH_MAX - 1] = 0;
  //print_status("expanded filename %S", expanded_fn);

  hFind = FindFirstFile(expanded_fn, &FindFileData);
  if (INVALID_HANDLE_VALUE == hFind)
  {
    // We don't display an error if there was a wildcard anywhere in the
    // original filename, e.g. C:\foo\*. When this happens it means we just
    // didn't find any matching files.
    // Note that we still display errors with the original 'fn'
    if (not _tcsstr(fn, _TEXT("*")))
      print_error_unicode(s, fn, "No such file or directory");
    return false;
  }

  _tcsncpy(dirname, fn, SSDEEP_PATH_MAX);
  dirname[SSDEEP_PATH_MAX - 1] = 0;
  my_dirname(dirname);

  // Give new_fn a defined value in case the traversal fails before any
  // entry name has been built; it is used in the error message below.
  _tcsncpy(new_fn, fn, SSDEEP_PATH_MAX);
  new_fn[SSDEEP_PATH_MAX - 1] = 0;

  do {
    // The filename we've found doesn't include any path information.
    // That is, for the file C:\foo\bar.txt, we have bar.txt.
    // We have to add the path information back in manually.
    // Thankfully Windows doesn't allow wildcards in the early part
    // of the path. For example, we will never see: c:\bin\*\tools
    //
    // Because the wildcard is always in the last part of the input
    // (e.g. c:\bin\*.exe) we can use the original dirname, combined
    // with the filename we've found, to make the new filename.
    if (not is_special_dir(FindFileData.cFileName)) {
      DWORD dwFileAttributes = FindFileData.dwFileAttributes;

      // print_status("Found file: %S", FindFileData.cFileName);

      _sntprintf(new_fn,
                 SSDEEP_PATH_MAX,
                 _TEXT("%s%s"),
                 dirname,
                 FindFileData.cFileName);
      new_fn[SSDEEP_PATH_MAX - 1] = 0;

      // The expanded form of new_fn used to be rebuilt here but was
      // never read afterwards, so that dead computation is gone.

      if (INVALID_FILE_ATTRIBUTES == dwFileAttributes) {
        print_error_unicode(s, new_fn, "File read error");
      } else if (dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
        if (s->mode & mode_recursive) {
          process_dir_win32(s, new_fn);
        }

        // TODO Add support for symbolic links. An earlier draft tested
        // FILE_ATTRIBUTE_REPARSE_POINT and switched on
        // FindFileData.dwReserved0: junction points
        // (IO_REPARSE_TAG_MOUNT_POINT) and symbolic links
        // (IO_REPARSE_TAG_SYMLINK) were skipped with a message, Single
        // Instance Storage entries (IO_REPARSE_TAG_SIS) were hashed,
        // and any other tag was reported as unknown. See
        // http://blogs.technet.com/b/filecab/archive/2006/02/03/single-instance-store-sis-in-windows-storage-server-r2.aspx
      } else {
        hash_file(s, new_fn);
      }
    }

    rc = FindNextFile(hFind, &FindFileData);
  } while (rc != 0);

  // Capture the reason FindNextFile stopped before FindClose gets a
  // chance to overwrite it, then release the search handle on every
  // path out of the function.
  last_error = GetLastError();
  rc = FindClose(hFind);

  if (last_error != ERROR_NO_MORE_FILES) {
    // The Windows API for getting an intelligible error message
    // is berserk. Rather than play their silly games, we
    // acknowledge that an unknown error occurred and hope we
    // can continue.
    print_error_unicode(s, new_fn, "Unknown error during directory traversal");
    return true;
  }

  if (0 == rc) {
    print_error_unicode(s, fn, "Unknown error cleaning up directory traversal");
  }

  return false;
}
608 #endif
609