1 #ifndef __SSDEEP_H 2 #define __SSDEEP_H 3 4 // Fuzzy Hashing by Jesse Kornblum 5 // Copyright (C) 2013 Facebook 6 // Copyright (C) 2012 Kyrus 7 // Copyright (C) 2008 ManTech International Corporation 8 // 9 // $Id$ 10 // 11 12 #include "main.h" 13 14 #include <string> 15 #include <map> 16 #include <set> 17 #include <vector> 18 19 #include "fuzzy.h" 20 #include "tchar-local.h" 21 #include "filedata.h" 22 23 // This is a kludge, but it works. 24 #define __progname "ssdeep" 25 26 #define SSDEEPV1_0_HEADER "ssdeep,1.0--blocksize:hash:hash,filename" 27 #define SSDEEPV1_1_HEADER "ssdeep,1.1--blocksize:hash:hash,filename" 28 #define OUTPUT_FILE_HEADER SSDEEPV1_1_HEADER 29 30 // We print a warning for files smaller than this size 31 #define SSDEEP_MIN_FILE_SIZE 4096 32 33 // The default 'PATH_MAX' on Windows is about 255 bytes. We can expand 34 // this limit to 32,767 characters by prepending filenames with "\\?\" 35 #define SSDEEP_PATH_MAX 32767 36 37 #define MD5DEEP_ALLOC(TYPE,VAR,SIZE) \ 38 VAR = (TYPE *)malloc(sizeof(TYPE) * SIZE); \ 39 if (NULL == VAR) \ 40 return EXIT_FAILURE; \ 41 memset(VAR,0,SIZE * sizeof(TYPE)); 42 43 44 // These are the types of files we can encounter while hashing 45 #define file_regular 0 46 #define file_directory 1 47 #define file_door 2 48 #define file_block 3 49 #define file_character 4 50 #define file_pipe 5 51 #define file_socket 6 52 #define file_symlink 7 53 #define file_unknown 254 54 55 56 typedef struct _filedata_t 57 { 58 uint64_t id; 59 60 /// Original signature in the form [blocksize]:[sig1]:[sig2] 61 std::string signature; 62 63 uint64_t blocksize; 64 65 /// Holds signature equal to blocksize 66 std::string s1; 67 /// Holds signature equal to blocksize * 2 68 std::string s2; 69 70 TCHAR * filename; 71 72 /// File of hashes where we got this known file from. 73 std::string match_file; 74 75 /// Cluster which contains this file 76 std::set<_filedata_t> * cluster; 77 78 } filedata_t; 79 80 81 typedef struct { 82 uint64_t mode; 83 84 bool first_file_processed; 85 86 // Known hashes 87 std::vector<Filedata *> all_files; 88 89 // Known clusters 90 std::set< std::set<Filedata *> * > all_clusters; 91 92 /// Display files who score above the threshold 93 uint8_t threshold; 94 95 bool found_meaningful_file; 96 bool processed_file; 97 98 int argc; 99 TCHAR **argv; 100 101 /// Current line number in file of known hashes 102 uint64_t line_number; 103 /// File handle to file of known hashes 104 FILE * known_handle; 105 /// Filename of known hashes 106 char * known_fn; 107 108 } state; 109 110 111 112 #define MM_INIT printf 113 114 // Things required when cross compiling for Microsoft Windows 115 #ifdef _WIN32 116 117 // We create macros for the Windows equivalent UNIX functions. 118 // No worries about lstat to stat; Windows doesn't have symbolic links 119 #define lstat(A,B) stat(A,B) 120 #define realpath(A,B) _fullpath(B,A,PATH_MAX) 121 #define snprintf _snprintf 122 123 char *basename(char *a); 124 extern char *optarg; 125 extern int optind; 126 int getopt(int argc, char *const argv[], const char *optstring); 127 128 #define NEWLINE "\r\n" 129 #define DIR_SEPARATOR '\\' 130 131 #else // ifdef _WIN32 132 // For all other operating systems 133 134 #define NEWLINE "\n" 135 #define DIR_SEPARATOR '/' 136 137 #endif // ifdef _WIN32/else 138 139 140 141 142 143 // Because the modes are stored in a uint64_t variable, they must 144 // be less than or equal to 1<<63 145 #define mode_none 0 146 #define mode_recursive 1 147 #define mode_match 1<<1 148 #define mode_barename 1<<2 149 #define mode_relative 1<<3 150 #define mode_silent 1<<4 151 #define mode_directory 1<<5 152 #define mode_match_pretty 1<<6 153 #define mode_verbose 1<<7 154 #define mode_csv 1<<8 155 #define mode_threshold 1<<9 156 #define mode_sigcompare 1<<10 157 #define mode_display_all 1<<11 158 #define mode_compare_unknown 1<<12 159 #define mode_cluster 1<<13 160 #define mode_recursive_cluster 1<<14 161 162 #define MODE(A) (s->mode & A) 163 164 #define BLANK_LINE \ 165 " " 166 167 168 169 // ********************************************************************* 170 // Checking for cycles 171 // ********************************************************************* 172 int done_processing_dir(TCHAR *fn); 173 int processing_dir(TCHAR *fn); 174 int have_processed_dir(TCHAR *fn); 175 176 bool process_win32(state *s, TCHAR *fn); 177 int process_normal(state *s, TCHAR *fn); 178 int process_stdin(state *s); 179 180 181 // ********************************************************************* 182 // Fuzzy Hashing Engine 183 // ********************************************************************* 184 int hash_file(state *s, TCHAR *fn); 185 bool display_result(state *s, const TCHAR * fn, const char * sum); 186 187 188 // ********************************************************************* 189 // Helper functions 190 // ********************************************************************* 191 void try_msg(void); 192 193 bool expanded_path(TCHAR *p); 194 195 void sanity_check(state *s, int condition, const char *msg); 196 197 // The basename function kept misbehaving on OS X, so I rewrote it. 198 // This function isn't perfect, nor is it designed to be. Because 199 // we're guarenteed to be working with a filename here, there's no way 200 // that s will end with a DIR_SEPARATOR (e.g. /foo/bar/). This function 201 // will not work properly for a string that ends in a DIR_SEPARATOR 202 int my_basename(TCHAR *s); 203 int my_dirname(TCHAR *s); 204 205 // Remove the newlines, if any, from the string. Works with both 206 // \r and \r\n style newlines 207 void chop_line_tchar(TCHAR *s); 208 void chop_line(char *s); 209 210 int find_comma_separated_string_tchar(TCHAR *s, unsigned int n); 211 void shift_string_tchar(TCHAR *fn, unsigned int start, unsigned int new_start); 212 213 int find_comma_separated_string(char *s, unsigned int n); 214 void shift_string(char *fn, size_t start, size_t new_start); 215 216 int remove_escaped_quotes(char * str); 217 218 void prepare_filename(state *s, TCHAR *fn); 219 220 // Returns the size of the given file, in bytes. 221 222 #ifdef __cplusplus 223 extern "C" { 224 #endif 225 226 off_t find_file_size(FILE *h); 227 228 #ifdef __cplusplus 229 } 230 #endif 231 232 233 234 // ********************************************************************* 235 // User Interface Functions 236 // ********************************************************************* 237 void print_status(const char *fmt, ...); 238 void print_error(const state *s, const char *fmt, ...); 239 void print_error_unicode(state *s, const TCHAR *fn, const char *fmt, ...); 240 void internal_error(const char *fmt, ... ); 241 void fatal_error(const char *fmt, ... ); 242 void display_filename(FILE *out, const TCHAR *fn, int escape_quotes); 243 244 245 246 #endif // #ifndef __SSDEEP_H 247