1 #ifndef __SSDEEP_H
2 #define __SSDEEP_H
3 
4 // Fuzzy Hashing by Jesse Kornblum
5 // Copyright (C) 2013 Facebook
6 // Copyright (C) 2012 Kyrus
7 // Copyright (C) 2008 ManTech International Corporation
8 //
9 // $Id$
10 //
11 
12 #include "main.h"
13 
14 #include <string>
15 #include <map>
16 #include <set>
17 #include <vector>
18 
19 #include "fuzzy.h"
20 #include "tchar-local.h"
21 #include "filedata.h"
22 
23 // This is a kludge, but it works.
24 #define __progname "ssdeep"
25 
26 #define SSDEEPV1_0_HEADER        "ssdeep,1.0--blocksize:hash:hash,filename"
27 #define SSDEEPV1_1_HEADER        "ssdeep,1.1--blocksize:hash:hash,filename"
28 #define OUTPUT_FILE_HEADER     SSDEEPV1_1_HEADER
29 
30 // We print a warning for files smaller than this size
31 #define SSDEEP_MIN_FILE_SIZE   4096
32 
// On Windows, MAX_PATH limits ordinary paths to 260 characters. Prefixing
// a filename with "\\?\" raises that limit to 32,767 characters.
35 #define SSDEEP_PATH_MAX 32767
36 
// Allocates a zero-filled array of SIZE elements of type TYPE and stores
// the resulting pointer in VAR.
//
// If the allocation fails, the macro executes 'return EXIT_FAILURE' in the
// ENCLOSING function, so it may only be used inside functions whose return
// type can carry EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so that the expansion is a
// single statement and is safe inside unbraced if/else branches; invoke it
// with a trailing semicolon. VAR and SIZE are parenthesized so that
// compound arguments (e.g. a size of 'n + 1') are evaluated as a unit.
// calloc both zero-fills the memory and guards the size multiplication
// against overflow.
#define MD5DEEP_ALLOC(TYPE,VAR,SIZE)                  \
  do {                                                \
    (VAR) = (TYPE *)calloc((SIZE), sizeof(TYPE));     \
    if (NULL == (VAR))                                \
      return EXIT_FAILURE;                            \
  } while (0)
42 
43 
// Kinds of filesystem entries that can be encountered while hashing.
// The known kinds are numbered consecutively from zero; file_unknown
// uses a value well apart from them.
#define file_regular      0
#define file_directory    1
#define file_door         2
#define file_block        3
#define file_character    4
#define file_pipe         5
#define file_socket       6
#define file_symlink      7
#define file_unknown    254
54 
55 
56 typedef struct _filedata_t
57 {
58   uint64_t id;
59 
60   /// Original signature in the form [blocksize]:[sig1]:[sig2]
61   std::string signature;
62 
63   uint64_t blocksize;
64 
65   /// Holds signature equal to blocksize
66   std::string s1;
67   /// Holds signature equal to blocksize * 2
68   std::string s2;
69 
70   TCHAR * filename;
71 
72   /// File of hashes where we got this known file from.
73   std::string match_file;
74 
75   /// Cluster which contains this file
76   std::set<_filedata_t> * cluster;
77 
78 } filedata_t;
79 
80 
81 typedef struct {
82   uint64_t  mode;
83 
84   bool       first_file_processed;
85 
86   // Known hashes
87   std::vector<Filedata *> all_files;
88 
89   // Known clusters
90   std::set< std::set<Filedata *> * > all_clusters;
91 
92   /// Display files who score above the threshold
93   uint8_t   threshold;
94 
95   bool       found_meaningful_file;
96   bool       processed_file;
97 
98   int       argc;
99   TCHAR     **argv;
100 
101   /// Current line number in file of known hashes
102   uint64_t line_number;
103   /// File handle to file of known hashes
104   FILE     * known_handle;
105   /// Filename of known hashes
106   char     * known_fn;
107 
108 } state;
109 
110 
111 
// MM_INIT is a plain alias for printf
#define MM_INIT printf
113 
// Compatibility definitions needed when cross compiling for
// Microsoft Windows
#if defined(_WIN32)

#define NEWLINE        "\r\n"
#define DIR_SEPARATOR  '\\'

// Map the UNIX functions we rely on to their Windows equivalents.
// Mapping lstat onto stat is harmless; Windows doesn't have symbolic links
#define lstat(A,B)      stat(A,B)
#define realpath(A,B)   _fullpath(B,A,PATH_MAX)
#define snprintf        _snprintf

// Declarations for functions and globals that are declared here for
// Windows builds only
extern char *optarg;
extern int optind;
int getopt(int argc, char *const argv[], const char *optstring);
char *basename(char *a);

#else   // !defined(_WIN32)
// Every other operating system

#define NEWLINE       "\n"
#define DIR_SEPARATOR '/'

#endif  // defined(_WIN32) / else
138 
139 
140 
141 
142 
// Mode flags. Each mode occupies one bit of a uint64_t variable, so a
// flag may be at most 1ULL<<63.
//
// Every value is fully parenthesized so that it can be used safely inside
// larger expressions (comparisons, arithmetic, negation), and is written
// with an unsigned 64-bit literal so that shifts beyond bit 30 do not
// overflow a plain int.
#define mode_none              0ULL
#define mode_recursive         (1ULL << 0)
#define mode_match             (1ULL << 1)
#define mode_barename          (1ULL << 2)
#define mode_relative          (1ULL << 3)
#define mode_silent            (1ULL << 4)
#define mode_directory         (1ULL << 5)
#define mode_match_pretty      (1ULL << 6)
#define mode_verbose           (1ULL << 7)
#define mode_csv               (1ULL << 8)
#define mode_threshold         (1ULL << 9)
#define mode_sigcompare        (1ULL << 10)
#define mode_display_all       (1ULL << 11)
#define mode_compare_unknown   (1ULL << 12)
#define mode_cluster           (1ULL << 13)
#define mode_recursive_cluster (1ULL << 14)
161 
// Evaluates to non-zero when any bit of the flag expression A is set in
// s->mode. A variable named 's' with a 'mode' member must be in scope
// wherever this macro is used.
// The argument is parenthesized so that compound expressions such as
// MODE(mode_csv | mode_silent) are masked as a whole rather than being
// split apart by operator precedence.
#define MODE(A)   (s->mode & (A))
163 
164 #define BLANK_LINE   \
165 "                                                                               "
166 
167 
168 
169 // *********************************************************************
170 // Checking for cycles
171 // *********************************************************************
172 int done_processing_dir(TCHAR *fn);
173 int processing_dir(TCHAR *fn);
174 int have_processed_dir(TCHAR *fn);
175 
176 bool process_win32(state *s, TCHAR *fn);
177 int process_normal(state *s, TCHAR *fn);
178 int process_stdin(state *s);
179 
180 
181 // *********************************************************************
182 // Fuzzy Hashing Engine
183 // *********************************************************************
184 int hash_file(state *s, TCHAR *fn);
185 bool display_result(state *s, const TCHAR * fn, const char * sum);
186 
187 
188 // *********************************************************************
189 // Helper functions
190 // *********************************************************************
191 void try_msg(void);
192 
193 bool expanded_path(TCHAR *p);
194 
195 void sanity_check(state *s, int condition, const char *msg);
196 
197 // The basename function kept misbehaving on OS X, so I rewrote it.
198 // This function isn't perfect, nor is it designed to be. Because
199 // we're guarenteed to be working with a filename here, there's no way
200 // that s will end with a DIR_SEPARATOR (e.g. /foo/bar/). This function
201 // will not work properly for a string that ends in a DIR_SEPARATOR
202 int my_basename(TCHAR *s);
203 int my_dirname(TCHAR *s);
204 
205 // Remove the newlines, if any, from the string. Works with both
206 // \r and \r\n style newlines
207 void chop_line_tchar(TCHAR *s);
208 void chop_line(char *s);
209 
210 int find_comma_separated_string_tchar(TCHAR *s, unsigned int n);
211 void shift_string_tchar(TCHAR *fn, unsigned int start, unsigned int new_start);
212 
213 int find_comma_separated_string(char *s, unsigned int n);
214 void shift_string(char *fn, size_t start, size_t new_start);
215 
216 int remove_escaped_quotes(char * str);
217 
218 void prepare_filename(state *s, TCHAR *fn);
219 
#if defined(__cplusplus)
extern "C" {
#endif

// Returns the size, in bytes, of the file behind the handle h.
// Declared with C linkage when this header is compiled as C++.
off_t find_file_size(FILE* h);

#if defined(__cplusplus)
}
#endif
231 
232 
233 
234 // *********************************************************************
235 // User Interface Functions
236 // *********************************************************************
237 void print_status(const char *fmt, ...);
238 void print_error(const state *s, const char *fmt, ...);
239 void print_error_unicode(state *s, const TCHAR *fn, const char *fmt, ...);
240 void internal_error(const char *fmt, ... );
241 void fatal_error(const char *fmt, ... );
242 void display_filename(FILE *out, const TCHAR *fn, int escape_quotes);
243 
244 
245 
246 #endif  // #ifndef __SSDEEP_H
247