1 
2 /* FOREMOST
3  *
4  * By Jesse Kornblum
5  *
6  * This is a work of the US Government. In accordance with 17 USC 105,
7  * copyright protection is not available for any work of the US Government.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  */
14 
15 //#define DEBUG 1
16 
17 #ifndef __FOREMOST_H
18 #define __FOREMOST_H
19 
20 /* Version information is defined in the Makefile */
21 
/* Names credited as the program's authors. */
#define AUTHOR "Jesse Kornblum, Kris Kendall, and Nick Mikus"

/* Legal notice. Every line ends in \r\n because the text has to work on
   Win32; the carriage return is redundant elsewhere but shouldn't cause
   any harm. Adjacent literals are concatenated by the compiler. */
#define COPYRIGHT \
    "This program is a work of the US Government. " \
    "In accordance with 17 USC 105,\r\n" \
    "copyright protection is not available for any work of the US Government.\r\n" \
    "This is free software; see the source for copying conditions. There is NO\r\n" \
    "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\r\n"
31 
32 #define _GNU_SOURCE
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <limits.h>
36 #include <dirent.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <time.h>
41 #include <math.h>
42 #include <ctype.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <signal.h>
46 
47 /* For va_arg */
48 #include <stdarg.h>
49 
/* Per-platform setup: extra system headers, integer type shims, and the
   header (or hand-written macros) that provide BYTE_ORDER. Exactly one
   branch is taken, chosen by the platform macro the build defines. */
#if defined(__LINUX)

#include <sys/ioctl.h>
#include <sys/mount.h>
#define u_int64_t unsigned long long

/* NOTE(review): __USE_BSD is presumably forced so that <endian.h> exposes
   the unprefixed BYTE_ORDER names on older C libraries -- confirm. */
#if !defined(__USE_BSD)
#define __USE_BSD
#endif
#include <endian.h>

#elif defined(__SOLARIS)

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

/* BYTE_ORDER is chosen by whether _BIG_ENDIAN is defined once
   <sys/isa_defs.h> has been included. */
#include <sys/isa_defs.h>
#if defined(_BIG_ENDIAN)
#define BYTE_ORDER BIG_ENDIAN
#else
#define BYTE_ORDER LITTLE_ENDIAN
#endif

#elif defined(__WIN32)

#include <sys/param.h>

#elif defined(__MACOSX)

#include <machine/endian.h>
#define __U16_TYPE unsigned short

#endif
83 
84 
/* Boolean values and the number of bytes in one megabyte (1024 * 1024). */
#define TRUE          1
#define FALSE         0
#define ONE_MEGABYTE  1048576
88 
89 
/* Integer type aliases used when building for Solaris.
   RBF - Do we need these type definitions? */
#if defined(__SOLARIS)
#define u_int32_t unsigned int
#define u_int64_t unsigned long long
#endif


/* The only time we're *not* on a UNIX system is when we're on Windows,
   so define __UNIX for every non-Win32 build that has not already done so. */
#if !defined(__WIN32) && !defined(__UNIX)
#define __UNIX
#endif
103 
104 
#if defined(__UNIX)

/* Supply __U16_TYPE only when nothing earlier has defined it. */
#if !defined(__U16_TYPE)
#define __U16_TYPE unsigned short
#endif

#include <libgen.h>

/* If no platform header provided BYTE_ORDER, assume little-endian. */
#if !defined(BYTE_ORDER)
#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234
#define BYTE_ORDER    LITTLE_ENDIAN
#endif

/* This avoids compiler warnings on older systems */
int fseeko(FILE *stream, off_t offset, int whence);
off_t ftello(FILE *stream);

/* Console presentation settings for UNIX-like systems. */
#define CMD_PROMPT      "$"
#define DIR_SEPARATOR   '/'
#define NEWLINE         "\n"
#define LINE_LENGTH     74

/* LINE_LENGTH spaces: seven 10-space literals plus one 4-space literal,
   concatenated by the compiler into a single string. */
#define BLANK_LINE \
    "          " "          " "          " "          " \
    "          " "          " "          " "    "

#endif /* defined(__UNIX) */
134 
135 /* This allows us to open standard input in binary mode by default
136    See http://gnuwin32.sourceforge.net/compile.html for more */
137 #include <fcntl.h>
138 
/* Code specific to Microsoft Windows */
#if defined(__WIN32)

/* By default, Windows uses long for off_t. This won't do. We
   need an unsigned number at minimum, so off_t is replaced by a macro.
   NOTE(review): the original remark that Windows has no 64 bit numbers
   sits oddly with the unsigned __int64 alias below -- confirm whether
   off_t could be widened. */
#undef off_t
#define off_t unsigned long

/* Console presentation settings for Windows. */
#define CMD_PROMPT      "c:\\>"
#define DIR_SEPARATOR   '\\'
#define NEWLINE         "\r\n"
#define LINE_LENGTH     72

/* LINE_LENGTH spaces: seven 10-space literals plus one 2-space literal,
   concatenated by the compiler into a single string. */
#define BLANK_LINE \
    "          " "          " "          " "          " \
    "          " "          " "          " "  "


/* It would be nice to use 64-bit file lengths in Windows */
#define ftello  ftell
#define fseeko  fseek

/* Integer type aliases. */
#define u_int32_t  unsigned long
#define u_int64_t  unsigned __int64

/* We create macros for the Windows equivalent UNIX functions.
   No worries about lstat to stat; Windows doesn't have symbolic links */
#define lstat(A,B)  stat(A,B)

/* Outside Cygwin, map these two calls onto their underscore-prefixed
   counterparts. */
#if !defined(__CYGWIN)
#define snprintf       _snprintf
#define realpath(A,B)  _fullpath(B,A,PATH_MAX)
#endif

/* Not used in md5deep anymore, but left in here in case I
   ever need it again. Win32 documentation searches are evil.
   int asprintf(char **strp, const char *fmt, ...);
*/

/* Declared here for Win32 builds. */
char *basename(char *a);
extern char *optarg;
extern int optind;
int getopt(int argc, char *const argv[], const char *optstring);

#endif   /* defined(__WIN32) */
188 
189 
/* On non-glibc systems we have to manually set the __progname variable,
   so the header provides the variable itself there; with glibc it is only
   declared.
   NOTE(review): the non-glibc branch is a definition inside a header, so
   every file that includes it gets its own copy of the definition. That may
   fail to link on toolchains that reject duplicate common symbols -- confirm. */
#if !defined(__GLIBC__)
char *__progname;
#else
extern char *__progname;
#endif /* !defined(__GLIBC__) */
196 
/* -----------------------------------------------------------------
   Program Defaults
   ----------------------------------------------------------------- */
#define MAX_STRING_LENGTH   1024
#define COMMENT_LENGTH      64

/* Modes refer to options that can be set by the user. Each non-zero mode
   occupies its own bit. The expansions are parenthesized so they stay one
   value inside larger expressions: with a bare `1<<1`, `mode_verbose + 1`
   would expand to `1<<1 + 1`, i.e. 4 instead of 3.

   Bits 8 to 31 are not assigned yet. We shouldn't use bits higher than 31
   as Win32 can't go that high. */
#define mode_none             0
#define mode_verbose          (1<<1)
#define mode_quiet            (1<<2)
#define mode_ind_blk          (1<<3)
#define mode_quick            (1<<4)
#define mode_write_all        (1<<5)
#define mode_write_audit      (1<<6)
#define mode_multi_file       (1<<7)

/* Search specification limits. */
#define MAX_NEEDLES                   254
#define NUM_SEARCH_SPEC_ELEMENTS        6
#define MAX_SUFFIX_LENGTH               8
#define MAX_FILE_TYPES                100
#define FOREMOST_NOEXTENSION_SUFFIX "NONE"

/* Default mode, file names and the divider string. */
#define DEFAULT_MODE              mode_none
#define DEFAULT_CONFIG_FILE       "foremost.conf"
#define DEFAULT_OUTPUT_DIRECTORY  "output"
#define AUDIT_FILE_NAME           "audit.txt"
#define FOREMOST_DIVIDER          "------------------------------------------------------------------"
227 
/* Numeric identifiers for the file types, numbered consecutively from 0. */
#define JPEG     0
#define GIF      1
#define BMP      2
#define MPG      3
#define PDF      4
#define DOC      5
#define AVI      6
#define WMV      7
#define HTM      8
#define ZIP      9
#define MOV     10
#define XLS     11
#define PPT     12
#define WPD     13
#define CPP     14
#define OLE     15
#define GZIP    16
#define RIFF    17
#define WAV     18
#define VJPEG   19
#define SXW     20
#define SXC     21
#define SXI     22
#define CONF    23
#define PNG     24
#define RAR     25
#define EXE     26
#define ELF     27
#define REG     28
#define DOCX    29
#define XLSX    30
#define PPTX    31
#define MP4     32
261 
262 
/* Binary size multipliers (powers of 1024).

   Every expansion is parenthesized so the macro behaves as a single value:
   unparenthesized, `GIGABYTE / MEGABYTE` expanded to
   `1024 * 1024 * 1024 / 1024 * 1024` and gave 2^30 instead of 1024.

   KILOBYTE, MEGABYTE and GIGABYTE fit in an int and keep that type.
   TERABYTE and above do not fit in a 32-bit int (signed overflow is
   undefined behaviour), so they are computed as unsigned long long. */
#define KILOBYTE                  1024
#define MEGABYTE                  (1024 * KILOBYTE)
#define GIGABYTE                  (1024 * MEGABYTE)
#define TERABYTE                  (1024ULL * GIGABYTE)
#define PETABYTE                  (1024ULL * TERABYTE)
#define EXABYTE                   (1024ULL * PETABYTE)
269 
/* Size unit identifiers, from bytes (0) up to exabytes (6). */
#define UNITS_BYTES   0
#define UNITS_KILOB   1
#define UNITS_MEGAB   2
#define UNITS_GIGAB   3
#define UNITS_TERAB   4
#define UNITS_PETAB   5
#define UNITS_EXAB    6

/* Search type identifiers. */
#define SEARCHTYPE_FORWARD       0
#define SEARCHTYPE_REVERSE       1
#define SEARCHTYPE_FORWARD_NEXT  2
#define SEARCHTYPE_ASCII         3

/* Byte-order identifiers. */
#define FOREMOST_BIG_ENDIAN     0
#define FOREMOST_LITTLE_ENDIAN  1

/* Default chunk size, in MB. */
#define CHUNK_SIZE  100
287 
288 
/* -----------------------------------------------------------------
   State Variable and Global Variables
   ----------------------------------------------------------------- */

/* Wildcard is a global variable because it's used by very simple
   functions that don't need the whole state passed to them.
   NOTE(review): this is a definition inside a header; see whether it
   should be an extern declaration with one definition in a .c file. */
char wildcard;

/* Program-wide state; a pointer to it is the first argument of most of the
   functions declared in this header. */
typedef struct f_state
{
    off_t mode;              /* read and written through get_mode()/set_mode() */

    /* Names of the files and directories in use. */
    char *config_file;
    char *input_file;
    char *output_directory;

    /* Run description. */
    char *start_time;
    char *invocation;

    /* Audit file. */
    char *audit_file_name;
    FILE *audit_file;
    int   audit_file_open;

    int num_builtin;
    int chunk_size;          /* in MB */
    int fileswritten;
    int block_size;
    int skip;

    int time_stamp;
} f_state;
315 
/* A byte pattern together with its length and a lookup table that has one
   entry per possible unsigned char value.
   NOTE(review): "bm" presumably stands for Boyer-Moore, matching the
   init_bm_table()/bm_search() declarations in this header -- confirm. */
typedef struct marker
{
    unsigned char *value;
    int            len;
    size_t         marker_bm_table[UCHAR_MAX + 1];
} marker;
322 
323 typedef struct s_spec
324 {
325     char* suffix;
326     int type;
327     u_int64_t max_len;
328     unsigned char* header;
329     unsigned int header_len;
330     size_t header_bm_table[UCHAR_MAX+1];
331 
332     unsigned char* footer;
333     unsigned int footer_len;
334     size_t footer_bm_table[UCHAR_MAX+1];
335     marker markerlist[5];
336     int num_markers;
337     int searchtype;
338 
339     int case_sen;
340 
341     int found;
342 
343     char comment[MAX_STRING_LENGTH];/*Used for audit*/
344     int written; /*used for -a mode*/
345 }s_spec;
346 
347 s_spec search_spec[50];  /*ARRAY OF BUILTIN SEARCH TYPES*/
348 
/* Bookkeeping for one input: its name, stream handle and byte counters. */
typedef struct f_info
{
    char *file_name;
    off_t total_bytes;

    /* We never use the total number of bytes in a file,
       only the number of megabytes when we display a time estimate */
    off_t total_megs;
    off_t bytes_read;

#if defined(__WIN32)
    /* Win32 is a 32-bit operating system and can't handle file sizes
       larger than 4GB. We use this to keep track of overflows */
    off_t last_read;
    off_t overflow_count;
#endif

    FILE *handle;
    int   is_stdin;
} f_info;
368 
/* Flag that is set when the user hits ctrl-c.
   NOTE(review): if this is written from a signal handler, the portable
   type would be volatile sig_atomic_t -- confirm against the handler. */
int signal_caught;
371 
372 /* -----------------------------------------------------------------
373    Function definitions
374    ----------------------------------------------------------------- */
375 
376 /* State functions */
377 
378 int initialize_state(f_state *s, int argc, char **argv);
379 void free_state(f_state *s);
380 
381 char *get_invocation(f_state *s);
382 char *get_start_time(f_state *s);
383 
384 int set_config_file(f_state *s, char *fn);
385 char* get_config_file(f_state *s);
386 
387 int set_output_directory(f_state *s, char *fn);
388 char* get_output_directory(f_state *s);
389 
390 void set_audit_file_open(f_state *s);
391 int get_audit_file_open(f_state *s);
392 
393 void set_mode(f_state *s, off_t new_mode);
394 int get_mode(f_state *s, off_t check_mode);
395 
396 int set_search_def(f_state *s,char* ft,u_int64_t max_file_size);
397 void get_search_def(f_state s);
398 
399 void set_input_file(f_state *s,char* filename);
400 void get_input_file(f_state *s);
401 
402 void set_chunk(f_state *s, int size);
403 
404 void init_bm_table(unsigned char *needle, size_t table[UCHAR_MAX + 1], size_t len, int casesensitive,int searchtype);
405 
406 void set_skip(f_state *s, int size);
407 void set_block(f_state *s, int size);
408 
409 
410 #ifdef __DEBUG
411 void dump_state(f_state *s);
412 #endif
413 
414 /* The audit file */
415 int open_audit_file(f_state *s);
416 void audit_msg(f_state *s, char *format, ...);
417 int close_audit_file(f_state *s);
418 
419 
420 /* Set up our output directory */
421 int create_output_directory(f_state *s);
422 int write_to_disk(f_state *s,s_spec * needle,u_int64_t len,unsigned char* buf,  u_int64_t t_offset);
423 int create_sub_dirs(f_state *s);
424 void cleanup_output(f_state *s);
425 
426 /* Configuration Files */
427 int load_config_file(f_state *s);
428 
429 
430 /* Helper functions */
431 char *current_time(void);
432 off_t find_file_size(FILE *f);
433 char *human_readable(off_t size, char *buffer);
434 char *units(unsigned int c);
435 unsigned int chop(char *buf);
436 void print_search_specs(f_state *s);
437 int memwildcardcmp(const void *s1, const void *s2,size_t n,int caseSensitive);
438 int charactersMatch(char a, char b, int caseSensitive);
439 void printx(unsigned char* buf,int start, int end);
440 unsigned short htos(unsigned char s[],int endian);
441 unsigned int htoi(unsigned char s[],int endian);
442 u_int64_t htoll(unsigned char s[],int endian);
443 int displayPosition(f_state* s,f_info* i,u_int64_t pos);
444 
445 
446 /* Interface functions
447    These functions stay the same regardless if we're using a
448    command line interface or a GUI */
449 void fatal_error(f_state *s, char *msg);
450 void print_error(f_state *s, char *fn, char *msg);
451 void print_message(f_state *s, char *format, va_list argp);
452 void print_stats(f_state *s);
453 
454 /* Engine */
455 int process_file(f_state *s);
456 int process_stdin(f_state *s);
457 unsigned char *bm_search(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
458 	size_t table[UCHAR_MAX + 1], int case_sen,int searchtype);
459 unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
460 	size_t table[UCHAR_MAX + 1], int casesensitive,int searchtype, int start_pos) ;
461 #endif /* __FOREMOST_H */
462 
463 /* BUILTIN */
464 unsigned char* extract_file(f_state *s,  u_int64_t c_offset,unsigned char *foundat,  u_int64_t buflen, s_spec * needle, u_int64_t f_offset);
465 
466 
467 
468 
469 
470