
/* FOREMOST
 *
 * By Jesse Kornblum
 *
 * This is a work of the US Government. In accordance with 17 USC 105,
 * copyright protection is not available for any work of the US Government.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

/* Central header: platform glue, program-wide constants, the shared
   state/search-spec types, and the prototypes of the functions declared
   for the rest of the program. */

//#define DEBUG 1

#ifndef __FOREMOST_H
#define __FOREMOST_H

/* Version information is defined in the Makefile */

#define AUTHOR "Jesse Kornblum, Kris Kendall, and Nick Mikus"

/* We use \r\n for newlines as this has to work on Win32. It's redundant for
   everybody else, but shouldn't cause any harm. */
#define COPYRIGHT "This program is a work of the US Government. "\
"In accordance with 17 USC 105,\r\n"\
"copyright protection is not available for any work of the US Government.\r\n"\
"This is free software; see the source for copying conditions. There is NO\r\n"\
"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\r\n"

/* Must be defined before the first system header is included. Guarded so
   that a build which already passes -D_GNU_SOURCE does not trigger a macro
   redefinition warning. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <signal.h>

/* For va_arg */
#include <stdarg.h>

#ifdef __LINUX
#include <sys/ioctl.h>
#include <sys/mount.h>
#define u_int64_t unsigned long long
#endif


/* Byte-order definitions, per platform */
#ifdef __LINUX

#ifndef __USE_BSD
#define __USE_BSD
#endif
#include <endian.h>

#elif defined (__SOLARIS)

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

#include <sys/isa_defs.h>
#ifdef _BIG_ENDIAN
#define BYTE_ORDER BIG_ENDIAN
#else
#define BYTE_ORDER LITTLE_ENDIAN
#endif

#elif defined (__WIN32)
#include <sys/param.h>

#elif defined (__MACOSX)
#include <machine/endian.h>
#define __U16_TYPE unsigned short
#endif


/* Guarded: other headers commonly define these names as well */
#ifndef TRUE
#define TRUE   1
#endif
#ifndef FALSE
#define FALSE  0
#endif
#define ONE_MEGABYTE  1048576


/* RBF - Do we need these type definitions? */
#ifdef __SOLARIS
#define u_int32_t unsigned int
#define u_int64_t unsigned long long
#endif


/* The only time we're *not* on a UNIX system is when we're on Windows */
#ifndef __WIN32
#ifndef __UNIX
#define __UNIX
#endif  /* ifndef __UNIX */
#endif  /* ifndef __WIN32 */


#ifdef __UNIX

#ifndef __U16_TYPE
#define __U16_TYPE unsigned short
#endif

#include <libgen.h>

/* Fallback for platforms whose headers did not tell us the byte order:
   assume little endian. */
#ifndef BYTE_ORDER

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

#define BYTE_ORDER LITTLE_ENDIAN

#endif
/* This avoids compiler warnings on older systems */
int fseeko(FILE *stream, off_t offset, int whence);
off_t ftello(FILE *stream);


#define CMD_PROMPT "$"
#define DIR_SEPARATOR   '/'
#define NEWLINE "\n"
#define LINE_LENGTH 74
/* NOTE(review): presumably meant to be LINE_LENGTH spaces; the literal may
   have lost whitespace in transit. Verify before relying on its width. */
#define BLANK_LINE \
" "

#endif /* #ifdef __UNIX */

/* This allows us to open standard input in binary mode by default
   See http://gnuwin32.sourceforge.net/compile.html for more */
#include <fcntl.h>

/* Code specific to Microsoft Windows */
#ifdef __WIN32

/* By default, Windows uses long for off_t. This won't do. We
   need an unsigned number at minimum. Windows doesn't have 64 bit
   numbers though. */
#ifdef off_t
#undef off_t
#endif
#define off_t unsigned long

#define CMD_PROMPT "c:\\>"
#define DIR_SEPARATOR   '\\'
#define NEWLINE "\r\n"
#define LINE_LENGTH 72
/* NOTE(review): presumably meant to be LINE_LENGTH spaces; the literal may
   have lost whitespace in transit. Verify before relying on its width. */
#define BLANK_LINE \
" "


/* It would be nice to use 64-bit file lengths in Windows */
#define ftello   ftell
#define fseeko   fseek

#ifndef __CYGWIN
#define snprintf _snprintf
#endif

#define u_int32_t unsigned long

/* We create macros for the Windows equivalent UNIX functions.
   No worries about lstat to stat; Windows doesn't have symbolic links */
#define lstat(A,B) stat(A,B)

#define u_int64_t unsigned __int64

#ifndef __CYGWIN
#define realpath(A,B) _fullpath(B,A,PATH_MAX)
#endif
/* Not used in md5deep anymore, but left in here in case I
   ever need it again. Win32 documentation searches are evil.
   int asprintf(char **strp, const char *fmt, ...);
*/

char *basename(char *a);
extern char *optarg;
extern int optind;
int getopt(int argc, char *const argv[], const char *optstring);

#endif   /* ifdef __WIN32 */


/* On non-glibc systems we have to manually set the __progname variable */
#ifdef __GLIBC__
extern char *__progname;
#else
/* NOTE(review): this is a tentative definition emitted by every file that
   includes this header; it only links when the toolchain merges common
   symbols (e.g. GCC with -fcommon). */
char *__progname;
#endif /* ifdef __GLIBC__ */

/* -----------------------------------------------------------------
   Program Defaults
   ----------------------------------------------------------------- */
#define MAX_STRING_LENGTH   1024
#define COMMENT_LENGTH   64

/* Modes refer to options that can be set by the user.
   Each mode is a single-bit flag so that several can be combined in
   f_state.mode. The expansions are parenthesized so they stay intact
   next to operators that bind tighter than <<, such as + and *.
   Bits 8 to 31 are currently unused. We shouldn't use
   bits higher than 31 as Win32 can't go that high. */

#define mode_none              0
#define mode_verbose          (1<<1)
#define mode_quiet            (1<<2)
#define mode_ind_blk          (1<<3)
#define mode_quick            (1<<4)
#define mode_write_all        (1<<5)
#define mode_write_audit      (1<<6)
#define mode_multi_file       (1<<7)

#define MAX_NEEDLES                   254
#define NUM_SEARCH_SPEC_ELEMENTS      6
#define MAX_SUFFIX_LENGTH             8
#define MAX_FILE_TYPES                100
#define FOREMOST_NOEXTENSION_SUFFIX "NONE"

#define DEFAULT_MODE              mode_none
#define DEFAULT_CONFIG_FILE       "foremost.conf"
#define DEFAULT_OUTPUT_DIRECTORY  "output"
#define AUDIT_FILE_NAME           "audit.txt"
#define FOREMOST_DIVIDER          "------------------------------------------------------------------"

/* Numeric identifiers for the file types */
#define JPEG   0
#define GIF    1
#define BMP    2
#define MPG    3
#define PDF    4
#define DOC    5
#define AVI    6
#define WMV    7
#define HTM    8
#define ZIP    9
#define MOV    10
#define XLS    11
#define PPT    12
#define WPD    13
#define CPP    14
#define OLE    15
#define GZIP   16
#define RIFF   17
#define WAV    18
#define VJPEG  19
#define SXW    20
#define SXC    21
#define SXI    22
#define CONF   23
#define PNG    24
#define RAR    25
#define EXE    26
#define ELF    27
#define REG    28
#define DOCX   29
#define XLSX   30
#define PPTX   31
#define MP4    32


/* Size multipliers. Every expansion is parenthesized so that expressions
   such as (x / MEGABYTE) divide by the whole product. TERABYTE and above
   do not fit in a 32-bit int, so they are computed as unsigned long long. */
#define KILOBYTE   1024
#define MEGABYTE   (1024 * KILOBYTE)
#define GIGABYTE   (1024 * MEGABYTE)
#define TERABYTE   (1024ULL * GIGABYTE)
#define PETABYTE   (1024ULL * TERABYTE)
#define EXABYTE    (1024ULL * PETABYTE)

#define UNITS_BYTES   0
#define UNITS_KILOB   1
#define UNITS_MEGAB   2
#define UNITS_GIGAB   3
#define UNITS_TERAB   4
#define UNITS_PETAB   5
#define UNITS_EXAB    6

#define SEARCHTYPE_FORWARD        0
#define SEARCHTYPE_REVERSE        1
#define SEARCHTYPE_FORWARD_NEXT   2
#define SEARCHTYPE_ASCII          3

#define FOREMOST_BIG_ENDIAN      0
#define FOREMOST_LITTLE_ENDIAN   1
/*DEFAULT CHUNK SIZE In MB*/
#define CHUNK_SIZE 100


/* -----------------------------------------------------------------
   State Variable and Global Variables
   ----------------------------------------------------------------- */

/* NOTE(review): wildcard, search_spec and signal_caught below are tentative
   definitions in a header. They link only when the toolchain merges common
   symbols; GCC 10 and later default to -fno-common and report multiple
   definitions. The complete fix is to declare them extern here and define
   each one in exactly one .c file, which cannot be done from this header
   alone. */

/* Wildcard is a global variable because it's used by very simple
   functions that don't need the whole state passed to them */
char wildcard;

/* Program-wide state that is passed to most functions */
typedef struct f_state
{
  off_t mode;               /* combination of the mode_* flags */
  char *config_file;
  char *input_file;
  char *output_directory;
  char *start_time;
  char *invocation;
  char *audit_file_name;
  FILE *audit_file;
  int audit_file_open;
  int num_builtin;
  int chunk_size;           /*IN MB*/
  int fileswritten;
  int block_size;
  int skip;

  int time_stamp;
} f_state;

/* A byte pattern together with its search skip table */
typedef struct marker
{
  unsigned char *value;
  int len;
  size_t marker_bm_table[UCHAR_MAX+1];
} marker;

/* Search specification for one file type */
typedef struct s_spec
{
  char *suffix;
  int type;                 /* presumably one of the file type IDs above */
  u_int64_t max_len;
  unsigned char *header;
  unsigned int header_len;
  size_t header_bm_table[UCHAR_MAX+1];

  unsigned char *footer;
  unsigned int footer_len;
  size_t footer_bm_table[UCHAR_MAX+1];
  marker markerlist[5];
  int num_markers;
  int searchtype;           /* presumably one of the SEARCHTYPE_* values */

  int case_sen;

  int found;

  char comment[MAX_STRING_LENGTH]; /*Used for audit*/
  int written;              /*used for -a mode*/
} s_spec;

s_spec search_spec[50];     /*ARRAY OF BUILTIN SEARCH TYPES*/

/* Bookkeeping for the input currently being read */
typedef struct f_info
{
  char *file_name;
  off_t total_bytes;

  /* We never use the total number of bytes in a file,
     only the number of megabytes when we display a time estimate */
  off_t total_megs;
  off_t bytes_read;

#ifdef __WIN32
  /* Win32 is a 32-bit operating system and can't handle file sizes
     larger than 4GB. We use this to keep track of overflows */
  off_t last_read;
  off_t overflow_count;
#endif

  FILE *handle;
  int is_stdin;
} f_info;

/* Set if the user hits ctrl-c */
int signal_caught;

/* -----------------------------------------------------------------
   Function definitions
   ----------------------------------------------------------------- */

/* State functions */

int initialize_state(f_state *s, int argc, char **argv);
void free_state(f_state *s);

char *get_invocation(f_state *s);
char *get_start_time(f_state *s);

int set_config_file(f_state *s, char *fn);
char *get_config_file(f_state *s);

int set_output_directory(f_state *s, char *fn);
char *get_output_directory(f_state *s);

void set_audit_file_open(f_state *s);
int get_audit_file_open(f_state *s);

void set_mode(f_state *s, off_t new_mode);
int get_mode(f_state *s, off_t check_mode);

int set_search_def(f_state *s, char *ft, u_int64_t max_file_size);
/* NOTE(review): takes the state by value, unlike its siblings, so the
   whole structure is copied on every call. */
void get_search_def(f_state s);

void set_input_file(f_state *s, char *filename);
void get_input_file(f_state *s);

void set_chunk(f_state *s, int size);

void init_bm_table(unsigned char *needle, size_t table[UCHAR_MAX + 1], size_t len, int casesensitive, int searchtype);

void set_skip(f_state *s, int size);
void set_block(f_state *s, int size);


/* NOTE(review): guarded by __DEBUG, while the commented-out switch at the
   top of this file is named DEBUG - confirm which one the build defines. */
#ifdef __DEBUG
void dump_state(f_state *s);
#endif

/* The audit file */
int open_audit_file(f_state *s);
void audit_msg(f_state *s, char *format, ...);
int close_audit_file(f_state *s);


/* Set up our output directory */
int create_output_directory(f_state *s);
int write_to_disk(f_state *s, s_spec *needle, u_int64_t len, unsigned char *buf, u_int64_t t_offset);
int create_sub_dirs(f_state *s);
void cleanup_output(f_state *s);

/* Configuration Files */
int load_config_file(f_state *s);


/* Helper functions */
char *current_time(void);
off_t find_file_size(FILE *f);
char *human_readable(off_t size, char *buffer);
char *units(unsigned int c);
unsigned int chop(char *buf);
void print_search_specs(f_state *s);
int memwildcardcmp(const void *s1, const void *s2, size_t n, int caseSensitive);
int charactersMatch(char a, char b, int caseSensitive);
void printx(unsigned char *buf, int start, int end);
unsigned short htos(unsigned char s[], int endian);
unsigned int htoi(unsigned char s[], int endian);
u_int64_t htoll(unsigned char s[], int endian);
int displayPosition(f_state *s, f_info *i, u_int64_t pos);


/* Interface functions
   These functions stay the same regardless if we're using a
   command line interface or a GUI */
void fatal_error(f_state *s, char *msg);
void print_error(f_state *s, char *fn, char *msg);
void print_message(f_state *s, char *format, va_list argp);
void print_stats(f_state *s);

/* Engine */
int process_file(f_state *s);
int process_stdin(f_state *s);
unsigned char *bm_search(unsigned char *needle, size_t needle_len, unsigned char *haystack, size_t haystack_len,
                         size_t table[UCHAR_MAX + 1], int case_sen, int searchtype);
unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len, unsigned char *haystack, size_t haystack_len,
                               size_t table[UCHAR_MAX + 1], int casesensitive, int searchtype, int start_pos);

/* BUILTIN */
/* Kept inside the include guard with the other declarations. */
unsigned char *extract_file(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen, s_spec *needle, u_int64_t f_offset);

#endif /* __FOREMOST_H */