// Scalpel Copyright (C) 2005-11 by Golden G. Richard III and
// 2007-11 by Vico Marziale.
// Written by Golden G. Richard III and Vico Marziale.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// 02110-1301, USA.
//
// Thanks to Kris Kendall, Jesse Kornblum, et al for their work
// on Foremost. Foremost 0.69 was used as the starting point for
// Scalpel, in 2005.
23 24 #ifndef SCALPEL_H 25 #define SCALPEL_H 26 #define SCALPEL_VERSION "2.0" 27 28 //#define GPU_THREADING 29 #define MULTICORE_THREADING 30 #define USE_FAST_STRING_SEARCH 31 32 #define _USE_LARGEFILE 1 33 #define _USE_FILEOFFSET64 1 34 #define _USE_LARGEFILE64 1 35 #define _LARGEFILE_SOURCE 1 36 #define _LARGEFILE64_SOURCE 1 37 #define _FILE_OFFSET_BITS 64 38 39 #include <stdlib.h> 40 #include <string.h> 41 #include <stdio.h> 42 #include <fcntl.h> 43 #include <unistd.h> 44 #include <ctype.h> 45 #include <sys/stat.h> 46 #include <time.h> 47 #include <errno.h> 48 #include <signal.h> 49 #include <limits.h> 50 #include <dirent.h> 51 #include <stdarg.h> 52 #include <math.h> 53 #include <pthread.h> 54 #include <semaphore.h> 55 #include <sys/timeb.h> 56 #include <sys/time.h> 57 58 59 #include "base_name.h" 60 #include "prioque.h" 61 #include "syncqueue.h" 62 #include "common.h" 63 64 65 #if defined(__APPLE__) || defined(__FreeBSD__) 66 #define __UNIX 67 #include <sys/ttycom.h> 68 #include <sys/param.h> 69 #include <sys/ioctl.h> 70 #include <libgen.h> 71 #include <tre/regex.h> 72 // off_t on Mac OS X is 64 bits 73 #define off64_t off_t 74 #endif /* ifdef __APPLE__ */ 75 76 #ifdef __linux 77 #define __UNIX 78 #include <linux/hdreg.h> 79 #include <libgen.h> 80 #include <error.h> 81 #include <tre/regex.h> 82 #endif /* ifdef __linux */ 83 84 #if defined ( _WIN32) 85 #include <windows.h> 86 #include <sys/timeb.h> 87 #include "regex.h" 88 #define gettimeofday(A, B) QueryPerformanceCounter(A) 89 #define ftello ftello64 90 #define fseeko fseeko64 91 #define sleep Sleep 92 #define snprintf _snprintf 93 #define lstat(A,B) stat(A,B) 94 char *basename (char *path); 95 extern char *optarg; 96 extern int optind; 97 int getopt (int argc, char *const argv[], const char *optstring); 98 99 #ifdef __MINGW32__ 100 #define realpath(A,B) _fullpath(B,A,PATH_MAX) 101 #endif 102 103 #ifdef __CYGWIN32__ 104 #define realpath(A,B) \ 105 (getenv ("CYGWIN_USE_WIN32_PATHS") \ 106 ? 
(cygwin_conv_to_full_win32_path ((A), (B)), B) \ 107 : realpath ((A), (B))) 108 #endif 109 #else // ! defined(WIN32) 110 #include <sys/mount.h> 111 #define gettimeofday_t struct timeval 112 #endif // ! defined(WIN32) 113 114 #define SEARCHTYPE_FORWARD 0 115 #define SEARCHTYPE_REVERSE 1 116 #define SEARCHTYPE_FORWARD_NEXT 2 117 118 // LARGEST_REGEXP_OVERLAP specifies the largest regular expression overlap 119 // across the boundaries of SIZE_OF_BUFFER-sized chunks of the disk image. 120 // This is also used internally as the maximum "size" of a regular expression 121 // and affects the mininum disk image size that can be processed. Large 122 // values will have negative impacts on performance. 123 #define LARGEST_REGEXP_OVERLAP 1024 124 125 #define SCALPEL_SIZEOFBUFFER_PANIC_STRING \ 126 "PANIC: SIZE_OF_BUFFER has been incorrectly configured.\n" 127 128 #define SCALPEL_BLOCK_SIZE 512 129 #define MAX_STRING_LENGTH 4096 130 #define MAX_NEEDLES 254 131 #define NUM_SEARCH_SPEC_ELEMENTS 6 132 #define MAX_SUFFIX_LENGTH 8 133 #define MAX_FILE_TYPES 100 134 #define MAX_MATCHES_PER_BUFFER (SIZE_OF_BUFFER / 10) // BUG: MUST ERROR OUT PROPERLY ON OVERFLOW (check) 135 136 // Length of the queues used to tranfer data / results blocks to workers. 
#define QUEUELEN 20

#define MAX_FILES_PER_SUBDIRECTORY 1000


// Status / error codes.
#define SCALPEL_OK 0
#define SCALPEL_ERROR_NO_SEARCH_SPEC 1
#define SCALPEL_ERROR_FILE_OPEN 2
#define SCALPEL_ERROR_FILE_READ 3
#define SCALPEL_ERROR_FILE_WRITE 4
#define SCALPEL_ERROR_FILE_CLOSE 5
#define SCALPEL_ERROR_TOO_MANY_TYPES 6
#define SCALPEL_ERROR_FATAL_READ 7
#define SCALPEL_ERROR_BAD_HEADER_REGEX 8
#define SCALPEL_ERROR_BAD_FOOTER_REGEX 9
#define SCALPEL_ERROR_FILE_TOO_SMALL 10
#define SCALPEL_ERROR_NONEMPTY_DIRECTORY 11
#define SCALPEL_ERROR_PTHREAD_FAILURE 12

#define SCALPEL_GENERAL_ABORT 999

// Size-unit selectors.
#define UNITS_BYTES 0
#define UNITS_KILOB 1
#define UNITS_MEGAB 2
#define UNITS_GIGAB 3
#define UNITS_TERAB 4
#define UNITS_PETAB 5
#define UNITS_EXAB 6

// GLOBALS

// signal has been caught by signal handler
extern int signal_caught;

// current wildcard character
extern char wildcard;

// width of tty, for progress bar
extern int ttywidth;

extern char *__scalpel__progname;

// errno is provided by <errno.h>, which this header includes.  It must not
// be re-declared with "extern int errno;": errno may be a macro, and the
// C standard leaves a program-defined identifier named errno undefined.

extern double totalsearch;	// # of seconds spent in pass # 1 header/footer searches
extern double totalqueues;	// # of seconds spent building work queues
extern double totalreads;	// # of seconds spent in all passes for input file reads
extern double totalwrites;	// # of seconds spent in pass # 2 for writing carved files

#define SCALPEL_NOEXTENSION_SUFFIX "NONE"
#define SCALPEL_NOEXTENSION '\xFF'

#define SCALPEL_DEFAULT_WILDCARD '?'
#define SCALPEL_DEFAULT_CONFIG_FILE "scalpel.conf"

#define SCALPEL_DEFAULT_OUTPUT_DIR "scalpel-output"

// Expands to a format string followed by its argument (note the comma), so
// it is meant to be used as the full argument list of a printf-style call.
#define SCALPEL_BANNER_STRING \
  "Scalpel version %s\n"\
  "Written by Golden G. Richard III and Lodovico Marziale.\n", SCALPEL_VERSION

#define SCALPEL_COPYRIGHT_STRING \
  "Scalpel is (c) 2005-11 by Golden G. Richard III and Lodovico Marziale.\n"

// During the file carving operations (which occur after an initial
// scan of an image file to build the header/footer database), we want
// to read the image file only once more, sequentially, for all
// carves.  The following structure tracks the filename and first/last
// bytes in the image file for a single file to be carved.  When the
// read buffer includes the first byte of a file, the file is opened
// and the first write occurs.  When the read buffer includes the end
// byte, the last write operation occurs, the file is closed, and the
// struct can be reused.

// *****GGRIII: use of priority field to store these flags and the
// data structures which track CarveInfo structs needs to be better
// documented

#define STARTCARVE 1		// carve operation for this CarveInfo struct
				// starts in current buffer
#define STOPCARVE 2		// carve operation stops in current buffer
#define STARTSTOPCARVE 3	// carve operation both starts and stops in
				// current buffer
#define CONTINUECARVE 4		// carve operation includes entire contents
				// of current buffer

typedef struct CarveInfo {
  char *filename;		// output filename for file to carve
  FILE *fp;			// stdio stream for file to carve
  unsigned long long start;	// offset of first byte in file
  unsigned long long stop;	// offset of last byte in file
  char chopped;			// is carved file's length constrained
				// by max file size for type? (i.e., could
				// the file actually be longer?)
} CarveInfo;


// Each struct SearchSpecLine defines a particular file type,
// including header and footer information.  The following structure,
// SearchSpecOffsets, defines the absolute locations of all matching
// headers and footers for a particular file type.
// Because the entire header/footer database is built during a single
// pass over an image or device file, the header and footer locations
// are sorted in ascending order.

typedef struct SearchSpecOffsets {
  unsigned long long *headers;	// positions of discovered headers
  size_t *headerlens;		// lengths of discovered headers
  unsigned long long headerstorage;	// space allocated for this many header offsets
  unsigned long long numheaders;	// # stored header positions
  unsigned long long *footers;	// positions of discovered footers
  size_t *footerlens;		// lengths of discovered footers
  unsigned long long footerstorage;	// space allocated for this many footer offsets
  unsigned long long numfooters;	// # stored footer positions
} SearchSpecOffsets;

// max files to open at once during carving--modify if you get
// a "too many files open" error message during the second carving phase.
#ifdef _WIN32
#define MAX_FILES_TO_OPEN 20
#else
#define MAX_FILES_TO_OPEN 512
#endif


// Per-needle search data: either a table with one slot per unsigned char
// value, or a compiled regex_t.  NOTE(review): which member is live
// presumably follows the beginisRE / endisRE flags of SearchSpecLine --
// confirm against the code that fills it in.
typedef union SearchState {
  size_t bm_table[UCHAR_MAX + 1];
  regex_t re;
} SearchState;

typedef struct SearchSpecLine {
  char *suffix;
  int casesensitive;
  unsigned long long length;
  unsigned long long minlength;
  char *begin;			// translate()-d header
  char *begintext;		// textual version of header for humans
  int beginlength;
  int beginisRE;
  SearchState beginstate;
  char *end;			// translate()-d footer
  char *endtext;		// textual version of footer for humans
  int endlength;
  int endisRE;
  SearchState endstate;
  int searchtype;		// FORWARD, NEXT, REVERSE search type for footer
  struct SearchSpecOffsets offsets;
  unsigned long long numfilestocarve;	// # files to carve of this type
  unsigned long organizeDirNum;	// subdirectory # for organization
				// of files of this type
} SearchSpecLine;


// State record handed (by pointer) to the dig.c, helpers.c and files.c
// entry points declared in this header.
typedef struct scalpelState {
  char *imagefile;
  FILE *infile;
  char *conffile;
  char *outputdirectory;
  int specLines;
  struct SearchSpecLine *SearchSpec;
  unsigned long long fileswritten;
  int modeVerbose;
  int modeNoSuffix;
  FILE *auditFile;
  char *invocation;
  unsigned long long skip;
  char *coveragefile;
  unsigned int coverageblocksize;
  FILE *coverageblockmap;
  unsigned char *coveragebitmap;
  unsigned long long coveragenumblocks;
  int useInputFileList;
  char *inputFileList;
  int carveWithMissingFooters;
  int noSearchOverlap;
  int handleEmbedded;
  int generateHeaderFooterDatabase;
  int updateCoverageBlockmap;
  int useCoverageBlockmap;
  int organizeSubdirectories;
  unsigned long long organizeMaxFilesPerSub;
  int blockAlignedOnly;
  unsigned int alignedblocksize;
  int previewMode;
} scalpelState;


// one extent for a fragmented file.  'start' and 'stop'
// are real disk image addresses that define the fragment's
// location.
typedef struct Fragment {
  unsigned long long start;
  unsigned long long stop;
} Fragment;


// prototypes for visible dig.c functions
int init_threading_model (struct scalpelState *state);
int digImageFile (struct scalpelState *state);
int carveImageFile (struct scalpelState *state);
// "(void)" rather than "()" so that C compilers see a real prototype.
void init_store (void);		// return int for error??
338 339 // prototypes for visible helpers.c functions 340 341 // LMIII fix me 342 #ifndef _WIN32 343 double elapsed (struct timeval A, struct timeval B); 344 //double elapsed(gettimeofday_t a, gettimeofday_t b); 345 #endif 346 int isRegularExpression (char *s); 347 void checkMemoryAllocation (struct scalpelState *state, void *ptr, int line, 348 const char *file, const char *structure); 349 int skipInFile (struct scalpelState *state, FILE * infile); 350 void scalpelLog (struct scalpelState *state, const char *format, ...); 351 void handleError (struct scalpelState *s, int error); 352 int memwildcardcmp (const void *s1, const void *s2, 353 size_t n, int caseSensitive); 354 void setProgramName (char *s); 355 void init_bm_table (char *needle, size_t table[UCHAR_MAX + 1], 356 size_t len, int casesensitive); 357 int findLongestNeedle (struct SearchSpecLine *SearchSpec); 358 regmatch_t *re_needleinhaystack (regex_t * needle, 359 char *haystack, size_t haystack_len); 360 char *bm_needleinhaystack (char *needle, size_t needle_len, 361 char *haystack, size_t haystack_len, 362 size_t table[UCHAR_MAX + 1], int casesensitive); 363 int translate (char *str); 364 char *skipWhiteSpace (char *str); 365 void setttywidth (); 366 367 // prototypes for visible files.c functions 368 long long measureOpenFile (FILE * f, struct scalpelState *state); 369 int openAuditFile (struct scalpelState *state); 370 int closeAuditFile (FILE * f); 371 372 //// prototypes for visible dig.cu functions 373 int gpuSearchBuffer (char *readbuffer, int size_of_buffer, char *gpuresults, 374 int longestneedle, char wildcard); 375 void copytodevicepattern (char hostpatterntable[MAX_PATTERNS][MAX_PATTERN_LENGTH]); 376 void copytodevicelookup_headers(char hostlookuptable[LOOKUP_ROWS][LOOKUP_COLUMNS]); 377 void copytodevicelookup_footers(char hostlookuptable[LOOKUP_ROWS][LOOKUP_COLUMNS]); 378 void ourCudaMallocHost (void **ptr, int len); 379 int gpu_init (int longestneedle); 380 int gpu_cleanup(); 381 382 // WIN32 
string.h wierdness 383 #ifdef _WIN32 384 extern const char *strsignal (int sig); 385 #else 386 extern char *strsignal (int sig); 387 #endif /* ifdef _WIN32 */ 388 389 #endif /* ifndef SCALPEL_H */ 390