1 // Scalpel Copyright (C) 2005-11 by Golden G. Richard III and
2 // 2007-11 by Vico Marziale.
3 // Written by Golden G. Richard III and Vico Marziale.
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 // General Public License for more details.
14 
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 // 02110-1301, USA.
19 //
20 // Thanks to Kris Kendall, Jesse Kornblum, et al for their work
21 // on Foremost.  Foremost 0.69 was used as the starting point for
22 // Scalpel, in 2005.
23 
24 #ifndef SCALPEL_H
25 #define SCALPEL_H
// Release identifier (a string literal).
#define SCALPEL_VERSION "2.0"

// Build-time feature switches.  GPU_THREADING is left disabled here;
// uncomment its #define to enable it.
//#define GPU_THREADING
#define MULTICORE_THREADING
#define USE_FAST_STRING_SEARCH

// Large-file (64-bit offset) feature macros.  They are defined ahead of
// the system #includes that follow so that those headers can see them.
#define _USE_LARGEFILE 1
#define _USE_FILEOFFSET64 1
#define _USE_LARGEFILE64 1
#define _LARGEFILE_SOURCE 1
#define _LARGEFILE64_SOURCE 1
#define _FILE_OFFSET_BITS 64
38 
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdio.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <ctype.h>
45 #include <sys/stat.h>
46 #include <time.h>
47 #include <errno.h>
48 #include <signal.h>
49 #include <limits.h>
50 #include <dirent.h>
51 #include <stdarg.h>
52 #include <math.h>
53 #include <pthread.h>
54 #include <semaphore.h>
55 #include <sys/timeb.h>
56 #include <sys/time.h>
57 
58 
59 #include "base_name.h"
60 #include "prioque.h"
61 #include "syncqueue.h"
62 #include "common.h"
63 
64 
// Mac OS X / FreeBSD: mark the build as Unix and pull in the tty, ioctl,
// libgen and TRE regex headers.
#if defined(__FreeBSD__) || defined(__APPLE__)
#define __UNIX
#include <sys/ttycom.h>
#include <sys/param.h>
#include <sys/ioctl.h>
#include <libgen.h>
#include <tre/regex.h>
// alias off64_t to off_t (off_t on Mac OS X is 64 bits)
#define off64_t off_t
#endif /* defined(__APPLE__) || defined(__FreeBSD__) */
75 
76 #ifdef __linux
77 #define __UNIX
78 #include <linux/hdreg.h>
79 #include <libgen.h>
80 #include <error.h>
81 #include <tre/regex.h>
82 #endif /* ifdef __linux */
83 
#ifdef _WIN32
// Windows build: map the POSIX names used by the sources onto the
// names available on Windows.
#include <windows.h>
#include <sys/timeb.h>
#include "regex.h"
#define gettimeofday(A, B) QueryPerformanceCounter(A)
#define ftello ftello64
#define fseeko fseeko64
// NOTE(review): POSIX sleep() takes seconds while Win32 Sleep() takes
// milliseconds; the callers are not visible from this header -- confirm
// which unit they pass.
#define sleep Sleep
#define snprintf _snprintf
#define lstat(A,B) stat(A,B)

// Declared here for the Windows build.
char *basename (char *path);
extern char *optarg;
extern int optind;
int getopt (int argc, char *const argv[], const char *optstring);

#ifdef __MINGW32__
#define realpath(A,B) _fullpath(B,A,PATH_MAX)
#endif /* __MINGW32__ */

#ifdef __CYGWIN32__
#define realpath(A,B) \
  (getenv ("CYGWIN_USE_WIN32_PATHS") \
   ? (cygwin_conv_to_full_win32_path ((A), (B)), B) \
   : realpath ((A), (B)))
#endif /* __CYGWIN32__ */

#else /* !_WIN32 */
#include <sys/mount.h>
// type handed to gettimeofday() on non-Windows builds
#define gettimeofday_t struct timeval
#endif /* _WIN32 */
113 
// Footer search strategies.
#define SEARCHTYPE_FORWARD 0
#define SEARCHTYPE_REVERSE 1
#define SEARCHTYPE_FORWARD_NEXT 2

// LARGEST_REGEXP_OVERLAP is the largest regular expression overlap
// permitted across the boundaries of SIZE_OF_BUFFER-sized chunks of the
// disk image.  Internally it also serves as the maximum "size" of a
// regular expression, and it affects the minimum disk image size that
// can be processed.  Large values will have negative impacts on
// performance.
#define LARGEST_REGEXP_OVERLAP 1024

// Message text for a misconfigured SIZE_OF_BUFFER.
#define SCALPEL_SIZEOFBUFFER_PANIC_STRING \
  "PANIC: SIZE_OF_BUFFER has been incorrectly configured.\n"

// Fixed limits and sizes.
#define SCALPEL_BLOCK_SIZE 512
#define MAX_STRING_LENGTH 4096
#define MAX_NEEDLES 254
#define NUM_SEARCH_SPEC_ELEMENTS 6
#define MAX_SUFFIX_LENGTH 8
#define MAX_FILE_TYPES 100
// BUG: MUST ERROR OUT PROPERLY ON OVERFLOW (check)
#define MAX_MATCHES_PER_BUFFER (SIZE_OF_BUFFER / 10)

// Length of the queues used to transfer data / results blocks to workers.
#define QUEUELEN 20

#define MAX_FILES_PER_SUBDIRECTORY 1000
140 
141 
// Status codes: SCALPEL_OK is zero, each specific error is a small
// positive integer.
#define SCALPEL_OK 0
#define SCALPEL_ERROR_NO_SEARCH_SPEC 1
#define SCALPEL_ERROR_FILE_OPEN 2
#define SCALPEL_ERROR_FILE_READ 3
#define SCALPEL_ERROR_FILE_WRITE 4
#define SCALPEL_ERROR_FILE_CLOSE 5
#define SCALPEL_ERROR_TOO_MANY_TYPES 6
#define SCALPEL_ERROR_FATAL_READ 7
#define SCALPEL_ERROR_BAD_HEADER_REGEX 8
#define SCALPEL_ERROR_BAD_FOOTER_REGEX 9
#define SCALPEL_ERROR_FILE_TOO_SMALL 10
#define SCALPEL_ERROR_NONEMPTY_DIRECTORY 11
#define SCALPEL_ERROR_PTHREAD_FAILURE 12

// Kept well apart from the numbered error codes above.
#define SCALPEL_GENERAL_ABORT 999

// Size units, in ascending order from bytes to exabytes.
#define UNITS_BYTES 0
#define UNITS_KILOB 1
#define UNITS_MEGAB 2
#define UNITS_GIGAB 3
#define UNITS_TERAB 4
#define UNITS_PETAB 5
#define UNITS_EXAB 6
165 
// GLOBALS
//
// Declarations only; the objects themselves are defined elsewhere.

// signal has been caught by signal handler
extern int signal_caught;

// current wildcard character
extern char wildcard;

// width of tty, for progress bar
extern int ttywidth;

extern char *__scalpel__progname;

// errno is deliberately NOT redeclared here.  <errno.h>, which this
// header already includes, is the only correct source for it: ISO C
// leaves a program's own declaration of errno undefined, and errno may
// be a macro rather than a plain "int" object.

// Accumulated timings, in seconds.
extern double totalsearch;	// # of seconds spent in pass # 1 header/footer searches
extern double totalqueues;	// # of seconds spent building work queues
extern double totalreads;	// # of seconds spent in all passes for input file reads
extern double totalwrites;	// # of seconds spent in pass # 2 for writing carved files
185 
// Markers for a file type that has no extension: the textual suffix and
// the single-character form.
#define SCALPEL_NOEXTENSION_SUFFIX "NONE"
#define SCALPEL_NOEXTENSION '\xFF'

// Defaults.
#define SCALPEL_DEFAULT_WILDCARD '?'
#define SCALPEL_DEFAULT_CONFIG_FILE "scalpel.conf"
#define SCALPEL_DEFAULT_OUTPUT_DIR "scalpel-output"

// Expands to a printf-style format string FOLLOWED BY its argument
// (SCALPEL_VERSION), i.e. to two comma-separated expressions, so it is
// meant to be placed directly in a printf-family argument list.
#define SCALPEL_BANNER_STRING \
  "Scalpel version %s\n" \
  "Written by Golden G. Richard III and Lodovico Marziale.\n", \
  SCALPEL_VERSION

#define SCALPEL_COPYRIGHT_STRING \
  "Scalpel is (c) 2005-11 by Golden G. Richard III and Lodovico Marziale.\n"
200 
201 // During the file carving operations (which occur after an initial
202 // scan of an image file to build the header/footer database), we want
203 // to read the image file only once more, sequentially, for all
204 // carves.  The following structure tracks the filename and first/last
205 // bytes in the image file for a single file to be carved.  When the
206 // read buffer includes the first byte of a file, the file is opened
207 // and the first write occurs.  When the read buffer includes the end
208 // byte, the last write operation occurs, the file is closed, and the
209 // struct can be reused.
210 
211 // *****GGRIII: use of priority field to store these flags and the
212 // data structures which track CarveInfo structs needs to be better
213 // documented
214 
// Relationship between one CarveInfo carve operation and the buffer
// currently being processed.

// carve operation for this CarveInfo struct starts in current buffer
#define STARTCARVE 1
// carve operation stops in current buffer
#define STOPCARVE 2
// carve operation both starts and stops in current buffer
#define STARTSTOPCARVE 3
// carve operation includes entire contents of current buffer
#define CONTINUECARVE 4
222 
typedef struct CarveInfo {
  // output filename for file to carve
  char *filename;
  // stdio stream for file to carve
  FILE *fp;
  // offset of first byte in file
  unsigned long long start;
  // offset of last byte in file
  unsigned long long stop;
  // is the carved file's length constrained by the max file size for
  // its type? (i.e., could the file actually be longer?)
  char chopped;
} CarveInfo;
232 
233 
234 // Each struct SearchSpecLine defines a particular file type,
235 // including header and footer information.  The following structure,
236 // SearchSpecOffsets, defines the absolute locations of all matching
237 // headers and footers for a particular file type.  Because the entire
238 // header/footer database is built during a single pass over an image
239 // or device file, the header and footer locations are sorted in
240 // ascending order.
241 
typedef struct SearchSpecOffsets {
  // --- headers ---
  // positions of discovered headers
  unsigned long long *headers;
  // lengths of discovered headers
  size_t *headerlens;
  // space allocated for this many header offsets
  unsigned long long headerstorage;
  // # stored header positions
  unsigned long long numheaders;

  // --- footers ---
  // positions of discovered footers
  unsigned long long *footers;
  // lengths of discovered footers
  size_t *footerlens;
  // space allocated for this many footer offsets
  unsigned long long footerstorage;
  // # stored footer positions
  unsigned long long numfooters;
} SearchSpecOffsets;
252 
// Upper bound on files held open at once during carving.  Modify it if
// you get a "too many files open" error message during the second
// carving phase.
#if defined(_WIN32)
#define MAX_FILES_TO_OPEN 20
#else
#define MAX_FILES_TO_OPEN 512
#endif
260 
261 
// Per-pattern search state.  The two members share storage, so only one
// is meaningful for a given pattern.
typedef union SearchState {
  // table with one size_t entry per possible unsigned char value
  size_t bm_table[UCHAR_MAX + 1];
  // compiled regular expression
  regex_t re;
} SearchState;
266 
267 typedef struct SearchSpecLine {
268   char *suffix;
269   int casesensitive;
270   unsigned long long length;
271   unsigned long long minlength;
272   char *begin;          // translate()-d header
273   char *begintext;      // textual version of header for humans
274   int beginlength;
275   int beginisRE;
276   SearchState beginstate;
277   char *end;            // translate()-d footer
278   char *endtext;        // textual version of footer for humans
279   int endlength;
280   int endisRE;
281   SearchState endstate;
282   int searchtype;		// FORWARD, NEXT, REVERSE search type for footer
283   struct SearchSpecOffsets offsets;
284   unsigned long long numfilestocarve;	// # files to carve of this type
285   unsigned long organizeDirNum;	// subdirectory # for organization
286   // of files of this type
287 } SearchSpecLine;
288 
289 
// Program-wide state; the functions declared in this header receive it
// by pointer.  Fields are grouped below by name only -- their exact
// semantics are defined by the code that uses them.
typedef struct scalpelState {
  // input image, configuration and output location
  char *imagefile;
  FILE *infile;
  char *conffile;
  char *outputdirectory;

  // search specification: specLines and the SearchSpec array
  int specLines;
  struct SearchSpecLine *SearchSpec;

  unsigned long long fileswritten;
  int modeVerbose;
  int modeNoSuffix;
  FILE *auditFile;
  char *invocation;
  unsigned long long skip;

  // coverage blockmap
  char *coveragefile;
  unsigned int coverageblocksize;
  FILE *coverageblockmap;
  unsigned char *coveragebitmap;
  unsigned long long coveragenumblocks;

  // input file list
  int useInputFileList;
  char *inputFileList;

  // option flags and their parameters
  int carveWithMissingFooters;
  int noSearchOverlap;
  int handleEmbedded;
  int generateHeaderFooterDatabase;
  int updateCoverageBlockmap;
  int useCoverageBlockmap;
  int organizeSubdirectories;
  unsigned long long organizeMaxFilesPerSub;
  int blockAlignedOnly;
  unsigned int alignedblocksize;
  int previewMode;
} scalpelState;
322 
323 
// A single extent of a fragmented file.  'start' and 'stop' are real
// disk image addresses that define where the fragment is located.
typedef struct Fragment {
  unsigned long long start;
  unsigned long long stop;
} Fragment;
331 
332 
// prototypes for visible dig.c functions
int init_threading_model (struct scalpelState *state);
int digImageFile (struct scalpelState *state);
int carveImageFile (struct scalpelState *state);
// Takes no arguments.  Declared with (void) so that C compilers see a
// real prototype and reject calls that pass arguments; an empty "()"
// leaves the parameters unchecked.
void init_store (void);  // return int for error??
338 
// prototypes for visible helpers.c functions

// LMIII fix me: elapsed() is declared only for non-Windows builds.
#ifndef _WIN32
double elapsed (struct timeval A, struct timeval B);
//double elapsed(gettimeofday_t a, gettimeofday_t b);
#endif
int isRegularExpression (char *s);
void checkMemoryAllocation (struct scalpelState *state, void *ptr, int line,
			    const char *file, const char *structure);
int skipInFile (struct scalpelState *state, FILE *infile);
// printf-style variadic logger
void scalpelLog (struct scalpelState *state, const char *format, ...);
void handleError (struct scalpelState *s, int error);
int memwildcardcmp (const void *s1, const void *s2,
		    size_t n, int caseSensitive);
void setProgramName (char *s);
// 'table' has one entry per possible unsigned char value
void init_bm_table (char *needle, size_t table[UCHAR_MAX + 1],
		    size_t len, int casesensitive);
int findLongestNeedle (struct SearchSpecLine *SearchSpec);
regmatch_t *re_needleinhaystack (regex_t *needle,
				 char *haystack, size_t haystack_len);
char *bm_needleinhaystack (char *needle, size_t needle_len,
			   char *haystack, size_t haystack_len,
			   size_t table[UCHAR_MAX + 1], int casesensitive);
int translate (char *str);
char *skipWhiteSpace (char *str);
// Takes no arguments.  Declared with (void) so that C compilers see a
// real prototype and reject calls that pass arguments; an empty "()"
// leaves the parameters unchecked.
void setttywidth (void);
366 
// prototypes for visible files.c functions

// works on the already-open stream f; result is a long long
long long measureOpenFile (FILE *f, struct scalpelState *state);

// audit file open / close
int openAuditFile (struct scalpelState *state);
int closeAuditFile (FILE *f);
371 
372 //// prototypes for visible dig.cu functions
373 int gpuSearchBuffer (char *readbuffer, int size_of_buffer, char *gpuresults,
374 		     int longestneedle, char wildcard);
375 void copytodevicepattern (char hostpatterntable[MAX_PATTERNS][MAX_PATTERN_LENGTH]);
376 void copytodevicelookup_headers(char hostlookuptable[LOOKUP_ROWS][LOOKUP_COLUMNS]);
377 void copytodevicelookup_footers(char hostlookuptable[LOOKUP_ROWS][LOOKUP_COLUMNS]);
378 void ourCudaMallocHost (void **ptr, int len);
379 int gpu_init (int longestneedle);
380 int gpu_cleanup();
381 
// WIN32 string.h weirdness: strsignal() is declared explicitly here,
// with a const-qualified return type for the Windows build and a plain
// char * everywhere else.
#if defined(_WIN32)
extern const char *strsignal (int sig);
#else
extern char *strsignal (int sig);
#endif /* defined(_WIN32) */
388 
389 #endif /* ifndef SCALPEL_H */
390