1 // Scalpel Copyright (C) 2005-11 by Golden G. Richard III and
2 // 2007-11 by Vico Marziale.
3 // Written by Golden G. Richard III and Vico Marziale.
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 // 02110-1301, USA.
19 //
20 // Thanks to Kris Kendall, Jesse Kornblum, et al for their work
21 // on Foremost. Foremost 0.69 was used as the starting point for
22 // Scalpel, in 2005.
23
24
25 #include "scalpel.h"
26
27
/////////// GLOBALS ////////////

static char *readbuffer;	// Read buffer--process image files in
				// SIZE_OF_BUFFER-size chunks.

// Info needed for each of above SIZE_OF_BUFFER chunks.
typedef struct readbuf_info {
  long long bytesread;		// number of valid bytes in this buf
  long long beginreadpos;	// absolute position of buf in the image
  char *readbuf;		// pointer to the SIZE_OF_BUFFER-byte array
} readbuf_info;


// queues to facilitate async reads, concurrent cpu, gpu work
syncqueue_t *full_readbuf;	// queue of full buffers read from image
syncqueue_t *empty_readbuf;	// queue of empty buffers

// set when the reader thread has consumed the whole image
// NOTE(review): shared across threads; sig_atomic_t alone only covers
// signal-handler safety, not inter-thread visibility -- verify readers.
sig_atomic_t reads_finished;
//pthread_cond_t reader_done = PTHREAD_COND_INITIALIZER;

#ifdef GPU_THREADING
// GPU only threading globals
syncqueue_t *results_readbuf;	// queue of encoded results from gpu
//pthread_cond_t gpu_handler_done = PTHREAD_COND_INITIALIZER;
sig_atomic_t gpu_finished = FALSE;	// for thread synchronization
char localpattern[MAX_PATTERNS][MAX_PATTERN_LENGTH];	// search patterns
char locallookup_headers[LOOKUP_ROWS][LOOKUP_COLUMNS];	// header lookup table
char locallookup_footers[LOOKUP_ROWS][LOOKUP_COLUMNS];	// footer lookup table

#endif
58
59
60
#ifdef MULTICORE_THREADING
// Multi-core only threading globals

// Parameters to threads under the MULTICORE_THREADING model, to allow
// efficient header/footer searches.  One instance per search thread;
// filled in by digBuffer() before unblocking the thread.
typedef struct ThreadFindAllParams {
  int id;			// thread index (== needle index)
  char *str;			// needle: header or footer pattern
  size_t length;		// length of the needle
  char *startpos;		// start of the buffer to search
  // NOTE(review): despite the name, digBuffer() stores the buffer's
  // *end address* cast to long here -- confirm against threadedFindAll
  long offset;
  char **foundat;		// out: match positions (count in last slot)
  size_t *foundatlens;		// out: lengths of each match
  int strisRE;			// nonzero if needle is a regular expression
  union {
    size_t *table;		// Boyer-Moore skip table (strisRE == 0)
    regex_t *regex;		// compiled regex (strisRE != 0)
  };
  int casesensitive;		// nonzero for case-sensitive matching
  int nosearchoverlap;		// state->noSearchOverlap pass-through
  struct scalpelState *state;
} ThreadFindAllParams;

// TODO: These structures could be released after the dig phase; they aren't needed in the carving phase since it's not
// threaded. Look into this in the future.

static pthread_t *searchthreads;	// thread pool for header/footer searches
static ThreadFindAllParams *threadargs;	// argument structures for threads
static char ***foundat;		// locations of headers/footers discovered by threads
static size_t **foundatlens;	// lengths of discovered headers/footers
//static sem_t *workavailable;  // semaphores that block threads until
// header/footer searches are required
static pthread_mutex_t *workavailable;	// mutex-based replacement for the above
//static sem_t *workcomplete;   // semaphores that allow main thread to wait
// for all search threads to complete current job
static pthread_mutex_t *workcomplete;	// mutex-based replacement for the above

#endif
99
// prototypes for private dig.c functions (definitions below; coverage-map
// helpers mirror the stdio calls they wrap but skip already-carved regions)
static unsigned long long adjustForEmbedding(struct SearchSpecLine
					     *currentneedle,
					     unsigned long long headerindex,
					     unsigned long long *prevstopindex);
static int writeHeaderFooterDatabase(struct scalpelState *state);
static int setupCoverageMaps(struct scalpelState *state,
			     unsigned long long filesize);
static int auditUpdateCoverageBlockmap(struct scalpelState *state,
				       struct CarveInfo *carve);
static int updateCoverageBlockmap(struct scalpelState *state,
				  unsigned long long block);
static void generateFragments(struct scalpelState *state, Queue * fragments,
			      struct CarveInfo *carve);
static unsigned long long positionUseCoverageBlockmap(struct scalpelState
						      *state,
						      unsigned long long
						      position);
static void destroyCoverageMaps(struct scalpelState *state);
static int fseeko_use_coverage_map(struct scalpelState *state, FILE * fp,
				   off64_t offset);
static off64_t ftello_use_coverage_map(struct scalpelState *state, FILE * fp);
static size_t fread_use_coverage_map(struct scalpelState *state, void *ptr,
				     size_t size, size_t nmemb, FILE * stream);
static void printhex(char *s, int len);
static void clean_up(struct scalpelState *state, int signum);
static int displayPosition(int *units,
			   unsigned long long pos,
			   unsigned long long size, char *fn);
static int setupAuditFile(struct scalpelState *state);
static int digBuffer(struct scalpelState *state,
		     unsigned long long lengthofbuf,
		     unsigned long long offset);
#ifdef MULTICORE_THREADING
static void *threadedFindAll(void *args);
#endif
136
137
138 // force header/footer matching to deal with embedded headers/footers
// force header/footer matching to deal with embedded headers/footers
//
// Given the header at 'headerindex' in this needle's sorted offsets
// database, find the index of the footer that closes it, treating any
// later headers of the same type that occur before a candidate footer as
// embedded (nested) files which must each consume one footer first.
// '*prevstopindex' is a cursor into the footer list used to skip footers
// that precede this header; it is advanced in place so successive calls
// need not rescan.  Returns the index of the balancing footer, or the
// sentinel 999999999999999999ULL if no remaining footer balances the
// headers seen.
static unsigned long long
adjustForEmbedding(struct SearchSpecLine *currentneedle,
		   unsigned long long headerindex,
		   unsigned long long *prevstopindex) {

  unsigned long long h = headerindex + 1;	// next unexamined header
  unsigned long long f = 0;			// next unexamined footer
  int header = 0;		// a header lies before the candidate footer?
  int footer = 0;		// a footer lies before the candidate footer?
  long long headerstack = 0;	// count of currently-open embedded headers
  unsigned long long start = currentneedle->offsets.headers[headerindex] + 1;
  unsigned long long candidatefooter;
  int morecandidates = 1;
  int moretests = 1;

  // skip footers which precede header
  while (*prevstopindex < currentneedle->offsets.numfooters &&
	 currentneedle->offsets.footers[*prevstopindex] < start) {
    (*prevstopindex)++;
  }

  f = *prevstopindex;
  candidatefooter = *prevstopindex;

  // skip footers until header/footer count balances
  while (candidatefooter < currentneedle->offsets.numfooters && morecandidates) {
    // see if this footer is viable
    morecandidates = 0;		// assumption is yes, viable
    moretests = 1;
    // sweep 'start' forward, consuming headers/footers in image order
    // until the candidate footer's position is reached
    while (moretests && start < currentneedle->offsets.footers[candidatefooter]) {
      header = 0;
      footer = 0;
      if(h < currentneedle->offsets.numheaders
	 && currentneedle->offsets.headers[h] >= start
	 && currentneedle->offsets.headers[h] <
	 currentneedle->offsets.footers[candidatefooter]) {
	header = 1;
      }
      if(f < currentneedle->offsets.numfooters
	 && currentneedle->offsets.footers[f] >= start
	 && currentneedle->offsets.footers[f] <
	 currentneedle->offsets.footers[candidatefooter]) {
	footer = 1;
      }

      // whichever of the two comes first in the image wins this round:
      // a header opens one more embedded file, a footer closes one
      if(header && (!footer ||
		    currentneedle->offsets.headers[h] <
		    currentneedle->offsets.footers[f])) {
	h++;
	headerstack++;
	start = (h < currentneedle->offsets.numheaders)
	  ? currentneedle->offsets.headers[h] : start + 1;
      }
      else if(footer) {
	f++;
	headerstack--;
	if(headerstack < 0) {
	  headerstack = 0;	// never let extra footers go negative
	}
	start = (f < currentneedle->offsets.numfooters)
	  ? currentneedle->offsets.footers[f] : start + 1;
      }
      else {
	moretests = 0;		// nothing left before the candidate footer
      }
    }

    if(headerstack > 0) {
      // header/footer imbalance, need to try next footer
      candidatefooter++;
      // this footer counts against footer deficit!
      headerstack--;
      morecandidates = 1;
    }
  }

  return (headerstack > 0) ? 999999999999999999ULL : candidatefooter;
}
217
218
219 // output hex notation for chars in 's'
// Print the 'len' bytes at 's' to stdout in "\xNN" hex-escape notation.
static void printhex(char *s, int len) {

  const unsigned char *byte = (const unsigned char *)s;
  const unsigned char *end = byte + len;

  while (byte < end) {
    printf("\\x%.2x", *byte++);
  }
}
227
228
// Terminate early in response to signal 'signum': log what happened to
// the audit trail, close the audit file, and exit with failure status.
// Does not return.
static void clean_up(struct scalpelState *state, int signum) {

  scalpelLog(state, "Cleaning up...\n");
  scalpelLog(state,
	     "\nCaught signal: %s. Program is terminating early\n",
	     (char *)strsignal(signum));
  closeAuditFile(state->auditFile);
  exit(1);
}
238
239
240
241 // display progress bar
242 static int
displayPosition(int * units,unsigned long long pos,unsigned long long size,char * fn)243 displayPosition(int *units,
244 unsigned long long pos, unsigned long long size, char *fn) {
245
246 double percentDone = (((double)pos) / (double)(size) * 100);
247 double position = (double)pos;
248 int count;
249 int barlength, i, len;
250 double elapsed;
251 long remaining;
252
253 char buf[MAX_STRING_LENGTH];
254 char line[MAX_STRING_LENGTH];
255
256 #ifdef _WIN32
257 static LARGE_INTEGER start;
258 LARGE_INTEGER now;
259 static LARGE_INTEGER freq;
260 QueryPerformanceFrequency(&freq);
261 #else
262 static struct timeval start;
263 struct timeval now, td;
264 #endif
265
266 // get current time and remember start time when first chunk of
267 // an image file is read
268
269 if(pos <= SIZE_OF_BUFFER) {
270 gettimeofday(&start, (struct timezone *)0);
271 }
272 gettimeofday(&now, (struct timezone *)0);
273
274 // First, reduce the position to the right units
275 for(count = 0; count < *units; count++) {
276 position = position / 1024;
277 }
278
279 // Now check if we've hit the next type of units
280 while (position > 1023) {
281 position = position / 1024;
282 (*units)++;
283 }
284
285 switch (*units) {
286
287 case UNITS_BYTES:
288 sprintf(buf, "bytes");
289 break;
290 case UNITS_KILOB:
291 sprintf(buf, "KB");
292 break;
293 case UNITS_MEGAB:
294 sprintf(buf, "MB");
295 break;
296 case UNITS_GIGAB:
297 sprintf(buf, "GB");
298 break;
299 case UNITS_TERAB:
300 sprintf(buf, "TB");
301 break;
302 case UNITS_PETAB:
303 sprintf(buf, "PB");
304 break;
305 case UNITS_EXAB:
306 sprintf(buf, "EB");
307 break;
308
309 default:
310 fprintf(stdout, "Unable to compute progress.\n");
311 return SCALPEL_OK;
312 }
313
314 len = 0;
315 len +=
316 snprintf(line + len, sizeof(line) - len, "\r%s: %5.1f%% ", fn, percentDone);
317 barlength = ttywidth - strlen(fn) - strlen(buf) - 32;
318 if(barlength > 0) {
319 i = barlength * (int)percentDone / 100;
320 len += snprintf(line + len, sizeof(line) - len,
321 "|%.*s%*s|", i,
322 "****************************************************************************************************************************************************************",
323 barlength - i, "");
324 }
325
326 len += snprintf(line + len, sizeof(line) - len, " %6.1f %s", position, buf);
327
328 #ifdef _WIN32
329 elapsed =
330 ((double)now.QuadPart - (double)start.QuadPart) / ((double)freq.QuadPart);
331 //printf("elapsed: %f\n",elapsed);
332 #else
333 timersub(&now, &start, &td);
334 elapsed = td.tv_sec + (td.tv_usec / 1000000.0);
335 #endif
336 remaining = (long)((100 - percentDone) / percentDone * elapsed);
337 //printf("Ratio remaining: %f\n",(100-percentDone)/percentDone);
338 //printf("Elapsed time: %f\n",elapsed);
339 if(remaining >= 100 * (60 * 60)) { //60*60 is seconds per hour
340 len += snprintf(line + len, sizeof(line) - len, " --:--ETA");
341 }
342 else {
343 i = remaining / (60 * 60);
344 if(i)
345 len += snprintf(line + len, sizeof(line) - len, " %2d:", i);
346 else
347 len += snprintf(line + len, sizeof(line) - len, " ");
348 i = remaining % (60 * 60);
349 len +=
350 snprintf(line + len, sizeof(line) - len, "%02d:%02d ETA", i / 60, i % 60);
351 }
352
353 fprintf(stdout, "%s", line);
354 fflush(stdout);
355
356 return SCALPEL_OK;
357 }
358
359
360
361 // create initial entries in audit for each image file processed
// create initial entries in audit for each image file processed
//
// Resolves the image path, logs it, records any skipped prefix, and
// writes the column headings for the carved-file table.  Returns
// SCALPEL_OK, or SCALPEL_ERROR_FILE_OPEN if the path can't be resolved.
static int setupAuditFile(struct scalpelState *state) {

  char imageFile[MAX_STRING_LENGTH];

  // NOTE(review): realpath() requires a destination buffer of at least
  // PATH_MAX bytes -- confirm MAX_STRING_LENGTH >= PATH_MAX in scalpel.h.
  if(realpath(state->imagefile, imageFile)) {
    scalpelLog(state, "\nOpening target \"%s\"\n\n", imageFile);
  }
  else {
    //handleError(state, SCALPEL_ERROR_FILE_OPEN);
    return SCALPEL_ERROR_FILE_OPEN;
  }

  // the two branches differ only in the 64-bit format specifier
  // (%I64u for the MSVC runtime vs %llu elsewhere)
#ifdef _WIN32
  if(state->skip) {
    fprintf(state->auditFile, "Skipped the first %I64u bytes of %s...\n",
	    state->skip, state->imagefile);
    if(state->modeVerbose) {
      fprintf(stdout, "Skipped the first %I64u bytes of %s...\n",
	      state->skip, state->imagefile);
    }
  }
#else
  if(state->skip) {
    fprintf(state->auditFile, "Skipped the first %llu bytes of %s...\n",
	    state->skip, state->imagefile);
    if(state->modeVerbose) {
      fprintf(stdout, "Skipped the first %llu bytes of %s...\n",
	      state->skip, state->imagefile);
    }
  }
#endif

  fprintf(state->auditFile, "The following files were carved:\n");
  fprintf(state->auditFile,
	  "File\t\t Start\t\t\tChop\t\tLength\t\tExtracted From\n");

  return SCALPEL_OK;
}
400
401
402
// Search the current read chunk (global 'readbuffer', 'lengthofbuf'
// valid bytes, located at absolute image position 'offset') for every
// header and footer in state->SearchSpec, appending absolute match
// locations and lengths to each needle's offsets database.  Under
// GPU_THREADING the buffer already holds encoded GPU results, which are
// decoded here; under MULTICORE_THREADING the searches are dispatched to
// the per-needle thread pool and the results are digested afterwards.
// Returns SCALPEL_OK; terminates via clean_up() on SIGINT/SIGTERM.
static int
digBuffer(struct scalpelState *state, unsigned long long lengthofbuf,
	  unsigned long long offset) {

  unsigned long long startLocation = 0;
  int needlenum, i = 0;
  struct SearchSpecLine *currentneedle = 0;
  // gettimeofday_t srchnow, srchthen;

  // for each file type, find all headers and some (or all) footers

  // signal check
  if(signal_caught == SIGTERM || signal_caught == SIGINT) {
    clean_up(state, signal_caught);
  }


  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////
  ////////////////GPU-based SEARCH///////////////////
  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////
#ifdef GPU_THREADING		// code for searches on GPUs


  // BUG (GGRIII): GPU MODEL DOES NOT SUPPORT REGULAR EXPRESSIONS
  // (LMIII): NOR WILL IT EVER.
  // GGRIII: let's not be hasty.  :)

  // In GPU mode, the needle search has already been done. readbuffer now holds
  // the encoded results, which must be decoded into scalpel structures.

  // Each needle found is encoded in 2 bytes: byte 0 is the 1-based needle
  // number (positive == header, negative == footer, 0 == no match) and
  // byte 1 is the position within the GPU thread block's chunk.

  int longestneedle = findLongestNeedle(state->SearchSpec);

  // Compute the size of the results buffer.
  int result_chunks = (lengthofbuf / (THREADS_PER_BLOCK - longestneedle)) + 1;
  int result_length = result_chunks * RESULTS_SIZE_PER_BLOCK;

  // Decode the results from the gpu.
  int d = 0;
  for(d = 0; d < result_length; d += 2) {
    // reconstruct the in-buffer position from the chunk number and the
    // position byte
    i = (d / RESULTS_SIZE_PER_BLOCK) * (THREADS_PER_BLOCK - longestneedle) +
      (unsigned char)(readbuffer[d + 1]);
    if(readbuffer[d] > 0) {	// positive code: header match

      needlenum = readbuffer[d] - 1;
      currentneedle = &(state->SearchSpec[needlenum]);
      startLocation = offset + i;

      // found a header--record location in header offsets database
      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout, "A %s header was found at : %I64u\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#else
	fprintf(stdout, "A %s header was found at : %llu\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#endif
      }

      currentneedle->offsets.numheaders++;
      if(currentneedle->offsets.headerstorage <=
	 currentneedle->offsets.numheaders) {
	// need more memory for header offset storage--add an
	// additional 100 elements
	currentneedle->offsets.headers = (unsigned long long *)
	  realloc(currentneedle->offsets.headers,
		  sizeof(unsigned long long) *
		  (currentneedle->offsets.numheaders + 100));


	checkMemoryAllocation(state, currentneedle->offsets.headers,
			      __LINE__, __FILE__, "header array");
	// inserted
	currentneedle->offsets.headerlens = (size_t *)
	  realloc(currentneedle->offsets.headerlens, sizeof(size_t) *
		  (currentneedle->offsets.numheaders + 100));
	checkMemoryAllocation(state, currentneedle->offsets.headerlens,
			      __LINE__, __FILE__, "header array");
	currentneedle->offsets.headerstorage =
	  currentneedle->offsets.numheaders + 100;

	if(state->modeVerbose) {
#ifdef _WIN32
	  fprintf(stdout,
		  "Memory reallocation performed, total header storage = %I64u\n",
		  currentneedle->offsets.headerstorage);
#else
	  fprintf(stdout,
		  "-->>Memory reallocation performed, total header storage = %llu\n",
		  currentneedle->offsets.headerstorage);
#endif
	}
      }
      currentneedle->offsets.headers[currentneedle->offsets.numheaders -
				     1] = startLocation;

    }
    else if(readbuffer[d] < 0) {	// footer

      needlenum = -1 * readbuffer[d] - 1;
      currentneedle = &(state->SearchSpec[needlenum]);
      startLocation = offset + i;
      // found a footer--record location in footer offsets database
      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout, "A %s footer was found at : %I64u\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#else
	fprintf(stdout, "A %s footer was found at : %llu\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#endif
      }

      currentneedle->offsets.numfooters++;
      if(currentneedle->offsets.footerstorage <=
	 currentneedle->offsets.numfooters) {
	// need more memory for footer offset storage--add an
	// additional 100 elements
	currentneedle->offsets.footers = (unsigned long long *)
	  realloc(currentneedle->offsets.footers,
		  sizeof(unsigned long long) *
		  (currentneedle->offsets.numfooters + 100));

	checkMemoryAllocation(state, currentneedle->offsets.footers,
			      __LINE__, __FILE__, "footer array");
	currentneedle->offsets.footerlens =
	  (size_t *) realloc(currentneedle->offsets.footerlens,
			     sizeof(size_t) *
			     (currentneedle->offsets.numfooters + 100));
	checkMemoryAllocation(state, currentneedle->offsets.footerlens,
			      __LINE__, __FILE__, "footer array");
	currentneedle->offsets.footerstorage =
	  currentneedle->offsets.numfooters + 100;

	if(state->modeVerbose) {
#ifdef _WIN32
	  fprintf(stdout,
		  "Memory reallocation performed, total footer storage = %I64u\n",
		  currentneedle->offsets.footerstorage);
#else
	  fprintf(stdout,
		  "Memory reallocation performed, total footer storage = %llu\n",
		  currentneedle->offsets.footerstorage);
#endif
	}
      }
      currentneedle->offsets.footers[currentneedle->offsets.numfooters -
				     1] = startLocation;

    }
  }

#endif
  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////
  ////////////////END GPU-BASED SEARCH///////////////
  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////



  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////
  ///////////////MULTI-THREADED SEARCH //////////////
  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////
#ifdef MULTICORE_THREADING

  // as of v1.9, this is now the lowest common denominator mode

  // ---------------- threaded header search ------------------ //
  // ---------------- threaded header search ------------------ //

  // gettimeofday(&srchthen, 0);
  if(state->modeVerbose) {
    printf("Waking up threads for header searches.\n");
  }
  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    currentneedle = &(state->SearchSpec[needlenum]);
    // # of matches in last element of foundat array
    foundat[needlenum][MAX_MATCHES_PER_BUFFER] = 0;
    threadargs[needlenum].id = needlenum;
    threadargs[needlenum].str = currentneedle->begin;
    threadargs[needlenum].length = currentneedle->beginlength;
    threadargs[needlenum].startpos = readbuffer;
    // NOTE(review): stores the buffer's *end address* cast to long in a
    // field named 'offset'; a pointer does not fit in a long on LLP64
    // (64-bit Windows) -- verify against threadedFindAll's usage.
    threadargs[needlenum].offset = (long)readbuffer + lengthofbuf;
    threadargs[needlenum].foundat = foundat[needlenum];
    threadargs[needlenum].foundatlens = foundatlens[needlenum];
    threadargs[needlenum].strisRE = currentneedle->beginisRE;
    if(currentneedle->beginisRE) {
      threadargs[needlenum].regex = &(currentneedle->beginstate.re);
    }
    else {
      threadargs[needlenum].table = currentneedle->beginstate.bm_table;
    }
    threadargs[needlenum].casesensitive = currentneedle->casesensitive;
    threadargs[needlenum].nosearchoverlap = state->noSearchOverlap;
    threadargs[needlenum].state = state;

    // unblock thread
    // sem_post(&workavailable[needlenum]);

    pthread_mutex_unlock(&workavailable[needlenum]);

  }

  // ---------- thread group synchronization point ----------- //
  // ---------- thread group synchronization point ----------- //

  if(state->modeVerbose) {
    printf("Waiting for thread group synchronization.\n");
  }

  // wait for all threads to complete header search before proceeding
  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    // sem_wait(&workcomplete[needlenum]);

    pthread_mutex_lock(&workcomplete[needlenum]);

  }

  if(state->modeVerbose) {
    printf("Thread group synchronization complete.\n");
  }

  // digest header locations discovered by the thread group

  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    currentneedle = &(state->SearchSpec[needlenum]);

    // number of matches stored in last element of vector
    for(i = 0; i < (long)foundat[needlenum][MAX_MATCHES_PER_BUFFER]; i++) {
      // convert buffer-relative match pointer to absolute image position
      startLocation = offset + (foundat[needlenum][i] - readbuffer);
      // found a header--record location in header offsets database
      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout, "A %s header was found at : %I64u\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#else
	fprintf(stdout, "A %s header was found at : %llu\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#endif
      }

      currentneedle->offsets.numheaders++;
      if(currentneedle->offsets.headerstorage <=
	 currentneedle->offsets.numheaders) {
	// need more memory for header offset storage--add an
	// additional 100 elements
	currentneedle->offsets.headers = (unsigned long long *)
	  realloc(currentneedle->offsets.headers,
		  sizeof(unsigned long long) *
		  (currentneedle->offsets.numheaders + 100));
	checkMemoryAllocation(state, currentneedle->offsets.headers,
			      __LINE__, __FILE__, "header array");
	currentneedle->offsets.headerlens =
	  (size_t *) realloc(currentneedle->offsets.headerlens,
			     sizeof(size_t) *
			     (currentneedle->offsets.numheaders + 100));
	checkMemoryAllocation(state, currentneedle->offsets.headerlens,
			      __LINE__, __FILE__, "header array");

	currentneedle->offsets.headerstorage =
	  currentneedle->offsets.numheaders + 100;

	if(state->modeVerbose) {
#ifdef _WIN32
	  fprintf(stdout,
		  "Memory reallocation performed, total header storage = %I64u\n",
		  currentneedle->offsets.headerstorage);
#else
	  fprintf(stdout,
		  "Memory reallocation performed, total header storage = %llu\n",
		  currentneedle->offsets.headerstorage);
#endif
	}
      }
      currentneedle->offsets.headers[currentneedle->offsets.numheaders -
				     1] = startLocation;
      currentneedle->offsets.headerlens[currentneedle->offsets.
					numheaders - 1] =
	foundatlens[needlenum][i];
    }
  }


  // ---------------- threaded footer search ------------------ //
  // ---------------- threaded footer search ------------------ //

  // now footer search, if:
  //
  // there's a footer for the file type AND
  //
  // (at least one header for that type has been previously seen and
  // at least one header is viable--that is, it was found in the current
  // buffer, or it's less than the max carve distance behind the current
  // file offset
  //
  // OR
  //
  // a header/footer database is being created.  In this case, ALL headers and
  // footers must be discovered)

  if(state->modeVerbose) {
    printf("Waking up threads for footer searches.\n");
  }

  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    currentneedle = &(state->SearchSpec[needlenum]);
    if(
	// regular case--want to search for only "viable" (in the sense that they are
	// useful for carving unfragmented files) footers, to save time
	(currentneedle->offsets.numheaders > 0 &&
	 currentneedle->endlength &&
	 (currentneedle->offsets.
	  headers[currentneedle->offsets.numheaders - 1] > offset
	  || (offset -
	      currentneedle->offsets.headers[currentneedle->offsets.
					     numheaders - 1] <
	      currentneedle->length))) ||
	// generating header/footer database, need to find all footers
	// BUG: ALSO need to do this for discovery of fragmented files--document this
	(currentneedle->endlength && state->generateHeaderFooterDatabase)) {
      // # of matches in last element of foundat array
      foundat[needlenum][MAX_MATCHES_PER_BUFFER] = 0;
      threadargs[needlenum].id = needlenum;
      threadargs[needlenum].str = currentneedle->end;
      threadargs[needlenum].length = currentneedle->endlength;
      threadargs[needlenum].startpos = readbuffer;
      // NOTE(review): same end-address-as-long issue as the header pass
      threadargs[needlenum].offset = (long)readbuffer + lengthofbuf;
      threadargs[needlenum].foundat = foundat[needlenum];
      threadargs[needlenum].foundatlens = foundatlens[needlenum];
      threadargs[needlenum].strisRE = currentneedle->endisRE;
      if(currentneedle->endisRE) {
	threadargs[needlenum].regex = &(currentneedle->endstate.re);
      }
      else {
	threadargs[needlenum].table = currentneedle->endstate.bm_table;
      }
      threadargs[needlenum].casesensitive = currentneedle->casesensitive;
      threadargs[needlenum].nosearchoverlap = state->noSearchOverlap;
      threadargs[needlenum].state = state;

      // unblock thread
      // sem_post(&workavailable[needlenum]);
      pthread_mutex_unlock(&workavailable[needlenum]);

    }
    else {
      // length == 0 marks "no footer search dispatched" so the
      // synchronization loop below knows not to wait on this thread
      threadargs[needlenum].length = 0;
    }
  }

  if(state->modeVerbose) {
    printf("Waiting for thread group synchronization.\n");
  }

  // ---------- thread group synchronization point ----------- //
  // ---------- thread group synchronization point ----------- //

  // wait for all threads to complete footer search before proceeding
  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    if(threadargs[needlenum].length > 0) {
      // sem_wait(&workcomplete[needlenum]);
      pthread_mutex_lock(&workcomplete[needlenum]);
    }
  }

  if(state->modeVerbose) {
    printf("Thread group synchronization complete.\n");
  }

  // digest footer locations discovered by the thread group

  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    currentneedle = &(state->SearchSpec[needlenum]);
    // number of matches stored in last element of vector
    for(i = 0; i < (long)foundat[needlenum][MAX_MATCHES_PER_BUFFER]; i++) {
      startLocation = offset + (foundat[needlenum][i] - readbuffer);
      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout, "A %s footer was found at : %I64u\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#else
	fprintf(stdout, "A %s footer was found at : %llu\n",
		currentneedle->suffix,
		positionUseCoverageBlockmap(state, startLocation));
#endif
      }

      currentneedle->offsets.numfooters++;
      if(currentneedle->offsets.footerstorage <=
	 currentneedle->offsets.numfooters) {
	// need more memory for footer offset storage--add an
	// additional 100 elements
	currentneedle->offsets.footers = (unsigned long long *)
	  realloc(currentneedle->offsets.footers,
		  sizeof(unsigned long long) *
		  (currentneedle->offsets.numfooters + 100));
	checkMemoryAllocation(state, currentneedle->offsets.footers,
			      __LINE__, __FILE__, "footer array");
	currentneedle->offsets.footerlens =
	  (size_t *) realloc(currentneedle->offsets.footerlens,
			     sizeof(size_t) *
			     (currentneedle->offsets.numfooters + 100));
	checkMemoryAllocation(state, currentneedle->offsets.footerlens,
			      __LINE__, __FILE__, "footer array");
	currentneedle->offsets.footerstorage =
	  currentneedle->offsets.numfooters + 100;

	if(state->modeVerbose) {
#ifdef _WIN32
	  fprintf(stdout,
		  "Memory reallocation performed, total footer storage = %I64u\n",
		  currentneedle->offsets.footerstorage);
#else
	  fprintf(stdout,
		  "Memory reallocation performed, total footer storage = %llu\n",
		  currentneedle->offsets.footerstorage);
#endif
	}
      }
      currentneedle->offsets.footers[currentneedle->offsets.numfooters -
				     1] = startLocation;
      currentneedle->offsets.footerlens[currentneedle->offsets.
					numfooters - 1] =
	foundatlens[needlenum][i];
    }
  }

#endif // multi-core CPU code
  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////
  ///////////END MULTI-THREADED SEARCH///////////////
  ///////////////////////////////////////////////////
  ///////////////////////////////////////////////////

  return SCALPEL_OK;
}
852
853
854
855
856 ////////////////////////////////////////////////////////////////////////////////
857 /////////////////////// LMIII //////////////////////////////////////////////////
858 ////////////////////////////////////////////////////////////////////////////////
859
860 /* In order to facilitate asynchronous reads, we will set up a reader
861 pthread, whose only job is to read the image in SIZE_OF_BUFFER
862 chunks using the coverage map functions.
863
864 This thread will buffer QUEUELEN reads.
865 */
866
867
#ifdef GPU_THREADING
// GPU search thread. Builds the host-side copies of the needle pattern
// table and the header/footer lookup tables, copies them into GPU constant
// memory, then consumes full read buffers from 'full_readbuf', searches
// each on the GPU (results are encoded in place in the buffer), and pushes
// the buffers onto 'results_readbuf' for decoding by digImageFile().
// 'sss' is the global scalpelState, passed as void* per pthreads convention.
// Signals completion by setting the global 'gpu_finished'.
void *gpu_handler(void *sss) {

  struct scalpelState *state = (struct scalpelState *)sss;

  char *fullbuf;

  // Initialize constant table of headers/footers on GPU. First array
  // element is header # 1, second is footer # 1, third is header # 2, etc.
  // zero-length header marks end of array.

  // Each needle is stored as follows:
  // byte 0: size of needle
  // bytes 1 - sizeof needle: the needle itself
  // byte PATTERN_WCSHIFT: number of leading wildcards
  // byte PATTERN_CASESEN: bool this needle is case sensitive

  // Example: wpc header
  // (one leading wildcard, case sensitive)
  // {\x04?WPC\x00\x00 ... \x00\x01\x01}

  // htm header:
  // (no leading wildcard, not case sensitive)
  // \x05<html\x00\x00 ... \x00\x00\x00}

  // We also create 2 lookup tables for efficiency, one for headers and one for
  // footers. Each is a LOOKUP_ROWS x LOOKUP_COLUMNS array. Each row of the
  // headers lookup table holds indexes into the h/f table described above of
  // headers which begin with the array index of this row, eg lookup_headers[7]
  // holds a list of the indexes of the headers whose first byte is \x07.


  // first, build local copies ...
  // (localpattern / locallookup_* are file-scope staging arrays declared
  // elsewhere in this file)
  bzero(localpattern, sizeof(localpattern));
  bzero(locallookup_headers, sizeof(locallookup_headers));
  bzero(locallookup_footers, sizeof(locallookup_footers));
  int i = 0;
  // every lookup row starts out "empty": terminator in column 0
  for(i = 0; i < LOOKUP_ROWS; i++) {
    locallookup_headers[i][0] = LOOKUP_ENDOFROW;
    locallookup_footers[i][0] = LOOKUP_ENDOFROW;
  }

  struct SearchSpecLine *currentneedle = 0;
  int j = 0;                    // index into localpattern: header at j, footer at j+1
  int k = 0;                    // scan index within a lookup row
  for(i = 0; i < state->specLines; i++) {
    currentneedle = &(state->SearchSpec[i]);

    // header/footer initialization for one Scalpel rule

    localpattern[j][0] = currentneedle->beginlength;
    memcpy(&(localpattern[j][1]), currentneedle->begin,
	   currentneedle->beginlength);
    localpattern[j][PATTERN_CASESEN] = currentneedle->casesensitive;

    // Here's the rub. Some patterns start with the wildcard. In order for
    // the lookup tables to work out efficiently, we need to be able to skip
    // over leading wildcards, so we keep track of the shift from the
    // beginning of the pattern to the first non-wildcard character.
    int wc_shift = 0;
    while ((localpattern[j][wc_shift + 1] == wildcard)
	   && (wc_shift < localpattern[j][0])) {
      wc_shift++;
    }

    // Now we can set up the lookup table to use the first non-wildcard
    // byte of the pattern. Append j to the row for that byte, then
    // re-terminate the row.
    k = 0;
    while (locallookup_headers[(unsigned char)localpattern[j][wc_shift + 1]][k]
	   != LOOKUP_ENDOFROW) {
      k++;
    }
    locallookup_headers[(unsigned char)localpattern[j][wc_shift + 1]][k] = j;
    locallookup_headers[(unsigned char)localpattern[j][wc_shift + 1]][k + 1] =
      LOOKUP_ENDOFROW;

    // And let's not forget to store the shift, we'll need in in the GPU when
    // we search.
    localpattern[j][PATTERN_WCSHIFT] = wc_shift;

    // We never expect a footer to begin with a wildcard -- it makes no sense.
    // BUG: Assume we'll find them anyway.
    localpattern[j + 1][0] = currentneedle->endlength;
    memcpy(&(localpattern[j + 1][1]), currentneedle->end,
	   currentneedle->endlength);
    localpattern[j + 1][PATTERN_CASESEN] = currentneedle->casesensitive;
    if(currentneedle->endlength > 0) {
      // footers get the same row-append treatment, keyed on first byte
      // (no wildcard shift for footers -- see BUG note above)
      k = 0;
      while (locallookup_footers[(unsigned char)localpattern[j + 1][1]][k] !=
	     LOOKUP_ENDOFROW) {
	k++;
      }
      locallookup_footers[(unsigned char)localpattern[j + 1][1]][k] = j + 1;
      locallookup_footers[(unsigned char)localpattern[j + 1][1]][k + 1] =
	LOOKUP_ENDOFROW;
    }
    j += 2;
  }
  localpattern[j][0] = 0;	// mark end of array
  localpattern[j + 1][0] = 0;


  // ...then copy to GPU constant memory. This assignment is persistent across
  // the entire Scalpel execution.
  copytodevicepattern(localpattern);
  copytodevicelookup_headers(locallookup_headers);
  copytodevicelookup_footers(locallookup_footers);

  printf("Copying tables to GPU constant memory complete.\n");

  int longestneedle = findLongestNeedle(state->SearchSpec);
  gpu_init(longestneedle);
  printf("Initializing GPU buffers complete.\n");

  // Now we get buffers from the full_readbuf queue, send them to the GPU for
  // seach, wait till GPU finishes, and put the results buffers into the
  // results_readbuf queue.
  // NOTE(review): if reads_finished becomes TRUE while this thread is
  // blocked in get(full_readbuf) with the queue empty, this loop could
  // block forever -- confirm get()/reader ordering guarantees.
  while (!reads_finished) {
    readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
    fullbuf = rinfo->readbuf;
    gpuSearchBuffer(fullbuf, (unsigned int)rinfo->bytesread, fullbuf,
		    longestneedle, wildcard);
    put(results_readbuf, (void *)rinfo);
  }

  // reads are finished, but there may still be stuff in the queue
  while (!full_readbuf->empty) {
    readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
    fullbuf = rinfo->readbuf;
    gpuSearchBuffer(fullbuf, (unsigned int)rinfo->bytesread, fullbuf,
		    longestneedle, wildcard);
    put(results_readbuf, (void *)rinfo);
  }

  // Done with image.
  gpu_finished = TRUE;
  gpu_cleanup();
  pthread_exit(0);
}
#endif
1008
1009
1010
1011 // Streaming reader gets empty buffers from the empty_readbuf queue, reads
1012 // SIZE_OF_BUFFER chunks of the input image into the buffers and puts them into
1013 // the full_readbuf queue for processing.
streaming_reader(void * sss)1014 void *streaming_reader(void *sss) {
1015
1016 struct scalpelState *state = (struct scalpelState *)sss;
1017
1018 long long filesize = 0, bytesread = 0, filebegin = 0,
1019 fileposition = 0, beginreadpos = 0;
1020 long err = SCALPEL_OK;
1021 int displayUnits = UNITS_BYTES;
1022 int longestneedle = findLongestNeedle(state->SearchSpec);
1023
1024 reads_finished = FALSE;
1025
1026 filebegin = ftello(state->infile);
1027 if((filesize = measureOpenFile(state->infile, state)) == -1) {
1028 fprintf(stderr,
1029 "ERROR: Couldn't measure size of image file %s\n",
1030 state->imagefile);
1031 err=SCALPEL_ERROR_FILE_READ;
1032 // goto exit_reader_thread;
1033 }
1034
1035 // Get empty buffer from empty_readbuf queue
1036 readbuf_info *rinfo = (readbuf_info *)get(empty_readbuf);
1037
1038 // Read chunk of image into empty buffer.
1039 while ((bytesread =
1040 fread_use_coverage_map(state, rinfo->readbuf, 1, SIZE_OF_BUFFER,
1041 state->infile)) > longestneedle - 1) {
1042
1043 if(state->modeVerbose) {
1044 #ifdef _WIN32
1045 fprintf(stdout, "Read %I64u bytes from image file.\n", bytesread);
1046 #else
1047 fprintf(stdout, "Read %llu bytes from image file.\n", bytesread);
1048 #endif
1049 }
1050
1051 if((err = ferror(state->infile))) {
1052 err = SCALPEL_ERROR_FILE_READ;
1053 goto exit_reader_thread;
1054 }
1055
1056 // progress report needs a fileposition that doesn't depend on coverage map
1057 fileposition = ftello(state->infile);
1058 displayPosition(&displayUnits, fileposition - filebegin,
1059 filesize, state->imagefile);
1060
1061 // if carving is dependent on coverage map, need adjusted fileposition
1062 fileposition = ftello_use_coverage_map(state, state->infile);
1063 beginreadpos = fileposition - bytesread;
1064
1065 //signal check
1066 if(signal_caught == SIGTERM || signal_caught == SIGINT) {
1067 clean_up(state, signal_caught);
1068 }
1069
1070 // Put now-full buffer into full_readbuf queue.
1071 // Note that if the -s option was used we need to adjust the relative begin
1072 // position
1073 rinfo->bytesread = bytesread;
1074 rinfo->beginreadpos = beginreadpos - state->skip;
1075 put(full_readbuf, (void *)rinfo);
1076
1077 // At this point, the host, GPU, whatever can start searching the buffer.
1078
1079 // Get another empty buffer.
1080 rinfo = (readbuf_info *)get(empty_readbuf);
1081
1082 // move file position back a bit so headers and footers that fall
1083 // across SIZE_OF_BUFFER boundaries in the image file aren't
1084 // missed
1085 fseeko_use_coverage_map(state, state->infile, -1 * (longestneedle - 1));
1086 }
1087
1088 exit_reader_thread:
1089 if (err != SCALPEL_OK) {
1090 handleError(state, err);
1091 }
1092 // Done reading image.
1093 reads_finished = TRUE;
1094 if (state->infile) {
1095 fclose(state->infile);
1096 }
1097 pthread_exit(0);
1098 return NULL;
1099 }
1100
1101
1102
// Scalpel's approach dictates that this function digAllFiles an image
// file, building the header/footer offset database. The task of
// extracting files from the image has been moved to carveImageFile(),
// which operates in a second pass over the image. Digging for
// header/footer values proceeds in SIZE_OF_BUFFER sized chunks of the
// image file. This buffer is now global and named "readbuffer".
//
// Returns SCALPEL_OK on success or a SCALPEL_ERROR_* code. Spawns the
// streaming_reader thread (and, under GPU_THREADING, the gpu_handler
// thread) and consumes their buffers via digBuffer().
int digImageFile(struct scalpelState *state) {

  int status, err;
  int longestneedle = findLongestNeedle(state->SearchSpec);
  // NOTE(review): filebegin is assigned below but never read in this
  // function -- candidate for removal.
  long long filebegin, filesize;


  if ((err = setupAuditFile(state)) != SCALPEL_OK) {
    return err;
  }

  // at least one carve rule must have been parsed from the config file
  if(state->SearchSpec[0].suffix == NULL) {
    return SCALPEL_ERROR_NO_SEARCH_SPEC;
  }

  // open current image file
  if((state->infile = fopen(state->imagefile, "rb")) == NULL) {
    return SCALPEL_ERROR_FILE_OPEN;
  }

#ifdef _WIN32
  // set binary mode for Win32
  setmode(fileno(state->infile), O_BINARY);
#endif
#ifdef __linux
  fcntl(fileno(state->infile), F_SETFL, O_LARGEFILE);
#endif

  // skip initial portion of input file, if that cmd line option
  // was set
  if(state->skip > 0) {
    if(!skipInFile(state, state->infile)) {
      return SCALPEL_ERROR_FILE_READ;
    }

    // ***GGRIII: want to update coverage bitmap when skip is specified????
  }

  filebegin = ftello(state->infile);
  if((filesize = measureOpenFile(state->infile, state)) == -1) {
    fprintf(stderr,
	    "ERROR: Couldn't measure size of image file %s\n",
	    state->imagefile);
    return SCALPEL_ERROR_FILE_READ;
  }

  // can't process an image file smaller than the longest needle
  if(filesize <= longestneedle * 2) {
    return SCALPEL_ERROR_FILE_TOO_SMALL;
  }

#ifdef _WIN32
  if(state->modeVerbose) {
    fprintf(stdout, "Total file size is %I64u bytes\n", filesize);
  }
#else
  if(state->modeVerbose) {
    fprintf(stdout, "Total file size is %llu bytes\n", filesize);
  }
#endif

  // allocate and initialize coverage bitmap and blockmap, if appropriate
  if((err = setupCoverageMaps(state, filesize)) != SCALPEL_OK) {
    return err;
  }

  // process SIZE_OF_BUFFER-sized chunks of the current image
  // file and look for both headers and footers, recording their
  // offsets for use in the 2nd scalpel phase, when file data will
  // be extracted.

  fprintf(stdout, "Image file pass 1/2.\n");

  // Create and start the streaming reader thread for this image file.
  reads_finished = FALSE;
  pthread_t reader;
  if(pthread_create(&reader, NULL, streaming_reader, (void *)state) != 0) {
    return SCALPEL_ERROR_PTHREAD_FAILURE;
  }

  // wait on reader mutex "reads_begun"


#ifdef GPU_THREADING

  // Create and start the gpu_handler for searches on this image.
  gpu_finished = FALSE;
  pthread_t gpu;
  if(pthread_create(&gpu, NULL, gpu_handler, (void *)state) != 0) {
    return SCALPEL_ERROR_PTHREAD_FAILURE;
  }

  // The reader is now reading in the image, and the GPU is searching the
  // buffers. We get the results buffers and call digBuffer to decode them.
  // NOTE(review): if gpu_finished flips to TRUE while we block in
  // get(results_readbuf) on an empty queue, this could block forever --
  // confirm get() semantics (the commented-out condition suggests this
  // was already being wrestled with).
  while (!gpu_finished) {	// || !results_readbuf->empty) {
    readbuf_info *rinfo = (readbuf_info *)get(results_readbuf);
    readbuffer = rinfo->readbuf;
    if((status =
	digBuffer(state, rinfo->bytesread, rinfo->beginreadpos
	)) != SCALPEL_OK) {
      return status;
    }
    put(empty_readbuf, (void *)rinfo);
  }

  // the GPU is finished, but there may still be results buffers to decode
  while (!results_readbuf->empty) {
    readbuf_info *rinfo = (readbuf_info *)get(results_readbuf);
    readbuffer = rinfo->readbuf;
    if((status =
	digBuffer(state, rinfo->bytesread, rinfo->beginreadpos
	)) != SCALPEL_OK) {
      return status;
    }
    put(empty_readbuf, (void *)rinfo);
  }


#endif

#ifdef MULTICORE_THREADING

  // The reader is now reading in chunks of the image. We call digbuffer on
  // these chunks for multi-threaded search.

  // NOTE(review): check-then-block race: if reads_finished becomes TRUE and
  // the queue drains between the loop test and get(), get(full_readbuf)
  // may block indefinitely -- confirm queue wakeup behavior.
  while (!reads_finished || !full_readbuf->empty) {

    readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
    readbuffer = rinfo->readbuf;
    if((status =
	digBuffer(state, rinfo->bytesread, rinfo->beginreadpos
	)) != SCALPEL_OK) {
      return status;
    }
    put(empty_readbuf, (void *)rinfo);
  }

#endif

  return SCALPEL_OK;
}
1250
1251
1252
1253
// carveImageFile() uses the header/footer offsets database
// created by digImageFile() to build a list of files to carve. These
// files are then carved during a single, sequential pass over the
// image file. The global 'readbuffer' is used as a buffer in this
// function.
//
// Returns SCALPEL_OK on success or a SCALPEL_ERROR_* code. In preview
// mode (-p) no files are written; seeks replace reads in pass 2.

int carveImageFile(struct scalpelState *state) {

  FILE *infile;
  struct SearchSpecLine *currentneedle;
  struct CarveInfo *carveinfo;
  char fn[MAX_STRING_LENGTH];	// temp buffer for output filename
  char orgdir[MAX_STRING_LENGTH];	// buffer for name of organizing subdirectory
  long long start, stop;	// temp begin/end bytes for file to carve
  unsigned long long prevstopindex;	// tracks index of first 'reasonable'
  // footer
  int needlenum;
  long long filesize = 0, bytesread = 0, fileposition = 0, filebegin = 0;
  long err = 0;
  int displayUnits = UNITS_BYTES;
  int success = 0;
  long long i, j;
  int halt;
  char chopped;			// file chopped because it exceeds
  // max carve size for type?
  int CURRENTFILESOPEN = 0;	// number of files open (during carve)
  unsigned long long firstcandidatefooter=0;

  // index of header and footer within image file, in SIZE_OF_BUFFER
  // blocks
  unsigned long long headerblockindex, footerblockindex;

  struct Queue *carvelists;	// one entry for each SIZE_OF_BUFFER bytes of
  // input file
  // struct timeval queuenow, queuethen;

  // open image file and get size so carvelists can be allocated
  if((infile = fopen(state->imagefile, "rb")) == NULL) {
    fprintf(stderr, "ERROR: Couldn't open input file: %s -- %s\n",
	    (*(state->imagefile) == '\0') ? "<blank>" : state->imagefile,
	    strerror(errno));
    return SCALPEL_ERROR_FILE_OPEN;
  }

#ifdef _WIN32
  // explicit binary option for Win32
  setmode(fileno(infile), O_BINARY);
#endif
#ifdef __linux
  fcntl(fileno(infile), F_SETFL, O_LARGEFILE);
#endif

  // If skip was activated, then there's no way headers/footers were
  // found there, so skip during the carve operations, too

  if(state->skip > 0) {
    if(!skipInFile(state, infile)) {
      return SCALPEL_ERROR_FILE_READ;
    }
  }

  filebegin = ftello(infile);
  if((filesize = measureOpenFile(infile, state)) == -1) {
    fprintf(stderr,
	    "ERROR: Couldn't measure size of image file %s\n",
	    state->imagefile);
    return SCALPEL_ERROR_FILE_READ;
  }


  // gettimeofday(&queuethen, 0);

  // allocate memory for carvelists--we alloc a queue for each
  // SIZE_OF_BUFFER bytes in advance because it's simpler and an empty
  // queue doesn't consume much memory, anyway.

  // NOTE(review): malloc cast is unnecessary in C; kept byte-identical here.
  carvelists =
    (Queue *) malloc(sizeof(Queue) * (2 + (filesize / SIZE_OF_BUFFER)));
  checkMemoryAllocation(state, carvelists, __LINE__, __FILE__, "carvelists");

  // queue associated with each buffer of data holds pointers to
  // CarveInfo structures.

  fprintf(stdout, "Allocating work queues...\n");

  for(i = 0; i < 2 + (filesize / SIZE_OF_BUFFER); i++) {
    init_queue(&carvelists[i], sizeof(struct CarveInfo *), TRUE, 0, TRUE);
  }
  fprintf(stdout, "Work queues allocation complete. Building work queues...\n");

  // build carvelists before 2nd pass over image file

  for(needlenum = 0; needlenum < state->specLines; needlenum++) {

    currentneedle = &(state->SearchSpec[needlenum]);

    // handle each discovered header independently

    prevstopindex = 0;
    for(i = 0; i < (long long)currentneedle->offsets.numheaders; i++) {
      start = currentneedle->offsets.headers[i];

      ////////////// DEBUG ////////////////////////
      //fprintf(stdout, "start: %lu\n", start);

      // block aligned test for "-q"

      if(state->blockAlignedOnly && start % state->alignedblocksize != 0) {
	continue;
      }

      stop = 0;
      chopped = 0;

      // case 1: no footer defined for this file type
      if(!currentneedle->endlength) {

	// this is the unfortunate case--if file type doesn't have a footer,
	// all we can done is carve a block between header position and
	// maximum carve size.
	stop = start + currentneedle->length - 1;
	// these are always considered chopped, because we don't really
	// know the actual size
	chopped = 1;
      }
      else if(currentneedle->searchtype == SEARCHTYPE_FORWARD ||
	      currentneedle->searchtype == SEARCHTYPE_FORWARD_NEXT) {
	// footer defined: use FORWARD or FORWARD_NEXT semantics.
	// Stop at first occurrence of footer, but for FORWARD,
	// include the header in the carved file; for FORWARD_NEXT,
	// don't include footer in carved file. For FORWARD_NEXT, if
	// no footer is found, then the maximum carve size for this
	// file type will be used and carving will proceed. For
	// FORWARD, if no footer is found then no carving will be
	// performed unless -b was specified on the command line.

	halt = 0;

	if (state->handleEmbedded &&
	    (currentneedle->searchtype == SEARCHTYPE_FORWARD ||
	     currentneedle->searchtype == SEARCHTYPE_FORWARD_NEXT)) {
	  firstcandidatefooter=adjustForEmbedding(currentneedle, i, &prevstopindex);
	}
	else {
	  firstcandidatefooter=prevstopindex;
	}

	for (j=firstcandidatefooter;
	     j < (long long)currentneedle->offsets.numfooters && ! halt; j++) {

	  if((long long)currentneedle->offsets.footers[j] <= start) {
	    if (! state->handleEmbedded) {
	      prevstopindex=j;
	    }

	  }
	  else {
	    halt = 1;
	    stop = currentneedle->offsets.footers[j];

	    if(currentneedle->searchtype == SEARCHTYPE_FORWARD) {
	      // include footer in carved file
	      stop += currentneedle->endlength - 1;
	      // BUG? this or above? stop += currentneedle->offsets.footerlens[j] - 1;
	    }
	    else {
	      // FORWARD_NEXT--don't include footer in carved file
	      stop--;
	    }
	    // sanity check on size of potential file to carve--different
	    // actions depending on FORWARD or FORWARD_NEXT semantics
	    if(stop - start + 1 > (long long)currentneedle->length) {
	      if(currentneedle->searchtype == SEARCHTYPE_FORWARD) {
		// if the user specified -b, then foremost 0.69
		// compatibility is desired: carve this file even
		// though the footer wasn't found and indicate
		// the file was chopped, in the log. Otherwise,
		// carve nothing and move on.
		if(state->carveWithMissingFooters) {
		  stop = start + currentneedle->length - 1;
		  chopped = 1;
		}
		else {
		  stop = 0;
		}
	      }
	      else {
		// footer found for FORWARD_NEXT, but distance exceeds
		// max carve size for this file type, so use max carve
		// size as stop
		stop = start + currentneedle->length - 1;
		chopped = 1;
	      }
	    }
	  }
	}
	if(!halt &&
	   (currentneedle->searchtype == SEARCHTYPE_FORWARD_NEXT ||
	    (currentneedle->searchtype == SEARCHTYPE_FORWARD &&
	     state->carveWithMissingFooters))) {
	  // no footer found for SEARCHTYPE_FORWARD_NEXT, or no footer
	  // found for SEARCHTYPE_FORWARD and user specified -b, so just use
	  // max carve size for this file type as stop
	  stop = start + currentneedle->length - 1;
	}
      }
      else {
	// footer defined: use REVERSE semantics: want matching footer
	// as far away from header as possible, within maximum carving
	// size for this file type. Don't bother to look at footers
	// that can't possibly match a header and remember this info
	// in prevstopindex, as the next headers will be even deeper
	// into the image file. Footer is included in carved file for
	// this type of carve.
	halt = 0;
	for(j = prevstopindex; j < (long long)currentneedle->offsets.numfooters &&
	    !halt; j++) {
	  if((long long)currentneedle->offsets.footers[j] <= start) {
	    prevstopindex = j;
	  }
	  else if(currentneedle->offsets.footers[j] - start <=
		  currentneedle->length) {
	    stop = currentneedle->offsets.footers[j]
	      + currentneedle->endlength - 1;
	  }
	  else {
	    halt = 1;
	  }
	}
      }

      // if stop <> 0, then we have enough information to set up a
      // file carving operation. It must pass the minimum carve size
      // test, if currentneedle->minLength != 0.
      // if(stop) {
      if (stop && (stop - start + 1) >= (long long)currentneedle->minlength) {

	// don't carve past end of image file...
	stop = stop > filesize ? filesize : stop;

	// find indices (in SIZE_OF_BUFFER units) of header and
	// footer, so the carveinfo can be placed into the right
	// queues. The priority of each element in a queue allows the
	// appropriate thing to be done (e.g., STARTSTOPCARVE,
	// STARTCARVE, STOPCARVE, CONTINUECARVE).

	headerblockindex = start / SIZE_OF_BUFFER;
	footerblockindex = stop / SIZE_OF_BUFFER;

	// set up a struct CarveInfo for inclusion into the
	// appropriate carvelists

	// generate unique filename for file to carve

	if(state->organizeSubdirectories) {
	  snprintf(orgdir, MAX_STRING_LENGTH, "%s/%s-%d-%1lu",
		   state->outputdirectory,
		   currentneedle->suffix,
		   needlenum, currentneedle->organizeDirNum);
	  if(!state->previewMode) {
#ifdef _WIN32
	    mkdir(orgdir);
#else
	    mkdir(orgdir, 0777);
#endif
	  }
	}
	else {
	  snprintf(orgdir, MAX_STRING_LENGTH, "%s", state->outputdirectory);
	}

	if(state->modeNoSuffix || currentneedle->suffix[0] ==
	   SCALPEL_NOEXTENSION) {
#ifdef _WIN32
	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08I64u",
		   orgdir, state->fileswritten);
#else
	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08llu",
		   orgdir, state->fileswritten);
#endif

	}
	else {
#ifdef _WIN32
	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08I64u.%s",
		   orgdir, state->fileswritten, currentneedle->suffix);
#else
	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08llu.%s",
		   orgdir, state->fileswritten, currentneedle->suffix);
#endif
	}
	state->fileswritten++;
	currentneedle->numfilestocarve++;
	if(currentneedle->numfilestocarve % state->organizeMaxFilesPerSub == 0) {
	  currentneedle->organizeDirNum++;
	}

	carveinfo = (struct CarveInfo *)malloc(sizeof(struct CarveInfo));
	checkMemoryAllocation(state, carveinfo, __LINE__, __FILE__,
			      "carveinfo");

	// remember filename
	carveinfo->filename = (char *)malloc(strlen(fn) + 1);
	checkMemoryAllocation(state, carveinfo->filename, __LINE__,
			      __FILE__, "carveinfo");
	strcpy(carveinfo->filename, fn);
	carveinfo->start = start;
	carveinfo->stop = stop;
	carveinfo->chopped = chopped;

	// fp will be allocated when the first byte of the file is
	// in the current buffer and cleaned up when we encounter the
	// last byte of the file.
	carveinfo->fp = 0;

	if(headerblockindex == footerblockindex) {
	  // header and footer will both appear in the same buffer
	  add_to_queue(&carvelists[headerblockindex],
		       &carveinfo, STARTSTOPCARVE);
	}
	else {
	  // header/footer will appear in different buffers, add carveinfo to
	  // stop and start lists...
	  add_to_queue(&carvelists[headerblockindex], &carveinfo, STARTCARVE);
	  add_to_queue(&carvelists[footerblockindex], &carveinfo, STOPCARVE);
	  // .. and to all lists in between (these will result in a full
	  // SIZE_OF_BUFFER bytes being carved into the file).
	  for(j = (long long)headerblockindex + 1; j < (long long)footerblockindex; j++) {
	    add_to_queue(&carvelists[j], &carveinfo, CONTINUECARVE);
	  }
	}
      }
    }
  }

  fprintf(stdout, "Work queues built. Workload:\n");
  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    currentneedle = &(state->SearchSpec[needlenum]);
    fprintf(stdout, "%s with header \"", currentneedle->suffix);
    fprintf(stdout, "%s", currentneedle->begintext);
    fprintf(stdout, "\" and footer \"");
    if(currentneedle->end == 0) {
      fprintf(stdout, "NONE");
    }
    else {
      fprintf(stdout, "%s", currentneedle->endtext);
    }
#ifdef _WIN32
    fprintf(stdout, "\" --> %I64u files\n", currentneedle->numfilestocarve);
#else
    fprintf(stdout, "\" --> %llu files\n", currentneedle->numfilestocarve);
#endif

  }

  if(state->previewMode) {
    fprintf(stdout, "** PREVIEW MODE: GENERATING AUDIT LOG ONLY **\n");
    fprintf(stdout, "** NO CARVED FILES WILL BE WRITTEN **\n");
  }

  fprintf(stdout, "Carving files from image.\n");
  fprintf(stdout, "Image file pass 2/2.\n");

  // now read image file in SIZE_OF_BUFFER-sized windows, writing
  // carved files to output directory

  success = 1;
  while (success) {

    unsigned long long biglseek = 0L;
    // goal: skip reading buffers for which there is no work to do by using one big
    // seek
    fileposition = ftello_use_coverage_map(state, infile);

    while (queue_length(&carvelists[fileposition / SIZE_OF_BUFFER]) == 0
	   && success) {
      biglseek += SIZE_OF_BUFFER;
      fileposition += SIZE_OF_BUFFER;
      success = fileposition <= filesize;

    }

    if(success && biglseek) {
      fseeko_use_coverage_map(state, infile, biglseek);
    }

    if(!success) {
      // not an error--just means we've exhausted the image file--show
      // progress report then quit carving
      displayPosition(&displayUnits, filesize, filesize, state->imagefile);

      continue;
    }

    if(!state->previewMode) {
      bytesread =
	fread_use_coverage_map(state, readbuffer, 1, SIZE_OF_BUFFER, infile);
      // Check for read errors
      if((err = ferror(infile))) {
	return SCALPEL_ERROR_FILE_READ;
      }
      else if(bytesread == 0) {
	// no error, but image file exhausted
	success = 0;
	continue;
      }
    }
    else {
      // in preview mode, seeks are used in the 2nd pass instead of
      // reads. This isn't optimal, but it's fast enough and avoids
      // complicating the file carving code further.

      fileposition = ftello_use_coverage_map(state, infile);
      fseeko_use_coverage_map(state, infile, SIZE_OF_BUFFER);
      bytesread = ftello_use_coverage_map(state, infile) - fileposition;

      // Check for errors
      if((err = ferror(infile))) {
	return SCALPEL_ERROR_FILE_READ;
      }
      else if(bytesread == 0) {
	// no error, but image file exhausted
	success = 0;
	continue;
      }
    }

    success = 1;

    // progress report needs real file position
    fileposition = ftello(infile);
    displayPosition(&displayUnits, fileposition - filebegin,
		    filesize, state->imagefile);

    // if using coverage map for carving, need adjusted file position
    fileposition = ftello_use_coverage_map(state, infile);

    // signal check
    if(signal_caught == SIGTERM || signal_caught == SIGINT) {
      clean_up(state, signal_caught);
    }

    // deal with work for this SIZE_OF_BUFFER-sized block by
    // examining the associated queue
    rewind_queue(&carvelists[(fileposition - bytesread) / SIZE_OF_BUFFER]);

    while (!end_of_queue
	   (&carvelists[(fileposition - bytesread) / SIZE_OF_BUFFER])) {
      struct CarveInfo *carve;
      int operation;
      unsigned long long bytestowrite = 0, byteswritten = 0, offset = 0;

      peek_at_current(&carvelists
		      [(fileposition - bytesread) / SIZE_OF_BUFFER], &carve);
      operation =
	current_priority(&carvelists
			 [(fileposition - bytesread) / SIZE_OF_BUFFER]);

      // open file, if beginning of carve operation or file had to be closed
      // previously due to resource limitations
      if(operation == STARTSTOPCARVE ||
	 operation == STARTCARVE || carve->fp == 0) {

	if(!state->previewMode && state->modeVerbose) {
	  fprintf(stdout, "OPENING %s\n", carve->filename);
	}

	// in preview mode fp is a dummy non-NULL marker; no file is opened
	carve->fp = (FILE *) 1;
	if(!state->previewMode) {
	  carve->fp = fopen(carve->filename, "ab");
	}

	if(!carve->fp) {
	  fprintf(stderr, "Error opening file: %s -- %s\n",
		  carve->filename, strerror(errno));
	  fprintf(state->auditFile, "Error opening file: %s -- %s\n",
		  carve->filename, strerror(errno));
	  return SCALPEL_ERROR_FILE_WRITE;
	}
	else {
	  CURRENTFILESOPEN++;
	}
      }

      // write some portion of current readbuffer
      switch (operation) {
      case CONTINUECARVE:
	offset = 0;
	bytestowrite = SIZE_OF_BUFFER;
	break;
      case STARTSTOPCARVE:
	offset = carve->start - (fileposition - bytesread);
	bytestowrite = carve->stop - carve->start + 1;
	break;
      case STARTCARVE:
	offset = carve->start - (fileposition - bytesread);
	bytestowrite = (carve->stop - carve->start + 1) >
	  (SIZE_OF_BUFFER - offset) ? (SIZE_OF_BUFFER - offset) :
	  (carve->stop - carve->start + 1);
	break;
      case STOPCARVE:
	offset = 0;
	bytestowrite = carve->stop - (fileposition - bytesread) + 1;
	break;
      }

      if(!state->previewMode) {
	// struct timeval writenow, writethen;
	// gettimeofday(&writethen, 0);
	if((byteswritten = fwrite(readbuffer + offset,
				  sizeof(char),
				  bytestowrite, carve->fp)) != bytestowrite) {

	  fprintf(stderr, "Error writing to file: %s -- %s\n",
		  carve->filename, strerror(ferror(carve->fp)));
	  fprintf(state->auditFile,
		  "Error writing to file: %s -- %s\n",
		  carve->filename, strerror(ferror(carve->fp)));
	  return SCALPEL_ERROR_FILE_WRITE;
	}
      }

      // close file, if necessary. Always do it on STARTSTOPCARVE and
      // STOPCARVE, but also do it if we have a large number of files
      // open, otherwise we'll run out of available file handles. Updating the
      // coverage blockmap and auditing is done here, when a file being carved
      // is closed for the last time.
      if(operation == STARTSTOPCARVE ||
	 operation == STOPCARVE || CURRENTFILESOPEN > MAX_FILES_TO_OPEN) {
	err = 0;
	if(!state->previewMode) {
	  if(state->modeVerbose) {
	    fprintf(stdout, "CLOSING %s\n", carve->filename);
	  }
	  err = fclose(carve->fp);
	}

	if(err) {
	  fprintf(stderr, "Error closing file: %s -- %s\n\n",
		  carve->filename, strerror(ferror(carve->fp)));
	  fprintf(state->auditFile,
		  "Error closing file: %s -- %s\n\n",
		  carve->filename, strerror(ferror(carve->fp)));
	  return SCALPEL_ERROR_FILE_WRITE;
	}
	else {
	  CURRENTFILESOPEN--;
	  carve->fp = 0;

	  // release filename buffer if it won't be needed again. Don't release it
	  // if the file was closed only because a large number of files are currently
	  // open!
	  if(operation == STARTSTOPCARVE || operation == STOPCARVE) {
	    auditUpdateCoverageBlockmap(state, carve);
	    free(carve->filename);
	  }
	}
      }
      next_element(&carvelists[(fileposition - bytesread) / SIZE_OF_BUFFER]);
    }
  }

  // closeFile(infile);
  fclose(infile);

  // write header/footer database, if necessary, before
  // cleanup for current image file.

  if(state->generateHeaderFooterDatabase) {
    if((err = writeHeaderFooterDatabase(state)) != SCALPEL_OK) {
      return err;
    }
  }

  // tear down coverage maps, if necessary
  destroyCoverageMaps(state);

  printf("Processing of image file complete. Cleaning up...\n");

  // tear down header/footer databases

  for(needlenum = 0; needlenum < state->specLines; needlenum++) {
    currentneedle = &(state->SearchSpec[needlenum]);
    if(currentneedle->offsets.headers) {
      free(currentneedle->offsets.headers);
    }
    if(currentneedle->offsets.footers) {
      free(currentneedle->offsets.footers);
    }
    currentneedle->offsets.headers = 0;
    currentneedle->offsets.footers = 0;
    currentneedle->offsets.numheaders = 0;
    currentneedle->offsets.numfooters = 0;
    currentneedle->offsets.headerstorage = 0;
    currentneedle->offsets.footerstorage = 0;
    currentneedle->numfilestocarve = 0;
  }

  // tear down work queues--no memory deallocation for each queue
  // entry required, because memory associated with fp and the
  // filename was freed after the carved file was closed.

  // destroy queues
  for(i = 0; i < 2 + (filesize / SIZE_OF_BUFFER); i++) {
    destroy_queue(&carvelists[i]);
  }
  // destroy array of queues
  free(carvelists);

  // NOTE(review): missing trailing '\n' -- stdout may stay unflushed/ragged
  printf("Done.");
  return SCALPEL_OK;
}
1866
1867
1868
1869
1870
1871
1872 // The coverage blockmap marks which blocks (of a user-specified size)
1873 // have been "covered" by a carved file. If the coverage blockmap
1874 // is to be modified, check to see if it exists. If it does, then
1875 // open it and set the file handle in the Scalpel state. If it
1876 // doesn't, create a zeroed copy and set the file handle in the
1877 // Scalpel state. If the coverage blockmap is guiding carving, then
1878 // create the coverage bitmap and initialize it using the coverage
1879 // blockmap file. The difference between the coverage blockmap (on
1880 // disk) and the coverage bitmap (in memory) is that the blockmap
1881 // counts carved files that cover a block. The coverage bitmap only
1882 // indicates if ANY carved file covers a block. 'filesize' is the
1883 // size of the image file being examined.
1884
1885 static int
setupCoverageMaps(struct scalpelState * state,unsigned long long filesize)1886 setupCoverageMaps(struct scalpelState *state, unsigned long long filesize) {
1887
1888 char fn[MAX_STRING_LENGTH]; // filename for coverage blockmap
1889 unsigned long long i, k;
1890 int empty;
1891 unsigned int blocksize, entry;
1892
1893
1894 state->coveragebitmap = 0;
1895 state->coverageblockmap = 0;
1896
1897 if(!state->useCoverageBlockmap && !state->updateCoverageBlockmap) {
1898 return SCALPEL_OK;
1899 }
1900
1901 fprintf(stdout, "Setting up coverage blockmap.\n");
1902 // generate pathname for coverage blockmap
1903 snprintf(fn, MAX_STRING_LENGTH, "%s", state->coveragefile);
1904
1905 fprintf(stdout, "Coverage blockmap is \"%s\".\n", fn);
1906
1907 empty = ((state->coverageblockmap = fopen(fn, "rb")) == NULL);
1908 fprintf(stdout, "Coverage blockmap file is %s.\n",
1909 (empty ? "EMPTY" : "NOT EMPTY"));
1910
1911 if(!empty) {
1912 #ifdef _WIN32
1913 // set binary mode for Win32
1914 setmode(fileno(state->coverageblockmap), O_BINARY);
1915 #endif
1916 #ifdef __linux
1917 fcntl(fileno(state->coverageblockmap), F_SETFL, O_LARGEFILE);
1918 #endif
1919
1920 fprintf(stdout, "Reading blocksize from coverage blockmap file.\n");
1921
1922 // read block size and make sure it matches user-specified block size
1923 if(fread(&blocksize, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
1924 fprintf(stderr,
1925 "Error reading coverage blockmap blocksize in\ncoverage blockmap file: %s\n",
1926 fn);
1927 fprintf(state->auditFile,
1928 "Error reading coverage blockmap blocksize in\ncoverage blockmap file: %s\n",
1929 fn);
1930 return SCALPEL_ERROR_FATAL_READ;
1931 }
1932
1933 if(state->coverageblocksize != 0 && blocksize != state->coverageblocksize) {
1934 fprintf(stderr,
1935 "User-specified blocksize does not match blocksize in\ncoverage blockmap file: %s; aborting.\n",
1936 fn);
1937 fprintf(state->auditFile,
1938 "User-specified blocksize does not match blocksize in\ncoverage blockmap file: %s\n",
1939 fn);
1940 return SCALPEL_GENERAL_ABORT;
1941 }
1942
1943 state->coverageblocksize = blocksize;
1944 fprintf(stdout, "Blocksize for coverage blockmap is %u.\n",
1945 state->coverageblocksize);
1946
1947 state->coveragenumblocks =
1948 (unsigned long
1949 long)(ceil((double)filesize / (double)state->coverageblocksize));
1950 #ifdef _WIN32
1951 fprintf(stdout, "# of blocks in coverage blockmap is %I64u.\n",
1952 state->coveragenumblocks);
1953 #else
1954 fprintf(stdout, "# of blocks in coverage blockmap is %llu.\n",
1955 state->coveragenumblocks);
1956 #endif
1957
1958 fprintf(stdout, "Allocating and clearing in-core coverage bitmap.\n");
1959 // for bitmap, 8 bits per unsigned char, with each bit representing one
1960 // block
1961 state->coveragebitmap =
1962 (unsigned char *)malloc((state->coveragenumblocks / 8) *
1963 sizeof(unsigned char));
1964 checkMemoryAllocation(state, state->coveragebitmap, __LINE__, __FILE__,
1965 "coveragebitmap");
1966
1967 // zap coverage bitmap
1968 for(k = 0; k < state->coveragenumblocks / 8; k++) {
1969 state->coveragebitmap[k] = 0;
1970 }
1971
1972 fprintf(stdout,
1973 "Reading existing coverage blockmap...this may take a while.\n");
1974
1975 for(i = 0; i < state->coveragenumblocks; i++) {
1976 fseeko(state->coverageblockmap, (i + 1) * sizeof(unsigned int), SEEK_SET);
1977 if(fread(&entry, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
1978 fprintf(stderr,
1979 "Error reading coverage blockmap entry (blockmap truncated?): %s\n",
1980 fn);
1981 fprintf(state->auditFile,
1982 "Error reading coverage blockmap entry (blockmap truncated?): %s\n",
1983 fn);
1984 return SCALPEL_ERROR_FATAL_READ;
1985 }
1986 if(entry) {
1987 state->coveragebitmap[i / 8] |= 1 << (i % 8);
1988 }
1989 }
1990 }
1991 else if(empty && state->useCoverageBlockmap && !state->updateCoverageBlockmap) {
1992 fprintf(stderr,
1993 "-u option requires that the blockmap file %s exist.\n", fn);
1994 fprintf(state->auditFile,
1995 "-u option requires that the blockmap file %s exist.\n", fn);
1996 return SCALPEL_GENERAL_ABORT;
1997 }
1998 else {
1999 // empty coverage blockmap
2000 if(state->coverageblocksize == 0) { // user didn't override default
2001 state->coverageblocksize = 512;
2002 }
2003 fprintf(stdout, "Blocksize for coverage blockmap is %u.\n",
2004 state->coverageblocksize);
2005 state->coveragenumblocks =
2006 (unsigned long long)ceil((double)filesize /
2007 (double)state->coverageblocksize);
2008 #ifdef _WIN32
2009 fprintf(stdout, "# of blocks in coverage blockmap is %I64u.\n",
2010 state->coveragenumblocks);
2011 #else
2012 fprintf(stdout, "# of blocks in coverage blockmap is %llu.\n",
2013 state->coveragenumblocks);
2014 #endif
2015
2016 fprintf(stdout, "Allocating and clearing in-core coverage bitmap.\n");
2017 // for bitmap, 8 bits per unsigned char, with each bit representing one
2018 // block
2019 state->coveragebitmap =
2020 (unsigned char *)malloc((state->coveragenumblocks / 8) *
2021 sizeof(unsigned char));
2022 checkMemoryAllocation(state, state->coveragebitmap, __LINE__, __FILE__,
2023 "coveragebitmap");
2024
2025 // zap coverage bitmap
2026 for(k = 0; k < state->coveragenumblocks / 8; k++) {
2027 state->coveragebitmap[k] = 0;
2028 }
2029 }
2030
2031 // change mode to read/write for future updates if coverage blockmap will be updated
2032 if(state->updateCoverageBlockmap) {
2033 if(state->modeVerbose) {
2034 fprintf(stdout, "Changing mode of coverage blockmap file to R/W.\n");
2035 }
2036
2037 if(!empty) {
2038 fclose(state->coverageblockmap);
2039 }
2040 if((state->coverageblockmap = fopen(fn, (empty ? "w+b" : "r+b"))) == NULL) {
2041 fprintf(stderr, "Error writing to coverage blockmap file: %s\n", fn);
2042 fprintf(state->auditFile,
2043 "Error writing to coverage blockmap file: %s\n", fn);
2044 return SCALPEL_ERROR_FILE_WRITE;
2045 }
2046
2047 #ifdef _WIN32
2048 // set binary mode for Win32
2049 setmode(fileno(state->coverageblockmap), O_BINARY);
2050 #endif
2051 #ifdef __linux
2052 fcntl(fileno(state->coverageblockmap), F_SETFL, O_LARGEFILE);
2053 #endif
2054
2055 if(empty) {
2056 // create entries in empty coverage blockmap file
2057 fprintf(stdout,
2058 "Writing empty coverage blockmap...this may take a while.\n");
2059 entry = 0;
2060 if(fwrite
2061 (&(state->coverageblocksize), sizeof(unsigned int), 1,
2062 state->coverageblockmap) != 1) {
2063 fprintf(stderr,
2064 "Error writing initial entry in coverage blockmap file!\n");
2065 fprintf(state->auditFile,
2066 "Error writing initial entry in coverage blockmap file!\n");
2067 return SCALPEL_ERROR_FILE_WRITE;
2068 }
2069 for(k = 0; k < state->coveragenumblocks; k++) {
2070 if(fwrite
2071 (&entry, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
2072 fprintf(stderr, "Error writing to coverage blockmap file!\n");
2073 fprintf(state->auditFile,
2074 "Error writing to coverage blockmap file!\n");
2075 return SCALPEL_ERROR_FILE_WRITE;
2076 }
2077 }
2078 }
2079 }
2080
2081 fprintf(stdout, "Finished setting up coverage blockmap.\n");
2082
2083 return SCALPEL_OK;
2084
2085 }
2086
// map carve->start ... carve->stop into a queue of 'fragments' that
// define a carved file in the disk image.  When no coverage blockmap is
// in use this is a single fragment identical to [start, stop]; when the
// blockmap guides carving, covered blocks are skipped and the carved
// file may be split into multiple on-disk fragments.  The caller owns
// 'fragments' and must destroy_queue() it.
static void
generateFragments(struct scalpelState *state, Queue * fragments,
		  CarveInfo * carve) {

  // neededbytes: logical length of the carved file
  // totalbytes:  bytes assigned to fragments so far
  // curpos:      current REAL disk-image offset
  unsigned long long curblock, neededbytes =
    carve->stop - carve->start + 1, bytestoskip, morebytes, totalbytes =
    0, curpos;

  Fragment frag;


  init_queue(fragments, sizeof(struct Fragment), TRUE, 0, TRUE);

  if(!state->useCoverageBlockmap) {
    // no translation necessary
    frag.start = carve->start;
    frag.stop = carve->stop;
    add_to_queue(fragments, &frag, 0);
    return;
  }
  else {
    // translate the logical start address into a real disk-image offset
    curpos = positionUseCoverageBlockmap(state, carve->start);
    curblock = curpos / state->coverageblocksize;

    // each pass emits one fragment: skip a covered run, then take an
    // uncovered run
    while (totalbytes < neededbytes && curblock < state->coveragenumblocks) {

      morebytes = 0;
      bytestoskip = 0;

      // skip covered blocks
      // NOTE(review): curpos is not advanced inside this loop, so every
      // block after the first reuses the same curpos % blocksize partial
      // amount; this is only exact when curpos is block-aligned on entry
      // -- confirm intended semantics before changing.
      while (curblock < state->coveragenumblocks &&
	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
	bytestoskip += state->coverageblocksize -
	  curpos % state->coverageblocksize;
	curblock++;
      }

      curpos += bytestoskip;

      // accumulate uncovered blocks in fragment
      // NOTE(review): same stale-curpos pattern as above.
      while (curblock < state->coveragenumblocks &&
	     ((state->
	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
	     && totalbytes + morebytes < neededbytes) {

	morebytes += state->coverageblocksize -
	  curpos % state->coverageblocksize;

	curblock++;
      }

      // cap size so the final fragment doesn't exceed the file length
      if(totalbytes + morebytes > neededbytes) {
	morebytes = neededbytes - totalbytes;
      }

      frag.start = curpos;
      curpos += morebytes;
      frag.stop = curpos - 1;
      totalbytes += morebytes;

      add_to_queue(fragments, &frag, 0);
    }
  }
}
2154
2155
// If the coverage blockmap is used to guide carving, then use the
// coverage blockmap to map a logical index in the disk image (i.e.,
// the index skips covered blocks) to an actual disk image index.  If
// the coverage blockmap isn't being used, just returns the second
// argument.
//
// ***This function assumes that the 'position' does NOT lie
// within a covered block! ***
static unsigned long long
positionUseCoverageBlockmap(struct scalpelState *state,
			    unsigned long long position) {


  // walk the bitmap from block 0: covered blocks advance curpos (the
  // real offset) without advancing totalbytes (the logical offset);
  // uncovered blocks advance both.
  unsigned long long totalbytes = 0, neededbytes = position,
    morebytes, curblock = 0, curpos = 0, bytestoskip;

  if(!state->useCoverageBlockmap) {
    return position;
  }
  else {
    while (totalbytes < neededbytes && curblock < state->coveragenumblocks) {
      morebytes = 0;
      bytestoskip = 0;

      // skip covered blocks
      // NOTE(review): curpos is not advanced inside this loop, so the
      // partial amount curpos % blocksize is reused for every covered
      // block in the run; exact only when curpos is block-aligned --
      // mirrors the pattern in generateFragments, verify together.
      while (curblock < state->coveragenumblocks &&
	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
	bytestoskip += state->coverageblocksize -
	  curpos % state->coverageblocksize;
	curblock++;
      }

      curpos += bytestoskip;

      // accumulate uncovered blocks
      while (curblock < state->coveragenumblocks &&
	     ((state->
	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
	     && totalbytes + morebytes < neededbytes) {

	morebytes += state->coverageblocksize -
	  curpos % state->coverageblocksize;
	curblock++;
      }

      // cap size so we stop exactly at the requested logical position
      if(totalbytes + morebytes > neededbytes) {
	morebytes = neededbytes - totalbytes;
      }

      curpos += morebytes;
      totalbytes += morebytes;
    }

    // curpos is now the real disk-image offset for logical 'position'
    return curpos;
  }
}
2213
2214
2215
2216 // update the coverage blockmap for a carved file (if appropriate) and write entries into
2217 // the audit log describing the carved file. If the file is fragmented, then multiple
2218 // lines are written to indicate where the fragments occur.
2219 static int
auditUpdateCoverageBlockmap(struct scalpelState * state,struct CarveInfo * carve)2220 auditUpdateCoverageBlockmap(struct scalpelState *state,
2221 struct CarveInfo *carve) {
2222
2223 struct Queue fragments;
2224 Fragment *frag;
2225 int err;
2226 unsigned long long k;
2227
2228 // If the coverage blockmap used to guide carving, then carve->start and
2229 // carve->stop may not correspond to addresses in the disk image--the coverage blockmap
2230 // processing layer in Scalpel may have skipped "in use" blocks. Transform carve->start
2231 // and carve->stop into a list of fragments that contain real disk image offsets.
2232 generateFragments(state, &fragments, carve);
2233
2234 rewind_queue(&fragments);
2235 while (!end_of_queue(&fragments)) {
2236 frag = (Fragment *) pointer_to_current(&fragments);
2237 fprintf(state->auditFile, "%s", base_name(carve->filename));
2238 #ifdef _WIN32
2239 fprintf(state->auditFile, "%13I64u\t\t", frag->start);
2240 #else
2241 fprintf(state->auditFile, "%13llu\t\t", frag->start);
2242 #endif
2243
2244 fprintf(state->auditFile, "%3s", carve->chopped ? "YES " : "NO ");
2245
2246 #ifdef _WIN32
2247 fprintf(state->auditFile, "%13I64u\t\t", frag->stop - frag->start + 1);
2248 #else
2249 fprintf(state->auditFile, "%13llu\t\t", frag->stop - frag->start + 1);
2250 #endif
2251
2252 fprintf(state->auditFile, "%s\n", base_name(state->imagefile));
2253
2254 fflush(state->auditFile);
2255
2256 // update coverage blockmap, if appropriate
2257 if(state->updateCoverageBlockmap) {
2258 for(k = frag->start / state->coverageblocksize;
2259 k <= frag->stop / state->coverageblocksize; k++) {
2260 if((err = updateCoverageBlockmap(state, k)) != SCALPEL_OK) {
2261 destroy_queue(&fragments);
2262 return err;
2263 }
2264 }
2265 }
2266 next_element(&fragments);
2267 }
2268
2269 destroy_queue(&fragments);
2270
2271 return SCALPEL_OK;
2272 }
2273
2274
2275
2276 static int
updateCoverageBlockmap(struct scalpelState * state,unsigned long long block)2277 updateCoverageBlockmap(struct scalpelState *state, unsigned long long block) {
2278
2279 unsigned int entry;
2280
2281 if(state->updateCoverageBlockmap) {
2282 // first entry in file is block size, so seek one unsigned int further
2283 fseeko(state->coverageblockmap, (block + 1) * sizeof(unsigned int),
2284 SEEK_SET);
2285 if(fread(&entry, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
2286 fprintf(stderr, "Error reading coverage blockmap entry!\n");
2287 fprintf(state->auditFile, "Error reading coverage blockmap entry!\n");
2288 return SCALPEL_ERROR_FATAL_READ;
2289 }
2290 entry++;
2291 // first entry in file is block size, so seek one unsigned int further
2292 fseeko(state->coverageblockmap, (block + 1) * sizeof(unsigned int),
2293 SEEK_SET);
2294 if(fwrite(&entry, sizeof(unsigned int), 1, state->coverageblockmap)
2295 != 1) {
2296 fprintf(stderr, "Error writing to coverage blockmap file!\n");
2297 fprintf(state->auditFile, "Error writing to coverage blockmap file!\n");
2298 return SCALPEL_ERROR_FILE_WRITE;
2299 }
2300 }
2301
2302 return SCALPEL_OK;
2303 }
2304
2305
2306
destroyCoverageMaps(struct scalpelState * state)2307 static void destroyCoverageMaps(struct scalpelState *state) {
2308
2309 // free memory associated with coverage bitmap, close coverage blockmap file
2310
2311 if(state->coveragebitmap) {
2312 free(state->coveragebitmap);
2313 }
2314
2315 if(state->useCoverageBlockmap || state->updateCoverageBlockmap) {
2316 fclose(state->coverageblockmap);
2317 }
2318 }
2319
2320
// simple wrapper for fseeko with SEEK_CUR semantics that uses the
// coverage bitmap to skip over covered blocks, IF the coverage
// blockmap is being used.  The offset is adjusted so that covered
// blocks are silently skipped when seeking if the coverage blockmap
// is used, otherwise an fseeko() with an umodified offset is
// performed.  Returns the fseeko() result (0 on success).
static int
fseeko_use_coverage_map(struct scalpelState *state, FILE * fp, off64_t offset) {

  off64_t currentpos;

  // BUG TEST: revision 5 changed curbloc to unsigned, removed all tests for
  // curblock >= 0 from each of next three while loops
  // we should put back guards to make sure we don't underflow
  // what did I break? anything? maybe not?
  // NOTE(review): with sign == -1, 'curblock += sign' on an unsigned
  // long long wraps to a huge value at block 0, terminating the loops
  // via the < coveragenumblocks tests; whether that is the intended
  // backward-seek behavior needs confirmation, per the note above.

  unsigned long long curblock, bytestoskip, bytestokeep, totalbytes = 0;
  int sign;

  if(state->useCoverageBlockmap) {
    currentpos = ftello(fp);
    // sign tracks seek direction: +1 forward, -1 backward
    sign = (offset > 0 ? 1 : -1);

    curblock = currentpos / state->coverageblocksize;

    // grow |offset| by the size of every covered block the seek crosses;
    // totalbytes counts only the uncovered (logical) distance travelled
    while (totalbytes < (offset > 0 ? offset : offset * -1) &&
	   curblock < state->coveragenumblocks) {
      bytestoskip = 0;

      // covered blocks increase offset
      while (curblock < state->coveragenumblocks &&
	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {

	bytestoskip += (state->coverageblocksize -
			currentpos % state->coverageblocksize);
	curblock += sign;
      }

      offset += (bytestoskip * sign);
      currentpos += (bytestoskip * sign);

      bytestokeep = 0;

      // uncovered blocks don't increase offset
      while (curblock < state->coveragenumblocks &&
	     ((state->
	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
	     && totalbytes < (offset > 0 ? offset : offset * -1)) {

	bytestokeep += (state->coverageblocksize -
			currentpos % state->coverageblocksize);

	curblock += sign;
      }

      totalbytes += bytestokeep;
      currentpos += (bytestokeep * sign);
    }
  }

  // perform the real seek with the (possibly adjusted) offset
  return fseeko(fp, offset, SEEK_CUR);
}
2383
2384
2385
// simple wrapper for ftello() that uses the coverage bitmap to
// report the current file position *minus* the contribution of
// marked blocks, IF the coverage blockmap is being used.  If a
// coverage blockmap isn't in use, just performs a standard ftello()
// call.
//
// GGRIII: *** This could use optimization, e.g., use of a pre-computed
// table to avoid walking the coverage bitmap on each call.
static off64_t ftello_use_coverage_map(struct scalpelState *state, FILE * fp) {

  off64_t currentpos, decrease = 0;
  unsigned long long endblock, k;

  currentpos = ftello(fp);

  if(state->useCoverageBlockmap) {
    // endblock = the block containing the current physical position
    endblock = currentpos / state->coverageblocksize;

    // covered blocks don't contribute to current file position
    // NOTE(review): the loop bound 'k <= endblock' already charges a FULL
    // blocksize for endblock if it is covered, and the if-statement below
    // then subtracts an additional partial amount for the same block --
    // looks like a double count unless positions inside covered blocks
    // are assumed impossible (cf. positionUseCoverageBlockmap's
    // precondition).  Confirm before changing.
    for(k = 0; k <= endblock; k++) {
      if(state->coveragebitmap[k / 8] & (1 << (k % 8))) {
	decrease += state->coverageblocksize;
      }
    }

    // partial-block adjustment for the current block
    if(state->coveragebitmap[endblock / 8] & (1 << (endblock % 8))) {
      decrease += (state->coverageblocksize -
		   currentpos % state->coverageblocksize);
    }

    if(state->modeVerbose && state->useCoverageBlockmap) {
#ifdef _WIN32
      fprintf(stdout,
	      "Coverage map decreased current file position by %I64u bytes.\n",
	      (unsigned long long)decrease);
#else
      fprintf(stdout,
	      "Coverage map decreased current file position by %llu bytes.\n",
	      (unsigned long long)decrease);
#endif
    }
  }

  // logical position = physical position minus covered-block bytes
  return currentpos - decrease;
}
2432
2433
2434
// simple wrapper for fread() that uses the coverage bitmap--the read silently
// skips blocks that are marked covered (corresponding bit in coverage
// bitmap is 1).  Conforms to fread() semantics: returns the number of
// complete items (of 'size' bytes) read into 'ptr'.
static size_t
fread_use_coverage_map(struct scalpelState *state, void *ptr,
		       size_t size, size_t nmemb, FILE * stream) {

  // neededbytes:    total bytes the caller asked for (nmemb * size)
  // totalbytesread: bytes delivered into ptr so far
  // curpos:         current physical position in the stream
  unsigned long long curblock, neededbytes = nmemb * size, bytestoskip,
    bytestoread, bytesread, totalbytesread = 0, curpos;
  int shortread;		// set when the underlying fread comes up short (EOF/error)
  //  gettimeofday_t readnow, readthen;

  //  gettimeofday(&readthen, 0);

  if(state->useCoverageBlockmap) {
    if(state->modeVerbose) {
#ifdef _WIN32
      fprintf(stdout,
	      "Issuing coverage map-based READ, wants %I64u bytes.\n",
	      neededbytes);
#else
      fprintf(stdout,
	      "Issuing coverage map-based READ, wants %llu bytes.\n",
	      neededbytes);
#endif
    }

    curpos = ftello(stream);
    curblock = curpos / state->coverageblocksize;
    shortread = 0;

    // alternate skip-covered-run / read-uncovered-run until satisfied,
    // out of blocks, or a short read occurs
    while (totalbytesread < neededbytes
	   && curblock < state->coveragenumblocks && !shortread) {
      bytestoread = 0;
      bytestoskip = 0;

      // skip covered blocks
      // NOTE(review): curpos is not advanced per iteration here, so the
      // curpos % blocksize partial is reused for each covered block in
      // the run; exact only when curpos is block-aligned (same pattern
      // as generateFragments -- verify together).
      while (curblock < state->coveragenumblocks &&
	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
	bytestoskip += (state->coverageblocksize -
			curpos % state->coverageblocksize);
	curblock++;
      }

      curpos += bytestoskip;


      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout,
		"fread using coverage map to skip %I64u bytes.\n", bytestoskip);
#else
	fprintf(stdout,
		"fread using coverage map to skip %llu bytes.\n", bytestoskip);
#endif
      }

      // advance the stream past the covered run
      fseeko(stream, (off64_t) bytestoskip, SEEK_CUR);

      // accumulate uncovered blocks for read
      while (curblock < state->coveragenumblocks &&
	     ((state->
	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
	     && totalbytesread + bytestoread <= neededbytes) {

	bytestoread += (state->coverageblocksize -
			curpos % state->coverageblocksize);

	curblock++;
      }

      // cap read size so we never deliver more than requested
      if(totalbytesread + bytestoread > neededbytes) {
	bytestoread = neededbytes - totalbytesread;
      }


      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout,
		"fread using coverage map found %I64u consecutive bytes.\n",
		bytestoread);
#else
	fprintf(stdout,
		"fread using coverage map found %llu consecutive bytes.\n",
		bytestoread);
#endif
      }

      // read the uncovered run directly into the caller's buffer
      if((bytesread =
	  fread((char *)ptr + totalbytesread, 1, (size_t) bytestoread,
		stream)) < bytestoread) {
	shortread = 1;
      }

      totalbytesread += bytesread;
      curpos += bytestoread;

      if(state->modeVerbose) {
#ifdef _WIN32
	fprintf(stdout, "fread using coverage map read %I64u bytes.\n",
		bytesread);
#else
	fprintf(stdout, "fread using coverage map read %llu bytes.\n",
		bytesread);
#endif
      }
    }

    if(state->modeVerbose) {
      fprintf(stdout, "Coverage map-based READ complete.\n");
    }

    // conform with fread() semantics by returning # of items read
    return totalbytesread / size;
  }
  else {
    // no blockmap in use: plain pass-through fread
    size_t ret = fread(ptr, size, nmemb, stream);
    return ret;
  }
}
2556
2557 #ifdef MULTICORE_THREADING
2558
// threaded header/footer search.  One instance runs per search spec as a
// long-lived worker: it blocks on the workavailable[id] mutex until the
// dispatcher posts work, scans the dispatcher-supplied buffer for all
// occurrences of its needle (fixed string via Boyer-Moore, or regex),
// records hits in foundat[]/foundatlens[], then releases workcomplete[id]
// to signal the dispatcher.  'args' points at this thread's
// ThreadFindAllParams slot, which the dispatcher refreshes before each
// round.  The loop never exits; the trailing return only silences the
// compiler.
static void *threadedFindAll(void *args) {

  int id = ((ThreadFindAllParams *) args)->id;
  char *str;
  size_t length;
  char *startpos;
  long offset;
  char **foundat;
  size_t *foundatlens;
  size_t *table = 0;		// Boyer-Moore skip table (fixed-string mode)
  regex_t *regexp = 0;		// compiled pattern (regex mode)
  int strisRE;
  int casesensitive;
  int nosearchoverlap;
  struct scalpelState *state;

  regmatch_t *match;

  // wait for work
  //  sem_wait(&workavailable[id]);

  // you need to be holding the workcomplete mutex initially
  // (handshake protocol: dispatcher holds workavailable[id]; locking it
  // here blocks until the dispatcher unlocks it to hand over a workload)
  pthread_mutex_lock(&workcomplete[id]);
  pthread_mutex_lock(&workavailable[id]);

  while (1) {
    // get args that define current workload (re-read every round; the
    // dispatcher rewrites this thread's ThreadFindAllParams slot)
    str = ((ThreadFindAllParams *) args)->str;
    length = ((ThreadFindAllParams *) args)->length;
    startpos = ((ThreadFindAllParams *) args)->startpos;
    offset = ((ThreadFindAllParams *) args)->offset;
    foundat = ((ThreadFindAllParams *) args)->foundat;
    foundatlens = ((ThreadFindAllParams *) args)->foundatlens;
    strisRE = ((ThreadFindAllParams *) args)->strisRE;
    if(strisRE) {
      regexp = ((ThreadFindAllParams *) args)->regex;
    }
    else {
      table = ((ThreadFindAllParams *) args)->table;
    }
    casesensitive = ((ThreadFindAllParams *) args)->casesensitive;
    nosearchoverlap = ((ThreadFindAllParams *) args)->nosearchoverlap;
    state = ((ThreadFindAllParams *) args)->state;

    if(state->modeVerbose) {
      printf("needle search thread # %d awake.\n", id);
    }

    // scan forward until no further match in the buffer
    while (startpos) {
      if(!strisRE) {
	startpos = bm_needleinhaystack(str,
				       length,
				       startpos,
				       offset - (long)startpos,
				       table, casesensitive);
      }
      else {
	//printf("Before regexp search, startpos = %p\n", startpos);
	match = re_needleinhaystack(regexp, startpos, offset - (long)startpos);
	if(!match) {
	  startpos = 0;
	}
	else {
	  // convert the regmatch offsets into an absolute position/length
	  startpos = match->rm_so + startpos;
	  length = match->rm_eo - match->rm_so;
	  free(match);
	  //printf("After regexp search, startpos = %p\n", startpos);
	}
      }

      if(startpos) {
	// remember match location; foundat[MAX_MATCHES_PER_BUFFER] is
	// (ab)used as the running match counter for this buffer
	foundat[(long)(foundat[MAX_MATCHES_PER_BUFFER])] = startpos;
	foundatlens[(long)(foundat[MAX_MATCHES_PER_BUFFER])] = length;
	foundat[MAX_MATCHES_PER_BUFFER]++;

	// move past match position.  Foremost 0.69 didn't find overlapping
	// headers/footers.  If you need that behavior, specify "-r" on the
	// command line.  Scalpel's default behavior is to find overlapping
	// headers/footers.

	if(nosearchoverlap) {
	  startpos += length;
	}
	else {
	  startpos++;
	}
      }
    }

    if(state->modeVerbose) {
      printf("needle search thread # %d asleep.\n", id);
    }

    // signal completion of work
    //    sem_post(&workcomplete[id]);
    pthread_mutex_unlock(&workcomplete[id]);

    // wait for more work
    //    sem_wait(&workavailable[id]);
    pthread_mutex_lock(&workavailable[id]);

  }

  return 0;

}
2667
2668 #endif
2669
2670
2671 // Buffers for reading image in and holding gpu results.
2672 // The ourCudaMallocHost call MUST be executed by the
2673 // gpu_handler thread.
init_store()2674 void init_store() {
2675
2676 // 3 queues:
2677 // inque filled by reader, emptied by gpu_handler
2678 // outque filled by gpu_handler, emptied by host thread
2679 // freequeue filled by host, emptied by reader
2680
2681 // the queues hold pointers to full and empty SIZE_OF_BUFFER read buffers
2682 full_readbuf = syncqueue_init("full_readbuf", QUEUELEN);
2683 empty_readbuf = syncqueue_init("empty_readbuf", QUEUELEN);
2684 #ifdef GPU_THREADING
2685 results_readbuf = syncqueue_init("results_readbuf", QUEUELEN);
2686 #endif
2687
2688 // backing store of actual buffers pointed to above, along with some
2689 // necessary bookeeping info
2690 readbuf_info *readbuf_store;
2691 if((readbuf_store =
2692 (readbuf_info *)malloc(QUEUELEN * sizeof(readbuf_info))) == 0) {
2693 fprintf(stderr, (char *)"malloc %lu failed in streaming reader\n",
2694 (unsigned long)QUEUELEN * sizeof(readbuf_info));
2695 }
2696
2697 // initialization
2698 int g;
2699 for(g = 0; g < QUEUELEN; g++) {
2700 readbuf_store[g].bytesread = 0;
2701 readbuf_store[g].beginreadpos = 0;
2702
2703 // for fast gpu operation we need to use the CUDA pinned-memory allocations
2704 #ifdef GPU_THREADING
2705 ourCudaMallocHost((void **)&(readbuf_store[g].readbuf), SIZE_OF_BUFFER);
2706 #else
2707 readbuf_store[g].readbuf = (char *)malloc(SIZE_OF_BUFFER);
2708 #endif
2709
2710 // put pointer to empty but initialized readbuf in the empty que
2711 put(empty_readbuf, (void *)(&readbuf_store[g]));
2712 }
2713 }
2714
2715
2716 // initialize thread-related data structures for either GPU_THREADING or
2717 // MULTICORE_THREADING models
// initialize thread-related data structures for either GPU_THREADING or
// MULTICORE_THREADING models.  For the multicore model this allocates
// the per-spec thread pool, match-result arrays, and the paired
// workavailable/workcomplete mutexes used as a handshake with
// threadedFindAll(); each worker is created blocked until work arrives.
// Returns 0 (mutex/thread creation failures currently exit()).
int init_threading_model(struct scalpelState *state) {

  int i;

#ifdef GPU_THREADING

  printf("GPU-based threading model enabled.\n");

#endif

#ifdef MULTICORE_THREADING

  printf("Multi-core CPU threading model enabled.\n");
  printf("Initializing thread group data structures.\n");

  // initialize global data structures for threads: one worker thread,
  // one args slot, and one result array set per search spec line
  searchthreads = (pthread_t *) malloc(state->specLines * sizeof(pthread_t));
  checkMemoryAllocation(state, searchthreads, __LINE__, __FILE__,
			"searchthreads");
  threadargs =
    (ThreadFindAllParams *) malloc(state->specLines *
				   sizeof(ThreadFindAllParams));
  checkMemoryAllocation(state, threadargs, __LINE__, __FILE__, "args");
  // FIX: element types in the two allocations below were char * and
  // size_t; the arrays hold char ** and size_t * pointers (same size on
  // common ABIs, but the idiom was wrong and non-portable)
  foundat = (char ***)malloc(state->specLines * sizeof(char **));
  checkMemoryAllocation(state, foundat, __LINE__, __FILE__, "foundat");
  foundatlens = (size_t **) malloc(state->specLines * sizeof(size_t *));
  checkMemoryAllocation(state, foundatlens, __LINE__, __FILE__, "foundatlens");
  workavailable = (pthread_mutex_t *)malloc(state->specLines * sizeof(pthread_mutex_t));

  checkMemoryAllocation(state, workavailable, __LINE__, __FILE__,
			"workavailable");
  workcomplete = (pthread_mutex_t *)malloc(state->specLines * sizeof(pthread_mutex_t));

  checkMemoryAllocation(state, workcomplete, __LINE__, __FILE__,
			"workcomplete");

  printf("Creating threads...\n");
  for(i = 0; i < state->specLines; i++) {
    // slot MAX_MATCHES_PER_BUFFER doubles as the match counter used by
    // threadedFindAll(), hence the +1
    foundat[i] = (char **)malloc((MAX_MATCHES_PER_BUFFER + 1) * sizeof(char *));
    checkMemoryAllocation(state, foundat[i], __LINE__, __FILE__, "foundat");
    foundatlens[i] = (size_t *) malloc(MAX_MATCHES_PER_BUFFER * sizeof(size_t));
    checkMemoryAllocation(state, foundatlens[i], __LINE__, __FILE__,
			  "foundatlens");
    foundat[i][MAX_MATCHES_PER_BUFFER] = 0;

    // BUG: NEED PROPER ERROR CODE/MESSAGE FOR MX CREATION FAILURE
    if(pthread_mutex_init(&workavailable[i], 0)) {
      //return SCALPEL_ERROR_PTHREAD_FAILURE;
      fprintf(stderr, "COULDN'T CREATE MUTEX\n");
      exit(1);
    }

    // hold workavailable[i] so the worker blocks until work is posted
    pthread_mutex_lock(&workavailable[i]);

    if(pthread_mutex_init(&workcomplete[i], 0)) {
      fprintf(stderr, "COULDN'T CREATE MUTEX\n");
      exit(1);
    }

    // create a thread in the thread pool; thread will block on workavailable[]
    // semaphore until there's work to do

    // FIX: NEED PROPER ERROR CODE/MESSAGE FOR THREAD CREATION FAILURE
    threadargs[i].id = i;	// thread needs to read id before blocking
    if(pthread_create
       (&searchthreads[i], NULL, &threadedFindAll, &threadargs[i])) {
      //return SCALPEL_ERROR_PTHREAD_FAILURE;
      fprintf(stderr, "COULDN'T CREATE THREAD\n");
      exit(1);
    }
  }
  printf("Thread creation completed.\n");

#endif

  return 0;
}
2795
2796
2797 // write header/footer database for current image file into the
2798 // Scalpel output directory. No information is written into the
2799 // database for file types without a suffix. The filename used
2800 // is the current image filename with ".hfd" appended. The
2801 // format of the database file is straightforward:
2802 //
2803 // suffix_#1 (string)
2804 // number_of_headers (unsigned long long)
2805 // header_pos_#1 (unsigned long long)
2806 // header_pos_#2 (unsigned long long)
2807 // ...
2808 // number_of_footers (unsigned long long)
2809 // footer_pos_#1 (unsigned long long)
2810 // footer_pos_#2 (unsigned long long)
2811 // ...
2812 // suffix_#2 (string)
2813 // number_of_headers (unsigned long long)
2814 // header_pos_#1 (unsigned long long)
2815 // header_pos_#2 (unsigned long long)
2816 // ...
2817 // number_of_footers (unsigned long long)
2818 // footer_pos_#1 (unsigned long long)
2819 // footer_pos_#2 (unsigned long long)
2820 // ...
2821 // ...
2822 //
2823 // If state->useCoverageBlockmap, then translation is required to
2824 // produce real disk image addresses for the generated header/footer
2825 // database file, because the Scalpel carving engine isn't aware of
2826 // gaps created by blocks that are covered by previously carved files.
2827
writeHeaderFooterDatabase(struct scalpelState * state)2828 static int writeHeaderFooterDatabase(struct scalpelState *state) {
2829
2830 FILE *dbfile;
2831 char fn[MAX_STRING_LENGTH]; // filename for header/footer database
2832 int needlenum;
2833 struct SearchSpecLine *currentneedle;
2834 unsigned long long i;
2835
2836 // generate unique name for header/footer database
2837 snprintf(fn, MAX_STRING_LENGTH, "%s/%s.hfd",
2838 state->outputdirectory, base_name(state->imagefile));
2839
2840 if((dbfile = fopen(fn, "w")) == NULL) {
2841 fprintf(stderr, "Error writing to header/footer database file: %s\n", fn);
2842 fprintf(state->auditFile,
2843 "Error writing to header/footer database file: %s\n", fn);
2844 return SCALPEL_ERROR_FILE_WRITE;
2845 }
2846
2847 #ifdef _WIN32
2848 // set binary mode for Win32
2849 setmode(fileno(dbfile), O_BINARY);
2850 #endif
2851 #ifdef __linux
2852 fcntl(fileno(dbfile), F_SETFL, O_LARGEFILE);
2853 #endif
2854
2855 for(needlenum = 0; needlenum < state->specLines; needlenum++) {
2856 currentneedle = &(state->SearchSpec[needlenum]);
2857
2858 if(currentneedle->suffix[0] != SCALPEL_NOEXTENSION) {
2859 // output current suffix
2860 if(fprintf(dbfile, "%s\n", currentneedle->suffix) <= 0) {
2861 fprintf(stderr,
2862 "Error writing to header/footer database file: %s\n", fn);
2863 fprintf(state->auditFile,
2864 "Error writing to header/footer database file: %s\n", fn);
2865 return SCALPEL_ERROR_FILE_WRITE;
2866 }
2867
2868 // # of headers
2869 #ifdef _WIN32
2870 if(fprintf(dbfile, "%I64u\n", currentneedle->offsets.numheaders)
2871 <= 0) {
2872 #else
2873 if(fprintf(dbfile, "%llu\n", currentneedle->offsets.numheaders) <= 0) {
2874 #endif
2875 fprintf(stderr,
2876 "Error writing to header/footer database file: %s\n", fn);
2877 fprintf(state->auditFile,
2878 "Error writing to header/footer database file: %s\n", fn);
2879 return SCALPEL_ERROR_FILE_WRITE;
2880 }
2881
2882 // all header positions for current suffix
2883 for(i = 0; i < currentneedle->offsets.numheaders; i++) {
2884 #ifdef _WIN32
2885 if(fprintf
2886 (dbfile, "%I64u\n",
2887 positionUseCoverageBlockmap(state,
2888 currentneedle->offsets.
2889 headers[i])) <= 0) {
2890 #else
2891 if(fprintf
2892 (dbfile, "%llu\n",
2893 positionUseCoverageBlockmap(state,
2894 currentneedle->offsets.
2895 headers[i])) <= 0) {
2896 #endif
2897 fprintf(stderr,
2898 "Error writing to header/footer database file: %s\n", fn);
2899 fprintf(state->auditFile,
2900 "Error writing to header/footer database file: %s\n", fn);
2901 return SCALPEL_ERROR_FILE_WRITE;
2902 }
2903 }
2904
2905 // # of footers
2906 #ifdef _WIN32
2907 if(fprintf(dbfile, "%I64u\n", currentneedle->offsets.numfooters)
2908 <= 0) {
2909 #else
2910 if(fprintf(dbfile, "%llu\n", currentneedle->offsets.numfooters) <= 0) {
2911 #endif
2912 fprintf(stderr,
2913 "Error writing to header/footer database file: %s\n", fn);
2914 fprintf(state->auditFile,
2915 "Error writing to header/footer database file: %s\n", fn);
2916 return SCALPEL_ERROR_FILE_WRITE;
2917 }
2918
2919 // all footer positions for current suffix
2920 for(i = 0; i < currentneedle->offsets.numfooters; i++) {
2921 #ifdef _WIN32
2922 if(fprintf
2923 (dbfile, "%I64u\n",
2924 positionUseCoverageBlockmap(state,
2925 currentneedle->offsets.
2926 footers[i])) <= 0) {
2927 #else
2928 if(fprintf
2929 (dbfile, "%llu\n",
2930 positionUseCoverageBlockmap(state,
2931 currentneedle->offsets.
2932 footers[i])) <= 0) {
2933 #endif
2934 fprintf(stderr,
2935 "Error writing to header/footer database file: %s\n", fn);
2936 fprintf(state->auditFile,
2937 "Error writing to header/footer database file: %s\n", fn);
2938 return SCALPEL_ERROR_FILE_WRITE;
2939 }
2940 }
2941 }
2942 }
2943 fclose(dbfile);
2944 return SCALPEL_OK;
2945 }
2946
2947