1 // Scalpel Copyright (C) 2005-11 by Golden G. Richard III and
2 // 2007-11 by Vico Marziale.
3 // Written by Golden G. Richard III and Vico Marziale.
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 // General Public License for more details.
14 
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 // 02110-1301, USA.
19 //
20 // Thanks to Kris Kendall, Jesse Kornblum, et al for their work
21 // on Foremost.  Foremost 0.69 was used as the starting point for
22 // Scalpel, in 2005.
23 
24 
25 #include "scalpel.h"
26 
27 
28 /////////// GLOBALS ////////////
29 
30 static char *readbuffer;	// Read buffer--process image files in
31                                 // SIZE_OF_BUFFER-size chunks.
32 
33 // Info needed for each of the SIZE_OF_BUFFER-sized chunks above.
34 typedef struct readbuf_info {
35   long long bytesread;		// number of bytes in this buf
36   long long beginreadpos;	// position in the image
37   char *readbuf;		// pointer to a SIZE_OF_BUFFER-byte array
38 } readbuf_info;
39 
40 
41 // queues to facilitate async reads and concurrent CPU/GPU work
42 syncqueue_t *full_readbuf;	// queue of full buffers read from the image
43 syncqueue_t *empty_readbuf;	// queue of empty buffers
44 
45 sig_atomic_t reads_finished;
46 //pthread_cond_t reader_done = PTHREAD_COND_INITIALIZER;
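
// Illustrative sketch (not part of the build) of the intended hand-off between
// the reader thread and a search consumer; get() is assumed to block until a
// buffer is available, and 'consumer_sketch' is a hypothetical name.
#if 0
static void consumer_sketch(void) {
  while (!reads_finished || !full_readbuf->empty) {
    // wait for a buffer the reader has filled
    readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
    // ... search rinfo->readbuf (rinfo->bytesread bytes starting at image
    // offset rinfo->beginreadpos) ...
    put(empty_readbuf, (void *)rinfo);	// recycle the buffer for the reader
  }
}
#endif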
47 
48 #ifdef GPU_THREADING
49 // GPU only threading globals
50 syncqueue_t *results_readbuf;	// queue of encoded results from the GPU
51 //pthread_cond_t gpu_handler_done = PTHREAD_COND_INITIALIZER;
52 sig_atomic_t gpu_finished = FALSE;	// for thread synchronization
53 char localpattern[MAX_PATTERNS][MAX_PATTERN_LENGTH];	// search patterns
54 char locallookup_headers[LOOKUP_ROWS][LOOKUP_COLUMNS];	// header lookup table
55 char locallookup_footers[LOOKUP_ROWS][LOOKUP_COLUMNS];	// footer lookup table
56 
57 #endif
58 
59 
60 
61 #ifdef MULTICORE_THREADING
62 // Multi-core only threading globals
63 
64 // Parameters to threads under the MULTICORE_THREADING model, to allow
65 // efficient header/footer searches.
66 typedef struct ThreadFindAllParams {
67   int id;
68   char *str;
69   size_t length;
70   char *startpos;
71   long offset;
72   char **foundat;
73   size_t *foundatlens;
74   int strisRE;
75   union {
76     size_t *table;
77     regex_t *regex;
78   };
79   int casesensitive;
80   int nosearchoverlap;
81   struct scalpelState *state;
82 } ThreadFindAllParams;
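
// Note on the union above: when 'strisRE' is nonzero the search thread uses
// 'regex' (the compiled regular expression for the needle); otherwise it uses
// 'table', the precomputed skip table (bm_table) for the literal needle.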
83 
84 // TODO: These structures could be released after the dig phase; they aren't
85 // needed in the carving phase, since it isn't threaded.  Look into this in the future.
86 
87 static pthread_t *searchthreads;	// thread pool for header/footer searches
88 static ThreadFindAllParams *threadargs;	// argument structures for threads
89 static char ***foundat;		// locations of headers/footers discovered by threads
90 static size_t **foundatlens;	// lengths of discovered headers/footers
91 //static sem_t *workavailable;	// semaphores that block threads until
92 // header/footer searches are required
93 static pthread_mutex_t *workavailable;
94 //static sem_t *workcomplete;	// semaphores that allow main thread to wait
95 // for all search threads to complete current job
96 static pthread_mutex_t *workcomplete;
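
// Sketch (not compiled) of the dispatch/sync handshake these mutexes implement;
// see digBuffer() below for the real code.  'dispatch_sketch' is a hypothetical name.
#if 0
static void dispatch_sketch(struct scalpelState *state) {
  int i;
  for (i = 0; i < state->specLines; i++) {
    // ... fill threadargs[i] for the current buffer ...
    pthread_mutex_unlock(&workavailable[i]);	// wake search thread i
  }
  for (i = 0; i < state->specLines; i++) {
    pthread_mutex_lock(&workcomplete[i]);	// wait for thread i to finish
  }
}
#endif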
97 
98 #endif
99 
100 // prototypes for private dig.c functions
101 static unsigned long long adjustForEmbedding(struct SearchSpecLine
102 					     *currentneedle,
103 					     unsigned long long headerindex,
104 					     unsigned long long *prevstopindex);
105 static int writeHeaderFooterDatabase(struct scalpelState *state);
106 static int setupCoverageMaps(struct scalpelState *state,
107 			     unsigned long long filesize);
108 static int auditUpdateCoverageBlockmap(struct scalpelState *state,
109 				       struct CarveInfo *carve);
110 static int updateCoverageBlockmap(struct scalpelState *state,
111 				  unsigned long long block);
112 static void generateFragments(struct scalpelState *state, Queue * fragments,
113 			      struct CarveInfo *carve);
114 static unsigned long long positionUseCoverageBlockmap(struct scalpelState
115 						      *state,
116 						      unsigned long long
117 						      position);
118 static void destroyCoverageMaps(struct scalpelState *state);
119 static int fseeko_use_coverage_map(struct scalpelState *state, FILE * fp,
120 				   off64_t offset);
121 static off64_t ftello_use_coverage_map(struct scalpelState *state, FILE * fp);
122 static size_t fread_use_coverage_map(struct scalpelState *state, void *ptr,
123 				     size_t size, size_t nmemb, FILE * stream);
124 static void printhex(char *s, int len);
125 static void clean_up(struct scalpelState *state, int signum);
126 static int displayPosition(int *units,
127 			   unsigned long long pos,
128 			   unsigned long long size, char *fn);
129 static int setupAuditFile(struct scalpelState *state);
130 static int digBuffer(struct scalpelState *state,
131 		     unsigned long long lengthofbuf,
132 		     unsigned long long offset);
133 #ifdef MULTICORE_THREADING
134 static void *threadedFindAll(void *args);
135 #endif
136 
137 
138 // force header/footer matching to deal with embedded headers/footers
139 static unsigned long long
140 adjustForEmbedding(struct SearchSpecLine *currentneedle,
141 		   unsigned long long headerindex,
142 		   unsigned long long *prevstopindex) {
143 
144   unsigned long long h = headerindex + 1;
145   unsigned long long f = 0;
146   int header = 0;
147   int footer = 0;
148   long long headerstack = 0;
149   unsigned long long start = currentneedle->offsets.headers[headerindex] + 1;
150   unsigned long long candidatefooter;
151   int morecandidates = 1;
152   int moretests = 1;
153 
154   // skip footers which precede header
155   while (*prevstopindex < currentneedle->offsets.numfooters &&
156 	 currentneedle->offsets.footers[*prevstopindex] < start) {
157     (*prevstopindex)++;
158   }
159 
160   f = *prevstopindex;
161   candidatefooter = *prevstopindex;
162 
163   // skip footers until header/footer count balances
164   while (candidatefooter < currentneedle->offsets.numfooters && morecandidates) {
165     // see if this footer is viable
166     morecandidates = 0;		// assumption is yes, viable
167     moretests = 1;
168     while (moretests && start < currentneedle->offsets.footers[candidatefooter]) {
169       header = 0;
170       footer = 0;
171       if(h < currentneedle->offsets.numheaders
172 	 && currentneedle->offsets.headers[h] >= start
173 	 && currentneedle->offsets.headers[h] <
174 	 currentneedle->offsets.footers[candidatefooter]) {
175 	header = 1;
176       }
177       if(f < currentneedle->offsets.numfooters
178 	 && currentneedle->offsets.footers[f] >= start
179 	 && currentneedle->offsets.footers[f] <
180 	 currentneedle->offsets.footers[candidatefooter]) {
181 	footer = 1;
182       }
183 
184       if(header && (!footer ||
185 		    currentneedle->offsets.headers[h] <
186 		    currentneedle->offsets.footers[f])) {
187 	h++;
188 	headerstack++;
189 	start = (h < currentneedle->offsets.numheaders)
190 	  ? currentneedle->offsets.headers[h] : start + 1;
191       }
192       else if(footer) {
193 	f++;
194 	headerstack--;
195 	if(headerstack < 0) {
196 	  headerstack = 0;
197 	}
198 	start = (f < currentneedle->offsets.numfooters)
199 	  ? currentneedle->offsets.footers[f] : start + 1;
200       }
201       else {
202 	moretests = 0;
203       }
204     }
205 
206     if(headerstack > 0) {
207       // header/footer imbalance, need to try next footer
208       candidatefooter++;
209       // this footer counts against footer deficit!
210       headerstack--;
211       morecandidates = 1;
212     }
213   }
214 
215   return (headerstack > 0) ? 999999999999999999ULL : candidatefooter;
216 }
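
// Worked example (illustrative): with headers H0 < H1 and footers F0 < F1,
// where H1 and F0 both lie between H0 and F1 (a file embedded inside another),
// a call with headerindex referring to H0 returns the index of F1, so the
// outer carve is not cut short at the embedded file's footer F0.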
217 
218 
219 // output hex notation for chars in 's'
220 static void printhex(char *s, int len) {
221 
222   int i;
223   for(i = 0; i < len; i++) {
224     printf("\\x%.2x", (unsigned char)s[i]);
225   }
226 }
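
// e.g., printhex("AB", 2) writes "\x41\x42" to stdout.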
227 
228 
229 static void clean_up(struct scalpelState *state, int signum) {
230 
231   scalpelLog(state, "Cleaning up...\n");
232   scalpelLog(state,
233 	     "\nCaught signal: %s. Program is terminating early\n",
234 	     (char *)strsignal(signum));
235   closeAuditFile(state->auditFile);
236   exit(1);
237 }
238 
239 
240 
241 // display progress bar
242 static int
243 displayPosition(int *units,
244 		unsigned long long pos, unsigned long long size, char *fn) {
245 
246   double percentDone = (((double)pos) / (double)(size) * 100);
247   double position = (double)pos;
248   int count;
249   int barlength, i, len;
250   double elapsed;
251   long remaining;
252 
253   char buf[MAX_STRING_LENGTH];
254   char line[MAX_STRING_LENGTH];
255 
256 #ifdef _WIN32
257   static LARGE_INTEGER start;
258   LARGE_INTEGER now;
259   static LARGE_INTEGER freq;
260   QueryPerformanceFrequency(&freq);
261 #else
262   static struct timeval start;
263   struct timeval now, td;
264 #endif
265 
266   // get current time and remember start time when first chunk of
267   // an image file is read
268 
269   if(pos <= SIZE_OF_BUFFER) {
270     gettimeofday(&start, (struct timezone *)0);
271   }
272   gettimeofday(&now, (struct timezone *)0);
273 
274   // First, reduce the position to the right units
275   for(count = 0; count < *units; count++) {
276     position = position / 1024;
277   }
278 
279   // Now check if we've hit the next type of units
280   while (position > 1023) {
281     position = position / 1024;
282     (*units)++;
283   }
284 
285   switch (*units) {
286 
287   case UNITS_BYTES:
288     sprintf(buf, "bytes");
289     break;
290   case UNITS_KILOB:
291     sprintf(buf, "KB");
292     break;
293   case UNITS_MEGAB:
294     sprintf(buf, "MB");
295     break;
296   case UNITS_GIGAB:
297     sprintf(buf, "GB");
298     break;
299   case UNITS_TERAB:
300     sprintf(buf, "TB");
301     break;
302   case UNITS_PETAB:
303     sprintf(buf, "PB");
304     break;
305   case UNITS_EXAB:
306     sprintf(buf, "EB");
307     break;
308 
309   default:
310     fprintf(stdout, "Unable to compute progress.\n");
311     return SCALPEL_OK;
312   }
313 
314   len = 0;
315   len +=
316     snprintf(line + len, sizeof(line) - len, "\r%s: %5.1f%% ", fn, percentDone);
317   barlength = ttywidth - strlen(fn) - strlen(buf) - 32;
318   if(barlength > 0) {
319     i = barlength * (int)percentDone / 100;
320     len += snprintf(line + len, sizeof(line) - len,
321 		    "|%.*s%*s|", i,
322 		    "****************************************************************************************************************************************************************",
323 		    barlength - i, "");
324   }
325 
326   len += snprintf(line + len, sizeof(line) - len, " %6.1f %s", position, buf);
327 
328 #ifdef _WIN32
329   elapsed =
330     ((double)now.QuadPart - (double)start.QuadPart) / ((double)freq.QuadPart);
331   //printf("elapsed: %f\n",elapsed);
332 #else
333   timersub(&now, &start, &td);
334   elapsed = td.tv_sec + (td.tv_usec / 1000000.0);
335 #endif
336   remaining = (long)((100 - percentDone) / percentDone * elapsed);
337   //printf("Ratio remaining: %f\n",(100-percentDone)/percentDone);
338   //printf("Elapsed time: %f\n",elapsed);
339   if(remaining >= 100 * (60 * 60)) {	//60*60 is seconds per hour
340     len += snprintf(line + len, sizeof(line) - len, " --:--ETA");
341   }
342   else {
343     i = remaining / (60 * 60);
344     if(i)
345       len += snprintf(line + len, sizeof(line) - len, " %2d:", i);
346     else
347       len += snprintf(line + len, sizeof(line) - len, "    ");
348     i = remaining % (60 * 60);
349     len +=
350       snprintf(line + len, sizeof(line) - len, "%02d:%02d ETA", i / 60, i % 60);
351   }
352 
353   fprintf(stdout, "%s", line);
354   fflush(stdout);
355 
356   return SCALPEL_OK;
357 }
358 
359 
360 
361 // create initial entries in audit for each image file processed
362 static int setupAuditFile(struct scalpelState *state) {
363 
364   char imageFile[MAX_STRING_LENGTH];
365 
366   if(realpath(state->imagefile, imageFile)) {
367     scalpelLog(state, "\nOpening target \"%s\"\n\n", imageFile);
368   }
369   else {
370     //handleError(state, SCALPEL_ERROR_FILE_OPEN);
371     return SCALPEL_ERROR_FILE_OPEN;
372   }
373 
374 #ifdef _WIN32
375   if(state->skip) {
376     fprintf(state->auditFile, "Skipped the first %I64u bytes of %s...\n",
377 	    state->skip, state->imagefile);
378     if(state->modeVerbose) {
379       fprintf(stdout, "Skipped the first %I64u bytes of %s...\n",
380 	      state->skip, state->imagefile);
381     }
382   }
383 #else
384   if(state->skip) {
385     fprintf(state->auditFile, "Skipped the first %llu bytes of %s...\n",
386 	    state->skip, state->imagefile);
387     if(state->modeVerbose) {
388       fprintf(stdout, "Skipped the first %llu bytes of %s...\n",
389 	      state->skip, state->imagefile);
390     }
391   }
392 #endif
393 
394   fprintf(state->auditFile, "The following files were carved:\n");
395   fprintf(state->auditFile,
396 	  "File\t\t  Start\t\t\tChop\t\tLength\t\tExtracted From\n");
397 
398   return SCALPEL_OK;
399 }
400 
401 
402 
403 static int
404 digBuffer(struct scalpelState *state, unsigned long long lengthofbuf,
405 	  unsigned long long offset) {
406 
407   unsigned long long startLocation = 0;
408   int needlenum, i = 0;
409   struct SearchSpecLine *currentneedle = 0;
410 //  gettimeofday_t srchnow, srchthen;
411 
412   // for each file type, find all headers and some (or all) footers
413 
414   // signal check
415   if(signal_caught == SIGTERM || signal_caught == SIGINT) {
416     clean_up(state, signal_caught);
417   }
418 
419 
420   ///////////////////////////////////////////////////
421   ///////////////////////////////////////////////////
422   ////////////////GPU-based SEARCH///////////////////
423   ///////////////////////////////////////////////////
424   ///////////////////////////////////////////////////
425 #ifdef GPU_THREADING		// code for searches on GPUs
426 
427 
428   // BUG (GGRIII): GPU MODEL DOES NOT SUPPORT REGULAR EXPRESSIONS
429   // (LMIII): NOR WILL IT EVER.
430   // GGRIII: let's not be hasty.  :)
431 
432   // In GPU mode, the needle search has already been done. readbuffer now holds
433   // the encoded results, which must be decoded into scalpel structures.
434 
435   // Each hit is encoded in 2 bytes: byte 0 identifies the needle and match type (k > 0: header of SearchSpec[k-1]; k < 0: footer of SearchSpec[-k-1]; 0: no match); byte 1 is the match position within the chunk.
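  //
  // Example (illustrative): the pair readbuffer[d] == 2, readbuffer[d+1] == 17
  // decodes as a header of SearchSpec[1] found 17 bytes into result chunk
  // d / RESULTS_SIZE_PER_BLOCK, i.e. at buffer offset
  // (d / RESULTS_SIZE_PER_BLOCK) * (THREADS_PER_BLOCK - longestneedle) + 17.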
436 
437   int longestneedle = findLongestNeedle(state->SearchSpec);
438 
439   // Compute the size of the results buffer.
440   int result_chunks = (lengthofbuf / (THREADS_PER_BLOCK - longestneedle)) + 1;
441   int result_length = result_chunks * RESULTS_SIZE_PER_BLOCK;
442 
443   // Decode the results from the gpu.
444   int d = 0;
445   for(d = 0; d < result_length; d += 2) {
446     i = (d / RESULTS_SIZE_PER_BLOCK) * (THREADS_PER_BLOCK - longestneedle) +
447       (unsigned char)(readbuffer[d + 1]);
448     if(readbuffer[d] > 0) {
449 
450       needlenum = readbuffer[d] - 1;
451       currentneedle = &(state->SearchSpec[needlenum]);
452       startLocation = offset + i;
453 
454       // found a header--record location in header offsets database
455       if(state->modeVerbose) {
456 #ifdef _WIN32
457 	fprintf(stdout, "A %s header was found at : %I64u\n",
458 		currentneedle->suffix,
459 		positionUseCoverageBlockmap(state, startLocation));
460 #else
461 	fprintf(stdout, "A %s header was found at : %llu\n",
462 		currentneedle->suffix,
463 		positionUseCoverageBlockmap(state, startLocation));
464 #endif
465       }
466 
467       currentneedle->offsets.numheaders++;
468       if(currentneedle->offsets.headerstorage <=
469 	 currentneedle->offsets.numheaders) {
470 	// need more memory for header offset storage--add an
471 	// additional 100 elements
472 	currentneedle->offsets.headers = (unsigned long long *)
473 	  realloc(currentneedle->offsets.headers,
474 		  sizeof(unsigned long long) *
475 		  (currentneedle->offsets.numheaders + 100));
476 
477 
478 	checkMemoryAllocation(state, currentneedle->offsets.headers,
479 			      __LINE__, __FILE__, "header array");
480 	// inserted
481 	currentneedle->offsets.headerlens = (size_t *)
482 	  realloc(currentneedle->offsets.headerlens, sizeof(size_t) *
483 		  (currentneedle->offsets.numheaders + 100));
484 	checkMemoryAllocation(state, currentneedle->offsets.headerlens,
485 			      __LINE__, __FILE__, "header array");
486 	currentneedle->offsets.headerstorage =
487 	  currentneedle->offsets.numheaders + 100;
488 
489 	if(state->modeVerbose) {
490 #ifdef _WIN32
491 	  fprintf(stdout,
492 		  "Memory reallocation performed, total header storage = %I64u\n",
493 		  currentneedle->offsets.headerstorage);
494 #else
495 	  fprintf(stdout,
496 		  "-->>Memory reallocation performed, total header storage = %llu\n",
497 		  currentneedle->offsets.headerstorage);
498 #endif
499 	}
500       }
501       currentneedle->offsets.headers[currentneedle->offsets.numheaders -
502 				     1] = startLocation;
503 
504     }
505     else if(readbuffer[d] < 0) {	// footer
506 
507       needlenum = -1 * readbuffer[d] - 1;
508       currentneedle = &(state->SearchSpec[needlenum]);
509       startLocation = offset + i;
510       // found a footer--record location in footer offsets database
511       if(state->modeVerbose) {
512 #ifdef _WIN32
513 	fprintf(stdout, "A %s footer was found at : %I64u\n",
514 		currentneedle->suffix,
515 		positionUseCoverageBlockmap(state, startLocation));
516 #else
517 	fprintf(stdout, "A %s footer was found at : %llu\n",
518 		currentneedle->suffix,
519 		positionUseCoverageBlockmap(state, startLocation));
520 #endif
521       }
522 
523       currentneedle->offsets.numfooters++;
524       if(currentneedle->offsets.footerstorage <=
525 	 currentneedle->offsets.numfooters) {
526 	// need more memory for footer offset storage--add an
527 	// additional 100 elements
528 	currentneedle->offsets.footers = (unsigned long long *)
529 	  realloc(currentneedle->offsets.footers,
530 		  sizeof(unsigned long long) *
531 		  (currentneedle->offsets.numfooters + 100));
532 
533 	checkMemoryAllocation(state, currentneedle->offsets.footers,
534 			      __LINE__, __FILE__, "footer array");
535 	currentneedle->offsets.footerlens =
536 	  (size_t *) realloc(currentneedle->offsets.footerlens,
537 			     sizeof(size_t) *
538 			     (currentneedle->offsets.numfooters + 100));
539 	checkMemoryAllocation(state, currentneedle->offsets.footerlens,
540 			      __LINE__, __FILE__, "footer array");
541 	currentneedle->offsets.footerstorage =
542 	  currentneedle->offsets.numfooters + 100;
543 
544 	if(state->modeVerbose) {
545 #ifdef _WIN32
546 	  fprintf(stdout,
547 		  "Memory reallocation performed, total footer storage = %I64u\n",
548 		  currentneedle->offsets.footerstorage);
549 #else
550 	  fprintf(stdout,
551 		  "Memory reallocation performed, total footer storage = %llu\n",
552 		  currentneedle->offsets.footerstorage);
553 #endif
554 	}
555       }
556       currentneedle->offsets.footers[currentneedle->offsets.numfooters -
557 				     1] = startLocation;
558 
559     }
560   }
561 
562 #endif
563   ///////////////////////////////////////////////////
564   ///////////////////////////////////////////////////
565   ////////////////END GPU-BASED SEARCH///////////////
566   ///////////////////////////////////////////////////
567   ///////////////////////////////////////////////////
568 
569 
570 
571   ///////////////////////////////////////////////////
572   ///////////////////////////////////////////////////
573   ///////////////MULTI-THREADED SEARCH //////////////
574   ///////////////////////////////////////////////////
575   ///////////////////////////////////////////////////
576 #ifdef MULTICORE_THREADING
577 
578   // as of v1.9, this is now the lowest common denominator mode
579 
580   // ---------------- threaded header search ------------------ //
581   // ---------------- threaded header search ------------------ //
582 
583   //  gettimeofday(&srchthen, 0);
584   if(state->modeVerbose) {
585     printf("Waking up threads for header searches.\n");
586   }
587   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
588     currentneedle = &(state->SearchSpec[needlenum]);
589     // # of matches in last element of foundat array
590     foundat[needlenum][MAX_MATCHES_PER_BUFFER] = 0;
591     threadargs[needlenum].id = needlenum;
592     threadargs[needlenum].str = currentneedle->begin;
593     threadargs[needlenum].length = currentneedle->beginlength;
594     threadargs[needlenum].startpos = readbuffer;
595     threadargs[needlenum].offset = (long)readbuffer + lengthofbuf;
596     threadargs[needlenum].foundat = foundat[needlenum];
597     threadargs[needlenum].foundatlens = foundatlens[needlenum];
598     threadargs[needlenum].strisRE = currentneedle->beginisRE;
599     if(currentneedle->beginisRE) {
600       threadargs[needlenum].regex = &(currentneedle->beginstate.re);
601     }
602     else {
603       threadargs[needlenum].table = currentneedle->beginstate.bm_table;
604     }
605     threadargs[needlenum].casesensitive = currentneedle->casesensitive;
606     threadargs[needlenum].nosearchoverlap = state->noSearchOverlap;
607     threadargs[needlenum].state = state;
608 
609     // unblock thread
610     //    sem_post(&workavailable[needlenum]);
611 
612     pthread_mutex_unlock(&workavailable[needlenum]);
613 
614   }
615 
616   // ---------- thread group synchronization point ----------- //
617   // ---------- thread group synchronization point ----------- //
618 
619   if(state->modeVerbose) {
620     printf("Waiting for thread group synchronization.\n");
621   }
622 
623   // wait for all threads to complete header search before proceeding
624   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
625     //    sem_wait(&workcomplete[needlenum]);
626 
627     pthread_mutex_lock(&workcomplete[needlenum]);
628 
629   }
630 
631   if(state->modeVerbose) {
632     printf("Thread group synchronization complete.\n");
633   }
634 
635   // digest header locations discovered by the thread group
636 
637   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
638     currentneedle = &(state->SearchSpec[needlenum]);
639 
640     // number of matches stored in last element of vector
641     for(i = 0; i < (long)foundat[needlenum][MAX_MATCHES_PER_BUFFER]; i++) {
642       startLocation = offset + (foundat[needlenum][i] - readbuffer);
643       // found a header--record location in header offsets database
644       if(state->modeVerbose) {
645 #ifdef _WIN32
646 	fprintf(stdout, "A %s header was found at : %I64u\n",
647 		currentneedle->suffix,
648 		positionUseCoverageBlockmap(state, startLocation));
649 #else
650 	fprintf(stdout, "A %s header was found at : %llu\n",
651 		currentneedle->suffix,
652 		positionUseCoverageBlockmap(state, startLocation));
653 #endif
654       }
655 
656       currentneedle->offsets.numheaders++;
657       if(currentneedle->offsets.headerstorage <=
658 	 currentneedle->offsets.numheaders) {
659 	// need more memory for header offset storage--add an
660 	// additional 100 elements
661 	currentneedle->offsets.headers = (unsigned long long *)
662 	  realloc(currentneedle->offsets.headers,
663 		  sizeof(unsigned long long) *
664 		  (currentneedle->offsets.numheaders + 100));
665 	checkMemoryAllocation(state, currentneedle->offsets.headers,
666 			      __LINE__, __FILE__, "header array");
667 	currentneedle->offsets.headerlens =
668 	  (size_t *) realloc(currentneedle->offsets.headerlens,
669 			     sizeof(size_t) *
670 			     (currentneedle->offsets.numheaders + 100));
671 	checkMemoryAllocation(state, currentneedle->offsets.headerlens,
672 			      __LINE__, __FILE__, "header array");
673 
674 	currentneedle->offsets.headerstorage =
675 	  currentneedle->offsets.numheaders + 100;
676 
677 	if(state->modeVerbose) {
678 #ifdef _WIN32
679 	  fprintf(stdout,
680 		  "Memory reallocation performed, total header storage = %I64u\n",
681 		  currentneedle->offsets.headerstorage);
682 #else
683 	  fprintf(stdout,
684 		  "Memory reallocation performed, total header storage = %llu\n",
685 		  currentneedle->offsets.headerstorage);
686 #endif
687 	}
688       }
689       currentneedle->offsets.headers[currentneedle->offsets.numheaders -
690 				     1] = startLocation;
691       currentneedle->offsets.headerlens[currentneedle->offsets.
692 					numheaders - 1] =
693 	foundatlens[needlenum][i];
694     }
695   }
696 
697 
698   // ---------------- threaded footer search ------------------ //
699   // ---------------- threaded footer search ------------------ //
700 
701   // now footer search, if:
702   //
703   // there's a footer for the file type AND
704   //
705   // (at least one header for that type has been previously seen and
706   // at least one header is viable--that is, it was found in the current
707   // buffer, or it's less than the max carve distance behind the current
708   // file offset
709   //
710   // OR
711   //
712   // a header/footer database is being created.  In this case, ALL headers and
713   // footers must be discovered)
714 
715   if(state->modeVerbose) {
716     printf("Waking up threads for footer searches.\n");
717   }
718 
719   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
720     currentneedle = &(state->SearchSpec[needlenum]);
721     if(
722        // regular case--want to search for only "viable" (in the sense that they are
723        // useful for carving unfragmented files) footers, to save time
724        (currentneedle->offsets.numheaders > 0 &&
725 	currentneedle->endlength &&
726 	(currentneedle->offsets.
727 	 headers[currentneedle->offsets.numheaders - 1] > offset
728 	 || (offset -
729 	     currentneedle->offsets.headers[currentneedle->offsets.
730 					    numheaders - 1] <
731 	     currentneedle->length))) ||
732        // generating header/footer database, need to find all footers
733        // BUG:  ALSO need to do this for discovery of fragmented files--document this
734        (currentneedle->endlength && state->generateHeaderFooterDatabase)) {
735       // # of matches in last element of foundat array
736       foundat[needlenum][MAX_MATCHES_PER_BUFFER] = 0;
737       threadargs[needlenum].id = needlenum;
738       threadargs[needlenum].str = currentneedle->end;
739       threadargs[needlenum].length = currentneedle->endlength;
740       threadargs[needlenum].startpos = readbuffer;
741       threadargs[needlenum].offset = (long)readbuffer + lengthofbuf;
742       threadargs[needlenum].foundat = foundat[needlenum];
743       threadargs[needlenum].foundatlens = foundatlens[needlenum];
744       threadargs[needlenum].strisRE = currentneedle->endisRE;
745       if(currentneedle->endisRE) {
746 	threadargs[needlenum].regex = &(currentneedle->endstate.re);
747       }
748       else {
749 	threadargs[needlenum].table = currentneedle->endstate.bm_table;
750       }
751       threadargs[needlenum].casesensitive = currentneedle->casesensitive;
752       threadargs[needlenum].nosearchoverlap = state->noSearchOverlap;
753       threadargs[needlenum].state = state;
754 
755       // unblock thread
756       //      sem_post(&workavailable[needlenum]);
757       pthread_mutex_unlock(&workavailable[needlenum]);
758 
759     }
760     else {
761       threadargs[needlenum].length = 0;
762     }
763   }
764 
765   if(state->modeVerbose) {
766     printf("Waiting for thread group synchronization.\n");
767   }
768 
769   // ---------- thread group synchronization point ----------- //
770   // ---------- thread group synchronization point ----------- //
771 
772   // wait for all threads to complete footer search before proceeding
773   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
774     if(threadargs[needlenum].length > 0) {
775       //      sem_wait(&workcomplete[needlenum]);
776       pthread_mutex_lock(&workcomplete[needlenum]);
777     }
778   }
779 
780   if(state->modeVerbose) {
781     printf("Thread group synchronization complete.\n");
782   }
783 
784   // digest footer locations discovered by the thread group
785 
786   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
787     currentneedle = &(state->SearchSpec[needlenum]);
788     // number of matches stored in last element of vector
789     for(i = 0; i < (long)foundat[needlenum][MAX_MATCHES_PER_BUFFER]; i++) {
790       startLocation = offset + (foundat[needlenum][i] - readbuffer);
791       if(state->modeVerbose) {
792 #ifdef _WIN32
793 	fprintf(stdout, "A %s footer was found at : %I64u\n",
794 		currentneedle->suffix,
795 		positionUseCoverageBlockmap(state, startLocation));
796 #else
797 	fprintf(stdout, "A %s footer was found at : %llu\n",
798 		currentneedle->suffix,
799 		positionUseCoverageBlockmap(state, startLocation));
800 #endif
801       }
802 
803       currentneedle->offsets.numfooters++;
804       if(currentneedle->offsets.footerstorage <=
805 	 currentneedle->offsets.numfooters) {
806 	// need more memory for footer offset storage--add an
807 	// additional 100 elements
808 	currentneedle->offsets.footers = (unsigned long long *)
809 	  realloc(currentneedle->offsets.footers,
810 		  sizeof(unsigned long long) *
811 		  (currentneedle->offsets.numfooters + 100));
812 	checkMemoryAllocation(state, currentneedle->offsets.footers,
813 			      __LINE__, __FILE__, "footer array");
814 	currentneedle->offsets.footerlens =
815 	  (size_t *) realloc(currentneedle->offsets.footerlens,
816 			     sizeof(size_t) *
817 			     (currentneedle->offsets.numfooters + 100));
818 	checkMemoryAllocation(state, currentneedle->offsets.footerlens,
819 			      __LINE__, __FILE__, "footer array");
820 	currentneedle->offsets.footerstorage =
821 	  currentneedle->offsets.numfooters + 100;
822 
823 	if(state->modeVerbose) {
824 #ifdef _WIN32
825 	  fprintf(stdout,
826 		  "Memory reallocation performed, total footer storage = %I64u\n",
827 		  currentneedle->offsets.footerstorage);
828 #else
829 	  fprintf(stdout,
830 		  "Memory reallocation performed, total footer storage = %llu\n",
831 		  currentneedle->offsets.footerstorage);
832 #endif
833 	}
834       }
835       currentneedle->offsets.footers[currentneedle->offsets.numfooters -
836 				     1] = startLocation;
837       currentneedle->offsets.footerlens[currentneedle->offsets.
838 					numfooters - 1] =
839 	foundatlens[needlenum][i];
840     }
841   }
842 
843 #endif // multi-core CPU code
844   ///////////////////////////////////////////////////
845   ///////////////////////////////////////////////////
846   ///////////END MULTI-THREADED SEARCH///////////////
847   ///////////////////////////////////////////////////
848   ///////////////////////////////////////////////////
849 
850   return SCALPEL_OK;
851 }
852 
853 
854 
855 
856 ////////////////////////////////////////////////////////////////////////////////
857 /////////////////////// LMIII //////////////////////////////////////////////////
858 ////////////////////////////////////////////////////////////////////////////////
859 
860 /* In order to facilitate asynchronous reads, we will set up a reader
861    pthread, whose only job is to read the image in SIZE_OF_BUFFER
862    chunks using the coverage map functions.
863 
864    This thread will buffer QUEUELEN reads.
865 */
866 
867 
868 #ifdef GPU_THREADING
869 void *gpu_handler(void *sss) {
870 
871   struct scalpelState *state = (struct scalpelState *)sss;
872 
873   char *fullbuf;
874 
875   // Initialize constant table of headers/footers on GPU.  First array
876   // element is header # 1, second is footer # 1, third is header # 2, etc.
877   // zero-length header marks end of array.
878 
879   // Each needle is stored as follows:
880   // byte 0: size of needle
881   // bytes 1 - sizeof needle: the needle itself
882   // byte PATTERN_WCSHIFT: number of leading wildcards
883   // byte PATTERN_CASESEN: bool this needle is case sensitive
884 
885   // Example: wpc header
886   // (one leading wildcard, case sensitive)
887   // {\x04?WPC\x00\x00 ... \x00\x01\x01}
888 
889   // htm header:
890   // (no leading wildcard, not case sensitive)
891   // {\x05<html\x00\x00 ... \x00\x00\x00}
892 
893   // We also create 2 lookup tables for efficiency, one for headers and one for
894   // footers. Each is a LOOKUP_ROWS x LOOKUP_COLUMNS array. Each row of the
895   // headers lookup table holds indexes into the h/f table described above of
896 // headers which begin with the array index of this row, e.g. lookup_headers[7]
897   // holds a list of the indexes of the headers whose first byte is \x07.
898 
899 
900   // first, build local copies ...
901   bzero(localpattern, sizeof(localpattern));
902   bzero(locallookup_headers, sizeof(locallookup_headers));
903   bzero(locallookup_footers, sizeof(locallookup_footers));
904   int i = 0;
905   for(i = 0; i < LOOKUP_ROWS; i++) {
906     locallookup_headers[i][0] = LOOKUP_ENDOFROW;
907     locallookup_footers[i][0] = LOOKUP_ENDOFROW;
908   }
909 
910   struct SearchSpecLine *currentneedle = 0;
911   int j = 0;
912   int k = 0;
913   for(i = 0; i < state->specLines; i++) {
914     currentneedle = &(state->SearchSpec[i]);
915 
916     // header/footer initialization for one Scalpel rule
917 
918     localpattern[j][0] = currentneedle->beginlength;
919     memcpy(&(localpattern[j][1]), currentneedle->begin,
920 	   currentneedle->beginlength);
921     localpattern[j][PATTERN_CASESEN] = currentneedle->casesensitive;
922 
923     // Here's the rub. Some patterns start with the wildcard. In order for
924     // the lookup tables to work out efficiently, we need to be able to skip
925     // over leading wildcards, so we keep track of the shift from the
926     // beginning of the pattern to the first non-wildcard character.
927     int wc_shift = 0;
928     while ((localpattern[j][wc_shift + 1] == wildcard)
929 	   && (wc_shift < localpattern[j][0])) {
930       wc_shift++;
931     }
932 
933     // Now we can set up the lookup table to use the first non-wildcard
934     // byte of the pattern.
935     k = 0;
936     while (locallookup_headers[(unsigned char)localpattern[j][wc_shift + 1]][k]
937 	   != LOOKUP_ENDOFROW) {
938       k++;
939     }
940     locallookup_headers[(unsigned char)localpattern[j][wc_shift + 1]][k] = j;
941     locallookup_headers[(unsigned char)localpattern[j][wc_shift + 1]][k + 1] =
942       LOOKUP_ENDOFROW;
943 
944     // And let's not forget to store the shift; we'll need it in the GPU when
945     // we search.
946     localpattern[j][PATTERN_WCSHIFT] = wc_shift;
947 
948     // We never expect a footer to begin with a wildcard -- it makes no sense.
949     // BUG: Assume we'll find them anyway.
950     localpattern[j + 1][0] = currentneedle->endlength;
951     memcpy(&(localpattern[j + 1][1]), currentneedle->end,
952 	   currentneedle->endlength);
953     localpattern[j + 1][PATTERN_CASESEN] = currentneedle->casesensitive;
954     if(currentneedle->endlength > 0) {
955       k = 0;
956       while (locallookup_footers[(unsigned char)localpattern[j + 1][1]][k] !=
957 	     LOOKUP_ENDOFROW) {
958 	k++;
959       }
960       locallookup_footers[(unsigned char)localpattern[j + 1][1]][k] = j + 1;
961       locallookup_footers[(unsigned char)localpattern[j + 1][1]][k + 1] =
962 	LOOKUP_ENDOFROW;
963     }
964     j += 2;
965   }
966   localpattern[j][0] = 0;	// mark end of array
967   localpattern[j + 1][0] = 0;
968 
969 
970   // ...then copy to GPU constant memory.  This assignment is persistent across
971   // the entire Scalpel execution.
972   copytodevicepattern(localpattern);
973   copytodevicelookup_headers(locallookup_headers);
974   copytodevicelookup_footers(locallookup_footers);
975 
976   printf("Copying tables to GPU constant memory complete.\n");
977 
978   int longestneedle = findLongestNeedle(state->SearchSpec);
979   gpu_init(longestneedle);
980   printf("Initializing GPU buffers complete.\n");
981 
982   // Now we get buffers from the full_readbuf queue, send them to the GPU for
983   // search, wait till the GPU finishes, and put the results buffers into the
984   // results_readbuf queue.
985   while (!reads_finished) {
986       readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
987       fullbuf = rinfo->readbuf;
988       gpuSearchBuffer(fullbuf, (unsigned int)rinfo->bytesread, fullbuf,
989 		      longestneedle, wildcard);
990       put(results_readbuf, (void *)rinfo);
991     }
992 
993   // reads are finished, but there may still be stuff in the queue
994   while (!full_readbuf->empty) {
995     readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
996     fullbuf = rinfo->readbuf;
997     gpuSearchBuffer(fullbuf, (unsigned int)rinfo->bytesread, fullbuf,
998 		    longestneedle, wildcard);
999     put(results_readbuf, (void *)rinfo);
1000   }
1001 
1002   // Done with image.
1003   gpu_finished = TRUE;
1004   gpu_cleanup();
1005   pthread_exit(0);
1006 }
1007 #endif
1008 
1009 
1010 
1011 // Streaming reader gets empty buffers from the empty_readbuf queue, reads
1012 // SIZE_OF_BUFFER chunks of the input image into the buffers and puts them into
1013 // the full_readbuf queue for processing.
1014 void *streaming_reader(void *sss) {
1015 
1016   struct scalpelState *state = (struct scalpelState *)sss;
1017 
1018   long long filesize = 0, bytesread = 0, filebegin = 0,
1019     fileposition = 0, beginreadpos = 0;
1020   long err = SCALPEL_OK;
1021   int displayUnits = UNITS_BYTES;
1022   int longestneedle = findLongestNeedle(state->SearchSpec);
1023 
1024   reads_finished = FALSE;
1025 
1026   filebegin = ftello(state->infile);
1027   if((filesize = measureOpenFile(state->infile, state)) == -1) {
1028     fprintf(stderr,
1029 	    "ERROR: Couldn't measure size of image file %s\n",
1030 	    state->imagefile);
1031     err=SCALPEL_ERROR_FILE_READ;
1032 //    goto exit_reader_thread;
1033   }
1034 
1035   // Get empty buffer from empty_readbuf queue
1036   readbuf_info *rinfo = (readbuf_info *)get(empty_readbuf);
1037 
1038   // Read chunk of image into empty buffer.
1039   while ((bytesread =
1040 	  fread_use_coverage_map(state, rinfo->readbuf, 1, SIZE_OF_BUFFER,
1041 				 state->infile)) > longestneedle - 1) {
1042 
1043     if(state->modeVerbose) {
1044 #ifdef _WIN32
1045       fprintf(stdout, "Read %I64u bytes from image file.\n", bytesread);
1046 #else
1047       fprintf(stdout, "Read %llu bytes from image file.\n", bytesread);
1048 #endif
1049     }
1050 
1051     if((err = ferror(state->infile))) {
1052       err = SCALPEL_ERROR_FILE_READ;
1053       goto exit_reader_thread;
1054     }
1055 
1056     // progress report needs a fileposition that doesn't depend on coverage map
1057     fileposition = ftello(state->infile);
1058     displayPosition(&displayUnits, fileposition - filebegin,
1059 		    filesize, state->imagefile);
1060 
1061     // if carving is dependent on coverage map, need adjusted fileposition
1062     fileposition = ftello_use_coverage_map(state, state->infile);
1063     beginreadpos = fileposition - bytesread;
1064 
1065     //signal check
1066     if(signal_caught == SIGTERM || signal_caught == SIGINT) {
1067       clean_up(state, signal_caught);
1068     }
1069 
1070     // Put now-full buffer into full_readbuf queue.
1071     // Note that if the -s option was used we need to adjust the relative begin
1072     // position
1073     rinfo->bytesread = bytesread;
1074     rinfo->beginreadpos = beginreadpos - state->skip;
1075     put(full_readbuf, (void *)rinfo);
1076 
1077     // At this point, the host, GPU, whatever can start searching the buffer.
1078 
1079     // Get another empty buffer.
1080     rinfo = (readbuf_info *)get(empty_readbuf);
1081 
1082     // move file position back a bit so headers and footers that fall
1083     // across SIZE_OF_BUFFER boundaries in the image file aren't
1084     // missed
1085     fseeko_use_coverage_map(state, state->infile, -1 * (longestneedle - 1));
1086   }
1087 
1088  exit_reader_thread:
1089   if (err != SCALPEL_OK) {
1090     handleError(state, err);
1091   }
1092   // Done reading image.
1093   reads_finished = TRUE;
1094   if (state->infile) {
1095     fclose(state->infile);
1096   }
1097   pthread_exit(0);
1098   return NULL;
1099 }
1100 
1101 
1102 
1103 // Scalpel's approach dictates that this function dig through an image
1104 // file, building the header/footer offset database.  The task of
1105 // extracting files from the image has been moved to carveImageFile(),
1106 // which operates in a second pass over the image.  Digging for
1107 // header/footer values proceeds in SIZE_OF_BUFFER sized chunks of the
1108 // image file.  This buffer is now global and named "readbuffer".
1109 int digImageFile(struct scalpelState *state) {
1110 
1111   int status, err;
1112   int longestneedle = findLongestNeedle(state->SearchSpec);
1113   long long filebegin, filesize;
1114 
1115 
1116   if ((err = setupAuditFile(state)) != SCALPEL_OK) {
1117     return err;
1118   }
1119 
1120   if(state->SearchSpec[0].suffix == NULL) {
1121     return SCALPEL_ERROR_NO_SEARCH_SPEC;
1122   }
1123 
1124   // open current image file
1125   if((state->infile = fopen(state->imagefile, "rb")) == NULL) {
1126     return SCALPEL_ERROR_FILE_OPEN;
1127   }
1128 
1129 #ifdef _WIN32
1130   // set binary mode for Win32
1131   setmode(fileno(state->infile), O_BINARY);
1132 #endif
1133 #ifdef __linux
1134   fcntl(fileno(state->infile), F_SETFL, O_LARGEFILE);
1135 #endif
1136 
1137   // skip initial portion of input file, if that cmd line option
1138   // was set
1139   if(state->skip > 0) {
1140     if(!skipInFile(state, state->infile)) {
1141       return SCALPEL_ERROR_FILE_READ;
1142     }
1143 
1144     // ***GGRIII: want to update coverage bitmap when skip is specified????
1145   }
1146 
1147   filebegin = ftello(state->infile);
1148   if((filesize = measureOpenFile(state->infile, state)) == -1) {
1149     fprintf(stderr,
1150 	    "ERROR: Couldn't measure size of image file %s\n",
1151 	    state->imagefile);
1152     return SCALPEL_ERROR_FILE_READ;
1153   }
1154 
1155   // can't process an image file smaller than twice the longest needle
1156   if(filesize <= longestneedle * 2) {
1157     return SCALPEL_ERROR_FILE_TOO_SMALL;
1158   }
1159 
1160 #ifdef _WIN32
1161   if(state->modeVerbose) {
1162     fprintf(stdout, "Total file size is %I64u bytes\n", filesize);
1163   }
1164 #else
1165   if(state->modeVerbose) {
1166     fprintf(stdout, "Total file size is %llu bytes\n", filesize);
1167   }
1168 #endif
1169 
1170   // allocate and initialize coverage bitmap and blockmap, if appropriate
1171   if((err = setupCoverageMaps(state, filesize)) != SCALPEL_OK) {
1172     return err;
1173   }
1174 
1175   // process SIZE_OF_BUFFER-sized chunks of the current image
1176   // file and look for both headers and footers, recording their
1177   // offsets for use in the 2nd scalpel phase, when file data will
1178   // be extracted.
1179 
1180   fprintf(stdout, "Image file pass 1/2.\n");
1181 
1182   // Create and start the streaming reader thread for this image file.
1183   reads_finished = FALSE;
1184   pthread_t reader;
1185   if(pthread_create(&reader, NULL, streaming_reader, (void *)state) != 0) {
1186     return SCALPEL_ERROR_PTHREAD_FAILURE;
1187   }
1188 
1189   // wait on reader mutex "reads_begun"
1190 
1191 
1192 #ifdef GPU_THREADING
1193 
1194   // Create and start the gpu_handler for searches on this image.
1195   gpu_finished = FALSE;
1196   pthread_t gpu;
1197   if(pthread_create(&gpu, NULL, gpu_handler, (void *)state) != 0) {
1198     return SCALPEL_ERROR_PTHREAD_FAILURE;
1199   }
1200 
1201   // The reader is now reading in the image, and the GPU is searching the
1202   // buffers. We get the results buffers and call digBuffer to decode them.
1203   while (!gpu_finished) {  // || !results_readbuf->empty) {
1204       readbuf_info *rinfo = (readbuf_info *)get(results_readbuf);
1205       readbuffer = rinfo->readbuf;
1206       if((status =
1207 	  digBuffer(state, rinfo->bytesread, rinfo->beginreadpos
1208 		    )) != SCALPEL_OK) {
1209 	return status;
1210       }
1211       put(empty_readbuf, (void *)rinfo);
1212     }
1213 
1214   // the GPU is finished, but there may still be results buffers to decode
1215   while (!results_readbuf->empty) {
1216       readbuf_info *rinfo = (readbuf_info *)get(results_readbuf);
1217       readbuffer = rinfo->readbuf;
1218       if((status =
1219 	  digBuffer(state, rinfo->bytesread, rinfo->beginreadpos
1220 		    )) != SCALPEL_OK) {
1221 	return status;
1222       }
1223       put(empty_readbuf, (void *)rinfo);
1224     }
1225 
1226 
1227 #endif
1228 
1229 #ifdef MULTICORE_THREADING
1230 
1231   // The reader is now reading in chunks of the image. We call digbuffer on
1232   // these chunks for multi-threaded search.
1233 
1234   while (!reads_finished || !full_readbuf->empty) {
1235 
1236     readbuf_info *rinfo = (readbuf_info *)get(full_readbuf);
1237     readbuffer = rinfo->readbuf;
1238     if((status =
1239 	digBuffer(state, rinfo->bytesread, rinfo->beginreadpos
1240 		  )) != SCALPEL_OK) {
1241       return status;
1242     }
1243     put(empty_readbuf, (void *)rinfo);
1244   }
1245 
1246 #endif
1247 
1248   return SCALPEL_OK;
1249 }
1250 
1251 
1252 
1253 
1254 // carveImageFile() uses the header/footer offsets database
1255 // created by digImageFile() to build a list of files to carve.  These
1256 // files are then carved during a single, sequential pass over the
1257 // image file.  The global 'readbuffer' is used as a buffer in this
1258 // function.
1259 
1260 int carveImageFile(struct scalpelState *state) {
1261 
1262   FILE *infile;
1263   struct SearchSpecLine *currentneedle;
1264   struct CarveInfo *carveinfo;
1265   char fn[MAX_STRING_LENGTH];	// temp buffer for output filename
1266   char orgdir[MAX_STRING_LENGTH];	// buffer for name of organizing subdirectory
1267   long long start, stop;	// temp begin/end bytes for file to carve
1268   unsigned long long prevstopindex;	// tracks index of first 'reasonable'
1269   // footer
1270   int needlenum;
1271   long long filesize = 0, bytesread = 0, fileposition = 0, filebegin = 0;
1272   long err = 0;
1273   int displayUnits = UNITS_BYTES;
1274   int success = 0;
1275   long long i, j;
1276   int halt;
1277   char chopped;			// file chopped because it exceeds
1278   // max carve size for type?
1279   int CURRENTFILESOPEN = 0;	// number of files open (during carve)
1280   unsigned long long firstcandidatefooter=0;
1281 
1282   // index of header and footer within image file, in SIZE_OF_BUFFER
1283   // blocks
1284   unsigned long long headerblockindex, footerblockindex;
1285 
1286   struct Queue *carvelists;	// one entry for each SIZE_OF_BUFFER bytes of
1287   // input file
1288 //  struct timeval queuenow, queuethen;
1289 
1290   // open image file and get size so carvelists can be allocated
1291   if((infile = fopen(state->imagefile, "rb")) == NULL) {
1292     fprintf(stderr, "ERROR: Couldn't open input file: %s -- %s\n",
1293 	    (*(state->imagefile) == '\0') ? "<blank>" : state->imagefile,
1294 	    strerror(errno));
1295     return SCALPEL_ERROR_FILE_OPEN;
1296   }
1297 
1298 #ifdef _WIN32
1299   // explicit binary option for Win32
1300   setmode(fileno(infile), O_BINARY);
1301 #endif
1302 #ifdef __linux
1303   fcntl(fileno(infile), F_SETFL, O_LARGEFILE);
1304 #endif
1305 
1306   // If skip was activated, then there's no way headers/footers were
1307   // found there, so skip during the carve operations, too
1308 
1309   if(state->skip > 0) {
1310     if(!skipInFile(state, infile)) {
1311       return SCALPEL_ERROR_FILE_READ;
1312     }
1313   }
1314 
1315   filebegin = ftello(infile);
1316   if((filesize = measureOpenFile(infile, state)) == -1) {
1317     fprintf(stderr,
1318 	    "ERROR: Couldn't measure size of image file %s\n",
1319 	    state->imagefile);
1320     return SCALPEL_ERROR_FILE_READ;
1321   }
1322 
1323 
1324 //  gettimeofday(&queuethen, 0);
1325 
1326   // allocate memory for carvelists--we alloc a queue for each
1327   // SIZE_OF_BUFFER bytes in advance because it's simpler and an empty
1328   // queue doesn't consume much memory, anyway.
1329 
1330   carvelists =
1331     (Queue *) malloc(sizeof(Queue) * (2 + (filesize / SIZE_OF_BUFFER)));
1332   checkMemoryAllocation(state, carvelists, __LINE__, __FILE__, "carvelists");
1333 
1334   // queue associated with each buffer of data holds pointers to
1335   // CarveInfo structures.
1336 
1337   fprintf(stdout, "Allocating work queues...\n");
1338 
1339   for(i = 0; i < 2 + (filesize / SIZE_OF_BUFFER); i++) {
1340     init_queue(&carvelists[i], sizeof(struct CarveInfo *), TRUE, 0, TRUE);
1341   }
1342   fprintf(stdout, "Work queues allocation complete. Building work queues...\n");
1343 
1344   // build carvelists before 2nd pass over image file
1345 
1346   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
1347 
1348     currentneedle = &(state->SearchSpec[needlenum]);
1349 
1350     // handle each discovered header independently
1351 
1352     prevstopindex = 0;
1353     for(i = 0; i < (long long)currentneedle->offsets.numheaders; i++) {
1354       start = currentneedle->offsets.headers[i];
1355 
1356 			////////////// DEBUG ////////////////////////
1357 			//fprintf(stdout, "start: %lu\n", start);
1358 
1359       // block aligned test for "-q"
1360 
1361       if(state->blockAlignedOnly && start % state->alignedblocksize != 0) {
1362 	continue;
1363       }
1364 
1365       stop = 0;
1366       chopped = 0;
1367 
1368       // case 1: no footer defined for this file type
1369       if(!currentneedle->endlength) {
1370 
1371 	// this is the unfortunate case--if file type doesn't have a footer,
1372 	// all we can do is carve a block between the header position and the
1373 	// maximum carve size.
1374 	stop = start + currentneedle->length - 1;
1375 	// these are always considered chopped, because we don't really
1376 	// know the actual size
1377 	chopped = 1;
1378       }
1379       else if(currentneedle->searchtype == SEARCHTYPE_FORWARD ||
1380 	      currentneedle->searchtype == SEARCHTYPE_FORWARD_NEXT) {
1381 	// footer defined: use FORWARD or FORWARD_NEXT semantics.
1382 	// Stop at first occurrence of footer, but for FORWARD,
1383 	// include the header in the carved file; for FORWARD_NEXT,
1384 	// don't include footer in carved file.  For FORWARD_NEXT, if
1385 	// no footer is found, then the maximum carve size for this
1386 	// file type will be used and carving will proceed.  For
1387 	// FORWARD, if no footer is found then no carving will be
1388 	// performed unless -b was specified on the command line.
1389 
1390 	halt = 0;
1391 
1392 	if (state->handleEmbedded &&
1393 	    (currentneedle->searchtype == SEARCHTYPE_FORWARD ||
1394 	     currentneedle->searchtype == SEARCHTYPE_FORWARD_NEXT)) {
1395 	  firstcandidatefooter=adjustForEmbedding(currentneedle, i, &prevstopindex);
1396 	}
1397 	else {
1398 	  firstcandidatefooter=prevstopindex;
1399 	}
1400 
1401 	for (j=firstcandidatefooter;
1402 	     j < (long long)currentneedle->offsets.numfooters && ! halt; j++) {
1403 
1404 	  if((long long)currentneedle->offsets.footers[j] <= start) {
1405 	    if (! state->handleEmbedded) {
1406 	      prevstopindex=j;
1407 	    }
1408 
1409 	  }
1410 	  else {
1411 	    halt = 1;
1412 	    stop = currentneedle->offsets.footers[j];
1413 
1414 	    if(currentneedle->searchtype == SEARCHTYPE_FORWARD) {
1415 	      // include footer in carved file
1416 	      stop += currentneedle->endlength - 1;
1417 	      // 	BUG? this or above?		    stop += currentneedle->offsets.footerlens[j] - 1;
1418 	    }
1419 	    else {
1420 	      // FORWARD_NEXT--don't include footer in carved file
1421 	      stop--;
1422 	    }
1423 	    // sanity check on size of potential file to carve--different
1424 	    // actions depending on FORWARD or FORWARD_NEXT semantics
1425 	    if(stop - start + 1 > (long long)currentneedle->length) {
1426 	      if(currentneedle->searchtype == SEARCHTYPE_FORWARD) {
1427 		// if the user specified -b, then foremost 0.69
1428 		// compatibility is desired: carve this file even
1429 		// though the footer wasn't found and indicate
1430 		// the file was chopped, in the log.  Otherwise,
1431 		// carve nothing and move on.
1432 		if(state->carveWithMissingFooters) {
1433 		  stop = start + currentneedle->length - 1;
1434 		  chopped = 1;
1435 		}
1436 		else {
1437 		  stop = 0;
1438 		}
1439 	      }
1440 	      else {
1441 		// footer found for FORWARD_NEXT, but distance exceeds
1442 		// max carve size for this file type, so use max carve
1443 		// size as stop
1444 		stop = start + currentneedle->length - 1;
1445 		chopped = 1;
1446 	      }
1447 	    }
1448 	  }
1449 	}
1450 	if(!halt &&
1451 	   (currentneedle->searchtype == SEARCHTYPE_FORWARD_NEXT ||
1452 	    (currentneedle->searchtype == SEARCHTYPE_FORWARD &&
1453 	     state->carveWithMissingFooters))) {
1454 	  // no footer found for SEARCHTYPE_FORWARD_NEXT, or no footer
1455 	  // found for SEARCHTYPE_FORWARD and user specified -b, so just use
1456 	  // max carve size for this file type as stop
1457 	  stop = start + currentneedle->length - 1;
1458 	}
1459       }
1460       else {
1461 	// footer defined: use REVERSE semantics: want matching footer
1462 	// as far away from header as possible, within maximum carving
1463 	// size for this file type.  Don't bother to look at footers
1464 	// that can't possibly match a header and remember this info
1465 	// in prevstopindex, as the next headers will be even deeper
1466 	// into the image file.  Footer is included in carved file for
1467 	// this type of carve.
1468 	halt = 0;
1469 	for(j = prevstopindex; j < (long long)currentneedle->offsets.numfooters &&
1470 	      !halt; j++) {
1471 	  if((long long)currentneedle->offsets.footers[j] <= start) {
1472 	    prevstopindex = j;
1473 	  }
1474 	  else if(currentneedle->offsets.footers[j] - start <=
1475 		  currentneedle->length) {
1476 	    stop = currentneedle->offsets.footers[j]
1477 	      + currentneedle->endlength - 1;
1478 	  }
1479 	  else {
1480 	    halt = 1;
1481 	  }
1482 	}
1483       }
1484 
1485       // if stop <> 0, then we have enough information to set up a
1486       // file carving operation.  It must pass the minimum carve size
1487       // test, if currentneedle->minlength != 0.
1488       //     if(stop) {
1489       if (stop && (stop - start + 1) >= (long long)currentneedle->minlength) {
1490 
1491 	// don't carve past end of image file...
1492 	stop = stop > filesize ? filesize : stop;
1493 
1494 	// find indices (in SIZE_OF_BUFFER units) of header and
1495 	// footer, so the carveinfo can be placed into the right
1496 	// queues.  The priority of each element in a queue allows the
1497 	// appropriate thing to be done (e.g., STARTSTOPCARVE,
1498 	// STARTCARVE, STOPCARVE, CONTINUECARVE).
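	// For example (illustrative): a file whose header lands in buffer 3 and
	// whose footer lands in buffer 7 is queued as STARTCARVE in carvelists[3],
	// CONTINUECARVE in carvelists[4..6], and STOPCARVE in carvelists[7]; a
	// file contained in a single buffer is queued once as STARTSTOPCARVE.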
1499 
1500 	headerblockindex = start / SIZE_OF_BUFFER;
1501 	footerblockindex = stop / SIZE_OF_BUFFER;
1502 
1503 	// set up a struct CarveInfo for inclusion into the
1504 	// appropriate carvelists
1505 
1506 	// generate unique filename for file to carve
1507 
1508 	if(state->organizeSubdirectories) {
1509 	  snprintf(orgdir, MAX_STRING_LENGTH, "%s/%s-%d-%1lu",
1510 		   state->outputdirectory,
1511 		   currentneedle->suffix,
1512 		   needlenum, currentneedle->organizeDirNum);
1513 	  if(!state->previewMode) {
1514 #ifdef _WIN32
1515 	    mkdir(orgdir);
1516 #else
1517 	    mkdir(orgdir, 0777);
1518 #endif
1519 	  }
1520 	}
1521 	else {
1522 	  snprintf(orgdir, MAX_STRING_LENGTH, "%s", state->outputdirectory);
1523 	}
1524 
1525 	if(state->modeNoSuffix || currentneedle->suffix[0] ==
1526 	   SCALPEL_NOEXTENSION) {
1527 #ifdef _WIN32
1528 	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08I64u",
1529 		   orgdir, state->fileswritten);
1530 #else
1531 	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08llu",
1532 		   orgdir, state->fileswritten);
1533 #endif
1534 
1535 	}
1536 	else {
1537 #ifdef _WIN32
1538 	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08I64u.%s",
1539 		   orgdir, state->fileswritten, currentneedle->suffix);
1540 #else
1541 	  snprintf(fn, MAX_STRING_LENGTH, "%s/%08llu.%s",
1542 		   orgdir, state->fileswritten, currentneedle->suffix);
1543 #endif
1544 	}
1545 	state->fileswritten++;
1546 	currentneedle->numfilestocarve++;
1547 	if(currentneedle->numfilestocarve % state->organizeMaxFilesPerSub == 0) {
1548 	  currentneedle->organizeDirNum++;
1549 	}
1550 
1551 	carveinfo = (struct CarveInfo *)malloc(sizeof(struct CarveInfo));
1552 	checkMemoryAllocation(state, carveinfo, __LINE__, __FILE__,
1553 			      "carveinfo");
1554 
1555 	// remember filename
1556 	carveinfo->filename = (char *)malloc(strlen(fn) + 1);
1557 	checkMemoryAllocation(state, carveinfo->filename, __LINE__,
1558 			      __FILE__, "carveinfo->filename");
1559 	strcpy(carveinfo->filename, fn);
1560 	carveinfo->start = start;
1561 	carveinfo->stop = stop;
1562 	carveinfo->chopped = chopped;
1563 
1564 	// fp will be allocated when the first byte of the file is
1565 	// in the current buffer and cleaned up when we encounter the
1566 	// last byte of the file.
1567 	carveinfo->fp = 0;
1568 
1569 	if(headerblockindex == footerblockindex) {
1570 	  // header and footer will both appear in the same buffer
1571 	  add_to_queue(&carvelists[headerblockindex],
1572 		       &carveinfo, STARTSTOPCARVE);
1573 	}
1574 	else {
1575 	  // header/footer will appear in different buffers, add carveinfo to
1576 	  // stop and start lists...
1577 	  add_to_queue(&carvelists[headerblockindex], &carveinfo, STARTCARVE);
1578 	  add_to_queue(&carvelists[footerblockindex], &carveinfo, STOPCARVE);
1579 	  // .. and to all lists in between (these will result in a full
1580 	  // SIZE_OF_BUFFER bytes being carved into the file).
1581 	  for(j = (long long)headerblockindex + 1; j < (long long)footerblockindex; j++) {
1582 	    add_to_queue(&carvelists[j], &carveinfo, CONTINUECARVE);
1583 	  }
1584 	}
1585       }
1586     }
1587   }
1588 
1589   fprintf(stdout, "Work queues built.  Workload:\n");
1590   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
1591     currentneedle = &(state->SearchSpec[needlenum]);
1592     fprintf(stdout, "%s with header \"", currentneedle->suffix);
1593     fprintf(stdout, "%s", currentneedle->begintext);
1594     fprintf(stdout, "\" and footer \"");
1595     if(currentneedle->end == 0) {
1596       fprintf(stdout, "NONE");
1597     }
1598     else {
1599       fprintf(stdout, "%s", currentneedle->endtext);
1600     }
1601 #ifdef _WIN32
1602     fprintf(stdout, "\" --> %I64u files\n", currentneedle->numfilestocarve);
1603 #else
1604     fprintf(stdout, "\" --> %llu files\n", currentneedle->numfilestocarve);
1605 #endif
1606 
1607   }
1608 
1609   if(state->previewMode) {
1610     fprintf(stdout, "** PREVIEW MODE: GENERATING AUDIT LOG ONLY **\n");
1611     fprintf(stdout, "** NO CARVED FILES WILL BE WRITTEN **\n");
1612   }
1613 
1614   fprintf(stdout, "Carving files from image.\n");
1615   fprintf(stdout, "Image file pass 2/2.\n");
1616 
1617   // now read image file in SIZE_OF_BUFFER-sized windows, writing
1618   // carved files to output directory
1619 
1620   success = 1;
1621   while (success) {
1622 
1623     unsigned long long biglseek = 0L;
1624     // goal: skip reading buffers for which there is no work to do by using one big
1625     // seek
1626     fileposition = ftello_use_coverage_map(state, infile);
1627 
1628     while (queue_length(&carvelists[fileposition / SIZE_OF_BUFFER]) == 0
1629 	   && success) {
1630       biglseek += SIZE_OF_BUFFER;
1631       fileposition += SIZE_OF_BUFFER;
1632       success = fileposition <= filesize;
1633 
1634     }
1635 
1636     if(success && biglseek) {
1637       fseeko_use_coverage_map(state, infile, biglseek);
1638     }
1639 
1640     if(!success) {
1641       // not an error--just means we've exhausted the image file--show
1642       // progress report then quit carving
1643       displayPosition(&displayUnits, filesize, filesize, state->imagefile);
1644 
1645       continue;
1646     }
1647 
1648     if(!state->previewMode) {
1649       bytesread =
1650 	fread_use_coverage_map(state, readbuffer, 1, SIZE_OF_BUFFER, infile);
1651       // Check for read errors
1652       if((err = ferror(infile))) {
1653 	return SCALPEL_ERROR_FILE_READ;
1654       }
1655       else if(bytesread == 0) {
1656 	// no error, but image file exhausted
1657 	success = 0;
1658 	continue;
1659       }
1660     }
1661     else {
1662       // in preview mode, seeks are used in the 2nd pass instead of
1663       // reads.  This isn't optimal, but it's fast enough and avoids
1664       // complicating the file carving code further.
1665 
1666       fileposition = ftello_use_coverage_map(state, infile);
1667       fseeko_use_coverage_map(state, infile, SIZE_OF_BUFFER);
1668       bytesread = ftello_use_coverage_map(state, infile) - fileposition;
1669 
1670       // Check for errors
1671       if((err = ferror(infile))) {
1672 	return SCALPEL_ERROR_FILE_READ;
1673       }
1674       else if(bytesread == 0) {
1675 	// no error, but image file exhausted
1676 	success = 0;
1677 	continue;
1678       }
1679     }
1680 
1681     success = 1;
1682 
1683     // progress report needs real file position
1684     fileposition = ftello(infile);
1685     displayPosition(&displayUnits, fileposition - filebegin,
1686 		    filesize, state->imagefile);
1687 
1688     // if using coverage map for carving, need adjusted file position
1689     fileposition = ftello_use_coverage_map(state, infile);
1690 
1691     // signal check
1692     if(signal_caught == SIGTERM || signal_caught == SIGINT) {
1693       clean_up(state, signal_caught);
1694     }
1695 
1696     // deal with work for this SIZE_OF_BUFFER-sized block by
1697     // examining the associated queue
1698     rewind_queue(&carvelists[(fileposition - bytesread) / SIZE_OF_BUFFER]);
1699 
1700     while (!end_of_queue
1701 	   (&carvelists[(fileposition - bytesread) / SIZE_OF_BUFFER])) {
1702       struct CarveInfo *carve;
1703       int operation;
1704       unsigned long long bytestowrite = 0, byteswritten = 0, offset = 0;
1705 
1706       peek_at_current(&carvelists
1707 		      [(fileposition - bytesread) / SIZE_OF_BUFFER], &carve);
1708       operation =
1709 	current_priority(&carvelists
1710 			 [(fileposition - bytesread) / SIZE_OF_BUFFER]);
1711 
1712       // open file, if beginning of carve operation or file had to be closed
1713       // previously due to resource limitations
1714       if(operation == STARTSTOPCARVE ||
1715 	 operation == STARTCARVE || carve->fp == 0) {
1716 
1717 	if(!state->previewMode && state->modeVerbose) {
1718 	  fprintf(stdout, "OPENING %s\n", carve->filename);
1719 	}
1720 
1721 	carve->fp = (FILE *) 1;
1722 	if(!state->previewMode) {
1723 	  carve->fp = fopen(carve->filename, "ab");
1724 	}
1725 
1726 	if(!carve->fp) {
1727 	  fprintf(stderr, "Error opening file: %s -- %s\n",
1728 		  carve->filename, strerror(errno));
1729 	  fprintf(state->auditFile, "Error opening file: %s -- %s\n",
1730 		  carve->filename, strerror(errno));
1731 	  return SCALPEL_ERROR_FILE_WRITE;
1732 	}
1733 	else {
1734 	  CURRENTFILESOPEN++;
1735 	}
1736       }
1737 
1738       // write some portion of current readbuffer
1739       switch (operation) {
1740       case CONTINUECARVE:
1741 	offset = 0;
1742 	bytestowrite = SIZE_OF_BUFFER;
1743 	break;
1744       case STARTSTOPCARVE:
1745 	offset = carve->start - (fileposition - bytesread);
1746 	bytestowrite = carve->stop - carve->start + 1;
1747 	break;
1748       case STARTCARVE:
1749 	offset = carve->start - (fileposition - bytesread);
1750 	bytestowrite = (carve->stop - carve->start + 1) >
1751 	  (SIZE_OF_BUFFER - offset) ? (SIZE_OF_BUFFER - offset) :
1752 	  (carve->stop - carve->start + 1);
1753 	break;
1754       case STOPCARVE:
1755 	offset = 0;
1756 	bytestowrite = carve->stop - (fileposition - bytesread) + 1;
1757 	break;
1758       }
1759 
1760       if(!state->previewMode) {
1761 //	struct timeval writenow, writethen;
1762 //	gettimeofday(&writethen, 0);
1763 	if((byteswritten = fwrite(readbuffer + offset,
1764 				  sizeof(char),
1765 				  bytestowrite, carve->fp)) != bytestowrite) {
1766 
1767 	  fprintf(stderr, "Error writing to file: %s -- %s\n",
1768 		  carve->filename, strerror(ferror(carve->fp)));
1769 	  fprintf(state->auditFile,
1770 		  "Error writing to file: %s -- %s\n",
1771 		  carve->filename, strerror(ferror(carve->fp)));
1772 	  return SCALPEL_ERROR_FILE_WRITE;
1773 	}
1774       }
1775 
1776       // close file, if necessary.  Always do it on STARTSTOPCARVE and
1777       // STOPCARVE, but also do it if we have a large number of files
1778       // open, otherwise we'll run out of available file handles.  Updating the
1779       // coverage blockmap and auditing is done here, when a file being carved
1780       // is closed for the last time.
1781       if(operation == STARTSTOPCARVE ||
1782 	 operation == STOPCARVE || CURRENTFILESOPEN > MAX_FILES_TO_OPEN) {
1783 	err = 0;
1784 	if(!state->previewMode) {
1785 	  if(state->modeVerbose) {
1786 	    fprintf(stdout, "CLOSING %s\n", carve->filename);
1787 	  }
1788 	  err = fclose(carve->fp);
1789 	}
1790 
1791 	if(err) {
1792 	  fprintf(stderr, "Error closing file: %s -- %s\n\n",
1793 		  carve->filename, strerror(ferror(carve->fp)));
1794 	  fprintf(state->auditFile,
1795 		  "Error closing file: %s -- %s\n\n",
1796 		  carve->filename, strerror(ferror(carve->fp)));
1797 	  return SCALPEL_ERROR_FILE_WRITE;
1798 	}
1799 	else {
1800 	  CURRENTFILESOPEN--;
1801 	  carve->fp = 0;
1802 
1803 	  // release filename buffer if it won't be needed again.  Don't release it
1804 	  // if the file was closed only because a large number of files are currently
1805 	  // open!
1806 	  if(operation == STARTSTOPCARVE || operation == STOPCARVE) {
1807 	    auditUpdateCoverageBlockmap(state, carve);
1808 	    free(carve->filename);
1809 	  }
1810 	}
1811       }
1812       next_element(&carvelists[(fileposition - bytesread) / SIZE_OF_BUFFER]);
1813     }
1814   }
1815 
1816   //  closeFile(infile);
1817   fclose(infile);
1818 
1819   // write header/footer database, if necessary, before
1820   // cleanup for current image file.
1821 
1822   if(state->generateHeaderFooterDatabase) {
1823     if((err = writeHeaderFooterDatabase(state)) != SCALPEL_OK) {
1824       return err;
1825     }
1826   }
1827 
1828   // tear down coverage maps, if necessary
1829   destroyCoverageMaps(state);
1830 
1831   printf("Processing of image file complete. Cleaning up...\n");
1832 
1833   // tear down header/footer databases
1834 
1835   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
1836     currentneedle = &(state->SearchSpec[needlenum]);
1837     if(currentneedle->offsets.headers) {
1838       free(currentneedle->offsets.headers);
1839     }
1840     if(currentneedle->offsets.footers) {
1841       free(currentneedle->offsets.footers);
1842     }
1843     currentneedle->offsets.headers = 0;
1844     currentneedle->offsets.footers = 0;
1845     currentneedle->offsets.numheaders = 0;
1846     currentneedle->offsets.numfooters = 0;
1847     currentneedle->offsets.headerstorage = 0;
1848     currentneedle->offsets.footerstorage = 0;
1849     currentneedle->numfilestocarve = 0;
1850   }
1851 
1852   // tear down work queues--no memory deallocation for each queue
1853   // entry required, because memory associated with fp and the
1854   // filename was freed after the carved file was closed.
1855 
1856   // destroy queues
1857   for(i = 0; i < 2 + (filesize / SIZE_OF_BUFFER); i++) {
1858     destroy_queue(&carvelists[i]);
1859   }
1860   // destroy array of queues
1861   free(carvelists);
1862 
1863   printf("Done.");
1864   return SCALPEL_OK;
1865 }
1866 
1867 
1868 
1869 
1870 
1871 
1872 // The coverage blockmap marks which blocks (of a user-specified size)
1873 // have been "covered" by a carved file.  If the coverage blockmap
1874 // is to be modified, check to see if it exists.  If it does, then
1875 // open it and set the file handle in the Scalpel state.  If it
1876 // doesn't, create a zeroed copy and set the file handle in the
1877 // Scalpel state.  If the coverage blockmap is guiding carving, then
1878 // create the coverage bitmap and initialize it using the coverage
1879 // blockmap file.  The difference between the coverage blockmap (on
1880 // disk) and the coverage bitmap (in memory) is that the blockmap
1881 // counts carved files that cover a block.  The coverage bitmap only
1882 // indicates if ANY carved file covers a block.  'filesize' is the
1883 // size of the image file being examined.
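//
// On-disk layout sketch (illustrative): the blockmap file is a flat
// array of unsigned ints--the first entry is the block size, followed
// by one counter per block.  For a small image carved with 512-byte
// blocks where blocks 0, 1, and 3 are each covered by one carved file
// and block 2 is untouched, the file holds [512][1][1][0][1], and the
// in-core bitmap built from it is 0x0B (bit k set when block k's
// counter is nonzero).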
1884 
1885 static int
1886 setupCoverageMaps(struct scalpelState *state, unsigned long long filesize) {
1887 
1888   char fn[MAX_STRING_LENGTH];	// filename for coverage blockmap
1889   unsigned long long i, k;
1890   int empty;
1891   unsigned int blocksize, entry;
1892 
1893 
1894   state->coveragebitmap = 0;
1895   state->coverageblockmap = 0;
1896 
1897   if(!state->useCoverageBlockmap && !state->updateCoverageBlockmap) {
1898     return SCALPEL_OK;
1899   }
1900 
1901   fprintf(stdout, "Setting up coverage blockmap.\n");
1902   // generate pathname for coverage blockmap
1903   snprintf(fn, MAX_STRING_LENGTH, "%s", state->coveragefile);
1904 
1905   fprintf(stdout, "Coverage blockmap is \"%s\".\n", fn);
1906 
1907   empty = ((state->coverageblockmap = fopen(fn, "rb")) == NULL);
1908   fprintf(stdout, "Coverage blockmap file is %s.\n",
1909 	  (empty ? "EMPTY" : "NOT EMPTY"));
1910 
1911   if(!empty) {
1912 #ifdef _WIN32
1913     // set binary mode for Win32
1914     setmode(fileno(state->coverageblockmap), O_BINARY);
1915 #endif
1916 #ifdef __linux
1917     fcntl(fileno(state->coverageblockmap), F_SETFL, O_LARGEFILE);
1918 #endif
1919 
1920     fprintf(stdout, "Reading blocksize from coverage blockmap file.\n");
1921 
1922     // read block size and make sure it matches user-specified block size
1923     if(fread(&blocksize, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
1924       fprintf(stderr,
1925 	      "Error reading coverage blockmap blocksize in\ncoverage blockmap file: %s\n",
1926 	      fn);
1927       fprintf(state->auditFile,
1928 	      "Error reading coverage blockmap blocksize in\ncoverage blockmap file: %s\n",
1929 	      fn);
1930       return SCALPEL_ERROR_FATAL_READ;
1931     }
1932 
1933     if(state->coverageblocksize != 0 && blocksize != state->coverageblocksize) {
1934       fprintf(stderr,
1935 	      "User-specified blocksize does not match blocksize in\ncoverage blockmap file: %s; aborting.\n",
1936 	      fn);
1937       fprintf(state->auditFile,
1938 	      "User-specified blocksize does not match blocksize in\ncoverage blockmap file: %s\n",
1939 	      fn);
1940       return SCALPEL_GENERAL_ABORT;
1941     }
1942 
1943     state->coverageblocksize = blocksize;
1944     fprintf(stdout, "Blocksize for coverage blockmap is %u.\n",
1945 	    state->coverageblocksize);
1946 
1947     state->coveragenumblocks =
1948       (unsigned long
1949        long)(ceil((double)filesize / (double)state->coverageblocksize));
1950 #ifdef _WIN32
1951     fprintf(stdout, "# of blocks in coverage blockmap is %I64u.\n",
1952 	    state->coveragenumblocks);
1953 #else
1954     fprintf(stdout, "# of blocks in coverage blockmap is %llu.\n",
1955 	    state->coveragenumblocks);
1956 #endif
1957 
1958     fprintf(stdout, "Allocating and clearing in-core coverage bitmap.\n");
1959     // for bitmap, 8 bits per unsigned char, with each bit representing one
1960     // block
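    // (each block k maps to byte k / 8, bit k % 8 of the bitmap)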
1961     state->coveragebitmap =
1962       (unsigned char *)malloc(((state->coveragenumblocks + 7) / 8) *
1963 			      sizeof(unsigned char));
1964     checkMemoryAllocation(state, state->coveragebitmap, __LINE__, __FILE__,
1965 			  "coveragebitmap");
1966 
1967     // zap coverage bitmap (round up so a partial final byte is included)
1968     for(k = 0; k < (state->coveragenumblocks + 7) / 8; k++) {
1969       state->coveragebitmap[k] = 0;
1970     }
1971 
1972     fprintf(stdout,
1973 	    "Reading existing coverage blockmap...this may take a while.\n");
1974 
1975     for(i = 0; i < state->coveragenumblocks; i++) {
1976       fseeko(state->coverageblockmap, (i + 1) * sizeof(unsigned int), SEEK_SET);
1977       if(fread(&entry, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
1978 	fprintf(stderr,
1979 		"Error reading coverage blockmap entry (blockmap truncated?): %s\n",
1980 		fn);
1981 	fprintf(state->auditFile,
1982 		"Error reading coverage blockmap entry (blockmap truncated?): %s\n",
1983 		fn);
1984 	return SCALPEL_ERROR_FATAL_READ;
1985       }
1986       if(entry) {
1987 	state->coveragebitmap[i / 8] |= 1 << (i % 8);
1988       }
1989     }
1990   }
1991   else if(empty && state->useCoverageBlockmap && !state->updateCoverageBlockmap) {
1992     fprintf(stderr,
1993 	    "-u option requires that the blockmap file %s exist.\n", fn);
1994     fprintf(state->auditFile,
1995 	    "-u option requires that the blockmap file %s exist.\n", fn);
1996     return SCALPEL_GENERAL_ABORT;
1997   }
1998   else {
1999     // empty coverage blockmap
2000     if(state->coverageblocksize == 0) {	// user didn't override default
2001       state->coverageblocksize = 512;
2002     }
2003     fprintf(stdout, "Blocksize for coverage blockmap is %u.\n",
2004 	    state->coverageblocksize);
2005     state->coveragenumblocks =
2006       (unsigned long long)ceil((double)filesize /
2007 			       (double)state->coverageblocksize);
2008 #ifdef _WIN32
2009     fprintf(stdout, "# of blocks in coverage blockmap is %I64u.\n",
2010 	    state->coveragenumblocks);
2011 #else
2012     fprintf(stdout, "# of blocks in coverage blockmap is %llu.\n",
2013 	    state->coveragenumblocks);
2014 #endif
2015 
2016     fprintf(stdout, "Allocating and clearing in-core coverage bitmap.\n");
2017     // for bitmap, 8 bits per unsigned char, with each bit representing one
2018     // block
2019     state->coveragebitmap =
2020       (unsigned char *)malloc(((state->coveragenumblocks + 7) / 8) *
2021 			      sizeof(unsigned char));
2022     checkMemoryAllocation(state, state->coveragebitmap, __LINE__, __FILE__,
2023 			  "coveragebitmap");
2024 
2025     // zap coverage bitmap (round up so a partial final byte is included)
2026     for(k = 0; k < (state->coveragenumblocks + 7) / 8; k++) {
2027       state->coveragebitmap[k] = 0;
2028     }
2029   }
2030 
2031   // change mode to read/write for future updates if coverage blockmap will be updated
2032   if(state->updateCoverageBlockmap) {
2033     if(state->modeVerbose) {
2034       fprintf(stdout, "Changing mode of coverage blockmap file to R/W.\n");
2035     }
2036 
2037     if(!empty) {
2038       fclose(state->coverageblockmap);
2039     }
2040     if((state->coverageblockmap = fopen(fn, (empty ? "w+b" : "r+b"))) == NULL) {
2041       fprintf(stderr, "Error writing to coverage blockmap file: %s\n", fn);
2042       fprintf(state->auditFile,
2043 	      "Error writing to coverage blockmap file: %s\n", fn);
2044       return SCALPEL_ERROR_FILE_WRITE;
2045     }
2046 
2047 #ifdef _WIN32
2048     // set binary mode for Win32
2049     setmode(fileno(state->coverageblockmap), O_BINARY);
2050 #endif
2051 #ifdef __linux
2052     fcntl(fileno(state->coverageblockmap), F_SETFL, O_LARGEFILE);
2053 #endif
2054 
2055     if(empty) {
2056       // create entries in empty coverage blockmap file
2057       fprintf(stdout,
2058 	      "Writing empty coverage blockmap...this may take a while.\n");
2059       entry = 0;
2060       if(fwrite
2061 	 (&(state->coverageblocksize), sizeof(unsigned int), 1,
2062 	  state->coverageblockmap) != 1) {
2063 	fprintf(stderr,
2064 		"Error writing initial entry in coverage blockmap file!\n");
2065 	fprintf(state->auditFile,
2066 		"Error writing initial entry in coverage blockmap file!\n");
2067 	return SCALPEL_ERROR_FILE_WRITE;
2068       }
2069       for(k = 0; k < state->coveragenumblocks; k++) {
2070 	if(fwrite
2071 	   (&entry, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
2072 	  fprintf(stderr, "Error writing to coverage blockmap file!\n");
2073 	  fprintf(state->auditFile,
2074 		  "Error writing to coverage blockmap file!\n");
2075 	  return SCALPEL_ERROR_FILE_WRITE;
2076 	}
2077       }
2078     }
2079   }
2080 
2081   fprintf(stdout, "Finished setting up coverage blockmap.\n");
2082 
2083   return SCALPEL_OK;
2084 
2085 }
2086 
2087 // map carve->start ... carve->stop into a queue of 'fragments' that
2088 // define a carved file in the disk image.
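// Example (illustrative): if carve->start..carve->stop spans 1500 bytes
// but one 512-byte block inside that logical range is already covered,
// the queue receives two fragments whose lengths still total 1500 and
// whose start/stop values are real image offsets that skip the covered
// block.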
2089 static void
2090 generateFragments(struct scalpelState *state, Queue * fragments,
2091 		  CarveInfo * carve) {
2092 
2093   unsigned long long curblock, neededbytes =
2094     carve->stop - carve->start + 1, bytestoskip, morebytes, totalbytes =
2095     0, curpos;
2096 
2097   Fragment frag;
2098 
2099 
2100   init_queue(fragments, sizeof(struct Fragment), TRUE, 0, TRUE);
2101 
2102   if(!state->useCoverageBlockmap) {
2103     // no translation necessary
2104     frag.start = carve->start;
2105     frag.stop = carve->stop;
2106     add_to_queue(fragments, &frag, 0);
2107     return;
2108   }
2109   else {
2110     curpos = positionUseCoverageBlockmap(state, carve->start);
2111     curblock = curpos / state->coverageblocksize;
2112 
2113     while (totalbytes < neededbytes && curblock < state->coveragenumblocks) {
2114 
2115       morebytes = 0;
2116       bytestoskip = 0;
2117 
2118       // skip covered blocks
2119       while (curblock < state->coveragenumblocks &&
2120 	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
2121 	bytestoskip += state->coverageblocksize -
2122 	  curpos % state->coverageblocksize;
2123 	curblock++;
2124       }
2125 
2126       curpos += bytestoskip;
2127 
2128       // accumulate uncovered blocks in fragment
2129       while (curblock < state->coveragenumblocks &&
2130 	     ((state->
2131 	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
2132 	     && totalbytes + morebytes < neededbytes) {
2133 
2134 	morebytes += state->coverageblocksize -
2135 	  curpos % state->coverageblocksize;
2136 
2137 	curblock++;
2138       }
2139 
2140       // cap size
2141       if(totalbytes + morebytes > neededbytes) {
2142 	morebytes = neededbytes - totalbytes;
2143       }
2144 
2145       frag.start = curpos;
2146       curpos += morebytes;
2147       frag.stop = curpos - 1;
2148       totalbytes += morebytes;
2149 
2150       add_to_queue(fragments, &frag, 0);
2151     }
2152   }
2153 }
2154 
2155 
2156 // If the coverage blockmap is used to guide carving, then use the
2157 // coverage blockmap to map a logical index in the disk image (i.e.,
2158 // the index skips covered blocks) to an actual disk image index.  If
2159 // the coverage blockmap isn't being used, just returns the second
2160 // argument.
2161 //
2162 // ***This function assumes that the 'position' does NOT lie
2163 // within a covered block! ***
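//
// Example (illustrative): with a 512-byte coverage block size and
// blocks 0 and 1 marked covered, logical position 100 maps to real
// image position 1124: the 1024 covered bytes are skipped, then 100
// uncovered bytes are consumed.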
2164 static unsigned long long
2165 positionUseCoverageBlockmap(struct scalpelState *state,
2166 			    unsigned long long position) {
2167 
2168 
2169   unsigned long long totalbytes = 0, neededbytes = position,
2170     morebytes, curblock = 0, curpos = 0, bytestoskip;
2171 
2172   if(!state->useCoverageBlockmap) {
2173     return position;
2174   }
2175   else {
2176     while (totalbytes < neededbytes && curblock < state->coveragenumblocks) {
2177       morebytes = 0;
2178       bytestoskip = 0;
2179 
2180       // skip covered blocks
2181       while (curblock < state->coveragenumblocks &&
2182 	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
2183 	bytestoskip += state->coverageblocksize -
2184 	  curpos % state->coverageblocksize;
2185 	curblock++;
2186       }
2187 
2188       curpos += bytestoskip;
2189 
2190       // accumulate uncovered blocks
2191       while (curblock < state->coveragenumblocks &&
2192 	     ((state->
2193 	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
2194 	     && totalbytes + morebytes < neededbytes) {
2195 
2196 	morebytes += state->coverageblocksize -
2197 	  curpos % state->coverageblocksize;
2198 	curblock++;
2199       }
2200 
2201       // cap size
2202       if(totalbytes + morebytes > neededbytes) {
2203 	morebytes = neededbytes - totalbytes;
2204       }
2205 
2206       curpos += morebytes;
2207       totalbytes += morebytes;
2208     }
2209 
2210     return curpos;
2211   }
2212 }
2213 
2214 
2215 
2216 // update the coverage blockmap for a carved file (if appropriate) and write entries into
2217 // the audit log describing the carved file.  If the file is fragmented, then multiple
2218 // lines are written to indicate where the fragments occur.
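// Each fragment produces one audit line, roughly of the form
// (illustrative values):
//
//   00000012.jpg      1048576		NO           524288		disk.img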
2219 static int
2220 auditUpdateCoverageBlockmap(struct scalpelState *state,
2221 			    struct CarveInfo *carve) {
2222 
2223   struct Queue fragments;
2224   Fragment *frag;
2225   int err;
2226   unsigned long long k;
2227 
2228   // If the coverage blockmap is used to guide carving, then carve->start and
2229   // carve->stop may not correspond to addresses in the disk image--the coverage blockmap
2230   // processing layer in Scalpel may have skipped "in use" blocks.  Transform carve->start
2231   // and carve->stop into a list of fragments that contain real disk image offsets.
2232   generateFragments(state, &fragments, carve);
2233 
2234   rewind_queue(&fragments);
2235   while (!end_of_queue(&fragments)) {
2236     frag = (Fragment *) pointer_to_current(&fragments);
2237     fprintf(state->auditFile, "%s", base_name(carve->filename));
2238 #ifdef _WIN32
2239     fprintf(state->auditFile, "%13I64u\t\t", frag->start);
2240 #else
2241     fprintf(state->auditFile, "%13llu\t\t", frag->start);
2242 #endif
2243 
2244     fprintf(state->auditFile, "%3s", carve->chopped ? "YES   " : "NO    ");
2245 
2246 #ifdef _WIN32
2247     fprintf(state->auditFile, "%13I64u\t\t", frag->stop - frag->start + 1);
2248 #else
2249     fprintf(state->auditFile, "%13llu\t\t", frag->stop - frag->start + 1);
2250 #endif
2251 
2252     fprintf(state->auditFile, "%s\n", base_name(state->imagefile));
2253 
2254     fflush(state->auditFile);
2255 
2256     // update coverage blockmap, if appropriate
2257     if(state->updateCoverageBlockmap) {
2258       for(k = frag->start / state->coverageblocksize;
2259 	  k <= frag->stop / state->coverageblocksize; k++) {
2260 	if((err = updateCoverageBlockmap(state, k)) != SCALPEL_OK) {
2261 	  destroy_queue(&fragments);
2262 	  return err;
2263 	}
2264       }
2265     }
2266     next_element(&fragments);
2267   }
2268 
2269   destroy_queue(&fragments);
2270 
2271   return SCALPEL_OK;
2272 }
2273 
2274 
2275 
2276 static int
2277 static int updateCoverageBlockmap(struct scalpelState *state, unsigned long long block) {
2278 
2279   unsigned int entry;
2280 
2281   if(state->updateCoverageBlockmap) {
2282     // first entry in file is block size, so seek one unsigned int further
2283     fseeko(state->coverageblockmap, (block + 1) * sizeof(unsigned int),
2284 	   SEEK_SET);
2285     if(fread(&entry, sizeof(unsigned int), 1, state->coverageblockmap) != 1) {
2286       fprintf(stderr, "Error reading coverage blockmap entry!\n");
2287       fprintf(state->auditFile, "Error reading coverage blockmap entry!\n");
2288       return SCALPEL_ERROR_FATAL_READ;
2289     }
2290     entry++;
2291     // first entry in file is block size, so seek one unsigned int further
2292     fseeko(state->coverageblockmap, (block + 1) * sizeof(unsigned int),
2293 	   SEEK_SET);
2294     if(fwrite(&entry, sizeof(unsigned int), 1, state->coverageblockmap)
2295        != 1) {
2296       fprintf(stderr, "Error writing to coverage blockmap file!\n");
2297       fprintf(state->auditFile, "Error writing to coverage blockmap file!\n");
2298       return SCALPEL_ERROR_FILE_WRITE;
2299     }
2300   }
2301 
2302   return SCALPEL_OK;
2303 }
2304 
2305 
2306 
2307 static void destroyCoverageMaps(struct scalpelState *state) {
2308 
2309   // free memory associated with coverage bitmap, close coverage blockmap file
2310 
2311   if(state->coveragebitmap) {
2312     free(state->coveragebitmap);
2313   }
2314 
2315   if(state->useCoverageBlockmap || state->updateCoverageBlockmap) {
2316     fclose(state->coverageblockmap);
2317   }
2318 }
2319 
2320 
2321 // simple wrapper for fseeko with SEEK_CUR semantics that uses the
2322 // coverage bitmap to skip over covered blocks, IF the coverage
2323 // blockmap is being used.  The offset is adjusted so that covered
2324 // blocks are silently skipped when seeking if the coverage blockmap
2325 // is used; otherwise an fseeko() with an unmodified offset is
2326 // performed.
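//
// Example (illustrative): with 512-byte blocks, the stream at offset 0,
// and only block 1 marked covered, a requested relative seek of +600
// becomes fseeko(fp, 1112, SEEK_CUR): the 600 uncovered bytes span
// block 0 and part of block 2, so block 1's 512 covered bytes are
// silently added to the offset.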
2327 static int
2328 fseeko_use_coverage_map(struct scalpelState *state, FILE * fp, off64_t offset) {
2329 
2330   off64_t currentpos;
2331 
2332   // BUG TEST: revision 5 changed curblock to unsigned and removed the
2333   // curblock >= 0 tests from each of the next three while loops.
2334   // Guards should be restored to make sure curblock doesn't underflow
2335   // when seeking backwards; it isn't yet clear whether this broke anything.
2336 
2337   unsigned long long curblock, bytestoskip, bytestokeep, totalbytes = 0;
2338   int sign;
2339 
2340   if(state->useCoverageBlockmap) {
2341     currentpos = ftello(fp);
2342     sign = (offset > 0 ? 1 : -1);
2343 
2344     curblock = currentpos / state->coverageblocksize;
2345 
2346     while (totalbytes < (offset > 0 ? offset : offset * -1) &&
2347 	   curblock < state->coveragenumblocks) {
2348       bytestoskip = 0;
2349 
2350       // covered blocks increase offset
2351       while (curblock < state->coveragenumblocks &&
2352 	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
2353 
2354 	bytestoskip += (state->coverageblocksize -
2355 			currentpos % state->coverageblocksize);
2356 	curblock += sign;
2357       }
2358 
2359       offset += (bytestoskip * sign);
2360       currentpos += (bytestoskip * sign);
2361 
2362       bytestokeep = 0;
2363 
2364       // uncovered blocks don't increase offset
2365       while (curblock < state->coveragenumblocks &&
2366 	     ((state->
2367 	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
2368 	     && totalbytes < (offset > 0 ? offset : offset * -1)) {
2369 
2370 	bytestokeep += (state->coverageblocksize -
2371 			currentpos % state->coverageblocksize);
2372 
2373 	curblock += sign;
2374       }
2375 
2376       totalbytes += bytestokeep;
2377       currentpos += (bytestokeep * sign);
2378     }
2379   }
2380 
2381   return fseeko(fp, offset, SEEK_CUR);
2382 }
2383 
2384 
2385 
2386 // simple wrapper for ftello() that uses the coverage bitmap to
2387 // report the current file position *minus* the contribution of
2388 // marked blocks, IF the coverage blockmap is being used.  If a
2389 // coverage blockmap isn't in use, just performs a standard ftello()
2390 // call.
2391 //
2392 // GGRIII:  *** This could use optimization, e.g., use of a pre-computed
2393 // table to avoid walking the coverage bitmap on each call.
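//
// Example (illustrative): with 512-byte blocks and blocks 0 and 2
// covered, a real file position of 1700 (inside uncovered block 3) is
// reported as 676: the 1024 bytes contributed by the two covered
// blocks are subtracted out.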
2394 
2395 static off64_t ftello_use_coverage_map(struct scalpelState *state, FILE * fp) {
2396 
2397   off64_t currentpos, decrease = 0;
2398   unsigned long long endblock, k;
2399 
2400   currentpos = ftello(fp);
2401 
2402   if(state->useCoverageBlockmap) {
2403     endblock = currentpos / state->coverageblocksize;
2404 
2405     // covered blocks don't contribute to current file position
2406     for(k = 0; k <= endblock; k++) {
2407       if(state->coveragebitmap[k / 8] & (1 << (k % 8))) {
2408 	decrease += state->coverageblocksize;
2409       }
2410     }
2411 
2412     if(state->coveragebitmap[endblock / 8] & (1 << (endblock % 8))) {
2413       decrease += (state->coverageblocksize -
2414 		   currentpos % state->coverageblocksize);
2415     }
2416 
2417     if(state->modeVerbose && state->useCoverageBlockmap) {
2418 #ifdef _WIN32
2419       fprintf(stdout,
2420 	      "Coverage map decreased current file position by %I64u bytes.\n",
2421 	      (unsigned long long)decrease);
2422 #else
2423       fprintf(stdout,
2424 	      "Coverage map decreased current file position by %llu bytes.\n",
2425 	      (unsigned long long)decrease);
2426 #endif
2427     }
2428   }
2429 
2430   return currentpos - decrease;
2431 }
2432 
2433 
2434 
2435 // simple wrapper for fread() that uses the coverage bitmap--the read silently
2436 // skips blocks that are marked covered (corresponding bit in coverage
2437 // bitmap is 1)
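// Example (illustrative): with 512-byte blocks, only block 1 covered,
// and the stream at offset 0, a request for 1024 bytes copies image
// bytes 0-511 and 1024-1535 into consecutive positions of the caller's
// buffer and leaves the stream positioned at offset 1536.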
2438 static size_t
2439 fread_use_coverage_map(struct scalpelState *state, void *ptr,
2440 		       size_t size, size_t nmemb, FILE * stream) {
2441 
2442   unsigned long long curblock, neededbytes = nmemb * size, bytestoskip,
2443     bytestoread, bytesread, totalbytesread = 0, curpos;
2444   int shortread;
2445 //  gettimeofday_t readnow, readthen;
2446 
2447 //  gettimeofday(&readthen, 0);
2448 
2449   if(state->useCoverageBlockmap) {
2450     if(state->modeVerbose) {
2451 #ifdef _WIN32
2452       fprintf(stdout,
2453 	      "Issuing coverage map-based READ, wants %I64u bytes.\n",
2454 	      neededbytes);
2455 #else
2456       fprintf(stdout,
2457 	      "Issuing coverage map-based READ, wants %llu bytes.\n",
2458 	      neededbytes);
2459 #endif
2460     }
2461 
2462     curpos = ftello(stream);
2463     curblock = curpos / state->coverageblocksize;
2464     shortread = 0;
2465 
2466     while (totalbytesread < neededbytes
2467 	   && curblock < state->coveragenumblocks && !shortread) {
2468       bytestoread = 0;
2469       bytestoskip = 0;
2470 
2471       // skip covered blocks
2472       while (curblock < state->coveragenumblocks &&
2473 	     (state->coveragebitmap[curblock / 8] & (1 << (curblock % 8)))) {
2474 	bytestoskip += (state->coverageblocksize -
2475 			curpos % state->coverageblocksize);
2476 	curblock++;
2477       }
2478 
2479       curpos += bytestoskip;
2480 
2481 
2482       if(state->modeVerbose) {
2483 #ifdef _WIN32
2484 	fprintf(stdout,
2485 		"fread using coverage map to skip %I64u bytes.\n", bytestoskip);
2486 #else
2487 	fprintf(stdout,
2488 		"fread using coverage map to skip %llu bytes.\n", bytestoskip);
2489 #endif
2490       }
2491 
2492       fseeko(stream, (off64_t) bytestoskip, SEEK_CUR);
2493 
2494       // accumulate uncovered blocks for read
2495       while (curblock < state->coveragenumblocks &&
2496 	     ((state->
2497 	       coveragebitmap[curblock / 8] & (1 << (curblock % 8))) == 0)
2498 	     && totalbytesread + bytestoread <= neededbytes) {
2499 
2500 	bytestoread += (state->coverageblocksize -
2501 			curpos % state->coverageblocksize);
2502 
2503 	curblock++;
2504       }
2505 
2506       // cap read size
2507       if(totalbytesread + bytestoread > neededbytes) {
2508 	bytestoread = neededbytes - totalbytesread;
2509       }
2510 
2511 
2512       if(state->modeVerbose) {
2513 #ifdef _WIN32
2514 	fprintf(stdout,
2515 		"fread using coverage map found %I64u consecutive bytes.\n",
2516 		bytestoread);
2517 #else
2518 	fprintf(stdout,
2519 		"fread using coverage map found %llu consecutive bytes.\n",
2520 		bytestoread);
2521 #endif
2522       }
2523 
2524       if((bytesread =
2525 	  fread((char *)ptr + totalbytesread, 1, (size_t) bytestoread,
2526 		stream)) < bytestoread) {
2527 	shortread = 1;
2528       }
2529 
2530       totalbytesread += bytesread;
2531       curpos += bytestoread;
2532 
2533       if(state->modeVerbose) {
2534 #ifdef _WIN32
2535 	fprintf(stdout, "fread using coverage map read %I64u bytes.\n",
2536 		bytesread);
2537 #else
2538 	fprintf(stdout, "fread using coverage map read %llu bytes.\n",
2539 		bytesread);
2540 #endif
2541       }
2542     }
2543 
2544     if(state->modeVerbose) {
2545       fprintf(stdout, "Coverage map-based READ complete.\n");
2546     }
2547 
2548     // conform with fread() semantics by returning # of items read
2549     return totalbytesread / size;
2550   }
2551   else {
2552     size_t ret = fread(ptr, size, nmemb, stream);
2553     return ret;
2554   }
2555 }
2556 
2557 #ifdef MULTICORE_THREADING
2558 
2559 // threaded header/footer search
2560 static void *threadedFindAll(void *args) {
2561 
2562   int id = ((ThreadFindAllParams *) args)->id;
2563   char *str;
2564   size_t length;
2565   char *startpos;
2566   long offset;
2567   char **foundat;
2568   size_t *foundatlens;
2569   size_t *table = 0;
2570   regex_t *regexp = 0;
2571   int strisRE;
2572   int casesensitive;
2573   int nosearchoverlap;
2574   struct scalpelState *state;
2575 
2576   regmatch_t *match;
2577 
2578   // wait for work
2579   //  sem_wait(&workavailable[id]);
2580 
2581   // this thread must hold the workcomplete mutex before accepting any work
2582   pthread_mutex_lock(&workcomplete[id]);
2583   pthread_mutex_lock(&workavailable[id]);
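  // Dispatch handshake (as set up in init_threading_model below, and
  // partly inferred from dispatcher code elsewhere in this file): the
  // main thread holds workavailable[id] after creating this thread,
  // fills threadargs[id] when it has a search to run, then unlocks
  // workavailable[id] to wake this loop and waits on workcomplete[id]
  // until it is unlocked at the end of each iteration.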
2584 
2585   while (1) {
2586     // get args that define current workload
2587     str = ((ThreadFindAllParams *) args)->str;
2588     length = ((ThreadFindAllParams *) args)->length;
2589     startpos = ((ThreadFindAllParams *) args)->startpos;
2590     offset = ((ThreadFindAllParams *) args)->offset;
2591     foundat = ((ThreadFindAllParams *) args)->foundat;
2592     foundatlens = ((ThreadFindAllParams *) args)->foundatlens;
2593     strisRE = ((ThreadFindAllParams *) args)->strisRE;
2594     if(strisRE) {
2595       regexp = ((ThreadFindAllParams *) args)->regex;
2596     }
2597     else {
2598       table = ((ThreadFindAllParams *) args)->table;
2599     }
2600     casesensitive = ((ThreadFindAllParams *) args)->casesensitive;
2601     nosearchoverlap = ((ThreadFindAllParams *) args)->nosearchoverlap;
2602     state = ((ThreadFindAllParams *) args)->state;
2603 
2604     if(state->modeVerbose) {
2605       printf("needle search thread # %d awake.\n", id);
2606     }
2607 
2608     while (startpos) {
2609       if(!strisRE) {
2610 	startpos = bm_needleinhaystack(str,
2611 				       length,
2612 				       startpos,
2613 				       offset - (long)startpos,
2614 				       table, casesensitive);
2615       }
2616       else {
2617 	//printf("Before regexp search, startpos = %p\n", startpos);
2618 	match = re_needleinhaystack(regexp, startpos, offset - (long)startpos);
2619 	if(!match) {
2620 	  startpos = 0;
2621 	}
2622 	else {
2623 	  startpos = match->rm_so + startpos;
2624 	  length = match->rm_eo - match->rm_so;
2625 	  free(match);
2626 	  //printf("After regexp search, startpos = %p\n", startpos);
2627 	}
2628       }
2629 
2630       if(startpos) {
2631 	// remember match location
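	// (foundat[MAX_MATCHES_PER_BUFFER] doubles as a running count of
	// matches recorded so far in this buffer)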
2632 	foundat[(long)(foundat[MAX_MATCHES_PER_BUFFER])] = startpos;
2633 	foundatlens[(long)(foundat[MAX_MATCHES_PER_BUFFER])] = length;
2634 	foundat[MAX_MATCHES_PER_BUFFER]++;
2635 
2636 	// move past match position.  Foremost 0.69 didn't find overlapping
2637 	// headers/footers.  If you need that behavior, specify "-r" on the
2638 	// command line.  Scalpel's default behavior is to find overlapping
2639 	// headers/footers.
2640 
2641 	if(nosearchoverlap) {
2642 	  startpos += length;
2643 	}
2644 	else {
2645 	  startpos++;
2646 	}
2647       }
2648     }
2649 
2650     if(state->modeVerbose) {
2651       printf("needle search thread # %d asleep.\n", id);
2652     }
2653 
2654     // signal completion of work
2655     //    sem_post(&workcomplete[id]);
2656     pthread_mutex_unlock(&workcomplete[id]);
2657 
2658     // wait for more work
2659     //    sem_wait(&workavailable[id]);
2660     pthread_mutex_lock(&workavailable[id]);
2661 
2662   }
2663 
2664   return 0;
2665 
2666 }
2667 
2668 #endif
2669 
2670 
2671 // Buffers for reading image in and holding gpu results.
2672 // The ourCudaMallocHost call MUST be executed by the
2673 // gpu_handler thread.
2674 void init_store() {
2675 
2676   // 3 queues:
2677   //              inque filled by reader, emptied by gpu_handler
2678   //              outque filled by gpu_handler, emptied by host thread
2679   //              freequeue filled by host, emptied by reader
2680 
2681   // the queues hold pointers to full and empty SIZE_OF_BUFFER read buffers
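  // (here: inque == full_readbuf, freequeue == empty_readbuf, and, under
  // GPU_THREADING, outque == results_readbuf)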
2682   full_readbuf = syncqueue_init("full_readbuf", QUEUELEN);
2683   empty_readbuf = syncqueue_init("empty_readbuf", QUEUELEN);
2684 #ifdef GPU_THREADING
2685   results_readbuf = syncqueue_init("results_readbuf", QUEUELEN);
2686 #endif
2687 
2688   // backing store of actual buffers pointed to above, along with some
2689   // necessary bookkeeping info
2690   readbuf_info *readbuf_store;
2691   if((readbuf_store =
2692       (readbuf_info *)malloc(QUEUELEN * sizeof(readbuf_info))) == 0) {
2693     fprintf(stderr, "malloc %lu failed in streaming reader\n",
2694 	    (unsigned long)QUEUELEN * sizeof(readbuf_info));
2695     exit(1);	// the backing store is required; can't continue
  }
2696 
2697   // initialization
2698   int g;
2699   for(g = 0; g < QUEUELEN; g++) {
2700     readbuf_store[g].bytesread = 0;
2701     readbuf_store[g].beginreadpos = 0;
2702 
2703     // for fast gpu operation we need to use the CUDA pinned-memory allocations
2704 #ifdef GPU_THREADING
2705     ourCudaMallocHost((void **)&(readbuf_store[g].readbuf), SIZE_OF_BUFFER);
2706 #else
2707     readbuf_store[g].readbuf = (char *)malloc(SIZE_OF_BUFFER);
2708 #endif
2709 
2710     // put pointer to empty but initialized readbuf in the empty que
2711     put(empty_readbuf, (void *)(&readbuf_store[g]));
2712   }
2713 }
2714 
2715 
2716 // initialize thread-related data structures for either GPU_THREADING or
2717 // MULTICORE_THREADING models
2718 int init_threading_model(struct scalpelState *state) {
2719 
2720   int i;
2721 
2722 #ifdef GPU_THREADING
2723 
2724   printf("GPU-based threading model enabled.\n");
2725 
2726 #endif
2727 
2728 #ifdef MULTICORE_THREADING
2729 
2730   printf("Multi-core CPU threading model enabled.\n");
2731   printf("Initializing thread group data structures.\n");
2732 
2733   // initialize global data structures for threads
2734   searchthreads = (pthread_t *) malloc(state->specLines * sizeof(pthread_t));
2735   checkMemoryAllocation(state, searchthreads, __LINE__, __FILE__,
2736 			"searchthreads");
2737   threadargs =
2738     (ThreadFindAllParams *) malloc(state->specLines *
2739 				   sizeof(ThreadFindAllParams));
2740   checkMemoryAllocation(state, threadargs, __LINE__, __FILE__, "args");
2741   foundat = (char ***)malloc(state->specLines * sizeof(char *));
2742   checkMemoryAllocation(state, foundat, __LINE__, __FILE__, "foundat");
2743   foundatlens = (size_t **) malloc(state->specLines * sizeof(size_t));
2744   checkMemoryAllocation(state, foundatlens, __LINE__, __FILE__, "foundatlens");
2745   workavailable = (pthread_mutex_t *)malloc(state->specLines * sizeof(pthread_mutex_t));
2746 
2747   checkMemoryAllocation(state, workavailable, __LINE__, __FILE__,
2748 			"workavailable");
2749   workcomplete = (pthread_mutex_t *)malloc(state->specLines * sizeof(pthread_mutex_t));
2750 
2751   checkMemoryAllocation(state, workcomplete, __LINE__, __FILE__,
2752 			"workcomplete");
2753 
2754   printf("Creating threads...\n");
2755   for(i = 0; i < state->specLines; i++) {
2756     foundat[i] = (char **)malloc((MAX_MATCHES_PER_BUFFER + 1) * sizeof(char *));
2757     checkMemoryAllocation(state, foundat[i], __LINE__, __FILE__, "foundat");
2758     foundatlens[i] = (size_t *) malloc(MAX_MATCHES_PER_BUFFER * sizeof(size_t));
2759     checkMemoryAllocation(state, foundatlens[i], __LINE__, __FILE__,
2760 			  "foundatlens");
2761     foundat[i][MAX_MATCHES_PER_BUFFER] = 0;
2762 
2763     // BUG:  NEED PROPER ERROR CODE/MESSAGE FOR MUTEX CREATION FAILURE
2764     if(pthread_mutex_init(&workavailable[i], 0)) {
2765     	//return SCALPEL_ERROR_PTHREAD_FAILURE;
2766       fprintf(stderr, "COULDN'T CREATE MUTEX\n");
2767       exit(1);
2768     }
2769 
2770     pthread_mutex_lock(&workavailable[i]);
2771 
2772     if(pthread_mutex_init(&workcomplete[i], 0)) {
2773       fprintf(stderr, "COULDN'T CREATE MUTEX\n");
2774       exit(1);
2775     }
2776 
2777     // create a thread in the thread pool; thread will block on workavailable[]
2778     // mutex until there's work to do
2779 
2780     // FIX:  NEED PROPER ERROR CODE/MESSAGE FOR THREAD CREATION FAILURE
2781     threadargs[i].id = i;	// thread needs to read id before blocking
2782     if(pthread_create
2783        (&searchthreads[i], NULL, &threadedFindAll, &threadargs[i])) {
2784       //return SCALPEL_ERROR_PTHREAD_FAILURE;
2785       fprintf(stderr, "COULDN'T CREATE THREAD\n");
2786       exit(1);
2787     }
2788   }
2789   printf("Thread creation completed.\n");
2790 
2791 #endif
2792 
2793   return 0;
2794 }
2795 
2796 
2797 // write header/footer database for current image file into the
2798 // Scalpel output directory. No information is written into the
2799 // database for file types without a suffix.  The filename used
2800 // is the current image filename with ".hfd" appended.  The
2801 // format of the database file is straightforward:
2802 //
2803 // suffix_#1 (string)
2804 // number_of_headers (unsigned long long)
2805 // header_pos_#1 (unsigned long long)
2806 // header_pos_#2 (unsigned long long)
2807 // ...
2808 // number_of_footers (unsigned long long)
2809 // footer_pos_#1 (unsigned long long)
2810 // footer_pos_#2 (unsigned long long)
2811 // ...
2812 // suffix_#2 (string)
2813 // number_of_headers (unsigned long long)
2814 // header_pos_#1 (unsigned long long)
2815 // header_pos_#2 (unsigned long long)
2816 // ...
2817 // number_of_footers (unsigned long long)
2818 // footer_pos_#1 (unsigned long long)
2819 // footer_pos_#2 (unsigned long long)
2820 // ...
2821 // ...
2822 //
2823 // If state->useCoverageBlockmap, then translation is required to
2824 // produce real disk image addresses for the generated header/footer
2825 // database file, because the Scalpel carving engine isn't aware of
2826 // gaps created by blocks that are covered by previously carved files.
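//
// A minimal example (illustrative values): for a spec with "jpg" and
// "gif" rules, two jpg headers, one jpg footer, and no gif matches,
// the .hfd file would contain:
//
//   jpg
//   2
//   2048
//   16384
//   1
//   20479
//   gif
//   0
//   0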
2827 
2828 static int writeHeaderFooterDatabase(struct scalpelState *state) {
2829 
2830   FILE *dbfile;
2831   char fn[MAX_STRING_LENGTH];	// filename for header/footer database
2832   int needlenum;
2833   struct SearchSpecLine *currentneedle;
2834   unsigned long long i;
2835 
2836   // generate unique name for header/footer database
2837   snprintf(fn, MAX_STRING_LENGTH, "%s/%s.hfd",
2838 	   state->outputdirectory, base_name(state->imagefile));
2839 
2840   if((dbfile = fopen(fn, "w")) == NULL) {
2841     fprintf(stderr, "Error writing to header/footer database file: %s\n", fn);
2842     fprintf(state->auditFile,
2843 	    "Error writing to header/footer database file: %s\n", fn);
2844     return SCALPEL_ERROR_FILE_WRITE;
2845   }
2846 
2847 #ifdef _WIN32
2848   // set binary mode for Win32
2849   setmode(fileno(dbfile), O_BINARY);
2850 #endif
2851 #ifdef __linux
2852   fcntl(fileno(dbfile), F_SETFL, O_LARGEFILE);
2853 #endif
2854 
2855   for(needlenum = 0; needlenum < state->specLines; needlenum++) {
2856     currentneedle = &(state->SearchSpec[needlenum]);
2857 
2858     if(currentneedle->suffix[0] != SCALPEL_NOEXTENSION) {
2859       // output current suffix
2860       if(fprintf(dbfile, "%s\n", currentneedle->suffix) <= 0) {
2861 	fprintf(stderr,
2862 		"Error writing to header/footer database file: %s\n", fn);
2863 	fprintf(state->auditFile,
2864 		"Error writing to header/footer database file: %s\n", fn);
2865 	return SCALPEL_ERROR_FILE_WRITE;
2866       }
2867 
2868       // # of headers
2869 #ifdef _WIN32
2870       if(fprintf(dbfile, "%I64u\n", currentneedle->offsets.numheaders)
2871 	 <= 0) {
2872 #else
2873 	if(fprintf(dbfile, "%llu\n", currentneedle->offsets.numheaders) <= 0) {
2874 #endif
2875 	  fprintf(stderr,
2876 		  "Error writing to header/footer database file: %s\n", fn);
2877 	  fprintf(state->auditFile,
2878 		  "Error writing to header/footer database file: %s\n", fn);
2879 	  return SCALPEL_ERROR_FILE_WRITE;
2880 	}
2881 
2882 	// all header positions for current suffix
2883 	for(i = 0; i < currentneedle->offsets.numheaders; i++) {
2884 #ifdef _WIN32
2885 	  if(fprintf
2886 	     (dbfile, "%I64u\n",
2887 	      positionUseCoverageBlockmap(state,
2888 					  currentneedle->offsets.
2889 					  headers[i])) <= 0) {
2890 #else
2891 	    if(fprintf
2892 	       (dbfile, "%llu\n",
2893 		positionUseCoverageBlockmap(state,
2894 					    currentneedle->offsets.
2895 					    headers[i])) <= 0) {
2896 #endif
2897 	      fprintf(stderr,
2898 		      "Error writing to header/footer database file: %s\n", fn);
2899 	      fprintf(state->auditFile,
2900 		      "Error writing to header/footer database file: %s\n", fn);
2901 	      return SCALPEL_ERROR_FILE_WRITE;
2902 	    }
2903 	  }
2904 
2905 	  // # of footers
2906 #ifdef _WIN32
2907 	  if(fprintf(dbfile, "%I64u\n", currentneedle->offsets.numfooters)
2908 	     <= 0) {
2909 #else
2910 	    if(fprintf(dbfile, "%llu\n", currentneedle->offsets.numfooters) <= 0) {
2911 #endif
2912 	      fprintf(stderr,
2913 		      "Error writing to header/footer database file: %s\n", fn);
2914 	      fprintf(state->auditFile,
2915 		      "Error writing to header/footer database file: %s\n", fn);
2916 	      return SCALPEL_ERROR_FILE_WRITE;
2917 	    }
2918 
2919 	    // all footer positions for current suffix
2920 	    for(i = 0; i < currentneedle->offsets.numfooters; i++) {
2921 #ifdef _WIN32
2922 	      if(fprintf
2923 		 (dbfile, "%I64u\n",
2924 		  positionUseCoverageBlockmap(state,
2925 					      currentneedle->offsets.
2926 					      footers[i])) <= 0) {
2927 #else
2928 		if(fprintf
2929 		   (dbfile, "%llu\n",
2930 		    positionUseCoverageBlockmap(state,
2931 						currentneedle->offsets.
2932 						footers[i])) <= 0) {
2933 #endif
2934 		  fprintf(stderr,
2935 			  "Error writing to header/footer database file: %s\n", fn);
2936 		  fprintf(state->auditFile,
2937 			  "Error writing to header/footer database file: %s\n", fn);
2938 		  return SCALPEL_ERROR_FILE_WRITE;
2939 		}
2940 	      }
2941 	    }
2942 	  }
2943 	  fclose(dbfile);
2944 	  return SCALPEL_OK;
2945 	}
2946 
2947