1
2 /* FOREMOST
3 *
4 * By Jesse Kornblum, Kris Kendall, & Nick Mikus
5 *
6 * This is a work of the US Government. In accordance with 17 USC 105,
7 * copyright protection is not available for any work of the US Government.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15 #include "main.h"
16
/*
 * Handle a user interrupt (SIGINT/SIGTERM observed by the callers).
 *
 * Logs the interrupt to the audit file, closes the input handle, calls
 * cleanup_output() and terminates the process with exit(-1).
 *
 * s: program state
 * i: information about the input currently being processed
 *
 * The function does not return in practice; the trailing return only
 * satisfies the int signature.
 */
int user_interrupt (f_state * s, f_info * i)
{
    audit_msg(s, "Interrupt received at %s", current_time());

    /* RBF - Write user_interrupt */
    fclose(i->handle);

    /* cleanup_output() still needs the state, so it must run before s is freed */
    cleanup_output(s);

    free(i);
    free(s);
    exit(-1);
    return FALSE;
}
29
/*
 * Read exactly `length` bytes starting at absolute offset `offset` of
 * i->handle into a newly allocated buffer.
 *
 * Returns the buffer (the caller must free() it), or NULL when the seek
 * fails or fewer than `length` bytes could be read.  Terminates the
 * program with exit(1) when the buffer cannot be allocated.
 *
 * Side effect: the file position of i->handle is changed; callers that
 * need the old position must save and restore it themselves.
 */
unsigned char *read_from_disk(u_int64_t offset, f_info *i, u_int64_t length)
{
    u_int64_t bytesread = 0;
    unsigned char *newbuf = (unsigned char *)malloc(length * sizeof(char));

    if (!newbuf) {
        fprintf(stderr, "Ran out of memory in read_from_disk()\n");
        exit(1);
    }

    /* A failed seek would make fread() return data from the wrong offset */
    if (fseeko(i->handle, offset, SEEK_SET) != 0) {
        free(newbuf);
        return NULL;
    }

    bytesread = fread(newbuf, 1, length, i->handle);
    if (bytesread != length) {
        free(newbuf);
        return NULL;
    }

    return newbuf;
}
52
53 /*
54 Perform a modified boyer-moore string search (w/ support for wildcards and case-insensitive searches)
55 and allows the starting position in the buffer to be manually set, which allows data to be skipped
56 */
bm_search_skipn(unsigned char * needle,size_t needle_len,unsigned char * haystack,size_t haystack_len,size_t table[UCHAR_MAX+1],int casesensitive,int searchtype,int start_pos)57 unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len, unsigned char *haystack,
58 size_t haystack_len, size_t table[UCHAR_MAX + 1], int casesensitive,
59 int searchtype, int start_pos)
60 {
61 register size_t shift = 0;
62 register size_t pos = start_pos;
63 unsigned char *here;
64
65 if (needle_len == 0)
66 return haystack;
67
68 if (searchtype == SEARCHTYPE_FORWARD || searchtype == SEARCHTYPE_FORWARD_NEXT)
69 {
70 while (pos < haystack_len)
71 {
72 while (pos < haystack_len && (shift = table[(unsigned char)haystack[pos]]) > 0)
73 {
74 pos += shift;
75 }
76
77 if (0 == shift)
78 {
79 here = (unsigned char *) &haystack[pos - needle_len + 1];
80 if (0 == memwildcardcmp(needle, here, needle_len, casesensitive))
81 {
82 return (here);
83 }
84 else
85 pos++;
86 }
87 }
88
89 return NULL;
90 }
91 else if (searchtype == SEARCHTYPE_REVERSE) //Run our search backwards
92 {
93 while (pos < haystack_len)
94 {
95 while
96 (
97 pos < haystack_len &&
98 (shift = table[(unsigned char)haystack[haystack_len - pos - 1]]) > 0
99 )
100 {
101 pos += shift;
102 }
103
104 if (0 == shift)
105 {
106 if (0 == memwildcardcmp(needle, here = (unsigned char *) &haystack[haystack_len - pos - 1],
107 needle_len, casesensitive))
108 {
109 return (here);
110 }
111 else
112 pos++;
113 }
114 }
115
116 return NULL;
117 }
118
119 return NULL;
120 }
121
/*
 * Modified Boyer-Moore search over the whole haystack.
 *
 * Thin wrapper around bm_search_skipn() that passes needle_len - 1 as the
 * starting position; all other arguments are handed through unchanged.
 * Returns whatever bm_search_skipn() returns.
 */
unsigned char *bm_search(unsigned char *needle, size_t needle_len, unsigned char *haystack,
                         size_t haystack_len, size_t table[UCHAR_MAX + 1], int case_sen,
                         int searchtype)
{
    /* same value (and same size_t -> int conversion) the call performed inline */
    int initial_pos = needle_len - 1;

    return bm_search_skipn(needle, needle_len, haystack, haystack_len,
                           table, case_sen, searchtype, initial_pos);
}
143
/*
 * Prepare an input stream for searching.
 *
 * Resets i->bytes_read, derives i->total_megs from i->total_bytes, writes
 * the input length to the audit log and, when s->skip is non-zero, seeks
 * s->skip * s->block_size bytes into the input.  A total_bytes value of 0
 * is reported as "Unknown" length.
 *
 * s: program state (skip, block_size are read)
 * i: input description (handle, total_bytes are read; counters are reset)
 */
void setup_stream(f_state *s, f_info *i)
{
    char buffer[MAX_STRING_LENGTH];
    u_int64_t skip = (((u_int64_t) s->skip) * ((u_int64_t) s->block_size));

#ifdef DEBUG
    printf("s->skip=%d s->block_size=%d total=%llu\n",
           s->skip,
           s->block_size,
           (unsigned long long) skip);
#endif
    i->bytes_read = 0;
    i->total_megs = i->total_bytes / ONE_MEGABYTE;

    if (i->total_bytes != 0)
    {
        audit_msg(s,
                  "Length: %s (%llu bytes)",
                  human_readable(i->total_bytes, buffer),
                  (unsigned long long) i->total_bytes);
    }
    else
        audit_msg(s, "Length: Unknown");

    if (s->skip != 0)
    {
        audit_msg(s,
                  "Skipping: %s (%llu bytes)",
                  human_readable(skip, buffer),
                  (unsigned long long) skip);

        if (fseeko(i->handle, skip, SEEK_SET) != 0)
        {
            /* the seek did not happen, so the remaining size is unchanged */
            audit_msg(s, "Error: %s", strerror(errno));
        }
        else if (i->total_bytes != 0)
        {
            /* never let the unsigned subtraction wrap around */
            if (skip < i->total_bytes)
                i->total_bytes -= skip;
            else
                i->total_bytes = 0;
        }
    }

    audit_msg(s, " ");

#ifdef __WIN32
    i->last_read = 0;
    i->overflow_count = 0;
#endif
}
183
/*
 * Write the column header line of the audit report: number, name (with
 * the configured block size), size, file offset and comment.
 */
void audit_layout(f_state *s)
{
    audit_msg(s, "Num\t %s (bs=%d)\t %10s\t %s\t %s \n",
              "Name", s->block_size, "Size", "File Offset", "Comment");
}
195
dumpInd(unsigned char * ind,int bs)196 void dumpInd(unsigned char *ind, int bs)
197 {
198 int i = 0;
199 printf("\n/*******************************/\n");
200
201 while (bs > 0)
202 {
203 if (i % 10 == 0)
204 printf("\n");
205
206 //printx(ind,0,10);
207 printf("%4u ", htoi(ind, FOREMOST_LITTLE_ENDIAN));
208
209 bs -= 4;
210 ind += 4;
211 i++;
212 }
213
214 printf("\n/*******************************/\n");
215 }
216
217 /********************************************************************************
218 *Function: ind_block
219 *Description: check if the block foundat is pointing to looks like an indirect
220 * block
221 *Return: TRUE/FALSE
222 **********************************************************************************/
ind_block(unsigned char * foundat,u_int64_t buflen,int bs)223 int ind_block(unsigned char *foundat, u_int64_t buflen, int bs)
224 {
225
226 unsigned char *temp = foundat;
227 int jump = 12 * bs;
228 unsigned int block = 0;
229 unsigned int block2 = 0;
230 unsigned int dif = 0;
231 int i = 0;
232 unsigned int one = 1;
233 unsigned int numbers = (bs / 4) - 1;
234
235 //int reconstruct=FALSE;
236
237 /*Make sure we don't jump past the end of the buffer*/
238 if (buflen < jump + 16)
239 return FALSE;
240
241 while (i < numbers)
242 {
243 block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
244
245 if (block < 0)
246 return FALSE;
247
248 if (block == 0)
249 {
250 break;
251 }
252
253 i++;
254 block2 = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
255 if (block2 < 0)
256 return FALSE;
257
258 if (block2 == 0)
259 {
260 break;
261 }
262
263 dif = block2 - block;
264
265 if (dif == one)
266 {
267
268 #ifdef DEBUG
269 printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif);
270 #endif
271 }
272 else
273 {
274
275 #ifdef DEBUG
276 printf("Failure, dif!=1\n");
277 printf("\tblock1:=%u, block2:=%u dif=%u\n", block, block2, dif);
278 #endif
279
280 return FALSE;
281 }
282
283 #ifdef DEBUG
284 printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif);
285 #endif
286 }
287
288 if (i == 0)
289 return FALSE;
290
291 /*Check if the rest of the bytes are zero'd out */
292 for (i = i + 1; i < numbers; i++)
293 {
294 block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
295 if (block != 0)
296 {
297
298 //printf("Failure, 0 test\n");
299 return FALSE;
300 }
301 }
302
303 return TRUE;
304 }
305
306 /********************************************************************************
307 *Function: search_chunk
308 *Description: Analyze the given chunk by running each defined search spec on it
309 *Return: TRUE/FALSE
310 **********************************************************************************/
search_chunk(f_state * s,unsigned char * buf,f_info * i,u_int64_t chunk_size,u_int64_t f_offset)311 int search_chunk(f_state *s, unsigned char *buf, f_info *i, u_int64_t chunk_size, u_int64_t f_offset)
312 {
313
314 u_int64_t c_offset = 0;
315 //u_int64_t foundat_off = 0;
316 //u_int64_t buf_off = 0;
317
318 unsigned char *foundat = buf;
319 unsigned char *current_pos = NULL;
320 unsigned char *header_pos = NULL;
321 unsigned char *newbuf = NULL;
322 unsigned char *ind_ptr = NULL;
323 u_int64_t current_buflen = chunk_size;
324 int tryBS[3] = { 4096, 1024, 512 };
325 unsigned char *extractbuf = NULL;
326 u_int64_t file_size = 0;
327 s_spec *needle = NULL;
328 int j = 0;
329 int bs = 0;
330 int rem = 0;
331 int x = 0;
332 int found_ind = FALSE;
333 off_t saveme;
334 //char comment[32];
335 for (j = 0; j < s->num_builtin; j++)
336 {
337 needle = &search_spec[j];
338 foundat = buf; /*reset the buffer for the next search spec*/
339 #ifdef DEBUG
340 printf(" SEARCHING FOR %s's\n", needle->suffix);
341 #endif
342 bs = 0;
343 current_buflen = chunk_size;
344 while (foundat)
345 {
346 needle->written = FALSE;
347 found_ind = FALSE;
348 memset(needle->comment, 0, COMMENT_LENGTH - 1);
349 if (chunk_size <= (foundat - buf)) {
350 #ifdef DEBUG
351 printf("avoided seg fault in search_chunk()\n");
352 #endif
353 foundat = NULL;
354 break;
355 }
356 current_buflen = chunk_size - (foundat - buf);
357
358 //if((foundat-buf)< 1 ) break;
359 #ifdef DEBUG
360 //foundat_off=foundat;
361 //buf_off=buf;
362 //printf("current buf:=%llu (foundat-buf)=%llu \n", current_buflen, (u_int64_t) (foundat_off - buf_off));
363 #endif
364 if (signal_caught == SIGTERM || signal_caught == SIGINT)
365 {
366 user_interrupt(s, i);
367 printf("Cleaning up.\n");
368 signal_caught = 0;
369 }
370
371 if (get_mode(s, mode_quick)) /*RUN QUICK SEARCH*/
372 {
373 #ifdef DEBUG
374
375 //printf("quick mode is on\n");
376 #endif
377
378 /*Check if we are not on a block head, adjust if so*/
379 rem = (foundat - buf) % s->block_size;
380 if (rem != 0)
381 {
382 foundat += (s->block_size - rem);
383 }
384
385 if (memwildcardcmp(needle->header, foundat, needle->header_len, needle->case_sen
386 ) != 0)
387 {
388
389 /*No match, jump to the next block*/
390 if (current_buflen > s->block_size)
391 {
392 foundat += s->block_size;
393 continue;
394 }
395 else /*We are out of buffer lets go to the next search spec*/
396 {
397 foundat = NULL;
398 break;
399 }
400 }
401
402 header_pos = foundat;
403 }
404 else /**********RUN STANDARD SEARCH********************/
405 {
406 foundat = bm_search(needle->header,
407 needle->header_len,
408 foundat,
409 current_buflen, //How much to search through
410 needle->header_bm_table,
411 needle->case_sen, //casesensative
412 SEARCHTYPE_FORWARD);
413
414 header_pos = foundat;
415 }
416
417 if (foundat != NULL && foundat >= 0) /*We got something, run the appropriate heuristic to find the EOF*/
418 {
419 current_buflen = chunk_size - (foundat - buf);
420
421 if (get_mode(s, mode_ind_blk))
422 {
423 #ifdef DEBUG
424 printf("ind blk detection on\n");
425 #endif
426
427 //dumpInd(foundat+12*1024,1024);
428 for (x = 0; x < 3; x++)
429 {
430 bs = tryBS[x];
431
432 if (ind_block(foundat, current_buflen, bs))
433 {
434 if (get_mode(s, mode_verbose))
435 {
436 sprintf(needle->comment, " (IND BLK bs:=%d)", bs);
437 }
438
439 //dumpInd(foundat+12*bs,bs);
440 #ifdef DEBUG
441 printf("performing mem move\n");
442 #endif
443 if(current_buflen > 13 * bs)//Make sure we have enough buffer
444 {
445 if (!memmove(foundat + 12 * bs, foundat + 13 * bs, current_buflen - 13 * bs))
446 break;
447
448 found_ind = TRUE;
449 #ifdef DEBUG
450 printf("performing mem move complete\n");
451 #endif
452 ind_ptr = foundat + 12 * bs;
453 current_buflen -= bs;
454 chunk_size -= bs;
455 break;
456 }
457 }
458
459 }
460
461 }
462
463 c_offset = (foundat - buf);
464 current_pos = foundat;
465
466 /*Now lets analyze the file and see if we can determine its size*/
467
468 // printf("c_offset=%llu %x %x %llx\n", c_offset,foundat,buf,c_offset);
469 foundat = extract_file(s, c_offset, foundat, current_buflen, needle, f_offset);
470 #ifdef DEBUG
471 if (foundat == NULL)
472 {
473 printf("Foundat == NULL!!!\n");
474 }
475 #endif
476 if (get_mode(s, mode_write_all))
477 {
478 if (needle->written == FALSE)
479 {
480
481 /*write every header we find*/
482 if (current_buflen >= needle->max_len)
483 {
484 file_size = needle->max_len;
485 }
486 else
487 {
488 file_size = current_buflen;
489 }
490
491 sprintf(needle->comment, " (Header dump)");
492 extractbuf = (unsigned char *)malloc(file_size * sizeof(char));
493 memcpy(extractbuf, header_pos, file_size);
494 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
495 free(extractbuf);
496 }
497 }
498 else if (!foundat) /*Should we search further?*/
499 {
500
501 /*We couldn't determine where the file ends, now lets check to see
502 * if we should try again
503 */
504 if (current_buflen < needle->max_len) /*We need to bridge the gap*/
505 {
506 #ifdef DEBUG
507 printf(" Bridge the gap\n");
508 #endif
509 saveme = ftello(i->handle);
510 /*grow the buffer and try to extract again*/
511 newbuf = read_from_disk(c_offset + f_offset, i, needle->max_len);
512 if (newbuf == NULL)
513 break;
514 current_pos = extract_file(s,
515 c_offset,
516 newbuf,
517 needle->max_len,
518 needle,
519 f_offset);
520
521 /*Lets put the fp back*/
522 fseeko(i->handle, saveme, SEEK_SET);
523
524
525 free(newbuf);
526 }
527 else
528 {
529 foundat = header_pos; /*reset the foundat pointer to the location of the last header*/
530 foundat += needle->header_len + 1; /*jump past the header*/
531 }
532 }
533
534
535 }
536
537 if (found_ind)
538 {
539
540 /*Put the ind blk back in, re-arrange the buffer so that the future blks names come out correct*/
541 #ifdef DEBUG
542 printf("Replacing the ind block\n");
543 #endif
544 /*This is slow, should we do this??????*/
545 if (!memmove(ind_ptr + 1 * bs, ind_ptr, current_buflen - 13 * bs))
546 break;
547 memset(ind_ptr, 0, bs - 1);
548 chunk_size += bs;
549 memset(needle->comment, 0, COMMENT_LENGTH - 1);
550 }
551 } //end while
552 }
553
554 return TRUE;
555 }
556
557 /********************************************************************************
558 *Function: search_stream
559 *Description: Analyze the file by reading 1 chunk (default: 100MB) at a time and
560 *passing it to search_chunk
561 *Return: TRUE/FALSE
562 **********************************************************************************/
search_stream(f_state * s,f_info * i)563 int search_stream(f_state *s, f_info *i)
564 {
565 u_int64_t bytesread = 0;
566 u_int64_t f_offset = 0;
567 u_int64_t chunk_size = ((u_int64_t) s->chunk_size) * MEGABYTE;
568 unsigned char *buf = (unsigned char *)malloc(sizeof(char) * chunk_size);
569
570 setup_stream(s, i);
571
572 audit_layout(s);
573 #ifdef DEBUG
574 printf("\n\t READING THE FILE INTO MEMORY\n");
575 #endif
576
577 while ((bytesread = fread(buf, 1, chunk_size, i->handle)) > 0)
578 {
579 if (signal_caught == SIGTERM || signal_caught == SIGINT)
580 {
581 user_interrupt(s, i);
582 printf("Cleaning up.\n");
583 signal_caught = 0;
584 }
585
586 #ifdef DEBUG
587 printf("\n\tbytes_read:=%llu\n", bytesread);
588 #endif
589 search_chunk(s, buf, i, bytesread, f_offset);
590 f_offset += bytesread;
591 if (!get_mode(s, mode_quiet))
592 {
593 fprintf(stderr, "*");
594
595 //displayPosition(s,i,f_offset);
596 }
597
598 /*FIX ME***
599 * We should jump back and make sure we didn't miss any headers that are
600 * bridged between chunks. What is the best way to do this?\
601 */
602 }
603
604 if (!get_mode(s, mode_quiet))
605 {
606 fprintf(stderr, "|\n");
607 }
608
609 #ifdef DEBUG
610 printf("\n\tDONE READING bytes_read:=%llu\n", bytesread);
611 #endif
612 if (signal_caught == SIGTERM || signal_caught == SIGINT)
613 {
614 user_interrupt(s, i);
615 printf("Cleaning up.\n");
616 signal_caught = 0;
617 }
618
619 free(buf);
620 return FALSE;
621 }
622
/*
 * Announce the start of processing for one input: print a progress
 * prefix to stderr (unless quiet mode is set) and write the divider,
 * the file name and the start time to the audit log.
 */
void audit_start(f_state *s, f_info *i)
{
    if (get_mode(s, mode_quiet) == 0)
        fprintf(stderr, "Processing: %s\n|", i->file_name);

    /* audit header for this input */
    audit_msg(s, FOREMOST_DIVIDER);
    audit_msg(s, "File: %s", i->file_name);
    audit_msg(s, "Start: %s", current_time());
}
634
/*
 * Write the finish time of the current input to the audit log.
 * The input description is accepted for symmetry with audit_start()
 * but is not used.
 */
void audit_finish(f_state *s, f_info *i)
{
    (void) i; /* not needed for the finish line */

    audit_msg(s, "Finish: %s", current_time());
}
639
/*
 * Process the input file named by s->input_file: validate the path with
 * realpath(), open the file, determine its size, run search_stream() on
 * it and write the start/finish audit entries.
 *
 * Returns TRUE when the path cannot be resolved, memory cannot be
 * allocated or the file cannot be opened; otherwise returns the result
 * of search_stream() (FALSE when the stream was processed).
 */
int process_file(f_state *s)
{
    f_info *i = NULL;
    char temp[PATH_MAX];
    int saved_errno = 0;
    int status = FALSE;

    /* temp is only used to verify that the path resolves */
    if ((realpath(s->input_file, temp)) == NULL)
    {
        print_error(s, s->input_file, strerror(errno));
        return TRUE;
    }

    i = (f_info *)malloc(sizeof(f_info));
    if (i == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_file()\n");
        return TRUE;
    }

    i->file_name = strdup(s->input_file);
    if (i->file_name == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_file()\n");
        free(i);
        return TRUE;
    }

    i->is_stdin = FALSE;
    audit_start(s, i);

#if defined(__LINUX)
#ifdef DEBUG
    printf("Using 64 bit fopen\n");
#endif
    i->handle = fopen64(i->file_name, "rb");
#elif defined(__WIN32)

    /*I would like to be able to read from
     * physical devices in Windows, have played
     * with different options to fopen and the
     * dd src says you need write access on WinXP
     * but nothing seems to work*/
    i->handle = fopen(i->file_name, "rb");
#else
    i->handle = fopen(i->file_name, "rb");
#endif
    if (i->handle == NULL)
    {
        /* keep the fopen error: the reporting calls may change errno */
        saved_errno = errno;
        print_error(s, s->input_file, strerror(saved_errno));
        audit_msg(s, "Error: %s", strerror(saved_errno));
        free(i->file_name);
        free(i);
        return TRUE;
    }

    i->total_bytes = find_file_size(i->handle);
    status = search_stream(s, i);
    audit_finish(s, i);

    fclose(i->handle);
    free(i->file_name);
    free(i);
    return status;
}
689
/*
 * Process data arriving on standard input: sets s->input_file to "stdin",
 * writes the start audit entries and runs search_stream() on stdin.
 *
 * Returns TRUE when memory cannot be allocated; otherwise returns the
 * result of search_stream() (FALSE when the stream was processed).
 */
int process_stdin(f_state *s)
{
    int status = FALSE;
    f_info *i = (f_info *)malloc(sizeof(f_info));

    if (i == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_stdin()\n");
        return TRUE;
    }

    i->file_name = strdup("stdin");
    if (i->file_name == NULL)
    {
        fprintf(stderr, "Ran out of memory in process_stdin()\n");
        free(i);
        return TRUE;
    }

    s->input_file = "stdin";
    i->handle = stdin;
    i->is_stdin = TRUE;

    /* We can't compute the size of this stream, we just ignore it*/
    i->total_bytes = 0;
    audit_start(s, i);

    status = search_stream(s, i);

    free(i->file_name);
    free(i);
    return status;
}
709