1 
2 	 /* FOREMOST
3  *
4  * By Jesse Kornblum, Kris Kendall, & Nick Mikus
5  *
6  * This is a work of the US Government. In accordance with 17 USC 105,
7  * copyright protection is not available for any work of the US Government.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  */
14 
15 #include "main.h"
16 
/*
 * user_interrupt: shut the program down after an interrupt signal.
 *
 * Writes an "Interrupt received" line to the audit log, closes the input
 * stream held in i->handle, calls cleanup_output(s), releases both state
 * structures and terminates the process with exit(-1).
 *
 * s - program state; must still be valid when cleanup_output() runs
 * i - per-input state whose handle is closed here
 *
 * Returns: does not return in practice (exit() is called first); the
 *          trailing return FALSE only satisfies the int signature.
 */
int user_interrupt (f_state * s, f_info * i)
	{
	audit_msg(s, "Interrupt received at %s", current_time());

	/* RBF - Write user_interrupt */
	fclose(i->handle);

	/* cleanup_output() receives s, so it has to run before s is released.
	   The previous order freed s first and then passed the dangling
	   pointer to cleanup_output(). */
	cleanup_output(s);

	free(i);
	free(s);
	exit(-1);
	return FALSE;
	}
29 
/*
 * read_from_disk: read exactly `length` bytes, starting at absolute offset
 * `offset` of the stream i->handle, into a freshly allocated buffer.
 *
 * offset - position to seek to, measured from the start of the stream
 * i      - input state; only i->handle is used
 * length - number of bytes wanted
 *
 * Returns: the new buffer (the caller must free() it), or NULL when the
 *          seek fails, when fewer than `length` bytes could be read, or
 *          when `length` does not fit in a size_t.
 * Exits the process with status 1 if the buffer cannot be allocated.
 *
 * The stream position is left wherever the seek/read ended; a caller that
 * needs the previous position has to save and restore it itself.
 */
unsigned char *read_from_disk(u_int64_t offset, f_info *i, u_int64_t length)
{
	size_t			want = (size_t) length;
	size_t			got = 0;
	unsigned char	*newbuf = NULL;

	/* A length that does not survive the round trip through size_t can
	   neither be allocated nor read in a single call */
	if ((u_int64_t) want != length)
		return NULL;

	newbuf = (unsigned char *)malloc(want);
	if (!newbuf)
		{
		fprintf(stderr, "Ran out of memory in read_from_disk()\n");
		exit(1);
		}

	/* Without this check a failed seek (e.g. on a non-seekable stream)
	   would silently return bytes from the wrong position */
	if (fseeko(i->handle, offset, SEEK_SET) != 0)
		{
		free(newbuf);
		return NULL;
		}

	got = fread(newbuf, 1, want, i->handle);
	if (got != want)
		{
		free(newbuf);
		return NULL;
		}

	return newbuf;
}
52 
53 /*
54    Perform a modified boyer-moore string search (w/ support for wildcards and case-insensitive searches)
55    and allows the starting position in the buffer to be manually set, which allows data to be skipped
56 */
bm_search_skipn(unsigned char * needle,size_t needle_len,unsigned char * haystack,size_t haystack_len,size_t table[UCHAR_MAX+1],int casesensitive,int searchtype,int start_pos)57 unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len, unsigned char *haystack,
58 							   size_t haystack_len, size_t table[UCHAR_MAX + 1], int casesensitive,
59 							   int searchtype, int start_pos)
60 {
61 	register size_t shift = 0;
62 	register size_t pos = start_pos;
63 	unsigned char	*here;
64 
65 	if (needle_len == 0)
66 		return haystack;
67 
68 	if (searchtype == SEARCHTYPE_FORWARD || searchtype == SEARCHTYPE_FORWARD_NEXT)
69 		{
70 		while (pos < haystack_len)
71 			{
72 			while (pos < haystack_len && (shift = table[(unsigned char)haystack[pos]]) > 0)
73 				{
74 				pos += shift;
75 				}
76 
77 			if (0 == shift)
78 				{
79 				here = (unsigned char *) &haystack[pos - needle_len + 1];
80 				if (0 == memwildcardcmp(needle, here, needle_len, casesensitive))
81 					{
82 					return (here);
83 					}
84 				else
85 					pos++;
86 				}
87 			}
88 
89 		return NULL;
90 		}
91 	else if (searchtype == SEARCHTYPE_REVERSE)	//Run our search backwards
92 		{
93 		while (pos < haystack_len)
94 			{
95 			while
96 			(
97 				pos < haystack_len &&
98 				(shift = table[(unsigned char)haystack[haystack_len - pos - 1]]) > 0
99 			)
100 				{
101 				pos += shift;
102 				}
103 
104 			if (0 == shift)
105 				{
106 				if (0 == memwildcardcmp(needle, here = (unsigned char *) &haystack[haystack_len - pos - 1],
107 					needle_len, casesensitive))
108 					{
109 					return (here);
110 					}
111 				else
112 					pos++;
113 				}
114 			}
115 
116 		return NULL;
117 		}
118 
119 	return NULL;
120 }
121 
/*
   Convenience front end for bm_search_skipn(): runs the same modified
   boyer-moore search (wildcards and case-insensitive matching included) but
   always starts at position needle_len - 1 instead of a caller-chosen one.
   All other arguments and the return value are those of bm_search_skipn().
*/
unsigned char *bm_search(unsigned char *needle, size_t needle_len, unsigned char *haystack,
						 size_t haystack_len, size_t table[UCHAR_MAX + 1], int case_sen,
						 int searchtype)
{
	/* Start position handed to bm_search_skipn() */
	int	first_pos = (int) (needle_len - 1);

	return bm_search_skipn(needle, needle_len, haystack, haystack_len, table, case_sen, searchtype,
						   first_pos);
}
143 
/*
 * setup_stream: prepare the per-input counters before searching and log
 * the input length and the amount being skipped.
 *
 * s - program state; s->skip and s->block_size give the skip distance
 *     (skip blocks of block_size bytes)
 * i - input state; bytes_read, total_megs and total_bytes are updated and
 *     i->handle is positioned past the skipped area
 *
 * A total_bytes of 0 is reported as "Length: Unknown".
 */
void setup_stream(f_state *s, f_info *i)
{
	char		buffer[MAX_STRING_LENGTH];
	u_int64_t	skip = (((u_int64_t) s->skip) * ((u_int64_t) s->block_size));
#ifdef DEBUG
	printf("s->skip=%d s->block_size=%d total=%llu\n",
		   s->skip,
		   s->block_size,
		   (unsigned long long) skip);
#endif
	i->bytes_read = 0;
	i->total_megs = i->total_bytes / ONE_MEGABYTE;

	/* The casts make the arguments match %llu regardless of how the
	   64-bit typedefs are defined on this platform */
	if (i->total_bytes != 0)
		{
		audit_msg(s,
				  "Length: %s (%llu bytes)",
				  human_readable(i->total_bytes, buffer),
				  (unsigned long long) i->total_bytes);
		}
	else
		audit_msg(s, "Length: Unknown");

	if (s->skip != 0)
		{
		audit_msg(s,
				  "Skipping: %s (%llu bytes)",
				  human_readable(skip, buffer),
				  (unsigned long long) skip);

		/* The seek used to be unchecked, so a stream that cannot seek was
		   searched from its start while the log claimed data was skipped */
		if (fseeko(i->handle, skip, SEEK_SET) != 0)
			audit_msg(s, "Error: unable to skip ahead: %s", strerror(errno));

		if (i->total_bytes != 0)
			{
			/* Skipping past the end must not wrap the remaining length
			   around to a huge value */
			if (skip < i->total_bytes)
				i->total_bytes -= skip;
			else
				i->total_bytes = 0;
			}
		}

	audit_msg(s, " ");

#ifdef __WIN32
	i->last_read = 0;
	i->overflow_count = 0;
#endif

}
183 
/*
 * audit_layout: write the column heading line of the audit table
 * (Num, Name with the block size in use, Size, File Offset, Comment).
 *
 * s - program state; s->block_size is shown in the heading
 */
void audit_layout(f_state *s)
{
	audit_msg(s, "Num\t %s (bs=%d)\t %10s\t %s\t %s \n",
			  "Name", s->block_size, "Size", "File Offset", "Comment");
}
195 
dumpInd(unsigned char * ind,int bs)196 void dumpInd(unsigned char *ind, int bs)
197 {
198 	int i = 0;
199 	printf("\n/*******************************/\n");
200 
201 	while (bs > 0)
202 		{
203 		if (i % 10 == 0)
204 			printf("\n");
205 
206 		//printx(ind,0,10);
207 		printf("%4u ", htoi(ind, FOREMOST_LITTLE_ENDIAN));
208 
209 		bs -= 4;
210 		ind += 4;
211 		i++;
212 		}
213 
214 	printf("\n/*******************************/\n");
215 }
216 
217 /********************************************************************************
218  *Function: ind_block
219  *Description: check if the block foundat is pointing to looks like an indirect
220  *	block
221  *Return: TRUE/FALSE
222  **********************************************************************************/
ind_block(unsigned char * foundat,u_int64_t buflen,int bs)223 int ind_block(unsigned char *foundat, u_int64_t buflen, int bs)
224 {
225 
226 	unsigned char	*temp = foundat;
227 	int				jump = 12 * bs;
228 	unsigned int	block = 0;
229 	unsigned int	block2 = 0;
230 	unsigned int	dif = 0;
231 	int				i = 0;
232 	unsigned int	one = 1;
233 	unsigned int	numbers = (bs / 4) - 1;
234 
235 	//int reconstruct=FALSE;
236 
237 	/*Make sure we don't jump past the end of the buffer*/
238 	if (buflen < jump + 16)
239 		return FALSE;
240 
241 	while (i < numbers)
242 		{
243 		block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
244 
245 		if (block < 0)
246 			return FALSE;
247 
248 		if (block == 0)
249 			{
250 			break;
251 			}
252 
253 		i++;
254 		block2 = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
255 		if (block2 < 0)
256 			return FALSE;
257 
258 		if (block2 == 0)
259 			{
260 			break;
261 			}
262 
263 		dif = block2 - block;
264 
265 		if (dif == one)
266 		{
267 
268 #ifdef DEBUG
269 			printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif);
270 #endif
271 		}
272 		else
273 		{
274 
275 #ifdef DEBUG
276 			printf("Failure, dif!=1\n");
277 			printf("\tblock1:=%u, block2:=%u dif=%u\n", block, block2, dif);
278 #endif
279 
280 			return FALSE;
281 		}
282 
283 #ifdef DEBUG
284 		printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif);
285 #endif
286 		}
287 
288 	if (i == 0)
289 		return FALSE;
290 
291 	/*Check if the rest of the bytes are zero'd out */
292 	for (i = i + 1; i < numbers; i++)
293 		{
294 		block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN);
295 		if (block != 0)
296 			{
297 
298 			//printf("Failure, 0 test\n");
299 			return FALSE;
300 			}
301 		}
302 
303 	return TRUE;
304 }
305 
306 /********************************************************************************
307  *Function: search_chunk
308  *Description: Analyze the given chunk by running each defined search spec on it
309  *Return: TRUE/FALSE
310  **********************************************************************************/
search_chunk(f_state * s,unsigned char * buf,f_info * i,u_int64_t chunk_size,u_int64_t f_offset)311 int search_chunk(f_state *s, unsigned char *buf, f_info *i, u_int64_t chunk_size, u_int64_t f_offset)
312 {
313 
314 	u_int64_t		c_offset = 0;
315 	//u_int64_t               foundat_off = 0;
316 	//u_int64_t               buf_off = 0;
317 
318 	unsigned char	*foundat = buf;
319 	unsigned char	*current_pos = NULL;
320 	unsigned char	*header_pos = NULL;
321 	unsigned char	*newbuf = NULL;
322 	unsigned char	*ind_ptr = NULL;
323 	u_int64_t		current_buflen = chunk_size;
324 	int				tryBS[3] = { 4096, 1024, 512 };
325 	unsigned char	*extractbuf = NULL;
326 	u_int64_t		file_size = 0;
327 	s_spec			*needle = NULL;
328 	int				j = 0;
329 	int				bs = 0;
330 	int				rem = 0;
331 	int				x = 0;
332 	int				found_ind = FALSE;
333 	 off_t saveme;
334 	//char comment[32];
335 	for (j = 0; j < s->num_builtin; j++)
336 		{
337 		needle = &search_spec[j];
338 		foundat = buf;										/*reset the buffer for the next search spec*/
339 #ifdef DEBUG
340 		printf("	SEARCHING FOR %s's\n", needle->suffix);
341 #endif
342 		bs = 0;
343 		current_buflen = chunk_size;
344 		while (foundat)
345 			{
346 			needle->written = FALSE;
347 			found_ind = FALSE;
348 			memset(needle->comment, 0, COMMENT_LENGTH - 1);
349                         if (chunk_size <= (foundat - buf)) {
350 #ifdef DEBUG
351 				printf("avoided seg fault in search_chunk()\n");
352 #endif
353 				foundat = NULL;
354 				break;
355 			}
356 			current_buflen = chunk_size - (foundat - buf);
357 
358 			//if((foundat-buf)< 1 ) break;
359 #ifdef DEBUG
360 			//foundat_off=foundat;
361 			//buf_off=buf;
362 			//printf("current buf:=%llu (foundat-buf)=%llu \n", current_buflen, (u_int64_t) (foundat_off - buf_off));
363 #endif
364 			if (signal_caught == SIGTERM || signal_caught == SIGINT)
365 				{
366 				user_interrupt(s, i);
367 				printf("Cleaning up.\n");
368 				signal_caught = 0;
369 				}
370 
371 			if (get_mode(s, mode_quick))					/*RUN QUICK SEARCH*/
372 			{
373 #ifdef DEBUG
374 
375 				//printf("quick mode is on\n");
376 #endif
377 
378 				/*Check if we are not on a block head, adjust if so*/
379 				rem = (foundat - buf) % s->block_size;
380 				if (rem != 0)
381 					{
382 					foundat += (s->block_size - rem);
383 					}
384 
385 				if (memwildcardcmp(needle->header, foundat, needle->header_len, needle->case_sen
386 					) != 0)
387 					{
388 
389 					/*No match, jump to the next block*/
390 					if (current_buflen > s->block_size)
391 						{
392 						foundat += s->block_size;
393 						continue;
394 						}
395 					else									/*We are out of buffer lets go to the next search spec*/
396 						{
397 						foundat = NULL;
398 						break;
399 						}
400 					}
401 
402 				header_pos = foundat;
403 			}
404 			else											/**********RUN STANDARD SEARCH********************/
405 				{
406 				foundat = bm_search(needle->header,
407 									needle->header_len,
408 									foundat,
409 									current_buflen,			//How much to search through
410 									needle->header_bm_table,
411 									needle->case_sen,		//casesensative
412 									SEARCHTYPE_FORWARD);
413 
414 				header_pos = foundat;
415 				}
416 
417 			if (foundat != NULL && foundat >= 0)			/*We got something, run the appropriate heuristic to find the EOF*/
418 				{
419 				current_buflen = chunk_size - (foundat - buf);
420 
421 				if (get_mode(s, mode_ind_blk))
422 				{
423 #ifdef DEBUG
424 					printf("ind blk detection on\n");
425 #endif
426 
427 					//dumpInd(foundat+12*1024,1024);
428 					for (x = 0; x < 3; x++)
429 						{
430 						bs = tryBS[x];
431 
432 						if (ind_block(foundat, current_buflen, bs))
433 							{
434 							if (get_mode(s, mode_verbose))
435 								{
436 								sprintf(needle->comment, " (IND BLK bs:=%d)", bs);
437 								}
438 
439 							//dumpInd(foundat+12*bs,bs);
440 #ifdef DEBUG
441 							printf("performing mem move\n");
442 #endif
443 							if(current_buflen >  13 * bs)//Make sure we have enough buffer
444 								{
445 								if (!memmove(foundat + 12 * bs, foundat + 13 * bs, current_buflen - 13 * bs))
446 								break;
447 
448 								found_ind = TRUE;
449 #ifdef DEBUG
450 								printf("performing mem move complete\n");
451 #endif
452 								ind_ptr = foundat + 12 * bs;
453 								current_buflen -= bs;
454 								chunk_size -= bs;
455 								break;
456 								}
457 							}
458 
459 						}
460 
461 				}
462 
463 				c_offset = (foundat - buf);
464 				current_pos = foundat;
465 
466 				/*Now lets analyze the file and see if we can determine its size*/
467 
468 				// printf("c_offset=%llu %x %x %llx\n", c_offset,foundat,buf,c_offset);
469 				foundat = extract_file(s, c_offset, foundat, current_buflen, needle, f_offset);
470 #ifdef DEBUG
471 				if (foundat == NULL)
472 					{
473 					printf("Foundat == NULL!!!\n");
474 					}
475 #endif
476 				if (get_mode(s, mode_write_all))
477 					{
478 					if (needle->written == FALSE)
479 						{
480 
481 						/*write every header we find*/
482 						if (current_buflen >= needle->max_len)
483 							{
484 							file_size = needle->max_len;
485 							}
486 						else
487 							{
488 							file_size = current_buflen;
489 							}
490 
491 						sprintf(needle->comment, " (Header dump)");
492 						extractbuf = (unsigned char *)malloc(file_size * sizeof(char));
493 						memcpy(extractbuf, header_pos, file_size);
494 						write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
495 						free(extractbuf);
496 						}
497 					}
498 				else if (!foundat)							/*Should we search further?*/
499 					{
500 
501 					/*We couldn't determine where the file ends, now lets check to see
502 			* if we should try again
503 			*/
504 					if (current_buflen < needle->max_len)	/*We need to bridge the gap*/
505 					{
506 #ifdef DEBUG
507 						printf("	Bridge the gap\n");
508 #endif
509 						saveme = ftello(i->handle);
510 						/*grow the buffer and try to extract again*/
511 						newbuf = read_from_disk(c_offset + f_offset, i, needle->max_len);
512 						if (newbuf == NULL)
513 							break;
514 						current_pos = extract_file(s,
515 												   c_offset,
516 												   newbuf,
517 												   needle->max_len,
518 												   needle,
519 												   f_offset);
520 
521 						/*Lets put the fp back*/
522 						fseeko(i->handle, saveme, SEEK_SET);
523 
524 
525 						free(newbuf);
526 					}
527 					else
528 						{
529 						foundat = header_pos;				/*reset the foundat pointer to the location of the last header*/
530 						foundat += needle->header_len + 1;	/*jump past the header*/
531 						}
532 					}
533 
534 
535 				}
536 
537 			if (found_ind)
538 				{
539 
540 				/*Put the ind blk back in, re-arrange the buffer so that the future blks names come out correct*/
541 #ifdef DEBUG
542 						printf("Replacing the ind block\n");
543 #endif
544 				/*This is slow, should we do this??????*/
545 				if (!memmove(ind_ptr + 1 * bs, ind_ptr, current_buflen - 13 * bs))
546 					break;
547 				memset(ind_ptr, 0, bs - 1);
548 				chunk_size += bs;
549 				memset(needle->comment, 0, COMMENT_LENGTH - 1);
550 				}
551 			}	//end while
552 		}
553 
554 	return TRUE;
555 }
556 
557 /********************************************************************************
558  *Function: search_stream
559  *Description: Analyze the file by reading 1 chunk (default: 100MB) at a time and
560  *passing it to	search_chunk
561  *Return: TRUE/FALSE
562  **********************************************************************************/
search_stream(f_state * s,f_info * i)563 int search_stream(f_state *s, f_info *i)
564 {
565 	u_int64_t		bytesread = 0;
566 	u_int64_t		f_offset = 0;
567 	u_int64_t		chunk_size = ((u_int64_t) s->chunk_size) * MEGABYTE;
568 	unsigned char	*buf = (unsigned char *)malloc(sizeof(char) * chunk_size);
569 
570 	setup_stream(s, i);
571 
572 	audit_layout(s);
573 #ifdef DEBUG
574 	printf("\n\t READING THE FILE INTO MEMORY\n");
575 #endif
576 
577 	while ((bytesread = fread(buf, 1, chunk_size, i->handle)) > 0)
578 		{
579 		if (signal_caught == SIGTERM || signal_caught == SIGINT)
580 			{
581 			user_interrupt(s, i);
582 			printf("Cleaning up.\n");
583 			signal_caught = 0;
584 			}
585 
586 #ifdef DEBUG
587 		printf("\n\tbytes_read:=%llu\n", bytesread);
588 #endif
589 		search_chunk(s, buf, i, bytesread, f_offset);
590 		f_offset += bytesread;
591 		if (!get_mode(s, mode_quiet))
592 			{
593 			fprintf(stderr, "*");
594 
595 			//displayPosition(s,i,f_offset);
596 			}
597 
598 		/*FIX ME***
599 	* We should jump back and make sure we didn't miss any headers that are
600 	* bridged between chunks.  What is the best way to do this?\
601   	*/
602 		}
603 
604 	if (!get_mode(s, mode_quiet))
605 		{
606 		fprintf(stderr, "|\n");
607 		}
608 
609 #ifdef DEBUG
610 	printf("\n\tDONE READING bytes_read:=%llu\n", bytesread);
611 #endif
612 	if (signal_caught == SIGTERM || signal_caught == SIGINT)
613 		{
614 		user_interrupt(s, i);
615 		printf("Cleaning up.\n");
616 		signal_caught = 0;
617 		}
618 
619 	free(buf);
620 	return FALSE;
621 }
622 
/*
 * audit_start: announce that processing of an input begins.
 *
 * Unless quiet mode is set, "Processing: <name>" followed by the opening
 * '|' of the progress line goes to stderr.  The audit log always receives
 * the divider, the file name and the start time.
 *
 * s - program state (modes, audit log)
 * i - input state; only i->file_name is read
 */
void audit_start(f_state *s, f_info *i)
{
	int	quiet = get_mode(s, mode_quiet);

	if (!quiet)
		fprintf(stderr, "Processing: %s\n|", i->file_name);

	audit_msg(s, FOREMOST_DIVIDER);
	audit_msg(s, "File: %s", i->file_name);
	audit_msg(s, "Start: %s", current_time());
}
634 
/*
 * audit_finish: record the finishing time in the audit log.
 *
 * s - program state (audit log)
 * i - input state; not used by this function
 */
void audit_finish(f_state *s, f_info *i)
{
	(void) i;	/* kept only for symmetry with audit_start() */

	audit_msg(s, "Finish: %s", current_time());
}
639 
/*
 * process_file: open the input named by s->input_file, search it and
 * write the start/finish entries to the audit log.
 *
 * s - program state; s->input_file is the path to open
 *
 * Returns: FALSE when the file was processed, TRUE when the path could
 *          not be resolved, the file could not be opened or memory ran
 *          out.  Everything allocated here is released on every path.
 */
int process_file(f_state *s)
{
	f_info	*i = NULL;
	char	temp[PATH_MAX];
	int		open_errno = 0;

	/* temp is only needed so that realpath() can validate the path */
	if ((realpath(s->input_file, temp)) == NULL)
		{
		print_error(s, s->input_file, strerror(errno));
		return TRUE;
		}

	/* Allocate only after the path check so the early return above has
	   nothing to release */
	i = (f_info *)malloc(sizeof(f_info));
	if (i == NULL)
		{
		fprintf(stderr, "Ran out of memory in process_file()\n");
		return TRUE;
		}

	i->file_name = strdup(s->input_file);
	if (i->file_name == NULL)
		{
		fprintf(stderr, "Ran out of memory in process_file()\n");
		free(i);
		return TRUE;
		}

	i->is_stdin = FALSE;
	audit_start(s, i);

#if defined(__LINUX)
	#ifdef DEBUG
	printf("Using 64 bit fopen\n");
	#endif
	i->handle = fopen64(i->file_name, "rb");
#elif defined(__WIN32)

	/*I would like to be able to read from
	* physical devices in Windows, have played
	* with different options to fopen and the
	* dd src says you need write access on WinXP
	* but nothing seems to work*/
	i->handle = fopen(i->file_name, "rb");
#else
	i->handle = fopen(i->file_name, "rb");
#endif
	if (i->handle == NULL)
		{
		/* Keep the error code: the calls below may overwrite errno */
		open_errno = errno;
		print_error(s, s->input_file, strerror(open_errno));
		audit_msg(s, "Error: %s", strerror(open_errno));
		free(i->file_name);
		free(i);
		return TRUE;
		}

	i->total_bytes = find_file_size(i->handle);
	search_stream(s, i);
	audit_finish(s, i);

	fclose(i->handle);
	free(i->file_name);		/* was leaked before; process_stdin() already frees its copy */
	free(i);
	return FALSE;
}
689 
/*
 * process_stdin: search the data arriving on standard input.
 *
 * s - program state; s->input_file is set to "stdin"
 *
 * Returns: FALSE after the stream has been searched, TRUE when memory
 *          for the input state could not be allocated.
 */
int process_stdin(f_state *s)
{
	f_info	*i = (f_info *)malloc(sizeof(f_info));

	/* The fields below were previously written without checking the
	   allocation */
	if (i == NULL)
		{
		fprintf(stderr, "Ran out of memory in process_stdin()\n");
		return TRUE;
		}

	i->file_name = strdup("stdin");
	if (i->file_name == NULL)
		{
		fprintf(stderr, "Ran out of memory in process_stdin()\n");
		free(i);
		return TRUE;
		}

	s->input_file = "stdin";
	i->handle = stdin;
	i->is_stdin = TRUE;

	/* We can't compute the size of this stream, we just ignore it*/
	i->total_bytes = 0;
	audit_start(s, i);

	search_stream(s, i);

	free(i->file_name);
	free(i);
	return FALSE;
}
709