1 
2 	 /* extract.c
3  * Copyright (c) 2005, Nick Mikus
4  * This file contains the file specific functions used to extract
5  * data from an image.
6  *
7  * Each has a similar structure
8  * f_state *s:  state of the program.
9  * c_offset:	offset that the header was recorded within the current chunk
10  * foundat:	The location the header was "foundat"
11  * buflen:	How much buffer is left until the end of the current chunk
12  * needle:	Search specification
13  * f_offset:	Offset that the current chunk is located within the file
14  */
15 
16 #include "main.h"
17 #include "extract.h"
18 #include "ole.h"
19 extern unsigned char buffer[OUR_BLK_SIZE];
20 extern int	verbose;
21 extern int	dir_count;
22 extern int	block_list[OUR_BLK_SIZE / sizeof(int)];
23 extern int	*FAT;
24 extern char *extract_name;
25 extern int	extract;
26 extern int	FATblk;
27 extern int	highblk;
28 
29 /********************************************************************************
30  *Function: extract_zip
31  *Description: Given that we have a ZIP header jump through the file headers
32     until we reach the EOF.
33  *Return: A pointer to where the EOF of the ZIP is in the current buffer
34 **********************************************************************************/
extract_zip(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset,char * type)35 unsigned char *extract_zip(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
36 						   s_spec *needle, u_int64_t f_offset, char *type)
37 {
38 	unsigned char				*currentpos = NULL;
39 	unsigned char				*buf = foundat;
40 	unsigned short				comment_length = 0;
41 	unsigned char				*extractbuf = NULL;
42 	struct zipLocalFileHeader	localFH;
43 	u_int64_t					bytes_to_search = 50 * KILOBYTE;
44 	u_int64_t					file_size = 0;
45 	int							oOffice = FALSE;
46 	int							office2007 = FALSE;
47 
48 	char						comment[32];
49 	localFH.genFlag=0;
50 	localFH.compressed=0;
51 	localFH.uncompressed =0;
52 	if (buflen < 100)
53 		return NULL;
54 
55 	if (strncmp((char *) &foundat[30], "mimetypeapplication/vnd.sun.xml.", 32) == 0)
56 		{
57 		oOffice = TRUE;
58 		if (strncmp((char *) &foundat[62], "calc", 4) == 0)
59 			{
60 			needle->suffix = "sxc";
61 			}
62 		else if (strncmp((char *) &foundat[62], "impress", 7) == 0)
63 			{
64 			needle->suffix = "sxi";
65 			}
66 		else if (strncmp((char *) &foundat[62], "writer", 6) == 0)
67 			{
68 			needle->suffix = "sxw";
69 			}
70 		else
71 			{
72 			sprintf(comment, " (OpenOffice Doc?)");
73 			strcat(needle->comment, comment);
74 			needle->suffix = "sx";
75 			}
76 		}
77 	else
78 		{
79 		needle->suffix = "zip";
80 		}
81 
82 
83 	while (1)	//Jump through each local file header until the central directory structure is reached, much faster than searching
84 		{
85 
86 		if (foundat[2] == '\x03' && foundat[3] == '\x04')	//Verfiy we are looking at a local file header//
87 			{
88 
89 			localFH.compression=htos(&foundat[8], FOREMOST_LITTLE_ENDIAN);
90 			localFH.compressed = htoi(&foundat[18], FOREMOST_LITTLE_ENDIAN);
91 			localFH.uncompressed = htoi(&foundat[22], FOREMOST_LITTLE_ENDIAN);
92 			localFH.filename_length = htos(&foundat[26], FOREMOST_LITTLE_ENDIAN);
93 			localFH.extra_length = htos(&foundat[28], FOREMOST_LITTLE_ENDIAN);;
94 			localFH.genFlag = htos(&foundat[6], FOREMOST_LITTLE_ENDIAN);
95 
96 			// Sanity checking
97 			if (localFH.compressed > needle->max_len)
98 				return foundat + needle->header_len;
99 
100 			if (localFH.filename_length > 100)
101 				return foundat + needle->header_len;
102 
103 			//Check if we should grab more from the disk
104 			if (localFH.compressed + 30 > buflen - (foundat - buf))
105 				{
106 				return NULL;
107 				}
108 
109 			//Size of the local file header data structure
110 			foundat += 30;
111 
112 			if (strcmp(needle->suffix,"zip")==0)
113 				{
114 				if (strncmp((char *)foundat, "content.xml", 11) == 0 && strcmp(needle->suffix,"zip")==0)
115 					{
116 					oOffice = TRUE;
117 					sprintf(comment, " (OpenOffice Doc?)");
118 					strcat(needle->comment, comment);
119 					needle->suffix = "sx";
120 					}
121 				else if (strstr((char *)foundat, ".class") || strstr((char *)foundat, ".jar") ||
122 						 strstr((char *)foundat, ".java"))
123 					{
124 					needle->suffix = "jar";
125 					}
126 				else if(strncmp((char *)foundat, "[Content_Types].xml",19)==0)
127 					{
128 						office2007=TRUE;
129 					}
130 				else if(strncmp((char *)foundat, "ppt/slides",10)==0 && office2007==TRUE)
131 					{
132 						needle->suffix = "pptx";
133 					}
134 				else if(strncmp((char *)foundat, "word/document.xml",17)==0 && office2007==TRUE)
135 					{
136 						needle->suffix = "docx";
137 					}
138 				else if(strncmp((char *)foundat, "xl/workbook.xml",15)==0 && office2007==TRUE)
139 					{
140 						needle->suffix = "xlsx";
141 					}
142 
143 
144 				else
145 					{
146 						printf("foundat=%s\n",foundat);
147 					}
148 				}
149 
150 			foundat += localFH.compressed;
151 			foundat += localFH.filename_length;
152 			foundat += localFH.extra_length;
153 
154 			if (localFH.genFlag == 8)
155 				{
156 #ifdef DEBUG
157 					fprintf(stderr,"We have extra stuff!!!");
158 #endif
159 				}
160 
161 
162 			if(localFH.genFlag & 1<<3 && localFH.uncompressed==0 &&  localFH.compressed==0 )
163 				{
164 #ifdef DEBUG
165 				fprintf(stderr,"No data to jmp Just search for the next file Footer (localFH.genFlag:=%d)\n",localFH.genFlag);
166 #endif
167 				break;
168 				}
169 
170 	#ifdef DEBUG
171 				printf("localFH.compressed:=%d  localFH.uncompressed:=%d\n\t jumping %d bytes filename=%d bytes",
172 					   localFH.compressed,
173 					   localFH.uncompressed,localFH.filename_length+localFH.compressed+localFH.extra_length,localFH.filename_length);
174 				printx(foundat, 0, 16);
175 	#endif
176 
177 			}
178 		else if (oOffice && localFH.genFlag == 8)
179 			{
180 			break;
181 			}
182 		else
183 			{
184 			break;
185 			}
186 
187 
188 	}//end while loop
189 
190 	if (oOffice)
191 		{
192 
193 		//We have an OO doc how long should we search for?
194 		bytes_to_search = 1 * MEGABYTE;
195 		}
196 	else if (localFH.genFlag & 1<<3 && localFH.uncompressed==0 &&  localFH.compressed==0 )
197 		{
198 		bytes_to_search = needle->max_len;
199 		}
200 	else
201 		{
202 		bytes_to_search = (buflen < (foundat - buf) ? buflen : buflen - (foundat - buf));
203 		}
204 
205 	//Make sure we are not searching more than what he have
206         if (buflen <= (foundat - buf)) {
207 #ifdef DEBUG
208 		printf("avoided bug in extract_zip!\n");
209 #endif
210 		bytes_to_search = 0;
211 	} else {
212 		if (buflen - (foundat - buf) < bytes_to_search)
213 		{
214 		bytes_to_search = buflen - (foundat - buf);
215 		}
216 	}
217 
218 
219 	currentpos = foundat;
220 #ifdef DEBUG
221 	printf("Search for the footer bytes_to_search:=%lld buflen:=%lld\n", bytes_to_search, buflen);
222 #endif
223 
224 	foundat = bm_search(needle->footer,
225 						needle->footer_len,
226 						foundat,
227 						bytes_to_search,
228 						needle->footer_bm_table,
229 						needle->case_sen,
230 						SEARCHTYPE_FORWARD);
231 #ifdef DEBUG
232 	printf("Search complete \n");
233 #endif
234 
235 	if (foundat)											/*Found the end of the central directory structure, determine the exact length and extract*/
236 	{
237 
238 		/*Jump to the comment length field*/
239 #ifdef DEBUG
240 		printf("distance searched:=%lu\n", foundat - currentpos);
241 #endif
242 		if (buflen - (foundat - buf) > 20)
243 			{
244 			foundat += 20;
245 			}
246 		else
247 			{
248 			return NULL;
249 			}
250 
251 		comment_length = htos(foundat, FOREMOST_LITTLE_ENDIAN);
252 		foundat += comment_length + 2;
253 		file_size = (foundat - buf);
254 #ifdef DEBUG
255 		printf("File size %lld\n", file_size);
256 		printf("Found a %s type:=%s\n", needle->suffix, type);
257 #endif
258 		extractbuf = buf;
259 		if (strcmp(type,"all")==0 || strcmp(type,needle->suffix)==0)
260 		{
261 #ifdef DEBUG
262 			printf("Writing a %s to disk\n", needle->suffix);
263 #endif
264 			write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
265 		}
266 
267 #ifdef DEBUG
268 		printf("Found a %s\n", needle->suffix);
269 #endif
270 		return foundat-2;
271 	}
272 
273 	if (bytes_to_search > buflen - (currentpos - buf))
274 		return NULL;
275 
276 #ifdef DEBUG
277 	printf("I give up \n");
278 #endif
279 	return currentpos;
280 }
281 
282 /********************************************************************************
283  *Function: extract_pdf
284  *Description: Given that we have a PDF header check if it is Linearized, if so
285     grab the file size and we are done, else search for the %%EOF
286 *Return: A pointer to where the EOF of the PDF is in the current buffer
287 **********************************************************************************/
extract_pdf(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)288 unsigned char *extract_pdf(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
289 						   s_spec *needle, u_int64_t f_offset)
290 {
291 	unsigned char		*currentpos = NULL;
292 	unsigned char		*buf = foundat;
293 	unsigned char		*extractbuf = NULL;
294 	unsigned char		*tempsize;
295 	unsigned long int	size = 0;
296 	int					file_size = 0;
297 	unsigned char		*header = foundat;
298 	int					bytes_to_search = 0;
299 	char				comment[32];
300 
301 	foundat += needle->header_len;	/* Jump Past the %PDF HEADER */
302 	currentpos = foundat;
303 
304 #ifdef DEBUG
305 	printf("PDF SEARCH\n");
306 #endif
307 
308 	/*Determine when we have searched enough*/
309 	if (buflen >= needle->max_len)
310 		{
311 		bytes_to_search = needle->max_len;
312 		}
313 	else
314 		{
315 		bytes_to_search = buflen;
316 		}
317 
318 	/*Check if the buffer is less than 100 bytes, if so search what we have*/
319 	if (buflen < 512)
320 		return NULL;
321 	else
322 		{
323 		currentpos = foundat;
324 
325 		/*Check for .obj in the first 100 bytes*/
326 		foundat = bm_search(needle->markerlist[1].value,
327 							needle->markerlist[1].len,
328 							foundat,
329 							100,
330 							needle->markerlist[1].marker_bm_table,
331 							needle->case_sen,
332 							SEARCHTYPE_FORWARD);
333 
334 		if (!foundat)
335 		{
336 #ifdef DEBUG
337 			printf("no obj found\n");
338 #endif
339 			return currentpos + 100;
340 		}
341 
342 		foundat = currentpos;
343 
344 		/*Search for "./L " to see if the file is linearized*/
345 		foundat = bm_search(needle->markerlist[2].value,
346 							needle->markerlist[2].len,
347 							foundat,
348 							512,
349 							needle->markerlist[2].marker_bm_table,
350 							needle->case_sen,
351 							SEARCHTYPE_FORWARD);
352 
353 		if (foundat)
354 			{
355 			foundat = bm_search(needle->markerlist[0].value,
356 								needle->markerlist[0].len,
357 								foundat,
358 								512,
359 								needle->markerlist[0].marker_bm_table,
360 								needle->case_sen,
361 								SEARCHTYPE_FORWARD);
362 			}
363 		else
364 		{
365 #ifdef DEBUG
366 			printf("not linearized\n");
367 #endif
368 		}
369 		}
370 
371 	if (foundat)					/*The PDF is linearized extract the size and we are done*/
372 		{
373 		sprintf(comment, " (PDF is Linearized)");
374 		strcat(needle->comment, comment);
375 
376 		foundat += needle->markerlist[0].len;
377 		tempsize = (unsigned char *)malloc(8 * sizeof(char));
378 		tempsize = memcpy(tempsize, foundat, 8);
379 		size = atoi((char *)tempsize);
380 
381 		free(tempsize);
382 		if (size <= 0)
383 			return foundat;
384 		if (size > buflen)
385 			{
386 			if (size > needle->max_len)
387 				return foundat;
388 			else
389 				return NULL;
390 			}
391 
392 		header += size;
393 		foundat = header;
394 		foundat -= needle->footer_len;
395 
396 		/*Jump back 10 bytes and see if we actually have and EOF there*/
397 		foundat -= 10;
398 		currentpos = foundat;
399 		foundat = bm_search(needle->footer,
400 							needle->footer_len,
401 							foundat,
402 							needle->footer_len + 9,
403 							needle->footer_bm_table,
404 							needle->case_sen,
405 							SEARCHTYPE_FORWARD);
406 		if (foundat)				/*There is an valid EOF at the end, Write to disk*/
407 			{
408 			foundat += needle->footer_len + 1;
409 			file_size = (foundat - buf);
410 
411 			extractbuf = buf;
412 			write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
413 
414 			return foundat;
415 			}
416 
417 		return NULL;
418 
419 		}
420 	else							/*Search for Linearized PDF failed, just look for %%EOF */
421 	{
422 #ifdef DEBUG
423 		printf("	Linearized search failed, searching %d bytes, buflen:=%lld\n",
424 			   bytes_to_search,
425 			   buflen - (header - buf));
426 #endif
427 		foundat = currentpos;
428 		foundat = bm_search(needle->footer,
429 							needle->footer_len,
430 							foundat,
431 							bytes_to_search,
432 							needle->footer_bm_table,
433 							needle->case_sen,
434 							SEARCHTYPE_FORWARD);
435 
436 		if (foundat)				/*Write the non-linearized PDF to disk*/
437 			{
438 			foundat += needle->footer_len + 1;
439 			file_size = (foundat - buf);
440 			extractbuf = buf;
441 
442 			write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
443 
444 			return foundat;
445 
446 			}
447 
448 		return NULL;
449 	}
450 
451 }
452 
453 /********************************************************************************
454  *Function: extract_cpp
455  *Description: Use keywords to attempt to find C/C++ source code
456 *Return: A pointer to where the EOF of the CPP file is in the current buffer
457 **********************************************************************************/
extract_cpp(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)458 unsigned char *extract_cpp(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
459 						   s_spec *needle, u_int64_t f_offset)
460 {
461 
462 	unsigned char	*header = foundat;
463 	unsigned char	*buf = foundat;
464 	unsigned char	*extractbuf = NULL;
465 	int				end = 0;
466 	int				start = 0;
467 	int				i = 0;
468 	int				marker_score = 0;
469 	int				ok = FALSE;
470 	int				file_size = 0;
471 	unsigned char	*footer = NULL;
472 
473 	/*Search for a " or a < within 20 bytes of a #include statement*/
474 	for (i = 0; i < 20; i++)
475 		{
476 		if (foundat[i] == '\x22' || foundat[i] == '\x3C')
477 			{
478 			ok = TRUE;
479 			}
480 		}
481 
482 	if (!ok)
483 		return foundat + needle->header_len;
484 
485 	/*Keep running through the buffer until an non printable character is reached*/
486 	while (isprint(foundat[end]) || foundat[end] == '\x0a' || foundat[end] == '\x09')
487 		{
488 		end++;
489 		}
490 
491 	foundat += end - 1;
492 	footer = foundat;
493 
494 	if (end < 50)
495 		return foundat;
496 
497 	/*Now lets go the other way and grab all those comments at the begining of the file*/
498 	while (isprint(buf[start]) || buf[start] == '\x0a' || buf[start] == '\x09')
499 		{
500 		start--;
501 		}
502 
503 	header = &buf[start + 1];
504 	file_size = (footer - header);
505 
506 	foundat = header;
507 
508 	/*Now we have an ascii file to look for keywords in*/
509 	foundat = bm_search(needle->footer,
510 						needle->footer_len,
511 						header,
512 						file_size,
513 						needle->footer_bm_table,
514 						FALSE,
515 						SEARCHTYPE_FORWARD);
516 	if (foundat)
517 		marker_score += 1;
518 
519 	foundat = header;
520 	foundat = bm_search(needle->markerlist[0].value,
521 						needle->markerlist[0].len,
522 						header,
523 						file_size,
524 						needle->markerlist[0].marker_bm_table,
525 						1,
526 						SEARCHTYPE_FORWARD);
527 	if (foundat)
528 		marker_score += 1;
529 
530 	if (marker_score == 0)
531 		return foundat;
532 
533 	if (foundat)
534 		{
535 		extractbuf = buf;
536 		write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset + start + 1);
537 
538 		return footer;
539 
540 		}
541 
542 	return NULL;
543 }
544 
545 /********************************************************************************
546  *Function: extract_htm
547  *Description: Given that we have a HTM header
548     search for the file EOF and check that the bytes areound the header are ascii
549 *Return: A pointer to where the EOF of the HTM is in the current buffer
550 **********************************************************************************/
extract_htm(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)551 unsigned char *extract_htm(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
552 						   s_spec *needle, u_int64_t f_offset)
553 {
554 	unsigned char	*buf = foundat;
555 	unsigned char	*extractbuf = NULL;
556 	unsigned char	*currentpos = NULL;
557 
558 	int				bytes_to_search = 0;
559 	int				i = 0;
560 	int				file_size = 0;
561 
562 	/*Jump past the <HTML tag*/
563 	foundat += needle->header_len;
564 
565 	/*Check the first 16 bytes to see if they are ASCII*/
566 	for (i = 0; i < 16; i++)
567 		{
568 		if (!isprint(foundat[i]) && foundat[i] != '\x0a' && foundat[i] != '\x09')
569 			{
570 			return foundat + 16;
571 			}
572 		}
573 
574 	/*Determine if the buffer is large enough to encompass a reasonable search*/
575 	if (buflen < needle->max_len)
576 		{
577 		bytes_to_search = buflen - (foundat - buf);
578 		}
579 	else
580 		{
581 		bytes_to_search = needle->max_len;
582 		}
583 
584 	/*Store the current position and search for the HTML> tag*/
585 	currentpos = foundat;
586 	foundat = bm_search(needle->footer,
587 						needle->footer_len,
588 						foundat,
589 						bytes_to_search,
590 						needle->footer_bm_table,
591 						needle->case_sen,
592 						SEARCHTYPE_FORWARD);
593 	if (foundat)	//Found the footer, write to disk
594 		{
595 		file_size = (foundat - buf) + needle->footer_len;
596 		extractbuf = buf;
597 		write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
598 		foundat += needle->footer_len;
599 		return foundat;
600 
601 		}
602 	else
603 		{
604 		return NULL;
605 		}
606 
607 }
608 
609 /********************************************************************************
610  *Function: validOLEheader
611  *Description: run various tests aginst an OLE-HEADER to determine whether or not
612  	it is valid.
613 *Return: TRUE/FALSE
614 **********************************************************************************/
valid_ole_header(struct OLE_HDR * h)615 int valid_ole_header(struct OLE_HDR *h)
616 {
617 
618 	if (htos((unsigned char *) &h->reserved, FOREMOST_LITTLE_ENDIAN) != 0 ||
619 		htoi((unsigned char *) &h->reserved1, FOREMOST_LITTLE_ENDIAN) != 0 ||
620 		htoi((unsigned char *) &h->reserved2, FOREMOST_LITTLE_ENDIAN) != 0)
621 		{
622 		return FALSE;
623 		}
624 
625 	/*The minimum sector shift is usually 2^6(64) and the uSectorShift is 2^9(512))*/
626 	if (htos((unsigned char *) &h->uMiniSectorShift, FOREMOST_LITTLE_ENDIAN) != 6 ||
627 		htos((unsigned char *) &h->uSectorShift, FOREMOST_LITTLE_ENDIAN) != 9 ||
628 		htoi((unsigned char *) &h->dir_flag, FOREMOST_LITTLE_ENDIAN) < 0)
629 		{
630 		return FALSE;
631 		}
632 
633 	/*Sanity Checking*/
634 	if (htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN) <= 0 ||
635 		htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN) > 100)
636 		{
637 		return FALSE;
638 		}
639 
640 	if (htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN) < 0 ||
641 		htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN) > 100)
642 		{
643 		return FALSE;
644 		}
645 
646 	return TRUE;
647 
648 }
649 
650 /********************************************************************************
651  *Function:checkOleName
652  *Description: Determine what type of file is stored in the OLE format based on the
653  	names of DIRENT in the FAT table.
654 *Return: A char* consisting of the suffix of the appropriate file.
655 **********************************************************************************/
check_ole_name(char * name)656 char *check_ole_name(char *name)
657 {
658 	if (strstr(name, "WordDocument"))
659 		{
660 		return "doc";
661 		}
662 	else if (strstr(name, "Worksheet") || strstr(name, "Book") || strstr(name, "Workbook"))
663 		{
664 		return "xls";
665 		}
666 	else if (strstr(name, "Power"))
667 		{
668 		return "ppt";
669 		}
670 	else if (strstr(name, "Access") || strstr(name, "AccessObjSiteData"))
671 		{
672 		return "mbd";
673 		}
674 	else if (strstr(name, "Visio"))
675 		{
676 		return "vis";
677 		}
678 	else if (strstr(name, "Sfx"))
679 		{
680 		return "sdw";
681 		}
682 	else
683 		{
684 		return NULL;
685 		}
686 
687 	return NULL;
688 
689 }
690 
/********************************************************************************
 *Function: adjust_bs
 *Description: Round size up to the next multiple of the block size bs.
 *Return: size itself when it already is a multiple of bs, or when bs is 0
 *	(there is nothing to align to, and the modulo would be undefined);
 *	otherwise size plus the distance to the next multiple.
**********************************************************************************/
int adjust_bs(int size, int bs)
{
	int rem;

	if (bs == 0)
		return size;

	rem = (size % bs);
	if (rem == 0)
		{
		return size;
		}

#ifdef DEBUG
	printf("\tnew size:=%d\n", size + (bs - rem));
#endif
	return (size + (bs - rem));
}
707 
/********************************************************************************
 *Function: extract_ole
 *Description: Given that we have an OLE header, jump through the OLE structure
 *	(header, FAT, directory chain) and determine what type of file it is and
 *	how large it is.  The size is estimated from the directory entries, the
 *	FAT and mini FAT block counts, and the highest block number seen.
 *	The routine works on globals that are declared outside this function
 *	(FAT, dirlist, dl, dir_count, buffer, block_list, FATblk, highblk).
 *	needle->suffix is replaced by the detected suffix ("ole" when no directory
 *	entry name is recognised).
 *Return: A pointer into the buffer behind the part that was examined: on a
 *	failed check this is buf + validblk (or buf + 10 for a negative root
 *	block); on success it is buf + the high-water block offset, or
 *	buf + totalsize when that offset is unusable.  The file is only written
 *	when type is "all" or is contained in the detected suffix.
**********************************************************************************/
unsigned char *extract_ole(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset, char *type)
{
	unsigned char	*buf = foundat;
	unsigned char	*extractbuf = NULL;
	char			*temp = NULL;
	char			*suffix = "ole";		/* default when no directory entry name is recognised */
	int				totalsize = 0;			/* running estimate of the file size in bytes */
	int				extrasize = 0;			/* size of the non-stream entries, only reported under DEBUG */
	int				oldblk = 0;
	int				i, j;
	int				size = 0;
	int				blknum = 0;
	int				validblk = 512;			/* how far to skip ahead when a check fails */
	int				file_size = 0;
	int				num_extra_FAT_blocks = 0;
	unsigned char	*htoi_c = NULL;
	int				extra_dir_blocks = 0;	/* directory blocks found behind the root block */
	int				num_FAT_blocks = 0;
	int				next_FAT_block = 0;
	unsigned char	*p;
	int				fib = 1024;				/* fixed amount always added to the size estimate */
	struct OLE_HDR	*h = NULL;

	int				result = 0;
	int				highblock = 0;
	unsigned long	miniSectorCutoff = 0;
	unsigned long	csectMiniFat = 0;

	/*Deal with globals defined in the OLE API, ugly*/
	/* Release whatever the previous call left behind, then reset the API state */
	if (dirlist != NULL)
		free(dirlist);
	if (FAT != NULL)
		free(FAT);
	init_ole();

	if (buflen < validblk)
		validblk = buflen;
	/* NOTE(review): the header fields are read below without first checking
	 * that buflen covers a whole header */
	h = (struct OLE_HDR *)foundat;	/*cast the header block to point at foundat*/
#ifdef DEBUG
	dump_header(h);
#endif
	num_FAT_blocks = htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN);

	if (!valid_ole_header(h))
		return (buf + validblk);

	miniSectorCutoff = htoi((unsigned char *) &h->miniSectorCutoff, FOREMOST_LITTLE_ENDIAN);
	csectMiniFat = htoi((unsigned char *) &h->csectMiniFat, FOREMOST_LITTLE_ENDIAN);
	next_FAT_block = htoi((unsigned char *) &h->FAT_next_block, FOREMOST_LITTLE_ENDIAN);
	num_extra_FAT_blocks = htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN);

	/* The FAT index starts with the bytes that directly follow the header structure */
	FAT = (int *)Malloc(OUR_BLK_SIZE * (num_FAT_blocks + 1));
	p = (unsigned char *)FAT;
	memcpy(p, &h[1], OUR_BLK_SIZE - FAT_START);
	if (next_FAT_block > 0)
		{
		/* Append the extra FAT blocks; the last int of each block holds the
		 * number of the next one, and is overwritten by the following block.
		 * NOTE(review): the allocation above is sized by num_FAT_blocks while
		 * this loop is driven by num_extra_FAT_blocks - confirm it always fits. */
		p += (OUR_BLK_SIZE - FAT_START);
		blknum = next_FAT_block;
		for (i = 0; i < num_extra_FAT_blocks; i++)
			{
			if (!get_block(buf, blknum, p, buflen))
				return buf + validblk;
			validblk = (blknum + 1) * OUR_BLK_SIZE;
			p += OUR_BLK_SIZE - sizeof(int);
			blknum = htoi(p, FOREMOST_LITTLE_ENDIAN);
			}
		}

	blknum = htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN);

	if(blknum < 0)
	{
		return buf + 10;
	}

	highblock = htoi((unsigned char *) &h->dir_flag, FOREMOST_LITTLE_ENDIAN);
#ifdef DEBUG
	printf("getting dir block\n");
#endif

	//if(!get_dir_block (buf, blknum, buflen)) return buf+validblk;
	if (!get_block(buf, blknum, buffer, buflen))
		return buf + validblk;		/*GET DIR BLOCK*/
#ifdef DEBUG
	printf("done getting dir block\n");
#endif
	validblk = (blknum + 1) * OUR_BLK_SIZE;
	/* First pass over the directory chain: only count the blocks behind the
	 * root block, so dirlist can be sized below */
	while (blknum != END_OF_CHAIN)
	{
#ifdef DEBUG
		printf("finding dir info extra_dir_blks:=%d\n", extra_dir_blocks);
#endif
		/* Give up on chains that are implausibly long */
		if (extra_dir_blocks > 300)
			return buf + validblk;

		/**PROBLEM**/
#ifdef DEBUG
		printf("***blknum:=%d FATblk:=%d ourblksize=%d\n", blknum, FATblk,OUR_BLK_SIZE);
#endif
		oldblk = blknum;
		/* NOTE(review): blknum comes from the file and is not range checked before it indexes FAT */
		htoi_c = (unsigned char *) &FAT[blknum / (OUR_BLK_SIZE / sizeof(int))];

		FATblk = htoi(htoi_c, FOREMOST_LITTLE_ENDIAN);
#ifdef DEBUG
		printf("***blknum:=%d FATblk:=%d\n", blknum, FATblk);
#endif

		if (!get_FAT_block(buf, blknum, block_list, buflen))
			return buf + validblk;
		/* The FAT entry of the current block names the next block of the chain */
		blknum = htoi((unsigned char *) &block_list[blknum % 128], FOREMOST_LITTLE_ENDIAN);
#ifdef DEBUG
		printf("**blknum:=%d FATblk:=%d\n", blknum, FATblk);
#endif
		/* Stop at the end of the chain, or when a block points at itself */
		if (blknum == END_OF_CHAIN || oldblk == blknum)
		{
#ifdef DEBUG
			printf("EOC\n");
#endif
			break;
		}

		extra_dir_blocks++;
		result = get_dir_block(buf, blknum, buflen);
		if (result == SHORT_BLOCK)
		{
#ifdef DEBUG
			printf("SHORT BLK\n");
#endif
			break;
		}
		else if (!result)
			return buf + validblk;

	}

#ifdef DEBUG
	printf("DONE WITH WHILE\n");
#endif
	/* Second pass: start over at the root block and collect the directory
	 * entries of every block into dirlist */
	blknum = htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN);
	size = OUR_BLK_SIZE * (extra_dir_blocks + 1);
	dirlist = (struct DIRECTORY *)Malloc(size);
	memset(dirlist, 0, size);

	if (!get_block(buf, blknum, buffer, buflen))
		return buf + validblk;		/*GET DIR BLOCK*/

	if (!get_dir_info(buffer))
		{
		return foundat + validblk;
		}

	for (i = 0; i < extra_dir_blocks; i++)
		{
		if (!get_FAT_block(buf, blknum, block_list, buflen))
			return buf + validblk;
		blknum = htoi((unsigned char *) &block_list[blknum % 128], FOREMOST_LITTLE_ENDIAN);
		if (blknum == END_OF_CHAIN)
			break;
#ifdef DEBUG
		printf("getting dir blk blknum=%d\n", blknum);
#endif
		if (!get_block(buf, blknum, buffer, buflen))
			return buf + validblk;	/*GET DIR BLOCK*/
		if (!get_dir_info(buffer))
			{
			return buf + validblk;
			}
		}

#ifdef DEBUG
	printf("dir count is %d\n", i);
#endif
	/* Walk the directory entries: pick the file type from the first stream
	 * name that is recognised and add up the entry sizes.  The global buffer
	 * is reused here to format one listing line per entry. */
	for (dl = dirlist, i = 0; i < dir_count; i++, dl++)
		{
		memset(buffer, ' ', 75);
		/* Indent the name by four columns per directory level.
		 * NOTE(review): the level and the name length come from the file and
		 * are not checked against the size of buffer. */
		j = htoi((unsigned char *) &dl->level, FOREMOST_LITTLE_ENDIAN) * 4;
		sprintf((char *) &buffer[j], "%-s", dl->name);
		j = strlen((char *)buffer);

		/* A name starting with '@' rejects the whole candidate */
		if (dl->name[0] == '@')
			return foundat + validblk;
		if (dl->type == STREAM)
			{
			buffer[j] = ' ';
			sprintf((char *) &buffer[60], "%8d\n", dl->size);

			if (temp == NULL)		/*check if we have already defined the type*/
				{
				temp = check_ole_name(dl->name);
				if (temp)
					suffix = temp;
				}

			/* Streams above the mini sector cutoff are counted in 512 byte
			 * units, the smaller ones in 64 byte units */
			if (dl->size > miniSectorCutoff)
				{
				totalsize += adjust_bs(dl->size, 512);
				}
			else
				{
				totalsize += adjust_bs(dl->size, 64);
				}

#ifdef DEBUG
			/* NOTE(review): buffer holds data from the file and is used as the format string here */
			fprintf(stdout, buffer);
#endif
			}
		else
			{
			sprintf((char *) &buffer[j], "\n");
#ifdef DEBUG
			printf("\tnot stream data \n");
			/* NOTE(review): buffer holds data from the file and is used as the format string here */
			fprintf(stdout, buffer);
#endif

			extrasize += adjust_bs(dl->size, 512);

			}
		}

	/* Add the overhead: the fixed amount, the directory entries at 128 bytes
	 * each, the FAT blocks and the mini FAT at 64 bytes per counted unit */
	totalsize += fib;
#ifdef DEBUG
	printf("DIR SIZE:=%d, numFATblks:=%d MiniFat:=%d\n",
		   adjust_bs(((dir_count) * 128), 512),
		   (num_FAT_blocks * 512),
		   adjust_bs((64 * csectMiniFat), 512));
#endif
	totalsize += adjust_bs(((dir_count) * 128), 512);
	totalsize += (num_FAT_blocks * 512);
	totalsize += adjust_bs((64 * csectMiniFat), 512);
	/* Prefer the global high-water mark (plus a margin of 5 blocks) when it
	 * reaches further than the value taken from the header */
	if ((highblk + 5) > highblock && highblk > 0)
		{
		highblock = highblk + 5;
		}

	/* From here on highblock is a byte offset, not a block number */
	highblock = highblock * 512;

#ifdef DEBUG
	printf("\t highblock:=%d\n", highblock);
#endif
	if (highblock > totalsize)
	{
#ifdef DEBUG
		printf("	Total size:=%d a difference of %lld\n", totalsize, buflen - totalsize);
		printf("	Extra size:=%d \n", extrasize);
		printf("	Highblock is greater than totalsize\n");
#endif
		totalsize = highblock;
	}

	totalsize = adjust_bs(totalsize, 512);
#ifdef DEBUG
	printf("	Total size:=%d a difference of %lld\n", totalsize, buflen - totalsize);
	printf("	Extra size:=%d \n", extrasize);
#endif

	/* Never write more than the buffer holds */
	if (buflen < totalsize)
	{
#ifdef DEBUG
		printf("	***Error not enough left in the buffer left:=%lld needed=%d***\n",
			   buflen,
			   totalsize);
#endif
		totalsize = buflen;
	}

	/* Take the 5 block margin off again to get the resume position */
	foundat = buf;
	highblock -= 5 * 512;
	if (highblock > 0 && highblock < buflen)
		{
		foundat += highblock;
		}
	else
		{
		foundat += totalsize;
		}

	/*Return to the highest blknum read in the file, that way we don't miss files that are close*/
	file_size = totalsize;
	extractbuf = buf;

	if (suffix)
		needle->suffix = suffix;

	/* Skip the write when a specific type was requested and the detected
	 * suffix does not contain it */
	if (!strstr(needle->suffix, type) && strcmp(type,"all")!=0)
		{
		return foundat;
		}

	write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
	return foundat;

}
1007 
1008 //********************************************************************************/
check_mov(unsigned char * atom)1009 int check_mov(unsigned char *atom)
1010 {
1011 #ifdef DEBUG
1012 	printf("Atom:= %c%c%c%c\n", atom[0], atom[1], atom[2], atom[3]);
1013 #endif
1014 	if (strncmp((char *)atom, "free", 4) == 0 || strncmp((char *)atom, "mdat", 4) == 0 ||
1015 		strncmp((char *)atom, "free", 4) == 0 || strncmp((char *)atom, "wide", 4) == 0 ||
1016 		strncmp((char *)atom, "PICT", 4) == 0)
1017 		{
1018 		return TRUE;
1019 		}
1020 
1021 	if (strncmp((char *)atom, "trak", 4) == 0 || strncmp((char *)atom, "mdat", 4) == 0 ||
1022 		strncmp((char *)atom, "mp3", 3) == 0 || strncmp((char *)atom, "wide", 4) == 0 ||
1023 		strncmp((char *)atom, "moov", 4) == 0)
1024 		{
1025 		return TRUE;
1026 		}
1027 
1028 	return FALSE;
1029 }
1030 
1031 /********************************************************************************
1032  *Function: extract_mov
1033  *Description: Given that we have a MOV header JUMP through the mov data structures
1034     until we reach EOF
1035 *Return: A pointer to where the EOF of the MOV is in the current buffer
1036 **********************************************************************************/
unsigned char *extract_mov(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Walk the chain of MOV atoms that starts 4 bytes in front of the matched
	 * header (the atom size is read from those 4 bytes, the atom type from the
	 * 4 bytes after it).  The walk stops when the buffer is exhausted or when
	 * the next atom type is not accepted by check_mov().  The data is only
	 * written to disk when an "mdat" atom was seen on the way.
	 *
	 * Returns:
	 *   - the position just past the last accepted atom when the file was
	 *     written, or when the walk stopped without ever seeing "mdat";
	 *   - foundat + header_len + 4 when an atom size is zero or larger than
	 *     max_len, or when the file does not fit and buflen >= max_len;
	 *   - NULL when the file does not fit in a buffer smaller than max_len.
	 */
	unsigned char	*buf = foundat - 4;
	unsigned int	atomsize = 0;
	unsigned int	filesize = 0;
	int				mdat = FALSE;

	foundat -= 4;
	buflen += 4;					/*The 4 size bytes in front of the header are usable too*/
	while (1)						/*Loop through all the atoms until the EOF is reached*/
	{
		atomsize = htoi(foundat, FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
		printf("Atomsize:=%u\n", atomsize);
#endif
		if (atomsize == 0 || atomsize > needle->max_len)
		{
			return foundat + needle->header_len + 4;
		}

		filesize += atomsize;		/*Add the atomsize to the total file size*/
		if (filesize > buflen)
		{
#ifdef DEBUG
			printf("file size > buflen fs:=%u bf:=%llu\n", filesize, (unsigned long long)buflen);
#endif
			if (buflen >= needle->max_len)
			{
				return foundat + needle->header_len + 4;
			}

			return NULL;
		}

		foundat += atomsize;

		/*
		 * The next atom header takes 8 bytes (4 size + 4 type).  With fewer
		 * bytes left the type check below would read past the buffer, so
		 * treat this as the end of the data.
		 */
		if (buflen - (foundat - buf) < 8)
		{
			if (mdat)
			{
				break;
			}

#ifdef DEBUG
			printf("No mdat found");
#endif
			return foundat;
		}

		/*Check if we have an mdat atom, these are required thus can be used to
		 * weed out corrupted files*/
		if (strncmp((char *)foundat + 4, "mdat", 4) == 0)
		{
			mdat = TRUE;
		}

		if (check_mov(foundat + 4)) /*Check to see if we are at a valid header*/
		{
#ifdef DEBUG
			printf("Checkmov succeeded\n");
#endif
			continue;
		}

#ifdef DEBUG
		printf("Checkmov failed\n");
#endif
		if (mdat)
		{
			break;
		}

#ifdef DEBUG
		printf("No mdat found");
#endif
		return foundat;
	}								//End loop

	/*foundat now points just past the last accepted atom*/
	filesize = (foundat - buf);
#ifdef DEBUG
	printf("file size:=%u\n", filesize);
#endif
	write_to_disk(s, needle, filesize, buf, c_offset + f_offset - 4);
	return foundat;
}
1139 
1140 /********************************************************************************
1141  *Function: extract_wmv
1142  *Description: Given that we have a WMV header
1143     search for the file header and grab the file size.
1144 *Return: A pointer to where the EOF of the WMV is in the current buffer
1145 **********************************************************************************/
unsigned char *extract_wmv(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Parse the WMV header object, then search (using the needle's footer
	 * pattern) for the file properties object and read the file size from it.
	 *
	 * Returns:
	 *   - buf + size after the file was written to disk;
	 *   - the position just past the parsed header fields when the header
	 *     sanity check fails;
	 *   - NULL when the buffer is too short, the file properties object is not
	 *     found (or not completely inside the buffer), or the size is not
	 *     plausible.
	 */
	unsigned char	*header = foundat;
	unsigned char	*buf = foundat;
	unsigned int	size = 0;
	u_int64_t		file_size = 0;
	u_int64_t		headerSize = 0;
	u_int64_t		fileObjHeaderSize = 0;
	int				numberofHeaderObjects = 0;
	int				reserved[2];
	int				bytes_to_search = 0;

	/*If we have less than a WMV header bail out*/
	if (buflen < 70)
		return NULL;

	foundat += 16;		/*Jump to the header size*/
	headerSize = htoll(foundat, FOREMOST_LITTLE_ENDIAN);
	foundat += 8;
	numberofHeaderObjects = htoi(foundat, FOREMOST_LITTLE_ENDIAN);
	foundat += 4;		/*Jump to the two reserved bytes*/
	reserved[0] = foundat[0];
	reserved[1] = foundat[1];
	foundat += 2;		/*End of the header object*/

	/*Sanity check*/
	if (headerSize == 0 || numberofHeaderObjects <= 0 || reserved[0] != 1)
	{
		printf("WMV err num_header_objs=%d headerSize=%llu\n",
			   numberofHeaderObjects,
			   (unsigned long long)headerSize);
		return foundat;
	}

	if (buflen - (foundat - buf) >= needle->max_len)
		bytes_to_search = needle->max_len;
	else
		bytes_to_search = buflen - (foundat - buf);

	/*Note we are not searching for the footer here, just the file header ID so we can get the file size*/
	foundat = bm_search(needle->footer,
						needle->footer_len,
						foundat,
						bytes_to_search,
						needle->footer_bm_table,
						needle->case_sen,
						SEARCHTYPE_FORWARD);
	if (foundat == NULL)
	{
		return NULL;
	}

	/*
	 * The fields read below end 44 bytes after the match (8 bytes at +16 and
	 * 4 bytes at +40).  Do not read them when they are not completely inside
	 * the buffer.
	 */
	if (buflen - (foundat - buf) < 44)
	{
		return NULL;
	}

	foundat += 16;		/*jump to the headersize*/
	fileObjHeaderSize = htoll(foundat, FOREMOST_LITTLE_ENDIAN);
	foundat += 24;		/*Jump to the file size obj*/
	size = htoi(foundat, FOREMOST_LITTLE_ENDIAN);

#ifdef DEBUG
	printf("SIZE:=%u fileObjHeaderSize=%llu\n", size, (unsigned long long)fileObjHeaderSize);
#endif

	/*Sanity check data*/
	if (size > 0 && size <= needle->max_len && size <= buflen)
	{
		header += size;
#ifdef DEBUG
		printf("	Found a WMV at:=%llu,File size:=%u\n", (unsigned long long)c_offset, size);
		printf("	Headersize:=%llu, numberofHeaderObjects:= %d ,reserved:=%d,%d\n",
			   (unsigned long long)headerSize,
			   numberofHeaderObjects,
			   reserved[0],
			   reserved[1]);
#endif

		/*Everything seem ok, write to disk*/
		file_size = (header - buf);
		write_to_disk(s, needle, file_size, buf, c_offset + f_offset);
		return header;
	}

	return NULL;
}
1243 
1244 /********************************************************************************
1245  *Function: extract_riff
1246  *Description: Given that we have a RIFF header parse header and grab the file size.
1247  *Return: A pointer to where the EOF of the RIFF is in the current buffer
1248  **********************************************************************************/
unsigned char *extract_riff(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
							s_spec *needle, u_int64_t f_offset, char *type)
{
	/*
	 * A RIFF container is carved as "avi" (form type "AVI" followed by a
	 * "LIST" chunk) or as "wav" (form type "WAVE").  Anything else, or an
	 * implausible size, makes the search resume right after the header.
	 * When the detected suffix does not match the requested type (and the
	 * type is not "all") the file is skipped without being written.
	 */
	unsigned char	*riff = foundat;
	char			*ext = NULL;
	u_int64_t		file_size = 0;

	/* Grab the total file size in little endian from offset 4*/
	int				size = htoi(&riff[4], FOREMOST_LITTLE_ENDIAN);

	/*Sanity check the form type to decide which suffix applies*/
	if (strncmp((char *) &riff[8], "AVI", 3) == 0)
	{
		if (strncmp((char *) &riff[12], "LIST", 4) != 0)
		{
			return riff + needle->header_len;
		}

		ext = "avi";
	}
	else if (strncmp((char *) &riff[8], "WAVE", 4) == 0)
	{
		ext = "wav";
	}
	else
	{
		return riff + needle->header_len;
	}

	/*The size has to be positive and fit both max_len and the buffer*/
	if (size <= 0 || size > needle->max_len || size > buflen)
	{
		return riff + needle->header_len;
	}

#ifdef DEBUG
	if (strcmp(ext, "avi") == 0)
		printf("\n	Found an AVI at:=%lld,File size:=%d\n", c_offset, size);
	else
		printf("\n	Found a WAVE at:=%lld,File size:=%d\n", c_offset, size);
#endif

	file_size = size;
	needle->suffix = ext;

	/*Only write the file when this suffix was asked for*/
	if (strstr(needle->suffix, type) == NULL && strcmp(type, "all") != 0)
	{
		return riff + size;
	}

	write_to_disk(s, needle, file_size, riff, c_offset + f_offset);
	return riff + size;
}
1315 
1316 /********************************************************************************
1317  *Function: extract_bmp
1318  *Description: Given that we have a BMP header parse header and grab the file size.
1319  *Return: A pointer to where the EOF of the BMP is in the current buffer
1320  **********************************************************************************/
unsigned char *extract_bmp(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Validate the BMP header fields and, when they look sane and the whole
	 * file fits in the buffer, write it to disk.  A failed sanity check
	 * resumes the search right after the header; a file that does not fit in
	 * the buffer yields NULL.
	 */
	unsigned char	*bmp = foundat;
	unsigned char	*skip_header = foundat + needle->header_len;
	int				total = 0;
	int				info_len = 0;
	int				rows = 0;
	int				cols = 0;
	int				pixel_offset = 0;
	int				pixel_bytes = 0;
	u_int64_t		file_size = 0;
	char			dims[32];

	if (buflen < 100)
	{
		return skip_header;
	}

	/*Skip the first two bytes of the header (BM) and grab the total file size in little endian*/
	total = htoi(&bmp[2], FOREMOST_LITTLE_ENDIAN);
	if (total <= 100 || total > needle->max_len)
	{
		return skip_header;
	}

	pixel_offset = htoi(&bmp[10], FOREMOST_LITTLE_ENDIAN);
	pixel_bytes = htoi(&bmp[34], FOREMOST_LITTLE_ENDIAN);
	info_len = htoi(&bmp[14], FOREMOST_LITTLE_ENDIAN);

	/*Header length*/
	if (info_len <= 0 || info_len > 1000)
	{
		return skip_header;
	}

	rows = htoi(&bmp[22], FOREMOST_LITTLE_ENDIAN);
	cols = htoi(&bmp[18], FOREMOST_LITTLE_ENDIAN);

	/*Vertical length*/
	if (rows <= 0 || rows > 2000 || cols <= 0)
	{
		return skip_header;
	}

#ifdef DEBUG
	printf("\n	The size of the BMP is %d, Header length:=%d , Vertical Size:= %d, dataSize:=%d dataOffset:=%d\n",
		   total,
		   info_len,
		   rows,
		   pixel_bytes,
		   pixel_offset);
#endif

	/*The whole file has to be inside the buffer*/
	if (total > buflen)
	{
		return NULL;
	}

	sprintf(dims, " (%d x %d)", cols, rows);
	strcat(needle->comment, dims);

	file_size = total;
	write_to_disk(s, needle, file_size, bmp, (c_offset + f_offset));
	return bmp + file_size;
}
1393 
1394 /********************************************************************************
1395  *Function: extract_gif
1396  *Description: Given that we have a GIF header parse the given buffer to determine
1397  *	where the file ends.
1398  *Return: A pointer to where the EOF of the GIF is in the current buffer
1399  **********************************************************************************/
unsigned char *extract_gif(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Check the GIF version ("9a" or "7a" after the first 4 header bytes),
	 * record the dimensions in the needle comment and search forward for the
	 * footer.  Returns the position just past the footer once the file was
	 * written, NULL when no footer is found, or header + 4 when the version
	 * is not recognised.
	 */
	unsigned char	*gif = foundat;
	unsigned char	*version = foundat + 4;		/*Jump the first 4 bytes of the gif header (GIF8)*/
	unsigned char	*screen = foundat + 6;		/*Jump the length of the header*/
	unsigned char	*trailer = NULL;
	u_int64_t		left = 0;
	u_int64_t		file_size = 0;
	int				search_len = 0;
	unsigned short	width = 0;
	unsigned short	height = 0;
	char			dims[32];

	/*Invalid GIF header return the current pointer*/
	if (strncmp((char *)version, "9a", 2) != 0 && strncmp((char *)version, "7a", 2) != 0)
	{
		return version;
	}

	width = htos(screen, FOREMOST_LITTLE_ENDIAN);
	height = htos(&screen[2], FOREMOST_LITTLE_ENDIAN);

	sprintf(dims, " (%d x %d)", width, height);
	strcat(needle->comment, dims);

	/*Search at most max_len bytes and never past the end of the buffer*/
	left = buflen - (screen - gif);
	if (left >= needle->max_len)
		search_len = needle->max_len;
	else
		search_len = left;

	trailer = bm_search(needle->footer,
						needle->footer_len,
						screen,
						search_len,
						needle->footer_bm_table,
						needle->case_sen,
						SEARCHTYPE_FORWARD);
	if (trailer == NULL)
	{
		return NULL;
	}

	/*We found the EOF, write the file to disk and return*/
#ifdef DEBUG
	printx(trailer, 0, 16);
#endif
	file_size = (trailer - gif) + needle->footer_len;
#ifdef DEBUG
	printf("The GIF file size is  %llu  c_offset:=%llu\n", file_size, c_offset);
#endif
	write_to_disk(s, needle, file_size, gif, c_offset + f_offset);
	return trailer + needle->footer_len;
}
1461 
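/********************************************************************************
 *Function: extract_mpg
 *Description: Given that we have an MPG header, follow the markers and the
 *	packet sizes through the buffer until the end of the stream is reached
 *	or the data no longer looks like an MPG.
 *	Not done yet.
 *Return: A pointer to where the EOF of the MPG is in the current buffer
 **********************************************************************************/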
unsigned char *extract_mpg(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Walks the buffer from the matched header: repeatedly searches for
	 * needle->markerlist[0] and, for marker bytes in the range 0xBB..0xEF,
	 * jumps over the packet using its 16-bit big-endian size.  The walk ends
	 * on marker byte 0xB9, or is cut off at the last search position once at
	 * least 1 MEGABYTE was covered.  The data from the header up to that
	 * point (+ footer_len, clamped to buflen) is written to disk.
	 *
	 * Returns buf + file_size after writing, a position just past a header
	 * (buf or currentpos + header_len) when the data is rejected, or NULL
	 * from the branch marked "We should probably search more".
	 */
	unsigned char	*buf = foundat;
	unsigned char	*currentpos = NULL;

	unsigned char	*extractbuf = NULL;
	int				bytes_to_search = 0;
	unsigned short	size = 0;
	u_int64_t		file_size = 0;

	/*
    size=htos(&foundat[4],FOREMOST_BIG_ENDIAN);
    printf("size:=%d\n",size);

    printx(foundat,0,16);
    foundat+=4;
    */
	int				j = 0;

	/*Byte 15 of the header has to be 0xBB, otherwise skip this header*/
	if (foundat[15] == (unsigned char)'\xBB')
		{
		}
	else
		{

		return buf + needle->header_len;
		}

	/*Each marker search looks at most 2 KILOBYTE ahead*/
	if (buflen <= 2 * KILOBYTE)
		{
		bytes_to_search = buflen;
		}
	else
		{
		bytes_to_search = 2 * KILOBYTE;
		}

	while (1)
		{
		j = 0;

		/*Remember where this search started; it is the fallback cut-off point*/
		currentpos = foundat;
#ifdef DEBUG
		printf("Searching for marker\n");
#endif
		foundat = bm_search(needle->markerlist[0].value,
							needle->markerlist[0].len,
							foundat,
							bytes_to_search,
							needle->markerlist[0].marker_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);

		if (foundat)
		{
#ifdef DEBUG
			printf("Found after searching %d\n", foundat - currentpos);
#endif
			/*Jump from packet to packet while the marker byte is in 0xBB..0xEF*/
			while (1)
				{

				if (foundat[3] >= (unsigned char)'\xBB' && foundat[3] <= (unsigned char)'\xEF')
				{
#ifdef DEBUG
					printf("jumping %d:\n", j);
#endif
					/*16-bit big-endian packet size stored right after the marker*/
					size = htos(&foundat[4], FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
					printf("\t hit: ");
					printx(foundat, 0, 16);
					printf("size:=%d\n\tjump: ", size);
#endif
					/*Running total, only compared against max_len below; it is
					 * recomputed from the pointers after the outer loop*/
					file_size += (foundat - buf) + size;
					if (size <= 0 || size > buflen - (foundat - buf))
					{
#ifdef DEBUG
						printf("Not enough room in the buffer ");
#endif
						if (size <= 50 * KILOBYTE && size > 0)
							{

							/*We should probably search more*/
							if (file_size < needle->max_len)
								{
								return NULL;
								}
							else
								{
								/*NOTE(review): leaves foundat on this packet, so the
								 * checks after this inner loop see a marker byte in
								 * 0xBB..0xEF and take the error-state branch*/
								break;
								}
							}
						else
							{
							/*Zero size, or a packet that is both larger than 50 KILOBYTE
							 * and larger than what is left in the buffer*/
							return currentpos + needle->header_len;
							}
					}

					/*Skip the packet: its size plus 6 bytes of marker and size field*/
					foundat += size + 6;
#ifdef DEBUG
					printx(foundat, 0, 16);
#endif
					j++;
				}
				else
					{

					break;
					}
				}

			/*Marker byte 0xB9 ends the walk (presumably the MPEG end code - TODO confirm)*/
			if (foundat[3] == (unsigned char)'\xB9')
				{
				break;
				}
			else if (foundat[3] != (unsigned char)'\xBA' && foundat[3] != (unsigned char)'\x00')
				{

				/*This is the error state where this doesn't seem to be an mpg anymore*/
				size = htos(&foundat[4], FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
				printf("\t ***TEST: %x\n", foundat[3]);
				printx(foundat, 0, 16);

				printf("size:=%d\n", size);
#endif
				/*Keep what was collected so far if it is at least 1 MEGABYTE*/
				if ((currentpos - buf) >= 1 * MEGABYTE)
					{
					foundat = currentpos;
					break;
					}

				return currentpos + needle->header_len;

				}
			/*NOTE(review): this branch is unreachable, 0xB3 is already caught by
			 * the previous condition (it is neither 0xBA nor 0x00)*/
			else if (foundat[3] == (unsigned char)'\xB3')
				{
				foundat += 3;
				}
			else
				{
				/*Marker byte 0xBA or 0x00: step over it and search for the next marker*/
				foundat += 3;
				}
		}
		else
			{
			/*No marker within bytes_to_search: keep the data if it is at least
			 * 1 MEGABYTE, otherwise give up on this header*/
			if ((currentpos - buf) >= 1 * MEGABYTE)
				{
				foundat = currentpos;
				break;
				}
			else
			{
#ifdef DEBUG
				printf("RETURNING BUF\n");
#endif
				return buf + needle->header_len;
			}
			}
		}

	if (foundat)
		{
		file_size = (foundat - buf) + needle->footer_len;

		/*Anything shorter than 1 KILOBYTE is not written*/
		if (file_size < 1 * KILOBYTE)
			return buf + needle->header_len;
		}
	else
		{
		return buf + needle->header_len;
		}

	/*Never write more than the buffer holds*/
	if (file_size > buflen)
		file_size = buflen;
	foundat = buf;
#ifdef DEBUG
	printf("The file size is  %llu  c_offset:=%llu\n", file_size, c_offset);
#endif

	extractbuf = buf;
	write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
	foundat += file_size;
	return foundat;
}
1648 
1649 
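/********************************************************************************
 *Function: extract_mp4
 *Description: Given that we have an MP4 header, jump from box to box using the
 *	big-endian size stored 28 bytes past the current position until a zero
 *	size or a non-printable box type is reached.
 *	Not done yet.
 *Return: A pointer to where the EOF of the MP4 is in the current buffer
 **********************************************************************************/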
unsigned char *extract_mp4(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Jump from box to box: the big-endian size of the next box is read 28
	 * bytes past the current position and the first two bytes of its type at
	 * +32/+33.  The walk ends on a zero size, on a non-printable type, when a
	 * size does not fit, or when the next box header is no longer inside the
	 * buffer.
	 *
	 * Returns buf + file_size after the data was written, buf + header_len
	 * when less than 1 KILOBYTE was collected, or NULL when a box is at least
	 * as large as the buffer but still smaller than max_len (more data is
	 * needed).
	 */
	unsigned char	*buf = foundat;
	unsigned int	size = 0;
	u_int64_t		file_size = 0;
	u_int64_t		consumed = 0;

	while (1)
	{
		/*
		 * Bytes 28..33 from the current position are read below.  Stop when
		 * they are not completely inside the buffer; without this check a
		 * sequence of boxes that are each smaller than buflen could move the
		 * position (and the reads) past the end of the buffer.
		 */
		consumed = (u_int64_t)(foundat - buf);
		if (consumed > buflen || buflen - consumed < 34)
		{
			break;
		}

		size = htoi(&foundat[28], FOREMOST_BIG_ENDIAN);
		if (size == 0)
		{
			/*A zero size marks the end, the file stops where that size field starts*/
			foundat += 28;
			break;
		}

		if (size < buflen)
		{
			/*The box type has to start with printable characters*/
			if (!isprint(foundat[32]) || !isprint(foundat[33]))
			{
				break;
			}

			foundat += size;
		}
		else
		{
			/*The box is bigger than the buffer: ask for more data while the
			 * size is still plausible, otherwise stop here*/
			if (size < needle->max_len)
			{
				return NULL;
			}

			break;
		}
	}

	file_size = (foundat - buf) + needle->footer_len;
	if (file_size < 1 * KILOBYTE)
	{
		return buf + needle->header_len;
	}

	/*Never write more than the buffer holds*/
	if (file_size > buflen)
	{
		file_size = buflen;
	}

	write_to_disk(s, needle, file_size, buf, c_offset + f_offset);
	return buf + file_size;
}
1721 
1722 
1723 /********************************************************************************
1724  *Function: extract_png
1725  *Description: Given that we have a PNG header parse the given buffer to determine
1726  *	where the file ends.
1727  *Return: A pointer to where the EOF of the PNG is in the current buffer
1728  **********************************************************************************/
unsigned char *extract_png(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Skip the 8 header bytes, sanity check the image dimensions and then
	 * jump from chunk to chunk (4 byte big-endian size + 12 bytes for the
	 * size, TYPE and CRC fields) until the "IEND" chunk is reached.
	 *
	 * Returns buf + file_size after the file was written, NULL when the
	 * buffer holds less than 100 bytes, or a position inside the data where
	 * the walk gave up (bad dimensions, bad chunk size, chunk not completely
	 * inside the buffer, non-printable chunk type).
	 */
	unsigned char	*buf = foundat;
	unsigned char	*currentpos = NULL;
	int				size = 0;
	int				height = 0;
	int				width = 0;
	u_int64_t		remaining = 0;
	u_int64_t		file_size = 0;
	char			comment[32];

	if (buflen < 100)
		return NULL;

	foundat += 8;		/*Jump the PNG header*/
	width = htoi(&foundat[8], FOREMOST_BIG_ENDIAN);
	height = htoi(&foundat[12], FOREMOST_BIG_ENDIAN);

	if (width < 1 || height < 1 || width > 3000 || height > 3000)
		return foundat;

	snprintf(comment, sizeof(comment), " (%d x %d)", width, height);
	strcat(needle->comment, comment);

	while (1)	/* Jump through the headers until we reach the "data" part of the file*/
	{
		size = htoi(foundat, FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
		printx(foundat, 0, 16);
		printf("Size:=%d\n", size);
#endif

		currentpos = foundat;
		remaining = buflen - (foundat - buf);

		/*
		 * Besides the chunk data itself, the 12 bytes of size, TYPE and CRC
		 * and the size + TYPE (8 bytes) of the following chunk have to be
		 * inside the buffer, because the type of the next chunk is inspected
		 * right after the jump.
		 */
		if (size <= 0 || (u_int64_t)size + 20 > remaining)
		{
#ifdef DEBUG
			printf("buflen - (foundat-buf)=%llu\n", (unsigned long long)remaining);
#endif
			return currentpos;
		}

		/*12 is the length of the size, TYPE, and CRC field*/
		foundat += size;
		foundat += 12;

		if (!isprint(foundat[4]))
		{
#ifdef DEBUG
			printx(foundat, 0, 16);
			printf("Not ascii returning\n");
#endif
			return currentpos;
		}

		if (strncmp((char *) &foundat[4], "IEND", 4) == 0)
		{
			break;
		}
	}

	/*foundat points at the IEND chunk: add its data size and the 12 bytes of overhead*/
	file_size = (foundat - buf) + htoi(foundat, FOREMOST_BIG_ENDIAN) + 12;
	if (file_size > buflen)
		file_size = buflen;

#ifdef DEBUG
	printf("The file size is  %llu  c_offset:=%llu\n",
		   (unsigned long long)file_size,
		   (unsigned long long)c_offset);
#endif
	write_to_disk(s, needle, file_size, buf, c_offset + f_offset);
	return buf + file_size;
}
1813 
1814 /********************************************************************************
1815  *Function: extract_jpeg
1816  *Description: Given that we have a JPEG header parse the given buffer to determine
1817  *	where the file ends.
1818  *Return: A pointer to where the EOF of the JPEG is in the current buffer
1819  **********************************************************************************/
unsigned char *extract_jpeg(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
							s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Jump through the JPEG header segments, make sure a quantization table
	 * and a Huffman marker were seen, then search for the footer.
	 *
	 * Returns the position just past the footer after the file was written,
	 * NULL when the buffer is too small, a segment runs past the buffer or no
	 * footer is found, and a position just past the header when the header
	 * is not accepted.
	 */
	unsigned char	*start = foundat;
	unsigned char	*pos = foundat;
	unsigned short	seglen;
	int				search_len = 0;
	int				seen_table = FALSE;
	int				seen_huffman = FALSE;
	u_int64_t		file_size = 0;

	/*Check if we have a valid header*/
	if (buflen < 128)
	{
		return NULL;
	}

	/*Only a JFIF (0xe0) or EXIF (0xe1) header is accepted, otherwise keep searching*/
	if (pos[3] != 0xe0 && pos[3] != 0xe1)
	{
		return pos + needle->header_len;
	}

	for (;;)	/* Jump through the headers until we reach the "data" part of the file*/
	{
#ifdef DEBUG
		printx(pos, 0, 16);
#endif
		pos += 2;
		seglen = htos(&pos[2], FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
		printf("Headersize:=%d buflen:=%lld\n", seglen, buflen);
#endif

		/*The segment has to end inside the buffer*/
		if (((pos + seglen) - start) > buflen)
		{
			return NULL;
		}

		pos += seglen;

		/*No further 0xff marker: the headers are done*/
		if (pos[2] != 0xff)
		{
			break;
		}

		/*Ignore 2 "0xff" side by side*/
		if (pos[3] == 0xff)
		{
			pos++;
		}

		if (pos[3] == 0xdb || pos[4] == 0xdb)
		{
			seen_table = TRUE;
		}
		else if (pos[3] == 0xc4)
		{
			seen_huffman = TRUE;
		}
	}

	/*All jpegs must contain a Huffman marker as well as a quantization table*/
	if (!(seen_table && seen_huffman))
	{
#ifdef DEBUG
		printf("No Table or Huffman \n");
#endif
		return start + needle->header_len;
	}

	/*Search at most max_len bytes and never past the end of the buffer*/
	if (buflen < (pos - start))
	{
#ifdef DEBUG
		printf("avoided bug in extract_jpeg!\n");
#endif
		search_len = 0;
	}
	else if (buflen - (pos - start) >= needle->max_len)
	{
		search_len = needle->max_len;
	}
	else
	{
		search_len = buflen - (pos - start);
	}

	pos = bm_search(needle->footer,
					needle->footer_len,
					pos,
					search_len,
					needle->footer_bm_table,
					needle->case_sen,
					SEARCHTYPE_FORWARD);
	if (pos == NULL)
	{
		return NULL;
	}

	/*We found the EOF, write the file to disk and return*/
	file_size = (pos - start) + needle->footer_len;
#ifdef DEBUG
	printf("The jpeg file size is  %llu  c_offset:=%llu\n", file_size, c_offset);
#endif
	write_to_disk(s, needle, file_size, start, c_offset + f_offset);
	return pos + needle->footer_len;
}	//End extract_jpeg
1949 
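/********************************************************************************
 *Function: extract_generic
 *Description: Carve a file for a spec that has no dedicated extractor. The end
 *	of the file is the next header (SEARCHTYPE_FORWARD_NEXT), the end of the
 *	surrounding printable text (SEARCHTYPE_ASCII) or the footer; when no end
 *	is found max_len bytes (at most the buffer) are written.
 *Return: A pointer to the end of the text for SEARCHTYPE_ASCII, otherwise a
 *	pointer just past the header so the search continues from there
 **********************************************************************************/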
unsigned char *extract_generic(f_state *s, u_int64_t c_offset, unsigned char *foundat,
							   u_int64_t buflen, s_spec *needle, u_int64_t f_offset)
{
	/*
	 * Carve a file for a spec without a dedicated extractor.  Depending on
	 * needle->searchtype the end of the file is
	 *   - SEARCHTYPE_FORWARD_NEXT: the next occurrence of the header,
	 *   - SEARCHTYPE_ASCII: the end of the run of printable text around the
	 *     header (the start of the run becomes the start of the file),
	 *   - otherwise: the footer, when the spec has one.
	 * When no end is found, max_len bytes (clamped to the buffer) are written.
	 *
	 * Returns the end of the text run for SEARCHTYPE_ASCII and
	 * buf + header_len for every other search type.
	 */
	unsigned char	*buf = foundat;
	int				bytes_to_search = 0;
	u_int64_t		file_size = 0;
	u_int64_t		text_before = 0;	/*text bytes found in front of the header*/
	u_int64_t		text_after = 0;		/*text bytes found from the header on*/

	if (buflen >= needle->max_len)
		bytes_to_search = needle->max_len;
	else
		bytes_to_search = buflen;

	if (needle->searchtype == SEARCHTYPE_FORWARD_NEXT)
	{
		foundat += needle->header_len;

		/*The search starts header_len bytes into the buffer, so that much
		 * less of the buffer is left to search*/
		if (buflen < needle->header_len)
			bytes_to_search = 0;
		else if ((u_int64_t)bytes_to_search > buflen - needle->header_len)
			bytes_to_search = buflen - needle->header_len;

		/*NOTE(review): the header pattern is searched with footer_bm_table,
		 * confirm that this is the table built for it*/
		foundat = bm_search(needle->header,
							needle->header_len,
							foundat,
							bytes_to_search,
							needle->footer_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);
	}
	else if (needle->searchtype == SEARCHTYPE_ASCII)
	{
		/*Grow forward over printable text, but never past the end of the buffer*/
		while (text_after < buflen)
		{
			unsigned char	ch = foundat[text_after];

			if (!isprint(ch) && ch != '\x0a' && ch != '\x0d' && ch != '\x09')
				break;
			text_after++;
		}

		/*Grow backwards over printable text.  c_offset is the offset of the
		 * header within the current chunk, so no more than c_offset bytes in
		 * front of the header belong to it*/
		while (text_before < c_offset)
		{
			unsigned char	ch = *(foundat - text_before - 1);

			if (!isprint(ch) && ch != '\x0a' && ch != '\x0d' && ch != '\x09')
				break;
			text_before++;
		}

		/*The file is the whole text run: it starts in front of the header*/
		buf = foundat - text_before;
		foundat = foundat + text_after;
		file_size = text_before + text_after;

		if (buf == foundat)
		{
			fprintf(stderr,"Returning Foundat\n");
			return foundat + needle->header_len;
		}
	}
	else if (needle->footer == NULL || strlen((char *)needle->footer) < 1)
	{
#ifdef DEBUG
		printf("footer is NULL\n");
#endif
		foundat = NULL;
	}
	else
	{
#ifdef DEBUG
		printf("footer is not NULL %p\n", needle->footer);
#endif
		foundat = bm_search(needle->footer,
							needle->footer_len,
							foundat,
							bytes_to_search,
							needle->footer_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);
	}

	if (foundat)
	{
#ifdef DEBUG
		printf("found %s!!!\n", needle->footer);
#endif
		/*A footer belongs to the file, the next header or the end of the text does not*/
		if (needle->searchtype == SEARCHTYPE_FORWARD_NEXT || needle->searchtype == SEARCHTYPE_ASCII)
		{
			file_size = (foundat - buf);
		}
		else
		{
			file_size = (foundat - buf) + needle->footer_len;
		}
	}
	else
	{
		file_size = needle->max_len;
	}

	if (file_size == 0)
	{
		file_size = needle->max_len;
	}

	/*Never write more than the buffer holds (counted from the possibly
	 * moved start of the file)*/
	if (file_size > buflen + text_before)
	{
		file_size = buflen;
	}

#ifdef DEBUG
	printf("The file size is  %llu  c_offset:=%llu\n",
		   (unsigned long long)file_size,
		   (unsigned long long)c_offset);
#endif

	/*NOTE(review): for SEARCHTYPE_ASCII the data starts text_before bytes in
	 * front of the header while the offset passed on is still the header's,
	 * confirm against write_to_disk*/
	write_to_disk(s, needle, file_size, buf, c_offset + f_offset);

	if (needle->searchtype != SEARCHTYPE_ASCII)
	{
		foundat = buf;
		foundat += needle->header_len;
	}

	return foundat;
}
2084 
2085 /********************************************************************************
2086  *Function: extract_exe
2087  *Description:
2088  *Return: A pointer to where the EOF of the
2089  **********************************************************************************/
/*
 * Carve a PE image (written out as "exe" or "dll") that starts at foundat.
 *
 * s:         state of the program, handed through to write_to_disk()
 * c_offset:  offset that the header was recorded within the current chunk
 * foundat:   the location the header was found at
 * buflen:    how much buffer is left until the end of the current chunk
 * needle:    search specification; its comment receives the link timestamp
 *            and its suffix is temporarily switched to "dll" for DLL images
 * f_offset:  offset that the current chunk is located within the file
 *
 * Returns buf + file_size once the image has been written.  A rejected
 * candidate returns a pointer a short way past the point of rejection
 * (foundat + 2, foundat + 60, or the position of the "PE" signature) and
 * leaves needle untouched.
 */
unsigned char *extract_exe(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*buf = foundat;
	u_int64_t		file_size = 0;
	u_int64_t		headers_end = 0;
	u_int64_t		section_end = 0;
	unsigned short	pe_offset = 0;
	unsigned short	sections = 0;
	unsigned short	exe_char = 0;
	unsigned int	offset = 0;
	unsigned int	raw_section_size = 0;
	int				dll = 0;
	int				have_time = 0;
	int				i = 0;
	time_t			compile_time = 0;
	struct tm		*ret_time = NULL;
	char			ascii_time[32];

	if (buflen < 100)
		return foundat + 2;

	/* Bytes 60-61 of the leading header hold the offset of the "PE" signature. */
	pe_offset = htos(&foundat[60], FOREMOST_LITTLE_ENDIAN);

	/* The 0x18 bytes read below (signature + file header) must lie inside the chunk. */
	if (pe_offset < 1 || pe_offset > 1000 || (u_int64_t) pe_offset + 0x18 > buflen)
		{
		return foundat + 60;
		}

	foundat += pe_offset;
	if (foundat[0] != (unsigned char)'\x50' || foundat[1] != (unsigned char)'\x45')
		{
		return foundat;
		}

	/*
	 * Everything parsed below sits at pe_offset: 0x18 bytes of signature and
	 * file header, a 224-byte optional header and one 40-byte entry per
	 * section.  Refuse to parse unless all of it fits in the chunk.
	 * NOTE(review): 224 is hard-coded here as in the original; images whose
	 * optional header has a different size are not handled - confirm.
	 */
	sections = htos(&foundat[6], FOREMOST_LITTLE_ENDIAN);
	headers_end = (u_int64_t) pe_offset + 0x18 + 224 + 40 * (u_int64_t) sections;
	if (buflen < headers_end)
		{
		return foundat;
		}

	/* Format the link timestamp now, but only attach it once the file is accepted. */
	compile_time = (time_t) htoi(&foundat[8], FOREMOST_LITTLE_ENDIAN);
	ret_time = gmtime(&compile_time);
	if (ret_time != NULL)
		{
		snprintf(ascii_time,
				 sizeof(ascii_time),
				 "%02d/%02d/%04d %02d:%02d:%02d",
				 ret_time->tm_mon + 1,
				 ret_time->tm_mday,
				 ret_time->tm_year + 1900,
				 ret_time->tm_hour,
				 ret_time->tm_min,
				 ret_time->tm_sec);
		chop(ascii_time);
		have_time = 1;
		}

	/* Characteristics: 0x2000 = DLL, 0x1000 = system file, 0x0002 = executable image. */
	exe_char = htos(&foundat[22], FOREMOST_LITTLE_ENDIAN);
	if (exe_char & 0x2000)
		{
		dll = 1;
		}
	else if ((exe_char & 0x1000) == 0 && (exe_char & 0x0002) == 0)
		{
		return foundat;
		}

	/* Skip the file header (0x18) and the optional header (224) to reach the section table. */
	foundat += 0x18 + 224;

	/*
	 * The file ends where the furthest section's raw data ends.  Taking the
	 * maximum (rather than the last entry) keeps the size right when the final
	 * section has no raw data; 64-bit arithmetic keeps offset + size from wrapping.
	 */
	for (i = 0; i < sections; i++)
		{
		raw_section_size = htoi(&foundat[16], FOREMOST_LITTLE_ENDIAN);
		offset = htoi(&foundat[20], FOREMOST_LITTLE_ENDIAN);
		section_end = (u_int64_t) offset + raw_section_size;
		if (section_end > file_size)
			{
			file_size = section_end;
			}

		foundat += 40;
		}

	if (file_size < 512 || file_size > 4 * MEGABYTE)
		{
		return foundat + 60;
		}

	/* Never write more than the chunk actually holds. */
	if (file_size > buflen)
		file_size = buflen;

#ifdef DEBUG
	printf("The file size is  %llu  c_offset:=%llu\n", (unsigned long long) file_size,
		   (unsigned long long) c_offset);
#endif

	/* NOTE(review): needle->comment's capacity is not visible here - confirm it can hold this. */
	if (have_time)
		{
		strcat(needle->comment, ascii_time);
		}

	if (dll == 1)
		{
		/* Write under the "dll" suffix, then restore the needle's usual "exe". */
		strcpy(needle->suffix, "dll");
		write_to_disk(s, needle, file_size, buf, c_offset + f_offset);
		strcpy(needle->suffix, "exe");
		}
	else
		{
		write_to_disk(s, needle, file_size, buf, c_offset + f_offset);
		}

	return (buf + file_size);
}
2233 
2234 
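/********************************************************************************
 *Function: extract_reg
 *Description: Write out the data that starts at the matched header, sized by the
    little-endian length stored at offset 0x28.
 *Return: NULL once the data has been written, or foundat + 4 if the stored
    length is rejected
 **********************************************************************************/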
/*
 * Write out the data that starts at foundat, using the little-endian length
 * stored at offset 0x28 as the file size.
 *
 * s:         state of the program, handed through to write_to_disk()
 * c_offset:  offset that the header was recorded within the current chunk
 * foundat:   the location the header was found at
 * buflen:    how much buffer is left until the end of the current chunk
 * needle:    search specification; max_len caps the accepted length
 * f_offset:  offset that the current chunk is located within the file
 *
 * Returns NULL after the data has been written, or foundat + 4 when the chunk
 * is too short to hold the length field or the length is negative or larger
 * than needle->max_len.
 */
unsigned char *extract_reg(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*buf = foundat;
	u_int64_t		file_size = 0;
	int				sizeofreg = 0;

	/* The length field occupies bytes 0x28-0x2B; do not read it from a shorter chunk. */
	if (buflen < 0x2C)
	{
		return (foundat + 4);
	}

	sizeofreg = htoi(&foundat[0x28], FOREMOST_LITTLE_ENDIAN);
	if (sizeofreg < 0 || sizeofreg > needle->max_len)
	{
		return (foundat + 4);
	}

	/* Never write more than the chunk actually holds. */
	file_size = (u_int64_t) sizeofreg;
	if (file_size > buflen)
	{
		file_size = buflen;
	}

	write_to_disk(s, needle, file_size, buf, c_offset + f_offset);

	return NULL;
}
/********************************************************************************
 *Function: extract_rar
 *Description: Given that we have a RAR header jump through the block headers
    until we reach the end of the archive.
 *Return: A pointer to where the EOF of the RAR is in the current buffer
 **********************************************************************************/
/*
 * Carve a RAR archive that starts at foundat by stepping over its marker
 * block, archive block (type 0x73) and file blocks (type 0x74).  When the
 * block chain cannot be followed, the needle's footer is searched for instead.
 *
 * s:         state of the program, handed through to write_to_disk()
 * c_offset:  offset that the header was recorded within the current chunk
 * foundat:   the location the header was found at
 * buflen:    how much buffer is left until the end of the current chunk
 * needle:    search specification (footer, max_len, header_len, comment)
 * f_offset:  offset that the current chunk is located within the file
 *
 * Returns a pointer to the end of the data that was written (never beyond
 * buf + buflen).  When nothing is written the return value is either the
 * position of the block that failed validation, or buf + needle->header_len
 * if the chunk ends before the headers do.
 */
unsigned char *extract_rar(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*buf = foundat;
	unsigned char	*hit = NULL;
	unsigned short	headersize = 0;
	unsigned short	flags = 0;
	unsigned int	filesize = 0;
	u_int64_t		tot_file_size = 0;
	u_int64_t		end_off = 0;
	u_int64_t		block_len = 0;
	u_int64_t		left = 0;
	u_int64_t		search_len = 0;
	u_int64_t		bytes_to_search = 50 * KILOBYTE;
	int				scan_len = 0;
	int				i = 0;
	int				scan = 0;
	int				flag = 0;
	int				passwd = 0;
	int				multi_volume = 0;
	int				has_comment = 0;
	int				encrypted = 0;

	/*Marker Block: its size field is at bytes 5-6, and the next block's first 7 bytes are read below*/
	if (buflen < 7)
		{
		return buf + needle->header_len;
		}

	headersize = htos(&foundat[5], FOREMOST_LITTLE_ENDIAN);
	if ((u_int64_t) headersize + 7 > buflen)
		{
		return buf + needle->header_len;
		}

	foundat += headersize;

	/*Archive Block*/
	if (foundat[2] != '\x73')
		{
		return foundat; /*Error*/
		}

	flags = htos(&foundat[3], FOREMOST_LITTLE_ENDIAN);
	headersize = htos(&foundat[5], FOREMOST_LITTLE_ENDIAN);

	/* The type byte of whatever follows the archive block must be readable. */
	left = buflen - (u_int64_t) (foundat - buf);
	if ((u_int64_t) headersize + 3 > left)
		{
		return buf + needle->header_len;
		}

	multi_volume = ((flags & 0x01) != 0);
	has_comment = ((flags & 0x02) != 0);

	foundat += headersize;
	left -= headersize;

	/* No file block where expected: look for a 0x74 type byte in the next 500 bytes. */
	if (foundat[2] != '\x74')
		{
		scan_len = (left < 500) ? (int) left : 500;
		for (i = 0; i < scan_len; i++)
			{

			/* Re-align so the 0x74 sits at index 2, but never step back before buf. */
			if (foundat[i] == '\x74' && (foundat - buf) + i >= 2)
				{
				foundat += i - 2;
				scan = 1;
				break;
				}
			}
		}

	if (headersize == 13 && foundat[2] != '\x74')
		{

		/* Block headers cannot be followed: fall back to searching for the footer. */
		if (scan == 0)
			{
			encrypted = 1;
			}

		left = buflen - (u_int64_t) (foundat - buf);
		if (left >= needle->max_len)
			bytes_to_search = needle->max_len;
		else
			bytes_to_search = left;

		hit = bm_search(needle->footer,
						needle->footer_len,
						foundat,
						bytes_to_search,
						needle->footer_bm_table,
						needle->case_sen,
						SEARCHTYPE_FORWARD);

		/* No footer found: keep bytes_to_search bytes, as before. */
		end_off = (hit != NULL) ? (u_int64_t) (hit - buf) : bytes_to_search;
		}
	else
		{

		/*Loop through files*/
		for (;;)
			{

			/* Bytes 2-10 of the block header are read; stop if they are not all in the chunk. */
			left = buflen - (u_int64_t) (foundat - buf);
			if (left < 11 || foundat[2] != '\x74')
				{
				break;
				}

			flags = htos(&foundat[3], FOREMOST_LITTLE_ENDIAN);
			headersize = htos(&foundat[5], FOREMOST_LITTLE_ENDIAN);
			filesize = htoi(&foundat[7], FOREMOST_LITTLE_ENDIAN);

			/* 64-bit sum so a huge filesize cannot wrap past the checks below. */
			block_len = (u_int64_t) headersize + filesize;

			if (headersize < 1 || block_len > buflen)
				flag = 1;

			if ((flags & 0x04) != 0)
				{
				passwd = 1;
				}

			tot_file_size = (u_int64_t) (foundat - buf);

			/* Stop on a block that runs past the chunk, or on an empty one that would never advance. */
			if (block_len == 0 || block_len > left)
				{
				break;
				}

			foundat += block_len;
			}

		end_off = (u_int64_t) (foundat - buf);

		if (flag == 1)
			{
			encrypted = 1;

			/* Search at most the default window, and never beyond the chunk. */
			left = buflen - end_off;
			search_len = (left < bytes_to_search) ? left : bytes_to_search;
			hit = NULL;
			if (search_len > 0)
				{
				hit = bm_search(needle->footer,
								needle->footer_len,
								foundat,
								search_len,
								needle->footer_bm_table,
								needle->case_sen,
								SEARCHTYPE_FORWARD);
				}

			if (hit != NULL)
				{
				foundat = hit;
				end_off = (u_int64_t) (hit - buf);
				}
			else
				{
				tot_file_size = bytes_to_search;
				end_off = bytes_to_search;
				}
			}

		/* Neither a file block nor the 0x7B end-of-archive block: not an archive we can carve. */
		if (tot_file_size == 0 && (buflen - end_off < 3 || foundat[2] != '\x7B'))
			{
			return foundat;
			}

		/* Include the 7-byte block at the end position. */
		end_off += 7;
		}

	/* Never write, or point, beyond the end of the chunk. */
	if (end_off > buflen)
		{
		end_off = buflen;
		}

	/*
	 * Attach comments only now that the archive is accepted, in the original order.
	 * NOTE(review): needle->comment's capacity is not visible here - confirm it can hold these.
	 */
	if (multi_volume)
		{
		strcat(needle->comment, " Multi-volume:");
		}

	if (has_comment)
		{
		strcat(needle->comment, " an archive comment is present:");
		}

	if (passwd == 1)
		{
		strcat(needle->comment, "Password Protected:");
		}

	if (encrypted)
		{
		strcat(needle->comment, "Encrypted Headers!");
		}

	/*We found the EOF, write the file to disk and return*/
	write_to_disk(s, needle, end_off, buf, c_offset + f_offset);
	return buf + end_off;
}
2443 
/*
 * Dispatch to the extractor that matches needle->type.
 *
 * All arguments are forwarded unchanged to the selected extractor.  For the
 * RIFF, WAV, OLE and XLS types needle->suffix is first pointed at a generic
 * suffix ("rif" or "ole").  Returns whatever the extractor returns, or NULL
 * when the type has no extractor here.
 */
unsigned char *extract_file(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
							s_spec *needle, u_int64_t f_offset)
{
	switch (needle->type)
		{
		/* Images */
		case JPEG:
			return extract_jpeg(s, c_offset, foundat, buflen, needle, f_offset);

		case GIF:
			return extract_gif(s, c_offset, foundat, buflen, needle, f_offset);

		case PNG:
			return extract_png(s, c_offset, foundat, buflen, needle, f_offset);

		case BMP:
			return extract_bmp(s, c_offset, foundat, buflen, needle, f_offset);

		/* RIFF containers */
		case RIFF:
			needle->suffix = "rif";
			return extract_riff(s, c_offset, foundat, buflen, needle, f_offset, "all");

		case AVI:
			return extract_riff(s, c_offset, foundat, buflen, needle, f_offset, "avi");

		case WAV:
			needle->suffix = "rif";
			return extract_riff(s, c_offset, foundat, buflen, needle, f_offset, "wav");

		case WMV:
			return extract_wmv(s, c_offset, foundat, buflen, needle, f_offset);

		/* OLE compound documents */
		case OLE:
			needle->suffix = "ole";
			return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "all");

		case DOC:
			return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "doc");

		case PPT:
			return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "ppt");

		case XLS:
			needle->suffix = "ole";
			return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "xls");

		/* Documents and text */
		case PDF:
			return extract_pdf(s, c_offset, foundat, buflen, needle, f_offset);

		case CPP:
			return extract_cpp(s, c_offset, foundat, buflen, needle, f_offset);

		case HTM:
			return extract_htm(s, c_offset, foundat, buflen, needle, f_offset);

		/* Video */
		case MPG:
			return extract_mpg(s, c_offset, foundat, buflen, needle, f_offset);

		case MP4:
			return extract_mp4(s, c_offset, foundat, buflen, needle, f_offset);

		/* Archives and ZIP-based documents */
		case ZIP:
			return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "all");

		case RAR:
			return extract_rar(s, c_offset, foundat, buflen, needle, f_offset);

		case SXW:
			return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "sxw");

		case SXC:
			return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "sxc");

		case SXI:
			return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "sxi");

		/* Executables */
		case EXE:
			return extract_exe(s, c_offset, foundat, buflen, needle, f_offset);

		/* MOV and VJPEG share one extractor */
		case MOV:
		case VJPEG:
			return extract_mov(s, c_offset, foundat, buflen, needle, f_offset);

		/* Types from the configuration file */
		case CONF:
			return extract_generic(s, c_offset, foundat, buflen, needle, f_offset);

		default:
			break;
		}

	/* No extractor for this type. */
	return NULL;
}
2557