1
2 /* extract.c
3 * Copyright (c) 2005, Nick Mikus
4 * This file contains the file specific functions used to extract
5 * data from an image.
6 *
7 * Each has a similar structure
8 * f_state *s: state of the program.
9 * c_offset: offset that the header was recorded within the current chunk
10 * foundat: The location the header was "foundat"
11 * buflen: How much buffer is left until the end of the current chunk
12 * needle: Search specification
13 * f_offset: Offset that the current chunk is located within the file
14 */
15
16 #include "main.h"
17 #include "extract.h"
18 #include "ole.h"
19 extern unsigned char buffer[OUR_BLK_SIZE];
20 extern int verbose;
21 extern int dir_count;
22 extern int block_list[OUR_BLK_SIZE / sizeof(int)];
23 extern int *FAT;
24 extern char *extract_name;
25 extern int extract;
26 extern int FATblk;
27 extern int highblk;
28
29 /********************************************************************************
30 *Function: extract_zip
31 *Description: Given that we have a ZIP header jump through the file headers
32 until we reach the EOF.
33 *Return: A pointer to where the EOF of the ZIP is in the current buffer
34 **********************************************************************************/
extract_zip(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset,char * type)35 unsigned char *extract_zip(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
36 s_spec *needle, u_int64_t f_offset, char *type)
37 {
38 unsigned char *currentpos = NULL;
39 unsigned char *buf = foundat;
40 unsigned short comment_length = 0;
41 unsigned char *extractbuf = NULL;
42 struct zipLocalFileHeader localFH;
43 u_int64_t bytes_to_search = 50 * KILOBYTE;
44 u_int64_t file_size = 0;
45 int oOffice = FALSE;
46 int office2007 = FALSE;
47
48 char comment[32];
49 localFH.genFlag=0;
50 localFH.compressed=0;
51 localFH.uncompressed =0;
52 if (buflen < 100)
53 return NULL;
54
55 if (strncmp((char *) &foundat[30], "mimetypeapplication/vnd.sun.xml.", 32) == 0)
56 {
57 oOffice = TRUE;
58 if (strncmp((char *) &foundat[62], "calc", 4) == 0)
59 {
60 needle->suffix = "sxc";
61 }
62 else if (strncmp((char *) &foundat[62], "impress", 7) == 0)
63 {
64 needle->suffix = "sxi";
65 }
66 else if (strncmp((char *) &foundat[62], "writer", 6) == 0)
67 {
68 needle->suffix = "sxw";
69 }
70 else
71 {
72 sprintf(comment, " (OpenOffice Doc?)");
73 strcat(needle->comment, comment);
74 needle->suffix = "sx";
75 }
76 }
77 else
78 {
79 needle->suffix = "zip";
80 }
81
82
83 while (1) //Jump through each local file header until the central directory structure is reached, much faster than searching
84 {
85
86 if (foundat[2] == '\x03' && foundat[3] == '\x04') //Verfiy we are looking at a local file header//
87 {
88
89 localFH.compression=htos(&foundat[8], FOREMOST_LITTLE_ENDIAN);
90 localFH.compressed = htoi(&foundat[18], FOREMOST_LITTLE_ENDIAN);
91 localFH.uncompressed = htoi(&foundat[22], FOREMOST_LITTLE_ENDIAN);
92 localFH.filename_length = htos(&foundat[26], FOREMOST_LITTLE_ENDIAN);
93 localFH.extra_length = htos(&foundat[28], FOREMOST_LITTLE_ENDIAN);;
94 localFH.genFlag = htos(&foundat[6], FOREMOST_LITTLE_ENDIAN);
95
96 // Sanity checking
97 if (localFH.compressed > needle->max_len)
98 return foundat + needle->header_len;
99
100 if (localFH.filename_length > 100)
101 return foundat + needle->header_len;
102
103 //Check if we should grab more from the disk
104 if (localFH.compressed + 30 > buflen - (foundat - buf))
105 {
106 return NULL;
107 }
108
109 //Size of the local file header data structure
110 foundat += 30;
111
112 if (strcmp(needle->suffix,"zip")==0)
113 {
114 if (strncmp((char *)foundat, "content.xml", 11) == 0 && strcmp(needle->suffix,"zip")==0)
115 {
116 oOffice = TRUE;
117 sprintf(comment, " (OpenOffice Doc?)");
118 strcat(needle->comment, comment);
119 needle->suffix = "sx";
120 }
121 else if (strstr((char *)foundat, ".class") || strstr((char *)foundat, ".jar") ||
122 strstr((char *)foundat, ".java"))
123 {
124 needle->suffix = "jar";
125 }
126 else if(strncmp((char *)foundat, "[Content_Types].xml",19)==0)
127 {
128 office2007=TRUE;
129 }
130 else if(strncmp((char *)foundat, "ppt/slides",10)==0 && office2007==TRUE)
131 {
132 needle->suffix = "pptx";
133 }
134 else if(strncmp((char *)foundat, "word/document.xml",17)==0 && office2007==TRUE)
135 {
136 needle->suffix = "docx";
137 }
138 else if(strncmp((char *)foundat, "xl/workbook.xml",15)==0 && office2007==TRUE)
139 {
140 needle->suffix = "xlsx";
141 }
142
143
144 else
145 {
146 printf("foundat=%s\n",foundat);
147 }
148 }
149
150 foundat += localFH.compressed;
151 foundat += localFH.filename_length;
152 foundat += localFH.extra_length;
153
154 if (localFH.genFlag == 8)
155 {
156 #ifdef DEBUG
157 fprintf(stderr,"We have extra stuff!!!");
158 #endif
159 }
160
161
162 if(localFH.genFlag & 1<<3 && localFH.uncompressed==0 && localFH.compressed==0 )
163 {
164 #ifdef DEBUG
165 fprintf(stderr,"No data to jmp Just search for the next file Footer (localFH.genFlag:=%d)\n",localFH.genFlag);
166 #endif
167 break;
168 }
169
170 #ifdef DEBUG
171 printf("localFH.compressed:=%d localFH.uncompressed:=%d\n\t jumping %d bytes filename=%d bytes",
172 localFH.compressed,
173 localFH.uncompressed,localFH.filename_length+localFH.compressed+localFH.extra_length,localFH.filename_length);
174 printx(foundat, 0, 16);
175 #endif
176
177 }
178 else if (oOffice && localFH.genFlag == 8)
179 {
180 break;
181 }
182 else
183 {
184 break;
185 }
186
187
188 }//end while loop
189
190 if (oOffice)
191 {
192
193 //We have an OO doc how long should we search for?
194 bytes_to_search = 1 * MEGABYTE;
195 }
196 else if (localFH.genFlag & 1<<3 && localFH.uncompressed==0 && localFH.compressed==0 )
197 {
198 bytes_to_search = needle->max_len;
199 }
200 else
201 {
202 bytes_to_search = (buflen < (foundat - buf) ? buflen : buflen - (foundat - buf));
203 }
204
205 //Make sure we are not searching more than what he have
206 if (buflen <= (foundat - buf)) {
207 #ifdef DEBUG
208 printf("avoided bug in extract_zip!\n");
209 #endif
210 bytes_to_search = 0;
211 } else {
212 if (buflen - (foundat - buf) < bytes_to_search)
213 {
214 bytes_to_search = buflen - (foundat - buf);
215 }
216 }
217
218
219 currentpos = foundat;
220 #ifdef DEBUG
221 printf("Search for the footer bytes_to_search:=%lld buflen:=%lld\n", bytes_to_search, buflen);
222 #endif
223
224 foundat = bm_search(needle->footer,
225 needle->footer_len,
226 foundat,
227 bytes_to_search,
228 needle->footer_bm_table,
229 needle->case_sen,
230 SEARCHTYPE_FORWARD);
231 #ifdef DEBUG
232 printf("Search complete \n");
233 #endif
234
235 if (foundat) /*Found the end of the central directory structure, determine the exact length and extract*/
236 {
237
238 /*Jump to the comment length field*/
239 #ifdef DEBUG
240 printf("distance searched:=%lu\n", foundat - currentpos);
241 #endif
242 if (buflen - (foundat - buf) > 20)
243 {
244 foundat += 20;
245 }
246 else
247 {
248 return NULL;
249 }
250
251 comment_length = htos(foundat, FOREMOST_LITTLE_ENDIAN);
252 foundat += comment_length + 2;
253 file_size = (foundat - buf);
254 #ifdef DEBUG
255 printf("File size %lld\n", file_size);
256 printf("Found a %s type:=%s\n", needle->suffix, type);
257 #endif
258 extractbuf = buf;
259 if (strcmp(type,"all")==0 || strcmp(type,needle->suffix)==0)
260 {
261 #ifdef DEBUG
262 printf("Writing a %s to disk\n", needle->suffix);
263 #endif
264 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
265 }
266
267 #ifdef DEBUG
268 printf("Found a %s\n", needle->suffix);
269 #endif
270 return foundat-2;
271 }
272
273 if (bytes_to_search > buflen - (currentpos - buf))
274 return NULL;
275
276 #ifdef DEBUG
277 printf("I give up \n");
278 #endif
279 return currentpos;
280 }
281
282 /********************************************************************************
283 *Function: extract_pdf
284 *Description: Given that we have a PDF header check if it is Linearized, if so
285 grab the file size and we are done, else search for the %%EOF
286 *Return: A pointer to where the EOF of the PDF is in the current buffer
287 **********************************************************************************/
/*
 * extract_pdf: carve a PDF that starts at foundat.
 *
 * If the linearization markers (needle->markerlist[2] followed by
 * needle->markerlist[0]) are found near the start, the decimal length that
 * follows is used to locate the footer directly.  Otherwise the buffer is
 * searched forward for the footer.
 *
 * buflen is the number of valid bytes starting at foundat; every search below
 * is clamped so that it never runs past buf + buflen.
 *
 * Returns NULL when more data is needed or no footer was found, a pointer just
 * past the footer when a file was written, or a pointer past the rejected
 * header/length field when the candidate is discarded.
 */
unsigned char *extract_pdf(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char		*currentpos = NULL;
	unsigned char		*buf = foundat;
	unsigned char		*extractbuf = NULL;
	unsigned char		*header = foundat;
	char				sizefield[9];
	unsigned long int	size = 0;
	int					file_size = 0;
	u_int64_t			remaining = 0;
	u_int64_t			bytes_to_search = 0;
	u_int64_t			probe = 0;

	/* Too little data to inspect: ask for more */
	if (buflen < 512)
		return NULL;
	if ((u_int64_t)needle->header_len >= buflen)
		return NULL;

	foundat += needle->header_len;	/* Jump Past the %PDF HEADER */
	currentpos = foundat;
	remaining = buflen - (u_int64_t)(foundat - buf);

#ifdef DEBUG
	printf("PDF SEARCH\n");
#endif

	/* Determine when we have searched enough: never beyond max_len and never
	 * beyond the bytes left after the header */
	if (remaining >= needle->max_len)
	{
		bytes_to_search = needle->max_len;
	}
	else
	{
		bytes_to_search = remaining;
	}

	/* Check for the obj marker in the first 100 bytes */
	probe = (remaining < 100) ? remaining : 100;
	foundat = bm_search(needle->markerlist[1].value,
						needle->markerlist[1].len,
						foundat,
						probe,
						needle->markerlist[1].marker_bm_table,
						needle->case_sen,
						SEARCHTYPE_FORWARD);

	if (!foundat)
	{
#ifdef DEBUG
		printf("no obj found\n");
#endif
		return currentpos + probe;
	}

	/* Search for the linearization marker to see if the file is linearized */
	probe = (remaining < 512) ? remaining : 512;
	foundat = bm_search(needle->markerlist[2].value,
						needle->markerlist[2].len,
						currentpos,
						probe,
						needle->markerlist[2].marker_bm_table,
						needle->case_sen,
						SEARCHTYPE_FORWARD);

	if (foundat)
	{
		probe = buflen - (u_int64_t)(foundat - buf);
		if (probe > 512)
			probe = 512;

		foundat = bm_search(needle->markerlist[0].value,
							needle->markerlist[0].len,
							foundat,
							probe,
							needle->markerlist[0].marker_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);
	}
	else
	{
#ifdef DEBUG
		printf("not linearized\n");
#endif
	}

	if (foundat)	/* The PDF is linearized extract the size and we are done */
	{
		/* The 8-byte length field after the marker must be inside the buffer */
		if (buflen - (u_int64_t)(foundat - buf) < (u_int64_t)needle->markerlist[0].len + 8)
			return NULL;

		strcat(needle->comment, " (PDF is Linearized)");

		foundat += needle->markerlist[0].len;

		/* Copy the digits into a terminated local buffer: atoi() on the raw
		 * bytes would keep reading past the 8 bytes that were copied */
		memcpy(sizefield, foundat, 8);
		sizefield[8] = '\0';
		size = atoi(sizefield);

		if (size <= 0)
			return foundat;
		if (size > buflen)
		{
			if (size > needle->max_len)
				return foundat;
			else
				return NULL;
		}

		/* A length shorter than the footer window would place the search
		 * before the start of the buffer; treat it like an invalid length */
		if (size < (u_int64_t)needle->footer_len + 10)
			return foundat;

		header += size;
		foundat = header;
		foundat -= needle->footer_len;

		/* Jump back 10 bytes and see if we actually have an EOF there */
		foundat -= 10;
		currentpos = foundat;
		foundat = bm_search(needle->footer,
							needle->footer_len,
							foundat,
							needle->footer_len + 9,
							needle->footer_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);
		if (foundat)	/* There is a valid EOF at the end, Write to disk */
		{
			foundat += needle->footer_len + 1;
			file_size = (foundat - buf);

			extractbuf = buf;
			write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);

			return foundat;
		}

		return NULL;
	}
	else	/* Search for Linearized PDF failed, just look for %%EOF */
	{
#ifdef DEBUG
		printf("	Linearized search failed, searching %d bytes, buflen:=%lld\n",
			   (int)bytes_to_search,
			   buflen - (header - buf));
#endif
		foundat = currentpos;
		foundat = bm_search(needle->footer,
							needle->footer_len,
							foundat,
							bytes_to_search,
							needle->footer_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);

		if (foundat)	/* Write the non-linearized PDF to disk */
		{
			/* One extra byte is taken after the footer; do not let it cross the end of the buffer */
			foundat += needle->footer_len + 1;
			if ((u_int64_t)(foundat - buf) > buflen)
				foundat = buf + buflen;

			file_size = (foundat - buf);
			extractbuf = buf;

			write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);

			return foundat;
		}

		return NULL;
	}
}
452
453 /********************************************************************************
454 *Function: extract_cpp
455 *Description: Use keywords to attempt to find C/C++ source code
456 *Return: A pointer to where the EOF of the CPP file is in the current buffer
457 **********************************************************************************/
/*
 * cpp_is_text: non-zero for the bytes accepted as source text
 * (printable characters, line feed and tab).
 */
static int cpp_is_text(unsigned char c)
{
	return isprint(c) || c == '\x0a' || c == '\x09';
}

/*
 * extract_cpp: treat the run of text bytes surrounding a #include hit as a
 * candidate C/C++ source file and write it to disk when the keyword marker
 * (needle->markerlist[0]) is present in it.
 *
 * foundat: position of the header hit; buflen valid bytes follow it.
 * c_offset: offset of the hit within the current chunk; it bounds how far the
 *           backward scan for leading comments may go.
 *           NOTE(review): this relies on foundat - c_offset being the start of
 *           the buffer, as described in the file header - confirm against the caller.
 *
 * Returns a pointer past the header when no quote/bracket follows it, the end
 * of the text run when the run is shorter than 50 bytes or a file was
 * written, and NULL when the marker was not found.
 */
unsigned char *extract_cpp(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*header = foundat;
	unsigned char	*buf = foundat;
	unsigned char	*footer = NULL;
	u_int64_t		end = 0;
	u_int64_t		back = 0;
	u_int64_t		i = 0;
	int				marker_score = 0;
	int				ok = FALSE;
	int				file_size = 0;

	/* Search for a " or a < within 20 bytes of a #include statement */
	for (i = 0; i < 20 && i < buflen; i++)
	{
		if (foundat[i] == '\x22' || foundat[i] == '\x3C')
		{
			ok = TRUE;
		}
	}

	if (!ok)
		return foundat + needle->header_len;

	/* Keep running through the buffer until a non printable character (or the
	 * end of the buffer) is reached */
	while (end < buflen && cpp_is_text(foundat[end]))
	{
		end++;
	}

	footer = foundat + end;
	footer -= 1;

	if (end < 50)
		return footer;

	/* Now go the other way and grab the comments at the beginning of the file,
	 * without stepping in front of the chunk */
	while (back < c_offset && cpp_is_text(header[-1]))
	{
		header--;
		back++;
	}

	file_size = (footer - header);

	/* Now we have an ascii file to look for keywords in */
	foundat = bm_search(needle->footer,
						needle->footer_len,
						header,
						file_size,
						needle->footer_bm_table,
						FALSE,
						SEARCHTYPE_FORWARD);
	if (foundat)
		marker_score += 1;

	foundat = bm_search(needle->markerlist[0].value,
						needle->markerlist[0].len,
						header,
						file_size,
						needle->markerlist[0].marker_bm_table,
						1,
						SEARCHTYPE_FORWARD);
	if (foundat)
		marker_score += 1;

	/* NOTE(review): as written, only the second search decides the outcome; a
	 * footer-only match (score 1) still returns NULL - confirm this is intended */
	if (marker_score == 0)
		return foundat;

	if (foundat)
	{
		/* Write from the start of the text run: size and offset are both
		 * measured from header, so the data pointer has to match them */
		write_to_disk(s, needle, file_size, header, c_offset + f_offset - back);

		return footer;
	}

	return NULL;
}
544
545 /********************************************************************************
546 *Function: extract_htm
547 *Description: Given that we have a HTM header
search for the file EOF and check that the bytes around the header are ASCII
549 *Return: A pointer to where the EOF of the HTM is in the current buffer
550 **********************************************************************************/
/*
 * extract_htm: carve an HTML document that starts at foundat.
 *
 * The 16 bytes after the header must be text (printable, LF or tab); then the
 * footer is searched for within the smaller of needle->max_len and the bytes
 * that remain in the buffer after the header.
 *
 * Returns NULL when the buffer is too short to inspect or no footer was found,
 * foundat + 16 (relative to the end of the header) when the probe hits a
 * non-text byte, and a pointer just past the footer when a file was written.
 */
unsigned char *extract_htm(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*buf = foundat;
	unsigned char	*extractbuf = NULL;
	u_int64_t		bytes_to_search = 0;
	u_int64_t		remaining = 0;
	int				i = 0;
	int				file_size = 0;

	/* The header plus the 16 probe bytes must be inside the buffer; like the
	 * other extractors, report "not enough data" with NULL */
	if (buflen < (u_int64_t)needle->header_len + 16)
		return NULL;

	/* Jump past the <HTML tag */
	foundat += needle->header_len;

	/* Check the first 16 bytes to see if they are ASCII */
	for (i = 0; i < 16; i++)
	{
		if (!isprint(foundat[i]) && foundat[i] != '\x0a' && foundat[i] != '\x09')
		{
			return foundat + 16;
		}
	}

	/* Search at most max_len bytes, and never more than what is left in the
	 * buffer counted from the current (post-header) position */
	remaining = buflen - (u_int64_t)(foundat - buf);
	if (remaining < needle->max_len)
	{
		bytes_to_search = remaining;
	}
	else
	{
		bytes_to_search = needle->max_len;
	}

	/* Search for the HTML> tag */
	foundat = bm_search(needle->footer,
						needle->footer_len,
						foundat,
						bytes_to_search,
						needle->footer_bm_table,
						needle->case_sen,
						SEARCHTYPE_FORWARD);
	if (!foundat)
		return NULL;

	/* Found the footer, write to disk */
	file_size = (foundat - buf) + needle->footer_len;
	extractbuf = buf;
	write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
	foundat += needle->footer_len;

	return foundat;
}
608
609 /********************************************************************************
610 *Function: validOLEheader
*Description: run various tests against an OLE header to determine whether or not
612 it is valid.
613 *Return: TRUE/FALSE
614 **********************************************************************************/
valid_ole_header(struct OLE_HDR * h)615 int valid_ole_header(struct OLE_HDR *h)
616 {
617
618 if (htos((unsigned char *) &h->reserved, FOREMOST_LITTLE_ENDIAN) != 0 ||
619 htoi((unsigned char *) &h->reserved1, FOREMOST_LITTLE_ENDIAN) != 0 ||
620 htoi((unsigned char *) &h->reserved2, FOREMOST_LITTLE_ENDIAN) != 0)
621 {
622 return FALSE;
623 }
624
625 /*The minimum sector shift is usually 2^6(64) and the uSectorShift is 2^9(512))*/
626 if (htos((unsigned char *) &h->uMiniSectorShift, FOREMOST_LITTLE_ENDIAN) != 6 ||
627 htos((unsigned char *) &h->uSectorShift, FOREMOST_LITTLE_ENDIAN) != 9 ||
628 htoi((unsigned char *) &h->dir_flag, FOREMOST_LITTLE_ENDIAN) < 0)
629 {
630 return FALSE;
631 }
632
633 /*Sanity Checking*/
634 if (htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN) <= 0 ||
635 htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN) > 100)
636 {
637 return FALSE;
638 }
639
640 if (htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN) < 0 ||
641 htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN) > 100)
642 {
643 return FALSE;
644 }
645
646 return TRUE;
647
648 }
649
650 /********************************************************************************
651 *Function:checkOleName
652 *Description: Determine what type of file is stored in the OLE format based on the
653 names of DIRENT in the FAT table.
654 *Return: A char* consisting of the suffix of the appropriate file.
655 **********************************************************************************/
check_ole_name(char * name)656 char *check_ole_name(char *name)
657 {
658 if (strstr(name, "WordDocument"))
659 {
660 return "doc";
661 }
662 else if (strstr(name, "Worksheet") || strstr(name, "Book") || strstr(name, "Workbook"))
663 {
664 return "xls";
665 }
666 else if (strstr(name, "Power"))
667 {
668 return "ppt";
669 }
670 else if (strstr(name, "Access") || strstr(name, "AccessObjSiteData"))
671 {
672 return "mbd";
673 }
674 else if (strstr(name, "Visio"))
675 {
676 return "vis";
677 }
678 else if (strstr(name, "Sfx"))
679 {
680 return "sdw";
681 }
682 else
683 {
684 return NULL;
685 }
686
687 return NULL;
688
689 }
690
/*
 * adjust_bs: round size up to the next multiple of the block size bs.
 *
 * size: byte count to round.
 * bs:   block size; a value that is zero or negative cannot be used as a
 *       divisor, in which case size is returned unchanged.
 *
 * Returns size when it is already a multiple of bs, otherwise size plus the
 * padding needed to reach the next multiple.
 */
int adjust_bs(int size, int bs)
{
	int	rem = 0;

	/* Guard the modulo below against division by zero */
	if (bs <= 0)
		return size;

	rem = (size % bs);

	if (rem == 0)
	{
		return size;
	}

#ifdef DEBUG
	printf("\tnew size:=%d\n", size + (bs - rem));
#endif
	return (size + (bs - rem));
}
707
708 /********************************************************************************
709 *Function: extract_ole
710 *Description: Given that we have a OLE header, jump through the OLE structure and
711 determine what type of file it is.
712 *Return: A pointer to where the EOF of the OLE is in the current buffer
713 **********************************************************************************/
/*
 * extract_ole: parse the OLE compound document that starts at foundat, work
 * out its document type from the directory entry names and its size from the
 * directory entries, and write it to disk.
 *
 * s, c_offset, f_offset: handed to write_to_disk() (offset is c_offset + f_offset).
 * foundat: start of the OLE header inside the current buffer.
 * buflen:  number of valid bytes starting at foundat.
 * needle:  search specification; needle->suffix is set to the detected suffix
 *          ("ole" when no directory entry name is recognised).
 * type:    "all", or a string the detected suffix must contain for the file to
 *          be written.
 *
 * The function works through the globals FAT, dirlist, dl, buffer, block_list,
 * FATblk, dir_count and highblk shared with the OLE helper code.
 *
 * Returns a pointer into the buffer: buf + validblk (the last block known to
 * be usable) when parsing fails, otherwise a position near the end of the
 * document.
 */
unsigned char *extract_ole(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset, char *type)
{
	unsigned char	*buf = foundat;
	unsigned char	*extractbuf = NULL;
	char			*temp = NULL;
	char			*suffix = "ole";
	int				totalsize = 0;
	int				extrasize = 0;
	int				oldblk = 0;
	int				i, j;
	int				size = 0;
	int				blknum = 0;
	int				validblk = 512;
	int				file_size = 0;
	int				num_extra_FAT_blocks = 0;
	unsigned char	*htoi_c = NULL;
	int				extra_dir_blocks = 0;
	int				num_FAT_blocks = 0;
	int				next_FAT_block = 0;
	unsigned char	*p;
	int				fib = 1024;
	struct OLE_HDR	*h = NULL;
	size_t			fat_bytes = 0;
	size_t			ints_per_blk = OUR_BLK_SIZE / sizeof(int);
	long long		indent = 0;

	int				result = 0;
	int				highblock = 0;
	unsigned long	miniSectorCutoff = 0;
	unsigned long	csectMiniFat = 0;

	/* Deal with globals defined in the OLE API, ugly */
	if (dirlist != NULL)
		free(dirlist);
	if (FAT != NULL)
		free(FAT);
	init_ole();

	if (buflen < validblk)
		validblk = buflen;

	/* The header and the FAT entries copied out of the first block are read
	 * directly from the buffer, so a whole block has to be present.
	 * NOTE(review): assumes the header plus the region copied below fit in
	 * the first OUR_BLK_SIZE bytes - confirm against struct OLE_HDR. */
	if (buflen < OUR_BLK_SIZE)
		return (buf + validblk);

	h = (struct OLE_HDR *)foundat;	/* cast the header block to point at foundat */
#ifdef DEBUG
	dump_header(h);
#endif
	num_FAT_blocks = htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN);

	if (!valid_ole_header(h))
		return (buf + validblk);

	miniSectorCutoff = htoi((unsigned char *) &h->miniSectorCutoff, FOREMOST_LITTLE_ENDIAN);
	csectMiniFat = htoi((unsigned char *) &h->csectMiniFat, FOREMOST_LITTLE_ENDIAN);
	next_FAT_block = htoi((unsigned char *) &h->FAT_next_block, FOREMOST_LITTLE_ENDIAN);
	num_extra_FAT_blocks = htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN);

	/* valid_ole_header() limited num_FAT_blocks to 1..100 */
	fat_bytes = (size_t)OUR_BLK_SIZE * (size_t)(num_FAT_blocks + 1);
	FAT = (int *)Malloc(fat_bytes);
	p = (unsigned char *)FAT;
	memcpy(p, &h[1], OUR_BLK_SIZE - FAT_START);
	if (next_FAT_block > 0)
	{
		p += (OUR_BLK_SIZE - FAT_START);
		blknum = next_FAT_block;
		for (i = 0; i < num_extra_FAT_blocks; i++)
		{
			/* num_extra_FAT_blocks comes from the file and is independent of
			 * the allocation size: stop before a block-sized copy would run
			 * past the end of FAT (get_block presumably copies OUR_BLK_SIZE bytes) */
			if ((size_t)(p - (unsigned char *)FAT) + OUR_BLK_SIZE > fat_bytes)
				return buf + validblk;

			if (!get_block(buf, blknum, p, buflen))
				return buf + validblk;
			validblk = (blknum + 1) * OUR_BLK_SIZE;
			p += OUR_BLK_SIZE - sizeof(int);
			blknum = htoi(p, FOREMOST_LITTLE_ENDIAN);
		}
	}

	blknum = htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN);

	if (blknum < 0)
	{
		return buf + 10;
	}

	highblock = htoi((unsigned char *) &h->dir_flag, FOREMOST_LITTLE_ENDIAN);
#ifdef DEBUG
	printf("getting dir block\n");
#endif

	if (!get_block(buf, blknum, buffer, buflen))
		return buf + validblk;	/* GET DIR BLOCK */
#ifdef DEBUG
	printf("done getting dir block\n");
#endif
	validblk = (blknum + 1) * OUR_BLK_SIZE;

	/* First pass over the directory chain: count the directory blocks */
	while (blknum != END_OF_CHAIN)
	{
#ifdef DEBUG
		printf("finding dir info extra_dir_blks:=%d\n", extra_dir_blocks);
#endif
		if (extra_dir_blocks > 300)
			return buf + validblk;

#ifdef DEBUG
		printf("***blknum:=%d FATblk:=%d ourblksize=%d\n", blknum, FATblk,OUR_BLK_SIZE);
#endif
		/* Block numbers come from the file: refuse anything that would index outside FAT */
		if (blknum < 0 || (size_t)blknum / ints_per_blk >= fat_bytes / sizeof(int))
			return buf + validblk;

		oldblk = blknum;
		htoi_c = (unsigned char *) &FAT[blknum / (OUR_BLK_SIZE / sizeof(int))];

		FATblk = htoi(htoi_c, FOREMOST_LITTLE_ENDIAN);
#ifdef DEBUG
		printf("***blknum:=%d FATblk:=%d\n", blknum, FATblk);
#endif

		if (!get_FAT_block(buf, blknum, block_list, buflen))
			return buf + validblk;
		blknum = htoi((unsigned char *) &block_list[blknum % 128], FOREMOST_LITTLE_ENDIAN);
#ifdef DEBUG
		printf("**blknum:=%d FATblk:=%d\n", blknum, FATblk);
#endif
		if (blknum == END_OF_CHAIN || oldblk == blknum)
		{
#ifdef DEBUG
			printf("EOC\n");
#endif
			break;
		}

		extra_dir_blocks++;
		result = get_dir_block(buf, blknum, buflen);
		if (result == SHORT_BLOCK)
		{
#ifdef DEBUG
			printf("SHORT BLK\n");
#endif
			break;
		}
		else if (!result)
			return buf + validblk;
	}

#ifdef DEBUG
	printf("DONE WITH WHILE\n");
#endif

	/* Second pass: allocate the directory list and load every directory block */
	blknum = htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN);
	size = OUR_BLK_SIZE * (extra_dir_blocks + 1);
	dirlist = (struct DIRECTORY *)Malloc(size);
	memset(dirlist, 0, size);

	if (!get_block(buf, blknum, buffer, buflen))
		return buf + validblk;	/* GET DIR BLOCK */

	if (!get_dir_info(buffer))
	{
		return foundat + validblk;
	}

	for (i = 0; i < extra_dir_blocks; i++)
	{
		if (!get_FAT_block(buf, blknum, block_list, buflen))
			return buf + validblk;
		blknum = htoi((unsigned char *) &block_list[blknum % 128], FOREMOST_LITTLE_ENDIAN);
		if (blknum == END_OF_CHAIN)
			break;
#ifdef DEBUG
		printf("getting dir blk blknum=%d\n", blknum);
#endif
		if (!get_block(buf, blknum, buffer, buflen))
			return buf + validblk;	/* GET DIR BLOCK */
		if (!get_dir_info(buffer))
		{
			return buf + validblk;
		}
	}

#ifdef DEBUG
	printf("dir count is %d\n", i);
#endif

	/* Walk the directory entries: pick the suffix and add up the stream sizes */
	for (dl = dirlist, i = 0; i < dir_count; i++, dl++)
	{
		/* buffer is reused here to build an indented display line for the entry */
		memset(buffer, ' ', 75);

		/* Keep the indent inside buffer and leave room for the trailing
		 * "\n" written below; an out-of-range level is shown unindented */
		indent = (long long)htoi((unsigned char *) &dl->level, FOREMOST_LITTLE_ENDIAN) * 4;
		if (indent < 0 || indent >= OUR_BLK_SIZE - 1)
			indent = 0;
		j = (int)indent;
		snprintf((char *) &buffer[j], (size_t)(OUR_BLK_SIZE - 1 - j), "%-s", dl->name);
		j = strlen((char *)buffer);

		if (dl->name[0] == '@')
			return foundat + validblk;
		if (dl->type == STREAM)
		{
			buffer[j] = ' ';
			sprintf((char *) &buffer[60], "%8d\n", dl->size);

			if (temp == NULL)	/* check if we have already defined the type */
			{
				temp = check_ole_name(dl->name);
				if (temp)
					suffix = temp;
			}

			/* Streams above the cutoff are rounded to 512-byte sectors, the rest to 64-byte mini sectors */
			if (dl->size > miniSectorCutoff)
			{
				totalsize += adjust_bs(dl->size, 512);
			}
			else
			{
				totalsize += adjust_bs(dl->size, 64);
			}

#ifdef DEBUG
			fprintf(stdout, "%s", (char *)buffer);
#endif
		}
		else
		{
			sprintf((char *) &buffer[j], "\n");
#ifdef DEBUG
			printf("\tnot stream data \n");
			fprintf(stdout, "%s", (char *)buffer);
#endif

			extrasize += adjust_bs(dl->size, 512);
		}
	}

	totalsize += fib;
#ifdef DEBUG
	printf("DIR SIZE:=%d, numFATblks:=%d MiniFat:=%d\n",
		   adjust_bs(((dir_count) * 128), 512),
		   (num_FAT_blocks * 512),
		   adjust_bs((64 * csectMiniFat), 512));
#endif
	totalsize += adjust_bs(((dir_count) * 128), 512);
	totalsize += (num_FAT_blocks * 512);
	totalsize += adjust_bs((64 * csectMiniFat), 512);
	if ((highblk + 5) > highblock && highblk > 0)
	{
		highblock = highblk + 5;
	}

	highblock = highblock * 512;

#ifdef DEBUG
	printf("\t highblock:=%d\n", highblock);
#endif
	if (highblock > totalsize)
	{
#ifdef DEBUG
		printf("	Total size:=%d a difference of %lld\n", totalsize, buflen - totalsize);
		printf("	Extra size:=%d \n", extrasize);
		printf("	Highblock is greater than totalsize\n");
#endif
		totalsize = highblock;
	}

	totalsize = adjust_bs(totalsize, 512);
#ifdef DEBUG
	printf("	Total size:=%d a difference of %lld\n", totalsize, buflen - totalsize);
	printf("	Extra size:=%d \n", extrasize);
#endif

	if (buflen < totalsize)
	{
#ifdef DEBUG
		printf("	***Error not enough left in the buffer left:=%lld needed=%d***\n",
			   buflen,
			   totalsize);
#endif
		totalsize = buflen;
	}

	/* Return to the highest blknum read in the file, that way we don't miss files that are close */
	foundat = buf;
	highblock -= 5 * 512;
	if (highblock > 0 && highblock < buflen)
	{
		foundat += highblock;
	}
	else
	{
		foundat += totalsize;
	}

	file_size = totalsize;
	extractbuf = buf;

	if (suffix)
		needle->suffix = suffix;

	if (!strstr(needle->suffix, type) && strcmp(type, "all") != 0)
	{
		return foundat;
	}

	write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
	return foundat;
}
1007
1008 //********************************************************************************/
check_mov(unsigned char * atom)1009 int check_mov(unsigned char *atom)
1010 {
1011 #ifdef DEBUG
1012 printf("Atom:= %c%c%c%c\n", atom[0], atom[1], atom[2], atom[3]);
1013 #endif
1014 if (strncmp((char *)atom, "free", 4) == 0 || strncmp((char *)atom, "mdat", 4) == 0 ||
1015 strncmp((char *)atom, "free", 4) == 0 || strncmp((char *)atom, "wide", 4) == 0 ||
1016 strncmp((char *)atom, "PICT", 4) == 0)
1017 {
1018 return TRUE;
1019 }
1020
1021 if (strncmp((char *)atom, "trak", 4) == 0 || strncmp((char *)atom, "mdat", 4) == 0 ||
1022 strncmp((char *)atom, "mp3", 3) == 0 || strncmp((char *)atom, "wide", 4) == 0 ||
1023 strncmp((char *)atom, "moov", 4) == 0)
1024 {
1025 return TRUE;
1026 }
1027
1028 return FALSE;
1029 }
1030
1031 /********************************************************************************
1032 *Function: extract_mov
1033 *Description: Given that we have a MOV header JUMP through the mov data structures
1034 until we reach EOF
1035 *Return: A pointer to where the EOF of the MOV is in the current buffer
1036 **********************************************************************************/
/*
 * extract_mov: carve a QuickTime file by hopping from atom to atom.
 *
 * foundat points at the atom type matched by the header search; the 4-byte
 * big-endian atom size sits immediately before it, so parsing starts 4 bytes
 * earlier and buflen is enlarged by the same amount.
 * NOTE(review): this assumes the 4 bytes in front of foundat belong to the
 * buffer - confirm against the caller.
 *
 * Each atom size is added to the running file size.  Walking stops when fewer
 * than 8 bytes (size + type of a further atom) remain or when the next atom
 * type is not accepted by check_mov(); the file is written only if an mdat
 * atom was seen.
 *
 * Returns NULL when the atoms run past a buffer that is shorter than
 * needle->max_len, a pointer past the rejected header when an atom size is
 * invalid or too large, the current position when walking stops without an
 * mdat atom, and the end of the last atom when a file was written.
 */
unsigned char *extract_mov(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*buf = foundat - 4;
	unsigned char	*extractbuf = NULL;
	unsigned int	atomsize = 0;
	unsigned int	filesize = 0;
	int				mdat = FALSE;

	foundat -= 4;
	buflen += 4;

	while (1)	/* Loop through all the atoms until the EOF is reached */
	{
		atomsize = htoi(foundat, FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
		printf("Atomsize:=%d\n", atomsize);
#endif
		if (atomsize <= 0 || atomsize > needle->max_len)
		{
			return foundat + needle->header_len + 4;
		}

		filesize += atomsize;	/* Add the atomsize to the total file size */
		if (filesize > buflen)
		{
#ifdef DEBUG
			printf("file size > buflen fs:=%d bf:=%lld\n", filesize, buflen);
#endif
			if (buflen >= needle->max_len)
				return foundat + needle->header_len + 4;
			else
			{
				return NULL;
			}
		}

		foundat += atomsize;

		/* The checks below read the 4-byte type at foundat + 4 and the next
		 * iteration reads the 4-byte size at foundat: 8 bytes are required */
		if (buflen - (foundat - buf) < 8)
		{
			if (mdat)
			{
				break;
			}
			else
			{
#ifdef DEBUG
				printf("No mdat found");
#endif
				return foundat;
			}
		}

		/* Check if we have an mdat atom, these are required thus can be used to
		 * weed out corrupted files */
		if (strncmp((char *)foundat + 4, "mdat", 4) == 0)
		{
			mdat = TRUE;
		}

		if (check_mov(foundat + 4))	/* Check to see if we are at a valid header */
		{
#ifdef DEBUG
			printf("Checkmov succeeded\n");
#endif
		}
		else
		{
#ifdef DEBUG
			printf("Checkmov failed\n");
#endif
			if (mdat)
			{
				break;
			}
			else
			{
#ifdef DEBUG
				printf("No mdat found");
#endif
				return foundat;
			}
		}
	}	/* End loop */

	/* The loop is only left with a valid position and an mdat atom seen */
	filesize = (foundat - buf);
#ifdef DEBUG
	printf("file size:=%d\n", filesize);
#endif
	extractbuf = buf;
	write_to_disk(s, needle, filesize, extractbuf, c_offset + f_offset - 4);

	return foundat;
}
1139
1140 /********************************************************************************
1141 *Function: extract_wmv
1142 *Description: Given that we have a WMV header
1143 search for the file header and grab the file size.
1144 *Return: A pointer to where the EOF of the WMV is in the current buffer
1145 **********************************************************************************/
extract_wmv(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1146 unsigned char *extract_wmv(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1147 s_spec *needle, u_int64_t f_offset)
1148 {
1149
1150 unsigned char *currentpos = NULL;
1151 unsigned char *header = foundat;
1152 unsigned char *extractbuf = NULL;
1153 unsigned char *buf = foundat;
1154 unsigned int size = 0;
1155 u_int64_t file_size = 0;
1156 u_int64_t headerSize = 0;
1157 u_int64_t fileObjHeaderSize = 0;
1158 int numberofHeaderObjects = 0;
1159 int reserved[2];
1160 int bytes_to_search = 0;
1161
1162 /*If we have less than a WMV header bail out*/
1163 if (buflen < 70)
1164 return NULL;
1165
1166 foundat += 16; /*Jump to the header size*/
1167 headerSize = htoll(foundat, FOREMOST_LITTLE_ENDIAN);
1168 //printx(foundat,0,8);
1169 foundat += 8;
1170 numberofHeaderObjects = htoi(foundat, FOREMOST_LITTLE_ENDIAN);
1171 foundat += 4; //Jump to the begin File properties obj
1172 reserved[0] = foundat[0];
1173 reserved[1] = foundat[1];
1174 foundat += 2;
1175 //printf("found WMV\n");
1176 //end header obj
1177 //****************************************************/
1178 //Sanity Check
1179 //printf("WMV num_header_objs=%d headerSize=%llu\n",numberofHeaderObjects,headerSize);
1180
1181 if (headerSize <= 0 || numberofHeaderObjects <= 0 || reserved[0] != 1)
1182 {
1183 printf("WMV err num_header_objs=%d headerSize=%llu\n",numberofHeaderObjects,headerSize);
1184 return foundat;
1185 }
1186
1187 currentpos = foundat;
1188 if (buflen - (foundat - buf) >= needle->max_len)
1189 bytes_to_search = needle->max_len;
1190 else
1191 bytes_to_search = buflen - (foundat - buf);
1192
1193 /*Note we are not searching for the footer here, just the file header ID so we can get the file size*/
1194 foundat = bm_search(needle->footer,
1195 needle->footer_len,
1196 foundat,
1197 bytes_to_search,
1198 needle->footer_bm_table,
1199 needle->case_sen,
1200 SEARCHTYPE_FORWARD);
1201 if (foundat)
1202 {
1203 foundat += 16; /*jump to the headersize*/
1204 fileObjHeaderSize = htoll(foundat, FOREMOST_LITTLE_ENDIAN);
1205 //printx(foundat,0,8);
1206 foundat += 24; //Jump to the file size obj
1207 size = htoi(foundat, FOREMOST_LITTLE_ENDIAN);
1208 //printx(foundat,0,8);
1209
1210 #ifdef DEBUG
1211 printf("SIZE:=%u fileObjHeaderSize=%llu\n", size,fileObjHeaderSize);
1212 #endif
1213 }
1214 else
1215 {
1216 return NULL;
1217 }
1218
1219 /*Sanity check data*/
1220 if (size > 0 && size <= needle->max_len && size <= buflen)
1221 {
1222 header += size;
1223 #ifdef DEBUG
1224 printf(" Found a WMV at:=%lld,File size:=%lld\n", c_offset, size);
1225 printf(" Headersize:=%d, numberofHeaderObjects:= %d ,reserved:=%d,%d\n",
1226 headerSize,
1227 numberofHeaderObjects,
1228 reserved[0],
1229 reserved[1]);
1230 #endif
1231
1232 /*Everything seem ok, write to disk*/
1233 file_size = (header - buf);
1234 extractbuf = buf;
1235 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1236 foundat += file_size;
1237 return header;
1238 }
1239
1240 return NULL;
1241
1242 }
1243
1244 /********************************************************************************
1245 *Function: extract_riff
1246 *Description: Given that we have a RIFF header parse header and grab the file size.
1247 *Return: A pointer to where the EOF of the RIFF is in the current buffer
1248 **********************************************************************************/
extract_riff(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset,char * type)1249 unsigned char *extract_riff(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1250 s_spec *needle, u_int64_t f_offset, char *type)
1251 {
1252 unsigned char *buf = foundat;
1253 unsigned char *extractbuf = NULL;
1254 int size = 0;
1255 u_int64_t file_size = 0;
1256
1257 size = htoi(&foundat[4], FOREMOST_LITTLE_ENDIAN); /* Grab the total file size in little endian from offset 4*/
1258 if (strncmp((char *) &foundat[8], "AVI", 3) == 0) /*Sanity Check*/
1259 {
1260 if (strncmp((char *) &foundat[12], "LIST", 4) == 0) /*Sanity Check*/
1261 {
1262 if (size > 0 && size <= needle->max_len && size <= buflen)
1263 {
1264 #ifdef DEBUG
1265 printf("\n Found an AVI at:=%lld,File size:=%d\n", c_offset, size);
1266 #endif
1267 file_size = size;
1268 extractbuf = buf;
1269 needle->suffix = "avi";
1270 if (!strstr(needle->suffix, type) && strcmp(type,"all")!=0)
1271 return foundat + size;
1272 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1273 foundat += size;
1274 return foundat;
1275 }
1276
1277 return buf + needle->header_len;
1278
1279 }
1280 else
1281 {
1282 return buf + needle->header_len;
1283 }
1284 }
1285 else if (strncmp((char *) &foundat[8], "WAVE", 4) == 0) /*Sanity Check*/
1286 {
1287 if (size > 0 && size <= needle->max_len && size <= buflen)
1288 {
1289 #ifdef DEBUG
1290 printf("\n Found a WAVE at:=%lld,File size:=%d\n", c_offset, size);
1291 #endif
1292
1293 file_size = size;
1294 extractbuf = buf;
1295 needle->suffix = "wav";
1296 if (!strstr(needle->suffix, type) && strcmp(type,"all")!=0)
1297 return foundat + size;
1298
1299 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1300 foundat += file_size;
1301 return foundat;
1302 }
1303
1304 return buf + needle->header_len;
1305
1306 }
1307 else
1308 {
1309 return buf + needle->header_len;
1310 }
1311
1312 return NULL;
1313
1314 }
1315
1316 /********************************************************************************
1317 *Function: extract_bmp
1318 *Description: Given that we have a BMP header parse header and grab the file size.
1319 *Return: A pointer to where the EOF of the BMP is in the current buffer
1320 **********************************************************************************/
extract_bmp(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1321 unsigned char *extract_bmp(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1322 s_spec *needle, u_int64_t f_offset)
1323 {
1324 unsigned char *buf = foundat;
1325 int size = 0;
1326 int headerlength = 0;
1327 int v_size = 0;
1328 int h_size = 0;
1329 unsigned char *extractbuf = NULL;
1330 u_int64_t file_size = 0;
1331 char comment[32];
1332 int dataOffset = 0;
1333 int dataSize = 0;
1334
1335 if (buflen < 100)
1336 return buf + needle->header_len;
1337
1338 /*JUMP the first to bytes of the header (BM)*/
1339 size = htoi(&foundat[2], FOREMOST_LITTLE_ENDIAN); /*Grab the total file size in little_endian*/
1340
1341 /*Sanity Check*/
1342 if (size <= 100 || size > needle->max_len)
1343 return buf + needle->header_len;
1344
1345 dataOffset = htoi(&foundat[10], FOREMOST_LITTLE_ENDIAN);
1346 dataSize = htoi(&foundat[34], FOREMOST_LITTLE_ENDIAN);
1347
1348 headerlength = htoi(&foundat[14], FOREMOST_LITTLE_ENDIAN);
1349
1350 if (dataSize + dataOffset != size)
1351 {
1352
1353 //printf("newtest != dataSize:=%d dataOffset:=%d\n",dataSize,dataOffset);
1354 }
1355
1356 //Header length
1357 if (headerlength > 1000 || headerlength <= 0)
1358 return buf + needle->header_len;
1359
1360 //foundat+=4;
1361 v_size = htoi(&foundat[22], FOREMOST_LITTLE_ENDIAN);
1362 h_size = htoi(&foundat[18], FOREMOST_LITTLE_ENDIAN);
1363
1364 //Vertical length
1365 if (v_size <= 0 || v_size > 2000 || h_size <= 0)
1366 return buf + needle->header_len;
1367
1368 #ifdef DEBUG
1369 printf("\n The size of the BMP is %d, Header length:=%d , Vertical Size:= %d, dataSize:=%d dataOffset:=%d\n",
1370 size,
1371 headerlength,
1372 v_size,
1373 dataSize,
1374 dataOffset);
1375 #endif
1376 if (size <= buflen)
1377 {
1378
1379 sprintf(comment, " (%d x %d)", h_size, v_size);
1380 strcat(needle->comment, comment);
1381
1382 file_size = size;
1383 extractbuf = buf;
1384
1385 write_to_disk(s, needle, file_size, extractbuf, (c_offset + f_offset));
1386 foundat += file_size;
1387 return foundat;
1388
1389 }
1390
1391 return NULL;
1392 }
1393
1394 /********************************************************************************
1395 *Function: extract_gif
1396 *Description: Given that we have a GIF header parse the given buffer to determine
1397 * where the file ends.
1398 *Return: A pointer to where the EOF of the GIF is in the current buffer
1399 **********************************************************************************/
extract_gif(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1400 unsigned char *extract_gif(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1401 s_spec *needle, u_int64_t f_offset)
1402 {
1403 unsigned char *buf = foundat;
1404 unsigned char *currentpos = foundat;
1405 unsigned char *extractbuf = NULL;
1406 int bytes_to_search = 0;
1407 unsigned short width = 0;
1408 unsigned short height = 0;
1409 u_int64_t file_size = 0;
1410 char comment[32];
1411 foundat += 4; /*Jump the first 4 bytes of the gif header (GIF8)*/
1412
1413 /*Check if the GIF is type 89a or 87a*/
1414 if (strncmp((char *)foundat, "9a", 2) == 0 || strncmp((char *)foundat, "7a", 2) == 0)
1415 {
1416 foundat += 2; /*Jump the length of the header*/
1417 width = htos(foundat, FOREMOST_LITTLE_ENDIAN);
1418 height = htos(&foundat[2], FOREMOST_LITTLE_ENDIAN);
1419
1420 sprintf(comment, " (%d x %d)", width, height);
1421 strcat(needle->comment, comment);
1422
1423 currentpos = foundat;
1424 if (buflen - (foundat - buf) >= needle->max_len)
1425 bytes_to_search = needle->max_len;
1426 else
1427 bytes_to_search = buflen - (foundat - buf);
1428 foundat = bm_search(needle->footer,
1429 needle->footer_len,
1430 foundat,
1431 bytes_to_search,
1432 needle->footer_bm_table,
1433 needle->case_sen,
1434 SEARCHTYPE_FORWARD);
1435 if (foundat)
1436 {
1437
1438 /*We found the EOF, write the file to disk and return*/
1439 #ifdef DEBUG
1440 printx(foundat, 0, 16);
1441 #endif
1442 file_size = (foundat - buf) + needle->footer_len;
1443 #ifdef DEBUG
1444 printf("The GIF file size is %llu c_offset:=%llu\n", file_size, c_offset);
1445 #endif
1446 extractbuf = buf;
1447 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1448 foundat += needle->footer_len;
1449 return foundat;
1450 }
1451
1452 return NULL;
1453
1454 }
1455 else /*Invalid GIF header return the current pointer*/
1456 {
1457 return foundat;
1458 }
1459
1460 }
1461
/********************************************************************************
 *Function: extract_mpg
 *Description: Given that we have an MPG header, repeatedly search for the first
 *	marker in needle->markerlist and hop over the length-prefixed packets
 *	that follow it until an end code (0xB9) or something unexpected is
 *	seen.  The original author marked this routine as "Not done yet".
 *Return: A pointer to where the extracted data ends in the current buffer,
 *	NULL when a packet runs past the buffer and more data should be read,
 *	or a pointer just past a header when the candidate is rejected.
 **********************************************************************************/
unsigned char *extract_mpg(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
						   s_spec *needle, u_int64_t f_offset)
{
	unsigned char	*buf = foundat;
	unsigned char	*currentpos = NULL;

	unsigned char	*extractbuf = NULL;
	int				bytes_to_search = 0;
	unsigned short	size = 0;
	u_int64_t		file_size = 0;

	/*
	size=htos(&foundat[4],FOREMOST_BIG_ENDIAN);
	printf("size:=%d\n",size);

	printx(foundat,0,16);
	foundat+=4;
	*/
	int				j = 0;

	/* Byte 15 must be 0xBB, otherwise this is treated as a false hit and the
	 * search resumes right after the header.
	 * NOTE(review): buflen is not checked before this read. */
	if (foundat[15] == (unsigned char)'\xBB')
	{
	}
	else
	{

		return buf + needle->header_len;
	}

	/* Each marker search covers at most 2 KB (or the whole buffer when it is
	 * smaller).  NOTE(review): the window is not reduced as foundat moves
	 * forward, so a search near the end of the buffer can run past it. */
	if (buflen <= 2 * KILOBYTE)
	{
		bytes_to_search = buflen;
	}
	else
	{
		bytes_to_search = 2 * KILOBYTE;
	}

	while (1)
	{
		j = 0;
		/* Remember where this round started; it is the fallback cut point. */
		currentpos = foundat;
#ifdef DEBUG
		printf("Searching for marker\n");
#endif
		foundat = bm_search(needle->markerlist[0].value,
							needle->markerlist[0].len,
							foundat,
							bytes_to_search,
							needle->markerlist[0].marker_bm_table,
							needle->case_sen,
							SEARCHTYPE_FORWARD);

		if (foundat)
		{
#ifdef DEBUG
			printf("Found after searching %d\n", foundat - currentpos);
#endif
			/* Hop over consecutive packets whose fourth byte is in
			 * 0xBB..0xEF; each carries a 16-bit big-endian length at
			 * offset 4 and occupies size + 6 bytes. */
			while (1)
			{

				if (foundat[3] >= (unsigned char)'\xBB' && foundat[3] <= (unsigned char)'\xEF')
				{
#ifdef DEBUG
					printf("jumping %d:\n", j);
#endif
					size = htos(&foundat[4], FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
					printf("\t hit: ");
					printx(foundat, 0, 16);
					printf("size:=%d\n\tjump: ", size);
#endif
					/* Running total, only compared against max_len below. */
					file_size += (foundat - buf) + size;
					if (size <= 0 || size > buflen - (foundat - buf))
					{
#ifdef DEBUG
						printf("Not enough room in the buffer ");
#endif
						/* A plausible packet (1..50 KB) that does not fit:
						 * ask for more data unless the running total has
						 * already reached max_len.  Anything else rejects
						 * the candidate. */
						if (size <= 50 * KILOBYTE && size > 0)
						{

							/*We should probably search more*/
							if (file_size < needle->max_len)
							{
								return NULL;
							}
							else
							{
								break;
							}
						}
						else
						{
							return currentpos + needle->header_len;
						}
					}

					foundat += size + 6;
#ifdef DEBUG
					printx(foundat, 0, 16);
#endif
					j++;
				}
				else
				{

					break;
				}
			}

			/* 0xB9 ends the walk (presumably the stream end code - confirm). */
			if (foundat[3] == (unsigned char)'\xB9')
			{
				break;
			}
			else if (foundat[3] != (unsigned char)'\xBA' && foundat[3] != (unsigned char)'\x00')
			{

				/*This is the error state where this doesn't seem to be an mpg anymore*/
				size = htos(&foundat[4], FOREMOST_BIG_ENDIAN);
#ifdef DEBUG
				printf("\t ***TEST: %x\n", foundat[3]);
				printx(foundat, 0, 16);

				printf("size:=%d\n", size);
#endif
				/* With at least 1 MB already walked, keep what we have and
				 * cut the file at the start of this round; otherwise the
				 * candidate is rejected. */
				if ((currentpos - buf) >= 1 * MEGABYTE)
				{
					foundat = currentpos;
					break;
				}

				return currentpos + needle->header_len;

			}
			/* NOTE(review): this branch cannot be reached, 0xB3 already
			 * satisfies the condition above. */
			else if (foundat[3] == (unsigned char)'\xB3')
			{
				foundat += 3;
			}
			else
			{
				/* Step past the marker and search again. */
				foundat += 3;
			}
		}
		else
		{
			/* No marker within the window: same 1 MB rule as the error state. */
			if ((currentpos - buf) >= 1 * MEGABYTE)
			{
				foundat = currentpos;
				break;
			}
			else
			{
#ifdef DEBUG
				printf("RETURNING BUF\n");
#endif
				return buf + needle->header_len;
			}
		}
	}

	/* The size is measured from the header to the stop position plus the
	 * footer length; results under 1 KB are rejected. */
	if (foundat)
	{
		file_size = (foundat - buf) + needle->footer_len;
		if (file_size < 1 * KILOBYTE)
			return buf + needle->header_len;
	}
	else
	{
		return buf + needle->header_len;
	}

	/* Never write more than the buffer holds. */
	if (file_size > buflen)
		file_size = buflen;
	foundat = buf;
#ifdef DEBUG
	printf("The file size is  %llu  c_offset:=%llu\n", file_size, c_offset);
#endif

	extractbuf = buf;
	write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
	foundat += file_size;
	return foundat;
}
1648
1649
1650 /********************************************************************************
1651 *Function: extract_mp4
1652 * Not done yet
1653 **********************************************************************************/
extract_mp4(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1654 unsigned char *extract_mp4(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1655 s_spec *needle, u_int64_t f_offset)
1656 {
1657 unsigned char *buf = foundat;
1658
1659 unsigned char *extractbuf = NULL;
1660 unsigned int size = 0;
1661 u_int64_t file_size = 0;
1662
1663
1664 while(1)
1665 {
1666 size=htoi(&foundat[28],FOREMOST_BIG_ENDIAN);
1667 if(size ==0)
1668 {
1669 //printf("size ==0\n");
1670 foundat+=28;
1671 break;
1672 }
1673 //printf("size:=%d\n",size);
1674 if(size > 0 && size < buflen)
1675 {
1676 if(!isprint(foundat[32]) || !isprint(foundat[33]))
1677 {
1678 //printf("print err\n");
1679 break;
1680 //return foundat+8;
1681 }
1682 foundat+=size;
1683
1684 }
1685 else
1686 {
1687 if (size < needle->max_len)
1688 {
1689 //printf("Searching More\n");
1690 return NULL;
1691 }
1692 else
1693 {
1694 //printf("ERR\n");
1695 //return foundat+8;
1696 break;
1697 }
1698 }
1699
1700 //printx(foundat,0,32);
1701
1702 }
1703 if (foundat)
1704 {
1705 file_size = (foundat - buf) + needle->footer_len;
1706 if (file_size < 1 * KILOBYTE)
1707 return buf + needle->header_len;
1708 }
1709
1710
1711 if (file_size > buflen)
1712 file_size = buflen;
1713 foundat = buf;
1714
1715
1716 extractbuf = buf;
1717 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1718 foundat += file_size;
1719 return foundat;
1720 }
1721
1722
1723 /********************************************************************************
1724 *Function: extract_png
1725 *Description: Given that we have a PNG header parse the given buffer to determine
1726 * where the file ends.
1727 *Return: A pointer to where the EOF of the PNG is in the current buffer
1728 **********************************************************************************/
extract_png(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1729 unsigned char *extract_png(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1730 s_spec *needle, u_int64_t f_offset)
1731 {
1732 unsigned char *buf = foundat;
1733 unsigned char *currentpos = NULL;
1734
1735 unsigned char *extractbuf = NULL;
1736 int size = 0;
1737 int height = 0;
1738 int width = 0;
1739 u_int64_t file_size = 0;
1740 char comment[32];
1741
1742 if (buflen < 100)
1743 return NULL;
1744 foundat += 8;
1745 width = htoi(&foundat[8], FOREMOST_BIG_ENDIAN);
1746 height = htoi(&foundat[12], FOREMOST_BIG_ENDIAN);
1747
1748 if (width < 1 || height < 1)
1749 return foundat;
1750
1751 if (width > 3000 || height > 3000)
1752 return foundat;
1753
1754 sprintf(comment, " (%d x %d)", width, height);
1755 strcat(needle->comment, comment);
1756
1757 while (1) /* Jump through the headers until we reach the "data" part of the file*/
1758 {
1759 size = htoi(foundat, FOREMOST_BIG_ENDIAN);
1760 #ifdef DEBUG
1761 printx(foundat, 0, 16);
1762 printf("Size:=%d\n", size);
1763 #endif
1764
1765 currentpos = foundat;
1766 if (size <= 0 || size > buflen - (foundat - buf))
1767 {
1768 #ifdef DEBUG
1769 printf("buflen - (foundat-buf)=%lu\n", buflen - (foundat - buf));
1770 #endif
1771 return currentpos;
1772 }
1773
1774 /*12 is the length of the size, TYPE, and CRC field*/
1775 foundat += size + 12;
1776
1777 if (isprint(foundat[4]))
1778 {
1779 if (strncmp((char *) &foundat[4], "IEND", 4) == 0)
1780 {
1781 break;
1782 }
1783 }
1784 else
1785 {
1786 #ifdef DEBUG
1787 printx(foundat, 0, 16);
1788 printf("Not ascii returning\n");
1789 #endif
1790 return currentpos;
1791 }
1792
1793 }
1794
1795 if (foundat)
1796 {
1797 file_size = (foundat - buf) + htoi(foundat, FOREMOST_BIG_ENDIAN) + 12;
1798
1799 if (file_size > buflen)
1800 file_size = buflen;
1801 foundat = buf;
1802 #ifdef DEBUG
1803 printf("The file size is %llu c_offset:=%llu\n", file_size, c_offset);
1804 #endif
1805 extractbuf = buf;
1806 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1807 foundat += file_size;
1808 return foundat;
1809 }
1810
1811 return NULL;
1812 }
1813
1814 /********************************************************************************
1815 *Function: extract_jpeg
1816 *Description: Given that we have a JPEG header parse the given buffer to determine
1817 * where the file ends.
1818 *Return: A pointer to where the EOF of the JPEG is in the current buffer
1819 **********************************************************************************/
extract_jpeg(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1820 unsigned char *extract_jpeg(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
1821 s_spec *needle, u_int64_t f_offset)
1822 {
1823 unsigned char *buf = foundat;
1824 unsigned char *currentpos = NULL;
1825
1826 unsigned char *extractbuf = NULL;
1827 unsigned short headersize;
1828 int bytes_to_search = 0;
1829 int hasTable = FALSE;
1830 int hasHuffman = FALSE;
1831 u_int64_t file_size = 0;
1832
1833 // char comment[32];
1834
1835 /*Check if we have a valid header*/
1836 if (buflen < 128)
1837 {
1838 return NULL;
1839 }
1840
1841 if (foundat[3] == (unsigned char)'\xe0')
1842 {
1843
1844 //JFIF header
1845 //sprintf(comment," (JFIF)");
1846 //strcat(needle->comment,comment);
1847 }
1848 else if (foundat[3] == (unsigned char)'\xe1')
1849 {
1850
1851 //sprintf(comment," (EXIF)");
1852 //strcat(needle->comment,comment);
1853 }
1854 else
1855 return foundat + needle->header_len; //Invalid keep searching
1856 while (1) /* Jump through the headers until we reach the "data" part of the file*/
1857 {
1858 #ifdef DEBUG
1859 printx(foundat, 0, 16);
1860 #endif
1861 foundat += 2;
1862 headersize = htos(&foundat[2], FOREMOST_BIG_ENDIAN);
1863 #ifdef DEBUG
1864 printf("Headersize:=%d buflen:=%lld\n", headersize, buflen);
1865 #endif
1866
1867
1868 if (((foundat + headersize) - buf) > buflen){ return NULL; }
1869
1870 foundat += headersize;
1871
1872 if (foundat[2] != (unsigned char)'\xff')
1873 {
1874 break;
1875 }
1876
1877 /*Ignore 2 "0xff" side by side*/
1878 if (foundat[2] == (unsigned char)'\xff' && foundat[3] == (unsigned char)'\xff')
1879 {
1880 foundat++;
1881 }
1882
1883 if (foundat[3] == (unsigned char)'\xdb' || foundat[4] == (unsigned char)'\xdb')
1884 {
1885 hasTable = TRUE;
1886 }
1887 else if (foundat[3] == (unsigned char)'\xc4')
1888 {
1889 hasHuffman = TRUE;
1890 }
1891 }
1892
1893 /*All jpegs must contain a Huffman marker as well as a quantization table*/
1894 if (!hasTable || !hasHuffman)
1895 {
1896 #ifdef DEBUG
1897 printf("No Table or Huffman \n");
1898 #endif
1899 return buf + needle->header_len;
1900 }
1901
1902 currentpos = foundat;
1903
1904 //sprintf("Searching for footer\n");
1905 if (buflen < (foundat - buf)) {
1906 #ifdef DEBUG
1907 printf("avoided bug in extract_jpeg!\n");
1908 #endif
1909 bytes_to_search = 0;
1910 } else {
1911 if (buflen - (foundat - buf) >= needle->max_len)
1912 bytes_to_search = needle->max_len;
1913 else
1914 bytes_to_search = buflen - (foundat - buf);
1915 }
1916
1917 foundat = bm_search(needle->footer,
1918 needle->footer_len,
1919 foundat,
1920 bytes_to_search,
1921 needle->footer_bm_table,
1922 needle->case_sen,
1923 SEARCHTYPE_FORWARD);
1924
1925 if (foundat) /*Found found a valid JPEG*/
1926 {
1927
1928 /*We found the EOF, write the file to disk and return*/
1929 file_size = (foundat - buf) + needle->footer_len;
1930 #ifdef DEBUG
1931 printf("The jpeg file size is %llu c_offset:=%llu\n", file_size, c_offset);
1932 #endif
1933
1934 //extractbuf=(unsigned char*) malloc(file_size*sizeof(char));
1935 //memcpy(extractbuf,buf,file_size);
1936 extractbuf = buf;
1937 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
1938 foundat += needle->footer_len;
1939
1940 ////free(extractbuf);
1941 return foundat;
1942 }
1943 else
1944 {
1945 return NULL;
1946 }
1947
1948 } //End extract_jpeg
1949
1950 /********************************************************************************
1951 *Function: extract_generic
1952 *Description:
1953 *Return: A pointer to where the EOF of the
1954 **********************************************************************************/
extract_generic(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)1955 unsigned char *extract_generic(f_state *s, u_int64_t c_offset, unsigned char *foundat,
1956 u_int64_t buflen, s_spec *needle, u_int64_t f_offset)
1957 {
1958 unsigned char *buf = foundat;
1959 unsigned char *endptr = foundat;
1960 unsigned char *beginptr = foundat;
1961 unsigned char *extractbuf = NULL;
1962 int bytes_to_search = 0;
1963 u_int64_t file_size = 0;
1964 int begin=0;
1965 int end=0;
1966
1967
1968 if (buflen - (foundat - buf) >= needle->max_len)
1969 bytes_to_search = needle->max_len;
1970 else
1971 bytes_to_search = buflen - (foundat - buf);
1972
1973 if(needle->searchtype ==SEARCHTYPE_FORWARD_NEXT)
1974 {
1975 foundat+=needle->header_len;
1976 foundat = bm_search(needle->header,
1977 needle->header_len,
1978 foundat,
1979 bytes_to_search,
1980 needle->footer_bm_table,
1981 needle->case_sen,
1982 SEARCHTYPE_FORWARD);
1983 }
1984 else if(needle->searchtype ==SEARCHTYPE_ASCII)
1985 {
1986
1987
1988 while (isprint(foundat[end]) || foundat[end] == '\x0a' || foundat[end] == '\x0d' || foundat[end] == '\x09')
1989 {
1990 end++;
1991 }
1992
1993 foundat+=end;
1994 endptr=foundat;
1995 foundat=buf;
1996
1997 while (isprint(foundat[begin-1]) || foundat[begin-1] == '\x0a' || foundat[begin-1] == '\x0d' || foundat[begin-1] == '\x09')
1998 {
1999 begin--;
2000 }
2001
2002 foundat+=begin;
2003 beginptr=foundat;
2004
2005 buf=beginptr;
2006 foundat=endptr;
2007 //printx(buf,0,4);
2008
2009 file_size=end-begin;
2010 //fprintf(stderr,"file_size=%llu end=%d begin=%d ptrsize=%d ptrsize2=%d\n",file_size,end,begin,endptr-beginptr,foundat-buf);
2011 if(buf==foundat)
2012 {
2013 fprintf(stderr,"Returning Foundat\n");
2014 return foundat+needle->header_len;
2015 }
2016 }
2017 else if (needle->footer == NULL || strlen((char *)needle->footer) < 1)
2018 {
2019 #ifdef DEBUG
2020 printf("footer is NULL\n");
2021 #endif
2022 foundat = NULL;
2023 }
2024 else
2025 {
2026 #ifdef DEBUG
2027 printf("footer is not NULL %p\n", needle->footer);
2028 #endif
2029 foundat = bm_search(needle->footer,
2030 needle->footer_len,
2031 foundat,
2032 bytes_to_search,
2033 needle->footer_bm_table,
2034 needle->case_sen,
2035 SEARCHTYPE_FORWARD);
2036 }
2037
2038 if (foundat)
2039 {
2040 #ifdef DEBUG
2041 printf("found %s!!!\n", needle->footer);
2042 #endif
2043 if(needle->searchtype ==SEARCHTYPE_FORWARD_NEXT || needle->searchtype ==SEARCHTYPE_ASCII)
2044 {
2045 file_size = (foundat - buf);
2046 }
2047 else
2048 {
2049 file_size = (foundat - buf) + needle->footer_len;
2050 }
2051 }
2052 else
2053 {
2054 file_size = needle->max_len;
2055 }
2056
2057 if (file_size == 0)
2058 {
2059 file_size = needle->max_len;
2060 }
2061
2062 if (file_size > (buflen-begin))
2063 {
2064 file_size = buflen;
2065 }
2066
2067 #ifdef DEBUG
2068 printf("The file size is %llu c_offset:=%llu\n", file_size, c_offset);
2069 #endif
2070
2071 extractbuf = buf;
2072 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
2073
2074 if(needle->searchtype !=SEARCHTYPE_ASCII)
2075 {
2076 foundat=buf;
2077 foundat += needle->header_len;
2078 }
2079 return foundat;
2080
2081
2082
2083 }
2084
2085 /********************************************************************************
2086 *Function: extract_exe
2087 *Description:
2088 *Return: A pointer to where the EOF of the
2089 **********************************************************************************/
extract_exe(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)2090 unsigned char *extract_exe(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
2091 s_spec *needle, u_int64_t f_offset)
2092 {
2093 unsigned char *buf = foundat;
2094 unsigned char *extractbuf = NULL;
2095 u_int64_t file_size = 0;
2096 unsigned short pe_offset = 0;
2097 unsigned int SizeOfCode = 0;
2098 unsigned int SizeOfInitializedData = 0;
2099 unsigned int SizeOfUninitializedData = 0;
2100 unsigned int rva = 0;
2101 unsigned int offset = 0;
2102 unsigned short sections = 0;
2103 unsigned int sizeofimage = 0;
2104 unsigned int raw_section_size = 0;
2105 unsigned int size_of_headers = 0;
2106 unsigned short dll = 0;
2107 unsigned int sum = 0;
2108 unsigned short exe_char = 0;
2109 unsigned int align = 0;
2110 int i = 0;
2111 time_t compile_time = 0;
2112 struct tm *ret_time;
2113 char comment[32];
2114 char ascii_time[32];
2115
2116 if (buflen < 100)
2117 return foundat + 2;
2118 pe_offset = htos(&foundat[60], FOREMOST_LITTLE_ENDIAN);
2119 if (pe_offset < 1 || pe_offset > 1000 || pe_offset > buflen)
2120 {
2121 return foundat + 60;
2122 }
2123
2124 foundat += pe_offset;
2125 if (foundat[0] != (unsigned char)'\x50' || foundat[1] != (unsigned char)'\x45')
2126 {
2127 return foundat;
2128 }
2129
2130 sections = htos(&foundat[6], FOREMOST_LITTLE_ENDIAN);
2131 if (buflen < (40 * sections + 224))
2132 {
2133 return foundat;
2134 }
2135
2136 compile_time = (time_t) htoi(&foundat[8], FOREMOST_LITTLE_ENDIAN);
2137 ret_time = gmtime(&compile_time);
2138 sprintf(ascii_time,
2139 "%02d/%02d/%04d %02d:%02d:%02d",
2140 ret_time->tm_mon + 1,
2141 ret_time->tm_mday,
2142 ret_time->tm_year + 1900,
2143 ret_time->tm_hour,
2144 ret_time->tm_min,
2145 ret_time->tm_sec);
2146 chop(ascii_time);
2147
2148 sprintf(comment, ascii_time);
2149 strcat(needle->comment, comment);
2150 exe_char = htos(&foundat[22], FOREMOST_LITTLE_ENDIAN);
2151 if (exe_char & 0x2000)
2152 {
2153 dll = 1;
2154 }
2155 else if (exe_char & 0x1000)
2156 {
2157
2158 //printf("System File!!!\n");
2159 }
2160 else if (exe_char & 0x0002)
2161 {
2162
2163 //printf("EXE !!!\n");
2164 }
2165 else
2166 {
2167 return foundat;
2168 }
2169
2170 foundat += 0x18; /*Jump to opt header should be 0x0b 0x01*/
2171
2172 SizeOfCode = htoi(&foundat[4], FOREMOST_LITTLE_ENDIAN);
2173 SizeOfInitializedData = htoi(&foundat[8], FOREMOST_LITTLE_ENDIAN);
2174 SizeOfUninitializedData = htoi(&foundat[12], FOREMOST_LITTLE_ENDIAN);
2175 rva = htoi(&foundat[16], FOREMOST_LITTLE_ENDIAN);
2176 align = htoi(&foundat[36], FOREMOST_LITTLE_ENDIAN);
2177
2178 sizeofimage = htoi(&foundat[56], FOREMOST_LITTLE_ENDIAN);
2179 size_of_headers = htoi(&foundat[60], FOREMOST_LITTLE_ENDIAN);
2180 foundat += 224;
2181
2182 /*Start of sections*/
2183 for (i = 0; i < sections; i++)
2184 {
2185
2186 //strncpy(name,foundat,8);
2187 offset = htoi(&foundat[20], FOREMOST_LITTLE_ENDIAN);
2188 raw_section_size = htoi(&foundat[16], FOREMOST_LITTLE_ENDIAN);
2189
2190 //printf("\t%s size=%d offset=%d\n",name,raw_section_size,offset);
2191 foundat += 40;
2192
2193 //rem+=(raw_section_size%align);
2194 //sum+=raw_section_size;
2195 sum = offset + raw_section_size;
2196 }
2197
2198 /*
2199 printf("rva is %d sum= %d\n",rva,sum);
2200 printf("soi is %d,soh is %d \n",sizeofimage,size_of_headers);
2201 printf("we are off by %d\n",sum-buflen);
2202 printf("soc=%d ,soidr=%d, souid=%d\n",SizeOfCode,SizeOfInitializedData,SizeOfUninitializedData);
2203 printf("fs=%d ,extr=%d\n",SizeOfCode+SizeOfInitializedData,SizeOfUninitializedData);
2204 */
2205 file_size = sum;
2206 if (file_size < 512 || file_size > 4 * MEGABYTE)
2207 {
2208 return foundat + 60;
2209 }
2210
2211 if (file_size > buflen)
2212 file_size = buflen;
2213 foundat = buf;
2214 #ifdef DEBUG
2215 printf("The file size is %llu c_offset:=%llu\n", file_size, c_offset);
2216 #endif
2217
2218 extractbuf = buf;
2219 if (dll == 1)
2220 {
2221 strcpy(needle->suffix, "dll");
2222 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
2223 strcpy(needle->suffix, "exe");
2224 }
2225 else
2226 {
2227 write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset);
2228 }
2229
2230 foundat += needle->header_len;
2231 return (buf + file_size);
2232 }
2233
2234
2235 /********************************************************************************
2236 *Function: extract_reg
2237 *Description:
2238 *Return: A pointer to where the EOF of the
2239 **********************************************************************************/
extract_reg(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)2240 unsigned char *extract_reg(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
2241 s_spec *needle, u_int64_t f_offset)
2242 {
2243 unsigned char *buf = foundat;
2244 unsigned char *extractbuf = NULL;
2245 int sizeofreg = htoi(&foundat[0x28], FOREMOST_LITTLE_ENDIAN);
2246 int file_size=0;
2247 if(sizeofreg < 0 || sizeofreg > needle->max_len)
2248 {
2249 return (foundat+4);
2250 }
2251 foundat+=sizeofreg;
2252 file_size = (foundat - buf);
2253
2254 extractbuf = buf;
2255
2256
2257 write_to_disk(s, needle, file_size , extractbuf, c_offset + f_offset);
2258
2259
2260 return NULL;
2261 }
2262 /********************************************************************************
2263 *Function: extract_rar
2264 *Description:
2265 *Return: A pointer to where the EOF of the
2266 **********************************************************************************/
extract_rar(f_state * s,u_int64_t c_offset,unsigned char * foundat,u_int64_t buflen,s_spec * needle,u_int64_t f_offset)2267 unsigned char *extract_rar(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
2268 s_spec *needle, u_int64_t f_offset)
2269 {
2270 unsigned char *buf = foundat;
2271 unsigned char *extractbuf = NULL;
2272 u_int64_t file_size = 0;
2273 unsigned short headersize = 0;
2274 unsigned short flags = 0;
2275 unsigned int filesize = 0;
2276 unsigned int tot_file_size = 0;
2277 unsigned int ufilesize = 0;
2278 int i = 0;
2279 int scan = 0;
2280 int flag = 0;
2281 int passwd = 0;
2282 u_int64_t bytes_to_search = 50 * KILOBYTE;
2283 char comment[32];
2284
2285 /*Marker Block*/
2286 headersize = htos(&foundat[5], FOREMOST_LITTLE_ENDIAN);
2287 foundat += headersize;
2288
2289 /*Archive Block*/
2290 headersize = htos(&foundat[5], FOREMOST_LITTLE_ENDIAN);
2291 filesize = htoi(&foundat[7], FOREMOST_LITTLE_ENDIAN);
2292
2293 if (foundat[2] != '\x73')
2294 {
2295 return foundat; /*Error*/
2296 }
2297
2298 flags = htos(&foundat[3], FOREMOST_LITTLE_ENDIAN);
2299 if ((flags & 0x01) != 0)
2300 {
2301 sprintf(comment, " Multi-volume:");
2302 strcat(needle->comment, comment);
2303 }
2304
2305 if (flags & 0x02)
2306 {
2307 sprintf(comment, " an archive comment is present:");
2308 strcat(needle->comment, comment);
2309 }
2310
2311 foundat += headersize;
2312
2313 if (foundat[2] != '\x74')
2314 {
2315 for (i = 0; i < 500; i++)
2316 {
2317 if (foundat[i] == '\x74')
2318 {
2319 foundat += i - 2;
2320 scan = 1;
2321 break;
2322 }
2323 }
2324 }
2325
2326 if (headersize == 13 && foundat[2] != '\x74')
2327 {
2328
2329 if (scan == 0)
2330 {
2331 sprintf(comment, "Encrypted Headers!");
2332 strcat(needle->comment, comment);
2333 }
2334
2335 if (buflen - (foundat - buf) >= needle->max_len)
2336 bytes_to_search = needle->max_len;
2337 else
2338 bytes_to_search = buflen - (foundat - buf);
2339
2340 //printf("bytes_to_search:=%d needle->footer_len:=%d needle->header_len:=%d\n",bytes_to_search,needle->footer_len,needle->header_len);
2341 foundat = bm_search(needle->footer,
2342 needle->footer_len,
2343 foundat,
2344 bytes_to_search,
2345 needle->footer_bm_table,
2346 needle->case_sen,
2347 SEARCHTYPE_FORWARD);
2348 if (foundat == NULL)
2349 {
2350 tot_file_size = bytes_to_search;
2351 foundat = buf + tot_file_size;
2352 }
2353 }
2354 else
2355 {
2356
2357 /*Loop through files*/
2358 while (foundat[2] == '\x74')
2359 {
2360
2361 headersize = htos(&foundat[5], FOREMOST_LITTLE_ENDIAN);
2362 filesize = htoi(&foundat[7], FOREMOST_LITTLE_ENDIAN);
2363 ufilesize = htoi(&foundat[11], FOREMOST_LITTLE_ENDIAN);
2364
2365 if (headersize < 1 || headersize > buflen)
2366 flag = 1;
2367 if (filesize < 0 || filesize > buflen)
2368 flag = 1;
2369 if ((headersize + filesize) > buflen)
2370 flag = 1;
2371 if (ufilesize < 0)
2372 flag = 1;
2373
2374 flags = htos(&foundat[3], FOREMOST_LITTLE_ENDIAN);
2375 if ((flags & 0x04) != 0)
2376 {
2377 passwd = 1;
2378 }
2379
2380 tot_file_size = (foundat - buf);
2381 if ((tot_file_size + headersize + filesize) > buflen)
2382 {
2383 break;
2384 }
2385
2386 foundat += headersize + filesize;
2387 }
2388
2389 if (passwd == 1)
2390 {
2391 sprintf(comment, "Password Protected:");
2392 strcat(needle->comment, comment);
2393 }
2394
2395 if (flag == 1)
2396 {
2397 sprintf(comment, "Encrypted Headers!");
2398 strcat(needle->comment, comment);
2399 foundat = bm_search(needle->footer,
2400 needle->footer_len,
2401 foundat,
2402 bytes_to_search,
2403 needle->footer_bm_table,
2404 needle->case_sen,
2405 SEARCHTYPE_FORWARD);
2406 if (foundat == NULL)
2407 {
2408 tot_file_size = bytes_to_search;
2409 foundat = buf + tot_file_size;
2410 }
2411 }
2412
2413 if (foundat[2] != '\x7B' && tot_file_size == 0)
2414 {
2415
2416 //printf("Error 7B!!!! %x\n",foundat[2]);
2417 return foundat;
2418 }
2419
2420 foundat += 7;
2421
2422 }
2423
2424 if (foundat)
2425 {
2426
2427 /*We found the EOF, write the file to disk and return*/
2428 tot_file_size = (foundat - buf);
2429 if (tot_file_size > buflen)
2430 file_size = buflen;
2431
2432 extractbuf = buf;
2433 write_to_disk(s, needle, tot_file_size, extractbuf, c_offset + f_offset);
2434 return foundat;
2435 }
2436 else
2437 {
2438 return NULL;
2439 }
2440
2441 return NULL;
2442 }
2443
/********************************************************************************
 *Function: extract_file
 *Description: Hands the header found at "foundat" to the extraction routine
 *	that belongs to needle->type.  For some types needle->suffix is reset
 *	to a generic suffix before the routine is called.
 *Return: Whatever the selected extraction routine returns, or NULL when
 *	needle->type matches none of the types handled here.
 **********************************************************************************/
unsigned char *extract_file(f_state *s, u_int64_t c_offset, unsigned char *foundat, u_int64_t buflen,
							s_spec *needle, u_int64_t f_offset)
{
	/* Pictures */
	if (needle->type == JPEG)
		return extract_jpeg(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == GIF)
		return extract_gif(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == PNG)
		return extract_png(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == BMP)
		return extract_bmp(s, c_offset, foundat, buflen, needle, f_offset);

	/* RIFF containers; the last argument names the sub-type wanted */
	if (needle->type == RIFF)
	{
		needle->suffix = "rif";
		return extract_riff(s, c_offset, foundat, buflen, needle, f_offset, "all");
	}

	if (needle->type == AVI)
		return extract_riff(s, c_offset, foundat, buflen, needle, f_offset, "avi");

	if (needle->type == WAV)
	{
		needle->suffix = "rif";
		return extract_riff(s, c_offset, foundat, buflen, needle, f_offset, "wav");
	}

	if (needle->type == WMV)
		return extract_wmv(s, c_offset, foundat, buflen, needle, f_offset);

	/* OLE containers; the last argument names the sub-type wanted */
	if (needle->type == OLE)
	{
		needle->suffix = "ole";
		return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "all");
	}

	if (needle->type == DOC)
		return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "doc");

	if (needle->type == PPT)
		return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "ppt");

	if (needle->type == XLS)
	{
		needle->suffix = "ole";
		return extract_ole(s, c_offset, foundat, buflen, needle, f_offset, "xls");
	}

	/* Documents and text */
	if (needle->type == PDF)
		return extract_pdf(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == CPP)
		return extract_cpp(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == HTM)
		return extract_htm(s, c_offset, foundat, buflen, needle, f_offset);

	/* Video */
	if (needle->type == MPG)
		return extract_mpg(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == MP4)
		return extract_mp4(s, c_offset, foundat, buflen, needle, f_offset);

	/* Archives; the ZIP routine also serves the sxw/sxc/sxi types */
	if (needle->type == ZIP)
		return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "all");

	if (needle->type == RAR)
		return extract_rar(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == SXW)
		return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "sxw");

	if (needle->type == SXC)
		return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "sxc");

	if (needle->type == SXI)
		return extract_zip(s, c_offset, foundat, buflen, needle, f_offset, "sxi");

	/* Everything else */
	if (needle->type == EXE)
		return extract_exe(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == MOV || needle->type == VJPEG)
		return extract_mov(s, c_offset, foundat, buflen, needle, f_offset);

	if (needle->type == CONF)
		return extract_generic(s, c_offset, foundat, buflen, needle, f_offset);

	/* No routine for this type */
	return NULL;
}
2557