1 /*
2 
3     File: file_zip.c
4 
5     Copyright (C) 1998-2009 Christophe GRENIER <grenier@cgsecurity.org>
6     Copyright (C) 2007      Christophe GISQUET <christophe.gisquet@free.fr>
7 
8     This software is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write the Free Software Foundation, Inc., 51
20     Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 
22     Information about ZIP file format: http://www.info-zip.org/doc/appnote-iz-latest.zip
23  */
24 
/* Absolutely required for the zip64 stuff */
26 /* #define _FILE_OFFSET_BITS 64 */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 #ifdef HAVE_STRING_H
32 #include <string.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #include <stdio.h>
38 #include "types.h"
39 #include "filegen.h"
40 #include "common.h"
41 #include "log.h"
42 
43 /* #define DEBUG_ZIP */
44 
45 extern const file_hint_t file_hint_doc;
46 static void register_header_check_zip(file_stat_t *file_stat);
47 static int header_check_zip(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
48 static void file_check_zip(file_recovery_t *file_recovery);
49 static unsigned int pos_in_mem(const unsigned char *haystack, const unsigned int haystack_size, const unsigned char *needle, const unsigned int needle_size);
50 static void file_rename_zip(file_recovery_t *file_recovery);
51 static char first_filename[256];
52 
53 const file_hint_t file_hint_zip= {
54   .extension="zip",
55   .description="zip archive including OpenOffice and MSOffice 2007",
56   .max_filesize=PHOTOREC_MAX_FILE_SIZE,
57   .recover=1,
58   .enable_by_default=1,
59   .register_header_check=&register_header_check_zip
60 };
61 
/* Local file header signature "PK\003\004" as it appears on disk. */
static const unsigned char zip_header[4] = { 0x50, 0x4B, 0x03, 0x04 };

/* Record signatures, as obtained after le32() conversion of the four bytes
 * that introduce each record. */
#define ZIP_CENTRAL_DIR         0x02014B50  /* central directory entry */
#define ZIP_FILE_ENTRY          0x04034B50  /* local file entry */
#define ZIP_SIGNATURE           0x05054B50  /* signature record */
#define ZIP_END_CENTRAL_DIR     0x06054B50  /* end of central directory */
#define ZIP_CENTRAL_DIR64       0x06064B50  /* 64b end of central directory */
#define ZIP_END_CENTRAL_DIR64   0x07064B50  /* 64b end of central directory locator */
#define ZIP_DATA_DESCRIPTOR     0x08074B50  /* data descriptor */
70 
/* Fixed-size part of a file entry, i.e. the 26 bytes that follow the 4-byte
 * record signature.  Multi-byte fields are read through le16()/le32(). */
struct zip_file_entry {
  uint16_t version;                 /* version needed to extract */

  /* General purpose flags, declared as sixteen single bits */
  uint16_t is_encrypted:1;          /* file is encrypted */
  uint16_t compression_info:2;      /* details about the compression method */
  uint16_t has_descriptor:1;        /* a data descriptor follows the compressed data */
  uint16_t enhanced_deflate:1;      /* reserved for use with method 8 */
  uint16_t is_patched:1;            /* file is compressed patched data */
  uint16_t strong_encrypt:1;        /* strong encryption (version >= 50) */
  uint16_t unused2:4;               /* unused */
  uint16_t uses_unicode:1;          /* file name and comments are UTF-8 */
  uint16_t unused3:1;               /* reserved by PKWARE for enhanced compression */
  uint16_t encrypted_central_dir:1; /* selected local header values are masked */
  uint16_t unused1:2;               /* unused */

  uint16_t compression;             /* compression method */
  uint16_t last_mod_time;           /* last modification time (DOS format) */
  uint16_t last_mod_date;           /* last modification date (DOS format) */
  uint32_t crc32;                   /* CRC32 of the data */
  uint32_t compressed_size;         /* compressed size */
  uint32_t uncompressed_size;       /* uncompressed size */
  uint16_t filename_length;         /* length of the file name that follows */
  uint16_t extra_length;            /* length of the extra field after the name */
} __attribute__ ((gcc_struct, __packed__));
95 
/* Extra field as read right after the file name of a local entry;
 * zip_parse_file_entry() takes the 64-bit compressed size from it when the
 * tag equals 1 and the 32-bit size field holds 0xffffffff. */
struct zip64_extra_entry
{
  uint16_t tag;                 /* extra field identifier */
  uint16_t size;                /* size field of the extra block */
  uint64_t uncompressed_size;   /* 64-bit uncompressed size */
  uint64_t compressed_size;     /* 64-bit compressed size */
  uint64_t offset;              /* offset of local header record */
  uint32_t disk_start_number;   /* number of the disk on which this file starts */
} __attribute__ ((gcc_struct, __packed__));
105 
/* Short type names for the two on-disk layouts. */
typedef struct zip64_extra_entry zip64_extra_entry_t;
typedef struct zip_file_entry zip_file_entry_t;

/* Data length computed by zip_parse_file_entry() for the latest entry;
 * zip_parse_data_desc() compares the descriptor's compressed size to it. */
static uint32_t expected_compressed_size = 0;
110 
/**
 * Scan a stream forward, from its current position, for the first occurrence
 * of a byte pattern.  The stream is read in 4096-byte chunks that overlap by
 * size-1 bytes so that a match straddling two chunks is still found.
 *
 * @param f       stream to scan
 * @param needle  pattern to look for
 * @param size    length of the pattern in bytes, from 1 to 4096
 * @return the number of bytes between the starting position and the match,
 *         the stream being left on the first byte of the match; -1 when the
 *         pattern is not found before the end of the stream, when a read or
 *         seek fails, or when size is out of range.
 */
static int64_t file_get_pos(FILE *f, const void* needle, const unsigned int size)
{
  enum { CHUNK_SIZE = 4096 };
  char     *buffer;
  int64_t  total   = 0;
#ifdef DEBUG_ZIP
  log_trace("zip: file_get_pos(f, needle, %u)\n", size);
#endif
  /* An empty pattern would let the scan below walk past the buffer, and a
     pattern larger than one chunk can never match. */
  if(size==0 || size>CHUNK_SIZE)
    return -1;
  /* NOTE(review): MALLOC is a project macro; assumed not to return NULL */
  buffer=(char *)MALLOC(CHUNK_SIZE);

  while (!feof(f))
  {
    unsigned int count = 0;
    unsigned int left;
    const unsigned int read_size= fread(buffer, 1, CHUNK_SIZE, f);
    left  = read_size;

    while (left>=size)
    {
      if (buffer[count]==*(const char *)needle && memcmp(buffer+count, needle, size)==0)
      {
        free(buffer);
        /* Step back from the end of the chunk to the start of the match */
        if(my_fseek(f, (off_t)count-(off_t)read_size, SEEK_CUR)<0)
        {
          log_trace("zip: file_get_pos count-read failed\n");
          return -1;
        }
        return total;
      }
      count++;
      total++;
      left--;
    }
    /* A chunk shorter than the pattern means end of stream or a read error:
       stop instead of seeking back over bytes that were never read.
       Otherwise rewind size-1 bytes so the next chunk overlaps this one. */
    if(read_size<size || feof(f) || my_fseek(f, (off_t)1-(off_t)size, SEEK_CUR)<0)
    {
      log_trace("zip: file_get_pos 1-size failed\n");
      free(buffer);
      return -1;
    }
  }
  free(buffer);
  return -1;
}
152 
/**
 * Parse one local file entry; fr->handle must be positioned right after the
 * ZIP_FILE_ENTRY signature.
 *
 * Reads the fixed header, the file name and the start of the extra field,
 * skips the compressed data and adds every consumed byte to fr->file_size.
 * Side effects: fr->time is raised to the entry's modification time when
 * that is more recent, first_filename receives the first name met, and
 * expected_compressed_size is set for a following data descriptor.
 *
 * @param fr        recovery state (handle, file_size, time)
 * @param ext       in/out: extension guessed from member names and mimetype;
 *                  only written while *ext is still NULL
 * @param file_nbr  zero-based index of this entry in the archive; 0 resets
 *                  the detection state kept between calls
 * @return 0 on success, -1 on a read/seek failure or an overflowing size
 */
static int zip_parse_file_entry(file_recovery_t *fr, const char **ext, const unsigned int file_nbr)
{
  zip_file_entry_t  file;
  zip64_extra_entry_t extra;
  uint64_t          len;
  /* When non-zero, number of data bytes to skip instead of compressed_size */
  unsigned int krita=0;
  if (fread(&file, sizeof(file), 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF reading header of file_entry\n");
#endif
    return -1;
  }
  fr->file_size += sizeof(file);
#ifdef DEBUG_ZIP
  log_info("%u Comp=%u %u CRC32=0x%08X extra_length=%u ",
      le32(file.compressed_size),
      le16(file.compression),
      le32(file.uncompressed_size),
      le32(file.crc32),
      le16(file.extra_length));
#endif
  /* Avoid Jan  1  1980 files */
  if(le16(file.last_mod_time)!=0 || le16(file.last_mod_date)!=33)
  {
    /* Use the more recent file to set the time/date of the recovered archive */
    const time_t tmp=date_dos2unix(le16(file.last_mod_time), le16(file.last_mod_date));
    if(fr->time < tmp)
      fr->time=tmp;
  }
  len = le16(file.filename_length);
  if (len)
  {
    char *filename=(char *)MALLOC(len+1);
    if (fread(filename, len, 1, fr->handle) != 1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in file_entry header: %llu bytes expected\n", (long long unsigned)len);
#endif
      free(filename);
      return -1;
    }
    fr->file_size += len;
    filename[len]='\0';
    /* Remember the first member name, truncated to fit first_filename */
    if(first_filename[0]=='\0')
    {
      const unsigned int len_tmp=(len<255?len:255);
      strncpy(first_filename, filename, len_tmp);
      first_filename[len_tmp]='\0';
    }
#ifdef DEBUG_ZIP
    log_info("%s\n", filename);
#endif
    if(*ext==NULL)
    {
      /* Detection state kept across the entries of one archive */
      static int msoffice=0;
      static int sh3d=0;
      static const char *ext_msoffice=NULL;
      if(file_nbr==0)
      {
        msoffice=0;
        sh3d=0;
        ext_msoffice=NULL;
      }
      if(len==19 && memcmp(filename, "[Content_Types].xml", 19)==0)
        msoffice=1;
      else if(file_nbr==0)
      {
        if(len==8 && memcmp(filename, "mimetype", 8)==0 && le16(file.extra_length)==0)
        {
          unsigned char buffer[128];
          const unsigned int compressed_size=le32(file.compressed_size);
          const int to_read=(compressed_size < 128 ? compressed_size: 128);
          /* Fewer than 128 bytes may be read while the krita test below
             always compares 19 bytes: start from a known content. */
          memset(buffer, 0, sizeof(buffer));
          if( fread(buffer, to_read, 1, fr->handle)!=1)
          {
#ifdef DEBUG_ZIP
            log_trace("zip: Unexpected EOF in file_entry data: %u bytes expected\n",
                compressed_size);
#endif
            free(filename);
            return -1;
          }
          /* The mimetype was only peeked at: go back to the start of the data */
          if (my_fseek(fr->handle, -to_read, SEEK_CUR) < 0)
          {
            log_info("fseek failed\n");
            free(filename);
            return -1;
          }
          if(compressed_size==16      && memcmp(buffer,"image/openraster",16)==0)
            *ext="ora";
          else if(compressed_size==20 && memcmp(buffer,"application/epub+zip",20)==0)
            *ext="epub";
          else if(compressed_size==28 && memcmp(buffer,"application/vnd.sun.xml.calc",28)==0)
            *ext="sxc";
          else if(compressed_size==28 && memcmp(buffer,"application/vnd.sun.xml.draw",28)==0)
            *ext="sxd";
          else if(compressed_size==31 && memcmp(buffer,"application/vnd.sun.xml.impress",31)==0)
            *ext="sxi";
          else if(compressed_size==30 && memcmp(buffer,"application/vnd.sun.xml.writer",30)==0)
            *ext="sxw";
          else if(compressed_size==39 && memcmp(buffer,"application/vnd.oasis.opendocument.text",39)==0)
            *ext="odt";
          else if(compressed_size==43 && memcmp(buffer,"application/vnd.oasis.opendocument.graphics",43)==0)
            *ext="odg";
          else if(compressed_size==46 && memcmp(buffer,"application/vnd.oasis.opendocument.spreadsheet",46)==0)
            *ext="ods";
          else if(compressed_size==47 && memcmp(buffer,"application/vnd.oasis.opendocument.presentation",47)==0)
            *ext="odp";
          else if(memcmp(buffer,"application/x-krita",19)==0)
          {
            *ext="kra";
            /* Only these 19 bytes are skipped as data, whatever the header says */
            krita=19;
          }
          else
          { /* default to writer */
            *ext="sxw";
          }
        }
        /* Zipped Keyhole Markup Language (KML) used by Google Earth */
        else if(len==7 && memcmp(filename, "doc.kml", 7)==0)
          *ext="kmz";
        else if(len==4 && memcmp(filename, "Home", 4)==0)
          sh3d=1;
        /* Celtx, Screenwriting & Media Pre-production file */
        else if(len==9 && memcmp(filename, "local.rdf", 9)==0)
          *ext="celtx";
        else if(len==13 && memcmp(filename, "document.json", 13)==0)
          *ext="sketch";
      }
      else if(file_nbr==1 && sh3d==1)
      {
        /* "Home" followed by a member named "0" */
        if(len==1 && filename[0]=='0')
          *ext="sh3d";
      }
      if(strncmp(filename, "word/", 5)==0)
        ext_msoffice="docx";
      else if(strncmp(filename, "xl/", 3)==0)
        ext_msoffice="xlsx";
      else if(strncmp(filename, "ppt/", 4)==0)
        ext_msoffice="pptx";
      else if(strncmp(filename, "visio/", 6)==0)
        ext_msoffice="vsdx";
      if(msoffice && ext_msoffice!=NULL)
        *ext=ext_msoffice;
    }
    if(*ext==NULL)
    {
      /* iWork */
      if(len==23 && memcmp(filename, "QuickLook/Thumbnail.jpg", 23)==0)
        *ext="pages";
      else if(len==20 && strncasecmp(filename, "META-INF/MANIFEST.MF", 20)==0)
        *ext="jar";
      else if(len==15 && strncasecmp(filename, "chrome.manifest", 15)==0)
        *ext="xpi";
      /* SMART Notebook */
      else if(len==15 && memcmp(filename, "imsmanifest.xml", 15)==0)
        *ext="notebook";
      /* Apple Numbers */
      else if(len==18 && memcmp(filename, "Index/Document.iwa", 18)==0)
        *ext="numbers";
      else if(len==19 && memcmp(filename, "AndroidManifest.xml", 19)==0)
        *ext="apk";
      else if(len==30 && memcmp(filename, "xsd/MindManagerApplication.xsd", 30)==0)
        *ext="mmap";
    }
    free(filename);
  }
#ifdef DEBUG_ZIP
  log_info("\n");
#endif
  len = le16(file.extra_length);
  memset(&extra, 0, sizeof(extra));
  if (len>0)
  {
    /* Best effort: the extra field may be shorter than the structure or the
       file may end here; the stream is repositioned just below anyway. */
    if (fread(&extra, sizeof(extra), 1, fr->handle) != 1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in file_entry header: %llu bytes expected\n", (long long unsigned)len);
#endif
      /* Do not trust a partially filled structure */
      memset(&extra, 0, sizeof(extra));
    }
    /* Go back to the start of the extra field, then skip it entirely */
    if (my_fseek(fr->handle, fr->file_size, SEEK_SET) == -1 ||
        my_fseek(fr->handle, len, SEEK_CUR) == -1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in file_entry header: %llu bytes expected\n", (long long unsigned)len);
#endif
      return -1;
    }
    fr->file_size += len;
  }
  len = le32(file.compressed_size);
  /* 0xffffffff defers to the 64-bit size stored in the extra field */
  if(len==0xffffffff && le16(extra.tag)==1)
  {
    len = le64(extra.compressed_size);
    /* Avoid endless loop */
    if( fr->file_size + len < fr->file_size)
      return -1;
  }
  if(krita>0)
    len=krita;
  if (len>0)
  {
    if (my_fseek(fr->handle, len, SEEK_CUR) == -1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in file_entry data: %llu bytes expected\n", (long long unsigned)len);
#endif
      return -1;
    }
#ifdef DEBUG_ZIP
    log_trace("zip: Data of length %llu\n", (long long unsigned)len);
#endif
    fr->file_size += len;
  }
  expected_compressed_size=len;
  if (file.has_descriptor && (le16(file.compression)==8 || le16(file.compression)==9))
  {
    /* The fields crc-32, compressed size and uncompressed size
       are set to zero in the local header.  The correct values
       are put in the data descriptor immediately following the
       compressed data.
       Typically used in OOO documents
       Search ZIP_DATA_DESCRIPTOR */
    static const unsigned char zip_data_desc_header[4]= {0x50, 0x4B, 0x07, 0x08};
    const int64_t pos = file_get_pos(fr->handle, zip_data_desc_header, 4);
#ifdef DEBUG_ZIP
    log_trace("Searched footer, got length %lli\n", (long long int)pos);
#endif
    if (pos < 0)
      return -1;
    if (pos > 0)
    {
      fr->file_size += pos;
      expected_compressed_size=pos;
    }
  }
  return 0;
}
391 
/**
 * Skip one central directory entry; fr->handle must be positioned right
 * after the ZIP_CENTRAL_DIR signature.  fr->file_size grows by each part
 * that was consumed successfully.
 *
 * @return 0 on success, -1 when a read or a seek fails
 */
static int zip_parse_central_dir(file_recovery_t *fr)
{
  /* Fields specific to the central directory; those shared with the local
     file entry are read into a zip_file_entry_t */
  struct {
    uint16_t comment_length;          /* Comment length */
    uint16_t disk_number_start;       /* Disk number start */
    uint16_t internal_attr;           /* Internal file attributes */
    uint32_t external_attr;           /* External file attributes */
    uint32_t offset_header;           /* Relative offset of local header */
  } __attribute__ ((gcc_struct, __packed__)) tail;
  zip_file_entry_t common;
  uint32_t         trailing;

  /* Two bytes sit between the signature and the common part */
  if (my_fseek(fr->handle, 2, SEEK_CUR) == -1)
  {
#ifdef DEBUG_ZIP
    log_trace("Unexpected EOF skipping version from central_dir\n");
#endif
    return -1;
  }
  fr->file_size += 2;

  if (fread(&common, sizeof(common), 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("Unexpected EOF reading 1st part of central_dir\n");
#endif
    return -1;
  }
  fr->file_size += sizeof(common);
#ifdef DEBUG_ZIP
  log_trace("zip: Central dir with CRC 0x%08X\n", common.crc32);
#endif

  if (fread(&tail, sizeof(tail), 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF reading 2nd part of central_dir\n");
#endif
    return -1;
  }
  fr->file_size += sizeof(tail);

  /* File name, extra field and comment are skipped in one go */
  trailing = le16(common.extra_length) + le16(tail.comment_length) + le16(common.filename_length);
  if (my_fseek(fr->handle, trailing, SEEK_CUR) == -1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF in central_dir: %u bytes expected\n", trailing);
#endif
    return -1;
  }
  fr->file_size += trailing;
#ifdef DEBUG_ZIP
  log_trace("zip: Data of total length %u\n", trailing);
#endif
  return 0;
}
449 
/**
 * Skip a zip64 end of central directory record; fr->handle must be
 * positioned right after the ZIP_CENTRAL_DIR64 signature.
 *
 * The record's size field counts the bytes that follow the size field
 * itself, so it already covers the fixed fields read here; only what exceeds
 * them (the extensible data sector) remains to be skipped.
 *
 * @return 0 on success, -1 on a read/seek failure or an overflowing size
 */
static int zip64_parse_end_central_dir(file_recovery_t *fr)
{
  struct {
    uint64_t end_size;                /** Size of zip64 end of central directory record */
    uint16_t version_made;            /** Version made by */
    uint16_t version_needed;          /** Version needed to extract */
    uint32_t number_disk;             /** Number of this disk */
    uint32_t number_disk2;            /** Number of the disk with the start of the central directory */
    uint64_t number_entries;          /** Total number of entries in the central directory on this disk */
    uint64_t number_entries2;         /** Total number of entries in the central directory */
    uint64_t size;                    /** Size of the central directory */
    uint64_t offset;                  /** Offset of start of central directory */
  } __attribute__ ((gcc_struct, __packed__)) dir;
  /* Part of the record counted by end_size that is read together with it */
  const uint64_t fixed_size = sizeof(dir) - sizeof(dir.end_size);
  uint64_t record_size;

  if (fread(&dir, sizeof(dir), 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF reading end_central_dir_64\n");
#endif
    return -1;
  }
  fr->file_size += sizeof(dir);

  record_size = le64(dir.end_size);
  /* A size not larger than the fixed fields leaves nothing to skip */
  if (record_size > fixed_size)
  {
    const uint64_t len = record_size - fixed_size;
    /* Avoid endless loop */
    if( fr->file_size + len <= fr->file_size)
      return -1;
    if (my_fseek(fr->handle, len, SEEK_CUR) == -1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in end_central_dir_64: expected %llu bytes\n", (long long unsigned)len);
#endif
      return -1;
    }
    fr->file_size += len;
#ifdef DEBUG_ZIP
    log_trace("zip: End of 64b central dir of length %llu\n", (long long unsigned)len);
#endif
  }

  return 0;
}
494 
/**
 * Skip the end of central directory record and its optional comment;
 * fr->handle must be positioned right after the ZIP_END_CENTRAL_DIR
 * signature.  fr->file_size grows by what was consumed.
 *
 * @return 0 on success, -1 when the read or the seek fails
 */
static int zip_parse_end_central_dir(file_recovery_t *fr)
{
  struct {
    uint16_t number_disk;             /* Number of this disk */
    uint16_t number_disk2;            /* Number in the central dir */
    uint16_t total_number_disk;       /* Total number of entries in this disk */
    uint16_t total_number_disk2;      /* Total number of entries in the central dir */
    uint32_t size;                    /* Size of the central directory */
    uint32_t offset;                  /* Offset of start of central directory */
    uint16_t comment_length;          /* Comment length */
  } __attribute__ ((gcc_struct, __packed__)) record;

  if (fread(&record, sizeof(record), 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF reading header of zip_parse_end_central_dir\n");
#endif
    return -1;
  }
  fr->file_size += sizeof(record);

  /* No comment: the record ends here */
  if (record.comment_length == 0)
    return 0;

  {
    const uint16_t comment_bytes = le16(record.comment_length);
    if (my_fseek(fr->handle, comment_bytes, SEEK_CUR) == -1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in end_central_dir: expected %u bytes\n", comment_bytes);
#endif
      return -1;
    }
    fr->file_size += comment_bytes;
#ifdef DEBUG_ZIP
    log_trace("zip: Comment of length %u\n", comment_bytes);
#endif
  }
  return 0;
}
533 
/**
 * Read a data descriptor (fr->handle positioned right after the
 * ZIP_DATA_DESCRIPTOR signature) and check that its compressed size equals
 * expected_compressed_size, as left by the preceding file entry.
 *
 * @return 0 when the sizes agree, -1 on a short read or a size mismatch
 */
static int zip_parse_data_desc(file_recovery_t *fr)
{
  struct {
    uint32_t crc32;                  /* Checksum (CRC32) */
    uint32_t compressed_size;        /* Compressed size (bytes) */
    uint32_t uncompressed_size;      /* Uncompressed size (bytes) */
  } __attribute__ ((gcc_struct, __packed__)) descriptor;

  if (fread(&descriptor, sizeof(descriptor), 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF reading header of data_desc\n");
#endif
    return -1;
  }
  fr->file_size += sizeof(descriptor);
#ifdef DEBUG_ZIP
  log_info("compressed_size=%u/%u uncompressed_size=%u CRC32=0x%08X\n",
      le32(descriptor.compressed_size),
      expected_compressed_size,
      le32(descriptor.uncompressed_size),
      le32(descriptor.crc32));
#endif
  return (le32(descriptor.compressed_size)==expected_compressed_size) ? 0 : -1;
}
561 
/**
 * Skip a signature record: a 16-bit length followed by that many bytes.
 * fr->handle must be positioned right after the ZIP_SIGNATURE marker and
 * fr->file_size grows by what was consumed.
 *
 * @return 0 on success, -1 when the read or the seek fails
 */
static int zip_parse_signature(file_recovery_t *fr)
{
  uint16_t raw_length;

  if (fread(&raw_length, 2, 1, fr->handle) != 1)
  {
#ifdef DEBUG_ZIP
    log_trace("zip: Unexpected EOF reading length of signature\n");
#endif
    return -1;
  }
  fr->file_size += 2;

  /* Empty payload: nothing else to skip */
  if (raw_length == 0)
    return 0;

  {
    const uint16_t payload = le16(raw_length);
    if (my_fseek(fr->handle, payload, SEEK_CUR) == -1)
    {
#ifdef DEBUG_ZIP
      log_trace("zip: Unexpected EOF in zip_parse_signature: expected %u bytes\n", payload);
#endif
      return -1;
    }
    fr->file_size += payload;
  }
  return 0;
}
590 
/**
 * Consume a zip64 end of central directory locator; fr->handle must be
 * positioned right after the ZIP_END_CENTRAL_DIR64 signature.  The fields
 * are read but not interpreted.
 *
 * @return 0 on success, -1 when the record cannot be read entirely
 */
static int zip64_parse_end_central_dir_locator(file_recovery_t *fr)
{
  struct {
    uint32_t disk_number;       /* Number of the disk with the start of the zip64 end of central directory */
    uint64_t relative_offset;   /* Relative offset of the zip64 end of central directory record */
    uint32_t disk_total_number; /* Total number of disks */
  } __attribute__ ((gcc_struct, __packed__)) locator;

  if (fread(&locator, sizeof(locator), 1, fr->handle) == 1)
  {
    fr->file_size += sizeof(locator);
    return 0;
  }
#ifdef DEBUG_ZIP
  log_trace("zip: Unexpected EOF reading 1st part of end_central_dir_locator\n");
#endif
  return -1;
}
609 
/**
 * Walk the archive record by record from offset 0 to compute its size.
 *
 * On success (an end of central directory record was parsed) fr->file_size
 * holds the number of bytes consumed.  On any failure fr->offset_error
 * receives the number of bytes consumed so far and fr->file_size is set to
 * 0.  fr->offset_ok is moved, after each record other than the final one,
 * to the offset that follows that record's signature.
 */
static void file_check_zip(file_recovery_t *fr)
{
  const char *ext=NULL;
  unsigned int entries=0;
  first_filename[0]='\0';
  fr->file_size = 0;
  fr->offset_error=0;
  fr->offset_ok=0;
  if(my_fseek(fr->handle, 0, SEEK_SET) < 0)
    return ;
  for(;;)
  {
    uint32_t signature;
    uint64_t after_signature;
    int      rc;

    if (fread(&signature, 4, 1, fr->handle)!=1)
    {
#ifdef DEBUG_ZIP
      log_trace("Failed to read block header\n");
#endif
      fr->offset_error=fr->file_size;
      fr->file_size=0;
      return;
    }

    signature = le32(signature);
#ifdef DEBUG_ZIP
    log_trace("Header 0x%08X at 0x%llx\n", signature, (long long unsigned int)fr->file_size);
    log_flush();
#endif
    fr->file_size += 4;
    after_signature=fr->file_size;

    /* Hand the record over to its parser */
    switch (signature)
    {
      case ZIP_FILE_ENTRY:
        rc = zip_parse_file_entry(fr, &ext, entries);
        entries++;
        break;
      case ZIP_DATA_DESCRIPTOR:
        rc = zip_parse_data_desc(fr);
        break;
      case ZIP_CENTRAL_DIR:
        rc = zip_parse_central_dir(fr);
        break;
      case ZIP_SIGNATURE:
        rc = zip_parse_signature(fr);
        break;
      case ZIP_CENTRAL_DIR64:
        rc = zip64_parse_end_central_dir(fr);
        break;
      case ZIP_END_CENTRAL_DIR64:
        rc = zip64_parse_end_central_dir_locator(fr);
        break;
      case ZIP_END_CENTRAL_DIR:
        rc = zip_parse_end_central_dir(fr);
        break;
      default:
#ifdef DEBUG_ZIP
        if ((signature&0xFFFF) != 0x4B50)
          log_trace("Not a zip block: 0x%08X\n", signature);
        else
          log_trace("Unparsable block with ID 0x%04X\n", signature>>16);
#endif
        rc = -1;
        break;
    }

    if (rc<0)
    {
      fr->offset_error = fr->file_size;
      fr->file_size = 0;
      return;
    }
    /* Only end of central dir is end of archive, 64b version of it is before */
    if (signature==ZIP_END_CENTRAL_DIR)
      return;
    fr->offset_ok=after_signature;
  }
}
692 
/**
 * Rename a recovered archive after inspecting its content.
 *
 * The file is reopened and parsed like in file_check_zip().  As soon as a
 * file entry yields an extension, the file is renamed with it.  Otherwise,
 * once the end of central directory is reached, the file is renamed using
 * the beginning of the first member name (up to 32 characters, stopping at
 * '.', '/' or '\') and the "zip" extension.  Any parse failure leaves the
 * file untouched.  The handle is always closed before renaming.
 */
static void file_rename_zip(file_recovery_t *file_recovery)
{
  file_recovery_t fr;
  const char *ext=NULL;
  unsigned int entries=0;
  reset_file_recovery(&fr);
  fr.handle=fopen(file_recovery->filename, "rb");
  if(fr.handle==NULL)
    return;
  fr.file_size = 0;
  fr.offset_error=0;
  first_filename[0]='\0';
  if(my_fseek(fr.handle, 0, SEEK_SET) < 0)
  {
    fclose(fr.handle);
    return ;
  }
  for(;;)
  {
    uint32_t signature;
    int      rc;

    if (fread(&signature, 4, 1, fr.handle)!=1)
    {
#ifdef DEBUG_ZIP
      log_trace("Failed to read block header\n");
#endif
      fclose(fr.handle);
      return;
    }

    signature = le32(signature);
#ifdef DEBUG_ZIP
    log_trace("Header 0x%08X at 0x%llx\n", signature, (long long unsigned int)fr.file_size);
    log_flush();
#endif
    fr.file_size += 4;

    switch (signature)
    {
      case ZIP_FILE_ENTRY:
        rc = zip_parse_file_entry(&fr, &ext, entries);
        entries++;
        /* An extension was identified: no need to read any further */
        if(ext!=NULL)
        {
          fclose(fr.handle);
          file_rename(file_recovery, NULL, 0, 0, ext, 1);
          return;
        }
        break;
      case ZIP_DATA_DESCRIPTOR:
        rc = zip_parse_data_desc(&fr);
        break;
      case ZIP_CENTRAL_DIR:
        rc = zip_parse_central_dir(&fr);
        break;
      case ZIP_SIGNATURE:
        rc = zip_parse_signature(&fr);
        break;
      case ZIP_CENTRAL_DIR64:
        rc = zip64_parse_end_central_dir(&fr);
        break;
      case ZIP_END_CENTRAL_DIR64:
        rc = zip64_parse_end_central_dir_locator(&fr);
        break;
      case ZIP_END_CENTRAL_DIR:
        rc = zip_parse_end_central_dir(&fr);
        break;
      default:
#ifdef DEBUG_ZIP
        if ((signature&0xFFFF) != 0x4B50)
          log_trace("Not a zip block: 0x%08X\n", signature);
        else
          log_trace("Unparsable block with ID 0x%04X\n", signature>>16);
#endif
        rc = -1;
        break;
    }

    if (rc<0)
    {
      fclose(fr.handle);
      return;
    }
    /* Only end of central dir is end of archive, 64b version of it is before */
    if (signature==ZIP_END_CENTRAL_DIR)
    {
      unsigned int stem_len=0;
      fclose(fr.handle);
      /* Keep the first member name up to its extension or first separator */
      while(stem_len<32)
      {
        const char c=first_filename[stem_len];
        if(c=='\0' || c=='.' || c=='/' || c=='\\')
          break;
        stem_len++;
      }
      file_rename(file_recovery, first_filename, stem_len, 0, "zip", 0);
      return;
    }
  }
}
793 
/**
 * Header check registered for the "PK\003\004" signature.
 *
 * Decides whether the signature starts a new file and, if so, fills
 * file_recovery_new with the extension guessed from the first member name
 * (found at offset 30 of buffer) and, for "mimetype" members, from the data
 * that follows it.
 *
 * @param buffer            bytes starting at the signature
 * @param buffer_size       number of bytes available in buffer
 * @param safe_header_only  when 0, a signature met while a zip file is being
 *                          recovered is submitted to header_ignored_adv()
 * @param file_recovery     file currently being recovered
 * @param file_recovery_new out: description of the new file
 * @return 0 when header_ignored_adv() returns 0 for this signature,
 *         1 when file_recovery_new has been filled
 *
 * NOTE(review): offsets up to 85 of buffer are read without looking at
 * buffer_size; assumes callers always provide at least that much - confirm.
 */
static int header_check_zip(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const zip_file_entry_t *file=(const zip_file_entry_t *)&buffer[4];
  const unsigned int len=le16(file->filename_length);
#ifdef DEBUG_ZIP
  log_trace("header_check_zip\n");
#endif
  if(file_recovery->file_stat!=NULL &&
      file_recovery->file_stat->file_hint==&file_hint_doc)
  {
    if(header_ignored_adv(file_recovery, file_recovery_new)==0)
      return 0;
  }
  /* A zip file begins by ZIP_FILE_ENTRY, this signature can also be
   * found for each compressed file */
  if(file_recovery->file_stat!=NULL &&
      file_recovery->file_stat->file_hint==&file_hint_zip &&
      safe_header_only==0)
  {
    if(header_ignored_adv(file_recovery, file_recovery_new)==0)
      return 0;
  }
  reset_file_recovery(file_recovery_new);
  file_recovery_new->min_filesize=21;
  file_recovery_new->file_check=&file_check_zip;
  if(len==8 && memcmp(&buffer[30],"mimetype",8)==0)
  {
    /* The mimetype string directly follows the 8-character name */
    const unsigned int compressed_size=le32(file->compressed_size);
    /* Mypaint .ora */
    if(compressed_size==16 && memcmp(&buffer[38],"image/openraster",16)==0)
      file_recovery_new->extension="ora";
    else if(compressed_size==20 && memcmp(&buffer[38],"application/epub+zip",20)==0)
      file_recovery_new->extension="epub";
    else if(compressed_size==28 && memcmp(&buffer[38],"application/vnd.sun.xml.calc",28)==0)
      file_recovery_new->extension="sxc";
    else if(compressed_size==28 && memcmp(&buffer[38],"application/vnd.sun.xml.draw",28)==0)
      file_recovery_new->extension="sxd";
    else if(compressed_size==31 && memcmp(&buffer[38],"application/vnd.sun.xml.impress",31)==0)
      file_recovery_new->extension="sxi";
    else if(compressed_size==30 && memcmp(&buffer[38],"application/vnd.sun.xml.writer",30)==0)
      file_recovery_new->extension="sxw";
    else if(compressed_size==39 && memcmp(&buffer[38],"application/vnd.oasis.opendocument.text",39)==0)
      file_recovery_new->extension="odt";
    else if(compressed_size==43 && memcmp(&buffer[38],"application/vnd.oasis.opendocument.graphics",43)==0)
      file_recovery_new->extension="odg";
    else if(compressed_size==45 && memcmp(&buffer[38],"application/vnd.adobe.sparkler.project+dcxucf",45)==0)
      file_recovery_new->extension="xd";
    else if(compressed_size==46 && memcmp(&buffer[38],"application/vnd.oasis.opendocument.spreadsheet",46)==0)
      file_recovery_new->extension="ods";
    else if(compressed_size==47 && memcmp(&buffer[38],"application/vnd.oasis.opendocument.presentation",47)==0)
      file_recovery_new->extension="odp";
    else if(memcmp(&buffer[38],"application/x-krita",19)==0)
      file_recovery_new->extension="kra";
    else
    { /* default to writer */
      file_recovery_new->extension="sxw";
    }
  }
  else if(len==19 && memcmp(&buffer[30],"[Content_Types].xml",19)==0)
  {
    /* "xl/" is still only looked for in the first 2000 bytes, but the
       window may no longer extend past the end of the buffer */
    const unsigned int xl_window=(buffer_size < 2000 ? buffer_size : 2000);
    if(pos_in_mem(&buffer[0], buffer_size, (const unsigned char*)"word/", 5)!=0)
      file_recovery_new->extension="docx";
    else if(pos_in_mem(&buffer[0], xl_window, (const unsigned char*)"xl/", 3)!=0)
      file_recovery_new->extension="xlsx";
    else if(pos_in_mem(&buffer[0], buffer_size, (const unsigned char*)"ppt/", 4)!=0)
      file_recovery_new->extension="pptx";
    else if(pos_in_mem(&buffer[0], buffer_size, (const unsigned char*)"visio/", 6)!=0)
      file_recovery_new->extension="vsdx";
    else
      file_recovery_new->extension="docx";
    /* The guess is refined once the whole file has been recovered */
    file_recovery_new->file_rename=&file_rename_zip;
  }
  /* Extended Renoise song file */
  else if(len==8 && memcmp(&buffer[30], "Song.xml", 8)==0)
    file_recovery_new->extension="xrns";
  else if(len==4 && memcmp(&buffer[30], "Home", 4)==0)
    file_recovery_new->extension="sh3d";
  /* Apple Numbers */
  else if(len==18 && memcmp(&buffer[30], "Index/Document.iwa", 18)==0)
    file_recovery_new->extension="numbers";
  else
  {
    file_recovery_new->extension=file_hint_zip.extension;
    file_recovery_new->file_rename=&file_rename_zip;
  }
  return 1;
}
881 
/**
 * Header check registered for the "PK00PK\003\004" signature: always starts
 * a new file with the default zip extension and file_check_zip() as checker.
 *
 * @return 1 in every case
 */
static int header_check_winzip(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension=file_hint_zip.extension;
  file_recovery_new->file_check=&file_check_zip;
  return 1;
}
889 
/**
 * Locate the first occurrence of needle in haystack.
 *
 * @return the offset just past the end of the first match, or 0 when the
 *         needle does not occur or is longer than the haystack
 */
static unsigned int pos_in_mem(const unsigned char *haystack, const unsigned int haystack_size, const unsigned char *needle, const unsigned int needle_size)
{
  unsigned int start = 0;
  unsigned int last_start;
  if(needle_size > haystack_size)
    return 0;
  /* Last offset at which the needle still fits in the haystack */
  last_start = haystack_size - needle_size;
  while(start <= last_start)
  {
    if(memcmp(haystack + start, needle, needle_size) == 0)
      return start + needle_size;
    start++;
  }
  return 0;
}
900 
/**
 * Register the two signatures handled by this file, both at offset 0: the
 * plain local file header and the variant prefixed with "PK00".
 */
static void register_header_check_zip(file_stat_t *file_stat)
{
  /* WinZIPv8-compressed files. */
  static const unsigned char winzip_header[8] = {
    'P', 'K', '0', '0', 'P', 'K', 0x03, 0x04
  };
  register_header_check(0, zip_header, sizeof(zip_header), &header_check_zip, file_stat);
  register_header_check(0, winzip_header, sizeof(winzip_header), &header_check_winzip, file_stat);
}
907