1 // Copyright 2015 The Bazel Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // zip.cc -- .zip (.jar) file reading/writing routines.
16 //
17 
18 // See README.txt for details.
19 //
20 // See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
21 // for definition of PKZIP file format.
22 
23 #define _FILE_OFFSET_BITS 64  // Support zip files larger than 2GB
24 
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <limits.h>
34 #include <limits>
35 #include <vector>
36 
37 #include "third_party/ijar/mapped_file.h"
38 #include "third_party/ijar/platform_utils.h"
39 #include "third_party/ijar/zip.h"
40 #include "third_party/ijar/zlib_client.h"
41 
// 4-byte record signatures of the PKZIP format (see APPNOTE.TXT). The code in
// this file compares them against values read with get_u4le().
#define LOCAL_FILE_HEADER_SIGNATURE   0x04034b50
#define CENTRAL_FILE_HEADER_SIGNATURE 0x02014b50
// NOTE(review): presumably the "version made by" value (host system UNIX in
// the high byte) written to central directory entries -- confirm at its use.
#define UNIX_ZIP_FILE_VERSION 0x0300
#define DIGITAL_SIGNATURE             0x05054b50
#define ZIP64_EOCD_SIGNATURE          0x06064b50
#define ZIP64_EOCD_LOCATOR_SIGNATURE  0x07064b50
#define EOCD_SIGNATURE                0x06054b50
#define DATA_DESCRIPTOR_SIGNATURE     0x08074b50

// Largest values representable in 2-byte and 4-byte fields. The zip64 lookup
// treats these values in the regular end of central directory record as
// "see the zip64 record instead".
#define U2_MAX 0xffff
#define U4_MAX 0xffffffffUL

// Size in bytes of the zip64 end of central directory locator.
#define ZIP64_EOCD_LOCATOR_SIZE 20
// zip64 eocd is fixed size in the absence of a zip64 extensible data sector
#define ZIP64_EOCD_FIXED_SIZE 56

// version to extract: 1.0 - default value from APPNOTE.TXT.
// Output JAR files contain no extra ZIP features, so this is enough.
#define ZIP_VERSION_TO_EXTRACT                10
#define COMPRESSION_METHOD_STORED             0   // no compression
#define COMPRESSION_METHOD_DEFLATED           8

// General purpose bit flags understood by the reader. Despite its name,
// GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (bit 3) is the flag the reader checks to
// decide whether a data descriptor follows the file data.
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3)
#define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11)
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1))
// Union of all flags the reader accepts; any other bit set in a local file
// header is reported as unsupported.
#define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \
  (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \
  | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \
  | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED)
71 
72 namespace devtools_ijar {
73 // In the absence of ZIP64 support, zip files are limited to 4GB.
74 // http://www.info-zip.org/FAQ.html#limits
75 static const size_t kMaximumOutputSize = std::numeric_limits<uint32_t>::max();
76 
77 static const u4 kDefaultTimestamp =
78     30 << 25 | 1 << 21 | 1 << 16;  // January 1, 2010 in DOS time
79 
80 //
81 // A class representing a ZipFile for reading. Its public API is exposed
82 // using the ZipExtractor abstract class.
83 //
84 class InputZipFile : public ZipExtractor {
85  public:
86   InputZipFile(ZipExtractorProcessor *processor, const char* filename);
87   virtual ~InputZipFile();
88 
GetError()89   virtual const char* GetError() {
90     if (errmsg[0] == 0) {
91       return NULL;
92     }
93     return errmsg;
94   }
95 
96   bool Open();
97   virtual bool ProcessNext();
98   virtual void Reset();
GetSize()99   virtual size_t GetSize() {
100     return input_file_->Length();
101   }
102 
103   virtual u8 CalculateOutputLength();
104 
105   virtual bool ProcessCentralDirEntry(const u1 *&p, size_t *compressed_size,
106                                       size_t *uncompressed_size, char *filename,
107                                       size_t filename_size, u4 *attr,
108                                       u4 *offset);
109 
110  private:
111   ZipExtractorProcessor *processor;
112   const char* filename_;
113   MappedInputFile *input_file_;
114 
115   // InputZipFile is responsible for maintaining the following
116   // pointers. They are allocated by the Create() method before
117   // the object is actually created using mmap.
118   const u1 * zipdata_in_;   // start of input file mmap
119   size_t bytes_unmapped_;         // bytes that have already been unmapped
120   const u1 * central_dir_;  // central directory in input file
121 
122   size_t in_offset_;  // offset  the input file
123 
124   const u1 *p;  // input cursor
125 
126   const u1* central_dir_current_;  // central dir input cursor
127 
128   // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every
129   // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is
130   // not enough, we bail out. We only decompress class files, so they should
131   // be smaller than 64K anyway, but we give a little leeway.
132   // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the
133   // ZIP. It is set to 2GB here because no one has audited the code for 64-bit
134   // cleanliness.
135   static const size_t INITIAL_BUFFER_SIZE = 256 * 1024;  // 256K
136   static const size_t MAX_BUFFER_SIZE = std::numeric_limits<int32_t>::max();
137   static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024;
138 
139   // These metadata fields are the fields of the ZIP header of the file being
140   // processed.
141   u2 extract_version_;
142   u2 general_purpose_bit_flag_;
143   u2 compression_method_;
144   u4 uncompressed_size_;
145   u4 compressed_size_;
146   u2 file_name_length_;
147   u2 extra_field_length_;
148   const u1 *file_name_;
149   const u1 *extra_field_;
150 
151   // Copy of the last filename entry - Null-terminated.
152   char filename[PATH_MAX];
153   // The external file attribute field
154   u4 attr;
155 
156   // last error
157   char errmsg[4*PATH_MAX];
158 
159   Decompressor *decompressor_;
160 
error(const char * fmt,...)161   int error(const char *fmt, ...) {
162     va_list ap;
163     va_start(ap, fmt);
164     vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
165     va_end(ap);
166     return -1;
167   }
168 
169   // Check that at least n bytes remain in the input file, otherwise
170   // abort with an error message.  "state" is the name of the field
171   // we're about to read, for diagnostics.
EnsureRemaining(size_t n,const char * state)172   int EnsureRemaining(size_t n, const char *state) {
173     size_t in_offset = p - zipdata_in_;
174     size_t remaining = input_file_->Length() - in_offset;
175     if (n > remaining) {
176       return error("Premature end of file (at offset %zd, state=%s); "
177                    "expected %zd more bytes but found %zd.\n",
178                    in_offset, state, n, remaining);
179     }
180     return 0;
181   }
182 
183   // Read one entry from input zip file
184   int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size);
185 
186   // Uncompress a file from the archive using zlib. The pointer returned
187   // is owned by InputZipFile, so it must not be freed. Advances the input
188   // cursor to the first byte after the compressed data.
189   u1* UncompressFile();
190 
191   // Skip a file
192   int SkipFile(const bool compressed);
193 
194   // Process a file
195   int ProcessFile(const bool compressed);
196 };
197 
198 //
// A class implementing ZipBuilder that represents an open zip file for writing.
200 //
201 class OutputZipFile : public ZipBuilder {
202  public:
OutputZipFile(const char * filename,size_t estimated_size)203   OutputZipFile(const char *filename, size_t estimated_size)
204       : output_file_(NULL),
205         filename_(filename),
206         estimated_size_(estimated_size),
207         finished_(false) {
208     errmsg[0] = 0;
209   }
210 
GetError()211   virtual const char* GetError() {
212     if (errmsg[0] == 0) {
213       return NULL;
214     }
215     return errmsg;
216   }
217 
~OutputZipFile()218   virtual ~OutputZipFile() { Finish(); }
219   virtual u1* NewFile(const char* filename, const u4 attr);
220   virtual int FinishFile(size_t filelength, bool compress = false,
221                          bool compute_crc = false);
222   virtual int WriteEmptyFile(const char *filename);
GetSize()223   virtual size_t GetSize() {
224     return Offset(q);
225   }
GetNumberFiles()226   virtual int GetNumberFiles() {
227     return entries_.size();
228   }
229   virtual int Finish();
230   bool Open();
231 
232  private:
233   struct LocalFileEntry {
234     // Start of the local header (in the output buffer).
235     size_t local_header_offset;
236 
237     // Sizes of the file entry
238     size_t uncompressed_length;
239     size_t compressed_length;
240 
241     // Compression method
242     u2 compression_method;
243 
244     // CRC32
245     u4 crc32;
246 
247     // external attributes field
248     u4 external_attr;
249 
250     // Start/length of the file_name in the local header.
251     u1 *file_name;
252     u2 file_name_length;
253 
254     // Start/length of the extra_field in the local header.
255     const u1 *extra_field;
256     u2 extra_field_length;
257   };
258 
259   MappedOutputFile* output_file_;
260   const char* filename_;
261   size_t estimated_size_;
262   bool finished_;
263 
264   // OutputZipFile is responsible for maintaining the following
265   // pointers. They are allocated by the Create() method before
266   // the object is actually created using mmap.
267   u1 *zipdata_out_;        // start of output file mmap
268   u1 *q;  // output cursor
269 
270   u1 *header_ptr;  // Current pointer to "compression method" entry.
271 
272   // List of entries to write the central directory
273   std::vector<LocalFileEntry*> entries_;
274 
275   // last error
276   char errmsg[4*PATH_MAX];
277 
error(const char * fmt,...)278   int error(const char *fmt, ...) {
279     va_list ap;
280     va_start(ap, fmt);
281     vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
282     va_end(ap);
283     return -1;
284   }
285 
286   // Write the ZIP central directory structure for each local file
287   // entry in "entries".
288   void WriteCentralDirectory();
289 
290   // Returns the offset of the pointer relative to the start of the
291   // output zip file.
Offset(const u1 * const x)292   size_t Offset(const u1 *const x) {
293     return x - zipdata_out_;
294   }
295 
296   // Write ZIP file header in the output. Since the compressed size is not
297   // known in advance, it must be recorded later. This method returns a pointer
298   // to "compressed size" in the file header that should be passed to
299   // WriteFileSizeInLocalFileHeader() later.
300   u1* WriteLocalFileHeader(const char *filename, const u4 attr);
301 
302   // Fill in the "compressed size" and "uncompressed size" fields in a local
303   // file header previously written by WriteLocalFileHeader().
304   size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr,
305                                         size_t out_length,
306                                         bool compress = false,
307                                         const u4 crc = 0);
308 };
309 
310 //
311 // Implementation of InputZipFile
312 //
ProcessNext()313 bool InputZipFile::ProcessNext() {
314   // Process the next entry in the central directory. Also make sure that the
315   // content pointer is in sync.
316   size_t compressed, uncompressed;
317   u4 offset;
318   if (!ProcessCentralDirEntry(central_dir_current_, &compressed, &uncompressed,
319                               filename, PATH_MAX, &attr, &offset)) {
320     return false;
321   }
322 
323   // There might be an offset specified in the central directory that does
324   // not match the file offset, if so, correct the pointer.
325   if (offset != 0 && (p != (zipdata_in_ + in_offset_ + offset))) {
326     p = zipdata_in_ + offset;
327   }
328 
329   if (EnsureRemaining(4, "signature") < 0) {
330     return false;
331   }
332   u4 signature = get_u4le(p);
333   if (signature == LOCAL_FILE_HEADER_SIGNATURE) {
334     if (ProcessLocalFileEntry(compressed, uncompressed) < 0) {
335       return false;
336     }
337   } else {
338     error("local file header signature for file %s not found\n", filename);
339     return false;
340   }
341 
342   return true;
343 }
344 
ProcessLocalFileEntry(size_t compressed_size,size_t uncompressed_size)345 int InputZipFile::ProcessLocalFileEntry(
346     size_t compressed_size, size_t uncompressed_size) {
347   if (EnsureRemaining(26, "extract_version") < 0) {
348     return -1;
349   }
350   extract_version_ = get_u2le(p);
351   general_purpose_bit_flag_ = get_u2le(p);
352 
353   if ((general_purpose_bit_flag_ & ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED) != 0) {
354     return error("Unsupported value (0x%04x) in general purpose bit flag.\n",
355                  general_purpose_bit_flag_);
356   }
357 
358   compression_method_ = get_u2le(p);
359 
360   if (compression_method_ != COMPRESSION_METHOD_DEFLATED &&
361       compression_method_ != COMPRESSION_METHOD_STORED) {
362     return error("Unsupported compression method (%d).\n",
363                  compression_method_);
364   }
365 
366   // skip over: last_mod_file_time, last_mod_file_date, crc32
367   p += 2 + 2 + 4;
368   compressed_size_ = get_u4le(p);
369   uncompressed_size_ = get_u4le(p);
370   file_name_length_ = get_u2le(p);
371   extra_field_length_ = get_u2le(p);
372 
373   if (EnsureRemaining(file_name_length_, "file_name") < 0) {
374     return -1;
375   }
376   file_name_ = p;
377   p += file_name_length_;
378 
379   if (EnsureRemaining(extra_field_length_, "extra_field") < 0) {
380     return -1;
381   }
382   extra_field_ = p;
383   p += extra_field_length_;
384 
385   bool is_compressed = compression_method_ == COMPRESSION_METHOD_DEFLATED;
386 
387   // If the zip is compressed, compressed and uncompressed size members are
388   // zero in the local file header. If not, check that they are the same as the
389   // lengths from the central directory, otherwise, just believe the central
390   // directory
391   if (compressed_size_ == 0) {
392     compressed_size_ = compressed_size;
393   } else {
394     if (compressed_size_ != compressed_size) {
395       return error("central directory and file header inconsistent\n");
396     }
397   }
398 
399   if (uncompressed_size_ == 0) {
400     uncompressed_size_ = uncompressed_size;
401   } else {
402     if (uncompressed_size_ != uncompressed_size) {
403       return error("central directory and file header inconsistent\n");
404     }
405   }
406 
407   if (processor->Accept(filename, attr)) {
408     if (ProcessFile(is_compressed) < 0) {
409       return -1;
410     }
411   } else {
412     if (SkipFile(is_compressed) < 0) {
413       return -1;
414     }
415   }
416 
417   if (general_purpose_bit_flag_ & GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) {
418     // Skip the data descriptor. Some implementations do not put the signature
419     // here, so check if the next 4 bytes are a signature, and if so, skip the
420     // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip
421     // the next 8 bytes (because the value just read was the CRC).
422     u4 signature = get_u4le(p);
423     if (signature == DATA_DESCRIPTOR_SIGNATURE) {
424       p += 4 * 3;
425     } else {
426       p += 4 * 2;
427     }
428   }
429 
430   size_t bytes_processed = p - zipdata_in_;
431   if (bytes_processed > bytes_unmapped_ + MAX_MAPPED_REGION) {
432     input_file_->Discard(MAX_MAPPED_REGION);
433     bytes_unmapped_ += MAX_MAPPED_REGION;
434   }
435 
436   return 0;
437 }
438 
SkipFile(const bool compressed)439 int InputZipFile::SkipFile(const bool compressed) {
440   if (!compressed) {
441     // In this case, compressed_size_ == uncompressed_size_ (since the file is
442     // uncompressed), so we can use either.
443     if (compressed_size_ != uncompressed_size_) {
444       return error("compressed size != uncompressed size, although the file "
445                    "is uncompressed.\n");
446     }
447   }
448 
449   if (EnsureRemaining(compressed_size_, "file_data") < 0) {
450     return -1;
451   }
452   p += compressed_size_;
453   return 0;
454 }
455 
UncompressFile()456 u1* InputZipFile::UncompressFile() {
457   size_t in_offset = p - zipdata_in_;
458   size_t remaining = input_file_->Length() - in_offset;
459   DecompressedFile *decompressed_file =
460       decompressor_->UncompressFile(p, remaining);
461   if (decompressed_file == NULL) {
462     if (decompressor_->GetError() != NULL) {
463       error(decompressor_->GetError());
464     }
465     return NULL;
466   } else {
467     compressed_size_ = decompressed_file->compressed_size;
468     uncompressed_size_ = decompressed_file->uncompressed_size;
469     u1 *uncompressed_data = decompressed_file->uncompressed_data;
470     free(decompressed_file);
471     p += compressed_size_;
472     return uncompressed_data;
473   }
474 }
475 
ProcessFile(const bool compressed)476 int InputZipFile::ProcessFile(const bool compressed) {
477   const u1 *file_data;
478   if (compressed) {
479     file_data = UncompressFile();
480     if (file_data == NULL) {
481       return -1;
482     }
483   } else {
484     // In this case, compressed_size_ == uncompressed_size_ (since the file is
485     // uncompressed), so we can use either.
486     if (compressed_size_ != uncompressed_size_) {
487       return error("compressed size != uncompressed size, although the file "
488                    "is uncompressed.\n");
489     }
490 
491     if (EnsureRemaining(compressed_size_, "file_data") < 0) {
492       return -1;
493     }
494     file_data = p;
495     p += compressed_size_;
496   }
497   processor->Process(filename, attr, file_data, uncompressed_size_);
498   return 0;
499 }
500 
501 
502 // Reads and returns some metadata of the next file from the central directory:
503 // - compressed size
504 // - uncompressed size
505 // - whether the entry is a class file (to be included in the output).
506 // Precondition: p points to the beginning of an entry in the central dir
507 // Postcondition: p points to the beginning of the next entry in the central dir
508 // Returns true if the central directory contains another file and false if not.
509 // Of course, in the latter case, the size output variables are not changed.
510 // Note that the central directory is always followed by another data structure
511 // that has a signature, so parsing it this way is safe.
ProcessCentralDirEntry(const u1 * & p,size_t * compressed_size,size_t * uncompressed_size,char * filename,size_t filename_size,u4 * attr,u4 * offset)512 bool InputZipFile::ProcessCentralDirEntry(const u1 *&p, size_t *compressed_size,
513                                           size_t *uncompressed_size,
514                                           char *filename, size_t filename_size,
515                                           u4 *attr, u4 *offset) {
516   u4 signature = get_u4le(p);
517 
518   if (signature != CENTRAL_FILE_HEADER_SIGNATURE) {
519     if (signature != DIGITAL_SIGNATURE && signature != EOCD_SIGNATURE &&
520         signature != ZIP64_EOCD_SIGNATURE) {
521       error("invalid central file header signature: 0x%x\n", signature);
522     }
523     return false;
524   }
525 
526   p += 16;  // skip to 'compressed size' field
527   *compressed_size = get_u4le(p);
528   *uncompressed_size = get_u4le(p);
529   u2 file_name_length = get_u2le(p);
530   u2 extra_field_length = get_u2le(p);
531   u2 file_comment_length = get_u2le(p);
532   p += 4;  // skip to external file attributes field
533   *attr = get_u4le(p);
534   *offset = get_u4le(p);
535   {
536     size_t len = (file_name_length < filename_size)
537       ? file_name_length
538       : (filename_size - 1);
539     memcpy(reinterpret_cast<void*>(filename), p, len);
540     filename[len] = 0;
541   }
542   p += file_name_length;
543   p += extra_field_length;
544   p += file_comment_length;
545   return true;
546 }
547 
548 // Gives a maximum bound on the size of the interface JAR. Basically, adds
549 // the difference between the compressed and uncompressed sizes to the size
550 // of the input file.
CalculateOutputLength()551 u8 InputZipFile::CalculateOutputLength() {
552   const u1* current = central_dir_;
553 
554   u8 compressed_size = 0;
555   u8 uncompressed_size = 0;
556   u8 skipped_compressed_size = 0;
557   u4 attr;
558   u4 offset;
559   char filename[PATH_MAX];
560 
561   while (true) {
562     size_t file_compressed, file_uncompressed;
563     if (!ProcessCentralDirEntry(current,
564                                 &file_compressed, &file_uncompressed,
565                                 filename, PATH_MAX, &attr, &offset)) {
566       break;
567     }
568 
569     if (processor->Accept(filename, attr)) {
570       compressed_size += (u8) file_compressed;
571       uncompressed_size += (u8) file_uncompressed;
572     } else {
573       skipped_compressed_size += file_compressed;
574     }
575   }
576 
577   // The worst case is when the output is simply the input uncompressed. The
578   // metadata in the zip file will stay the same, so the file will grow by the
579   // difference between the compressed and uncompressed sizes.
580   return (u8) input_file_->Length() - skipped_compressed_size
581       + (uncompressed_size - compressed_size);
582 }
583 
// An end of central directory record, sized for optional zip64 contents.
// The fields are first filled from the regular (16/32-bit) record and are
// overwritten with the wider zip64 values when a matching zip64 record is
// found.
struct EndOfCentralDirectoryRecord {
  u4 number_of_this_disk;
  u4 disk_with_central_dir;
  u8 central_dir_entries_on_this_disk;
  u8 central_dir_entries;
  u8 central_dir_size;    // size of the central directory in bytes
  u8 central_dir_offset;  // recorded offset of the central directory
};
593 
594 // Checks for a zip64 end of central directory record. If a valid zip64 EOCD is
595 // found, updates the original EOCD record and returns true.
MaybeReadZip64CentralDirectory(const u1 * bytes,size_t,const u1 * current,const u1 ** end_of_central_dir,EndOfCentralDirectoryRecord * cd)596 bool MaybeReadZip64CentralDirectory(const u1 *bytes, size_t /*in_length*/,
597                                     const u1 *current,
598                                     const u1 **end_of_central_dir,
599                                     EndOfCentralDirectoryRecord *cd) {
600   if (current < bytes) {
601     return false;
602   }
603   const u1 *candidate = current;
604   u4 zip64_directory_signature = get_u4le(current);
605   if (zip64_directory_signature != ZIP64_EOCD_SIGNATURE) {
606     return false;
607   }
608 
609   // size of zip64 end of central directory record
610   // (fixed size unless there's a zip64 extensible data sector, which
611   // we don't need to read)
612   get_u8le(current);
613   get_u2be(current);  // version made by
614   get_u2be(current);  // version needed to extract
615 
616   u4 number_of_this_disk = get_u4be(current);
617   u4 disk_with_central_dir = get_u4le(current);
618   u8 central_dir_entries_on_this_disk = get_u8le(current);
619   u8 central_dir_entries = get_u8le(current);
620   u8 central_dir_size = get_u8le(current);
621   u8 central_dir_offset = get_u8le(current);
622 
623   // check for a zip64 EOCD that matches the regular EOCD
624   if (number_of_this_disk != cd->number_of_this_disk &&
625       cd->number_of_this_disk != U2_MAX) {
626     return false;
627   }
628   if (disk_with_central_dir != cd->disk_with_central_dir &&
629       cd->disk_with_central_dir != U2_MAX) {
630     return false;
631   }
632   if (central_dir_entries_on_this_disk !=
633           cd->central_dir_entries_on_this_disk &&
634       cd->central_dir_entries_on_this_disk != U2_MAX) {
635     return false;
636   }
637   if (central_dir_entries != cd->central_dir_entries &&
638       cd->central_dir_entries != U2_MAX) {
639     return false;
640   }
641   if (central_dir_size != cd->central_dir_size &&
642       cd->central_dir_size != U4_MAX) {
643     return false;
644   }
645   if (central_dir_offset != cd->central_dir_offset &&
646       cd->central_dir_offset != U4_MAX) {
647     return false;
648   }
649 
650   *end_of_central_dir = candidate;
651   cd->number_of_this_disk = number_of_this_disk;
652   cd->disk_with_central_dir = disk_with_central_dir;
653   cd->central_dir_entries_on_this_disk = central_dir_entries_on_this_disk;
654   cd->central_dir_entries = central_dir_entries;
655   cd->central_dir_size = central_dir_size;
656   cd->central_dir_offset = central_dir_offset;
657   return true;
658 }
659 
660 // Starting from the end of central directory record, attempts to locate a zip64
661 // end of central directory record. If found, updates the given record and
662 // offset with the zip64 data. Returns false on error.
FindZip64CentralDirectory(const u1 * bytes,size_t in_length,const u1 ** end_of_central_dir,EndOfCentralDirectoryRecord * cd)663 bool FindZip64CentralDirectory(const u1 *bytes, size_t in_length,
664                                const u1 **end_of_central_dir,
665                                EndOfCentralDirectoryRecord *cd) {
666   // In the absence of a zip64 extensible data sector, the zip64 EOCD is at a
667   // fixed offset from the regular central directory.
668   if (MaybeReadZip64CentralDirectory(
669           bytes, in_length,
670           *end_of_central_dir - ZIP64_EOCD_LOCATOR_SIZE - ZIP64_EOCD_FIXED_SIZE,
671           end_of_central_dir, cd)) {
672     return true;
673   }
674 
675   // If we couldn't find a zip64 EOCD at a fixed offset, either it doesn't exist
676   // or there was a zip64 extensible data sector, so try going through the
677   // locator. This approach doesn't work if data was prepended to the archive
678   // without updating the offset in the locator.
679   const u1 *zip64_locator = *end_of_central_dir - ZIP64_EOCD_LOCATOR_SIZE;
680   if (zip64_locator - ZIP64_EOCD_FIXED_SIZE < bytes) {
681     return true;
682   }
683   u4 zip64_locator_signature = get_u4le(zip64_locator);
684   if (zip64_locator_signature != ZIP64_EOCD_LOCATOR_SIGNATURE) {
685     return true;
686   }
687   u4 disk_with_zip64_central_directory = get_u4le(zip64_locator);
688   u8 zip64_end_of_central_dir_offset = get_u8le(zip64_locator);
689   u4 zip64_total_disks = get_u4le(zip64_locator);
690   if (MaybeReadZip64CentralDirectory(bytes, in_length,
691                                      bytes + zip64_end_of_central_dir_offset,
692                                      end_of_central_dir, cd)) {
693     if (disk_with_zip64_central_directory != 0 || zip64_total_disks != 1) {
694       fprintf(stderr, "multi-disk JAR files are not supported\n");
695       return false;
696     }
697     return true;
698   }
699   return true;
700 }
701 
702 // Given the data in the zip file, returns the offset of the central directory
703 // and the number of files contained in it.
FindZipCentralDirectory(const u1 * bytes,size_t in_length,u4 * offset,const u1 ** central_dir)704 bool FindZipCentralDirectory(const u1 *bytes, size_t in_length, u4 *offset,
705                              const u1 **central_dir) {
706   static const int MAX_COMMENT_LENGTH = 0xffff;
707   static const int CENTRAL_DIR_LOCATOR_SIZE = 22;
708   // Maximum distance of start of central dir locator from end of file
709   static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE;
710   const u1* last_pos_to_check = in_length < MAX_DELTA
711       ? bytes
712       : bytes + (in_length - MAX_DELTA);
713   const u1* current;
714   bool found = false;
715 
716   for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE;
717        current >= last_pos_to_check;
718        current-- ) {
719     const u1* p = current;
720     if (get_u4le(p) != EOCD_SIGNATURE) {
721       continue;
722     }
723 
724     p += 16;  // skip to comment length field
725     u2 comment_length = get_u2le(p);
726 
727     // Does the comment go exactly till the end of the file?
728     if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE
729         != bytes + in_length) {
730       continue;
731     }
732 
733     // Hooray, we found it!
734     found = true;
735     break;
736   }
737 
738   if (!found) {
739     fprintf(stderr, "file is invalid or corrupted (missing end of central "
740                     "directory record)\n");
741     return false;
742   }
743 
744   EndOfCentralDirectoryRecord cd;
745   const u1* end_of_central_dir = current;
746   get_u4le(current);  // central directory locator signature, already checked
747   cd.number_of_this_disk = get_u2le(current);
748   cd.disk_with_central_dir = get_u2le(current);
749   cd.central_dir_entries_on_this_disk = get_u2le(current);
750   cd.central_dir_entries = get_u2le(current);
751   cd.central_dir_size = get_u4le(current);
752   cd.central_dir_offset = get_u4le(current);
753   u2 file_comment_length = get_u2le(current);
754   current += file_comment_length;  // set current to the end of the central dir
755 
756   if (!FindZip64CentralDirectory(bytes, in_length, &end_of_central_dir, &cd)) {
757     return false;
758   }
759 
760   if (cd.number_of_this_disk != 0 || cd.disk_with_central_dir != 0 ||
761       cd.central_dir_entries_on_this_disk != cd.central_dir_entries) {
762     fprintf(stderr, "multi-disk JAR files are not supported\n");
763     return false;
764   }
765 
766   // Do not change output values before determining that they are OK.
767   *offset = cd.central_dir_offset;
768   // Central directory start can then be used to determine the actual
769   // starts of the zip file (which can be different in case of a non-zip
770   // header like for auto-extractable binaries).
771   *central_dir = end_of_central_dir - cd.central_dir_size;
772   return true;
773 }
774 
Reset()775 void InputZipFile::Reset() {
776   central_dir_current_ = central_dir_;
777   bytes_unmapped_ = 0;
778   p = zipdata_in_ + in_offset_;
779 }
780 
ProcessAll()781 int ZipExtractor::ProcessAll() {
782   while (ProcessNext()) {}
783   if (GetError() != NULL) {
784     return -1;
785   }
786   return 0;
787 }
788 
Create(const char * filename,ZipExtractorProcessor * processor)789 ZipExtractor* ZipExtractor::Create(const char* filename,
790                                    ZipExtractorProcessor *processor) {
791   InputZipFile* result = new InputZipFile(processor, filename);
792   if (!result->Open()) {
793     fprintf(stderr, "Opening zip \"%s\": %s\n", filename, result->GetError());
794     delete result;
795     return NULL;
796   }
797 
798   return result;
799 }
800 
801 // zipdata_in_, in_offset_, p, central_dir_current_
802 
InputZipFile(ZipExtractorProcessor * processor,const char * filename)803 InputZipFile::InputZipFile(ZipExtractorProcessor *processor,
804                            const char* filename)
805     : processor(processor), filename_(filename), input_file_(NULL),
806       bytes_unmapped_(0) {
807   decompressor_ = new Decompressor();
808   errmsg[0] = 0;
809 }
810 
Open()811 bool InputZipFile::Open() {
812   MappedInputFile* input_file = new MappedInputFile(filename_);
813   if (!input_file->Opened()) {
814     snprintf(errmsg, sizeof(errmsg), "%s", input_file->Error());
815     delete input_file;
816     return false;
817   }
818 
819   void *zipdata_in = input_file->Buffer();
820   u4 central_dir_offset;
821   const u1 *central_dir = NULL;
822 
823   if (!devtools_ijar::FindZipCentralDirectory(
824           static_cast<const u1*>(zipdata_in), input_file->Length(),
825           &central_dir_offset, &central_dir)) {
826     errno = EIO;  // we don't really have a good error number
827     error("Cannot find central directory");
828     delete input_file;
829     return false;
830   }
831   const u1 *zipdata_start = static_cast<const u1*>(zipdata_in);
832   in_offset_ = - static_cast<off_t>(zipdata_start
833                                     + central_dir_offset
834                                     - central_dir);
835 
836   input_file_ = input_file;
837   zipdata_in_ = zipdata_start;
838   central_dir_ = central_dir;
839   central_dir_current_ = central_dir;
840   p = zipdata_in_ + in_offset_;
841   errmsg[0] = 0;
842   return true;
843 }
844 
~InputZipFile()845 InputZipFile::~InputZipFile() {
846   delete decompressor_;
847   if (input_file_ != NULL) {
848     input_file_->Close();
849     delete input_file_;
850   }
851 }
852 
853 
854 //
855 // Implementation of OutputZipFile
856 //
WriteEmptyFile(const char * filename)857 int OutputZipFile::WriteEmptyFile(const char *filename) {
858   const u1* file_name = (const u1*) filename;
859   size_t file_name_length = strlen(filename);
860 
861   LocalFileEntry *entry = new LocalFileEntry;
862   entry->local_header_offset = Offset(q);
863   entry->external_attr = 0;
864   entry->crc32 = 0;
865 
866   // Output the ZIP local_file_header:
867   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
868   put_u2le(q, 10);  // extract_version
869   put_u2le(q, 0);  // general_purpose_bit_flag
870   put_u2le(q, 0);  // compression_method
871   put_u4le(q, kDefaultTimestamp);  // last_mod_file date and time
872   put_u4le(q, entry->crc32);  // crc32
873   put_u4le(q, 0);  // compressed_size
874   put_u4le(q, 0);  // uncompressed_size
875   put_u2le(q, file_name_length);
876   put_u2le(q, 0);  // extra_field_length
877   put_n(q, file_name, file_name_length);
878 
879   entry->file_name_length = file_name_length;
880   entry->extra_field_length = 0;
881   entry->compressed_length = 0;
882   entry->uncompressed_length = 0;
883   entry->compression_method = 0;
884   entry->extra_field = (const u1 *)"";
885   entry->file_name = (u1*) strdup((const char *) file_name);
886   entries_.push_back(entry);
887 
888   return 0;
889 }
890 
WriteCentralDirectory()891 void OutputZipFile::WriteCentralDirectory() {
892   // central directory:
893   const u1 *central_directory_start = q;
894   for (size_t ii = 0; ii < entries_.size(); ++ii) {
895     LocalFileEntry *entry = entries_[ii];
896     put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE);
897     put_u2le(q, UNIX_ZIP_FILE_VERSION);
898 
899     put_u2le(q, ZIP_VERSION_TO_EXTRACT);  // version to extract
900     put_u2le(q, 0);  // general purpose bit flag
901     put_u2le(q, entry->compression_method);  // compression method:
902     put_u4le(q, kDefaultTimestamp);          // last_mod_file date and time
903     put_u4le(q, entry->crc32);  // crc32
904     put_u4le(q, entry->compressed_length);    // compressed_size
905     put_u4le(q, entry->uncompressed_length);  // uncompressed_size
906     put_u2le(q, entry->file_name_length);
907     put_u2le(q, entry->extra_field_length);
908 
909     put_u2le(q, 0);  // file comment length
910     put_u2le(q, 0);  // disk number start
911     put_u2le(q, 0);  // internal file attributes
912     put_u4le(q, entry->external_attr);  // external file attributes
913     // relative offset of local header:
914     put_u4le(q, entry->local_header_offset);
915 
916     put_n(q, entry->file_name, entry->file_name_length);
917     put_n(q, entry->extra_field, entry->extra_field_length);
918   }
919   u8 central_directory_size = q - central_directory_start;
920 
921   if (entries_.size() > U2_MAX || central_directory_size > U4_MAX ||
922       Offset(central_directory_start) > U4_MAX) {
923     u1 *zip64_end_of_central_directory_start = q;
924 
925     put_u4le(q, ZIP64_EOCD_SIGNATURE);
926     // signature and size field doesn't count towards size
927     put_u8le(q, ZIP64_EOCD_FIXED_SIZE - 12);
928     put_u2le(q, UNIX_ZIP_FILE_VERSION);  // version made by
929     put_u2le(q, 0);  // version needed to extract
930     put_u4le(q, 0);  // number of this disk
931     put_u4le(q, 0);  // # of the disk with the start of the central directory
932     put_u8le(q, entries_.size());  // # central dir entries on this disk
933     put_u8le(q, entries_.size());  // total # entries in the central directory
934     put_u8le(q, central_directory_size);  // size of the central directory
935     // offset of start of central directory wrt starting disk
936     put_u8le(q, Offset(central_directory_start));
937 
938     put_u4le(q, ZIP64_EOCD_LOCATOR_SIGNATURE);
939     // number of the disk with the start of the zip64 end of central directory
940     put_u4le(q, 0);
941     // relative offset of the zip64 end of central directory record
942     put_u8le(q, Offset(zip64_end_of_central_directory_start));
943     // total number of disks
944     put_u4le(q, 1);
945 
946     put_u4le(q, EOCD_SIGNATURE);
947     put_u2le(q, 0);  // number of this disk
948     put_u2le(q, 0);  // # of disk with the start of the central directory
949     // # central dir entries on this disk
950     put_u2le(q, entries_.size() > 0xffff ? 0xffff : entries_.size());
951     // total # entries in the central directory
952     put_u2le(q, entries_.size() > 0xffff ? 0xffff : entries_.size());
953     // size of the central directory
954     put_u4le(q,
955              central_directory_size > U4_MAX ? U4_MAX : central_directory_size);
956     // offset of start of central
957     put_u4le(q, Offset(central_directory_start) > U4_MAX
958                     ? U4_MAX
959                     : Offset(central_directory_start));
960     put_u2le(q, 0);  // .ZIP file comment length
961 
962   } else {
963     put_u4le(q, EOCD_SIGNATURE);
964     put_u2le(q, 0);  // number of this disk
965     put_u2le(q, 0);  // # of the disk with the start of the central directory
966     put_u2le(q, entries_.size());  // # central dir entries on this disk
967     put_u2le(q, entries_.size());  // total # entries in the central directory
968     put_u4le(q, central_directory_size);  // size of the central directory
969     // offset of start of central directory wrt starting disk
970     put_u4le(q, Offset(central_directory_start));
971     put_u2le(q, 0);  // .ZIP file comment length
972   }
973 }
974 
WriteLocalFileHeader(const char * filename,const u4 attr)975 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) {
976   off_t file_name_length_ = strlen(filename);
977   LocalFileEntry *entry = new LocalFileEntry;
978   entry->local_header_offset = Offset(q);
979   entry->file_name_length = file_name_length_;
980   entry->file_name = new u1[file_name_length_];
981   entry->external_attr = attr;
982   memcpy(entry->file_name, filename, file_name_length_);
983   entry->extra_field_length = 0;
984   entry->extra_field = (const u1 *)"";
985   entry->crc32 = 0;
986 
987   // Output the ZIP local_file_header:
988   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
989   put_u2le(q, ZIP_VERSION_TO_EXTRACT);     // version to extract
990   put_u2le(q, 0);                          // general purpose bit flag
991   u1 *header_ptr = q;
992   put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method = placeholder
993   put_u4le(q, kDefaultTimestamp);          // last_mod_file date and time
994   put_u4le(q, entry->crc32);               // crc32
995   put_u4le(q, 0);  // compressed_size = placeholder
996   put_u4le(q, 0);  // uncompressed_size = placeholder
997   put_u2le(q, entry->file_name_length);
998   put_u2le(q, entry->extra_field_length);
999 
1000   put_n(q, entry->file_name, entry->file_name_length);
1001   put_n(q, entry->extra_field, entry->extra_field_length);
1002   entries_.push_back(entry);
1003 
1004   return header_ptr;
1005 }
1006 
WriteFileSizeInLocalFileHeader(u1 * header_ptr,size_t out_length,bool compress,const u4 crc)1007 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr,
1008                                                      size_t out_length,
1009                                                      bool compress,
1010                                                      const u4 crc) {
1011   size_t compressed_size = out_length;
1012   if (compress) {
1013     compressed_size = TryDeflate(q, out_length);
1014   }
1015   // compression method
1016   if (compressed_size < out_length) {
1017     put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED);
1018   } else {
1019     put_u2le(header_ptr, COMPRESSION_METHOD_STORED);
1020   }
1021   header_ptr += 4;
1022   put_u4le(header_ptr, crc);              // crc32
1023   put_u4le(header_ptr, compressed_size);  // compressed_size
1024   put_u4le(header_ptr, out_length);       // uncompressed_size
1025   return compressed_size;
1026 }
1027 
Finish()1028 int OutputZipFile::Finish() {
1029   if (finished_) {
1030     return 0;
1031   }
1032 
1033   finished_ = true;
1034   WriteCentralDirectory();
1035   if (output_file_->Close(GetSize()) < 0) {
1036     return error("%s", output_file_->Error());
1037   }
1038   delete output_file_;
1039   output_file_ = NULL;
1040   return 0;
1041 }
1042 
NewFile(const char * filename,const u4 attr)1043 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) {
1044   header_ptr = WriteLocalFileHeader(filename, attr);
1045   return q;
1046 }
1047 
FinishFile(size_t filelength,bool compress,bool compute_crc)1048 int OutputZipFile::FinishFile(size_t filelength, bool compress,
1049                               bool compute_crc) {
1050   u4 crc = 0;
1051   if (compute_crc) {
1052     crc = ComputeCrcChecksum(q, filelength);
1053 
1054     if (filelength > 0 && crc == 0) {
1055       fprintf(stderr, "Error calculating CRC32 checksum.\n");
1056       return -1;
1057     }
1058   }
1059   size_t compressed_size =
1060       WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress, crc);
1061 
1062   if (compressed_size == 0 && filelength > 0) {
1063     fprintf(stderr, "Error compressing files.\n");
1064     return -1;
1065   }
1066 
1067   entries_.back()->crc32 = crc;
1068   entries_.back()->compressed_length = compressed_size;
1069   entries_.back()->uncompressed_length = filelength;
1070   if (compressed_size < filelength) {
1071     entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED;
1072   } else {
1073     entries_.back()->compression_method = COMPRESSION_METHOD_STORED;
1074   }
1075   q += compressed_size;
1076   return 0;
1077 }
1078 
Open()1079 bool OutputZipFile::Open() {
1080   if (estimated_size_ > kMaximumOutputSize) {
1081     fprintf(stderr,
1082             "Uncompressed input jar has size %zu, "
1083             "which exceeds the maximum supported output size %zu.\n"
1084             "Assuming that ijar will be smaller and hoping for the best.\n",
1085             estimated_size_, kMaximumOutputSize);
1086     estimated_size_ = kMaximumOutputSize;
1087   }
1088 
1089   MappedOutputFile* output_file = new MappedOutputFile(
1090       filename_, estimated_size_);
1091   if (!output_file->Opened()) {
1092     snprintf(errmsg, sizeof(errmsg), "%s", output_file->Error());
1093     delete output_file;
1094     return false;
1095   }
1096 
1097   output_file_ = output_file;
1098   q = output_file->Buffer();
1099   zipdata_out_ = output_file->Buffer();
1100   return true;
1101 }
1102 
Create(const char * zip_file,size_t estimated_size)1103 ZipBuilder *ZipBuilder::Create(const char *zip_file, size_t estimated_size) {
1104   OutputZipFile* result = new OutputZipFile(zip_file, estimated_size);
1105   if (!result->Open()) {
1106     fprintf(stderr, "%s\n", result->GetError());
1107     delete result;
1108     return NULL;
1109   }
1110 
1111   return result;
1112 }
1113 
EstimateSize(char const * const * files,char const * const * zip_paths,int nb_entries)1114 u8 ZipBuilder::EstimateSize(char const* const* files,
1115                             char const* const* zip_paths,
1116                             int nb_entries) {
1117   Stat file_stat;
1118   // Digital signature field size = 6, End of central directory = 22, Total = 28
1119   u8 size = 28;
1120   // Count the size of all the files in the input to estimate the size of the
1121   // output.
1122   for (int i = 0; i < nb_entries; i++) {
1123     file_stat.total_size = 0;
1124     if (files[i] != NULL && !stat_file(files[i], &file_stat)) {
1125       fprintf(stderr, "File %s does not seem to exist.", files[i]);
1126       return 0;
1127     }
1128     size += file_stat.total_size;
1129     // Add sizes of Zip meta data
1130     // local file header = 30 bytes
1131     // data descriptor = 12 bytes
1132     // central directory descriptor = 46 bytes
1133     //    Total: 88bytes
1134     size += 88;
1135     // The filename is stored twice (once in the central directory
1136     // and once in the local file header).
1137     size += strlen((zip_paths[i] != NULL) ? zip_paths[i] : files[i]) * 2;
1138   }
1139   return size;
1140 }
1141 
1142 }  // namespace devtools_ijar
1143