1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/zlib/google/zip_reader.h"
6 
7 #include <utility>
8 
9 #include "base/bind.h"
10 #include "base/files/file.h"
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/threading/sequenced_task_runner_handle.h"
16 #include "build/build_config.h"
17 #include "third_party/zlib/google/zip_internal.h"
18 
19 #if defined(USE_SYSTEM_MINIZIP)
20 #include <minizip/unzip.h>
21 #else
22 #include "third_party/zlib/contrib/minizip/unzip.h"
23 #if defined(OS_WIN)
24 #include "third_party/zlib/contrib/minizip/iowin32.h"
25 #endif  // defined(OS_WIN)
26 #endif  // defined(USE_SYSTEM_MINIZIP)
27 
28 namespace zip {
29 
30 namespace {
31 
32 // StringWriterDelegate --------------------------------------------------------
33 
34 // A writer delegate that writes no more than |max_read_bytes| to a given
35 // std::string.
36 class StringWriterDelegate : public WriterDelegate {
37  public:
38   StringWriterDelegate(size_t max_read_bytes, std::string* output);
39 
40   StringWriterDelegate(const StringWriterDelegate&) = delete;
41   StringWriterDelegate& operator=(const StringWriterDelegate&) = delete;
42 
43   ~StringWriterDelegate() override;
44 
45   // WriterDelegate methods:
46 
47   // Returns true.
48   bool PrepareOutput() override;
49 
50   // Appends |num_bytes| bytes from |data| to the output string. Returns false
51   // if |num_bytes| will cause the string to exceed |max_read_bytes|.
52   bool WriteBytes(const char* data, int num_bytes) override;
53 
54   void SetTimeModified(const base::Time& time) override;
55 
56  private:
57   size_t max_read_bytes_;
58   std::string* output_;
59 };
60 
StringWriterDelegate(size_t max_read_bytes,std::string * output)61 StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes,
62                                            std::string* output)
63     : max_read_bytes_(max_read_bytes),
64       output_(output) {
65 }
66 
~StringWriterDelegate()67 StringWriterDelegate::~StringWriterDelegate() {
68 }
69 
PrepareOutput()70 bool StringWriterDelegate::PrepareOutput() {
71   return true;
72 }
73 
WriteBytes(const char * data,int num_bytes)74 bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) {
75   if (output_->size() + num_bytes > max_read_bytes_)
76     return false;
77   output_->append(data, num_bytes);
78   return true;
79 }
80 
SetTimeModified(const base::Time & time)81 void StringWriterDelegate::SetTimeModified(const base::Time& time) {
82   // Do nothing.
83 }
84 
85 }  // namespace
86 
87 // TODO(satorux): The implementation assumes that file names in zip files
88 // are encoded in UTF-8. This is true for zip files created by Zip()
89 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)90 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
91                                 const unz_file_info& raw_file_info)
92     : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
93       is_directory_(false),
94       is_unsafe_(false),
95       is_encrypted_(false) {
96   original_size_ = raw_file_info.uncompressed_size;
97 
98   // Directory entries in zip files end with "/".
99   is_directory_ = base::EndsWith(file_name_in_zip, "/",
100                                  base::CompareCase::INSENSITIVE_ASCII);
101 
102   // Check the file name here for directory traversal issues.
103   is_unsafe_ = file_path_.ReferencesParent();
104 
105   // We also consider that the file name is unsafe, if it's invalid UTF-8.
106   std::u16string file_name_utf16;
107   if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
108                          &file_name_utf16)) {
109     is_unsafe_ = true;
110   }
111 
112   // We also consider that the file name is unsafe, if it's absolute.
113   // On Windows, IsAbsolute() returns false for paths starting with "/".
114   if (file_path_.IsAbsolute() ||
115       base::StartsWith(file_name_in_zip, "/",
116                        base::CompareCase::INSENSITIVE_ASCII))
117     is_unsafe_ = true;
118 
119   // Whether the file is encrypted is bit 0 of the flag.
120   is_encrypted_ = raw_file_info.flag & 1;
121 
122   // Construct the last modified time. The timezone info is not present in
123   // zip files, so we construct the time as local time.
124   base::Time::Exploded exploded_time = {};  // Zero-clear.
125   exploded_time.year = raw_file_info.tmu_date.tm_year;
126   // The month in zip file is 0-based, whereas ours is 1-based.
127   exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
128   exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
129   exploded_time.hour = raw_file_info.tmu_date.tm_hour;
130   exploded_time.minute = raw_file_info.tmu_date.tm_min;
131   exploded_time.second = raw_file_info.tmu_date.tm_sec;
132   exploded_time.millisecond = 0;
133 
134   if (!base::Time::FromUTCExploded(exploded_time, &last_modified_))
135     last_modified_ = base::Time::UnixEpoch();
136 }
137 
ZipReader()138 ZipReader::ZipReader() {
139   Reset();
140 }
141 
~ZipReader()142 ZipReader::~ZipReader() {
143   Close();
144 }
145 
Open(const base::FilePath & zip_file_path)146 bool ZipReader::Open(const base::FilePath& zip_file_path) {
147   DCHECK(!zip_file_);
148 
149   // Use of "Unsafe" function does not look good, but there is no way to do
150   // this safely on Linux. See file_util.h for details.
151   zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
152   if (!zip_file_) {
153     return false;
154   }
155 
156   return OpenInternal();
157 }
158 
OpenFromPlatformFile(base::PlatformFile zip_fd)159 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
160   DCHECK(!zip_file_);
161 
162 #if defined(OS_POSIX) || defined(OS_FUCHSIA)
163   zip_file_ = internal::OpenFdForUnzipping(zip_fd);
164 #elif defined(OS_WIN)
165   zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
166 #endif
167   if (!zip_file_) {
168     return false;
169   }
170 
171   return OpenInternal();
172 }
173 
OpenFromString(const std::string & data)174 bool ZipReader::OpenFromString(const std::string& data) {
175   zip_file_ = internal::PrepareMemoryForUnzipping(data);
176   if (!zip_file_)
177     return false;
178   return OpenInternal();
179 }
180 
Close()181 void ZipReader::Close() {
182   if (zip_file_) {
183     unzClose(zip_file_);
184   }
185   Reset();
186 }
187 
HasMore()188 bool ZipReader::HasMore() {
189   return !reached_end_;
190 }
191 
AdvanceToNextEntry()192 bool ZipReader::AdvanceToNextEntry() {
193   DCHECK(zip_file_);
194 
195   // Should not go further if we already reached the end.
196   if (reached_end_)
197     return false;
198 
199   unz_file_pos position = {};
200   if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
201     return false;
202   const int current_entry_index = position.num_of_file;
203   // If we are currently at the last entry, then the next position is the
204   // end of the zip file, so mark that we reached the end.
205   if (current_entry_index + 1 == num_entries_) {
206     reached_end_ = true;
207   } else {
208     DCHECK_LT(current_entry_index + 1, num_entries_);
209     if (unzGoToNextFile(zip_file_) != UNZ_OK) {
210       return false;
211     }
212   }
213   current_entry_info_.reset();
214   return true;
215 }
216 
OpenCurrentEntryInZip()217 bool ZipReader::OpenCurrentEntryInZip() {
218   DCHECK(zip_file_);
219 
220   unz_file_info raw_file_info = {};
221   char raw_file_name_in_zip[internal::kZipMaxPath] = {};
222   const int result = unzGetCurrentFileInfo(zip_file_,
223                                            &raw_file_info,
224                                            raw_file_name_in_zip,
225                                            sizeof(raw_file_name_in_zip) - 1,
226                                            NULL,  // extraField.
227                                            0,  // extraFieldBufferSize.
228                                            NULL,  // szComment.
229                                            0);  // commentBufferSize.
230   if (result != UNZ_OK)
231     return false;
232   if (raw_file_name_in_zip[0] == '\0')
233     return false;
234   current_entry_info_.reset(
235       new EntryInfo(raw_file_name_in_zip, raw_file_info));
236   return true;
237 }
238 
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const239 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
240                                     uint64_t num_bytes_to_extract) const {
241   DCHECK(zip_file_);
242 
243   const int open_result = unzOpenCurrentFile(zip_file_);
244   if (open_result != UNZ_OK)
245     return false;
246 
247   if (!delegate->PrepareOutput())
248     return false;
249   std::unique_ptr<char[]> buf(new char[internal::kZipBufSize]);
250 
251   uint64_t remaining_capacity = num_bytes_to_extract;
252   bool entire_file_extracted = false;
253 
254   while (remaining_capacity > 0) {
255     const int num_bytes_read =
256         unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize);
257 
258     if (num_bytes_read == 0) {
259       entire_file_extracted = true;
260       break;
261     } else if (num_bytes_read < 0) {
262       // If num_bytes_read < 0, then it's a specific UNZ_* error code.
263       break;
264     } else if (num_bytes_read > 0) {
265       uint64_t num_bytes_to_write = std::min<uint64_t>(
266           remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
267       if (!delegate->WriteBytes(buf.get(), num_bytes_to_write))
268         break;
269       if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
270         // Ensures function returns true if the entire file has been read.
271         entire_file_extracted =
272             (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0);
273       }
274       CHECK_GE(remaining_capacity, num_bytes_to_write);
275       remaining_capacity -= num_bytes_to_write;
276     }
277   }
278 
279   unzCloseCurrentFile(zip_file_);
280 
281   if (entire_file_extracted &&
282       current_entry_info()->last_modified() != base::Time::UnixEpoch()) {
283     delegate->SetTimeModified(current_entry_info()->last_modified());
284   }
285 
286   return entire_file_extracted;
287 }
288 
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback)289 void ZipReader::ExtractCurrentEntryToFilePathAsync(
290     const base::FilePath& output_file_path,
291     SuccessCallback success_callback,
292     FailureCallback failure_callback,
293     const ProgressCallback& progress_callback) {
294   DCHECK(zip_file_);
295   DCHECK(current_entry_info_.get());
296 
297   // If this is a directory, just create it and return.
298   if (current_entry_info()->is_directory()) {
299     if (base::CreateDirectory(output_file_path)) {
300       base::SequencedTaskRunnerHandle::Get()->PostTask(
301           FROM_HERE, std::move(success_callback));
302     } else {
303       DVLOG(1) << "Unzip failed: unable to create directory.";
304       base::SequencedTaskRunnerHandle::Get()->PostTask(
305           FROM_HERE, std::move(failure_callback));
306     }
307     return;
308   }
309 
310   if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
311     DVLOG(1) << "Unzip failed: unable to open current zip entry.";
312     base::SequencedTaskRunnerHandle::Get()->PostTask(
313         FROM_HERE, std::move(failure_callback));
314     return;
315   }
316 
317   base::FilePath output_dir_path = output_file_path.DirName();
318   if (!base::CreateDirectory(output_dir_path)) {
319     DVLOG(1) << "Unzip failed: unable to create containing directory.";
320     base::SequencedTaskRunnerHandle::Get()->PostTask(
321         FROM_HERE, std::move(failure_callback));
322     return;
323   }
324 
325   const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
326   base::File output_file(output_file_path, flags);
327 
328   if (!output_file.IsValid()) {
329     DVLOG(1) << "Unzip failed: unable to create platform file at "
330              << output_file_path.value();
331     base::SequencedTaskRunnerHandle::Get()->PostTask(
332         FROM_HERE, std::move(failure_callback));
333     return;
334   }
335 
336   base::SequencedTaskRunnerHandle::Get()->PostTask(
337       FROM_HERE,
338       base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
339                      std::move(output_file), std::move(success_callback),
340                      std::move(failure_callback), progress_callback,
341                      0 /* initial offset */));
342 }
343 
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const344 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
345                                             std::string* output) const {
346   DCHECK(output);
347   DCHECK(zip_file_);
348 
349   if (max_read_bytes == 0) {
350     output->clear();
351     return true;
352   }
353 
354   if (current_entry_info()->is_directory()) {
355     output->clear();
356     return true;
357   }
358 
359   // The original_size() is the best hint for the real size, so it saves
360   // doing reallocations for the common case when the uncompressed size is
361   // correct. However, we need to assume that the uncompressed size could be
362   // incorrect therefore this function needs to read as much data as possible.
363   std::string contents;
364   contents.reserve(
365       static_cast<size_t>(std::min(base::checked_cast<int64_t>(max_read_bytes),
366                                    current_entry_info()->original_size())));
367 
368   StringWriterDelegate writer(max_read_bytes, &contents);
369   if (!ExtractCurrentEntry(&writer, max_read_bytes)) {
370     if (contents.length() < max_read_bytes) {
371       // There was an error in extracting entry. If ExtractCurrentEntry()
372       // returns false, the entire file was not read - in which case
373       // contents.length() should equal |max_read_bytes| unless an error
374       // occurred which caused extraction to be aborted.
375       output->clear();
376     } else {
377       // |num_bytes| is less than the length of current entry.
378       output->swap(contents);
379     }
380     return false;
381   }
382   output->swap(contents);
383   return true;
384 }
385 
OpenInternal()386 bool ZipReader::OpenInternal() {
387   DCHECK(zip_file_);
388 
389   unz_global_info zip_info = {};  // Zero-clear.
390   if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
391     return false;
392   }
393   num_entries_ = zip_info.number_entry;
394   if (num_entries_ < 0)
395     return false;
396 
397   // We are already at the end if the zip file is empty.
398   reached_end_ = (num_entries_ == 0);
399   return true;
400 }
401 
Reset()402 void ZipReader::Reset() {
403   zip_file_ = NULL;
404   num_entries_ = 0;
405   reached_end_ = false;
406   current_entry_info_.reset();
407 }
408 
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback,const int64_t offset)409 void ZipReader::ExtractChunk(base::File output_file,
410                              SuccessCallback success_callback,
411                              FailureCallback failure_callback,
412                              const ProgressCallback& progress_callback,
413                              const int64_t offset) {
414   char buffer[internal::kZipBufSize];
415 
416   const int num_bytes_read = unzReadCurrentFile(zip_file_,
417                                                 buffer,
418                                                 internal::kZipBufSize);
419 
420   if (num_bytes_read == 0) {
421     unzCloseCurrentFile(zip_file_);
422     std::move(success_callback).Run();
423   } else if (num_bytes_read < 0) {
424     DVLOG(1) << "Unzip failed: error while reading zipfile "
425              << "(" << num_bytes_read << ")";
426     std::move(failure_callback).Run();
427   } else {
428     if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
429       DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
430       std::move(failure_callback).Run();
431       return;
432     }
433 
434     int64_t current_progress = offset + num_bytes_read;
435 
436     progress_callback.Run(current_progress);
437 
438     base::SequencedTaskRunnerHandle::Get()->PostTask(
439         FROM_HERE,
440         base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
441                        std::move(output_file), std::move(success_callback),
442                        std::move(failure_callback), progress_callback,
443                        current_progress));
444   }
445 }
446 
447 // FileWriterDelegate ----------------------------------------------------------
448 
FileWriterDelegate(base::File * file)449 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {}
450 
FileWriterDelegate(std::unique_ptr<base::File> file)451 FileWriterDelegate::FileWriterDelegate(std::unique_ptr<base::File> file)
452     : file_(file.get()), owned_file_(std::move(file)) {}
453 
~FileWriterDelegate()454 FileWriterDelegate::~FileWriterDelegate() {
455   if (!file_->SetLength(file_length_)) {
456     DVPLOG(1) << "Failed updating length of written file";
457   }
458 }
459 
PrepareOutput()460 bool FileWriterDelegate::PrepareOutput() {
461   return file_->Seek(base::File::FROM_BEGIN, 0) >= 0;
462 }
463 
WriteBytes(const char * data,int num_bytes)464 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
465   int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
466   if (bytes_written > 0)
467     file_length_ += bytes_written;
468   return bytes_written == num_bytes;
469 }
470 
SetTimeModified(const base::Time & time)471 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
472   file_->SetTimes(base::Time::Now(), time);
473 }
474 
475 // FilePathWriterDelegate ------------------------------------------------------
476 
FilePathWriterDelegate(const base::FilePath & output_file_path)477 FilePathWriterDelegate::FilePathWriterDelegate(
478     const base::FilePath& output_file_path)
479     : output_file_path_(output_file_path) {}
480 
~FilePathWriterDelegate()481 FilePathWriterDelegate::~FilePathWriterDelegate() {}
482 
PrepareOutput()483 bool FilePathWriterDelegate::PrepareOutput() {
484   // We can't rely on parent directory entries being specified in the
485   // zip, so we make sure they are created.
486   if (!base::CreateDirectory(output_file_path_.DirName()))
487     return false;
488 
489   file_.Initialize(output_file_path_,
490                    base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
491   return file_.IsValid();
492 }
493 
WriteBytes(const char * data,int num_bytes)494 bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) {
495   return num_bytes == file_.WriteAtCurrentPos(data, num_bytes);
496 }
497 
SetTimeModified(const base::Time & time)498 void FilePathWriterDelegate::SetTimeModified(const base::Time& time) {
499   file_.Close();
500   base::TouchFile(output_file_path_, base::Time::Now(), time);
501 }
502 
503 }  // namespace zip
504