1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/zlib/google/zip_reader.h"
6
7 #include <utility>
8
9 #include "base/bind.h"
10 #include "base/files/file.h"
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/threading/sequenced_task_runner_handle.h"
16 #include "build/build_config.h"
17 #include "third_party/zlib/google/zip_internal.h"
18
19 #if defined(USE_SYSTEM_MINIZIP)
20 #include <minizip/unzip.h>
21 #else
22 #include "third_party/zlib/contrib/minizip/unzip.h"
23 #if defined(OS_WIN)
24 #include "third_party/zlib/contrib/minizip/iowin32.h"
25 #endif // defined(OS_WIN)
26 #endif // defined(USE_SYSTEM_MINIZIP)
27
28 namespace zip {
29
30 namespace {
31
32 // StringWriterDelegate --------------------------------------------------------
33
34 // A writer delegate that writes no more than |max_read_bytes| to a given
35 // std::string.
36 class StringWriterDelegate : public WriterDelegate {
37 public:
38 StringWriterDelegate(size_t max_read_bytes, std::string* output);
39
40 StringWriterDelegate(const StringWriterDelegate&) = delete;
41 StringWriterDelegate& operator=(const StringWriterDelegate&) = delete;
42
43 ~StringWriterDelegate() override;
44
45 // WriterDelegate methods:
46
47 // Returns true.
48 bool PrepareOutput() override;
49
50 // Appends |num_bytes| bytes from |data| to the output string. Returns false
51 // if |num_bytes| will cause the string to exceed |max_read_bytes|.
52 bool WriteBytes(const char* data, int num_bytes) override;
53
54 void SetTimeModified(const base::Time& time) override;
55
56 private:
57 size_t max_read_bytes_;
58 std::string* output_;
59 };
60
StringWriterDelegate(size_t max_read_bytes,std::string * output)61 StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes,
62 std::string* output)
63 : max_read_bytes_(max_read_bytes),
64 output_(output) {
65 }
66
~StringWriterDelegate()67 StringWriterDelegate::~StringWriterDelegate() {
68 }
69
PrepareOutput()70 bool StringWriterDelegate::PrepareOutput() {
71 return true;
72 }
73
WriteBytes(const char * data,int num_bytes)74 bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) {
75 if (output_->size() + num_bytes > max_read_bytes_)
76 return false;
77 output_->append(data, num_bytes);
78 return true;
79 }
80
SetTimeModified(const base::Time & time)81 void StringWriterDelegate::SetTimeModified(const base::Time& time) {
82 // Do nothing.
83 }
84
85 } // namespace
86
87 // TODO(satorux): The implementation assumes that file names in zip files
88 // are encoded in UTF-8. This is true for zip files created by Zip()
89 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)90 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
91 const unz_file_info& raw_file_info)
92 : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
93 is_directory_(false),
94 is_unsafe_(false),
95 is_encrypted_(false) {
96 original_size_ = raw_file_info.uncompressed_size;
97
98 // Directory entries in zip files end with "/".
99 is_directory_ = base::EndsWith(file_name_in_zip, "/",
100 base::CompareCase::INSENSITIVE_ASCII);
101
102 // Check the file name here for directory traversal issues.
103 is_unsafe_ = file_path_.ReferencesParent();
104
105 // We also consider that the file name is unsafe, if it's invalid UTF-8.
106 std::u16string file_name_utf16;
107 if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
108 &file_name_utf16)) {
109 is_unsafe_ = true;
110 }
111
112 // We also consider that the file name is unsafe, if it's absolute.
113 // On Windows, IsAbsolute() returns false for paths starting with "/".
114 if (file_path_.IsAbsolute() ||
115 base::StartsWith(file_name_in_zip, "/",
116 base::CompareCase::INSENSITIVE_ASCII))
117 is_unsafe_ = true;
118
119 // Whether the file is encrypted is bit 0 of the flag.
120 is_encrypted_ = raw_file_info.flag & 1;
121
122 // Construct the last modified time. The timezone info is not present in
123 // zip files, so we construct the time as local time.
124 base::Time::Exploded exploded_time = {}; // Zero-clear.
125 exploded_time.year = raw_file_info.tmu_date.tm_year;
126 // The month in zip file is 0-based, whereas ours is 1-based.
127 exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
128 exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
129 exploded_time.hour = raw_file_info.tmu_date.tm_hour;
130 exploded_time.minute = raw_file_info.tmu_date.tm_min;
131 exploded_time.second = raw_file_info.tmu_date.tm_sec;
132 exploded_time.millisecond = 0;
133
134 if (!base::Time::FromUTCExploded(exploded_time, &last_modified_))
135 last_modified_ = base::Time::UnixEpoch();
136 }
137
ZipReader()138 ZipReader::ZipReader() {
139 Reset();
140 }
141
~ZipReader()142 ZipReader::~ZipReader() {
143 Close();
144 }
145
Open(const base::FilePath & zip_file_path)146 bool ZipReader::Open(const base::FilePath& zip_file_path) {
147 DCHECK(!zip_file_);
148
149 // Use of "Unsafe" function does not look good, but there is no way to do
150 // this safely on Linux. See file_util.h for details.
151 zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
152 if (!zip_file_) {
153 return false;
154 }
155
156 return OpenInternal();
157 }
158
OpenFromPlatformFile(base::PlatformFile zip_fd)159 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
160 DCHECK(!zip_file_);
161
162 #if defined(OS_POSIX) || defined(OS_FUCHSIA)
163 zip_file_ = internal::OpenFdForUnzipping(zip_fd);
164 #elif defined(OS_WIN)
165 zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
166 #endif
167 if (!zip_file_) {
168 return false;
169 }
170
171 return OpenInternal();
172 }
173
OpenFromString(const std::string & data)174 bool ZipReader::OpenFromString(const std::string& data) {
175 zip_file_ = internal::PrepareMemoryForUnzipping(data);
176 if (!zip_file_)
177 return false;
178 return OpenInternal();
179 }
180
Close()181 void ZipReader::Close() {
182 if (zip_file_) {
183 unzClose(zip_file_);
184 }
185 Reset();
186 }
187
HasMore()188 bool ZipReader::HasMore() {
189 return !reached_end_;
190 }
191
AdvanceToNextEntry()192 bool ZipReader::AdvanceToNextEntry() {
193 DCHECK(zip_file_);
194
195 // Should not go further if we already reached the end.
196 if (reached_end_)
197 return false;
198
199 unz_file_pos position = {};
200 if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
201 return false;
202 const int current_entry_index = position.num_of_file;
203 // If we are currently at the last entry, then the next position is the
204 // end of the zip file, so mark that we reached the end.
205 if (current_entry_index + 1 == num_entries_) {
206 reached_end_ = true;
207 } else {
208 DCHECK_LT(current_entry_index + 1, num_entries_);
209 if (unzGoToNextFile(zip_file_) != UNZ_OK) {
210 return false;
211 }
212 }
213 current_entry_info_.reset();
214 return true;
215 }
216
OpenCurrentEntryInZip()217 bool ZipReader::OpenCurrentEntryInZip() {
218 DCHECK(zip_file_);
219
220 unz_file_info raw_file_info = {};
221 char raw_file_name_in_zip[internal::kZipMaxPath] = {};
222 const int result = unzGetCurrentFileInfo(zip_file_,
223 &raw_file_info,
224 raw_file_name_in_zip,
225 sizeof(raw_file_name_in_zip) - 1,
226 NULL, // extraField.
227 0, // extraFieldBufferSize.
228 NULL, // szComment.
229 0); // commentBufferSize.
230 if (result != UNZ_OK)
231 return false;
232 if (raw_file_name_in_zip[0] == '\0')
233 return false;
234 current_entry_info_.reset(
235 new EntryInfo(raw_file_name_in_zip, raw_file_info));
236 return true;
237 }
238
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const239 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
240 uint64_t num_bytes_to_extract) const {
241 DCHECK(zip_file_);
242
243 const int open_result = unzOpenCurrentFile(zip_file_);
244 if (open_result != UNZ_OK)
245 return false;
246
247 if (!delegate->PrepareOutput())
248 return false;
249 std::unique_ptr<char[]> buf(new char[internal::kZipBufSize]);
250
251 uint64_t remaining_capacity = num_bytes_to_extract;
252 bool entire_file_extracted = false;
253
254 while (remaining_capacity > 0) {
255 const int num_bytes_read =
256 unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize);
257
258 if (num_bytes_read == 0) {
259 entire_file_extracted = true;
260 break;
261 } else if (num_bytes_read < 0) {
262 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
263 break;
264 } else if (num_bytes_read > 0) {
265 uint64_t num_bytes_to_write = std::min<uint64_t>(
266 remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
267 if (!delegate->WriteBytes(buf.get(), num_bytes_to_write))
268 break;
269 if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
270 // Ensures function returns true if the entire file has been read.
271 entire_file_extracted =
272 (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0);
273 }
274 CHECK_GE(remaining_capacity, num_bytes_to_write);
275 remaining_capacity -= num_bytes_to_write;
276 }
277 }
278
279 unzCloseCurrentFile(zip_file_);
280
281 if (entire_file_extracted &&
282 current_entry_info()->last_modified() != base::Time::UnixEpoch()) {
283 delegate->SetTimeModified(current_entry_info()->last_modified());
284 }
285
286 return entire_file_extracted;
287 }
288
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback)289 void ZipReader::ExtractCurrentEntryToFilePathAsync(
290 const base::FilePath& output_file_path,
291 SuccessCallback success_callback,
292 FailureCallback failure_callback,
293 const ProgressCallback& progress_callback) {
294 DCHECK(zip_file_);
295 DCHECK(current_entry_info_.get());
296
297 // If this is a directory, just create it and return.
298 if (current_entry_info()->is_directory()) {
299 if (base::CreateDirectory(output_file_path)) {
300 base::SequencedTaskRunnerHandle::Get()->PostTask(
301 FROM_HERE, std::move(success_callback));
302 } else {
303 DVLOG(1) << "Unzip failed: unable to create directory.";
304 base::SequencedTaskRunnerHandle::Get()->PostTask(
305 FROM_HERE, std::move(failure_callback));
306 }
307 return;
308 }
309
310 if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
311 DVLOG(1) << "Unzip failed: unable to open current zip entry.";
312 base::SequencedTaskRunnerHandle::Get()->PostTask(
313 FROM_HERE, std::move(failure_callback));
314 return;
315 }
316
317 base::FilePath output_dir_path = output_file_path.DirName();
318 if (!base::CreateDirectory(output_dir_path)) {
319 DVLOG(1) << "Unzip failed: unable to create containing directory.";
320 base::SequencedTaskRunnerHandle::Get()->PostTask(
321 FROM_HERE, std::move(failure_callback));
322 return;
323 }
324
325 const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
326 base::File output_file(output_file_path, flags);
327
328 if (!output_file.IsValid()) {
329 DVLOG(1) << "Unzip failed: unable to create platform file at "
330 << output_file_path.value();
331 base::SequencedTaskRunnerHandle::Get()->PostTask(
332 FROM_HERE, std::move(failure_callback));
333 return;
334 }
335
336 base::SequencedTaskRunnerHandle::Get()->PostTask(
337 FROM_HERE,
338 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
339 std::move(output_file), std::move(success_callback),
340 std::move(failure_callback), progress_callback,
341 0 /* initial offset */));
342 }
343
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const344 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
345 std::string* output) const {
346 DCHECK(output);
347 DCHECK(zip_file_);
348
349 if (max_read_bytes == 0) {
350 output->clear();
351 return true;
352 }
353
354 if (current_entry_info()->is_directory()) {
355 output->clear();
356 return true;
357 }
358
359 // The original_size() is the best hint for the real size, so it saves
360 // doing reallocations for the common case when the uncompressed size is
361 // correct. However, we need to assume that the uncompressed size could be
362 // incorrect therefore this function needs to read as much data as possible.
363 std::string contents;
364 contents.reserve(
365 static_cast<size_t>(std::min(base::checked_cast<int64_t>(max_read_bytes),
366 current_entry_info()->original_size())));
367
368 StringWriterDelegate writer(max_read_bytes, &contents);
369 if (!ExtractCurrentEntry(&writer, max_read_bytes)) {
370 if (contents.length() < max_read_bytes) {
371 // There was an error in extracting entry. If ExtractCurrentEntry()
372 // returns false, the entire file was not read - in which case
373 // contents.length() should equal |max_read_bytes| unless an error
374 // occurred which caused extraction to be aborted.
375 output->clear();
376 } else {
377 // |num_bytes| is less than the length of current entry.
378 output->swap(contents);
379 }
380 return false;
381 }
382 output->swap(contents);
383 return true;
384 }
385
OpenInternal()386 bool ZipReader::OpenInternal() {
387 DCHECK(zip_file_);
388
389 unz_global_info zip_info = {}; // Zero-clear.
390 if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
391 return false;
392 }
393 num_entries_ = zip_info.number_entry;
394 if (num_entries_ < 0)
395 return false;
396
397 // We are already at the end if the zip file is empty.
398 reached_end_ = (num_entries_ == 0);
399 return true;
400 }
401
Reset()402 void ZipReader::Reset() {
403 zip_file_ = NULL;
404 num_entries_ = 0;
405 reached_end_ = false;
406 current_entry_info_.reset();
407 }
408
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback,const int64_t offset)409 void ZipReader::ExtractChunk(base::File output_file,
410 SuccessCallback success_callback,
411 FailureCallback failure_callback,
412 const ProgressCallback& progress_callback,
413 const int64_t offset) {
414 char buffer[internal::kZipBufSize];
415
416 const int num_bytes_read = unzReadCurrentFile(zip_file_,
417 buffer,
418 internal::kZipBufSize);
419
420 if (num_bytes_read == 0) {
421 unzCloseCurrentFile(zip_file_);
422 std::move(success_callback).Run();
423 } else if (num_bytes_read < 0) {
424 DVLOG(1) << "Unzip failed: error while reading zipfile "
425 << "(" << num_bytes_read << ")";
426 std::move(failure_callback).Run();
427 } else {
428 if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
429 DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
430 std::move(failure_callback).Run();
431 return;
432 }
433
434 int64_t current_progress = offset + num_bytes_read;
435
436 progress_callback.Run(current_progress);
437
438 base::SequencedTaskRunnerHandle::Get()->PostTask(
439 FROM_HERE,
440 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
441 std::move(output_file), std::move(success_callback),
442 std::move(failure_callback), progress_callback,
443 current_progress));
444 }
445 }
446
447 // FileWriterDelegate ----------------------------------------------------------
448
FileWriterDelegate(base::File * file)449 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {}
450
FileWriterDelegate(std::unique_ptr<base::File> file)451 FileWriterDelegate::FileWriterDelegate(std::unique_ptr<base::File> file)
452 : file_(file.get()), owned_file_(std::move(file)) {}
453
~FileWriterDelegate()454 FileWriterDelegate::~FileWriterDelegate() {
455 if (!file_->SetLength(file_length_)) {
456 DVPLOG(1) << "Failed updating length of written file";
457 }
458 }
459
PrepareOutput()460 bool FileWriterDelegate::PrepareOutput() {
461 return file_->Seek(base::File::FROM_BEGIN, 0) >= 0;
462 }
463
WriteBytes(const char * data,int num_bytes)464 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
465 int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
466 if (bytes_written > 0)
467 file_length_ += bytes_written;
468 return bytes_written == num_bytes;
469 }
470
SetTimeModified(const base::Time & time)471 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
472 file_->SetTimes(base::Time::Now(), time);
473 }
474
475 // FilePathWriterDelegate ------------------------------------------------------
476
FilePathWriterDelegate(const base::FilePath & output_file_path)477 FilePathWriterDelegate::FilePathWriterDelegate(
478 const base::FilePath& output_file_path)
479 : output_file_path_(output_file_path) {}
480
~FilePathWriterDelegate()481 FilePathWriterDelegate::~FilePathWriterDelegate() {}
482
PrepareOutput()483 bool FilePathWriterDelegate::PrepareOutput() {
484 // We can't rely on parent directory entries being specified in the
485 // zip, so we make sure they are created.
486 if (!base::CreateDirectory(output_file_path_.DirName()))
487 return false;
488
489 file_.Initialize(output_file_path_,
490 base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
491 return file_.IsValid();
492 }
493
WriteBytes(const char * data,int num_bytes)494 bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) {
495 return num_bytes == file_.WriteAtCurrentPos(data, num_bytes);
496 }
497
SetTimeModified(const base::Time & time)498 void FilePathWriterDelegate::SetTimeModified(const base::Time& time) {
499 file_.Close();
500 base::TouchFile(output_file_path_, base::Time::Now(), time);
501 }
502
503 } // namespace zip
504