1 //===-- ZipFile.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/ZipFile.h"
10 #include "lldb/Utility/DataBuffer.h"
11 #include "lldb/Utility/FileSpec.h"
12 #include "llvm/Support/Endian.h"
13 
14 using namespace lldb_private;
15 using namespace llvm::support;
16 
17 namespace {
18 
19 // Zip headers.
20 // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
21 
22 // The end of central directory record.
23 struct EocdRecord {
24   static constexpr char kSignature[] = {0x50, 0x4b, 0x05, 0x06};
25   char signature[sizeof(kSignature)];
26   unaligned_uint16_t disks;
27   unaligned_uint16_t cd_start_disk;
28   unaligned_uint16_t cds_on_this_disk;
29   unaligned_uint16_t cd_records;
30   unaligned_uint32_t cd_size;
31   unaligned_uint32_t cd_offset;
32   unaligned_uint16_t comment_length;
33 };
34 
35 // Logical find limit for the end of central directory record.
36 const size_t kEocdRecordFindLimit =
37     sizeof(EocdRecord) +
38     std::numeric_limits<decltype(EocdRecord::comment_length)>::max();
39 
40 // Central directory record.
41 struct CdRecord {
42   static constexpr char kSignature[] = {0x50, 0x4b, 0x01, 0x02};
43   char signature[sizeof(kSignature)];
44   unaligned_uint16_t version_made_by;
45   unaligned_uint16_t version_needed_to_extract;
46   unaligned_uint16_t general_purpose_bit_flag;
47   unaligned_uint16_t compression_method;
48   unaligned_uint16_t last_modification_time;
49   unaligned_uint16_t last_modification_date;
50   unaligned_uint32_t crc32;
51   unaligned_uint32_t compressed_size;
52   unaligned_uint32_t uncompressed_size;
53   unaligned_uint16_t file_name_length;
54   unaligned_uint16_t extra_field_length;
55   unaligned_uint16_t comment_length;
56   unaligned_uint16_t file_start_disk;
57   unaligned_uint16_t internal_file_attributes;
58   unaligned_uint32_t external_file_attributes;
59   unaligned_uint32_t local_file_header_offset;
60 };
61 // Immediately after CdRecord,
62 // - file name (file_name_length)
63 // - extra field (extra_field_length)
64 // - comment (comment_length)
65 
66 // Local file header.
67 struct LocalFileHeader {
68   static constexpr char kSignature[] = {0x50, 0x4b, 0x03, 0x04};
69   char signature[sizeof(kSignature)];
70   unaligned_uint16_t version_needed_to_extract;
71   unaligned_uint16_t general_purpose_bit_flag;
72   unaligned_uint16_t compression_method;
73   unaligned_uint16_t last_modification_time;
74   unaligned_uint16_t last_modification_date;
75   unaligned_uint32_t crc32;
76   unaligned_uint32_t compressed_size;
77   unaligned_uint32_t uncompressed_size;
78   unaligned_uint16_t file_name_length;
79   unaligned_uint16_t extra_field_length;
80 };
81 // Immediately after LocalFileHeader,
82 // - file name (file_name_length)
83 // - extra field (extra_field_length)
84 // - file data (should be compressed_size == uncompressed_size, page aligned)
85 
FindEocdRecord(lldb::DataBufferSP zip_data)86 const EocdRecord *FindEocdRecord(lldb::DataBufferSP zip_data) {
87   // Find backward the end of central directory record from the end of the zip
88   // file to the find limit.
89   const uint8_t *zip_data_end = zip_data->GetBytes() + zip_data->GetByteSize();
90   const uint8_t *find_limit = zip_data_end - kEocdRecordFindLimit;
91   const uint8_t *p = zip_data_end - sizeof(EocdRecord);
92   for (; p >= zip_data->GetBytes() && p >= find_limit; p--) {
93     auto eocd = reinterpret_cast<const EocdRecord *>(p);
94     if (::memcmp(eocd->signature, EocdRecord::kSignature,
95                  sizeof(EocdRecord::kSignature)) == 0) {
96       // Found the end of central directory. Sanity check the values.
97       if (eocd->cd_records * sizeof(CdRecord) > eocd->cd_size ||
98           zip_data->GetBytes() + eocd->cd_offset + eocd->cd_size > p)
99         return nullptr;
100 
101       // This is a valid end of central directory record.
102       return eocd;
103     }
104   }
105   return nullptr;
106 }
107 
GetFile(lldb::DataBufferSP zip_data,uint32_t local_file_header_offset,lldb::offset_t & file_offset,lldb::offset_t & file_size)108 bool GetFile(lldb::DataBufferSP zip_data, uint32_t local_file_header_offset,
109              lldb::offset_t &file_offset, lldb::offset_t &file_size) {
110   auto local_file_header = reinterpret_cast<const LocalFileHeader *>(
111       zip_data->GetBytes() + local_file_header_offset);
112   // The signature should match.
113   if (::memcmp(local_file_header->signature, LocalFileHeader::kSignature,
114                sizeof(LocalFileHeader::kSignature)) != 0)
115     return false;
116 
117   auto file_data = reinterpret_cast<const uint8_t *>(local_file_header + 1) +
118                    local_file_header->file_name_length +
119                    local_file_header->extra_field_length;
120   // File should be uncompressed.
121   if (local_file_header->compressed_size !=
122       local_file_header->uncompressed_size)
123     return false;
124 
125   // This file is valid. Return the file offset and size.
126   file_offset = file_data - zip_data->GetBytes();
127   file_size = local_file_header->uncompressed_size;
128   return true;
129 }
130 
FindFile(lldb::DataBufferSP zip_data,const EocdRecord * eocd,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)131 bool FindFile(lldb::DataBufferSP zip_data, const EocdRecord *eocd,
132               const llvm::StringRef file_path, lldb::offset_t &file_offset,
133               lldb::offset_t &file_size) {
134   // Find the file from the central directory records.
135   auto cd = reinterpret_cast<const CdRecord *>(zip_data->GetBytes() +
136                                                eocd->cd_offset);
137   size_t cd_records = eocd->cd_records;
138   for (size_t i = 0; i < cd_records; i++) {
139     // The signature should match.
140     if (::memcmp(cd->signature, CdRecord::kSignature,
141                  sizeof(CdRecord::kSignature)) != 0)
142       return false;
143 
144     // Sanity check the file name values.
145     auto file_name = reinterpret_cast<const char *>(cd + 1);
146     size_t file_name_length = cd->file_name_length;
147     if (file_name + file_name_length >= reinterpret_cast<const char *>(eocd) ||
148         file_name_length == 0)
149       return false;
150 
151     // Compare the file name.
152     if (file_path == llvm::StringRef(file_name, file_name_length)) {
153       // Found the file.
154       return GetFile(zip_data, cd->local_file_header_offset, file_offset,
155                      file_size);
156     } else {
157       // Skip to the next central directory record.
158       cd = reinterpret_cast<const CdRecord *>(
159           reinterpret_cast<const char *>(cd) + sizeof(CdRecord) +
160           cd->file_name_length + cd->extra_field_length + cd->comment_length);
161       // Sanity check the pointer.
162       if (reinterpret_cast<const char *>(cd) >=
163           reinterpret_cast<const char *>(eocd))
164         return false;
165     }
166   }
167 
168   return false;
169 }
170 
171 } // end anonymous namespace
172 
Find(lldb::DataBufferSP zip_data,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)173 bool ZipFile::Find(lldb::DataBufferSP zip_data, const llvm::StringRef file_path,
174                    lldb::offset_t &file_offset, lldb::offset_t &file_size) {
175   const EocdRecord *eocd = FindEocdRecord(zip_data);
176   if (!eocd)
177     return false;
178 
179   return FindFile(zip_data, eocd, file_path, file_offset, file_size);
180 }
181