1 //! Structs for reading a ZIP archive
2
3 use crc32::Crc32Reader;
4 use compression::CompressionMethod;
5 use spec;
6 use result::{ZipResult, ZipError};
7 use std::io;
8 use std::io::prelude::*;
9 use std::collections::HashMap;
10
11 use podio::{ReadPodExt, LittleEndian};
12 use types::{ZipFileData, System};
13 use cp437::FromCp437;
14 use msdos_time::{TmMsDosExt, MsDosDateTime};
15
16 #[cfg(feature = "deflate")]
17 use flate2;
18 #[cfg(feature = "deflate")]
19 use flate2::read::DeflateDecoder;
20
21 #[cfg(feature = "bzip2")]
22 use bzip2::read::BzDecoder;
23
/// File-type bits of a Unix mode word, used when a mode has to be
/// synthesised for an entry that carries only MS-DOS attributes.
mod ffi {
    /// Type bits identifying a directory.
    pub const S_IFDIR: u32 = 0o040_000;
    /// Type bits identifying a regular file.
    pub const S_IFREG: u32 = 0o100_000;
}
28
29 const TM_1980_01_01 : ::time::Tm = ::time::Tm {
30 tm_sec: 0,
31 tm_min: 0,
32 tm_hour: 0,
33 tm_mday: 1,
34 tm_mon: 0,
35 tm_year: 80,
36 tm_wday: 2,
37 tm_yday: 0,
38 tm_isdst: -1,
39 tm_utcoff: 0,
40 tm_nsec: 0
41 };
42
43 /// Wrapper for reading the contents of a ZIP file.
44 ///
45 /// ```
46 /// fn doit() -> zip::result::ZipResult<()>
47 /// {
48 /// use std::io::prelude::*;
49 ///
50 /// // For demonstration purposes we read from an empty buffer.
51 /// // Normally a File object would be used.
52 /// let buf: &[u8] = &[0u8; 128];
53 /// let mut reader = std::io::Cursor::new(buf);
54 ///
55 /// let mut zip = try!(zip::ZipArchive::new(reader));
56 ///
57 /// for i in 0..zip.len()
58 /// {
59 /// let mut file = zip.by_index(i).unwrap();
60 /// println!("Filename: {}", file.name());
61 /// let first_byte = try!(file.bytes().next().unwrap());
62 /// println!("{}", first_byte);
63 /// }
64 /// Ok(())
65 /// }
66 ///
67 /// println!("Result: {:?}", doit());
68 /// ```
69 #[derive(Debug)]
70 pub struct ZipArchive<R: Read + io::Seek>
71 {
72 reader: R,
73 files: Vec<ZipFileData>,
74 names_map: HashMap<String, usize>,
75 offset: u64,
76 }
77
78 enum ZipFileReader<'a> {
79 Stored(Crc32Reader<io::Take<&'a mut Read>>),
80 #[cfg(feature = "deflate")]
81 Deflated(Crc32Reader<flate2::read::DeflateDecoder<io::Take<&'a mut Read>>>),
82 #[cfg(feature = "bzip2")]
83 Bzip2(Crc32Reader<BzDecoder<io::Take<&'a mut Read>>>),
84 }
85
86 /// A struct for reading a zip file
87 pub struct ZipFile<'a> {
88 data: &'a ZipFileData,
89 reader: ZipFileReader<'a>,
90 }
91
unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>92 fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>
93 {
94 Err(ZipError::UnsupportedArchive(detail))
95 }
96
97 impl<R: Read+io::Seek> ZipArchive<R>
98 {
99 /// Get the directory start offset and number of files. This is done in a
100 /// separate function to ease the control flow design.
get_directory_counts(mut reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64) -> ZipResult<(u64, u64, usize)>101 fn get_directory_counts(mut reader: &mut R,
102 footer: &spec::CentralDirectoryEnd,
103 cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> {
104 // Some zip files have data prepended to them, resulting in the
105 // offsets all being too small. Get the amount of error by comparing
106 // the actual file position we found the CDE at with the offset
107 // recorded in the CDE.
108 let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64)
109 .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
110 .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
111
112 let directory_start = footer.central_directory_offset as u64 + archive_offset;
113 let number_of_files = footer.number_of_files_on_this_disk as usize;
114
115 // See if there's a ZIP64 footer. The ZIP64 locator if present will
116 // have its signature 20 bytes in front of the standard footer. The
117 // standard footer, in turn, is 22+N bytes large, where N is the
118 // comment length. Therefore:
119
120 if let Err(_) = reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))) {
121 // Empty Zip files will have nothing else so this error might be fine. If
122 // not, we'll find out soon.
123 return Ok((archive_offset, directory_start, number_of_files));
124 }
125
126 let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) {
127 Ok(loc) => loc,
128 Err(ZipError::InvalidArchive(_)) => {
129 // No ZIP64 header; that's actually fine. We're done here.
130 return Ok((archive_offset, directory_start, number_of_files));
131 },
132 Err(e) => {
133 // Yikes, a real problem
134 return Err(e);
135 },
136 };
137
138 // If we got here, this is indeed a ZIP64 file.
139
140 if footer.disk_number as u32 != locator64.disk_with_central_directory {
141 return unsupported_zip_error("Support for multi-disk files is not implemented")
142 }
143
144 // We need to reassess `archive_offset`. We know where the ZIP64
145 // central-directory-end structure *should* be, but unfortunately we
146 // don't know how to precisely relate that location to our current
147 // actual offset in the file, since there may be junk at its
148 // beginning. Therefore we need to perform another search, as in
149 // read::CentralDirectoryEnd::find_and_parse, except now we search
150 // forward.
151
152 let search_upper_bound = reader.seek(io::SeekFrom::Current(0))?
153 .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
154 .ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?;
155 let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
156 &mut reader,
157 locator64.end_of_central_directory_offset,
158 search_upper_bound)?;
159
160 if footer.disk_number != footer.disk_with_central_directory {
161 return unsupported_zip_error("Support for multi-disk files is not implemented")
162 }
163
164 let directory_start = footer.central_directory_offset + archive_offset;
165 Ok((archive_offset, directory_start, footer.number_of_files as usize))
166 }
167
168 /// Opens a Zip archive and parses the central directory
new(mut reader: R) -> ZipResult<ZipArchive<R>>169 pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
170 let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
171
172 if footer.disk_number != footer.disk_with_central_directory
173 {
174 return unsupported_zip_error("Support for multi-disk files is not implemented")
175 }
176
177 let (archive_offset, directory_start, number_of_files) =
178 try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos));
179
180 let mut files = Vec::with_capacity(number_of_files);
181 let mut names_map = HashMap::new();
182
183 try!(reader.seek(io::SeekFrom::Start(directory_start)));
184 for _ in 0 .. number_of_files
185 {
186 let file = try!(central_header_to_zip_file(&mut reader, archive_offset));
187 names_map.insert(file.file_name.clone(), files.len());
188 files.push(file);
189 }
190
191 Ok(ZipArchive {
192 reader: reader,
193 files: files,
194 names_map: names_map,
195 offset: archive_offset,
196 })
197 }
198
199 /// Number of files contained in this zip.
200 ///
201 /// ```
202 /// fn iter() {
203 /// let mut zip = zip::ZipArchive::new(std::io::Cursor::new(vec![])).unwrap();
204 ///
205 /// for i in 0..zip.len() {
206 /// let mut file = zip.by_index(i).unwrap();
207 /// // Do something with file i
208 /// }
209 /// }
210 /// ```
len(&self) -> usize211 pub fn len(&self) -> usize
212 {
213 self.files.len()
214 }
215
216 /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
217 ///
218 /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
219 /// of that prepended data.
offset(&self) -> u64220 pub fn offset(&self) -> u64 {
221 self.offset
222 }
223
224 /// Search for a file entry by name
by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>225 pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>
226 {
227 let index = match self.names_map.get(name) {
228 Some(index) => *index,
229 None => { return Err(ZipError::FileNotFound); },
230 };
231 self.by_index(index)
232 }
233
234 /// Get a contained file by index
by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>235 pub fn by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>
236 {
237 if file_number >= self.files.len() { return Err(ZipError::FileNotFound); }
238 let ref data = self.files[file_number];
239 let pos = data.data_start;
240
241 if data.encrypted
242 {
243 return unsupported_zip_error("Encrypted files are not supported")
244 }
245
246 try!(self.reader.seek(io::SeekFrom::Start(pos)));
247 let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size);
248
249 let reader = match data.compression_method
250 {
251 CompressionMethod::Stored =>
252 {
253 ZipFileReader::Stored(Crc32Reader::new(
254 limit_reader,
255 data.crc32))
256 },
257 #[cfg(feature = "deflate")]
258 CompressionMethod::Deflated =>
259 {
260 let deflate_reader = DeflateDecoder::new(limit_reader);
261 ZipFileReader::Deflated(Crc32Reader::new(
262 deflate_reader,
263 data.crc32))
264 },
265 #[cfg(feature = "bzip2")]
266 CompressionMethod::Bzip2 =>
267 {
268 let bzip2_reader = BzDecoder::new(limit_reader);
269 ZipFileReader::Bzip2(Crc32Reader::new(
270 bzip2_reader,
271 data.crc32))
272 },
273 _ => return unsupported_zip_error("Compression method not supported"),
274 };
275 Ok(ZipFile { reader: reader, data: data })
276 }
277
278 /// Unwrap and return the inner reader object
279 ///
280 /// The position of the reader is undefined.
into_inner(self) -> R281 pub fn into_inner(self) -> R
282 {
283 self.reader
284 }
285 }
286
central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>287 fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>
288 {
289 // Parse central header
290 let signature = try!(reader.read_u32::<LittleEndian>());
291 if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE
292 {
293 return Err(ZipError::InvalidArchive("Invalid Central Directory header"))
294 }
295
296 let version_made_by = try!(reader.read_u16::<LittleEndian>());
297 let _version_to_extract = try!(reader.read_u16::<LittleEndian>());
298 let flags = try!(reader.read_u16::<LittleEndian>());
299 let encrypted = flags & 1 == 1;
300 let is_utf8 = flags & (1 << 11) != 0;
301 let compression_method = try!(reader.read_u16::<LittleEndian>());
302 let last_mod_time = try!(reader.read_u16::<LittleEndian>());
303 let last_mod_date = try!(reader.read_u16::<LittleEndian>());
304 let crc32 = try!(reader.read_u32::<LittleEndian>());
305 let compressed_size = try!(reader.read_u32::<LittleEndian>());
306 let uncompressed_size = try!(reader.read_u32::<LittleEndian>());
307 let file_name_length = try!(reader.read_u16::<LittleEndian>()) as usize;
308 let extra_field_length = try!(reader.read_u16::<LittleEndian>()) as usize;
309 let file_comment_length = try!(reader.read_u16::<LittleEndian>()) as usize;
310 let _disk_number = try!(reader.read_u16::<LittleEndian>());
311 let _internal_file_attributes = try!(reader.read_u16::<LittleEndian>());
312 let external_file_attributes = try!(reader.read_u32::<LittleEndian>());
313 let mut offset = try!(reader.read_u32::<LittleEndian>()) as u64;
314 let file_name_raw = try!(ReadPodExt::read_exact(reader, file_name_length));
315 let extra_field = try!(ReadPodExt::read_exact(reader, extra_field_length));
316 let file_comment_raw = try!(ReadPodExt::read_exact(reader, file_comment_length));
317
318 // Account for shifted zip offsets.
319 offset += archive_offset;
320
321 let file_name = match is_utf8
322 {
323 true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
324 false => file_name_raw.clone().from_cp437(),
325 };
326 let file_comment = match is_utf8
327 {
328 true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
329 false => file_comment_raw.from_cp437(),
330 };
331
332 // Remember end of central header
333 let return_position = try!(reader.seek(io::SeekFrom::Current(0)));
334
335 // Parse local header
336 try!(reader.seek(io::SeekFrom::Start(offset)));
337 let signature = try!(reader.read_u32::<LittleEndian>());
338 if signature != spec::LOCAL_FILE_HEADER_SIGNATURE
339 {
340 return Err(ZipError::InvalidArchive("Invalid local file header"))
341 }
342
343 try!(reader.seek(io::SeekFrom::Current(22)));
344 let file_name_length = try!(reader.read_u16::<LittleEndian>()) as u64;
345 let extra_field_length = try!(reader.read_u16::<LittleEndian>()) as u64;
346 let magic_and_header = 4 + 22 + 2 + 2;
347 let data_start = offset + magic_and_header + file_name_length + extra_field_length;
348
349 // Construct the result
350 let mut result = ZipFileData
351 {
352 system: System::from_u8((version_made_by >> 8) as u8),
353 version_made_by: version_made_by as u8,
354 encrypted: encrypted,
355 compression_method: CompressionMethod::from_u16(compression_method),
356 last_modified_time: ::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date)).unwrap_or(TM_1980_01_01),
357 crc32: crc32,
358 compressed_size: compressed_size as u64,
359 uncompressed_size: uncompressed_size as u64,
360 file_name: file_name,
361 file_name_raw: file_name_raw,
362 file_comment: file_comment,
363 header_start: offset,
364 data_start: data_start,
365 external_attributes: external_file_attributes,
366 };
367
368 match parse_extra_field(&mut result, &*extra_field) {
369 Ok(..) | Err(ZipError::Io(..)) => {},
370 Err(e) => try!(Err(e)),
371 }
372
373 // Go back after the central header
374 try!(reader.seek(io::SeekFrom::Start(return_position)));
375
376 Ok(result)
377 }
378
parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>379 fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
380 {
381 let mut reader = io::Cursor::new(data);
382
383 while (reader.position() as usize) < data.len()
384 {
385 let kind = try!(reader.read_u16::<LittleEndian>());
386 let len = try!(reader.read_u16::<LittleEndian>());
387 match kind
388 {
389 _ => try!(reader.seek(io::SeekFrom::Current(len as i64))),
390 };
391 }
392 Ok(())
393 }
394
395 /// Methods for retreiving information on zip files
396 impl<'a> ZipFile<'a> {
get_reader(&mut self) -> &mut Read397 fn get_reader(&mut self) -> &mut Read {
398 match self.reader {
399 ZipFileReader::Stored(ref mut r) => r as &mut Read,
400 #[cfg(feature = "deflate")]
401 ZipFileReader::Deflated(ref mut r) => r as &mut Read,
402 #[cfg(feature = "bzip2")]
403 ZipFileReader::Bzip2(ref mut r) => r as &mut Read,
404 }
405 }
406 /// Get the version of the file
version_made_by(&self) -> (u8, u8)407 pub fn version_made_by(&self) -> (u8, u8) {
408 (self.data.version_made_by / 10, self.data.version_made_by % 10)
409 }
410 /// Get the name of the file
name(&self) -> &str411 pub fn name(&self) -> &str {
412 &*self.data.file_name
413 }
414 /// Get the name of the file, in the raw (internal) byte representation.
name_raw(&self) -> &[u8]415 pub fn name_raw(&self) -> &[u8] {
416 &*self.data.file_name_raw
417 }
418 /// Get the comment of the file
comment(&self) -> &str419 pub fn comment(&self) -> &str {
420 &*self.data.file_comment
421 }
422 /// Get the compression method used to store the file
compression(&self) -> CompressionMethod423 pub fn compression(&self) -> CompressionMethod {
424 self.data.compression_method
425 }
426 /// Get the size of the file in the archive
compressed_size(&self) -> u64427 pub fn compressed_size(&self) -> u64 {
428 self.data.compressed_size
429 }
430 /// Get the size of the file when uncompressed
size(&self) -> u64431 pub fn size(&self) -> u64 {
432 self.data.uncompressed_size
433 }
434 /// Get the time the file was last modified
last_modified(&self) -> ::time::Tm435 pub fn last_modified(&self) -> ::time::Tm {
436 self.data.last_modified_time
437 }
438 /// Get unix mode for the file
unix_mode(&self) -> Option<u32>439 pub fn unix_mode(&self) -> Option<u32> {
440 match self.data.system {
441 System::Unix => {
442 Some(self.data.external_attributes >> 16)
443 },
444 System::Dos => {
445 // Interpret MSDOS directory bit
446 let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
447 ffi::S_IFDIR | 0o0775
448 } else {
449 ffi::S_IFREG | 0o0664
450 };
451 if 0x01 == (self.data.external_attributes & 0x01) {
452 // Read-only bit; strip write permissions
453 mode &= 0o0555;
454 }
455 Some(mode)
456 },
457 _ => None,
458 }
459 }
460 /// Get the CRC32 hash of the original file
crc32(&self) -> u32461 pub fn crc32(&self) -> u32 {
462 self.data.crc32
463 }
464
465 /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64466 pub fn data_start(&self) -> u64 {
467 self.data.data_start
468 }
469 }
470
471 impl<'a> Read for ZipFile<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>472 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
473 self.get_reader().read(buf)
474 }
475 }
476
#[cfg(test)]
mod test {
    use std::io::Cursor;
    use super::ZipArchive;

    /// Opening the `invalid_offset.zip` fixture must be rejected with an
    /// error.
    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../tests/data/invalid_offset.zip").to_vec();
        let outcome = ZipArchive::new(Cursor::new(bytes));
        assert!(outcome.is_err());
    }

    /// The `zip64_demo.zip` fixture must open and expose exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
        let archive = ZipArchive::new(Cursor::new(bytes)).unwrap();
        assert!(archive.len() == 1);
    }
}
501