1 //! Structs for reading a ZIP archive
2 
3 use crc32::Crc32Reader;
4 use compression::CompressionMethod;
5 use spec;
6 use result::{ZipResult, ZipError};
7 use std::io;
8 use std::io::prelude::*;
9 use std::collections::HashMap;
10 
11 use podio::{ReadPodExt, LittleEndian};
12 use types::{ZipFileData, System};
13 use cp437::FromCp437;
14 use msdos_time::{TmMsDosExt, MsDosDateTime};
15 
16 #[cfg(feature = "deflate")]
17 use flate2;
18 #[cfg(feature = "deflate")]
19 use flate2::read::DeflateDecoder;
20 
21 #[cfg(feature = "bzip2")]
22 use bzip2::read::BzDecoder;
23 
/// File-type bits of a Unix mode word, used when a mode has to be synthesised
/// for entries that only carry MS-DOS attributes.
mod ffi {
    /// Type bit OR-ed into the mode of entries flagged as directories.
    pub const S_IFDIR: u32 = 0o040000;
    /// Type bit OR-ed into the mode of all other entries.
    pub const S_IFREG: u32 = 0o100000;
}
28 
29 const TM_1980_01_01 : ::time::Tm = ::time::Tm {
30 	tm_sec: 0,
31 	tm_min: 0,
32 	tm_hour: 0,
33 	tm_mday: 1,
34 	tm_mon: 0,
35 	tm_year: 80,
36 	tm_wday: 2,
37 	tm_yday: 0,
38 	tm_isdst: -1,
39 	tm_utcoff: 0,
40 	tm_nsec: 0
41 };
42 
/// Wrapper for reading the contents of a ZIP file.
///
/// The central directory is parsed once by `ZipArchive::new`; individual
/// entries are then opened with `by_index` or `by_name`.
///
/// # Examples
///
/// ```
/// fn doit() -> zip::result::ZipResult<()>
/// {
///     use std::io::prelude::*;
///
///     // For demonstration purposes we read from a zero-filled buffer.
///     // Normally a File object would be used.
///     let buf: &[u8] = &[0u8; 128];
///     let mut reader = std::io::Cursor::new(buf);
///
///     let mut zip = try!(zip::ZipArchive::new(reader));
///
///     for i in 0..zip.len()
///     {
///         let mut file = zip.by_index(i).unwrap();
///         println!("Filename: {}", file.name());
///         let first_byte = try!(file.bytes().next().unwrap());
///         println!("{}", first_byte);
///     }
///     Ok(())
/// }
///
/// println!("Result: {:?}", doit());
/// ```
#[derive(Debug)]
pub struct ZipArchive<R: Read + io::Seek>
{
    /// Underlying source; `by_index` seeks it to an entry's data and
    /// `into_inner` hands it back.
    reader: R,
    /// Metadata of every entry, in the order the central directory lists them.
    files: Vec<ZipFileData>,
    /// Maps an entry's decoded file name to its index in `files`. When two
    /// entries share a name, the one listed later replaces the earlier mapping.
    names_map: HashMap<String, usize>,
    /// Number of bytes found in front of the archive proper (see `offset()`).
    offset: u64,
}
77 
/// Reader stack for a single entry, one variant per supported compression
/// method.
///
/// Every variant is built by `ZipArchive::by_index` around an `io::Take` that
/// limits reads to the entry's compressed size, and is wrapped in a
/// `Crc32Reader` constructed with the CRC-32 recorded for the entry.
enum ZipFileReader<'a> {
    /// Entry stored without compression.
    Stored(Crc32Reader<io::Take<&'a mut Read>>),
    /// Deflate-compressed entry; only available with the `deflate` feature.
    #[cfg(feature = "deflate")]
    Deflated(Crc32Reader<flate2::read::DeflateDecoder<io::Take<&'a mut Read>>>),
    /// Bzip2-compressed entry; only available with the `bzip2` feature.
    #[cfg(feature = "bzip2")]
    Bzip2(Crc32Reader<BzDecoder<io::Take<&'a mut Read>>>),
}
85 
/// A struct for reading a zip file
///
/// Values are handed out by `ZipArchive::by_index` / `ZipArchive::by_name`
/// and borrow the archive for as long as they live. The entry's contents are
/// obtained through the `Read` implementation.
pub struct ZipFile<'a> {
    /// Metadata of the entry, borrowed from the owning archive.
    data: &'a ZipFileData,
    /// Reader positioned at the entry's data, matching its compression method.
    reader: ZipFileReader<'a>,
}
91 
unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>92 fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>
93 {
94     Err(ZipError::UnsupportedArchive(detail))
95 }
96 
97 impl<R: Read+io::Seek> ZipArchive<R>
98 {
99     /// Get the directory start offset and number of files. This is done in a
100     /// separate function to ease the control flow design.
get_directory_counts(mut reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64) -> ZipResult<(u64, u64, usize)>101     fn get_directory_counts(mut reader: &mut R,
102                             footer: &spec::CentralDirectoryEnd,
103                             cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> {
104         // Some zip files have data prepended to them, resulting in the
105         // offsets all being too small. Get the amount of error by comparing
106         // the actual file position we found the CDE at with the offset
107         // recorded in the CDE.
108         let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64)
109             .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
110             .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
111 
112         let directory_start = footer.central_directory_offset as u64 + archive_offset;
113         let number_of_files = footer.number_of_files_on_this_disk as usize;
114 
115         // See if there's a ZIP64 footer. The ZIP64 locator if present will
116         // have its signature 20 bytes in front of the standard footer. The
117         // standard footer, in turn, is 22+N bytes large, where N is the
118         // comment length. Therefore:
119 
120         if let Err(_) = reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))) {
121             // Empty Zip files will have nothing else so this error might be fine. If
122             // not, we'll find out soon.
123             return Ok((archive_offset, directory_start, number_of_files));
124         }
125 
126         let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) {
127             Ok(loc) => loc,
128             Err(ZipError::InvalidArchive(_)) => {
129                 // No ZIP64 header; that's actually fine. We're done here.
130                 return Ok((archive_offset, directory_start, number_of_files));
131             },
132             Err(e) => {
133                 // Yikes, a real problem
134                 return Err(e);
135             },
136         };
137 
138         // If we got here, this is indeed a ZIP64 file.
139 
140         if footer.disk_number as u32 != locator64.disk_with_central_directory {
141             return unsupported_zip_error("Support for multi-disk files is not implemented")
142         }
143 
144         // We need to reassess `archive_offset`. We know where the ZIP64
145         // central-directory-end structure *should* be, but unfortunately we
146         // don't know how to precisely relate that location to our current
147         // actual offset in the file, since there may be junk at its
148         // beginning. Therefore we need to perform another search, as in
149         // read::CentralDirectoryEnd::find_and_parse, except now we search
150         // forward.
151 
152         let search_upper_bound = reader.seek(io::SeekFrom::Current(0))?
153             .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
154             .ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?;
155         let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
156             &mut reader,
157             locator64.end_of_central_directory_offset,
158             search_upper_bound)?;
159 
160         if footer.disk_number != footer.disk_with_central_directory {
161             return unsupported_zip_error("Support for multi-disk files is not implemented")
162         }
163 
164         let directory_start = footer.central_directory_offset + archive_offset;
165         Ok((archive_offset, directory_start, footer.number_of_files as usize))
166     }
167 
168     /// Opens a Zip archive and parses the central directory
new(mut reader: R) -> ZipResult<ZipArchive<R>>169     pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
170         let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
171 
172         if footer.disk_number != footer.disk_with_central_directory
173         {
174             return unsupported_zip_error("Support for multi-disk files is not implemented")
175         }
176 
177         let (archive_offset, directory_start, number_of_files) =
178             try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos));
179 
180         let mut files = Vec::with_capacity(number_of_files);
181         let mut names_map = HashMap::new();
182 
183         try!(reader.seek(io::SeekFrom::Start(directory_start)));
184         for _ in 0 .. number_of_files
185         {
186             let file = try!(central_header_to_zip_file(&mut reader, archive_offset));
187             names_map.insert(file.file_name.clone(), files.len());
188             files.push(file);
189         }
190 
191         Ok(ZipArchive {
192             reader: reader,
193             files: files,
194             names_map: names_map,
195             offset: archive_offset,
196         })
197     }
198 
    /// Number of files contained in this zip.
    ///
    /// This is the number of central directory entries read when the archive
    /// was opened. Passing this value or anything larger to `by_index` yields
    /// `ZipError::FileNotFound`.
    ///
    /// ```
    /// fn iter() {
    ///     let mut zip = zip::ZipArchive::new(std::io::Cursor::new(vec![])).unwrap();
    ///
    ///     for i in 0..zip.len() {
    ///         let mut file = zip.by_index(i).unwrap();
    ///         // Do something with file i
    ///     }
    /// }
    /// ```
    pub fn len(&self) -> usize
    {
        self.files.len()
    }
215 
    /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
    ///
    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
    /// of that prepended data.
    ///
    /// The value is determined once, while the archive is opened, and is not recomputed afterwards.
    pub fn offset(&self) -> u64 {
        self.offset
    }
223 
224     /// Search for a file entry by name
by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>225     pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>
226     {
227         let index = match self.names_map.get(name) {
228             Some(index) => *index,
229             None => { return Err(ZipError::FileNotFound); },
230         };
231         self.by_index(index)
232     }
233 
234     /// Get a contained file by index
by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>235     pub fn by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>
236     {
237         if file_number >= self.files.len() { return Err(ZipError::FileNotFound); }
238         let ref data = self.files[file_number];
239         let pos = data.data_start;
240 
241         if data.encrypted
242         {
243             return unsupported_zip_error("Encrypted files are not supported")
244         }
245 
246         try!(self.reader.seek(io::SeekFrom::Start(pos)));
247         let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size);
248 
249         let reader = match data.compression_method
250         {
251             CompressionMethod::Stored =>
252             {
253                 ZipFileReader::Stored(Crc32Reader::new(
254                     limit_reader,
255                     data.crc32))
256             },
257             #[cfg(feature = "deflate")]
258             CompressionMethod::Deflated =>
259             {
260                 let deflate_reader = DeflateDecoder::new(limit_reader);
261                 ZipFileReader::Deflated(Crc32Reader::new(
262                     deflate_reader,
263                     data.crc32))
264             },
265             #[cfg(feature = "bzip2")]
266             CompressionMethod::Bzip2 =>
267             {
268                 let bzip2_reader = BzDecoder::new(limit_reader);
269                 ZipFileReader::Bzip2(Crc32Reader::new(
270                     bzip2_reader,
271                     data.crc32))
272             },
273             _ => return unsupported_zip_error("Compression method not supported"),
274         };
275         Ok(ZipFile { reader: reader, data: data })
276     }
277 
    /// Unwrap and return the inner reader object
    ///
    /// The position of the reader is undefined.
    ///
    /// The archive is consumed; the parsed entry metadata is dropped along
    /// with it.
    pub fn into_inner(self) -> R
    {
        self.reader
    }
285 }
286 
central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>287 fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>
288 {
289     // Parse central header
290     let signature = try!(reader.read_u32::<LittleEndian>());
291     if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE
292     {
293         return Err(ZipError::InvalidArchive("Invalid Central Directory header"))
294     }
295 
296     let version_made_by = try!(reader.read_u16::<LittleEndian>());
297     let _version_to_extract = try!(reader.read_u16::<LittleEndian>());
298     let flags = try!(reader.read_u16::<LittleEndian>());
299     let encrypted = flags & 1 == 1;
300     let is_utf8 = flags & (1 << 11) != 0;
301     let compression_method = try!(reader.read_u16::<LittleEndian>());
302     let last_mod_time = try!(reader.read_u16::<LittleEndian>());
303     let last_mod_date = try!(reader.read_u16::<LittleEndian>());
304     let crc32 = try!(reader.read_u32::<LittleEndian>());
305     let compressed_size = try!(reader.read_u32::<LittleEndian>());
306     let uncompressed_size = try!(reader.read_u32::<LittleEndian>());
307     let file_name_length = try!(reader.read_u16::<LittleEndian>()) as usize;
308     let extra_field_length = try!(reader.read_u16::<LittleEndian>()) as usize;
309     let file_comment_length = try!(reader.read_u16::<LittleEndian>()) as usize;
310     let _disk_number = try!(reader.read_u16::<LittleEndian>());
311     let _internal_file_attributes = try!(reader.read_u16::<LittleEndian>());
312     let external_file_attributes = try!(reader.read_u32::<LittleEndian>());
313     let mut offset = try!(reader.read_u32::<LittleEndian>()) as u64;
314     let file_name_raw = try!(ReadPodExt::read_exact(reader, file_name_length));
315     let extra_field = try!(ReadPodExt::read_exact(reader, extra_field_length));
316     let file_comment_raw  = try!(ReadPodExt::read_exact(reader, file_comment_length));
317 
318     // Account for shifted zip offsets.
319     offset += archive_offset;
320 
321     let file_name = match is_utf8
322     {
323         true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
324         false => file_name_raw.clone().from_cp437(),
325     };
326     let file_comment = match is_utf8
327     {
328         true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
329         false => file_comment_raw.from_cp437(),
330     };
331 
332     // Remember end of central header
333     let return_position = try!(reader.seek(io::SeekFrom::Current(0)));
334 
335     // Parse local header
336     try!(reader.seek(io::SeekFrom::Start(offset)));
337     let signature = try!(reader.read_u32::<LittleEndian>());
338     if signature != spec::LOCAL_FILE_HEADER_SIGNATURE
339     {
340         return Err(ZipError::InvalidArchive("Invalid local file header"))
341     }
342 
343     try!(reader.seek(io::SeekFrom::Current(22)));
344     let file_name_length = try!(reader.read_u16::<LittleEndian>()) as u64;
345     let extra_field_length = try!(reader.read_u16::<LittleEndian>()) as u64;
346     let magic_and_header = 4 + 22 + 2 + 2;
347     let data_start = offset + magic_and_header + file_name_length + extra_field_length;
348 
349     // Construct the result
350     let mut result = ZipFileData
351     {
352         system: System::from_u8((version_made_by >> 8) as u8),
353         version_made_by: version_made_by as u8,
354         encrypted: encrypted,
355         compression_method: CompressionMethod::from_u16(compression_method),
356         last_modified_time: ::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date)).unwrap_or(TM_1980_01_01),
357         crc32: crc32,
358         compressed_size: compressed_size as u64,
359         uncompressed_size: uncompressed_size as u64,
360         file_name: file_name,
361         file_name_raw: file_name_raw,
362         file_comment: file_comment,
363         header_start: offset,
364         data_start: data_start,
365         external_attributes: external_file_attributes,
366     };
367 
368     match parse_extra_field(&mut result, &*extra_field) {
369         Ok(..) | Err(ZipError::Io(..)) => {},
370         Err(e) => try!(Err(e)),
371     }
372 
373     // Go back after the central header
374     try!(reader.seek(io::SeekFrom::Start(return_position)));
375 
376     Ok(result)
377 }
378 
parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>379 fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
380 {
381     let mut reader = io::Cursor::new(data);
382 
383     while (reader.position() as usize) < data.len()
384     {
385         let kind = try!(reader.read_u16::<LittleEndian>());
386         let len = try!(reader.read_u16::<LittleEndian>());
387         match kind
388         {
389             _ => try!(reader.seek(io::SeekFrom::Current(len as i64))),
390         };
391     }
392     Ok(())
393 }
394 
395 /// Methods for retreiving information on zip files
396 impl<'a> ZipFile<'a> {
get_reader(&mut self) -> &mut Read397     fn get_reader(&mut self) -> &mut Read {
398         match self.reader {
399            ZipFileReader::Stored(ref mut r) => r as &mut Read,
400            #[cfg(feature = "deflate")]
401            ZipFileReader::Deflated(ref mut r) => r as &mut Read,
402            #[cfg(feature = "bzip2")]
403            ZipFileReader::Bzip2(ref mut r) => r as &mut Read,
404         }
405     }
406     /// Get the version of the file
version_made_by(&self) -> (u8, u8)407     pub fn version_made_by(&self) -> (u8, u8) {
408         (self.data.version_made_by / 10, self.data.version_made_by % 10)
409     }
410     /// Get the name of the file
name(&self) -> &str411     pub fn name(&self) -> &str {
412         &*self.data.file_name
413     }
414     /// Get the name of the file, in the raw (internal) byte representation.
name_raw(&self) -> &[u8]415     pub fn name_raw(&self) -> &[u8] {
416         &*self.data.file_name_raw
417     }
418     /// Get the comment of the file
comment(&self) -> &str419     pub fn comment(&self) -> &str {
420         &*self.data.file_comment
421     }
422     /// Get the compression method used to store the file
compression(&self) -> CompressionMethod423     pub fn compression(&self) -> CompressionMethod {
424         self.data.compression_method
425     }
426     /// Get the size of the file in the archive
compressed_size(&self) -> u64427     pub fn compressed_size(&self) -> u64 {
428         self.data.compressed_size
429     }
430     /// Get the size of the file when uncompressed
size(&self) -> u64431     pub fn size(&self) -> u64 {
432         self.data.uncompressed_size
433     }
434     /// Get the time the file was last modified
last_modified(&self) -> ::time::Tm435     pub fn last_modified(&self) -> ::time::Tm {
436         self.data.last_modified_time
437     }
438     /// Get unix mode for the file
unix_mode(&self) -> Option<u32>439     pub fn unix_mode(&self) -> Option<u32> {
440         match self.data.system {
441             System::Unix => {
442                 Some(self.data.external_attributes >> 16)
443             },
444             System::Dos => {
445                 // Interpret MSDOS directory bit
446                 let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
447                     ffi::S_IFDIR | 0o0775
448                 } else {
449                     ffi::S_IFREG | 0o0664
450                 };
451                 if 0x01 == (self.data.external_attributes & 0x01) {
452                     // Read-only bit; strip write permissions
453                     mode &= 0o0555;
454                 }
455                 Some(mode)
456             },
457             _ => None,
458         }
459     }
460     /// Get the CRC32 hash of the original file
crc32(&self) -> u32461     pub fn crc32(&self) -> u32 {
462         self.data.crc32
463     }
464 
465     /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64466     pub fn data_start(&self) -> u64 {
467         self.data.data_start
468     }
469 }
470 
471 impl<'a> Read for ZipFile<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>472      fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
473          self.get_reader().read(buf)
474      }
475 }
476 
#[cfg(test)]
mod test {
    use std::io;
    use super::ZipArchive;

    /// An archive whose recorded offsets are inconsistent must be rejected
    /// when it is opened.
    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../tests/data/invalid_offset.zip").to_vec();
        let outcome = ZipArchive::new(io::Cursor::new(bytes));
        assert!(outcome.is_err());
    }

    /// The ZIP64 demo archive must open and expose exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert_eq!(archive.len(), 1);
    }
}
501