1 //! Types for reading ZIP archives
2 
3 use crate::compression::CompressionMethod;
4 use crate::crc32::Crc32Reader;
5 use crate::result::{InvalidPassword, ZipError, ZipResult};
6 use crate::spec;
7 use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
8 use std::borrow::Cow;
9 use std::collections::HashMap;
10 use std::io::{self, prelude::*};
11 use std::path::{Component, Path};
12 
13 use crate::cp437::FromCp437;
14 use crate::types::{DateTime, System, ZipFileData};
15 use byteorder::{LittleEndian, ReadBytesExt};
16 
17 #[cfg(any(
18     feature = "deflate",
19     feature = "deflate-miniz",
20     feature = "deflate-zlib"
21 ))]
22 use flate2::read::DeflateDecoder;
23 
24 #[cfg(feature = "bzip2")]
25 use bzip2::read::BzDecoder;
26 
/// Constants named after, and numerically equal to, the POSIX `S_IFDIR` and
/// `S_IFREG` file-type bits of a Unix mode value.
mod ffi {
    /// File-type bit pattern for a directory.
    pub const S_IFDIR: u32 = 0o040_000;
    /// File-type bit pattern for a regular file.
    pub const S_IFREG: u32 = 0o100_000;
}
31 
/// ZIP archive reader
///
/// Holds the underlying reader together with the entry metadata parsed from
/// the archive's central directory, and hands out [`ZipFile`] readers for
/// individual entries.
///
/// ```no_run
/// use std::io::prelude::*;
/// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
///     let mut zip = zip::ZipArchive::new(reader)?;
///
///     for i in 0..zip.len() {
///         let mut file = zip.by_index(i)?;
///         println!("Filename: {}", file.name());
///         std::io::copy(&mut file, &mut std::io::stdout())?;
///     }
///
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct ZipArchive<R> {
    /// Underlying reader the archive is read from.
    reader: R,
    /// Metadata of every entry, in the order the central directory lists them.
    files: Vec<ZipFileData>,
    /// Maps an entry's file name to its index in `files`.
    names_map: HashMap<String, usize>,
    /// Number of bytes prepended to the archive (see [`ZipArchive::offset`]).
    offset: u64,
    /// Raw bytes of the archive-level comment.
    comment: Vec<u8>,
}
56 
/// Reader over an entry's stored bytes, with optional ZipCrypto decryption
/// applied before any decompression.
enum CryptoReader<'a> {
    /// No password is in use: bytes are read from the limited reader as-is.
    Plaintext(io::Take<&'a mut dyn Read>),
    /// Bytes pass through a ZipCrypto reader whose password has been validated.
    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
}
61 
62 impl<'a> Read for CryptoReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>63     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
64         match self {
65             CryptoReader::Plaintext(r) => r.read(buf),
66             CryptoReader::ZipCrypto(r) => r.read(buf),
67         }
68     }
69 }
70 
71 impl<'a> CryptoReader<'a> {
72     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>73     pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
74         match self {
75             CryptoReader::Plaintext(r) => r,
76             CryptoReader::ZipCrypto(r) => r.into_inner(),
77         }
78     }
79 }
80 
/// The reader stack used to pull bytes out of a single archive entry.
enum ZipFileReader<'a> {
    /// Placeholder used until a real reader is installed; reading from it or
    /// unwrapping it panics.
    NoReader,
    /// The entry's bytes exactly as stored in the archive (no decompression).
    Raw(io::Take<&'a mut dyn io::Read>),
    /// A `Stored` entry: the crypto reader wrapped in a `Crc32Reader`.
    Stored(Crc32Reader<CryptoReader<'a>>),
    /// A `Deflated` entry: deflate decoder over the crypto reader, wrapped in
    /// a `Crc32Reader`.
    #[cfg(any(
        feature = "deflate",
        feature = "deflate-miniz",
        feature = "deflate-zlib"
    ))]
    Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
    /// A `Bzip2` entry: bzip2 decoder over the crypto reader, wrapped in a
    /// `Crc32Reader`.
    #[cfg(feature = "bzip2")]
    Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
}
94 
95 impl<'a> Read for ZipFileReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>96     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
97         match self {
98             ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
99             ZipFileReader::Raw(r) => r.read(buf),
100             ZipFileReader::Stored(r) => r.read(buf),
101             #[cfg(any(
102                 feature = "deflate",
103                 feature = "deflate-miniz",
104                 feature = "deflate-zlib"
105             ))]
106             ZipFileReader::Deflated(r) => r.read(buf),
107             #[cfg(feature = "bzip2")]
108             ZipFileReader::Bzip2(r) => r.read(buf),
109         }
110     }
111 }
112 
113 impl<'a> ZipFileReader<'a> {
114     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>115     pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
116         match self {
117             ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
118             ZipFileReader::Raw(r) => r,
119             ZipFileReader::Stored(r) => r.into_inner().into_inner(),
120             #[cfg(any(
121                 feature = "deflate",
122                 feature = "deflate-miniz",
123                 feature = "deflate-zlib"
124             ))]
125             ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(),
126             #[cfg(feature = "bzip2")]
127             ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
128         }
129     }
130 }
131 
/// A struct for reading a zip file
///
/// The decoding reader is created lazily: an entry opened for decoding starts
/// with `crypto_reader` set and `reader` in the `NoReader` state, and the
/// real reader is built from `crypto_reader` the first time one is requested.
pub struct ZipFile<'a> {
    /// Metadata describing this entry.
    data: Cow<'a, ZipFileData>,
    /// Crypto-layer reader waiting to be wrapped; taken (leaving `None`) when
    /// `reader` is built.
    crypto_reader: Option<CryptoReader<'a>>,
    /// The active reader, or `ZipFileReader::NoReader` if not built yet.
    reader: ZipFileReader<'a>,
}
138 
find_content<'a>( data: &mut ZipFileData, reader: &'a mut (impl Read + Seek), ) -> ZipResult<io::Take<&'a mut dyn Read>>139 fn find_content<'a>(
140     data: &mut ZipFileData,
141     reader: &'a mut (impl Read + Seek),
142 ) -> ZipResult<io::Take<&'a mut dyn Read>> {
143     // Parse local header
144     reader.seek(io::SeekFrom::Start(data.header_start))?;
145     let signature = reader.read_u32::<LittleEndian>()?;
146     if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
147         return Err(ZipError::InvalidArchive("Invalid local file header"));
148     }
149 
150     reader.seek(io::SeekFrom::Current(22))?;
151     let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
152     let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
153     let magic_and_header = 4 + 22 + 2 + 2;
154     data.data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
155 
156     reader.seek(io::SeekFrom::Start(data.data_start))?;
157     Ok((reader as &mut dyn Read).take(data.compressed_size))
158 }
159 
make_crypto_reader<'a>( compression_method: crate::compression::CompressionMethod, crc32: u32, last_modified_time: DateTime, using_data_descriptor: bool, reader: io::Take<&'a mut dyn io::Read>, password: Option<&[u8]>, ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>>160 fn make_crypto_reader<'a>(
161     compression_method: crate::compression::CompressionMethod,
162     crc32: u32,
163     last_modified_time: DateTime,
164     using_data_descriptor: bool,
165     reader: io::Take<&'a mut dyn io::Read>,
166     password: Option<&[u8]>,
167 ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
168     #[allow(deprecated)]
169     {
170         if let CompressionMethod::Unsupported(_) = compression_method {
171             return unsupported_zip_error("Compression method not supported");
172         }
173     }
174 
175     let reader = match password {
176         None => CryptoReader::Plaintext(reader),
177         Some(password) => {
178             let validator = if using_data_descriptor {
179                 ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart())
180             } else {
181                 ZipCryptoValidator::PkzipCrc32(crc32)
182             };
183             match ZipCryptoReader::new(reader, password).validate(validator)? {
184                 None => return Ok(Err(InvalidPassword)),
185                 Some(r) => CryptoReader::ZipCrypto(r),
186             }
187         }
188     };
189     Ok(Ok(reader))
190 }
191 
make_reader<'a>( compression_method: CompressionMethod, crc32: u32, reader: CryptoReader<'a>, ) -> ZipFileReader<'a>192 fn make_reader<'a>(
193     compression_method: CompressionMethod,
194     crc32: u32,
195     reader: CryptoReader<'a>,
196 ) -> ZipFileReader<'a> {
197     match compression_method {
198         CompressionMethod::Stored => ZipFileReader::Stored(Crc32Reader::new(reader, crc32)),
199         #[cfg(any(
200             feature = "deflate",
201             feature = "deflate-miniz",
202             feature = "deflate-zlib"
203         ))]
204         CompressionMethod::Deflated => {
205             let deflate_reader = DeflateDecoder::new(reader);
206             ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32))
207         }
208         #[cfg(feature = "bzip2")]
209         CompressionMethod::Bzip2 => {
210             let bzip2_reader = BzDecoder::new(reader);
211             ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32))
212         }
213         _ => panic!("Compression method not supported"),
214     }
215 }
216 
217 impl<R: Read + io::Seek> ZipArchive<R> {
218     /// Get the directory start offset and number of files. This is done in a
219     /// separate function to ease the control flow design.
get_directory_counts( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult<(u64, u64, usize)>220     pub(crate) fn get_directory_counts(
221         reader: &mut R,
222         footer: &spec::CentralDirectoryEnd,
223         cde_start_pos: u64,
224     ) -> ZipResult<(u64, u64, usize)> {
225         // See if there's a ZIP64 footer. The ZIP64 locator if present will
226         // have its signature 20 bytes in front of the standard footer. The
227         // standard footer, in turn, is 22+N bytes large, where N is the
228         // comment length. Therefore:
229         let zip64locator = if reader
230             .seek(io::SeekFrom::End(
231                 -(20 + 22 + footer.zip_file_comment.len() as i64),
232             ))
233             .is_ok()
234         {
235             match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
236                 Ok(loc) => Some(loc),
237                 Err(ZipError::InvalidArchive(_)) => {
238                     // No ZIP64 header; that's actually fine. We're done here.
239                     None
240                 }
241                 Err(e) => {
242                     // Yikes, a real problem
243                     return Err(e);
244                 }
245             }
246         } else {
247             // Empty Zip files will have nothing else so this error might be fine. If
248             // not, we'll find out soon.
249             None
250         };
251 
252         match zip64locator {
253             None => {
254                 // Some zip files have data prepended to them, resulting in the
255                 // offsets all being too small. Get the amount of error by comparing
256                 // the actual file position we found the CDE at with the offset
257                 // recorded in the CDE.
258                 let archive_offset = cde_start_pos
259                     .checked_sub(footer.central_directory_size as u64)
260                     .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
261                     .ok_or(ZipError::InvalidArchive(
262                         "Invalid central directory size or offset",
263                     ))?;
264 
265                 let directory_start = footer.central_directory_offset as u64 + archive_offset;
266                 let number_of_files = footer.number_of_files_on_this_disk as usize;
267                 Ok((archive_offset, directory_start, number_of_files))
268             }
269             Some(locator64) => {
270                 // If we got here, this is indeed a ZIP64 file.
271 
272                 if footer.disk_number as u32 != locator64.disk_with_central_directory {
273                     return unsupported_zip_error(
274                         "Support for multi-disk files is not implemented",
275                     );
276                 }
277 
278                 // We need to reassess `archive_offset`. We know where the ZIP64
279                 // central-directory-end structure *should* be, but unfortunately we
280                 // don't know how to precisely relate that location to our current
281                 // actual offset in the file, since there may be junk at its
282                 // beginning. Therefore we need to perform another search, as in
283                 // read::CentralDirectoryEnd::find_and_parse, except now we search
284                 // forward.
285 
286                 let search_upper_bound = cde_start_pos
287                     .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
288                     .ok_or(ZipError::InvalidArchive(
289                         "File cannot contain ZIP64 central directory end",
290                     ))?;
291                 let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
292                     reader,
293                     locator64.end_of_central_directory_offset,
294                     search_upper_bound,
295                 )?;
296 
297                 if footer.disk_number != footer.disk_with_central_directory {
298                     return unsupported_zip_error(
299                         "Support for multi-disk files is not implemented",
300                     );
301                 }
302 
303                 let directory_start = footer
304                     .central_directory_offset
305                     .checked_add(archive_offset)
306                     .ok_or_else(|| {
307                         ZipError::InvalidArchive("Invalid central directory size or offset")
308                     })?;
309 
310                 Ok((
311                     archive_offset,
312                     directory_start,
313                     footer.number_of_files as usize,
314                 ))
315             }
316         }
317     }
318 
319     /// Read a ZIP archive, collecting the files it contains
320     ///
321     /// This uses the central directory record of the ZIP file, and ignores local file headers
new(mut reader: R) -> ZipResult<ZipArchive<R>>322     pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
323         let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
324 
325         if footer.disk_number != footer.disk_with_central_directory {
326             return unsupported_zip_error("Support for multi-disk files is not implemented");
327         }
328 
329         let (archive_offset, directory_start, number_of_files) =
330             Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
331 
332         let mut files = Vec::new();
333         let mut names_map = HashMap::new();
334 
335         if let Err(_) = reader.seek(io::SeekFrom::Start(directory_start)) {
336             return Err(ZipError::InvalidArchive(
337                 "Could not seek to start of central directory",
338             ));
339         }
340 
341         for _ in 0..number_of_files {
342             let file = central_header_to_zip_file(&mut reader, archive_offset)?;
343             names_map.insert(file.file_name.clone(), files.len());
344             files.push(file);
345         }
346 
347         Ok(ZipArchive {
348             reader,
349             files,
350             names_map,
351             offset: archive_offset,
352             comment: footer.zip_file_comment,
353         })
354     }
355     /// Extract a Zip archive into a directory, overwriting files if they
356     /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
357     ///
358     /// Extraction is not atomic; If an error is encountered, some of the files
359     /// may be left on disk.
extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()>360     pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
361         use std::fs;
362 
363         for i in 0..self.len() {
364             let mut file = self.by_index(i)?;
365             let filepath = file
366                 .enclosed_name()
367                 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
368 
369             let outpath = directory.as_ref().join(filepath);
370 
371             if file.name().ends_with('/') {
372                 fs::create_dir_all(&outpath)?;
373             } else {
374                 if let Some(p) = outpath.parent() {
375                     if !p.exists() {
376                         fs::create_dir_all(&p)?;
377                     }
378                 }
379                 let mut outfile = fs::File::create(&outpath)?;
380                 io::copy(&mut file, &mut outfile)?;
381             }
382             // Get and Set permissions
383             #[cfg(unix)]
384             {
385                 use std::os::unix::fs::PermissionsExt;
386                 if let Some(mode) = file.unix_mode() {
387                     fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
388                 }
389             }
390         }
391         Ok(())
392     }
393 
    /// Number of files contained in this zip.
    ///
    /// This is the number of entries read from the central directory when
    /// the archive was opened; valid indices for [`ZipArchive::by_index`] are
    /// `0..len()`.
    pub fn len(&self) -> usize {
        self.files.len()
    }
398 
399     /// Whether this zip archive contains no files
is_empty(&self) -> bool400     pub fn is_empty(&self) -> bool {
401         self.len() == 0
402     }
403 
    /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
    ///
    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
    /// of that prepended data.
    ///
    /// The value is the archive offset determined while locating the central
    /// directory when the archive was opened.
    pub fn offset(&self) -> u64 {
        self.offset
    }
411 
412     /// Get the comment of the zip archive.
comment(&self) -> &[u8]413     pub fn comment(&self) -> &[u8] {
414         &self.comment
415     }
416 
417     /// Returns an iterator over all the file and directory names in this archive.
file_names(&self) -> impl Iterator<Item = &str>418     pub fn file_names(&self) -> impl Iterator<Item = &str> {
419         self.names_map.keys().map(|s| s.as_str())
420     }
421 
422     /// Search for a file entry by name, decrypt with given password
by_name_decrypt<'a>( &'a mut self, name: &str, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>423     pub fn by_name_decrypt<'a>(
424         &'a mut self,
425         name: &str,
426         password: &[u8],
427     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
428         self.by_name_with_optional_password(name, Some(password))
429     }
430 
431     /// Search for a file entry by name
by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>432     pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
433         Ok(self.by_name_with_optional_password(name, None)?.unwrap())
434     }
435 
by_name_with_optional_password<'a>( &'a mut self, name: &str, password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>436     fn by_name_with_optional_password<'a>(
437         &'a mut self,
438         name: &str,
439         password: Option<&[u8]>,
440     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
441         let index = match self.names_map.get(name) {
442             Some(index) => *index,
443             None => {
444                 return Err(ZipError::FileNotFound);
445             }
446         };
447         self.by_index_with_optional_password(index, password)
448     }
449 
450     /// Get a contained file by index, decrypt with given password
by_index_decrypt<'a>( &'a mut self, file_number: usize, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>451     pub fn by_index_decrypt<'a>(
452         &'a mut self,
453         file_number: usize,
454         password: &[u8],
455     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
456         self.by_index_with_optional_password(file_number, Some(password))
457     }
458 
459     /// Get a contained file by index
by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>460     pub fn by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
461         Ok(self
462             .by_index_with_optional_password(file_number, None)?
463             .unwrap())
464     }
465 
466     /// Get a contained file by index without decompressing it
by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>467     pub fn by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
468         let reader = &mut self.reader;
469         self.files
470             .get_mut(file_number)
471             .ok_or(ZipError::FileNotFound)
472             .and_then(move |data| {
473                 Ok(ZipFile {
474                     crypto_reader: None,
475                     reader: ZipFileReader::Raw(find_content(data, reader)?),
476                     data: Cow::Borrowed(data),
477                 })
478             })
479     }
480 
by_index_with_optional_password<'a>( &'a mut self, file_number: usize, mut password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>481     fn by_index_with_optional_password<'a>(
482         &'a mut self,
483         file_number: usize,
484         mut password: Option<&[u8]>,
485     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
486         if file_number >= self.files.len() {
487             return Err(ZipError::FileNotFound);
488         }
489         let data = &mut self.files[file_number];
490 
491         match (password, data.encrypted) {
492             (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
493             (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
494             _ => {}
495         }
496         let limit_reader = find_content(data, &mut self.reader)?;
497 
498         match make_crypto_reader(
499             data.compression_method,
500             data.crc32,
501             data.last_modified_time,
502             data.using_data_descriptor,
503             limit_reader,
504             password,
505         ) {
506             Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
507                 crypto_reader: Some(crypto_reader),
508                 reader: ZipFileReader::NoReader,
509                 data: Cow::Borrowed(data),
510             })),
511             Err(e) => Err(e),
512             Ok(Err(e)) => Ok(Err(e)),
513         }
514     }
515 
    /// Unwrap and return the inner reader object
    ///
    /// The position of the reader is undefined.
    ///
    /// This consumes the archive; the entry metadata held by it is dropped.
    pub fn into_inner(self) -> R {
        self.reader
    }
522 }
523 
/// Build an `Err(ZipError::UnsupportedArchive(detail))` for any `ZipResult<T>`,
/// so callers can `return` it regardless of their success type.
fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
    Err(ZipError::UnsupportedArchive(detail))
}
527 
528 /// Parse a central directory entry to collect the information for the file.
central_header_to_zip_file<R: Read + io::Seek>( reader: &mut R, archive_offset: u64, ) -> ZipResult<ZipFileData>529 pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
530     reader: &mut R,
531     archive_offset: u64,
532 ) -> ZipResult<ZipFileData> {
533     let central_header_start = reader.seek(io::SeekFrom::Current(0))?;
534     // Parse central header
535     let signature = reader.read_u32::<LittleEndian>()?;
536     if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
537         return Err(ZipError::InvalidArchive("Invalid Central Directory header"));
538     }
539 
540     let version_made_by = reader.read_u16::<LittleEndian>()?;
541     let _version_to_extract = reader.read_u16::<LittleEndian>()?;
542     let flags = reader.read_u16::<LittleEndian>()?;
543     let encrypted = flags & 1 == 1;
544     let is_utf8 = flags & (1 << 11) != 0;
545     let using_data_descriptor = flags & (1 << 3) != 0;
546     let compression_method = reader.read_u16::<LittleEndian>()?;
547     let last_mod_time = reader.read_u16::<LittleEndian>()?;
548     let last_mod_date = reader.read_u16::<LittleEndian>()?;
549     let crc32 = reader.read_u32::<LittleEndian>()?;
550     let compressed_size = reader.read_u32::<LittleEndian>()?;
551     let uncompressed_size = reader.read_u32::<LittleEndian>()?;
552     let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
553     let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
554     let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
555     let _disk_number = reader.read_u16::<LittleEndian>()?;
556     let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
557     let external_file_attributes = reader.read_u32::<LittleEndian>()?;
558     let offset = reader.read_u32::<LittleEndian>()? as u64;
559     let mut file_name_raw = vec![0; file_name_length];
560     reader.read_exact(&mut file_name_raw)?;
561     let mut extra_field = vec![0; extra_field_length];
562     reader.read_exact(&mut extra_field)?;
563     let mut file_comment_raw = vec![0; file_comment_length];
564     reader.read_exact(&mut file_comment_raw)?;
565 
566     let file_name = match is_utf8 {
567         true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
568         false => file_name_raw.clone().from_cp437(),
569     };
570     let file_comment = match is_utf8 {
571         true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
572         false => file_comment_raw.from_cp437(),
573     };
574 
575     // Construct the result
576     let mut result = ZipFileData {
577         system: System::from_u8((version_made_by >> 8) as u8),
578         version_made_by: version_made_by as u8,
579         encrypted,
580         using_data_descriptor,
581         compression_method: {
582             #[allow(deprecated)]
583             CompressionMethod::from_u16(compression_method)
584         },
585         last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
586         crc32,
587         compressed_size: compressed_size as u64,
588         uncompressed_size: uncompressed_size as u64,
589         file_name,
590         file_name_raw,
591         extra_field,
592         file_comment,
593         header_start: offset,
594         central_header_start,
595         data_start: 0,
596         external_attributes: external_file_attributes,
597         large_file: false,
598     };
599 
600     match parse_extra_field(&mut result) {
601         Ok(..) | Err(ZipError::Io(..)) => {}
602         Err(e) => return Err(e),
603     }
604 
605     // Account for shifted zip offsets.
606     result.header_start += archive_offset;
607 
608     Ok(result)
609 }
610 
parse_extra_field(file: &mut ZipFileData) -> ZipResult<()>611 fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
612     let mut reader = io::Cursor::new(&file.extra_field);
613 
614     while (reader.position() as usize) < file.extra_field.len() {
615         let kind = reader.read_u16::<LittleEndian>()?;
616         let len = reader.read_u16::<LittleEndian>()?;
617         let mut len_left = len as i64;
618         // Zip64 extended information extra field
619         if kind == 0x0001 {
620             if file.uncompressed_size == 0xFFFFFFFF {
621                 file.large_file = true;
622                 file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
623                 len_left -= 8;
624             }
625             if file.compressed_size == 0xFFFFFFFF {
626                 file.large_file = true;
627                 file.compressed_size = reader.read_u64::<LittleEndian>()?;
628                 len_left -= 8;
629             }
630             if file.header_start == 0xFFFFFFFF {
631                 file.header_start = reader.read_u64::<LittleEndian>()?;
632                 len_left -= 8;
633             }
634             // Unparsed fields:
635             // u32: disk start number
636         }
637 
638         // We could also check for < 0 to check for errors
639         if len_left > 0 {
640             reader.seek(io::SeekFrom::Current(len_left))?;
641         }
642     }
643     Ok(())
644 }
645 
646 /// Methods for retrieving information on zip files
647 impl<'a> ZipFile<'a> {
get_reader(&mut self) -> &mut ZipFileReader<'a>648     fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
649         if let ZipFileReader::NoReader = self.reader {
650             let data = &self.data;
651             let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
652             self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
653         }
654         &mut self.reader
655     }
656 
get_raw_reader(&mut self) -> &mut dyn Read657     pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
658         if let ZipFileReader::NoReader = self.reader {
659             let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
660             self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
661         }
662         &mut self.reader
663     }
664 
665     /// Get the version of the file
version_made_by(&self) -> (u8, u8)666     pub fn version_made_by(&self) -> (u8, u8) {
667         (
668             self.data.version_made_by / 10,
669             self.data.version_made_by % 10,
670         )
671     }
672 
673     /// Get the name of the file
674     ///
675     /// # Warnings
676     ///
677     /// It is dangerous to use this name directly when extracting an archive.
678     /// It may contain an absolute path (`/etc/shadow`), or break out of the
679     /// current directory (`../runtime`). Carelessly writing to these paths
680     /// allows an attacker to craft a ZIP archive that will overwrite critical
681     /// files.
682     ///
683     /// You can use the [`ZipFile::enclosed_name`] method to validate the name
684     /// as a safe path.
name(&self) -> &str685     pub fn name(&self) -> &str {
686         &self.data.file_name
687     }
688 
689     /// Get the name of the file, in the raw (internal) byte representation.
690     ///
691     /// The encoding of this data is currently undefined.
name_raw(&self) -> &[u8]692     pub fn name_raw(&self) -> &[u8] {
693         &self.data.file_name_raw
694     }
695 
    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
    /// removes a leading '/' and removes '..' parts.
    ///
    /// This is a deprecated alias that simply forwards to
    /// [`ZipFile::mangled_name`].
    #[deprecated(
        since = "0.5.7",
        note = "by stripping `..`s from the path, the meaning of paths can change.
                `mangled_name` can be used if this behaviour is desirable"
    )]
    pub fn sanitized_name(&self) -> ::std::path::PathBuf {
        self.mangled_name()
    }
706 
    /// Rewrite the path, ignoring any path components with special meaning.
    ///
    /// - Absolute paths are made relative
    /// - [`ParentDir`]s are ignored
    /// - Truncates the filename at a NULL byte
    ///
    /// This is appropriate if you need to be able to extract *something* from
    /// any archive, but will easily misrepresent trivial paths like
    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
    ///
    /// The rewriting itself is done by the entry metadata's
    /// `file_name_sanitized` method; this is a thin wrapper around it.
    ///
    /// [`ParentDir`]: `Component::ParentDir`
    pub fn mangled_name(&self) -> ::std::path::PathBuf {
        self.data.file_name_sanitized()
    }
722 
723     /// Ensure the file path is safe to use as a [`Path`].
724     ///
725     /// - It can't contain NULL bytes
726     /// - It can't resolve to a path outside the current directory
727     ///   > `foo/../bar` is fine, `foo/../../bar` is not.
728     /// - It can't be an absolute path
729     ///
730     /// This will read well-formed ZIP files correctly, and is resistant
731     /// to path-based exploits. It is recommended over
732     /// [`ZipFile::mangled_name`].
enclosed_name(&self) -> Option<&Path>733     pub fn enclosed_name(&self) -> Option<&Path> {
734         if self.data.file_name.contains('\0') {
735             return None;
736         }
737         let path = Path::new(&self.data.file_name);
738         let mut depth = 0usize;
739         for component in path.components() {
740             match component {
741                 Component::Prefix(_) | Component::RootDir => return None,
742                 Component::ParentDir => depth = depth.checked_sub(1)?,
743                 Component::Normal(_) => depth += 1,
744                 Component::CurDir => (),
745             }
746         }
747         Some(path)
748     }
749 
750     /// Get the comment of the file
comment(&self) -> &str751     pub fn comment(&self) -> &str {
752         &self.data.file_comment
753     }
754 
755     /// Get the compression method used to store the file
compression(&self) -> CompressionMethod756     pub fn compression(&self) -> CompressionMethod {
757         self.data.compression_method
758     }
759 
760     /// Get the size of the file in the archive
compressed_size(&self) -> u64761     pub fn compressed_size(&self) -> u64 {
762         self.data.compressed_size
763     }
764 
765     /// Get the size of the file when uncompressed
size(&self) -> u64766     pub fn size(&self) -> u64 {
767         self.data.uncompressed_size
768     }
769 
770     /// Get the time the file was last modified
last_modified(&self) -> DateTime771     pub fn last_modified(&self) -> DateTime {
772         self.data.last_modified_time
773     }
774     /// Returns whether the file is actually a directory
is_dir(&self) -> bool775     pub fn is_dir(&self) -> bool {
776         self.name()
777             .chars()
778             .rev()
779             .next()
780             .map_or(false, |c| c == '/' || c == '\\')
781     }
782 
783     /// Returns whether the file is a regular file
is_file(&self) -> bool784     pub fn is_file(&self) -> bool {
785         !self.is_dir()
786     }
787 
788     /// Get unix mode for the file
unix_mode(&self) -> Option<u32>789     pub fn unix_mode(&self) -> Option<u32> {
790         if self.data.external_attributes == 0 {
791             return None;
792         }
793 
794         match self.data.system {
795             System::Unix => Some(self.data.external_attributes >> 16),
796             System::Dos => {
797                 // Interpret MSDOS directory bit
798                 let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
799                     ffi::S_IFDIR | 0o0775
800                 } else {
801                     ffi::S_IFREG | 0o0664
802                 };
803                 if 0x01 == (self.data.external_attributes & 0x01) {
804                     // Read-only bit; strip write permissions
805                     mode &= 0o0555;
806                 }
807                 Some(mode)
808             }
809             _ => None,
810         }
811     }
812 
813     /// Get the CRC32 hash of the original file
crc32(&self) -> u32814     pub fn crc32(&self) -> u32 {
815         self.data.crc32
816     }
817 
818     /// Get the extra data of the zip header for this file
extra_data(&self) -> &[u8]819     pub fn extra_data(&self) -> &[u8] {
820         &self.data.extra_field
821     }
822 
823     /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64824     pub fn data_start(&self) -> u64 {
825         self.data.data_start
826     }
827 
828     /// Get the starting offset of the zip header for this file
header_start(&self) -> u64829     pub fn header_start(&self) -> u64 {
830         self.data.header_start
831     }
832     /// Get the starting offset of the zip header in the central directory for this file
central_header_start(&self) -> u64833     pub fn central_header_start(&self) -> u64 {
834         self.data.central_header_start
835     }
836 }
837 
838 impl<'a> Read for ZipFile<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>839     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
840         self.get_reader().read(buf)
841     }
842 }
843 
844 impl<'a> Drop for ZipFile<'a> {
drop(&mut self)845     fn drop(&mut self) {
846         // self.data is Owned, this reader is constructed by a streaming reader.
847         // In this case, we want to exhaust the reader so that the next file is accessible.
848         if let Cow::Owned(_) = self.data {
849             let mut buffer = [0; 1 << 16];
850 
851             // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
852             let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
853                 ZipFileReader::NoReader => {
854                     let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
855                     innerreader.expect("Invalid reader state").into_inner()
856                 }
857                 reader => {
858                     let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
859                     innerreader.into_inner()
860                 }
861             };
862 
863             loop {
864                 match reader.read(&mut buffer) {
865                     Ok(0) => break,
866                     Ok(_) => (),
867                     Err(e) => panic!(
868                         "Could not consume all of the output of the current ZipFile: {:?}",
869                         e
870                     ),
871                 }
872             }
873         }
874     }
875 }
876 
877 /// Read ZipFile structures from a non-seekable reader.
878 ///
879 /// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
880 /// as some information will be missing when reading this manner.
881 ///
882 /// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
883 /// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
884 /// is encountered. No more files should be read after this.
885 ///
886 /// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
887 /// the structure is done.
888 ///
889 /// Missing fields are:
890 /// * `comment`: set to an empty string
891 /// * `data_start`: set to 0
892 /// * `external_attributes`: `unix_mode()`: will return None
read_zipfile_from_stream<'a, R: io::Read>( reader: &'a mut R, ) -> ZipResult<Option<ZipFile<'_>>>893 pub fn read_zipfile_from_stream<'a, R: io::Read>(
894     reader: &'a mut R,
895 ) -> ZipResult<Option<ZipFile<'_>>> {
896     let signature = reader.read_u32::<LittleEndian>()?;
897 
898     match signature {
899         spec::LOCAL_FILE_HEADER_SIGNATURE => (),
900         spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
901         _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
902     }
903 
904     let version_made_by = reader.read_u16::<LittleEndian>()?;
905     let flags = reader.read_u16::<LittleEndian>()?;
906     let encrypted = flags & 1 == 1;
907     let is_utf8 = flags & (1 << 11) != 0;
908     let using_data_descriptor = flags & (1 << 3) != 0;
909     #[allow(deprecated)]
910     let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
911     let last_mod_time = reader.read_u16::<LittleEndian>()?;
912     let last_mod_date = reader.read_u16::<LittleEndian>()?;
913     let crc32 = reader.read_u32::<LittleEndian>()?;
914     let compressed_size = reader.read_u32::<LittleEndian>()?;
915     let uncompressed_size = reader.read_u32::<LittleEndian>()?;
916     let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
917     let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
918 
919     let mut file_name_raw = vec![0; file_name_length];
920     reader.read_exact(&mut file_name_raw)?;
921     let mut extra_field = vec![0; extra_field_length];
922     reader.read_exact(&mut extra_field)?;
923 
924     let file_name = match is_utf8 {
925         true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
926         false => file_name_raw.clone().from_cp437(),
927     };
928 
929     let mut result = ZipFileData {
930         system: System::from_u8((version_made_by >> 8) as u8),
931         version_made_by: version_made_by as u8,
932         encrypted,
933         using_data_descriptor,
934         compression_method,
935         last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
936         crc32,
937         compressed_size: compressed_size as u64,
938         uncompressed_size: uncompressed_size as u64,
939         file_name,
940         file_name_raw,
941         extra_field,
942         file_comment: String::new(), // file comment is only available in the central directory
943         // header_start and data start are not available, but also don't matter, since seeking is
944         // not available.
945         header_start: 0,
946         data_start: 0,
947         central_header_start: 0,
948         // The external_attributes field is only available in the central directory.
949         // We set this to zero, which should be valid as the docs state 'If input came
950         // from standard input, this field is set to zero.'
951         external_attributes: 0,
952         large_file: false,
953     };
954 
955     match parse_extra_field(&mut result) {
956         Ok(..) | Err(ZipError::Io(..)) => {}
957         Err(e) => return Err(e),
958     }
959 
960     if encrypted {
961         return unsupported_zip_error("Encrypted files are not supported");
962     }
963     if using_data_descriptor {
964         return unsupported_zip_error("The file length is not available in the local header");
965     }
966 
967     let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size as u64);
968 
969     let result_crc32 = result.crc32;
970     let result_compression_method = result.compression_method;
971     let crypto_reader = make_crypto_reader(
972         result_compression_method,
973         result_crc32,
974         result.last_modified_time,
975         result.using_data_descriptor,
976         limit_reader,
977         None,
978     )?
979     .unwrap();
980 
981     Ok(Some(ZipFile {
982         data: Cow::Owned(result),
983         crypto_reader: None,
984         reader: make_reader(result_compression_method, result_crc32, crypto_reader),
985     }))
986 }
987 
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// Copies fixture bytes into an owned, seekable in-memory cursor.
    fn fixture(bytes: &[u8]) -> io::Cursor<Vec<u8>> {
        io::Cursor::new(bytes.to_vec())
    }

    /// An archive with a bad offset must be rejected when it is opened.
    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../tests/data/invalid_offset.zip");
        assert!(ZipArchive::new(fixture(bytes)).is_err());
    }

    /// A second malformed-offset archive must be rejected as well.
    #[test]
    fn invalid_offset2() {
        let bytes = include_bytes!("../tests/data/invalid_offset2.zip");
        assert!(ZipArchive::new(fixture(bytes)).is_err());
    }

    /// The zip64 demo archive opens and reports exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let bytes = include_bytes!("../tests/data/zip64_demo.zip");
        let archive = ZipArchive::new(fixture(bytes)).unwrap();
        assert_eq!(archive.len(), 1);
    }

    /// Checks the archive comment and the central header offset of the first entry.
    #[test]
    fn zip_contents() {
        let bytes = include_bytes!("../tests/data/mimetype.zip");
        let mut archive = ZipArchive::new(fixture(bytes)).unwrap();
        assert!(archive.comment() == b"");

        let first = archive.by_index(0).unwrap();
        assert_eq!(first.central_header_start(), 77);
    }

    /// Streams through every entry until the central directory is reached.
    #[test]
    fn zip_read_streaming() {
        let bytes = include_bytes!("../tests/data/mimetype.zip");
        let mut stream = fixture(bytes);
        // Each returned entry is dropped right away, which drains it from the stream.
        while read_zipfile_from_stream(&mut stream).unwrap().is_some() {}
    }

    /// A cloned archive must read independently of, and identically to, the original.
    #[test]
    fn zip_clone() {
        let bytes = include_bytes!("../tests/data/mimetype.zip");
        let mut original = ZipArchive::new(fixture(bytes)).unwrap();
        let mut duplicate = original.clone();

        let mut file1 = original.by_index(0).unwrap();
        let mut file2 = duplicate.by_index(0).unwrap();

        let stamp = file1.last_modified();
        let observed = (
            stamp.year(),
            stamp.month(),
            stamp.day(),
            stamp.hour(),
            stamp.minute(),
            stamp.second(),
        );
        assert_eq!(observed, (1980, 1, 1, 0, 0, 0));

        let mut buf1 = [0; 5];
        let mut buf2 = [0; 5];
        let mut buf3 = [0; 5];
        let mut buf4 = [0; 5];

        // Interleave the reads so that shared state between the clones would show up.
        file1.read(&mut buf1).unwrap();
        file2.read(&mut buf2).unwrap();
        file1.read(&mut buf3).unwrap();
        file2.read(&mut buf4).unwrap();

        assert_eq!(buf1, buf2);
        assert_eq!(buf3, buf4);
        assert!(buf1 != buf3);
    }

    /// Entries named `dir*` must be directories and entries named `file*` must be files.
    #[test]
    fn file_and_dir_predicates() {
        let bytes = include_bytes!("../tests/data/files_and_dirs.zip");
        let mut archive = ZipArchive::new(fixture(bytes)).unwrap();

        for index in 0..archive.len() {
            let entry = archive.by_index(index).unwrap();
            let path = entry.enclosed_name().unwrap();
            let leaf = path.file_name().unwrap().to_str().unwrap();

            let is_named_dir = leaf.starts_with("dir") && entry.is_dir();
            let is_named_file = leaf.starts_with("file") && entry.is_file();
            assert!(is_named_dir || is_named_file);
        }
    }
}
1112