1 //! Types for reading ZIP archives
2
3 use crate::compression::CompressionMethod;
4 use crate::crc32::Crc32Reader;
5 use crate::result::{InvalidPassword, ZipError, ZipResult};
6 use crate::spec;
7 use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
8 use std::borrow::Cow;
9 use std::collections::HashMap;
10 use std::io::{self, prelude::*};
11 use std::path::{Component, Path};
12
13 use crate::cp437::FromCp437;
14 use crate::types::{DateTime, System, ZipFileData};
15 use byteorder::{LittleEndian, ReadBytesExt};
16
17 #[cfg(any(
18 feature = "deflate",
19 feature = "deflate-miniz",
20 feature = "deflate-zlib"
21 ))]
22 use flate2::read::DeflateDecoder;
23
24 #[cfg(feature = "bzip2")]
25 use bzip2::read::BzDecoder;
26
/// File-type bits that `ZipFile::unix_mode` combines into the mode it reports for MS-DOS entries.
mod ffi {
    /// File-type bit used for regular files.
    pub const S_IFREG: u32 = 0o100_000;
    /// File-type bit used for directories.
    pub const S_IFDIR: u32 = 0o040_000;
}
31
/// ZIP archive reader
///
/// The archive is indexed once, from its central directory, when it is created with
/// [`ZipArchive::new`]; individual entries are then opened by index or by name.
///
/// ```no_run
/// use std::io::prelude::*;
/// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
///     let mut zip = zip::ZipArchive::new(reader)?;
///
///     for i in 0..zip.len() {
///         let mut file = zip.by_index(i)?;
///         println!("Filename: {}", file.name());
///         std::io::copy(&mut file, &mut std::io::stdout())?;
///     }
///
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct ZipArchive<R> {
    /// The underlying source the archive is read from.
    reader: R,
    /// Metadata of every entry, in the order the central directory lists them.
    files: Vec<ZipFileData>,
    /// Maps an entry name to its index in `files`.
    names_map: HashMap<String, usize>,
    /// Number of bytes that precede the archive in `reader` (zero unless data was prepended).
    offset: u64,
    /// The archive comment taken from the end-of-central-directory record.
    comment: Vec<u8>,
}
56
/// Reader over the stored bytes of one entry, with an optional decryption layer.
enum CryptoReader<'a> {
    /// No password was used: bytes are read straight from the length-limited reader.
    Plaintext(io::Take<&'a mut dyn Read>),
    /// Bytes are read through a ZipCrypto reader whose password has already been validated.
    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
}
61
62 impl<'a> Read for CryptoReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>63 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
64 match self {
65 CryptoReader::Plaintext(r) => r.read(buf),
66 CryptoReader::ZipCrypto(r) => r.read(buf),
67 }
68 }
69 }
70
71 impl<'a> CryptoReader<'a> {
72 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>73 pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
74 match self {
75 CryptoReader::Plaintext(r) => r,
76 CryptoReader::ZipCrypto(r) => r.into_inner(),
77 }
78 }
79 }
80
/// The reader stack used to deliver the contents of one entry.
enum ZipFileReader<'a> {
    /// Placeholder used until a real reader has been built; reading in this state panics.
    NoReader,
    /// Delivers the stored bytes as they are, without a decompression or CRC layer.
    Raw(io::Take<&'a mut dyn io::Read>),
    /// Stored (uncompressed) entry, wrapped in a CRC-32 reader.
    Stored(Crc32Reader<CryptoReader<'a>>),
    /// Deflate-compressed entry, decoded and wrapped in a CRC-32 reader.
    #[cfg(any(
        feature = "deflate",
        feature = "deflate-miniz",
        feature = "deflate-zlib"
    ))]
    Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
    /// Bzip2-compressed entry, decoded and wrapped in a CRC-32 reader.
    #[cfg(feature = "bzip2")]
    Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
}
94
95 impl<'a> Read for ZipFileReader<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>96 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
97 match self {
98 ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
99 ZipFileReader::Raw(r) => r.read(buf),
100 ZipFileReader::Stored(r) => r.read(buf),
101 #[cfg(any(
102 feature = "deflate",
103 feature = "deflate-miniz",
104 feature = "deflate-zlib"
105 ))]
106 ZipFileReader::Deflated(r) => r.read(buf),
107 #[cfg(feature = "bzip2")]
108 ZipFileReader::Bzip2(r) => r.read(buf),
109 }
110 }
111 }
112
113 impl<'a> ZipFileReader<'a> {
114 /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> io::Take<&'a mut dyn Read>115 pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
116 match self {
117 ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
118 ZipFileReader::Raw(r) => r,
119 ZipFileReader::Stored(r) => r.into_inner().into_inner(),
120 #[cfg(any(
121 feature = "deflate",
122 feature = "deflate-miniz",
123 feature = "deflate-zlib"
124 ))]
125 ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(),
126 #[cfg(feature = "bzip2")]
127 ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
128 }
129 }
130 }
131
/// A struct for reading a zip file
pub struct ZipFile<'a> {
    /// Metadata of the entry. Borrowed when the entry comes from a `ZipArchive`;
    /// owned when it was produced by the streaming reader (see the `Drop` impl).
    data: Cow<'a, ZipFileData>,
    /// The (optionally decrypting) reader, held here until `reader` is built from it.
    crypto_reader: Option<CryptoReader<'a>>,
    /// The reader stack actually used for reading; `NoReader` until it is first needed.
    reader: ZipFileReader<'a>,
}
138
find_content<'a>( data: &mut ZipFileData, reader: &'a mut (impl Read + Seek), ) -> ZipResult<io::Take<&'a mut dyn Read>>139 fn find_content<'a>(
140 data: &mut ZipFileData,
141 reader: &'a mut (impl Read + Seek),
142 ) -> ZipResult<io::Take<&'a mut dyn Read>> {
143 // Parse local header
144 reader.seek(io::SeekFrom::Start(data.header_start))?;
145 let signature = reader.read_u32::<LittleEndian>()?;
146 if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
147 return Err(ZipError::InvalidArchive("Invalid local file header"));
148 }
149
150 reader.seek(io::SeekFrom::Current(22))?;
151 let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
152 let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
153 let magic_and_header = 4 + 22 + 2 + 2;
154 data.data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
155
156 reader.seek(io::SeekFrom::Start(data.data_start))?;
157 Ok((reader as &mut dyn Read).take(data.compressed_size))
158 }
159
make_crypto_reader<'a>( compression_method: crate::compression::CompressionMethod, crc32: u32, last_modified_time: DateTime, using_data_descriptor: bool, reader: io::Take<&'a mut dyn io::Read>, password: Option<&[u8]>, ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>>160 fn make_crypto_reader<'a>(
161 compression_method: crate::compression::CompressionMethod,
162 crc32: u32,
163 last_modified_time: DateTime,
164 using_data_descriptor: bool,
165 reader: io::Take<&'a mut dyn io::Read>,
166 password: Option<&[u8]>,
167 ) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
168 #[allow(deprecated)]
169 {
170 if let CompressionMethod::Unsupported(_) = compression_method {
171 return unsupported_zip_error("Compression method not supported");
172 }
173 }
174
175 let reader = match password {
176 None => CryptoReader::Plaintext(reader),
177 Some(password) => {
178 let validator = if using_data_descriptor {
179 ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart())
180 } else {
181 ZipCryptoValidator::PkzipCrc32(crc32)
182 };
183 match ZipCryptoReader::new(reader, password).validate(validator)? {
184 None => return Ok(Err(InvalidPassword)),
185 Some(r) => CryptoReader::ZipCrypto(r),
186 }
187 }
188 };
189 Ok(Ok(reader))
190 }
191
make_reader<'a>( compression_method: CompressionMethod, crc32: u32, reader: CryptoReader<'a>, ) -> ZipFileReader<'a>192 fn make_reader<'a>(
193 compression_method: CompressionMethod,
194 crc32: u32,
195 reader: CryptoReader<'a>,
196 ) -> ZipFileReader<'a> {
197 match compression_method {
198 CompressionMethod::Stored => ZipFileReader::Stored(Crc32Reader::new(reader, crc32)),
199 #[cfg(any(
200 feature = "deflate",
201 feature = "deflate-miniz",
202 feature = "deflate-zlib"
203 ))]
204 CompressionMethod::Deflated => {
205 let deflate_reader = DeflateDecoder::new(reader);
206 ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32))
207 }
208 #[cfg(feature = "bzip2")]
209 CompressionMethod::Bzip2 => {
210 let bzip2_reader = BzDecoder::new(reader);
211 ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32))
212 }
213 _ => panic!("Compression method not supported"),
214 }
215 }
216
217 impl<R: Read + io::Seek> ZipArchive<R> {
218 /// Get the directory start offset and number of files. This is done in a
219 /// separate function to ease the control flow design.
get_directory_counts( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult<(u64, u64, usize)>220 pub(crate) fn get_directory_counts(
221 reader: &mut R,
222 footer: &spec::CentralDirectoryEnd,
223 cde_start_pos: u64,
224 ) -> ZipResult<(u64, u64, usize)> {
225 // See if there's a ZIP64 footer. The ZIP64 locator if present will
226 // have its signature 20 bytes in front of the standard footer. The
227 // standard footer, in turn, is 22+N bytes large, where N is the
228 // comment length. Therefore:
229 let zip64locator = if reader
230 .seek(io::SeekFrom::End(
231 -(20 + 22 + footer.zip_file_comment.len() as i64),
232 ))
233 .is_ok()
234 {
235 match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
236 Ok(loc) => Some(loc),
237 Err(ZipError::InvalidArchive(_)) => {
238 // No ZIP64 header; that's actually fine. We're done here.
239 None
240 }
241 Err(e) => {
242 // Yikes, a real problem
243 return Err(e);
244 }
245 }
246 } else {
247 // Empty Zip files will have nothing else so this error might be fine. If
248 // not, we'll find out soon.
249 None
250 };
251
252 match zip64locator {
253 None => {
254 // Some zip files have data prepended to them, resulting in the
255 // offsets all being too small. Get the amount of error by comparing
256 // the actual file position we found the CDE at with the offset
257 // recorded in the CDE.
258 let archive_offset = cde_start_pos
259 .checked_sub(footer.central_directory_size as u64)
260 .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
261 .ok_or(ZipError::InvalidArchive(
262 "Invalid central directory size or offset",
263 ))?;
264
265 let directory_start = footer.central_directory_offset as u64 + archive_offset;
266 let number_of_files = footer.number_of_files_on_this_disk as usize;
267 Ok((archive_offset, directory_start, number_of_files))
268 }
269 Some(locator64) => {
270 // If we got here, this is indeed a ZIP64 file.
271
272 if footer.disk_number as u32 != locator64.disk_with_central_directory {
273 return unsupported_zip_error(
274 "Support for multi-disk files is not implemented",
275 );
276 }
277
278 // We need to reassess `archive_offset`. We know where the ZIP64
279 // central-directory-end structure *should* be, but unfortunately we
280 // don't know how to precisely relate that location to our current
281 // actual offset in the file, since there may be junk at its
282 // beginning. Therefore we need to perform another search, as in
283 // read::CentralDirectoryEnd::find_and_parse, except now we search
284 // forward.
285
286 let search_upper_bound = cde_start_pos
287 .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
288 .ok_or(ZipError::InvalidArchive(
289 "File cannot contain ZIP64 central directory end",
290 ))?;
291 let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
292 reader,
293 locator64.end_of_central_directory_offset,
294 search_upper_bound,
295 )?;
296
297 if footer.disk_number != footer.disk_with_central_directory {
298 return unsupported_zip_error(
299 "Support for multi-disk files is not implemented",
300 );
301 }
302
303 let directory_start = footer
304 .central_directory_offset
305 .checked_add(archive_offset)
306 .ok_or_else(|| {
307 ZipError::InvalidArchive("Invalid central directory size or offset")
308 })?;
309
310 Ok((
311 archive_offset,
312 directory_start,
313 footer.number_of_files as usize,
314 ))
315 }
316 }
317 }
318
319 /// Read a ZIP archive, collecting the files it contains
320 ///
321 /// This uses the central directory record of the ZIP file, and ignores local file headers
new(mut reader: R) -> ZipResult<ZipArchive<R>>322 pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
323 let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
324
325 if footer.disk_number != footer.disk_with_central_directory {
326 return unsupported_zip_error("Support for multi-disk files is not implemented");
327 }
328
329 let (archive_offset, directory_start, number_of_files) =
330 Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
331
332 let mut files = Vec::new();
333 let mut names_map = HashMap::new();
334
335 if let Err(_) = reader.seek(io::SeekFrom::Start(directory_start)) {
336 return Err(ZipError::InvalidArchive(
337 "Could not seek to start of central directory",
338 ));
339 }
340
341 for _ in 0..number_of_files {
342 let file = central_header_to_zip_file(&mut reader, archive_offset)?;
343 names_map.insert(file.file_name.clone(), files.len());
344 files.push(file);
345 }
346
347 Ok(ZipArchive {
348 reader,
349 files,
350 names_map,
351 offset: archive_offset,
352 comment: footer.zip_file_comment,
353 })
354 }
355 /// Extract a Zip archive into a directory, overwriting files if they
356 /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
357 ///
358 /// Extraction is not atomic; If an error is encountered, some of the files
359 /// may be left on disk.
extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()>360 pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
361 use std::fs;
362
363 for i in 0..self.len() {
364 let mut file = self.by_index(i)?;
365 let filepath = file
366 .enclosed_name()
367 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
368
369 let outpath = directory.as_ref().join(filepath);
370
371 if file.name().ends_with('/') {
372 fs::create_dir_all(&outpath)?;
373 } else {
374 if let Some(p) = outpath.parent() {
375 if !p.exists() {
376 fs::create_dir_all(&p)?;
377 }
378 }
379 let mut outfile = fs::File::create(&outpath)?;
380 io::copy(&mut file, &mut outfile)?;
381 }
382 // Get and Set permissions
383 #[cfg(unix)]
384 {
385 use std::os::unix::fs::PermissionsExt;
386 if let Some(mode) = file.unix_mode() {
387 fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
388 }
389 }
390 }
391 Ok(())
392 }
393
394 /// Number of files contained in this zip.
len(&self) -> usize395 pub fn len(&self) -> usize {
396 self.files.len()
397 }
398
399 /// Whether this zip archive contains no files
is_empty(&self) -> bool400 pub fn is_empty(&self) -> bool {
401 self.len() == 0
402 }
403
404 /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
405 ///
406 /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
407 /// of that prepended data.
offset(&self) -> u64408 pub fn offset(&self) -> u64 {
409 self.offset
410 }
411
412 /// Get the comment of the zip archive.
comment(&self) -> &[u8]413 pub fn comment(&self) -> &[u8] {
414 &self.comment
415 }
416
417 /// Returns an iterator over all the file and directory names in this archive.
file_names(&self) -> impl Iterator<Item = &str>418 pub fn file_names(&self) -> impl Iterator<Item = &str> {
419 self.names_map.keys().map(|s| s.as_str())
420 }
421
422 /// Search for a file entry by name, decrypt with given password
by_name_decrypt<'a>( &'a mut self, name: &str, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>423 pub fn by_name_decrypt<'a>(
424 &'a mut self,
425 name: &str,
426 password: &[u8],
427 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
428 self.by_name_with_optional_password(name, Some(password))
429 }
430
431 /// Search for a file entry by name
by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>>432 pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
433 Ok(self.by_name_with_optional_password(name, None)?.unwrap())
434 }
435
by_name_with_optional_password<'a>( &'a mut self, name: &str, password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>436 fn by_name_with_optional_password<'a>(
437 &'a mut self,
438 name: &str,
439 password: Option<&[u8]>,
440 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
441 let index = match self.names_map.get(name) {
442 Some(index) => *index,
443 None => {
444 return Err(ZipError::FileNotFound);
445 }
446 };
447 self.by_index_with_optional_password(index, password)
448 }
449
450 /// Get a contained file by index, decrypt with given password
by_index_decrypt<'a>( &'a mut self, file_number: usize, password: &[u8], ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>451 pub fn by_index_decrypt<'a>(
452 &'a mut self,
453 file_number: usize,
454 password: &[u8],
455 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
456 self.by_index_with_optional_password(file_number, Some(password))
457 }
458
459 /// Get a contained file by index
by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>460 pub fn by_index<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
461 Ok(self
462 .by_index_with_optional_password(file_number, None)?
463 .unwrap())
464 }
465
466 /// Get a contained file by index without decompressing it
by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>>467 pub fn by_index_raw<'a>(&'a mut self, file_number: usize) -> ZipResult<ZipFile<'a>> {
468 let reader = &mut self.reader;
469 self.files
470 .get_mut(file_number)
471 .ok_or(ZipError::FileNotFound)
472 .and_then(move |data| {
473 Ok(ZipFile {
474 crypto_reader: None,
475 reader: ZipFileReader::Raw(find_content(data, reader)?),
476 data: Cow::Borrowed(data),
477 })
478 })
479 }
480
by_index_with_optional_password<'a>( &'a mut self, file_number: usize, mut password: Option<&[u8]>, ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>>481 fn by_index_with_optional_password<'a>(
482 &'a mut self,
483 file_number: usize,
484 mut password: Option<&[u8]>,
485 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
486 if file_number >= self.files.len() {
487 return Err(ZipError::FileNotFound);
488 }
489 let data = &mut self.files[file_number];
490
491 match (password, data.encrypted) {
492 (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
493 (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
494 _ => {}
495 }
496 let limit_reader = find_content(data, &mut self.reader)?;
497
498 match make_crypto_reader(
499 data.compression_method,
500 data.crc32,
501 data.last_modified_time,
502 data.using_data_descriptor,
503 limit_reader,
504 password,
505 ) {
506 Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
507 crypto_reader: Some(crypto_reader),
508 reader: ZipFileReader::NoReader,
509 data: Cow::Borrowed(data),
510 })),
511 Err(e) => Err(e),
512 Ok(Err(e)) => Ok(Err(e)),
513 }
514 }
515
516 /// Unwrap and return the inner reader object
517 ///
518 /// The position of the reader is undefined.
into_inner(self) -> R519 pub fn into_inner(self) -> R {
520 self.reader
521 }
522 }
523
unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>524 fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
525 Err(ZipError::UnsupportedArchive(detail))
526 }
527
528 /// Parse a central directory entry to collect the information for the file.
central_header_to_zip_file<R: Read + io::Seek>( reader: &mut R, archive_offset: u64, ) -> ZipResult<ZipFileData>529 pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
530 reader: &mut R,
531 archive_offset: u64,
532 ) -> ZipResult<ZipFileData> {
533 let central_header_start = reader.seek(io::SeekFrom::Current(0))?;
534 // Parse central header
535 let signature = reader.read_u32::<LittleEndian>()?;
536 if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
537 return Err(ZipError::InvalidArchive("Invalid Central Directory header"));
538 }
539
540 let version_made_by = reader.read_u16::<LittleEndian>()?;
541 let _version_to_extract = reader.read_u16::<LittleEndian>()?;
542 let flags = reader.read_u16::<LittleEndian>()?;
543 let encrypted = flags & 1 == 1;
544 let is_utf8 = flags & (1 << 11) != 0;
545 let using_data_descriptor = flags & (1 << 3) != 0;
546 let compression_method = reader.read_u16::<LittleEndian>()?;
547 let last_mod_time = reader.read_u16::<LittleEndian>()?;
548 let last_mod_date = reader.read_u16::<LittleEndian>()?;
549 let crc32 = reader.read_u32::<LittleEndian>()?;
550 let compressed_size = reader.read_u32::<LittleEndian>()?;
551 let uncompressed_size = reader.read_u32::<LittleEndian>()?;
552 let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
553 let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
554 let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
555 let _disk_number = reader.read_u16::<LittleEndian>()?;
556 let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
557 let external_file_attributes = reader.read_u32::<LittleEndian>()?;
558 let offset = reader.read_u32::<LittleEndian>()? as u64;
559 let mut file_name_raw = vec![0; file_name_length];
560 reader.read_exact(&mut file_name_raw)?;
561 let mut extra_field = vec![0; extra_field_length];
562 reader.read_exact(&mut extra_field)?;
563 let mut file_comment_raw = vec![0; file_comment_length];
564 reader.read_exact(&mut file_comment_raw)?;
565
566 let file_name = match is_utf8 {
567 true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
568 false => file_name_raw.clone().from_cp437(),
569 };
570 let file_comment = match is_utf8 {
571 true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
572 false => file_comment_raw.from_cp437(),
573 };
574
575 // Construct the result
576 let mut result = ZipFileData {
577 system: System::from_u8((version_made_by >> 8) as u8),
578 version_made_by: version_made_by as u8,
579 encrypted,
580 using_data_descriptor,
581 compression_method: {
582 #[allow(deprecated)]
583 CompressionMethod::from_u16(compression_method)
584 },
585 last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
586 crc32,
587 compressed_size: compressed_size as u64,
588 uncompressed_size: uncompressed_size as u64,
589 file_name,
590 file_name_raw,
591 extra_field,
592 file_comment,
593 header_start: offset,
594 central_header_start,
595 data_start: 0,
596 external_attributes: external_file_attributes,
597 large_file: false,
598 };
599
600 match parse_extra_field(&mut result) {
601 Ok(..) | Err(ZipError::Io(..)) => {}
602 Err(e) => return Err(e),
603 }
604
605 // Account for shifted zip offsets.
606 result.header_start += archive_offset;
607
608 Ok(result)
609 }
610
parse_extra_field(file: &mut ZipFileData) -> ZipResult<()>611 fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
612 let mut reader = io::Cursor::new(&file.extra_field);
613
614 while (reader.position() as usize) < file.extra_field.len() {
615 let kind = reader.read_u16::<LittleEndian>()?;
616 let len = reader.read_u16::<LittleEndian>()?;
617 let mut len_left = len as i64;
618 // Zip64 extended information extra field
619 if kind == 0x0001 {
620 if file.uncompressed_size == 0xFFFFFFFF {
621 file.large_file = true;
622 file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
623 len_left -= 8;
624 }
625 if file.compressed_size == 0xFFFFFFFF {
626 file.large_file = true;
627 file.compressed_size = reader.read_u64::<LittleEndian>()?;
628 len_left -= 8;
629 }
630 if file.header_start == 0xFFFFFFFF {
631 file.header_start = reader.read_u64::<LittleEndian>()?;
632 len_left -= 8;
633 }
634 // Unparsed fields:
635 // u32: disk start number
636 }
637
638 // We could also check for < 0 to check for errors
639 if len_left > 0 {
640 reader.seek(io::SeekFrom::Current(len_left))?;
641 }
642 }
643 Ok(())
644 }
645
646 /// Methods for retrieving information on zip files
647 impl<'a> ZipFile<'a> {
get_reader(&mut self) -> &mut ZipFileReader<'a>648 fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
649 if let ZipFileReader::NoReader = self.reader {
650 let data = &self.data;
651 let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
652 self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
653 }
654 &mut self.reader
655 }
656
get_raw_reader(&mut self) -> &mut dyn Read657 pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
658 if let ZipFileReader::NoReader = self.reader {
659 let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
660 self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
661 }
662 &mut self.reader
663 }
664
665 /// Get the version of the file
version_made_by(&self) -> (u8, u8)666 pub fn version_made_by(&self) -> (u8, u8) {
667 (
668 self.data.version_made_by / 10,
669 self.data.version_made_by % 10,
670 )
671 }
672
673 /// Get the name of the file
674 ///
675 /// # Warnings
676 ///
677 /// It is dangerous to use this name directly when extracting an archive.
678 /// It may contain an absolute path (`/etc/shadow`), or break out of the
679 /// current directory (`../runtime`). Carelessly writing to these paths
680 /// allows an attacker to craft a ZIP archive that will overwrite critical
681 /// files.
682 ///
683 /// You can use the [`ZipFile::enclosed_name`] method to validate the name
684 /// as a safe path.
name(&self) -> &str685 pub fn name(&self) -> &str {
686 &self.data.file_name
687 }
688
689 /// Get the name of the file, in the raw (internal) byte representation.
690 ///
691 /// The encoding of this data is currently undefined.
name_raw(&self) -> &[u8]692 pub fn name_raw(&self) -> &[u8] {
693 &self.data.file_name_raw
694 }
695
696 /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
697 /// removes a leading '/' and removes '..' parts.
698 #[deprecated(
699 since = "0.5.7",
700 note = "by stripping `..`s from the path, the meaning of paths can change.
701 `mangled_name` can be used if this behaviour is desirable"
702 )]
pub fn sanitized_name(&self) -> ::std::path::PathBuf {
    // Deprecated alias: forwards to `mangled_name`.
    self.mangled_name()
}
706
707 /// Rewrite the path, ignoring any path components with special meaning.
708 ///
709 /// - Absolute paths are made relative
710 /// - [`ParentDir`]s are ignored
711 /// - Truncates the filename at a NULL byte
712 ///
713 /// This is appropriate if you need to be able to extract *something* from
714 /// any archive, but will easily misrepresent trivial paths like
715 /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
716 /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
717 ///
718 /// [`ParentDir`]: `Component::ParentDir`
mangled_name(&self) -> ::std::path::PathBuf719 pub fn mangled_name(&self) -> ::std::path::PathBuf {
720 self.data.file_name_sanitized()
721 }
722
723 /// Ensure the file path is safe to use as a [`Path`].
724 ///
725 /// - It can't contain NULL bytes
726 /// - It can't resolve to a path outside the current directory
727 /// > `foo/../bar` is fine, `foo/../../bar` is not.
728 /// - It can't be an absolute path
729 ///
730 /// This will read well-formed ZIP files correctly, and is resistant
731 /// to path-based exploits. It is recommended over
732 /// [`ZipFile::mangled_name`].
enclosed_name(&self) -> Option<&Path>733 pub fn enclosed_name(&self) -> Option<&Path> {
734 if self.data.file_name.contains('\0') {
735 return None;
736 }
737 let path = Path::new(&self.data.file_name);
738 let mut depth = 0usize;
739 for component in path.components() {
740 match component {
741 Component::Prefix(_) | Component::RootDir => return None,
742 Component::ParentDir => depth = depth.checked_sub(1)?,
743 Component::Normal(_) => depth += 1,
744 Component::CurDir => (),
745 }
746 }
747 Some(path)
748 }
749
750 /// Get the comment of the file
comment(&self) -> &str751 pub fn comment(&self) -> &str {
752 &self.data.file_comment
753 }
754
755 /// Get the compression method used to store the file
compression(&self) -> CompressionMethod756 pub fn compression(&self) -> CompressionMethod {
757 self.data.compression_method
758 }
759
760 /// Get the size of the file in the archive
compressed_size(&self) -> u64761 pub fn compressed_size(&self) -> u64 {
762 self.data.compressed_size
763 }
764
765 /// Get the size of the file when uncompressed
size(&self) -> u64766 pub fn size(&self) -> u64 {
767 self.data.uncompressed_size
768 }
769
770 /// Get the time the file was last modified
last_modified(&self) -> DateTime771 pub fn last_modified(&self) -> DateTime {
772 self.data.last_modified_time
773 }
774 /// Returns whether the file is actually a directory
is_dir(&self) -> bool775 pub fn is_dir(&self) -> bool {
776 self.name()
777 .chars()
778 .rev()
779 .next()
780 .map_or(false, |c| c == '/' || c == '\\')
781 }
782
783 /// Returns whether the file is a regular file
is_file(&self) -> bool784 pub fn is_file(&self) -> bool {
785 !self.is_dir()
786 }
787
788 /// Get unix mode for the file
unix_mode(&self) -> Option<u32>789 pub fn unix_mode(&self) -> Option<u32> {
790 if self.data.external_attributes == 0 {
791 return None;
792 }
793
794 match self.data.system {
795 System::Unix => Some(self.data.external_attributes >> 16),
796 System::Dos => {
797 // Interpret MSDOS directory bit
798 let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
799 ffi::S_IFDIR | 0o0775
800 } else {
801 ffi::S_IFREG | 0o0664
802 };
803 if 0x01 == (self.data.external_attributes & 0x01) {
804 // Read-only bit; strip write permissions
805 mode &= 0o0555;
806 }
807 Some(mode)
808 }
809 _ => None,
810 }
811 }
812
813 /// Get the CRC32 hash of the original file
crc32(&self) -> u32814 pub fn crc32(&self) -> u32 {
815 self.data.crc32
816 }
817
818 /// Get the extra data of the zip header for this file
extra_data(&self) -> &[u8]819 pub fn extra_data(&self) -> &[u8] {
820 &self.data.extra_field
821 }
822
823 /// Get the starting offset of the data of the compressed file
data_start(&self) -> u64824 pub fn data_start(&self) -> u64 {
825 self.data.data_start
826 }
827
828 /// Get the starting offset of the zip header for this file
header_start(&self) -> u64829 pub fn header_start(&self) -> u64 {
830 self.data.header_start
831 }
832 /// Get the starting offset of the zip header in the central directory for this file
central_header_start(&self) -> u64833 pub fn central_header_start(&self) -> u64 {
834 self.data.central_header_start
835 }
836 }
837
838 impl<'a> Read for ZipFile<'a> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>839 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
840 self.get_reader().read(buf)
841 }
842 }
843
844 impl<'a> Drop for ZipFile<'a> {
drop(&mut self)845 fn drop(&mut self) {
846 // self.data is Owned, this reader is constructed by a streaming reader.
847 // In this case, we want to exhaust the reader so that the next file is accessible.
848 if let Cow::Owned(_) = self.data {
849 let mut buffer = [0; 1 << 16];
850
851 // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
852 let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
853 ZipFileReader::NoReader => {
854 let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
855 innerreader.expect("Invalid reader state").into_inner()
856 }
857 reader => {
858 let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
859 innerreader.into_inner()
860 }
861 };
862
863 loop {
864 match reader.read(&mut buffer) {
865 Ok(0) => break,
866 Ok(_) => (),
867 Err(e) => panic!(
868 "Could not consume all of the output of the current ZipFile: {:?}",
869 e
870 ),
871 }
872 }
873 }
874 }
875 }
876
877 /// Read ZipFile structures from a non-seekable reader.
878 ///
/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
/// as some information will be missing when reading in this manner.
881 ///
882 /// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
883 /// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
884 /// is encountered. No more files should be read after this.
885 ///
886 /// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
887 /// the structure is done.
888 ///
889 /// Missing fields are:
890 /// * `comment`: set to an empty string
891 /// * `data_start`: set to 0
892 /// * `external_attributes`: `unix_mode()`: will return None
read_zipfile_from_stream<'a, R: io::Read>( reader: &'a mut R, ) -> ZipResult<Option<ZipFile<'_>>>893 pub fn read_zipfile_from_stream<'a, R: io::Read>(
894 reader: &'a mut R,
895 ) -> ZipResult<Option<ZipFile<'_>>> {
896 let signature = reader.read_u32::<LittleEndian>()?;
897
898 match signature {
899 spec::LOCAL_FILE_HEADER_SIGNATURE => (),
900 spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
901 _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
902 }
903
904 let version_made_by = reader.read_u16::<LittleEndian>()?;
905 let flags = reader.read_u16::<LittleEndian>()?;
906 let encrypted = flags & 1 == 1;
907 let is_utf8 = flags & (1 << 11) != 0;
908 let using_data_descriptor = flags & (1 << 3) != 0;
909 #[allow(deprecated)]
910 let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
911 let last_mod_time = reader.read_u16::<LittleEndian>()?;
912 let last_mod_date = reader.read_u16::<LittleEndian>()?;
913 let crc32 = reader.read_u32::<LittleEndian>()?;
914 let compressed_size = reader.read_u32::<LittleEndian>()?;
915 let uncompressed_size = reader.read_u32::<LittleEndian>()?;
916 let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
917 let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
918
919 let mut file_name_raw = vec![0; file_name_length];
920 reader.read_exact(&mut file_name_raw)?;
921 let mut extra_field = vec![0; extra_field_length];
922 reader.read_exact(&mut extra_field)?;
923
924 let file_name = match is_utf8 {
925 true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
926 false => file_name_raw.clone().from_cp437(),
927 };
928
929 let mut result = ZipFileData {
930 system: System::from_u8((version_made_by >> 8) as u8),
931 version_made_by: version_made_by as u8,
932 encrypted,
933 using_data_descriptor,
934 compression_method,
935 last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
936 crc32,
937 compressed_size: compressed_size as u64,
938 uncompressed_size: uncompressed_size as u64,
939 file_name,
940 file_name_raw,
941 extra_field,
942 file_comment: String::new(), // file comment is only available in the central directory
943 // header_start and data start are not available, but also don't matter, since seeking is
944 // not available.
945 header_start: 0,
946 data_start: 0,
947 central_header_start: 0,
948 // The external_attributes field is only available in the central directory.
949 // We set this to zero, which should be valid as the docs state 'If input came
950 // from standard input, this field is set to zero.'
951 external_attributes: 0,
952 large_file: false,
953 };
954
955 match parse_extra_field(&mut result) {
956 Ok(..) | Err(ZipError::Io(..)) => {}
957 Err(e) => return Err(e),
958 }
959
960 if encrypted {
961 return unsupported_zip_error("Encrypted files are not supported");
962 }
963 if using_data_descriptor {
964 return unsupported_zip_error("The file length is not available in the local header");
965 }
966
967 let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size as u64);
968
969 let result_crc32 = result.crc32;
970 let result_compression_method = result.compression_method;
971 let crypto_reader = make_crypto_reader(
972 result_compression_method,
973 result_crc32,
974 result.last_modified_time,
975 result.using_data_descriptor,
976 limit_reader,
977 None,
978 )?
979 .unwrap();
980
981 Ok(Some(ZipFile {
982 data: Cow::Owned(result),
983 crypto_reader: None,
984 reader: make_reader(result_compression_method, result_crc32, crypto_reader),
985 }))
986 }
987
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// Copies fixture bytes into an owned, seekable in-memory reader.
    fn fixture(bytes: &[u8]) -> io::Cursor<Vec<u8>> {
        io::Cursor::new(bytes.to_vec())
    }

    /// Opening `invalid_offset.zip` must fail.
    #[test]
    fn invalid_offset() {
        let source = fixture(include_bytes!("../tests/data/invalid_offset.zip"));
        let outcome = ZipArchive::new(source);
        assert!(outcome.is_err());
    }

    /// Opening `invalid_offset2.zip` must fail.
    #[test]
    fn invalid_offset2() {
        let source = fixture(include_bytes!("../tests/data/invalid_offset2.zip"));
        let outcome = ZipArchive::new(source);
        assert!(outcome.is_err());
    }

    /// `zip64_demo.zip` opens and reports exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let source = fixture(include_bytes!("../tests/data/zip64_demo.zip"));
        let archive = ZipArchive::new(source).unwrap();
        assert_eq!(archive.len(), 1);
    }

    /// `mimetype.zip` has an empty archive comment and its first entry's central header
    /// starts at offset 77.
    #[test]
    fn zip_contents() {
        let source = fixture(include_bytes!("../tests/data/mimetype.zip"));
        let mut archive = ZipArchive::new(source).unwrap();
        assert!(archive.comment().is_empty());

        let first = archive.by_index(0).unwrap();
        assert_eq!(first.central_header_start(), 77);
    }

    /// Streaming over `mimetype.zip` yields entries without error until `None` is returned.
    #[test]
    fn zip_read_streaming() {
        let mut stream = fixture(include_bytes!("../tests/data/mimetype.zip"));
        loop {
            let entry = read_zipfile_from_stream(&mut stream).unwrap();
            if entry.is_none() {
                break;
            }
        }
    }

    /// A cloned archive reads the same data as the archive it was cloned from, with the two
    /// readers advancing independently.
    #[test]
    fn zip_clone() {
        let source = fixture(include_bytes!("../tests/data/mimetype.zip"));
        let mut original = ZipArchive::new(source).unwrap();
        let mut duplicate = original.clone();

        let mut entry_a = original.by_index(0).unwrap();
        let mut entry_b = duplicate.by_index(0).unwrap();

        let stamp = entry_a.last_modified();
        let observed = (
            stamp.year(),
            stamp.month(),
            stamp.day(),
            stamp.hour(),
            stamp.minute(),
            stamp.second(),
        );
        assert_eq!(observed, (1980, 1, 1, 0, 0, 0));

        let mut first_a = [0; 5];
        let mut first_b = [0; 5];
        let mut second_a = [0; 5];
        let mut second_b = [0; 5];

        // Interleave the reads so that each entry is read twice, alternating between them.
        entry_a.read(&mut first_a).unwrap();
        entry_b.read(&mut first_b).unwrap();
        entry_a.read(&mut second_a).unwrap();
        entry_b.read(&mut second_b).unwrap();

        assert_eq!(first_a, first_b);
        assert_eq!(second_a, second_b);
        assert_ne!(first_a, second_a);
    }

    /// In `files_and_dirs.zip`, entries named `dir*` are directories and entries named
    /// `file*` are files.
    #[test]
    fn file_and_dir_predicates() {
        let source = fixture(include_bytes!("../tests/data/files_and_dirs.zip"));
        let mut archive = ZipArchive::new(source).unwrap();

        for index in 0..archive.len() {
            let entry = archive.by_index(index).unwrap();
            let path = entry.enclosed_name().unwrap();
            let leaf = path.file_name().unwrap().to_str().unwrap();

            let is_named_dir = leaf.starts_with("dir") && entry.is_dir();
            let is_named_file = leaf.starts_with("file") && entry.is_file();
            assert!(is_named_dir || is_named_file);
        }
    }
}
1112