1 use std::cell::{Cell, RefCell};
2 use std::cmp;
3 use std::fs;
4 use std::io;
5 use std::io::prelude::*;
6 use std::marker;
7 use std::path::Path;
8 
9 use crate::entry::{EntryFields, EntryIo};
10 use crate::error::TarError;
11 use crate::other;
12 use crate::pax::pax_extensions_size;
13 use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
14 
15 /// A top-level representation of an archive file.
16 ///
17 /// This archive can have an entry added to it and it can be iterated over.
18 pub struct Archive<R: ?Sized + Read> {
19     inner: ArchiveInner<R>,
20 }
21 
/// Shared state behind an `Archive`: the reader, the read position and the
/// options that are copied into every entry produced from the archive.
pub struct ArchiveInner<R>
where
    R: ?Sized,
{
    /// Number of bytes read from `obj` so far.
    pos: Cell<u64>,
    /// Whether extended attributes are unpacked.
    unpack_xattrs: bool,
    /// Whether extended permissions are preserved when unpacking.
    preserve_permissions: bool,
    /// Whether modification times are preserved when unpacking.
    preserve_mtime: bool,
    /// Whether existing files and symlinks are overwritten when unpacking.
    overwrite: bool,
    /// Whether zeroed headers are skipped instead of ending the archive.
    ignore_zeros: bool,
    /// The underlying reader. Kept last so the type can be unsized.
    obj: RefCell<R>,
}
31 
32 /// An iterator over the entries of an archive.
33 pub struct Entries<'a, R: 'a + Read> {
34     fields: EntriesFields<'a>,
35     _ignored: marker::PhantomData<&'a Archive<R>>,
36 }
37 
/// Reader-type-independent iteration state backing `Entries`.
struct EntriesFields<'a> {
    /// The archive being iterated, with its reader type erased.
    archive: &'a Archive<dyn Read + 'a>,
    /// Archive offset at which the next header is expected to start.
    next: u64,
    /// Set once iteration has returned `None` or an error; afterwards the
    /// iterator only yields `None`.
    done: bool,
    /// When `true`, entries are returned as found, without folding GNU long
    /// name/link or PAX extension members into the entry that follows them.
    raw: bool,
}
44 
45 impl<R: Read> Archive<R> {
46     /// Create a new archive with the underlying object as the reader.
new(obj: R) -> Archive<R>47     pub fn new(obj: R) -> Archive<R> {
48         Archive {
49             inner: ArchiveInner {
50                 unpack_xattrs: false,
51                 preserve_permissions: false,
52                 preserve_mtime: true,
53                 overwrite: true,
54                 ignore_zeros: false,
55                 obj: RefCell::new(obj),
56                 pos: Cell::new(0),
57             },
58         }
59     }
60 
61     /// Unwrap this archive, returning the underlying object.
into_inner(self) -> R62     pub fn into_inner(self) -> R {
63         self.inner.obj.into_inner()
64     }
65 
66     /// Construct an iterator over the entries in this archive.
67     ///
68     /// Note that care must be taken to consider each entry within an archive in
69     /// sequence. If entries are processed out of sequence (from what the
70     /// iterator returns), then the contents read for each entry may be
71     /// corrupted.
entries(&mut self) -> io::Result<Entries<R>>72     pub fn entries(&mut self) -> io::Result<Entries<R>> {
73         let me: &mut Archive<dyn Read> = self;
74         me._entries().map(|fields| Entries {
75             fields: fields,
76             _ignored: marker::PhantomData,
77         })
78     }
79 
80     /// Unpacks the contents tarball into the specified `dst`.
81     ///
82     /// This function will iterate over the entire contents of this tarball,
83     /// extracting each file in turn to the location specified by the entry's
84     /// path name.
85     ///
86     /// This operation is relatively sensitive in that it will not write files
87     /// outside of the path specified by `dst`. Files in the archive which have
88     /// a '..' in their path are skipped during the unpacking process.
89     ///
90     /// # Examples
91     ///
92     /// ```no_run
93     /// use std::fs::File;
94     /// use tar::Archive;
95     ///
96     /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
97     /// ar.unpack("foo").unwrap();
98     /// ```
unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()>99     pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
100         let me: &mut Archive<dyn Read> = self;
101         me._unpack(dst.as_ref())
102     }
103 
104     /// Indicate whether extended file attributes (xattrs on Unix) are preserved
105     /// when unpacking this archive.
106     ///
107     /// This flag is disabled by default and is currently only implemented on
108     /// Unix using xattr support. This may eventually be implemented for
109     /// Windows, however, if other archive implementations are found which do
110     /// this as well.
set_unpack_xattrs(&mut self, unpack_xattrs: bool)111     pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
112         self.inner.unpack_xattrs = unpack_xattrs;
113     }
114 
115     /// Indicate whether extended permissions (like suid on Unix) are preserved
116     /// when unpacking this entry.
117     ///
118     /// This flag is disabled by default and is currently only implemented on
119     /// Unix.
set_preserve_permissions(&mut self, preserve: bool)120     pub fn set_preserve_permissions(&mut self, preserve: bool) {
121         self.inner.preserve_permissions = preserve;
122     }
123 
124     /// Indicate whether files and symlinks should be overwritten on extraction.
set_overwrite(&mut self, overwrite: bool)125     pub fn set_overwrite(&mut self, overwrite: bool) {
126         self.inner.overwrite = overwrite;
127     }
128 
129     /// Indicate whether access time information is preserved when unpacking
130     /// this entry.
131     ///
132     /// This flag is enabled by default.
set_preserve_mtime(&mut self, preserve: bool)133     pub fn set_preserve_mtime(&mut self, preserve: bool) {
134         self.inner.preserve_mtime = preserve;
135     }
136 
137     /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
138     /// entries.
139     ///
140     /// This can be used in case multiple tar archives have been concatenated together.
set_ignore_zeros(&mut self, ignore_zeros: bool)141     pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
142         self.inner.ignore_zeros = ignore_zeros;
143     }
144 }
145 
146 impl<'a> Archive<dyn Read + 'a> {
_entries(&mut self) -> io::Result<EntriesFields>147     fn _entries(&mut self) -> io::Result<EntriesFields> {
148         if self.inner.pos.get() != 0 {
149             return Err(other(
150                 "cannot call entries unless archive is at \
151                  position 0",
152             ));
153         }
154         Ok(EntriesFields {
155             archive: self,
156             done: false,
157             next: 0,
158             raw: false,
159         })
160     }
161 
_unpack(&mut self, dst: &Path) -> io::Result<()>162     fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
163         if dst.symlink_metadata().is_err() {
164             fs::create_dir_all(&dst)
165                 .map_err(|e| TarError::new(&format!("failed to create `{}`", dst.display()), e))?;
166         }
167 
168         // Canonicalizing the dst directory will prepend the path with '\\?\'
169         // on windows which will allow windows APIs to treat the path as an
170         // extended-length path with a 32,767 character limit. Otherwise all
171         // unpacked paths over 260 characters will fail on creation with a
172         // NotFound exception.
173         let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
174 
175         // Delay any directory entries until the end (they will be created if needed by
176         // descendants), to ensure that directory permissions do not interfer with descendant
177         // extraction.
178         let mut directories = Vec::new();
179         for entry in self._entries()? {
180             let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
181             if file.header().entry_type() == crate::EntryType::Directory {
182                 directories.push(file);
183             } else {
184                 file.unpack_in(dst)?;
185             }
186         }
187         for mut dir in directories {
188             dir.unpack_in(dst)?;
189         }
190 
191         Ok(())
192     }
193 
skip(&self, mut amt: u64) -> io::Result<()>194     fn skip(&self, mut amt: u64) -> io::Result<()> {
195         let mut buf = [0u8; 4096 * 8];
196         while amt > 0 {
197             let n = cmp::min(amt, buf.len() as u64);
198             let n = (&self.inner).read(&mut buf[..n as usize])?;
199             if n == 0 {
200                 return Err(other("unexpected EOF during skip"));
201             }
202             amt -= n as u64;
203         }
204         Ok(())
205     }
206 }
207 
208 impl<'a, R: Read> Entries<'a, R> {
209     /// Indicates whether this iterator will return raw entries or not.
210     ///
211     /// If the raw list of entries are returned, then no preprocessing happens
212     /// on account of this library, for example taking into account GNU long name
213     /// or long link archive members. Raw iteration is disabled by default.
raw(self, raw: bool) -> Entries<'a, R>214     pub fn raw(self, raw: bool) -> Entries<'a, R> {
215         Entries {
216             fields: EntriesFields {
217                 raw: raw,
218                 ..self.fields
219             },
220             _ignored: marker::PhantomData,
221         }
222     }
223 }
224 impl<'a, R: Read> Iterator for Entries<'a, R> {
225     type Item = io::Result<Entry<'a, R>>;
226 
next(&mut self) -> Option<io::Result<Entry<'a, R>>>227     fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
228         self.fields
229             .next()
230             .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
231     }
232 }
233 
234 impl<'a> EntriesFields<'a> {
next_entry_raw( &mut self, pax_size: Option<u64>, ) -> io::Result<Option<Entry<'a, io::Empty>>>235     fn next_entry_raw(
236         &mut self,
237         pax_size: Option<u64>,
238     ) -> io::Result<Option<Entry<'a, io::Empty>>> {
239         let mut header = Header::new_old();
240         let mut header_pos = self.next;
241         loop {
242             // Seek to the start of the next header in the archive
243             let delta = self.next - self.archive.inner.pos.get();
244             self.archive.skip(delta)?;
245 
246             // EOF is an indicator that we are at the end of the archive.
247             if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
248                 return Ok(None);
249             }
250 
251             // If a header is not all zeros, we have another valid header.
252             // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
253             // end of the archive.
254             if !header.as_bytes().iter().all(|i| *i == 0) {
255                 self.next += 512;
256                 break;
257             }
258 
259             if !self.archive.inner.ignore_zeros {
260                 return Ok(None);
261             }
262             self.next += 512;
263             header_pos = self.next;
264         }
265 
266         // Make sure the checksum is ok
267         let sum = header.as_bytes()[..148]
268             .iter()
269             .chain(&header.as_bytes()[156..])
270             .fold(0, |a, b| a + (*b as u32))
271             + 8 * 32;
272         let cksum = header.cksum()?;
273         if sum != cksum {
274             return Err(other("archive header checksum mismatch"));
275         }
276 
277         let file_pos = self.next;
278         let mut size = header.entry_size()?;
279         if size == 0 {
280             if let Some(pax_size) = pax_size {
281                 size = pax_size;
282             }
283         }
284         let ret = EntryFields {
285             size: size,
286             header_pos: header_pos,
287             file_pos: file_pos,
288             data: vec![EntryIo::Data((&self.archive.inner).take(size))],
289             header: header,
290             long_pathname: None,
291             long_linkname: None,
292             pax_extensions: None,
293             unpack_xattrs: self.archive.inner.unpack_xattrs,
294             preserve_permissions: self.archive.inner.preserve_permissions,
295             preserve_mtime: self.archive.inner.preserve_mtime,
296             overwrite: self.archive.inner.overwrite,
297         };
298 
299         // Store where the next entry is, rounding up by 512 bytes (the size of
300         // a header);
301         let size = (size + 511) & !(512 - 1);
302         self.next += size;
303 
304         Ok(Some(ret.into_entry()))
305     }
306 
next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>>307     fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
308         if self.raw {
309             return self.next_entry_raw(None);
310         }
311 
312         let mut gnu_longname = None;
313         let mut gnu_longlink = None;
314         let mut pax_extensions = None;
315         let mut pax_size = None;
316         let mut processed = 0;
317         loop {
318             processed += 1;
319             let entry = match self.next_entry_raw(pax_size)? {
320                 Some(entry) => entry,
321                 None if processed > 1 => {
322                     return Err(other(
323                         "members found describing a future member \
324                          but no future member found",
325                     ));
326                 }
327                 None => return Ok(None),
328             };
329 
330             let is_recognized_header =
331                 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
332 
333             if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
334                 if gnu_longname.is_some() {
335                     return Err(other(
336                         "two long name entries describing \
337                          the same member",
338                     ));
339                 }
340                 gnu_longname = Some(EntryFields::from(entry).read_all()?);
341                 continue;
342             }
343 
344             if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
345                 if gnu_longlink.is_some() {
346                     return Err(other(
347                         "two long name entries describing \
348                          the same member",
349                     ));
350                 }
351                 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
352                 continue;
353             }
354 
355             if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
356                 if pax_extensions.is_some() {
357                     return Err(other(
358                         "two pax extensions entries describing \
359                          the same member",
360                     ));
361                 }
362                 pax_extensions = Some(EntryFields::from(entry).read_all()?);
363                 if let Some(pax_extensions_ref) = &pax_extensions {
364                     pax_size = pax_extensions_size(pax_extensions_ref);
365                 }
366                 continue;
367             }
368 
369             let mut fields = EntryFields::from(entry);
370             fields.long_pathname = gnu_longname;
371             fields.long_linkname = gnu_longlink;
372             fields.pax_extensions = pax_extensions;
373             self.parse_sparse_header(&mut fields)?;
374             return Ok(Some(fields.into_entry()));
375         }
376     }
377 
parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()>378     fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
379         if !entry.header.entry_type().is_gnu_sparse() {
380             return Ok(());
381         }
382         let gnu = match entry.header.as_gnu() {
383             Some(gnu) => gnu,
384             None => return Err(other("sparse entry type listed but not GNU header")),
385         };
386 
387         // Sparse files are represented internally as a list of blocks that are
388         // read. Blocks are either a bunch of 0's or they're data from the
389         // underlying archive.
390         //
391         // Blocks of a sparse file are described by the `GnuSparseHeader`
392         // structure, some of which are contained in `GnuHeader` but some of
393         // which may also be contained after the first header in further
394         // headers.
395         //
396         // We read off all the blocks here and use the `add_block` function to
397         // incrementally add them to the list of I/O block (in `entry.data`).
398         // The `add_block` function also validates that each chunk comes after
399         // the previous, we don't overrun the end of the file, and each block is
400         // aligned to a 512-byte boundary in the archive itself.
401         //
402         // At the end we verify that the sparse file size (`Header::size`) is
403         // the same as the current offset (described by the list of blocks) as
404         // well as the amount of data read equals the size of the entry
405         // (`Header::entry_size`).
406         entry.data.truncate(0);
407 
408         let mut cur = 0;
409         let mut remaining = entry.size;
410         {
411             let data = &mut entry.data;
412             let reader = &self.archive.inner;
413             let size = entry.size;
414             let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
415                 if block.is_empty() {
416                     return Ok(());
417                 }
418                 let off = block.offset()?;
419                 let len = block.length()?;
420                 if len != 0 && (size - remaining) % 512 != 0 {
421                     return Err(other(
422                         "previous block in sparse file was not \
423                          aligned to 512-byte boundary",
424                     ));
425                 } else if off < cur {
426                     return Err(other(
427                         "out of order or overlapping sparse \
428                          blocks",
429                     ));
430                 } else if cur < off {
431                     let block = io::repeat(0).take(off - cur);
432                     data.push(EntryIo::Pad(block));
433                 }
434                 cur = off
435                     .checked_add(len)
436                     .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
437                 remaining = remaining.checked_sub(len).ok_or_else(|| {
438                     other(
439                         "sparse file consumed more data than the header \
440                          listed",
441                     )
442                 })?;
443                 data.push(EntryIo::Data(reader.take(len)));
444                 Ok(())
445             };
446             for block in gnu.sparse.iter() {
447                 add_block(block)?
448             }
449             if gnu.is_extended() {
450                 let mut ext = GnuExtSparseHeader::new();
451                 ext.isextended[0] = 1;
452                 while ext.is_extended() {
453                     if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
454                         return Err(other("failed to read extension"));
455                     }
456 
457                     self.next += 512;
458                     for block in ext.sparse.iter() {
459                         add_block(block)?;
460                     }
461                 }
462             }
463         }
464         if cur != gnu.real_size()? {
465             return Err(other(
466                 "mismatch in sparse file chunks and \
467                  size in header",
468             ));
469         }
470         entry.size = cur;
471         if remaining > 0 {
472             return Err(other(
473                 "mismatch in sparse file chunks and \
474                  entry size in header",
475             ));
476         }
477         Ok(())
478     }
479 }
480 
481 impl<'a> Iterator for EntriesFields<'a> {
482     type Item = io::Result<Entry<'a, io::Empty>>;
483 
next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>>484     fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
485         if self.done {
486             None
487         } else {
488             match self.next_entry() {
489                 Ok(Some(e)) => Some(Ok(e)),
490                 Ok(None) => {
491                     self.done = true;
492                     None
493                 }
494                 Err(e) => {
495                     self.done = true;
496                     Some(Err(e))
497                 }
498             }
499         }
500     }
501 }
502 
503 impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>504     fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
505         self.obj.borrow_mut().read(into).map(|i| {
506             self.pos.set(self.pos.get() + i as u64);
507             i
508         })
509     }
510 }
511 
512 /// Try to fill the buffer from the reader.
513 ///
514 /// If the reader reaches its end before filling the buffer at all, returns `false`.
515 /// Otherwise returns `true`.
try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool>516 fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
517     let mut read = 0;
518     while read < buf.len() {
519         match r.read(&mut buf[read..])? {
520             0 => {
521                 if read == 0 {
522                     return Ok(false);
523                 }
524 
525                 return Err(other("failed to read entire block"));
526             }
527             n => read += n,
528         }
529     }
530     Ok(true)
531 }
532