use std::cell::{Cell, RefCell};
use std::cmp;
use std::fs;
use std::io;
use std::io::prelude::*;
use std::marker;
use std::path::Path;

use crate::entry::{EntryFields, EntryIo};
use crate::error::TarError;
use crate::other;
use crate::pax::pax_extensions_size;
use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};

/// A top-level representation of an archive file.
///
/// This archive can be iterated over to process each of its entries.
pub struct Archive<R: ?Sized + Read> {
    inner: ArchiveInner<R>,
}

pub struct ArchiveInner<R: ?Sized> {
    pos: Cell<u64>,
    unpack_xattrs: bool,
    preserve_permissions: bool,
    preserve_mtime: bool,
    overwrite: bool,
    ignore_zeros: bool,
    obj: RefCell<R>,
}

/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
    fields: EntriesFields<'a>,
    _ignored: marker::PhantomData<&'a Archive<R>>,
}

struct EntriesFields<'a> {
    archive: &'a Archive<dyn Read + 'a>,
    next: u64,
    done: bool,
    raw: bool,
}

impl<R: Read> Archive<R> {
    /// Create a new archive with the underlying object as the reader.
    pub fn new(obj: R) -> Archive<R> {
        Archive {
            inner: ArchiveInner {
                unpack_xattrs: false,
                preserve_permissions: false,
                preserve_mtime: true,
                overwrite: true,
                ignore_zeros: false,
                obj: RefCell::new(obj),
                pos: Cell::new(0),
            },
        }
    }

    /// Unwrap this archive, returning the underlying object.
    pub fn into_inner(self) -> R {
        self.inner.obj.into_inner()
    }

    /// Construct an iterator over the entries in this archive.
    ///
    /// Note that care must be taken to consider each entry within an archive in
    /// sequence. If entries are processed out of sequence (from what the
    /// iterator returns), then the contents read for each entry may be
    /// corrupted.
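    ///
    /// # Examples
    ///
    /// A minimal sketch of iterating entries in order; `foo.tar` is an assumed
    /// placeholder path.
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries().unwrap() {
    ///     // Each entry must be processed in the order the iterator yields it.
    ///     let entry = entry.unwrap();
    ///     println!("{:?}", entry.path().unwrap());
    /// }
    /// ```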
    pub fn entries(&mut self) -> io::Result<Entries<R>> {
        let me: &mut Archive<dyn Read> = self;
        me._entries().map(|fields| Entries {
            fields: fields,
            _ignored: marker::PhantomData,
        })
    }

    /// Unpacks the contents of this tarball into the specified `dst`.
    ///
    /// This function will iterate over the entire contents of this tarball,
    /// extracting each file in turn to the location specified by the entry's
    /// path name.
    ///
    /// This operation is relatively sensitive in that it will not write files
    /// outside of the path specified by `dst`. Files in the archive which have
    /// a '..' in their path are skipped during the unpacking process.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// ar.unpack("foo").unwrap();
    /// ```
    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
        let me: &mut Archive<dyn Read> = self;
        me._unpack(dst.as_ref())
    }

    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
    /// when unpacking this archive.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix using xattr support. This may eventually be implemented for
    /// Windows, however, if other archive implementations are found which do
    /// this as well.
    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
        self.inner.unpack_xattrs = unpack_xattrs;
    }

    /// Indicate whether extended permissions (like suid on Unix) are preserved
    /// when unpacking this archive.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
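    ///
    /// # Examples
    ///
    /// A minimal sketch; `foo.tar` and `foo` are assumed placeholder paths.
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// ar.set_preserve_permissions(true);
    /// ar.unpack("foo").unwrap();
    /// ```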
    pub fn set_preserve_permissions(&mut self, preserve: bool) {
        self.inner.preserve_permissions = preserve;
    }

    /// Indicate whether files and symlinks should be overwritten on extraction.
    pub fn set_overwrite(&mut self, overwrite: bool) {
        self.inner.overwrite = overwrite;
    }

    /// Indicate whether modification time (mtime) information is preserved when
    /// unpacking this archive.
    ///
    /// This flag is enabled by default.
    pub fn set_preserve_mtime(&mut self, preserve: bool) {
        self.inner.preserve_mtime = preserve;
    }

    /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
    /// entries.
    ///
    /// This can be used in case multiple tar archives have been concatenated together.
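    ///
    /// # Examples
    ///
    /// A minimal sketch; `multi.tar` is an assumed placeholder for two tar
    /// archives concatenated back to back.
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("multi.tar").unwrap());
    /// ar.set_ignore_zeros(true);
    /// for entry in ar.entries().unwrap() {
    ///     println!("{:?}", entry.unwrap().path().unwrap());
    /// }
    /// ```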
    pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
        self.inner.ignore_zeros = ignore_zeros;
    }
}

impl<'a> Archive<dyn Read + 'a> {
    fn _entries(&mut self) -> io::Result<EntriesFields> {
        if self.inner.pos.get() != 0 {
            return Err(other(
                "cannot call entries unless archive is at \
                 position 0",
            ));
        }
        Ok(EntriesFields {
            archive: self,
            done: false,
            next: 0,
            raw: false,
        })
    }

    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
        if dst.symlink_metadata().is_err() {
            fs::create_dir_all(&dst)
                .map_err(|e| TarError::new(&format!("failed to create `{}`", dst.display()), e))?;
        }

        // Canonicalizing the dst directory will prepend the path with '\\?\'
        // on windows which will allow windows APIs to treat the path as an
        // extended-length path with a 32,767 character limit. Otherwise all
        // unpacked paths over 260 characters will fail on creation with a
        // NotFound error.
        let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());

        // Delay any directory entries until the end (they will be created if needed by
        // descendants), to ensure that directory permissions do not interfere with descendant
        // extraction.
        let mut directories = Vec::new();
        for entry in self._entries()? {
            let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
            if file.header().entry_type() == crate::EntryType::Directory {
                directories.push(file);
            } else {
                file.unpack_in(dst)?;
            }
        }
        for mut dir in directories {
            dir.unpack_in(dst)?;
        }

        Ok(())
    }

    fn skip(&self, mut amt: u64) -> io::Result<()> {
        let mut buf = [0u8; 4096 * 8];
        while amt > 0 {
            let n = cmp::min(amt, buf.len() as u64);
            let n = (&self.inner).read(&mut buf[..n as usize])?;
            if n == 0 {
                return Err(other("unexpected EOF during skip"));
            }
            amt -= n as u64;
        }
        Ok(())
    }
}

impl<'a, R: Read> Entries<'a, R> {
    /// Indicates whether this iterator will return raw entries or not.
    ///
    /// If the raw list of entries is returned, then no preprocessing happens
    /// on account of this library, for example taking into account GNU long name
    /// or long link archive members. Raw iteration is disabled by default.
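    ///
    /// # Examples
    ///
    /// A minimal sketch of raw iteration; `foo.tar` is an assumed placeholder
    /// path. In raw mode, GNU long-name/long-link and pax members are yielded
    /// as entries themselves.
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries().unwrap().raw(true) {
    ///     println!("{:?}", entry.unwrap().header().entry_type());
    /// }
    /// ```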
    pub fn raw(self, raw: bool) -> Entries<'a, R> {
        Entries {
            fields: EntriesFields {
                raw: raw,
                ..self.fields
            },
            _ignored: marker::PhantomData,
        }
    }
}

impl<'a, R: Read> Iterator for Entries<'a, R> {
    type Item = io::Result<Entry<'a, R>>;

    fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
        self.fields
            .next()
            .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
    }
}

impl<'a> EntriesFields<'a> {
    fn next_entry_raw(
        &mut self,
        pax_size: Option<u64>,
    ) -> io::Result<Option<Entry<'a, io::Empty>>> {
        let mut header = Header::new_old();
        let mut header_pos = self.next;
        loop {
            // Seek to the start of the next header in the archive
            let delta = self.next - self.archive.inner.pos.get();
            self.archive.skip(delta)?;

            // EOF is an indicator that we are at the end of the archive.
            if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
                return Ok(None);
            }

            // If a header is not all zeros, we have another valid header.
            // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
            // end of the archive.
            if !header.as_bytes().iter().all(|i| *i == 0) {
                self.next += 512;
                break;
            }

            if !self.archive.inner.ignore_zeros {
                return Ok(None);
            }
            self.next += 512;
            header_pos = self.next;
        }

        // Make sure the checksum is ok
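        // The 8-byte checksum field itself (bytes 148..156) is defined by the
        // tar format to be computed as if it were filled with ASCII spaces,
        // hence the `8 * 32` added below.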
        let sum = header.as_bytes()[..148]
            .iter()
            .chain(&header.as_bytes()[156..])
            .fold(0, |a, b| a + (*b as u32))
            + 8 * 32;
        let cksum = header.cksum()?;
        if sum != cksum {
            return Err(other("archive header checksum mismatch"));
        }

        let file_pos = self.next;
        let mut size = header.entry_size()?;
        if size == 0 {
            if let Some(pax_size) = pax_size {
                size = pax_size;
            }
        }
        let ret = EntryFields {
            size: size,
            header_pos: header_pos,
            file_pos: file_pos,
            data: vec![EntryIo::Data((&self.archive.inner).take(size))],
            header: header,
            long_pathname: None,
            long_linkname: None,
            pax_extensions: None,
            unpack_xattrs: self.archive.inner.unpack_xattrs,
            preserve_permissions: self.archive.inner.preserve_permissions,
            preserve_mtime: self.archive.inner.preserve_mtime,
            overwrite: self.archive.inner.overwrite,
        };

        // Store where the next entry is, rounding up by 512 bytes (the size of
        // a header).
        let size = (size + 511) & !(512 - 1);
        self.next += size;

        Ok(Some(ret.into_entry()))
    }

    fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
        if self.raw {
            return self.next_entry_raw(None);
        }

        let mut gnu_longname = None;
        let mut gnu_longlink = None;
        let mut pax_extensions = None;
        let mut pax_size = None;
        let mut processed = 0;
        loop {
            processed += 1;
            let entry = match self.next_entry_raw(pax_size)? {
                Some(entry) => entry,
                None if processed > 1 => {
                    return Err(other(
                        "members found describing a future member \
                         but no future member found",
                    ));
                }
                None => return Ok(None),
            };

            let is_recognized_header =
                entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();

            if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
                if gnu_longname.is_some() {
                    return Err(other(
                        "two long name entries describing \
                         the same member",
                    ));
                }
                gnu_longname = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
                if gnu_longlink.is_some() {
                    return Err(other(
                        "two long link entries describing \
                         the same member",
                    ));
                }
                gnu_longlink = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
                if pax_extensions.is_some() {
                    return Err(other(
                        "two pax extensions entries describing \
                         the same member",
                    ));
                }
                pax_extensions = Some(EntryFields::from(entry).read_all()?);
                if let Some(pax_extensions_ref) = &pax_extensions {
                    pax_size = pax_extensions_size(pax_extensions_ref);
                }
                continue;
            }

            let mut fields = EntryFields::from(entry);
            fields.long_pathname = gnu_longname;
            fields.long_linkname = gnu_longlink;
            fields.pax_extensions = pax_extensions;
            self.parse_sparse_header(&mut fields)?;
            return Ok(Some(fields.into_entry()));
        }
    }

    fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
        if !entry.header.entry_type().is_gnu_sparse() {
            return Ok(());
        }
        let gnu = match entry.header.as_gnu() {
            Some(gnu) => gnu,
            None => return Err(other("sparse entry type listed but not GNU header")),
        };

        // Sparse files are represented internally as a list of blocks that are
        // read. Blocks are either a bunch of 0's or they're data from the
        // underlying archive.
        //
        // Blocks of a sparse file are described by the `GnuSparseHeader`
        // structure, some of which are contained in `GnuHeader` but some of
        // which may also be contained after the first header in further
        // headers.
        //
        // We read off all the blocks here and use the `add_block` function to
        // incrementally add them to the list of I/O blocks (in `entry.data`).
        // The `add_block` function also validates that each chunk comes after
        // the previous, that we don't overrun the end of the file, and that
        // each block is aligned to a 512-byte boundary in the archive itself.
        //
        // At the end we verify that the sparse file size (`Header::size`) is
        // the same as the current offset (described by the list of blocks) as
        // well as that the amount of data read equals the size of the entry
        // (`Header::entry_size`).
        entry.data.truncate(0);

        let mut cur = 0;
        let mut remaining = entry.size;
        {
            let data = &mut entry.data;
            let reader = &self.archive.inner;
            let size = entry.size;
            let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
                if block.is_empty() {
                    return Ok(());
                }
                let off = block.offset()?;
                let len = block.length()?;
                if len != 0 && (size - remaining) % 512 != 0 {
                    return Err(other(
                        "previous block in sparse file was not \
                         aligned to 512-byte boundary",
                    ));
                } else if off < cur {
                    return Err(other(
                        "out of order or overlapping sparse \
                         blocks",
                    ));
                } else if cur < off {
                    let block = io::repeat(0).take(off - cur);
                    data.push(EntryIo::Pad(block));
                }
                cur = off
                    .checked_add(len)
                    .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
                remaining = remaining.checked_sub(len).ok_or_else(|| {
                    other(
                        "sparse file consumed more data than the header \
                         listed",
                    )
                })?;
                data.push(EntryIo::Data(reader.take(len)));
                Ok(())
            };
            for block in gnu.sparse.iter() {
                add_block(block)?
            }
            if gnu.is_extended() {
                let mut ext = GnuExtSparseHeader::new();
                ext.isextended[0] = 1;
                while ext.is_extended() {
                    if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
                        return Err(other("failed to read extension"));
                    }

                    self.next += 512;
                    for block in ext.sparse.iter() {
                        add_block(block)?;
                    }
                }
            }
        }
        if cur != gnu.real_size()? {
            return Err(other(
                "mismatch in sparse file chunks and \
                 size in header",
            ));
        }
        entry.size = cur;
        if remaining > 0 {
            return Err(other(
                "mismatch in sparse file chunks and \
                 entry size in header",
            ));
        }
        Ok(())
    }
}

impl<'a> Iterator for EntriesFields<'a> {
    type Item = io::Result<Entry<'a, io::Empty>>;

    fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
        if self.done {
            None
        } else {
            match self.next_entry() {
                Ok(Some(e)) => Some(Ok(e)),
                Ok(None) => {
                    self.done = true;
                    None
                }
                Err(e) => {
                    self.done = true;
                    Some(Err(e))
                }
            }
        }
    }
}

impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        self.obj.borrow_mut().read(into).map(|i| {
            self.pos.set(self.pos.get() + i as u64);
            i
        })
    }
}

/// Try to fill the buffer from the reader.
///
/// If the reader reaches its end before filling the buffer at all, returns `false`.
/// Otherwise returns `true`.
fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
    let mut read = 0;
    while read < buf.len() {
        match r.read(&mut buf[read..])? {
            0 => {
                if read == 0 {
                    return Ok(false);
                }

                return Err(other("failed to read entire block"));
            }
            n => read += n,
        }
    }
    Ok(true)
}