1 use std::fs;
2 use std::io;
3 use std::io::prelude::*;
4 use std::path::Path;
5 use std::str;
6 
7 use crate::header::{path2bytes, HeaderMode};
8 use crate::{other, EntryType, Header};
9 
10 /// A structure for building archives
11 ///
12 /// This structure has methods for building up an archive from scratch into any
13 /// arbitrary writer.
14 pub struct Builder<W: Write> {
15     mode: HeaderMode,
16     follow: bool,
17     finished: bool,
18     obj: Option<W>,
19 }
20 
21 impl<W: Write> Builder<W> {
22     /// Create a new archive builder with the underlying object as the
23     /// destination of all data written. The builder will use
24     /// `HeaderMode::Complete` by default.
new(obj: W) -> Builder<W>25     pub fn new(obj: W) -> Builder<W> {
26         Builder {
27             mode: HeaderMode::Complete,
28             follow: true,
29             finished: false,
30             obj: Some(obj),
31         }
32     }
33 
34     /// Changes the HeaderMode that will be used when reading fs Metadata for
35     /// methods that implicitly read metadata for an input Path. Notably, this
36     /// does _not_ apply to `append(Header)`.
mode(&mut self, mode: HeaderMode)37     pub fn mode(&mut self, mode: HeaderMode) {
38         self.mode = mode;
39     }
40 
41     /// Follow symlinks, archiving the contents of the file they point to rather
42     /// than adding a symlink to the archive. Defaults to true.
follow_symlinks(&mut self, follow: bool)43     pub fn follow_symlinks(&mut self, follow: bool) {
44         self.follow = follow;
45     }
46 
47     /// Gets shared reference to the underlying object.
get_ref(&self) -> &W48     pub fn get_ref(&self) -> &W {
49         self.obj.as_ref().unwrap()
50     }
51 
52     /// Gets mutable reference to the underlying object.
53     ///
54     /// Note that care must be taken while writing to the underlying
55     /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
56     /// useful in the situations when one needs to be ensured that
57     /// tar entry was flushed to the disk.
get_mut(&mut self) -> &mut W58     pub fn get_mut(&mut self) -> &mut W {
59         self.obj.as_mut().unwrap()
60     }
61 
62     /// Unwrap this archive, returning the underlying object.
63     ///
64     /// This function will finish writing the archive if the `finish` function
65     /// hasn't yet been called, returning any I/O error which happens during
66     /// that operation.
into_inner(mut self) -> io::Result<W>67     pub fn into_inner(mut self) -> io::Result<W> {
68         if !self.finished {
69             self.finish()?;
70         }
71         Ok(self.obj.take().unwrap())
72     }
73 
74     /// Adds a new entry to this archive.
75     ///
76     /// This function will append the header specified, followed by contents of
77     /// the stream specified by `data`. To produce a valid archive the `size`
78     /// field of `header` must be the same as the length of the stream that's
79     /// being written. Additionally the checksum for the header should have been
80     /// set via the `set_cksum` method.
81     ///
82     /// Note that this will not attempt to seek the archive to a valid position,
83     /// so if the archive is in the middle of a read or some other similar
84     /// operation then this may corrupt the archive.
85     ///
86     /// Also note that after all entries have been written to an archive the
87     /// `finish` function needs to be called to finish writing the archive.
88     ///
89     /// # Errors
90     ///
91     /// This function will return an error for any intermittent I/O error which
92     /// occurs when either reading or writing.
93     ///
94     /// # Examples
95     ///
96     /// ```
97     /// use tar::{Builder, Header};
98     ///
99     /// let mut header = Header::new_gnu();
100     /// header.set_path("foo").unwrap();
101     /// header.set_size(4);
102     /// header.set_cksum();
103     ///
104     /// let mut data: &[u8] = &[1, 2, 3, 4];
105     ///
106     /// let mut ar = Builder::new(Vec::new());
107     /// ar.append(&header, data).unwrap();
108     /// let data = ar.into_inner().unwrap();
109     /// ```
append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()>110     pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
111         append(self.get_mut(), header, &mut data)
112     }
113 
114     /// Adds a new entry to this archive with the specified path.
115     ///
116     /// This function will set the specified path in the given header, which may
117     /// require appending a GNU long-name extension entry to the archive first.
118     /// The checksum for the header will be automatically updated via the
119     /// `set_cksum` method after setting the path. No other metadata in the
120     /// header will be modified.
121     ///
122     /// Then it will append the header, followed by contents of the stream
123     /// specified by `data`. To produce a valid archive the `size` field of
124     /// `header` must be the same as the length of the stream that's being
125     /// written.
126     ///
127     /// Note that this will not attempt to seek the archive to a valid position,
128     /// so if the archive is in the middle of a read or some other similar
129     /// operation then this may corrupt the archive.
130     ///
131     /// Also note that after all entries have been written to an archive the
132     /// `finish` function needs to be called to finish writing the archive.
133     ///
134     /// # Errors
135     ///
136     /// This function will return an error for any intermittent I/O error which
137     /// occurs when either reading or writing.
138     ///
139     /// # Examples
140     ///
141     /// ```
142     /// use tar::{Builder, Header};
143     ///
144     /// let mut header = Header::new_gnu();
145     /// header.set_size(4);
146     /// header.set_cksum();
147     ///
148     /// let mut data: &[u8] = &[1, 2, 3, 4];
149     ///
150     /// let mut ar = Builder::new(Vec::new());
151     /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
152     /// let data = ar.into_inner().unwrap();
153     /// ```
append_data<P: AsRef<Path>, R: Read>( &mut self, header: &mut Header, path: P, data: R, ) -> io::Result<()>154     pub fn append_data<P: AsRef<Path>, R: Read>(
155         &mut self,
156         header: &mut Header,
157         path: P,
158         data: R,
159     ) -> io::Result<()> {
160         prepare_header_path(self.get_mut(), header, path.as_ref())?;
161         header.set_cksum();
162         self.append(&header, data)
163     }
164 
165     /// Adds a file on the local filesystem to this archive.
166     ///
167     /// This function will open the file specified by `path` and insert the file
168     /// into the archive with the appropriate metadata set, returning any I/O
169     /// error which occurs while writing. The path name for the file inside of
170     /// this archive will be the same as `path`, and it is required that the
171     /// path is a relative path.
172     ///
173     /// Note that this will not attempt to seek the archive to a valid position,
174     /// so if the archive is in the middle of a read or some other similar
175     /// operation then this may corrupt the archive.
176     ///
177     /// Also note that after all files have been written to an archive the
178     /// `finish` function needs to be called to finish writing the archive.
179     ///
180     /// # Examples
181     ///
182     /// ```no_run
183     /// use tar::Builder;
184     ///
185     /// let mut ar = Builder::new(Vec::new());
186     ///
187     /// ar.append_path("foo/bar.txt").unwrap();
188     /// ```
append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()>189     pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
190         let mode = self.mode.clone();
191         let follow = self.follow;
192         append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow)
193     }
194 
195     /// Adds a file on the local filesystem to this archive under another name.
196     ///
197     /// This function will open the file specified by `path` and insert the file
198     /// into the archive as `name` with appropriate metadata set, returning any
199     /// I/O error which occurs while writing. The path name for the file inside
200     /// of this archive will be `name` is required to be a relative path.
201     ///
202     /// Note that this will not attempt to seek the archive to a valid position,
203     /// so if the archive is in the middle of a read or some other similar
204     /// operation then this may corrupt the archive.
205     ///
206     /// Note if the `path` is a directory. This will just add an entry to the archive,
207     /// rather than contents of the directory.
208     ///
209     /// Also note that after all files have been written to an archive the
210     /// `finish` function needs to be called to finish writing the archive.
211     ///
212     /// # Examples
213     ///
214     /// ```no_run
215     /// use tar::Builder;
216     ///
217     /// let mut ar = Builder::new(Vec::new());
218     ///
219     /// // Insert the local file "foo/bar.txt" in the archive but with the name
220     /// // "bar/foo.txt".
221     /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
222     /// ```
append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>( &mut self, path: P, name: N, ) -> io::Result<()>223     pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
224         &mut self,
225         path: P,
226         name: N,
227     ) -> io::Result<()> {
228         let mode = self.mode.clone();
229         let follow = self.follow;
230         append_path_with_name(
231             self.get_mut(),
232             path.as_ref(),
233             Some(name.as_ref()),
234             mode,
235             follow,
236         )
237     }
238 
239     /// Adds a file to this archive with the given path as the name of the file
240     /// in the archive.
241     ///
242     /// This will use the metadata of `file` to populate a `Header`, and it will
243     /// then append the file to the archive with the name `path`.
244     ///
245     /// Note that this will not attempt to seek the archive to a valid position,
246     /// so if the archive is in the middle of a read or some other similar
247     /// operation then this may corrupt the archive.
248     ///
249     /// Also note that after all files have been written to an archive the
250     /// `finish` function needs to be called to finish writing the archive.
251     ///
252     /// # Examples
253     ///
254     /// ```no_run
255     /// use std::fs::File;
256     /// use tar::Builder;
257     ///
258     /// let mut ar = Builder::new(Vec::new());
259     ///
260     /// // Open the file at one location, but insert it into the archive with a
261     /// // different name.
262     /// let mut f = File::open("foo/bar/baz.txt").unwrap();
263     /// ar.append_file("bar/baz.txt", &mut f).unwrap();
264     /// ```
append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()>265     pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
266         let mode = self.mode.clone();
267         append_file(self.get_mut(), path.as_ref(), file, mode)
268     }
269 
270     /// Adds a directory to this archive with the given path as the name of the
271     /// directory in the archive.
272     ///
273     /// This will use `stat` to populate a `Header`, and it will then append the
274     /// directory to the archive with the name `path`.
275     ///
276     /// Note that this will not attempt to seek the archive to a valid position,
277     /// so if the archive is in the middle of a read or some other similar
278     /// operation then this may corrupt the archive.
279     ///
280     /// Note this will not add the contents of the directory to the archive.
281     /// See `append_dir_all` for recusively adding the contents of the directory.
282     ///
283     /// Also note that after all files have been written to an archive the
284     /// `finish` function needs to be called to finish writing the archive.
285     ///
286     /// # Examples
287     ///
288     /// ```
289     /// use std::fs;
290     /// use tar::Builder;
291     ///
292     /// let mut ar = Builder::new(Vec::new());
293     ///
294     /// // Use the directory at one location, but insert it into the archive
295     /// // with a different name.
296     /// ar.append_dir("bardir", ".").unwrap();
297     /// ```
append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()> where P: AsRef<Path>, Q: AsRef<Path>,298     pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
299     where
300         P: AsRef<Path>,
301         Q: AsRef<Path>,
302     {
303         let mode = self.mode.clone();
304         append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode)
305     }
306 
307     /// Adds a directory and all of its contents (recursively) to this archive
308     /// with the given path as the name of the directory in the archive.
309     ///
310     /// Note that this will not attempt to seek the archive to a valid position,
311     /// so if the archive is in the middle of a read or some other similar
312     /// operation then this may corrupt the archive.
313     ///
314     /// Also note that after all files have been written to an archive the
315     /// `finish` function needs to be called to finish writing the archive.
316     ///
317     /// # Examples
318     ///
319     /// ```
320     /// use std::fs;
321     /// use tar::Builder;
322     ///
323     /// let mut ar = Builder::new(Vec::new());
324     ///
325     /// // Use the directory at one location, but insert it into the archive
326     /// // with a different name.
327     /// ar.append_dir_all("bardir", ".").unwrap();
328     /// ```
append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()> where P: AsRef<Path>, Q: AsRef<Path>,329     pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
330     where
331         P: AsRef<Path>,
332         Q: AsRef<Path>,
333     {
334         let mode = self.mode.clone();
335         let follow = self.follow;
336         append_dir_all(
337             self.get_mut(),
338             path.as_ref(),
339             src_path.as_ref(),
340             mode,
341             follow,
342         )
343     }
344 
345     /// Finish writing this archive, emitting the termination sections.
346     ///
347     /// This function should only be called when the archive has been written
348     /// entirely and if an I/O error happens the underlying object still needs
349     /// to be acquired.
350     ///
351     /// In most situations the `into_inner` method should be preferred.
finish(&mut self) -> io::Result<()>352     pub fn finish(&mut self) -> io::Result<()> {
353         if self.finished {
354             return Ok(());
355         }
356         self.finished = true;
357         self.get_mut().write_all(&[0; 1024])
358     }
359 }
360 
append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()>361 fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
362     dst.write_all(header.as_bytes())?;
363     let len = io::copy(&mut data, &mut dst)?;
364 
365     // Pad with zeros if necessary.
366     let buf = [0; 512];
367     let remaining = 512 - (len % 512);
368     if remaining < 512 {
369         dst.write_all(&buf[..remaining as usize])?;
370     }
371 
372     Ok(())
373 }
374 
append_path_with_name( dst: &mut dyn Write, path: &Path, name: Option<&Path>, mode: HeaderMode, follow: bool, ) -> io::Result<()>375 fn append_path_with_name(
376     dst: &mut dyn Write,
377     path: &Path,
378     name: Option<&Path>,
379     mode: HeaderMode,
380     follow: bool,
381 ) -> io::Result<()> {
382     let stat = if follow {
383         fs::metadata(path).map_err(|err| {
384             io::Error::new(
385                 err.kind(),
386                 format!("{} when getting metadata for {}", err, path.display()),
387             )
388         })?
389     } else {
390         fs::symlink_metadata(path).map_err(|err| {
391             io::Error::new(
392                 err.kind(),
393                 format!("{} when getting metadata for {}", err, path.display()),
394             )
395         })?
396     };
397     let ar_name = name.unwrap_or(path);
398     if stat.is_file() {
399         append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None)
400     } else if stat.is_dir() {
401         append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None)
402     } else if stat.file_type().is_symlink() {
403         let link_name = fs::read_link(path)?;
404         append_fs(
405             dst,
406             ar_name,
407             &stat,
408             &mut io::empty(),
409             mode,
410             Some(&link_name),
411         )
412     } else {
413         Err(other(&format!("{} has unknown file type", path.display())))
414     }
415 }
416 
append_file( dst: &mut dyn Write, path: &Path, file: &mut fs::File, mode: HeaderMode, ) -> io::Result<()>417 fn append_file(
418     dst: &mut dyn Write,
419     path: &Path,
420     file: &mut fs::File,
421     mode: HeaderMode,
422 ) -> io::Result<()> {
423     let stat = file.metadata()?;
424     append_fs(dst, path, &stat, file, mode, None)
425 }
426 
append_dir( dst: &mut dyn Write, path: &Path, src_path: &Path, mode: HeaderMode, ) -> io::Result<()>427 fn append_dir(
428     dst: &mut dyn Write,
429     path: &Path,
430     src_path: &Path,
431     mode: HeaderMode,
432 ) -> io::Result<()> {
433     let stat = fs::metadata(src_path)?;
434     append_fs(dst, path, &stat, &mut io::empty(), mode, None)
435 }
436 
prepare_header(size: u64, entry_type: u8) -> Header437 fn prepare_header(size: u64, entry_type: u8) -> Header {
438     let mut header = Header::new_gnu();
439     let name = b"././@LongLink";
440     header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
441     header.set_mode(0o644);
442     header.set_uid(0);
443     header.set_gid(0);
444     header.set_mtime(0);
445     // + 1 to be compliant with GNU tar
446     header.set_size(size + 1);
447     header.set_entry_type(EntryType::new(entry_type));
448     header.set_cksum();
449     header
450 }
451 
prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()>452 fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
453     // Try to encode the path directly in the header, but if it ends up not
454     // working (probably because it's too long) then try to use the GNU-specific
455     // long name extension by emitting an entry which indicates that it's the
456     // filename.
457     if let Err(e) = header.set_path(path) {
458         let data = path2bytes(&path)?;
459         let max = header.as_old().name.len();
460         // Since `e` isn't specific enough to let us know the path is indeed too
461         // long, verify it first before using the extension.
462         if data.len() < max {
463             return Err(e);
464         }
465         let header2 = prepare_header(data.len() as u64, b'L');
466         // null-terminated string
467         let mut data2 = data.chain(io::repeat(0).take(1));
468         append(dst, &header2, &mut data2)?;
469 
470         // Truncate the path to store in the header we're about to emit to
471         // ensure we've got something at least mentioned. Note that we use
472         // `str`-encoding to be compatible with Windows, but in general the
473         // entry in the header itself shouldn't matter too much since extraction
474         // doesn't look at it.
475         let truncated = match str::from_utf8(&data[..max]) {
476             Ok(s) => s,
477             Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
478         };
479         header.set_path(truncated)?;
480     }
481     Ok(())
482 }
483 
prepare_header_link( dst: &mut dyn Write, header: &mut Header, link_name: &Path, ) -> io::Result<()>484 fn prepare_header_link(
485     dst: &mut dyn Write,
486     header: &mut Header,
487     link_name: &Path,
488 ) -> io::Result<()> {
489     // Same as previous function but for linkname
490     if let Err(e) = header.set_link_name(&link_name) {
491         let data = path2bytes(&link_name)?;
492         if data.len() < header.as_old().linkname.len() {
493             return Err(e);
494         }
495         let header2 = prepare_header(data.len() as u64, b'K');
496         let mut data2 = data.chain(io::repeat(0).take(1));
497         append(dst, &header2, &mut data2)?;
498     }
499     Ok(())
500 }
501 
append_fs( dst: &mut dyn Write, path: &Path, meta: &fs::Metadata, read: &mut dyn Read, mode: HeaderMode, link_name: Option<&Path>, ) -> io::Result<()>502 fn append_fs(
503     dst: &mut dyn Write,
504     path: &Path,
505     meta: &fs::Metadata,
506     read: &mut dyn Read,
507     mode: HeaderMode,
508     link_name: Option<&Path>,
509 ) -> io::Result<()> {
510     let mut header = Header::new_gnu();
511 
512     prepare_header_path(dst, &mut header, path)?;
513     header.set_metadata_in_mode(meta, mode);
514     if let Some(link_name) = link_name {
515         prepare_header_link(dst, &mut header, link_name)?;
516     }
517     header.set_cksum();
518     append(dst, &header, read)
519 }
520 
append_dir_all( dst: &mut dyn Write, path: &Path, src_path: &Path, mode: HeaderMode, follow: bool, ) -> io::Result<()>521 fn append_dir_all(
522     dst: &mut dyn Write,
523     path: &Path,
524     src_path: &Path,
525     mode: HeaderMode,
526     follow: bool,
527 ) -> io::Result<()> {
528     let mut stack = vec![(src_path.to_path_buf(), true, false)];
529     while let Some((src, is_dir, is_symlink)) = stack.pop() {
530         let dest = path.join(src.strip_prefix(&src_path).unwrap());
531         // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
532         if is_dir || (is_symlink && follow && src.is_dir()) {
533             for entry in fs::read_dir(&src)? {
534                 let entry = entry?;
535                 let file_type = entry.file_type()?;
536                 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
537             }
538             if dest != Path::new("") {
539                 append_dir(dst, &dest, &src, mode)?;
540             }
541         } else if !follow && is_symlink {
542             let stat = fs::symlink_metadata(&src)?;
543             let link_name = fs::read_link(&src)?;
544             append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?;
545         } else {
546             append_file(dst, &dest, &mut fs::File::open(src)?, mode)?;
547         }
548     }
549     Ok(())
550 }
551 
552 impl<W: Write> Drop for Builder<W> {
drop(&mut self)553     fn drop(&mut self) {
554         let _ = self.finish();
555     }
556 }
557