1 use std::fs;
2 use std::io;
3 use std::io::prelude::*;
4 use std::path::Path;
5 use std::str;
6
7 use crate::header::{path2bytes, HeaderMode};
8 use crate::{other, EntryType, Header};
9
/// A structure for building archives
///
/// This structure has methods for building up an archive from scratch into any
/// arbitrary writer.
pub struct Builder<W: Write> {
    /// Header mode handed to the helpers that build a header from filesystem
    /// metadata; `new` initializes it to `HeaderMode::Complete`.
    mode: HeaderMode,
    /// Whether symlinks are followed when archiving paths; `new` initializes
    /// it to `true`.
    follow: bool,
    /// Set by `finish` so that the termination bytes are emitted at most once.
    finished: bool,
    /// The destination writer. It is `Some` from construction until
    /// `into_inner` takes it out.
    obj: Option<W>,
}
20
21 impl<W: Write> Builder<W> {
22 /// Create a new archive builder with the underlying object as the
23 /// destination of all data written. The builder will use
24 /// `HeaderMode::Complete` by default.
new(obj: W) -> Builder<W>25 pub fn new(obj: W) -> Builder<W> {
26 Builder {
27 mode: HeaderMode::Complete,
28 follow: true,
29 finished: false,
30 obj: Some(obj),
31 }
32 }
33
34 /// Changes the HeaderMode that will be used when reading fs Metadata for
35 /// methods that implicitly read metadata for an input Path. Notably, this
36 /// does _not_ apply to `append(Header)`.
mode(&mut self, mode: HeaderMode)37 pub fn mode(&mut self, mode: HeaderMode) {
38 self.mode = mode;
39 }
40
41 /// Follow symlinks, archiving the contents of the file they point to rather
42 /// than adding a symlink to the archive. Defaults to true.
follow_symlinks(&mut self, follow: bool)43 pub fn follow_symlinks(&mut self, follow: bool) {
44 self.follow = follow;
45 }
46
47 /// Gets shared reference to the underlying object.
get_ref(&self) -> &W48 pub fn get_ref(&self) -> &W {
49 self.obj.as_ref().unwrap()
50 }
51
52 /// Gets mutable reference to the underlying object.
53 ///
54 /// Note that care must be taken while writing to the underlying
55 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
56 /// useful in the situations when one needs to be ensured that
57 /// tar entry was flushed to the disk.
get_mut(&mut self) -> &mut W58 pub fn get_mut(&mut self) -> &mut W {
59 self.obj.as_mut().unwrap()
60 }
61
62 /// Unwrap this archive, returning the underlying object.
63 ///
64 /// This function will finish writing the archive if the `finish` function
65 /// hasn't yet been called, returning any I/O error which happens during
66 /// that operation.
into_inner(mut self) -> io::Result<W>67 pub fn into_inner(mut self) -> io::Result<W> {
68 if !self.finished {
69 self.finish()?;
70 }
71 Ok(self.obj.take().unwrap())
72 }
73
74 /// Adds a new entry to this archive.
75 ///
76 /// This function will append the header specified, followed by contents of
77 /// the stream specified by `data`. To produce a valid archive the `size`
78 /// field of `header` must be the same as the length of the stream that's
79 /// being written. Additionally the checksum for the header should have been
80 /// set via the `set_cksum` method.
81 ///
82 /// Note that this will not attempt to seek the archive to a valid position,
83 /// so if the archive is in the middle of a read or some other similar
84 /// operation then this may corrupt the archive.
85 ///
86 /// Also note that after all entries have been written to an archive the
87 /// `finish` function needs to be called to finish writing the archive.
88 ///
89 /// # Errors
90 ///
91 /// This function will return an error for any intermittent I/O error which
92 /// occurs when either reading or writing.
93 ///
94 /// # Examples
95 ///
96 /// ```
97 /// use tar::{Builder, Header};
98 ///
99 /// let mut header = Header::new_gnu();
100 /// header.set_path("foo").unwrap();
101 /// header.set_size(4);
102 /// header.set_cksum();
103 ///
104 /// let mut data: &[u8] = &[1, 2, 3, 4];
105 ///
106 /// let mut ar = Builder::new(Vec::new());
107 /// ar.append(&header, data).unwrap();
108 /// let data = ar.into_inner().unwrap();
109 /// ```
append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()>110 pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
111 append(self.get_mut(), header, &mut data)
112 }
113
114 /// Adds a new entry to this archive with the specified path.
115 ///
116 /// This function will set the specified path in the given header, which may
117 /// require appending a GNU long-name extension entry to the archive first.
118 /// The checksum for the header will be automatically updated via the
119 /// `set_cksum` method after setting the path. No other metadata in the
120 /// header will be modified.
121 ///
122 /// Then it will append the header, followed by contents of the stream
123 /// specified by `data`. To produce a valid archive the `size` field of
124 /// `header` must be the same as the length of the stream that's being
125 /// written.
126 ///
127 /// Note that this will not attempt to seek the archive to a valid position,
128 /// so if the archive is in the middle of a read or some other similar
129 /// operation then this may corrupt the archive.
130 ///
131 /// Also note that after all entries have been written to an archive the
132 /// `finish` function needs to be called to finish writing the archive.
133 ///
134 /// # Errors
135 ///
136 /// This function will return an error for any intermittent I/O error which
137 /// occurs when either reading or writing.
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use tar::{Builder, Header};
143 ///
144 /// let mut header = Header::new_gnu();
145 /// header.set_size(4);
146 /// header.set_cksum();
147 ///
148 /// let mut data: &[u8] = &[1, 2, 3, 4];
149 ///
150 /// let mut ar = Builder::new(Vec::new());
151 /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
152 /// let data = ar.into_inner().unwrap();
153 /// ```
append_data<P: AsRef<Path>, R: Read>( &mut self, header: &mut Header, path: P, data: R, ) -> io::Result<()>154 pub fn append_data<P: AsRef<Path>, R: Read>(
155 &mut self,
156 header: &mut Header,
157 path: P,
158 data: R,
159 ) -> io::Result<()> {
160 prepare_header_path(self.get_mut(), header, path.as_ref())?;
161 header.set_cksum();
162 self.append(&header, data)
163 }
164
165 /// Adds a file on the local filesystem to this archive.
166 ///
167 /// This function will open the file specified by `path` and insert the file
168 /// into the archive with the appropriate metadata set, returning any I/O
169 /// error which occurs while writing. The path name for the file inside of
170 /// this archive will be the same as `path`, and it is required that the
171 /// path is a relative path.
172 ///
173 /// Note that this will not attempt to seek the archive to a valid position,
174 /// so if the archive is in the middle of a read or some other similar
175 /// operation then this may corrupt the archive.
176 ///
177 /// Also note that after all files have been written to an archive the
178 /// `finish` function needs to be called to finish writing the archive.
179 ///
180 /// # Examples
181 ///
182 /// ```no_run
183 /// use tar::Builder;
184 ///
185 /// let mut ar = Builder::new(Vec::new());
186 ///
187 /// ar.append_path("foo/bar.txt").unwrap();
188 /// ```
append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()>189 pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
190 let mode = self.mode.clone();
191 let follow = self.follow;
192 append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow)
193 }
194
195 /// Adds a file on the local filesystem to this archive under another name.
196 ///
197 /// This function will open the file specified by `path` and insert the file
198 /// into the archive as `name` with appropriate metadata set, returning any
199 /// I/O error which occurs while writing. The path name for the file inside
200 /// of this archive will be `name` is required to be a relative path.
201 ///
202 /// Note that this will not attempt to seek the archive to a valid position,
203 /// so if the archive is in the middle of a read or some other similar
204 /// operation then this may corrupt the archive.
205 ///
206 /// Note if the `path` is a directory. This will just add an entry to the archive,
207 /// rather than contents of the directory.
208 ///
209 /// Also note that after all files have been written to an archive the
210 /// `finish` function needs to be called to finish writing the archive.
211 ///
212 /// # Examples
213 ///
214 /// ```no_run
215 /// use tar::Builder;
216 ///
217 /// let mut ar = Builder::new(Vec::new());
218 ///
219 /// // Insert the local file "foo/bar.txt" in the archive but with the name
220 /// // "bar/foo.txt".
221 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
222 /// ```
append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>( &mut self, path: P, name: N, ) -> io::Result<()>223 pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
224 &mut self,
225 path: P,
226 name: N,
227 ) -> io::Result<()> {
228 let mode = self.mode.clone();
229 let follow = self.follow;
230 append_path_with_name(
231 self.get_mut(),
232 path.as_ref(),
233 Some(name.as_ref()),
234 mode,
235 follow,
236 )
237 }
238
239 /// Adds a file to this archive with the given path as the name of the file
240 /// in the archive.
241 ///
242 /// This will use the metadata of `file` to populate a `Header`, and it will
243 /// then append the file to the archive with the name `path`.
244 ///
245 /// Note that this will not attempt to seek the archive to a valid position,
246 /// so if the archive is in the middle of a read or some other similar
247 /// operation then this may corrupt the archive.
248 ///
249 /// Also note that after all files have been written to an archive the
250 /// `finish` function needs to be called to finish writing the archive.
251 ///
252 /// # Examples
253 ///
254 /// ```no_run
255 /// use std::fs::File;
256 /// use tar::Builder;
257 ///
258 /// let mut ar = Builder::new(Vec::new());
259 ///
260 /// // Open the file at one location, but insert it into the archive with a
261 /// // different name.
262 /// let mut f = File::open("foo/bar/baz.txt").unwrap();
263 /// ar.append_file("bar/baz.txt", &mut f).unwrap();
264 /// ```
append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()>265 pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
266 let mode = self.mode.clone();
267 append_file(self.get_mut(), path.as_ref(), file, mode)
268 }
269
270 /// Adds a directory to this archive with the given path as the name of the
271 /// directory in the archive.
272 ///
273 /// This will use `stat` to populate a `Header`, and it will then append the
274 /// directory to the archive with the name `path`.
275 ///
276 /// Note that this will not attempt to seek the archive to a valid position,
277 /// so if the archive is in the middle of a read or some other similar
278 /// operation then this may corrupt the archive.
279 ///
280 /// Note this will not add the contents of the directory to the archive.
281 /// See `append_dir_all` for recusively adding the contents of the directory.
282 ///
283 /// Also note that after all files have been written to an archive the
284 /// `finish` function needs to be called to finish writing the archive.
285 ///
286 /// # Examples
287 ///
288 /// ```
289 /// use std::fs;
290 /// use tar::Builder;
291 ///
292 /// let mut ar = Builder::new(Vec::new());
293 ///
294 /// // Use the directory at one location, but insert it into the archive
295 /// // with a different name.
296 /// ar.append_dir("bardir", ".").unwrap();
297 /// ```
append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()> where P: AsRef<Path>, Q: AsRef<Path>,298 pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
299 where
300 P: AsRef<Path>,
301 Q: AsRef<Path>,
302 {
303 let mode = self.mode.clone();
304 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode)
305 }
306
307 /// Adds a directory and all of its contents (recursively) to this archive
308 /// with the given path as the name of the directory in the archive.
309 ///
310 /// Note that this will not attempt to seek the archive to a valid position,
311 /// so if the archive is in the middle of a read or some other similar
312 /// operation then this may corrupt the archive.
313 ///
314 /// Also note that after all files have been written to an archive the
315 /// `finish` function needs to be called to finish writing the archive.
316 ///
317 /// # Examples
318 ///
319 /// ```
320 /// use std::fs;
321 /// use tar::Builder;
322 ///
323 /// let mut ar = Builder::new(Vec::new());
324 ///
325 /// // Use the directory at one location, but insert it into the archive
326 /// // with a different name.
327 /// ar.append_dir_all("bardir", ".").unwrap();
328 /// ```
append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()> where P: AsRef<Path>, Q: AsRef<Path>,329 pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
330 where
331 P: AsRef<Path>,
332 Q: AsRef<Path>,
333 {
334 let mode = self.mode.clone();
335 let follow = self.follow;
336 append_dir_all(
337 self.get_mut(),
338 path.as_ref(),
339 src_path.as_ref(),
340 mode,
341 follow,
342 )
343 }
344
345 /// Finish writing this archive, emitting the termination sections.
346 ///
347 /// This function should only be called when the archive has been written
348 /// entirely and if an I/O error happens the underlying object still needs
349 /// to be acquired.
350 ///
351 /// In most situations the `into_inner` method should be preferred.
finish(&mut self) -> io::Result<()>352 pub fn finish(&mut self) -> io::Result<()> {
353 if self.finished {
354 return Ok(());
355 }
356 self.finished = true;
357 self.get_mut().write_all(&[0; 1024])
358 }
359 }
360
append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()>361 fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
362 dst.write_all(header.as_bytes())?;
363 let len = io::copy(&mut data, &mut dst)?;
364
365 // Pad with zeros if necessary.
366 let buf = [0; 512];
367 let remaining = 512 - (len % 512);
368 if remaining < 512 {
369 dst.write_all(&buf[..remaining as usize])?;
370 }
371
372 Ok(())
373 }
374
append_path_with_name( dst: &mut dyn Write, path: &Path, name: Option<&Path>, mode: HeaderMode, follow: bool, ) -> io::Result<()>375 fn append_path_with_name(
376 dst: &mut dyn Write,
377 path: &Path,
378 name: Option<&Path>,
379 mode: HeaderMode,
380 follow: bool,
381 ) -> io::Result<()> {
382 let stat = if follow {
383 fs::metadata(path).map_err(|err| {
384 io::Error::new(
385 err.kind(),
386 format!("{} when getting metadata for {}", err, path.display()),
387 )
388 })?
389 } else {
390 fs::symlink_metadata(path).map_err(|err| {
391 io::Error::new(
392 err.kind(),
393 format!("{} when getting metadata for {}", err, path.display()),
394 )
395 })?
396 };
397 let ar_name = name.unwrap_or(path);
398 if stat.is_file() {
399 append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None)
400 } else if stat.is_dir() {
401 append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None)
402 } else if stat.file_type().is_symlink() {
403 let link_name = fs::read_link(path)?;
404 append_fs(
405 dst,
406 ar_name,
407 &stat,
408 &mut io::empty(),
409 mode,
410 Some(&link_name),
411 )
412 } else {
413 Err(other(&format!("{} has unknown file type", path.display())))
414 }
415 }
416
append_file( dst: &mut dyn Write, path: &Path, file: &mut fs::File, mode: HeaderMode, ) -> io::Result<()>417 fn append_file(
418 dst: &mut dyn Write,
419 path: &Path,
420 file: &mut fs::File,
421 mode: HeaderMode,
422 ) -> io::Result<()> {
423 let stat = file.metadata()?;
424 append_fs(dst, path, &stat, file, mode, None)
425 }
426
append_dir( dst: &mut dyn Write, path: &Path, src_path: &Path, mode: HeaderMode, ) -> io::Result<()>427 fn append_dir(
428 dst: &mut dyn Write,
429 path: &Path,
430 src_path: &Path,
431 mode: HeaderMode,
432 ) -> io::Result<()> {
433 let stat = fs::metadata(src_path)?;
434 append_fs(dst, path, &stat, &mut io::empty(), mode, None)
435 }
436
prepare_header(size: u64, entry_type: u8) -> Header437 fn prepare_header(size: u64, entry_type: u8) -> Header {
438 let mut header = Header::new_gnu();
439 let name = b"././@LongLink";
440 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
441 header.set_mode(0o644);
442 header.set_uid(0);
443 header.set_gid(0);
444 header.set_mtime(0);
445 // + 1 to be compliant with GNU tar
446 header.set_size(size + 1);
447 header.set_entry_type(EntryType::new(entry_type));
448 header.set_cksum();
449 header
450 }
451
prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()>452 fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
453 // Try to encode the path directly in the header, but if it ends up not
454 // working (probably because it's too long) then try to use the GNU-specific
455 // long name extension by emitting an entry which indicates that it's the
456 // filename.
457 if let Err(e) = header.set_path(path) {
458 let data = path2bytes(&path)?;
459 let max = header.as_old().name.len();
460 // Since `e` isn't specific enough to let us know the path is indeed too
461 // long, verify it first before using the extension.
462 if data.len() < max {
463 return Err(e);
464 }
465 let header2 = prepare_header(data.len() as u64, b'L');
466 // null-terminated string
467 let mut data2 = data.chain(io::repeat(0).take(1));
468 append(dst, &header2, &mut data2)?;
469
470 // Truncate the path to store in the header we're about to emit to
471 // ensure we've got something at least mentioned. Note that we use
472 // `str`-encoding to be compatible with Windows, but in general the
473 // entry in the header itself shouldn't matter too much since extraction
474 // doesn't look at it.
475 let truncated = match str::from_utf8(&data[..max]) {
476 Ok(s) => s,
477 Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
478 };
479 header.set_path(truncated)?;
480 }
481 Ok(())
482 }
483
prepare_header_link( dst: &mut dyn Write, header: &mut Header, link_name: &Path, ) -> io::Result<()>484 fn prepare_header_link(
485 dst: &mut dyn Write,
486 header: &mut Header,
487 link_name: &Path,
488 ) -> io::Result<()> {
489 // Same as previous function but for linkname
490 if let Err(e) = header.set_link_name(&link_name) {
491 let data = path2bytes(&link_name)?;
492 if data.len() < header.as_old().linkname.len() {
493 return Err(e);
494 }
495 let header2 = prepare_header(data.len() as u64, b'K');
496 let mut data2 = data.chain(io::repeat(0).take(1));
497 append(dst, &header2, &mut data2)?;
498 }
499 Ok(())
500 }
501
append_fs( dst: &mut dyn Write, path: &Path, meta: &fs::Metadata, read: &mut dyn Read, mode: HeaderMode, link_name: Option<&Path>, ) -> io::Result<()>502 fn append_fs(
503 dst: &mut dyn Write,
504 path: &Path,
505 meta: &fs::Metadata,
506 read: &mut dyn Read,
507 mode: HeaderMode,
508 link_name: Option<&Path>,
509 ) -> io::Result<()> {
510 let mut header = Header::new_gnu();
511
512 prepare_header_path(dst, &mut header, path)?;
513 header.set_metadata_in_mode(meta, mode);
514 if let Some(link_name) = link_name {
515 prepare_header_link(dst, &mut header, link_name)?;
516 }
517 header.set_cksum();
518 append(dst, &header, read)
519 }
520
append_dir_all( dst: &mut dyn Write, path: &Path, src_path: &Path, mode: HeaderMode, follow: bool, ) -> io::Result<()>521 fn append_dir_all(
522 dst: &mut dyn Write,
523 path: &Path,
524 src_path: &Path,
525 mode: HeaderMode,
526 follow: bool,
527 ) -> io::Result<()> {
528 let mut stack = vec![(src_path.to_path_buf(), true, false)];
529 while let Some((src, is_dir, is_symlink)) = stack.pop() {
530 let dest = path.join(src.strip_prefix(&src_path).unwrap());
531 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
532 if is_dir || (is_symlink && follow && src.is_dir()) {
533 for entry in fs::read_dir(&src)? {
534 let entry = entry?;
535 let file_type = entry.file_type()?;
536 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
537 }
538 if dest != Path::new("") {
539 append_dir(dst, &dest, &src, mode)?;
540 }
541 } else if !follow && is_symlink {
542 let stat = fs::symlink_metadata(&src)?;
543 let link_name = fs::read_link(&src)?;
544 append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?;
545 } else {
546 append_file(dst, &dest, &mut fs::File::open(src)?, mode)?;
547 }
548 }
549 Ok(())
550 }
551
552 impl<W: Write> Drop for Builder<W> {
drop(&mut self)553 fn drop(&mut self) {
554 let _ = self.finish();
555 }
556 }
557