1 /* Create a tar archive.
2 
3    Copyright 1985-2021 Free Software Foundation, Inc.
4 
5    This file is part of GNU tar.
6 
7    GNU tar is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    GNU tar is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20    Written by John Gilmore, on 1985-08-25.  */
21 
22 #include <system.h>
23 
24 #include <areadlink.h>
25 #include <quotearg.h>
26 
27 #include "common.h"
28 #include <hash.h>
29 
/* The errno value to report when an impostor is discovered.  ENOENT
   is used so that the impostor is treated as if it were not there.  */
enum
{
  IMPOSTOR_ERRNO = ENOENT
};
33 
/* A device/inode pair together with a link count and a name.
   NOTE(review): presumably one record per multiply-linked file that
   has been seen -- confirm against the code that allocates these.  */
struct link
{
  dev_t dev;		/* device number */
  ino_t ino;		/* inode number */
  nlink_t nlink;	/* link count */
  char name[1];		/* start of the name.  NOTE(review): looks like the
			   traditional trailing-array idiom, with the record
			   over-allocated to hold the whole string -- confirm
			   at the allocation site before changing the size */
};
41 
42 struct exclusion_tag
43 {
44   const char *name;
45   size_t length;
46   enum exclusion_tag_type type;
47   bool (*predicate) (int fd);
48   struct exclusion_tag *next;
49 };
50 
51 static struct exclusion_tag *exclusion_tags;
52 
53 void
add_exclusion_tag(const char * name,enum exclusion_tag_type type,bool (* predicate)(int fd))54 add_exclusion_tag (const char *name, enum exclusion_tag_type type,
55 		   bool (*predicate) (int fd))
56 {
57   struct exclusion_tag *tag = xmalloc (sizeof tag[0]);
58   tag->next = exclusion_tags;
59   tag->name = name;
60   tag->type = type;
61   tag->predicate = predicate;
62   tag->length = strlen (name);
63   exclusion_tags = tag;
64 }
65 
66 void
exclusion_tag_warning(const char * dirname,const char * tagname,const char * message)67 exclusion_tag_warning (const char *dirname, const char *tagname,
68 		       const char *message)
69 {
70   if (verbose_option)
71     WARNOPT (WARN_CACHEDIR,
72 	     (0, 0,
73 	      _("%s: contains a cache directory tag %s; %s"),
74 	      quotearg_colon (dirname),
75 	      quotearg_n (1, tagname),
76 	      message));
77 }
78 
79 enum exclusion_tag_type
check_exclusion_tags(struct tar_stat_info const * st,char const ** tag_file_name)80 check_exclusion_tags (struct tar_stat_info const *st, char const **tag_file_name)
81 {
82   struct exclusion_tag *tag;
83 
84   for (tag = exclusion_tags; tag; tag = tag->next)
85     {
86       int tagfd = subfile_open (st, tag->name, open_read_flags);
87       if (0 <= tagfd)
88 	{
89 	  bool satisfied = !tag->predicate || tag->predicate (tagfd);
90 	  close (tagfd);
91 	  if (satisfied)
92 	    {
93 	      if (tag_file_name)
94 		*tag_file_name = tag->name;
95 	      return tag->type;
96 	    }
97 	}
98     }
99 
100   return exclusion_tag_none;
101 }
102 
/* Exclusion predicate: does the file open on FD (usually
   "CACHEDIR.TAG") begin with a valid cache directory signature, as
   described at:
	http://www.brynosaurus.com/cachedir
   Applications can write this file into directories they create
   for use as caches containing purely regenerable, non-precious data,
   allowing us to avoid archiving them if --exclude-caches is specified.  */

#define CACHEDIR_SIGNATURE "Signature: 8a477f597d28d172789f06886806bc55"
#define CACHEDIR_SIGNATURE_SIZE (sizeof CACHEDIR_SIGNATURE - 1)

/* Return true if a single read from FD delivers exactly
   CACHEDIR_SIGNATURE_SIZE bytes and they equal CACHEDIR_SIGNATURE.
   A read error or a shorter read yields false.  FD is not closed.  */
bool
cachedir_file_p (int fd)
{
  char head[CACHEDIR_SIGNATURE_SIZE];

  if (read (fd, head, CACHEDIR_SIGNATURE_SIZE) != CACHEDIR_SIGNATURE_SIZE)
    return false;

  return memcmp (head, CACHEDIR_SIGNATURE, CACHEDIR_SIGNATURE_SIZE) == 0;
}
122 
123 
/* The largest uintmax_t value expressible in NDIGITS digits of
   DIGIT_BITS bits each.  When the digits together are at least as wide
   as uintmax_t the answer saturates at the maximum uintmax_t value,
   which also avoids an over-wide shift.  */
#define MAX_VAL_WITH_DIGITS(ndigits, digit_bits) \
   (sizeof (uintmax_t) * CHAR_BIT > (ndigits) * (digit_bits) \
    ? ((uintmax_t) 1 << ((ndigits) * (digit_bits))) - 1 \
    : (uintmax_t) -1)

/* The largest uintmax_t value that fits in the array FIELD when it is
   written as octal digits followed by one trailing NUL.  FIELD must be
   an array, since its capacity is taken with sizeof.  */
#define MAX_OCTAL_VAL(field) MAX_VAL_WITH_DIGITS (sizeof (field) - 1, LG_8)
134 
135 /* Convert VALUE to an octal representation suitable for tar headers.
136    Output to buffer WHERE with size SIZE.
137    The result is undefined if SIZE is 0 or if VALUE is too large to fit.  */
138 
139 static void
to_octal(uintmax_t value,char * where,size_t size)140 to_octal (uintmax_t value, char *where, size_t size)
141 {
142   uintmax_t v = value;
143   size_t i = size;
144 
145   do
146     {
147       where[--i] = '0' + (v & ((1 << LG_8) - 1));
148       v >>= LG_8;
149     }
150   while (i);
151 }
152 
/* Copy the string SRC into DST, writing at most LEN bytes.  The
   terminating NUL is copied too when it falls within the first LEN
   bytes; otherwise DST is left unterminated.  Bytes of DST after the
   NUL are not touched.  */

static void
tar_copy_str (char *dst, const char *src, size_t len)
{
  size_t pos = 0;

  while (pos < len)
    {
      char c = src[pos];

      dst[pos] = c;
      if (c == '\0')
	return;
      pos++;
    }
}
164 
165 /* Same as tar_copy_str, but always terminate with NUL if using
166    is OLDGNU format */
167 
168 static void
tar_name_copy_str(char * dst,const char * src,size_t len)169 tar_name_copy_str (char *dst, const char *src, size_t len)
170 {
171   tar_copy_str (dst, src, len);
172   if (archive_format == OLDGNU_FORMAT)
173     dst[len-1] = 0;
174 }
175 
176 /* Convert NEGATIVE VALUE to a base-256 representation suitable for
177    tar headers.  NEGATIVE is 1 if VALUE was negative before being cast
178    to uintmax_t, 0 otherwise.  Output to buffer WHERE with size SIZE.
179    The result is undefined if SIZE is 0 or if VALUE is too large to
180    fit.  */
181 
182 static void
to_base256(int negative,uintmax_t value,char * where,size_t size)183 to_base256 (int negative, uintmax_t value, char *where, size_t size)
184 {
185   uintmax_t v = value;
186   uintmax_t propagated_sign_bits =
187     ((uintmax_t) - negative << (CHAR_BIT * sizeof v - LG_256));
188   size_t i = size;
189 
190   do
191     {
192       where[--i] = v & ((1 << LG_256) - 1);
193       v = propagated_sign_bits | (v >> LG_256);
194     }
195   while (i);
196 }
197 
/* Shorthands that pass the capacity of the destination along with it.
   FIELD must be an array (typically a header member), not a pointer,
   because its size is taken with sizeof.  */
#define GID_TO_CHARS(value, field) gid_to_chars (value, field, sizeof (field))
#define MAJOR_TO_CHARS(value, field) major_to_chars (value, field, sizeof (field))
#define MINOR_TO_CHARS(value, field) minor_to_chars (value, field, sizeof (field))
#define MODE_TO_CHARS(value, field) mode_to_chars (value, field, sizeof (field))
#define UID_TO_CHARS(value, field) uid_to_chars (value, field, sizeof (field))

/* Likewise for user and group names, which go through string_to_chars.  */
#define UNAME_TO_CHARS(str, field) string_to_chars (str, field, sizeof (field))
#define GNAME_TO_CHARS(str, field) string_to_chars (str, field, sizeof (field))
206 
207 static bool
208 to_chars (int negative, uintmax_t value, size_t valsize,
209 	  uintmax_t (*substitute) (int *),
210 	  char *where, size_t size, const char *type);
211 
212 static bool
to_chars_subst(int negative,int gnu_format,uintmax_t value,size_t valsize,uintmax_t (* substitute)(int *),char * where,size_t size,const char * type)213 to_chars_subst (int negative, int gnu_format, uintmax_t value, size_t valsize,
214 		uintmax_t (*substitute) (int *),
215 		char *where, size_t size, const char *type)
216 {
217   uintmax_t maxval = (gnu_format
218 		      ? MAX_VAL_WITH_DIGITS (size - 1, LG_256)
219 		      : MAX_VAL_WITH_DIGITS (size - 1, LG_8));
220   char valbuf[UINTMAX_STRSIZE_BOUND + 1];
221   char maxbuf[UINTMAX_STRSIZE_BOUND];
222   char minbuf[UINTMAX_STRSIZE_BOUND + 1];
223   char const *minval_string;
224   char const *maxval_string = STRINGIFY_BIGINT (maxval, maxbuf);
225   char const *value_string;
226 
227   if (gnu_format)
228     {
229       uintmax_t m = maxval + 1 ? maxval + 1 : maxval / 2 + 1;
230       char *p = STRINGIFY_BIGINT (m, minbuf + 1);
231       *--p = '-';
232       minval_string = p;
233     }
234   else
235     minval_string = "0";
236 
237   if (negative)
238     {
239       char *p = STRINGIFY_BIGINT (- value, valbuf + 1);
240       *--p = '-';
241       value_string = p;
242     }
243   else
244     value_string = STRINGIFY_BIGINT (value, valbuf);
245 
246   if (substitute)
247     {
248       int negsub;
249       uintmax_t sub = substitute (&negsub) & maxval;
250       /* NOTE: This is one of the few places where GNU_FORMAT differs from
251 	 OLDGNU_FORMAT.  The actual differences are:
252 
253 	 1. In OLDGNU_FORMAT all strings in a tar header end in \0
254 	 2. Incremental archives use oldgnu_header.
255 
256 	 Apart from this they are completely identical. */
257       uintmax_t s = (negsub &= archive_format == GNU_FORMAT) ? - sub : sub;
258       char subbuf[UINTMAX_STRSIZE_BOUND + 1];
259       char *sub_string = STRINGIFY_BIGINT (s, subbuf + 1);
260       if (negsub)
261 	*--sub_string = '-';
262       WARN ((0, 0, _("value %s out of %s range %s..%s; substituting %s"),
263 	     value_string, type, minval_string, maxval_string,
264 	     sub_string));
265       return to_chars (negsub, s, valsize, 0, where, size, type);
266     }
267   else
268     ERROR ((0, 0, _("value %s out of %s range %s..%s"),
269 	    value_string, type, minval_string, maxval_string));
270   return false;
271 }
272 
273 /* Convert NEGATIVE VALUE (which was originally of size VALSIZE) to
274    external form, using SUBSTITUTE (...) if VALUE won't fit.  Output
275    to buffer WHERE with size SIZE.  NEGATIVE is 1 iff VALUE was
276    negative before being cast to uintmax_t; its original bitpattern
277    can be deduced from VALSIZE, its original size before casting.
278    TYPE is the kind of value being output (useful for diagnostics).
279    Prefer the POSIX format of SIZE - 1 octal digits (with leading zero
280    digits), followed by '\0'.  If this won't work, and if GNU or
281    OLDGNU format is allowed, use '\200' followed by base-256, or (if
282    NEGATIVE is nonzero) '\377' followed by two's complement base-256.
283    If neither format works, use SUBSTITUTE (...)  instead.  Pass to
284    SUBSTITUTE the address of an 0-or-1 flag recording whether the
285    substitute value is negative.  */
286 
287 static bool
to_chars(int negative,uintmax_t value,size_t valsize,uintmax_t (* substitute)(int *),char * where,size_t size,const char * type)288 to_chars (int negative, uintmax_t value, size_t valsize,
289 	  uintmax_t (*substitute) (int *),
290 	  char *where, size_t size, const char *type)
291 {
292   int gnu_format = (archive_format == GNU_FORMAT
293 		    || archive_format == OLDGNU_FORMAT);
294 
295   /* Generate the POSIX octal representation if the number fits.  */
296   if (! negative && value <= MAX_VAL_WITH_DIGITS (size - 1, LG_8))
297     {
298       where[size - 1] = '\0';
299       to_octal (value, where, size - 1);
300       return true;
301     }
302   else if (gnu_format)
303     {
304       /* Try to cope with the number by using traditional GNU format
305 	 methods */
306 
307       /* Generate the base-256 representation if the number fits.  */
308       if (((negative ? -1 - value : value)
309 	   <= MAX_VAL_WITH_DIGITS (size - 1, LG_256)))
310 	{
311 	  where[0] = negative ? -1 : 1 << (LG_256 - 1);
312 	  to_base256 (negative, value, where + 1, size - 1);
313 	  return true;
314 	}
315 
316       /* Otherwise, if the number is negative, and if it would not cause
317 	 ambiguity on this host by confusing positive with negative
318 	 values, then generate the POSIX octal representation of the value
319 	 modulo 2**(field bits).  The resulting tar file is
320 	 machine-dependent, since it depends on the host word size.  Yuck!
321 	 But this is the traditional behavior.  */
322       else if (negative && valsize * CHAR_BIT <= (size - 1) * LG_8)
323 	{
324 	  static int warned_once;
325 	  if (! warned_once)
326 	    {
327 	      warned_once = 1;
328 	      WARN ((0, 0, _("Generating negative octal headers")));
329 	    }
330 	  where[size - 1] = '\0';
331 	  to_octal (value & MAX_VAL_WITH_DIGITS (valsize * CHAR_BIT, 1),
332 		    where, size - 1);
333 	  return true;
334 	}
335       /* Otherwise fall back to substitution, if possible: */
336     }
337   else
338     substitute = NULL; /* No substitution for formats, other than GNU */
339 
340   return to_chars_subst (negative, gnu_format, value, valsize, substitute,
341 			 where, size, type);
342 }
343 
/* Return the group ID to use in place of one that cannot be stored in
   a header, and set *NEGATIVE to 1 if that ID is negative, else 0.
   The ID is GID_NOBODY when that macro is defined.  Otherwise it is
   whatever gname_to_gid stores for "nobody", or -2 when gname_to_gid
   returns zero.  The looked-up value is kept in a static variable and
   reused while it is nonzero.  */
static uintmax_t
gid_substitute (int *negative)
{
  gid_t subst;
#ifdef GID_NOBODY
  subst = GID_NOBODY;
#else
  static gid_t gid_nobody;

  if (!gid_nobody)
    {
      if (!gname_to_gid ("nobody", &gid_nobody))
	gid_nobody = -2;
    }
  subst = gid_nobody;
#endif
  *negative = subst < 0;
  return subst;
}
359 
360 static bool
gid_to_chars(gid_t v,char * p,size_t s)361 gid_to_chars (gid_t v, char *p, size_t s)
362 {
363   return to_chars (v < 0, (uintmax_t) v, sizeof v, gid_substitute, p, s, "gid_t");
364 }
365 
366 static bool
major_to_chars(major_t v,char * p,size_t s)367 major_to_chars (major_t v, char *p, size_t s)
368 {
369   return to_chars (v < 0, (uintmax_t) v, sizeof v, 0, p, s, "major_t");
370 }
371 
372 static bool
minor_to_chars(minor_t v,char * p,size_t s)373 minor_to_chars (minor_t v, char *p, size_t s)
374 {
375   return to_chars (v < 0, (uintmax_t) v, sizeof v, 0, p, s, "minor_t");
376 }
377 
378 static bool
mode_to_chars(mode_t v,char * p,size_t s)379 mode_to_chars (mode_t v, char *p, size_t s)
380 {
381   /* In the common case where the internal and external mode bits are the same,
382      and we are not using POSIX or GNU format,
383      propagate all unknown bits to the external mode.
384      This matches historical practice.
385      Otherwise, just copy the bits we know about.  */
386   int negative;
387   uintmax_t u;
388   if (S_ISUID == TSUID && S_ISGID == TSGID && S_ISVTX == TSVTX
389       && S_IRUSR == TUREAD && S_IWUSR == TUWRITE && S_IXUSR == TUEXEC
390       && S_IRGRP == TGREAD && S_IWGRP == TGWRITE && S_IXGRP == TGEXEC
391       && S_IROTH == TOREAD && S_IWOTH == TOWRITE && S_IXOTH == TOEXEC
392       && archive_format != POSIX_FORMAT
393       && archive_format != USTAR_FORMAT
394       && archive_format != GNU_FORMAT)
395     {
396       negative = v < 0;
397       u = v;
398     }
399   else
400     {
401       negative = 0;
402       u = ((v & S_ISUID ? TSUID : 0)
403 	   | (v & S_ISGID ? TSGID : 0)
404 	   | (v & S_ISVTX ? TSVTX : 0)
405 	   | (v & S_IRUSR ? TUREAD : 0)
406 	   | (v & S_IWUSR ? TUWRITE : 0)
407 	   | (v & S_IXUSR ? TUEXEC : 0)
408 	   | (v & S_IRGRP ? TGREAD : 0)
409 	   | (v & S_IWGRP ? TGWRITE : 0)
410 	   | (v & S_IXGRP ? TGEXEC : 0)
411 	   | (v & S_IROTH ? TOREAD : 0)
412 	   | (v & S_IWOTH ? TOWRITE : 0)
413 	   | (v & S_IXOTH ? TOEXEC : 0));
414     }
415   return to_chars (negative, u, sizeof v, 0, p, s, "mode_t");
416 }
417 
/* Store file offset or size V in the S-byte header field P via
   to_chars, with no substitute.  Return to_chars's result.  */
bool
off_to_chars (off_t v, char *p, size_t s)
{
  int is_negative = v < 0;
  uintmax_t bits = (uintmax_t) v;

  return to_chars (is_negative, bits, sizeof v, 0, p, s, "off_t");
}
423 
/* Store time stamp V in the S-byte header field P via to_chars, with
   no substitute.  Return to_chars's result.  */
bool
time_to_chars (time_t v, char *p, size_t s)
{
  int is_negative = v < 0;
  uintmax_t bits = (uintmax_t) v;

  return to_chars (is_negative, bits, sizeof v, 0, p, s, "time_t");
}
429 
/* Return the user ID to use in place of one that cannot be stored in
   a header, and set *NEGATIVE to 1 if that ID is negative, else 0.
   The ID is UID_NOBODY when that macro is defined.  Otherwise it is
   whatever uname_to_uid stores for "nobody", or -2 when uname_to_uid
   returns zero.  The looked-up value is kept in a static variable and
   reused while it is nonzero.  */
static uintmax_t
uid_substitute (int *negative)
{
  uid_t subst;
#ifdef UID_NOBODY
  subst = UID_NOBODY;
#else
  static uid_t uid_nobody;

  if (!uid_nobody)
    {
      if (!uname_to_uid ("nobody", &uid_nobody))
	uid_nobody = -2;
    }
  subst = uid_nobody;
#endif
  *negative = subst < 0;
  return subst;
}
445 
446 static bool
uid_to_chars(uid_t v,char * p,size_t s)447 uid_to_chars (uid_t v, char *p, size_t s)
448 {
449   return to_chars (v < 0, (uintmax_t) v, sizeof v, uid_substitute, p, s, "uid_t");
450 }
451 
/* Store the unsigned value V in the S-byte header field P via
   to_chars, with no substitute.  Return to_chars's result.  */
static bool
uintmax_to_chars (uintmax_t v, char *p, size_t s)
{
  size_t width = sizeof v;

  return to_chars (0, v, width, 0, p, s, "uintmax_t");
}
457 
/* Copy the string STR into the S-byte field P with tar_copy_str, then
   force a NUL into the field's last byte, truncating STR if it is too
   long.  S must be nonzero.  */
static void
string_to_chars (char const *str, char *p, size_t s)
{
  size_t last = s - 1;

  tar_copy_str (p, str, s);
  p[last] = '\0';
}
464 
465 
466 /* A directory is always considered dumpable.
467    Otherwise, only regular and contiguous files are considered dumpable.
468    Such a file is dumpable if it is sparse and both --sparse and --totals
469    are specified.
470    Otherwise, it is dumpable unless any of the following conditions occur:
471 
472    a) it is empty *and* world-readable, or
473    b) current archive is /dev/null */
474 
475 static bool
file_dumpable_p(struct stat const * st)476 file_dumpable_p (struct stat const *st)
477 {
478   if (S_ISDIR (st->st_mode))
479     return true;
480   if (! (S_ISREG (st->st_mode) || S_ISCTG (st->st_mode)))
481     return false;
482   if (dev_null_output)
483     return totals_option && sparse_option && ST_IS_SPARSE (*st);
484   return ! (st->st_size == 0 && (st->st_mode & MODE_R) == MODE_R);
485 }
486 
487 
488 /* Writing routines.  */
489 
490 /* Write the EOT block(s).  Zero at least two blocks, through the end
491    of the record.  Old tar, as previous versions of GNU tar, writes
492    garbage after two zeroed blocks.  */
493 void
write_eot(void)494 write_eot (void)
495 {
496   union block *pointer = find_next_block ();
497   memset (pointer->buffer, 0, BLOCKSIZE);
498   set_next_block_after (pointer);
499   pointer = find_next_block ();
500   memset (pointer->buffer, 0, available_space_after (pointer));
501   set_next_block_after (pointer);
502 }
503 
504 /* Write a "private" header */
505 union block *
start_private_header(const char * name,size_t size,time_t t)506 start_private_header (const char *name, size_t size, time_t t)
507 {
508   union block *header = find_next_block ();
509 
510   memset (header->buffer, 0, sizeof (union block));
511 
512   tar_name_copy_str (header->header.name, name, NAME_FIELD_SIZE);
513   OFF_TO_CHARS (size, header->header.size);
514 
515   TIME_TO_CHARS (t < 0 ? 0 : min (t, MAX_OCTAL_VAL (header->header.mtime)),
516 		 header->header.mtime);
517   MODE_TO_CHARS (S_IFREG|S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, header->header.mode);
518   UID_TO_CHARS (0, header->header.uid);
519   GID_TO_CHARS (0, header->header.gid);
520   memcpy (header->header.magic, TMAGIC, TMAGLEN);
521   memcpy (header->header.version, TVERSION, TVERSLEN);
522   return header;
523 }
524 
525 /* Create a new header and store there at most NAME_FIELD_SIZE bytes of
526    the file name */
527 
528 static union block *
write_short_name(struct tar_stat_info * st)529 write_short_name (struct tar_stat_info *st)
530 {
531   union block *header = find_next_block ();
532   memset (header->buffer, 0, sizeof (union block));
533   tar_name_copy_str (header->header.name, st->file_name, NAME_FIELD_SIZE);
534   return header;
535 }
536 
537 /* Write a GNUTYPE_LONGLINK or GNUTYPE_LONGNAME block.  */
538 static void
write_gnu_long_link(struct tar_stat_info * st,const char * p,char type)539 write_gnu_long_link (struct tar_stat_info *st, const char *p, char type)
540 {
541   size_t size = strlen (p) + 1;
542   size_t bufsize;
543   union block *header;
544 
545   header = start_private_header ("././@LongLink", size, 0);
546   if (! numeric_owner_option)
547     {
548       static char *uname, *gname;
549       if (!uname)
550 	{
551 	  uid_to_uname (0, &uname);
552 	  gid_to_gname (0, &gname);
553 	}
554       UNAME_TO_CHARS (uname, header->header.uname);
555       GNAME_TO_CHARS (gname, header->header.gname);
556     }
557 
558   strcpy (header->buffer + offsetof (struct posix_header, magic),
559 	  OLDGNU_MAGIC);
560   header->header.typeflag = type;
561   finish_header (st, header, -1);
562 
563   header = find_next_block ();
564 
565   bufsize = available_space_after (header);
566 
567   while (bufsize < size)
568     {
569       memcpy (header->buffer, p, bufsize);
570       p += bufsize;
571       size -= bufsize;
572       set_next_block_after (header + (bufsize - 1) / BLOCKSIZE);
573       header = find_next_block ();
574       bufsize = available_space_after (header);
575     }
576   memcpy (header->buffer, p, size);
577   memset (header->buffer + size, 0, bufsize - size);
578   set_next_block_after (header + (size - 1) / BLOCKSIZE);
579 }
580 
581 static size_t
split_long_name(const char * name,size_t length)582 split_long_name (const char *name, size_t length)
583 {
584   size_t i;
585 
586   if (length > PREFIX_FIELD_SIZE + 1)
587     length = PREFIX_FIELD_SIZE + 1;
588   else if (ISSLASH (name[length - 1]))
589     length--;
590   for (i = length - 1; i > 0; i--)
591     if (ISSLASH (name[i]))
592       break;
593   return i;
594 }
595 
596 static union block *
write_ustar_long_name(const char * name)597 write_ustar_long_name (const char *name)
598 {
599   size_t length = strlen (name);
600   size_t i, nlen;
601   union block *header;
602 
603   if (length > PREFIX_FIELD_SIZE + NAME_FIELD_SIZE + 1)
604     {
605       ERROR ((0, 0, _("%s: file name is too long (max %d); not dumped"),
606 	      quotearg_colon (name),
607 	      PREFIX_FIELD_SIZE + NAME_FIELD_SIZE + 1));
608       return NULL;
609     }
610 
611   i = split_long_name (name, length);
612   if (i == 0 || (nlen = length - i - 1) > NAME_FIELD_SIZE || nlen == 0)
613     {
614       ERROR ((0, 0,
615 	      _("%s: file name is too long (cannot be split); not dumped"),
616 	      quotearg_colon (name)));
617       return NULL;
618     }
619 
620   header = find_next_block ();
621   memset (header->buffer, 0, sizeof (header->buffer));
622   memcpy (header->header.prefix, name, i);
623   memcpy (header->header.name, name + i + 1, length - i - 1);
624 
625   return header;
626 }
627 
628 /* Write a long link name, depending on the current archive format */
629 static void
write_long_link(struct tar_stat_info * st)630 write_long_link (struct tar_stat_info *st)
631 {
632   switch (archive_format)
633     {
634     case POSIX_FORMAT:
635       xheader_store ("linkpath", st, NULL);
636       break;
637 
638     case V7_FORMAT:			/* old V7 tar format */
639     case USTAR_FORMAT:
640     case STAR_FORMAT:
641       ERROR ((0, 0,
642 	      _("%s: link name is too long; not dumped"),
643 	      quotearg_colon (st->link_name)));
644       break;
645 
646     case OLDGNU_FORMAT:
647     case GNU_FORMAT:
648       write_gnu_long_link (st, st->link_name, GNUTYPE_LONGLINK);
649       break;
650 
651     default:
652       abort(); /*FIXME*/
653     }
654 }
655 
656 static union block *
write_long_name(struct tar_stat_info * st)657 write_long_name (struct tar_stat_info *st)
658 {
659   switch (archive_format)
660     {
661     case POSIX_FORMAT:
662       xheader_store ("path", st, NULL);
663       break;
664 
665     case V7_FORMAT:
666       if (strlen (st->file_name) > NAME_FIELD_SIZE-1)
667 	{
668 	  ERROR ((0, 0, _("%s: file name is too long (max %d); not dumped"),
669 		  quotearg_colon (st->file_name),
670 		  NAME_FIELD_SIZE - 1));
671 	  return NULL;
672 	}
673       break;
674 
675     case USTAR_FORMAT:
676     case STAR_FORMAT:
677       return write_ustar_long_name (st->file_name);
678 
679     case OLDGNU_FORMAT:
680     case GNU_FORMAT:
681       write_gnu_long_link (st, st->file_name, GNUTYPE_LONGNAME);
682       break;
683 
684     default:
685       abort(); /*FIXME*/
686     }
687   return write_short_name (st);
688 }
689 
690 union block *
write_extended(bool global,struct tar_stat_info * st,union block * old_header)691 write_extended (bool global, struct tar_stat_info *st, union block *old_header)
692 {
693   union block *header, hp;
694   char *p;
695   int type;
696   time_t t;
697 
698   if (st->xhdr.buffer || st->xhdr.stk == NULL)
699     return old_header;
700 
701   xheader_finish (&st->xhdr);
702   memcpy (hp.buffer, old_header, sizeof (hp));
703   if (global)
704     {
705       type = XGLTYPE;
706       p = xheader_ghdr_name ();
707       t = start_time.tv_sec;
708     }
709   else
710     {
711       type = XHDTYPE;
712       p = xheader_xhdr_name (st);
713       t = set_mtime_option ? mtime_option.tv_sec : st->stat.st_mtime;
714     }
715   xheader_write (type, p, t, &st->xhdr);
716   free (p);
717   header = find_next_block ();
718   memcpy (header, &hp.buffer, sizeof (hp.buffer));
719   return header;
720 }
721 
722 static union block *
write_header_name(struct tar_stat_info * st)723 write_header_name (struct tar_stat_info *st)
724 {
725   if (archive_format == POSIX_FORMAT && !string_ascii_p (st->file_name))
726     {
727       xheader_store ("path", st, NULL);
728       return write_short_name (st);
729     }
730   else if (NAME_FIELD_SIZE - (archive_format == OLDGNU_FORMAT)
731 	   < strlen (st->file_name))
732     return write_long_name (st);
733   else
734     return write_short_name (st);
735 }
736 
737 
738 /* Header handling.  */
739 
740 /* Make a header block for the file whose stat info is st,
741    and return its address.  */
742 
743 union block *
start_header(struct tar_stat_info * st)744 start_header (struct tar_stat_info *st)
745 {
746   union block *header;
747   char const *uname = NULL;
748   char const *gname = NULL;
749 
750   header = write_header_name (st);
751   if (!header)
752     return NULL;
753 
754   /* Override some stat fields, if requested to do so.  */
755   owner_map_translate (st->stat.st_uid, &st->stat.st_uid, &uname);
756   group_map_translate (st->stat.st_gid, &st->stat.st_gid, &gname);
757 
758   if (mode_option)
759     st->stat.st_mode =
760       ((st->stat.st_mode & ~MODE_ALL)
761        | mode_adjust (st->stat.st_mode, S_ISDIR (st->stat.st_mode) != 0,
762 		      initial_umask, mode_option, NULL));
763 
764   /* Paul Eggert tried the trivial test ($WRITER cf a b; $READER tvf a)
765      for a few tars and came up with the following interoperability
766      matrix:
767 
768 	      WRITER
769 	1 2 3 4 5 6 7 8 9   READER
770 	. . . . . . . . .   1 = SunOS 4.2 tar
771 	# . . # # . . # #   2 = NEC SVR4.0.2 tar
772 	. . . # # . . # .   3 = Solaris 2.1 tar
773 	. . . . . . . . .   4 = GNU tar 1.11.1
774 	. . . . . . . . .   5 = HP-UX 8.07 tar
775 	. . . . . . . . .   6 = Ultrix 4.1
776 	. . . . . . . . .   7 = AIX 3.2
777 	. . . . . . . . .   8 = Hitachi HI-UX 1.03
778 	. . . . . . . . .   9 = Omron UNIOS-B 4.3BSD 1.60Beta
779 
780 	     . = works
781 	     # = "impossible file type"
782 
783      The following mask for old archive removes the '#'s in column 4
784      above, thus making GNU tar both a universal donor and a universal
785      acceptor for Paul's test.  */
786 
787   if (archive_format == V7_FORMAT || archive_format == USTAR_FORMAT)
788     MODE_TO_CHARS (st->stat.st_mode & MODE_ALL, header->header.mode);
789   else
790     MODE_TO_CHARS (st->stat.st_mode, header->header.mode);
791 
792   {
793     uid_t uid = st->stat.st_uid;
794     if (archive_format == POSIX_FORMAT
795 	&& MAX_OCTAL_VAL (header->header.uid) < uid)
796       {
797 	xheader_store ("uid", st, NULL);
798 	uid = 0;
799       }
800     if (!UID_TO_CHARS (uid, header->header.uid))
801       return NULL;
802   }
803 
804   {
805     gid_t gid = st->stat.st_gid;
806     if (archive_format == POSIX_FORMAT
807 	&& MAX_OCTAL_VAL (header->header.gid) < gid)
808       {
809 	xheader_store ("gid", st, NULL);
810 	gid = 0;
811       }
812     if (!GID_TO_CHARS (gid, header->header.gid))
813       return NULL;
814   }
815 
816   {
817     off_t size = st->stat.st_size;
818     if (archive_format == POSIX_FORMAT
819 	&& MAX_OCTAL_VAL (header->header.size) < size)
820       {
821 	xheader_store ("size", st, NULL);
822 	size = 0;
823       }
824     if (!OFF_TO_CHARS (size, header->header.size))
825       return NULL;
826   }
827 
828   {
829     struct timespec mtime;
830 
831     switch (set_mtime_option)
832       {
833       case USE_FILE_MTIME:
834 	mtime = st->mtime;
835 	break;
836 
837       case FORCE_MTIME:
838 	mtime = mtime_option;
839 	break;
840 
841       case CLAMP_MTIME:
842 	mtime = timespec_cmp (st->mtime, mtime_option) > 0
843 	           ? mtime_option : st->mtime;
844 	break;
845       }
846 
847     if (archive_format == POSIX_FORMAT)
848       {
849 	if (MAX_OCTAL_VAL (header->header.mtime) < mtime.tv_sec
850 	    || mtime.tv_nsec != 0)
851 	  xheader_store ("mtime", st, &mtime);
852 	if (MAX_OCTAL_VAL (header->header.mtime) < mtime.tv_sec)
853 	  mtime.tv_sec = 0;
854       }
855     if (!TIME_TO_CHARS (mtime.tv_sec, header->header.mtime))
856       return NULL;
857   }
858 
859   /* FIXME */
860   if (S_ISCHR (st->stat.st_mode)
861       || S_ISBLK (st->stat.st_mode))
862     {
863       major_t devmajor = major (st->stat.st_rdev);
864       minor_t devminor = minor (st->stat.st_rdev);
865 
866       if (archive_format == POSIX_FORMAT
867 	  && MAX_OCTAL_VAL (header->header.devmajor) < devmajor)
868 	{
869 	  xheader_store ("devmajor", st, NULL);
870 	  devmajor = 0;
871 	}
872       if (!MAJOR_TO_CHARS (devmajor, header->header.devmajor))
873 	return NULL;
874 
875       if (archive_format == POSIX_FORMAT
876 	  && MAX_OCTAL_VAL (header->header.devminor) < devminor)
877 	{
878 	  xheader_store ("devminor", st, NULL);
879 	  devminor = 0;
880 	}
881       if (!MINOR_TO_CHARS (devminor, header->header.devminor))
882 	return NULL;
883     }
884   else if (archive_format != GNU_FORMAT && archive_format != OLDGNU_FORMAT)
885     {
886       if (!(MAJOR_TO_CHARS (0, header->header.devmajor)
887 	    && MINOR_TO_CHARS (0, header->header.devminor)))
888 	return NULL;
889     }
890 
891   if (archive_format == POSIX_FORMAT)
892     {
893       xheader_store ("atime", st, NULL);
894       xheader_store ("ctime", st, NULL);
895     }
896   else if (incremental_option)
897     if (archive_format == OLDGNU_FORMAT || archive_format == GNU_FORMAT)
898       {
899 	TIME_TO_CHARS (st->atime.tv_sec, header->oldgnu_header.atime);
900 	TIME_TO_CHARS (st->ctime.tv_sec, header->oldgnu_header.ctime);
901       }
902 
903   header->header.typeflag = archive_format == V7_FORMAT ? AREGTYPE : REGTYPE;
904 
905   switch (archive_format)
906     {
907     case V7_FORMAT:
908       break;
909 
910     case OLDGNU_FORMAT:
911     case GNU_FORMAT:   /*FIXME?*/
912       /* Overwrite header->header.magic and header.version in one blow.  */
913       strcpy (header->buffer + offsetof (struct posix_header, magic),
914 	      OLDGNU_MAGIC);
915       break;
916 
917     case POSIX_FORMAT:
918     case USTAR_FORMAT:
919       memcpy (header->header.magic, TMAGIC, TMAGLEN);
920       memcpy (header->header.version, TVERSION, TVERSLEN);
921       break;
922 
923     default:
924       abort ();
925     }
926 
927   if (archive_format == V7_FORMAT || numeric_owner_option)
928     {
929       /* header->header.[ug]name are left as the empty string.  */
930     }
931   else
932     {
933       if (uname)
934 	st->uname = xstrdup (uname);
935       else
936 	uid_to_uname (st->stat.st_uid, &st->uname);
937 
938       if (gname)
939 	st->gname = xstrdup (gname);
940       else
941 	gid_to_gname (st->stat.st_gid, &st->gname);
942 
943       if (archive_format == POSIX_FORMAT
944 	  && (strlen (st->uname) > UNAME_FIELD_SIZE
945 	      || !string_ascii_p (st->uname)))
946 	xheader_store ("uname", st, NULL);
947       UNAME_TO_CHARS (st->uname, header->header.uname);
948 
949       if (archive_format == POSIX_FORMAT
950 	  && (strlen (st->gname) > GNAME_FIELD_SIZE
951 	      || !string_ascii_p (st->gname)))
952 	xheader_store ("gname", st, NULL);
953       GNAME_TO_CHARS (st->gname, header->header.gname);
954     }
955 
956   if (archive_format == POSIX_FORMAT)
957     {
958       if (acls_option > 0)
959         {
960           if (st->acls_a_ptr)
961             xheader_store ("SCHILY.acl.access", st, NULL);
962           if (st->acls_d_ptr)
963             xheader_store ("SCHILY.acl.default", st, NULL);
964         }
965       if ((selinux_context_option > 0) && st->cntx_name)
966         xheader_store ("RHT.security.selinux", st, NULL);
967       if (xattrs_option > 0)
968         {
969           size_t scan_xattr = 0;
970           struct xattr_array *xattr_map = st->xattr_map;
971 
972           while (scan_xattr < st->xattr_map_size)
973             {
974               xheader_store (xattr_map[scan_xattr].xkey, st, &scan_xattr);
975               ++scan_xattr;
976             }
977         }
978     }
979 
980   return header;
981 }
982 
983 void
simple_finish_header(union block * header)984 simple_finish_header (union block *header)
985 {
986   size_t i;
987   int sum;
988   char *p;
989 
990   memcpy (header->header.chksum, CHKBLANKS, sizeof header->header.chksum);
991 
992   sum = 0;
993   p = header->buffer;
994   for (i = sizeof *header; i-- != 0; )
995     /* We can't use unsigned char here because of old compilers, e.g. V7.  */
996     sum += 0xFF & *p++;
997 
998   /* Fill in the checksum field.  It's formatted differently from the
999      other fields: it has [6] digits, a null, then a space -- rather than
1000      digits, then a null.  We use to_chars.
1001      The final space is already there, from
1002      checksumming, and to_chars doesn't modify it.
1003 
1004      This is a fast way to do:
1005 
1006      sprintf(header->header.chksum, "%6o", sum);  */
1007 
1008   uintmax_to_chars ((uintmax_t) sum, header->header.chksum, 7);
1009 
1010   set_next_block_after (header);
1011 }
1012 
1013 /* Finish off a filled-in header block and write it out.  We also
1014    print the file name and/or full info if verbose is on.  If BLOCK_ORDINAL
1015    is not negative, is the block ordinal of the first record for this
1016    file, which may be a preceding long name or long link record.  */
1017 void
finish_header(struct tar_stat_info * st,union block * header,off_t block_ordinal)1018 finish_header (struct tar_stat_info *st,
1019 	       union block *header, off_t block_ordinal)
1020 {
1021   /* Note: It is important to do this before the call to write_extended(),
1022      so that the actual ustar header is printed */
1023   if (verbose_option
1024       && header->header.typeflag != GNUTYPE_LONGLINK
1025       && header->header.typeflag != GNUTYPE_LONGNAME
1026       && header->header.typeflag != XHDTYPE
1027       && header->header.typeflag != XGLTYPE)
1028     {
1029       /* FIXME: This global is used in print_header, sigh.  */
1030       current_format = archive_format;
1031       print_header (st, header, block_ordinal);
1032     }
1033 
1034   header = write_extended (false, st, header);
1035   simple_finish_header (header);
1036 }
1037 
1038 
1039 void
pad_archive(off_t size_left)1040 pad_archive (off_t size_left)
1041 {
1042   union block *blk;
1043   while (size_left > 0)
1044     {
1045       blk = find_next_block ();
1046       memset (blk->buffer, 0, BLOCKSIZE);
1047       set_next_block_after (blk);
1048       size_left -= BLOCKSIZE;
1049     }
1050 }
1051 
1052 static enum dump_status
dump_regular_file(int fd,struct tar_stat_info * st)1053 dump_regular_file (int fd, struct tar_stat_info *st)
1054 {
1055   off_t size_left = st->stat.st_size;
1056   off_t block_ordinal;
1057   union block *blk;
1058 
1059   block_ordinal = current_block_ordinal ();
1060   blk = start_header (st);
1061   if (!blk)
1062     return dump_status_fail;
1063 
1064   /* Mark contiguous files, if we support them.  */
1065   if (archive_format != V7_FORMAT && S_ISCTG (st->stat.st_mode))
1066     blk->header.typeflag = CONTTYPE;
1067 
1068   finish_header (st, blk, block_ordinal);
1069 
1070   mv_begin_write (st->file_name, st->stat.st_size, st->stat.st_size);
1071   while (size_left > 0)
1072     {
1073       size_t bufsize, count;
1074 
1075       blk = find_next_block ();
1076 
1077       bufsize = available_space_after (blk);
1078 
1079       if (size_left < bufsize)
1080 	{
1081 	  /* Last read -- zero out area beyond.  */
1082 	  bufsize = size_left;
1083 	  count = bufsize % BLOCKSIZE;
1084 	  if (count)
1085 	    memset (blk->buffer + size_left, 0, BLOCKSIZE - count);
1086 	}
1087 
1088       count = (fd <= 0) ? bufsize : blocking_read (fd, blk->buffer, bufsize);
1089       if (count == SAFE_READ_ERROR)
1090 	{
1091 	  read_diag_details (st->orig_file_name,
1092 	                     st->stat.st_size - size_left, bufsize);
1093 	  pad_archive (size_left);
1094 	  return dump_status_short;
1095 	}
1096       size_left -= count;
1097       set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE);
1098 
1099       if (count != bufsize)
1100 	{
1101 	  char buf[UINTMAX_STRSIZE_BOUND];
1102 	  memset (blk->buffer + count, 0, bufsize - count);
1103 	  WARNOPT (WARN_FILE_SHRANK,
1104 		   (0, 0,
1105 		    ngettext ("%s: File shrank by %s byte; padding with zeros",
1106 			      "%s: File shrank by %s bytes; padding with zeros",
1107 			      size_left),
1108 		    quotearg_colon (st->orig_file_name),
1109 		    STRINGIFY_BIGINT (size_left, buf)));
1110 	  if (! ignore_failed_read_option)
1111 	    set_exit_status (TAREXIT_DIFFERS);
1112 	  pad_archive (size_left - (bufsize - count));
1113 	  return dump_status_short;
1114 	}
1115     }
1116   return dump_status_ok;
1117 }
1118 
1119 
1120 /* Copy info from the directory identified by ST into the archive.
1121    DIRECTORY contains the directory's entries.  */
1122 
1123 static void
dump_dir0(struct tar_stat_info * st,char const * directory)1124 dump_dir0 (struct tar_stat_info *st, char const *directory)
1125 {
1126   bool top_level = ! st->parent;
1127   const char *tag_file_name;
1128   union block *blk = NULL;
1129   off_t block_ordinal = current_block_ordinal ();
1130 
1131   st->stat.st_size = 0;	/* force 0 size on dir */
1132 
1133   blk = start_header (st);
1134   if (!blk)
1135     return;
1136 
1137   info_attach_exclist (st);
1138 
1139   if (incremental_option && archive_format != POSIX_FORMAT)
1140     blk->header.typeflag = GNUTYPE_DUMPDIR;
1141   else /* if (standard_option) */
1142     blk->header.typeflag = DIRTYPE;
1143 
1144   /* If we're gnudumping, we aren't done yet so don't close it.  */
1145 
1146   if (!incremental_option)
1147     finish_header (st, blk, block_ordinal);
1148   else if (gnu_list_name->directory)
1149     {
1150       if (archive_format == POSIX_FORMAT)
1151 	{
1152 	  xheader_store ("GNU.dumpdir", st,
1153 			 safe_directory_contents (gnu_list_name->directory));
1154 	  finish_header (st, blk, block_ordinal);
1155 	}
1156       else
1157 	{
1158 	  off_t size_left;
1159 	  off_t totsize;
1160 	  size_t bufsize;
1161 	  ssize_t count;
1162 	  const char *buffer, *p_buffer;
1163 
1164 	  block_ordinal = current_block_ordinal ();
1165 	  buffer = safe_directory_contents (gnu_list_name->directory);
1166 	  totsize = dumpdir_size (buffer);
1167 	  OFF_TO_CHARS (totsize, blk->header.size);
1168 	  finish_header (st, blk, block_ordinal);
1169 	  p_buffer = buffer;
1170 	  size_left = totsize;
1171 
1172 	  mv_begin_write (st->file_name, totsize, totsize);
1173 	  while (size_left > 0)
1174 	    {
1175 	      blk = find_next_block ();
1176 	      bufsize = available_space_after (blk);
1177 	      if (size_left < bufsize)
1178 		{
1179 		  bufsize = size_left;
1180 		  count = bufsize % BLOCKSIZE;
1181 		  if (count)
1182 		    memset (blk->buffer + size_left, 0, BLOCKSIZE - count);
1183 		}
1184 	      memcpy (blk->buffer, p_buffer, bufsize);
1185 	      size_left -= bufsize;
1186 	      p_buffer += bufsize;
1187 	      set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE);
1188 	    }
1189 	}
1190       return;
1191     }
1192 
1193   if (!recursion_option)
1194     return;
1195 
1196   if (one_file_system_option
1197       && !top_level
1198       && st->parent->stat.st_dev != st->stat.st_dev)
1199     {
1200       if (verbose_option)
1201 	WARNOPT (WARN_XDEV,
1202 		 (0, 0,
1203 		  _("%s: file is on a different filesystem; not dumped"),
1204 		  quotearg_colon (st->orig_file_name)));
1205     }
1206   else
1207     {
1208       char *name_buf;
1209       size_t name_size;
1210 
1211       switch (check_exclusion_tags (st, &tag_file_name))
1212 	{
1213 	case exclusion_tag_all:
1214 	  /* Handled in dump_file0 */
1215 	  break;
1216 
1217 	case exclusion_tag_none:
1218 	  {
1219 	    char const *entry;
1220 	    size_t entry_len;
1221 	    size_t name_len;
1222 
1223 	    name_buf = xstrdup (st->orig_file_name);
1224 	    name_size = name_len = strlen (name_buf);
1225 
1226 	    /* Now output all the files in the directory.  */
1227 	    for (entry = directory; (entry_len = strlen (entry)) != 0;
1228 		 entry += entry_len + 1)
1229 	      {
1230 		if (name_size < name_len + entry_len)
1231 		  {
1232 		    name_size = name_len + entry_len;
1233 		    name_buf = xrealloc (name_buf, name_size + 1);
1234 		  }
1235 		strcpy (name_buf + name_len, entry);
1236 		if (!excluded_name (name_buf, st))
1237 		  dump_file (st, entry, name_buf);
1238 	      }
1239 
1240 	    free (name_buf);
1241 	  }
1242 	  break;
1243 
1244 	case exclusion_tag_contents:
1245 	  exclusion_tag_warning (st->orig_file_name, tag_file_name,
1246 				 _("contents not dumped"));
1247 	  name_size = strlen (st->orig_file_name) + strlen (tag_file_name) + 1;
1248 	  name_buf = xmalloc (name_size);
1249 	  strcpy (name_buf, st->orig_file_name);
1250 	  strcat (name_buf, tag_file_name);
1251 	  dump_file (st, tag_file_name, name_buf);
1252 	  free (name_buf);
1253 	  break;
1254 
1255 	case exclusion_tag_under:
1256 	  exclusion_tag_warning (st->orig_file_name, tag_file_name,
1257 				 _("contents not dumped"));
1258 	  break;
1259 	}
1260     }
1261 }
1262 
/* Make the string *PSTR end in exactly one slash: any run of trailing
   slashes is collapsed to a single '/', and one is appended if there
   was none.  *PSTR may be reallocated.  */
static void
ensure_slash (char **pstr)
{
  char *s = *pstr;
  size_t n = strlen (s);

  /* Step back over every trailing slash.  */
  for (; 0 < n && ISSLASH (s[n - 1]); n--)
    continue;

  /* If nothing was stripped, s[n] is the terminating null and the
     buffer needs one more byte for the slash.  */
  if (! ISSLASH (s[n]))
    *pstr = s = xrealloc (s, n + 2);

  s[n] = '/';
  s[n + 1] = '\0';
}
1275 
1276 /* If we just ran out of file descriptors, release a file descriptor
1277    in the directory chain somewhere leading from DIR->parent->parent
1278    up through the root.  Return true if successful, false (preserving
1279    errno == EMFILE) otherwise.
1280 
1281    Do not release DIR's file descriptor, or DIR's parent, as other
1282    code assumes that they work.  On some operating systems, another
1283    process can claim file descriptor resources as we release them, and
1284    some calls or their emulations require multiple file descriptors,
1285    so callers should not give up if a single release doesn't work.  */
1286 
1287 static bool
open_failure_recover(struct tar_stat_info const * dir)1288 open_failure_recover (struct tar_stat_info const *dir)
1289 {
1290   if (errno == EMFILE && dir && dir->parent)
1291     {
1292       struct tar_stat_info *p;
1293       for (p = dir->parent->parent; p; p = p->parent)
1294 	if (0 < p->fd && (! p->parent || p->parent->fd <= 0))
1295 	  {
1296 	    tar_stat_close (p);
1297 	    return true;
1298 	  }
1299       errno = EMFILE;
1300     }
1301 
1302   return false;
1303 }
1304 
1305 /* Return the directory entries of ST, in a dynamically allocated buffer,
1306    each entry followed by '\0' and the last followed by an extra '\0'.
1307    Return null on failure, setting errno.  */
1308 char *
get_directory_entries(struct tar_stat_info * st)1309 get_directory_entries (struct tar_stat_info *st)
1310 {
1311   while (! (st->dirstream = fdopendir (st->fd)))
1312     if (! open_failure_recover (st))
1313       return 0;
1314   return streamsavedir (st->dirstream, savedir_sort_order);
1315 }
1316 
1317 /* Dump the directory ST.  Return true if successful, false (emitting
1318    diagnostics) otherwise.  Get ST's entries, recurse through its
1319    subdirectories, and clean up file descriptors afterwards.  */
1320 static bool
dump_dir(struct tar_stat_info * st)1321 dump_dir (struct tar_stat_info *st)
1322 {
1323   char *directory = get_directory_entries (st);
1324   if (! directory)
1325     {
1326       savedir_diag (st->orig_file_name);
1327       return false;
1328     }
1329 
1330   dump_dir0 (st, directory);
1331 
1332   restore_parent_fd (st);
1333   free (directory);
1334   return true;
1335 }
1336 
1337 
/* Number of links a file can have without having to be entered into
   the link table.  Typically this is 1, but in trickier circumstances
   it is 0.  Files whose st_nlink exceeds this value are the ones
   considered for hard-link handling.  */
static nlink_t trivial_link_count;
1342 
1343 
1344 /* Main functions of this module.  */
1345 
/* Create the archive: open it for writing, emit the global extended
   header, dump every requested name, then write the end-of-archive
   marker, close the archive, carry out deferred unlinks and, for
   listed-incremental dumps, write the directory file.  */
void
create_archive (void)
{
  struct name const *p;

  /* 1 unless many file names were given or symlinks are being
     dereferenced, in which case 0.  */
  trivial_link_count = filename_args != FILES_MANY && ! dereference_option;

  open_archive (ACCESS_WRITE);
  buffer_write_global_xheader ();

  if (incremental_option)
    {
      /* BUFFER is a growable scratch area holding "NAME/ENTRY" for the
	 second pass below.  */
      size_t buffer_size = 0;
      char *buffer = NULL;
      const char *q;

      collect_and_sort_names ();

      /* First pass: dump each listed name itself.  */
      while ((p = name_from_list ()) != NULL)
	if (!excluded_name (p->name, NULL))
	  dump_file (0, p->name, p->name);

      /* Second pass: walk the name list again and dump the flagged
	 entries recorded for each name's directory.  */
      blank_name_list ();
      while ((p = name_from_list ()) != NULL)
	if (!excluded_name (p->name, NULL))
	  {
	    struct tar_stat_info st;
	    size_t plen = strlen (p->name);
	    /* Make room for the name plus a possible separator.  */
	    while (buffer_size <= plen)
	      buffer = x2realloc (buffer, &buffer_size);
	    memcpy (buffer, p->name, plen);
	    if (! ISSLASH (buffer[plen - 1]))
	      buffer[plen++] = DIRECTORY_SEPARATOR;
	    tar_stat_init (&st);
	    q = directory_contents (p->directory);
	    /* Q is a sequence of null-terminated records ended by an
	       empty one; the first byte of each record is a flag and
	       the rest is an entry name.  */
	    if (q)
	      while (*q)
		{
		  size_t qlen = strlen (q);
		  /* Only records flagged 'Y' are dumped here.  */
		  if (*q == 'Y')
		    {
		      /* Open and stat the directory lazily, the first
			 time an entry needs it as its parent.  */
		      if (! st.orig_file_name)
			{
			  int fd = openat (chdir_fd, p->name,
					   open_searchdir_flags);
			  if (fd < 0)
			    {
			      file_removed_diag (p->name, !p->parent,
						 open_diag);
			      break;
			    }
			  st.fd = fd;
			  if (fstat (fd, &st.stat) != 0)
			    {
			      file_removed_diag (p->name, !p->parent,
						 stat_diag);
			      break;
			    }
			  st.orig_file_name = xstrdup (p->name);
			}
		      /* The entry name without its flag byte, plus the
			 terminating null, occupies QLEN bytes.  */
		      while (buffer_size < plen + qlen)
			buffer = x2realloc (buffer, &buffer_size);
		      strcpy (buffer + plen, q + 1);
		      dump_file (&st, q + 1, buffer);
		    }
		  q += qlen + 1;
		}
	    tar_stat_destroy (&st);
	  }
      free (buffer);
    }
  else
    {
      /* Non-incremental: dump the names in the order they come.  */
      const char *name;
      while ((name = name_next (1)) != NULL)
	if (!excluded_name (name, NULL))
	  dump_file (0, name, name);
    }

  write_eot ();
  close_archive ();
  finish_deferred_unlinks ();
  if (listed_incremental_option)
    write_directory_file ();
}
1431 
1432 
1433 /* Calculate the hash of a link.  */
1434 static size_t
hash_link(void const * entry,size_t n_buckets)1435 hash_link (void const *entry, size_t n_buckets)
1436 {
1437   struct link const *l = entry;
1438   uintmax_t num = l->dev ^ l->ino;
1439   return num % n_buckets;
1440 }
1441 
1442 /* Compare two links for equality.  */
1443 static bool
compare_links(void const * entry1,void const * entry2)1444 compare_links (void const *entry1, void const *entry2)
1445 {
1446   struct link const *link1 = entry1;
1447   struct link const *link2 = entry2;
1448   return ((link1->dev ^ link2->dev) | (link1->ino ^ link2->ino)) == 0;
1449 }
1450 
1451 static void
unknown_file_error(char const * p)1452 unknown_file_error (char const *p)
1453 {
1454   WARNOPT (WARN_FILE_IGNORED,
1455 	   (0, 0, _("%s: Unknown file type; file ignored"),
1456 	    quotearg_colon (p)));
1457   if (!ignore_failed_read_option)
1458     set_exit_status (TAREXIT_FAILURE);
1459 }
1460 
1461 
1462 /* Handling of hard links */
1463 
/* Table of all non-directories that we've written so far.  Any time
   we see another, we check the table and avoid dumping the data
   again if we've done it once already.  The table is created on
   first use and stays null until then.  */
static Hash_table *link_table;
1468 
1469 /* Try to dump stat as a hard link to another file in the archive.
1470    Return true if successful.  */
1471 static bool
dump_hard_link(struct tar_stat_info * st)1472 dump_hard_link (struct tar_stat_info *st)
1473 {
1474   if (link_table
1475       && (trivial_link_count < st->stat.st_nlink || remove_files_option))
1476     {
1477       struct link lp;
1478       struct link *duplicate;
1479       off_t block_ordinal;
1480       union block *blk;
1481 
1482       lp.ino = st->stat.st_ino;
1483       lp.dev = st->stat.st_dev;
1484 
1485       if ((duplicate = hash_lookup (link_table, &lp)))
1486 	{
1487 	  /* We found a link.  */
1488 	  char const *link_name = safer_name_suffix (duplicate->name, true,
1489 	                                             absolute_names_option);
1490 	  if (duplicate->nlink)
1491 	    duplicate->nlink--;
1492 
1493 	  block_ordinal = current_block_ordinal ();
1494 	  assign_string (&st->link_name, link_name);
1495 	  if (NAME_FIELD_SIZE - (archive_format == OLDGNU_FORMAT)
1496 	      < strlen (link_name))
1497 	    write_long_link (st);
1498 
1499 	  st->stat.st_size = 0;
1500 	  blk = start_header (st);
1501 	  if (!blk)
1502 	    return false;
1503 	  tar_copy_str (blk->header.linkname, link_name, NAME_FIELD_SIZE);
1504 
1505 	  blk->header.typeflag = LNKTYPE;
1506 	  finish_header (st, blk, block_ordinal);
1507 
1508 	  if (remove_files_option)
1509 	    queue_deferred_unlink (st->orig_file_name, false);
1510 
1511 	  return true;
1512 	}
1513     }
1514   return false;
1515 }
1516 
1517 static void
file_count_links(struct tar_stat_info * st)1518 file_count_links (struct tar_stat_info *st)
1519 {
1520   if (hard_dereference_option)
1521     return;
1522   if (trivial_link_count < st->stat.st_nlink)
1523     {
1524       struct link *duplicate;
1525       char *linkname = NULL;
1526       struct link *lp;
1527 
1528       assign_string (&linkname, safer_name_suffix (st->orig_file_name, true,
1529 						   absolute_names_option));
1530       transform_name (&linkname, XFORM_LINK);
1531 
1532       lp = xmalloc (offsetof (struct link, name)
1533 				 + strlen (linkname) + 1);
1534       lp->ino = st->stat.st_ino;
1535       lp->dev = st->stat.st_dev;
1536       lp->nlink = st->stat.st_nlink;
1537       strcpy (lp->name, linkname);
1538       free (linkname);
1539 
1540       if (! ((link_table
1541 	      || (link_table = hash_initialize (0, 0, hash_link,
1542 						compare_links, 0)))
1543 	     && (duplicate = hash_insert (link_table, lp))))
1544 	xalloc_die ();
1545 
1546       if (duplicate != lp)
1547 	abort ();
1548       lp->nlink--;
1549     }
1550 }
1551 
1552 /* For each dumped file, check if all its links were dumped. Emit
1553    warnings if it is not so. */
1554 void
check_links(void)1555 check_links (void)
1556 {
1557   struct link *lp;
1558 
1559   if (!link_table)
1560     return;
1561 
1562   for (lp = hash_get_first (link_table); lp;
1563        lp = hash_get_next (link_table, lp))
1564     {
1565       if (lp->nlink)
1566 	{
1567 	  WARN ((0, 0, _("Missing links to %s."), quote (lp->name)));
1568 	}
1569     }
1570 }
1571 
1572 /* Assuming DIR is the working directory, open FILE, using FLAGS to
1573    control the open.  A null DIR means to use ".".  If we are low on
1574    file descriptors, try to release one or more from DIR's parents to
1575    reuse it.  */
1576 int
subfile_open(struct tar_stat_info const * dir,char const * file,int flags)1577 subfile_open (struct tar_stat_info const *dir, char const *file, int flags)
1578 {
1579   int fd;
1580 
1581   static bool initialized;
1582   if (! initialized)
1583     {
1584       /* Initialize any tables that might be needed when file
1585 	 descriptors are exhausted, and whose initialization might
1586 	 require a file descriptor.  This includes the system message
1587 	 catalog and tar's message catalog.  */
1588       initialized = true;
1589       strerror (ENOENT);
1590       gettext ("");
1591     }
1592 
1593   while ((fd = openat (dir ? dir->fd : chdir_fd, file, flags)) < 0
1594 	 && open_failure_recover (dir))
1595     continue;
1596   return fd;
1597 }
1598 
1599 /* Restore the file descriptor for ST->parent, if it was temporarily
1600    closed to conserve file descriptors.  On failure, set the file
1601    descriptor to the negative of the corresponding errno value.  Call
1602    this every time a subdirectory is ascended from.  */
1603 void
restore_parent_fd(struct tar_stat_info const * st)1604 restore_parent_fd (struct tar_stat_info const *st)
1605 {
1606   struct tar_stat_info *parent = st->parent;
1607   if (parent && ! parent->fd)
1608     {
1609       int parentfd = openat (st->fd, "..", open_searchdir_flags);
1610       struct stat parentstat;
1611 
1612       if (parentfd < 0)
1613 	parentfd = - errno;
1614       else if (! (fstat (parentfd, &parentstat) == 0
1615 		  && parent->stat.st_ino == parentstat.st_ino
1616 		  && parent->stat.st_dev == parentstat.st_dev))
1617 	{
1618 	  close (parentfd);
1619 	  parentfd = IMPOSTOR_ERRNO;
1620 	}
1621 
1622       if (parentfd < 0)
1623 	{
1624 	  int origfd = openat (chdir_fd, parent->orig_file_name,
1625 			       open_searchdir_flags);
1626 	  if (0 <= origfd)
1627 	    {
1628 	      if (fstat (parentfd, &parentstat) == 0
1629 		  && parent->stat.st_ino == parentstat.st_ino
1630 		  && parent->stat.st_dev == parentstat.st_dev)
1631 		parentfd = origfd;
1632 	      else
1633 		close (origfd);
1634 	    }
1635 	}
1636 
1637       parent->fd = parentfd;
1638     }
1639 }
1640 
/* Dump a single file, recursing on directories.  ST is the file's
   status info, NAME its name relative to the parent directory, and P
   its full name (which may be relative to the working directory).

   Directories and regular (or contiguous) files are dumped with their
   contents and then re-examined to detect changes made while they
   were being read.  Symbolic links, character and block devices and
   FIFOs are archived as a header only.  Sockets, doors and unknown
   file types are skipped with a warning.  */

/* FIXME: One should make sure that for *every* path leading to setting
   exit_status to failure, a clear diagnostic has been issued.  */

static void
dump_file0 (struct tar_stat_info *st, char const *name, char const *p)
{
  union block *header;
  char type;
  off_t original_size;
  struct timespec original_ctime;
  off_t block_ordinal = -1;
  /* FD remains 0 when the file is not opened; a negative value is
     treated below as a negated errno value.  */
  int fd = 0;
  bool is_dir;
  struct tar_stat_info const *parent = st->parent;
  bool top_level = ! parent;
  int parentfd = top_level ? chdir_fd : parent->fd;
  /* Diagnostic routine to report through, if stat or open fails.  */
  void (*diag) (char const *) = 0;

  if (interactive_option && !confirm ("add", p))
    return;

  assign_string (&st->orig_file_name, p);
  assign_string (&st->file_name,
                 safer_name_suffix (p, false, absolute_names_option));

  transform_name (&st->file_name, XFORM_REGFILE);

  /* Stat the file and, if it is dumpable, open it and stat it again
     through the descriptor.  A negative parent descriptor holds a
     negated errno value.  */
  if (parentfd < 0 && ! top_level)
    {
      errno = - parentfd;
      diag = open_diag;
    }
  else if (fstatat (parentfd, name, &st->stat, fstatat_flags) != 0)
    diag = stat_diag;
  else if (file_dumpable_p (&st->stat))
    {
      fd = subfile_open (parent, name, open_read_flags);
      if (fd < 0)
	diag = open_diag;
      else
	{
	  st->fd = fd;
	  if (fstat (fd, &st->stat) != 0)
	    diag = stat_diag;
	}
    }
  if (diag)
    {
      file_removed_diag (p, top_level, diag);
      return;
    }

  /* Remember the size and ctime seen now, to detect later whether the
     file changed while being read.  */
  st->archive_file_size = original_size = st->stat.st_size;
  st->atime = get_stat_atime (&st->stat);
  st->mtime = get_stat_mtime (&st->stat);
  st->ctime = original_ctime = get_stat_ctime (&st->stat);

#ifdef S_ISHIDDEN
  /* For hidden files, report the name with "@" appended.  */
  if (S_ISHIDDEN (st->stat.st_mode))
    {
      char *new = (char *) alloca (strlen (p) + 2);
      if (new)
	{
	  strcpy (new, p);
	  strcat (new, "@");
	  p = new;
	}
    }
#endif

  /* See if we want only new files, and check if this one is too old to
     put in the archive.

     This check is omitted if incremental_option is set *and* the
     requested file is not explicitly listed in the command line.  */

  if (! (incremental_option && ! top_level)
      && !S_ISDIR (st->stat.st_mode)
      && OLDER_TAR_STAT_TIME (*st, m)
      && (!after_date_option || OLDER_TAR_STAT_TIME (*st, c)))
    {
      if (!incremental_option && verbose_option)
	WARNOPT (WARN_FILE_UNCHANGED,
		 (0, 0, _("%s: file is unchanged; not dumped"),
		  quotearg_colon (p)));
      return;
    }

  /* See if we are trying to dump the archive.  */
  if (sys_file_is_archive (st))
    {
      WARNOPT (WARN_IGNORE_ARCHIVE,
	       (0, 0, _("%s: file is the archive; not dumped"),
		quotearg_colon (p)));
      return;
    }

  is_dir = S_ISDIR (st->stat.st_mode) != 0;

  /* A non-directory already present in the archive is written as a
     hard link and needs nothing more.  */
  if (!is_dir && dump_hard_link (st))
    return;

  if (is_dir || S_ISREG (st->stat.st_mode) || S_ISCTG (st->stat.st_mode))
    {
      /* OK stays true only while the file has been dumped completely
	 and could be re-examined afterwards.  */
      bool ok;
      struct stat final_stat;

      xattrs_acls_get (parentfd, name, st, 0, !is_dir);
      xattrs_selinux_get (parentfd, name, st, fd);
      xattrs_xattrs_get (parentfd, name, st, fd);

      if (is_dir)
	{
	  const char *tag_file_name;
	  ensure_slash (&st->orig_file_name);
	  ensure_slash (&st->file_name);

	  if (check_exclusion_tags (st, &tag_file_name) == exclusion_tag_all)
	    {
	      exclusion_tag_warning (st->orig_file_name, tag_file_name,
				     _("directory not dumped"));
	      return;
	    }

	  ok = dump_dir (st);

	  /* Reload both descriptors after the directory has been
	     dumped, rather than relying on the values read before
	     descending.  */
	  fd = st->fd;
	  parentfd = top_level ? chdir_fd : parent->fd;
	}
      else
	{
	  enum dump_status status;

	  /* Try the sparse dumper first when applicable, falling back
	     to the regular one if it reports "not implemented".  */
	  if (fd && sparse_option && ST_IS_SPARSE (st->stat))
	    {
	      status = sparse_dump_file (fd, st);
	      if (status == dump_status_not_implemented)
		status = dump_regular_file (fd, st);
	    }
	  else
	    status = dump_regular_file (fd, st);

	  switch (status)
	    {
	    case dump_status_ok:
	    case dump_status_short:
	      /* The member was written, so record it for hard-link
		 detection.  */
	      file_count_links (st);
	      break;

	    case dump_status_fail:
	      break;

	    case dump_status_not_implemented:
	      abort ();
	    }

	  ok = status == dump_status_ok;
	}

      /* Stat the file once more: through FD when it is open, by name
	 relative to the parent when FD is 0.  */
      if (ok)
	{
	  if (fd < 0)
	    {
	      errno = - fd;
	      ok = false;
	    }
	  else if (fd == 0)
	    {
	      if (parentfd < 0 && ! top_level)
		{
		  errno = - parentfd;
		  ok = false;
		}
	      else
		ok = fstatat (parentfd, name, &final_stat, fstatat_flags) == 0;
	    }
	  else
	    ok = fstat (fd, &final_stat) == 0;

	  if (! ok)
	    file_removed_diag (p, top_level, stat_diag);
	}

      /* Warn if the ctime changed or the file grew while it was being
	 read; otherwise restore the access time if so requested.  */
      if (ok)
	{
	  if ((timespec_cmp (get_stat_ctime (&final_stat), original_ctime) != 0
	       /* Original ctime will change if the file is a directory and
		  --remove-files is given */
	       && !(remove_files_option && is_dir))
	      || original_size < final_stat.st_size)
	    {
	      WARNOPT (WARN_FILE_CHANGED,
		       (0, 0, _("%s: file changed as we read it"),
			quotearg_colon (p)));
	      set_exit_status (TAREXIT_DIFFERS);
	    }
	  else if (atime_preserve_option == replace_atime_preserve
		   && fd && (is_dir || original_size != 0)
		   && set_file_atime (fd, parentfd, name, st->atime) != 0)
	    utime_error (p);
	}

      /* The file is queued for removal only if everything, including
	 the close, succeeded.  */
      ok &= tar_stat_close (st);
      if (ok && remove_files_option)
	queue_deferred_unlink (p, is_dir);

      return;
    }
#ifdef HAVE_READLINK
  else if (S_ISLNK (st->stat.st_mode))
    {
      st->link_name = areadlinkat_with_size (parentfd, name, st->stat.st_size);
      if (!st->link_name)
	{
	  if (errno == ENOMEM)
	    xalloc_die ();
	  file_removed_diag (p, top_level, readlink_diag);
	  return;
	}
      transform_name (&st->link_name, XFORM_SYMLINK);
      /* A target too long for the header's link name field is handled
	 by write_long_link.  */
      if (NAME_FIELD_SIZE - (archive_format == OLDGNU_FORMAT)
	  < strlen (st->link_name))
	write_long_link (st);

      xattrs_selinux_get (parentfd, name, st, 0);
      xattrs_xattrs_get (parentfd, name, st, 0);

      block_ordinal = current_block_ordinal ();
      st->stat.st_size = 0;	/* force 0 size on symlink */
      header = start_header (st);
      if (!header)
	return;
      tar_copy_str (header->header.linkname, st->link_name, NAME_FIELD_SIZE);
      header->header.typeflag = SYMTYPE;
      finish_header (st, header, block_ordinal);
      /* nothing more to do to it */

      if (remove_files_option)
	queue_deferred_unlink (p, false);

      file_count_links (st);
      return;
    }
#endif
  /* The remaining dumpable types only select a type flag and collect
     attributes here; their header is written by the common code after
     this chain.  */
  else if (S_ISCHR (st->stat.st_mode))
    {
      type = CHRTYPE;
      xattrs_acls_get (parentfd, name, st, 0, true);
      xattrs_selinux_get (parentfd, name, st, 0);
      xattrs_xattrs_get (parentfd, name, st, 0);
    }
  else if (S_ISBLK (st->stat.st_mode))
    {
      type = BLKTYPE;
      xattrs_acls_get (parentfd, name, st, 0, true);
      xattrs_selinux_get (parentfd, name, st, 0);
      xattrs_xattrs_get (parentfd, name, st, 0);
    }
  else if (S_ISFIFO (st->stat.st_mode))
    {
      type = FIFOTYPE;
      xattrs_acls_get (parentfd, name, st, 0, true);
      xattrs_selinux_get (parentfd, name, st, 0);
      xattrs_xattrs_get (parentfd, name, st, 0);
    }
  else if (S_ISSOCK (st->stat.st_mode))
    {
      WARNOPT (WARN_FILE_IGNORED,
	       (0, 0, _("%s: socket ignored"), quotearg_colon (p)));
      return;
    }
  else if (S_ISDOOR (st->stat.st_mode))
    {
      WARNOPT (WARN_FILE_IGNORED,
	       (0, 0, _("%s: door ignored"), quotearg_colon (p)));
      return;
    }
  else
    {
      unknown_file_error (p);
      return;
    }

  /* Devices and FIFOs are reported as unknown file types when writing
     the V7 format.  */
  if (archive_format == V7_FORMAT)
    {
      unknown_file_error (p);
      return;
    }

  block_ordinal = current_block_ordinal ();
  st->stat.st_size = 0;	/* force 0 size */
  header = start_header (st);
  if (!header)
    return;
  header->header.typeflag = type;

  /* Only the device types record major and minor numbers.  */
  if (type != FIFOTYPE)
    {
      MAJOR_TO_CHARS (major (st->stat.st_rdev),
		      header->header.devmajor);
      MINOR_TO_CHARS (minor (st->stat.st_rdev),
		      header->header.devminor);
    }

  finish_header (st, header, block_ordinal);
  if (remove_files_option)
    queue_deferred_unlink (p, false);
}
1953 
1954 /* Dump a file, recursively.  PARENT describes the file's parent
1955    directory, NAME is the file's name relative to PARENT, and FULLNAME
1956    its full name, possibly relative to the working directory.  NAME
1957    may contain slashes at the top level of invocation.  */
1958 
1959 void
dump_file(struct tar_stat_info * parent,char const * name,char const * fullname)1960 dump_file (struct tar_stat_info *parent, char const *name,
1961 	   char const *fullname)
1962 {
1963   struct tar_stat_info st;
1964   tar_stat_init (&st);
1965   st.parent = parent;
1966   dump_file0 (&st, name, fullname);
1967   if (parent && listed_incremental_option)
1968     update_parent_directory (parent);
1969   tar_stat_destroy (&st);
1970 }
1971