1 /* Functions for dealing with sparse files
2 
3    Copyright 2003-2021 Free Software Foundation, Inc.
4 
5    This program is free software; you can redistribute it and/or modify it
6    under the terms of the GNU General Public License as published by the
7    Free Software Foundation; either version 3, or (at your option) any later
8    version.
9 
10    This program is distributed in the hope that it will be useful, but
11    WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
13    Public License for more details.
14 
15    You should have received a copy of the GNU General Public License along
16    with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17 
18 #include <system.h>
19 #include <inttostr.h>
20 #include <quotearg.h>
21 #include "common.h"
22 
/* Forward declarations: the operation table type and the selector are
   needed before struct tar_sparse_file is defined.  */
struct tar_sparse_file;
static bool sparse_select_optab (struct tar_sparse_file *file);

/* Phase of a file scan, handed to the format's scan_block operation.  */
enum sparse_scan_state
{
  scan_begin,   /* sent once, before the first block is read */
  scan_block,   /* sent while blocks are being examined */
  scan_end      /* sent once, after the last block */
};
32 
/* Table of format-specific operations.  A member may be null when the
   format has no such operation; the tar_sparse_* wrappers then yield a
   fixed result instead of calling it.  */
struct tar_sparse_optab
{
  /* Run by tar_sparse_init once the table has been selected.  */
  bool (*init) (struct tar_sparse_file *);

  /* Run by tar_sparse_done when the file has been processed.  */
  bool (*done) (struct tar_sparse_file *);

  /* Tell whether the current archive member is sparse.  */
  bool (*sparse_member_p) (struct tar_sparse_file *);

  /* Write the member header when dumping.  */
  bool (*dump_header) (struct tar_sparse_file *);

  /* Correct the sizes taken from the member header.  */
  bool (*fixup_header) (struct tar_sparse_file *);

  /* Read the sparse map when extracting, skipping or comparing.  */
  bool (*decode_header) (struct tar_sparse_file *);

  /* Notified of scan progress by the raw file scanner.  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
                      void *);

  /* Dump the map entry with the given index to the archive.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t);

  /* Extract the map entry with the given index from the archive.  */
  bool (*extract_region) (struct tar_sparse_file *, size_t);
};
46 
/* State shared by all operations on one sparse file.  */
struct tar_sparse_file
{
  /* File descriptor */
  int fd;

  /* Is fd seekable? */
  bool seekable;

  /* Current offset in fd if seekable==false.  Otherwise unused */
  off_t offset;

  /* Number of bytes actually written to the archive */
  off_t dumped_size;

  /* Information about the file */
  struct tar_stat_info *stat_info;

  /* Operation table */
  struct tar_sparse_optab const *optab;

  /* Any additional data optab calls might require */
  void *closure;
};
60 
61 /* Dump zeros to file->fd until offset is reached. It is used instead of
62    lseek if the output file is not seekable */
63 static bool
dump_zeros(struct tar_sparse_file * file,off_t offset)64 dump_zeros (struct tar_sparse_file *file, off_t offset)
65 {
66   static char const zero_buf[BLOCKSIZE];
67 
68   if (offset < file->offset)
69     {
70       errno = EINVAL;
71       return false;
72     }
73 
74   while (file->offset < offset)
75     {
76       size_t size = (BLOCKSIZE < offset - file->offset
77 		     ? BLOCKSIZE
78 		     : offset - file->offset);
79       ssize_t wrbytes;
80 
81       wrbytes = write (file->fd, zero_buf, size);
82       if (wrbytes <= 0)
83 	{
84 	  if (wrbytes == 0)
85 	    errno = EINVAL;
86 	  return false;
87 	}
88       file->offset += wrbytes;
89     }
90 
91   return true;
92 }
93 
94 static bool
tar_sparse_member_p(struct tar_sparse_file * file)95 tar_sparse_member_p (struct tar_sparse_file *file)
96 {
97   if (file->optab->sparse_member_p)
98     return file->optab->sparse_member_p (file);
99   return false;
100 }
101 
102 static bool
tar_sparse_init(struct tar_sparse_file * file)103 tar_sparse_init (struct tar_sparse_file *file)
104 {
105   memset (file, 0, sizeof *file);
106 
107   if (!sparse_select_optab (file))
108     return false;
109 
110   if (file->optab->init)
111     return file->optab->init (file);
112 
113   return true;
114 }
115 
116 static bool
tar_sparse_done(struct tar_sparse_file * file)117 tar_sparse_done (struct tar_sparse_file *file)
118 {
119   if (file->optab->done)
120     return file->optab->done (file);
121   return true;
122 }
123 
124 static bool
tar_sparse_scan(struct tar_sparse_file * file,enum sparse_scan_state state,void * block)125 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
126 		 void *block)
127 {
128   if (file->optab->scan_block)
129     return file->optab->scan_block (file, state, block);
130   return true;
131 }
132 
133 static bool
tar_sparse_dump_region(struct tar_sparse_file * file,size_t i)134 tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
135 {
136   if (file->optab->dump_region)
137     return file->optab->dump_region (file, i);
138   return false;
139 }
140 
141 static bool
tar_sparse_extract_region(struct tar_sparse_file * file,size_t i)142 tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
143 {
144   if (file->optab->extract_region)
145     return file->optab->extract_region (file, i);
146   return false;
147 }
148 
149 static bool
tar_sparse_dump_header(struct tar_sparse_file * file)150 tar_sparse_dump_header (struct tar_sparse_file *file)
151 {
152   if (file->optab->dump_header)
153     return file->optab->dump_header (file);
154   return false;
155 }
156 
157 static bool
tar_sparse_decode_header(struct tar_sparse_file * file)158 tar_sparse_decode_header (struct tar_sparse_file *file)
159 {
160   if (file->optab->decode_header)
161     return file->optab->decode_header (file);
162   return true;
163 }
164 
165 static bool
tar_sparse_fixup_header(struct tar_sparse_file * file)166 tar_sparse_fixup_header (struct tar_sparse_file *file)
167 {
168   if (file->optab->fixup_header)
169     return file->optab->fixup_header (file);
170   return true;
171 }
172 
173 
174 static bool
lseek_or_error(struct tar_sparse_file * file,off_t offset)175 lseek_or_error (struct tar_sparse_file *file, off_t offset)
176 {
177   if (file->seekable
178       ? lseek (file->fd, offset, SEEK_SET) < 0
179       : ! dump_zeros (file, offset))
180     {
181       seek_diag_details (file->stat_info->orig_file_name, offset);
182       return false;
183     }
184   return true;
185 }
186 
/* Return true if the SIZE bytes starting at BUFFER are all zero, and
   false as soon as a nonzero byte, i.e., useful data, is found.  An
   empty buffer counts as all zero.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  char const *p = buffer;
  char const *limit = buffer + size;

  for (; p < limit; p++)
    if (*p != '\0')
      return false;
  return true;
}
198 
199 static void
sparse_add_map(struct tar_stat_info * st,struct sp_array const * sp)200 sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
201 {
202   struct sp_array *sparse_map = st->sparse_map;
203   size_t avail = st->sparse_map_avail;
204   if (avail == st->sparse_map_size)
205     st->sparse_map = sparse_map =
206       x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map);
207   sparse_map[avail] = *sp;
208   st->sparse_map_avail = avail + 1;
209 }
210 
211 /* Scan the sparse file byte-by-byte and create its map. */
212 static bool
sparse_scan_file_raw(struct tar_sparse_file * file)213 sparse_scan_file_raw (struct tar_sparse_file *file)
214 {
215   struct tar_stat_info *st = file->stat_info;
216   int fd = file->fd;
217   char buffer[BLOCKSIZE];
218   size_t count = 0;
219   off_t offset = 0;
220   struct sp_array sp = {0, 0};
221 
222   st->archive_file_size = 0;
223 
224   if (!tar_sparse_scan (file, scan_begin, NULL))
225     return false;
226 
227   while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
228          && count != SAFE_READ_ERROR)
229     {
230       /* Analyze the block.  */
231       if (zero_block_p (buffer, count))
232         {
233           if (sp.numbytes)
234             {
235               sparse_add_map (st, &sp);
236               sp.numbytes = 0;
237               if (!tar_sparse_scan (file, scan_block, NULL))
238                 return false;
239             }
240         }
241       else
242         {
243           if (sp.numbytes == 0)
244             sp.offset = offset;
245           sp.numbytes += count;
246           st->archive_file_size += count;
247           if (!tar_sparse_scan (file, scan_block, buffer))
248             return false;
249         }
250 
251       offset += count;
252     }
253 
254   /* save one more sparse segment of length 0 to indicate that
255      the file ends with a hole */
256   if (sp.numbytes == 0)
257     sp.offset = offset;
258 
259   sparse_add_map (st, &sp);
260   st->archive_file_size += count;
261   return tar_sparse_scan (file, scan_end, NULL);
262 }
263 
264 static bool
sparse_scan_file_wholesparse(struct tar_sparse_file * file)265 sparse_scan_file_wholesparse (struct tar_sparse_file *file)
266 {
267   struct tar_stat_info *st = file->stat_info;
268   struct sp_array sp = {0, 0};
269 
270   /* Note that this function is called only for truly sparse files of size >= 1
271      block size (checked via ST_IS_SPARSE before).  See the thread
272      http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */
273   if (ST_NBLOCKS (st->stat) == 0)
274     {
275       st->archive_file_size = 0;
276       sp.offset = st->stat.st_size;
277       sparse_add_map (st, &sp);
278       return true;
279     }
280 
281   return false;
282 }
283 
284 #ifdef SEEK_HOLE
285 /* Try to engage SEEK_HOLE/SEEK_DATA feature. */
286 static bool
sparse_scan_file_seek(struct tar_sparse_file * file)287 sparse_scan_file_seek (struct tar_sparse_file *file)
288 {
289   struct tar_stat_info *st = file->stat_info;
290   int fd = file->fd;
291   struct sp_array sp = {0, 0};
292   off_t offset = 0;
293   off_t data_offset;
294   off_t hole_offset;
295 
296   st->archive_file_size = 0;
297 
298   for (;;)
299     {
300       /* locate first chunk of data */
301       data_offset = lseek (fd, offset, SEEK_DATA);
302 
303       if (data_offset == (off_t)-1)
304         /* ENXIO == EOF; error otherwise */
305         {
306           if (errno == ENXIO)
307             {
308               /* file ends with hole, add one more empty chunk of data */
309               sp.numbytes = 0;
310               sp.offset = st->stat.st_size;
311               sparse_add_map (st, &sp);
312               return true;
313             }
314           return false;
315         }
316 
317       hole_offset = lseek (fd, data_offset, SEEK_HOLE);
318 
319       /* according to specs, if FS does not fully support
320 	 SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper" around
321 	 classic lseek() call.  We must detect it here and try to use other
322 	 hole-detection methods. */
323       if (offset == 0 /* first loop */
324           && data_offset == 0
325           && hole_offset == st->stat.st_size)
326         {
327           lseek (fd, 0, SEEK_SET);
328           return false;
329         }
330 
331       sp.offset = data_offset;
332       sp.numbytes = hole_offset - data_offset;
333       sparse_add_map (st, &sp);
334 
335       st->archive_file_size += sp.numbytes;
336       offset = hole_offset;
337     }
338 
339   return true;
340 }
341 #endif
342 
343 static bool
sparse_scan_file(struct tar_sparse_file * file)344 sparse_scan_file (struct tar_sparse_file *file)
345 {
346   /* always check for completely sparse files */
347   if (sparse_scan_file_wholesparse (file))
348     return true;
349 
350   switch (hole_detection)
351     {
352     case HOLE_DETECTION_DEFAULT:
353     case HOLE_DETECTION_SEEK:
354 #ifdef SEEK_HOLE
355       if (sparse_scan_file_seek (file))
356         return true;
357 #else
358       if (hole_detection == HOLE_DETECTION_SEEK)
359 	WARN((0, 0,
360 	      _("\"seek\" hole detection is not supported, using \"raw\".")));
361       /* fall back to "raw" for this and all other files */
362       hole_detection = HOLE_DETECTION_RAW;
363 #endif
364       FALLTHROUGH;
365     case HOLE_DETECTION_RAW:
366       if (sparse_scan_file_raw (file))
367 	return true;
368     }
369 
370   return false;
371 }
372 
373 static struct tar_sparse_optab const oldgnu_optab;
374 static struct tar_sparse_optab const star_optab;
375 static struct tar_sparse_optab const pax_optab;
376 
377 static bool
sparse_select_optab(struct tar_sparse_file * file)378 sparse_select_optab (struct tar_sparse_file *file)
379 {
380   switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
381     {
382     case V7_FORMAT:
383     case USTAR_FORMAT:
384       return false;
385 
386     case OLDGNU_FORMAT:
387     case GNU_FORMAT: /*FIXME: This one should disappear? */
388       file->optab = &oldgnu_optab;
389       break;
390 
391     case POSIX_FORMAT:
392       file->optab = &pax_optab;
393       break;
394 
395     case STAR_FORMAT:
396       file->optab = &star_optab;
397       break;
398 
399     default:
400       return false;
401     }
402   return true;
403 }
404 
405 static bool
sparse_dump_region(struct tar_sparse_file * file,size_t i)406 sparse_dump_region (struct tar_sparse_file *file, size_t i)
407 {
408   union block *blk;
409   off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
410 
411   if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
412     return false;
413 
414   while (bytes_left > 0)
415     {
416       size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
417       size_t bytes_read;
418 
419       blk = find_next_block ();
420       bytes_read = safe_read (file->fd, blk->buffer, bufsize);
421       if (bytes_read == SAFE_READ_ERROR)
422 	{
423           read_diag_details (file->stat_info->orig_file_name,
424 	                     (file->stat_info->sparse_map[i].offset
425 			      + file->stat_info->sparse_map[i].numbytes
426 			      - bytes_left),
427 			     bufsize);
428 	  return false;
429 	}
430       else if (bytes_read == 0)
431 	{
432 	  char buf[UINTMAX_STRSIZE_BOUND];
433 	  struct stat st;
434 	  size_t n;
435 	  if (fstat (file->fd, &st) == 0)
436 	    n = file->stat_info->stat.st_size - st.st_size;
437 	  else
438 	    n = file->stat_info->stat.st_size
439 	      - (file->stat_info->sparse_map[i].offset
440 		 + file->stat_info->sparse_map[i].numbytes
441 		 - bytes_left);
442 
443 	  WARNOPT (WARN_FILE_SHRANK,
444 		   (0, 0,
445 		    ngettext ("%s: File shrank by %s byte; padding with zeros",
446 			      "%s: File shrank by %s bytes; padding with zeros",
447 			      n),
448 		    quotearg_colon (file->stat_info->orig_file_name),
449 		    STRINGIFY_BIGINT (n, buf)));
450 	  if (! ignore_failed_read_option)
451 	    set_exit_status (TAREXIT_DIFFERS);
452 	  return false;
453 	}
454 
455       memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
456       bytes_left -= bytes_read;
457       file->dumped_size += bytes_read;
458       set_next_block_after (blk);
459     }
460 
461   return true;
462 }
463 
464 static bool
sparse_extract_region(struct tar_sparse_file * file,size_t i)465 sparse_extract_region (struct tar_sparse_file *file, size_t i)
466 {
467   off_t write_size;
468 
469   if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
470     return false;
471 
472   write_size = file->stat_info->sparse_map[i].numbytes;
473 
474   if (write_size == 0)
475     {
476       /* Last block of the file is a hole */
477       if (file->seekable && sys_truncate (file->fd))
478 	truncate_warn (file->stat_info->orig_file_name);
479     }
480   else while (write_size > 0)
481     {
482       size_t count;
483       size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
484       union block *blk = find_next_block ();
485       if (!blk)
486 	{
487 	  ERROR ((0, 0, _("Unexpected EOF in archive")));
488 	  return false;
489 	}
490       set_next_block_after (blk);
491       file->dumped_size += BLOCKSIZE;
492       count = blocking_write (file->fd, blk->buffer, wrbytes);
493       write_size -= count;
494       mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
495       file->offset += count;
496       if (count != wrbytes)
497 	{
498 	  write_error_details (file->stat_info->orig_file_name,
499 			       count, wrbytes);
500 	  return false;
501 	}
502     }
503   return true;
504 }
505 
506 
507 
508 /* Interface functions */
509 enum dump_status
sparse_dump_file(int fd,struct tar_stat_info * st)510 sparse_dump_file (int fd, struct tar_stat_info *st)
511 {
512   bool rc;
513   struct tar_sparse_file file;
514 
515   if (!tar_sparse_init (&file))
516     return dump_status_not_implemented;
517 
518   file.stat_info = st;
519   file.fd = fd;
520   file.seekable = true; /* File *must* be seekable for dump to work */
521 
522   rc = sparse_scan_file (&file);
523   if (rc && file.optab->dump_region)
524     {
525       tar_sparse_dump_header (&file);
526 
527       if (fd >= 0)
528 	{
529 	  size_t i;
530 
531 	  mv_begin_write (file.stat_info->file_name,
532 		          file.stat_info->stat.st_size,
533 		          file.stat_info->archive_file_size - file.dumped_size);
534 	  for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
535 	    rc = tar_sparse_dump_region (&file, i);
536 	}
537     }
538 
539   pad_archive (file.stat_info->archive_file_size - file.dumped_size);
540   return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
541 }
542 
543 bool
sparse_member_p(struct tar_stat_info * st)544 sparse_member_p (struct tar_stat_info *st)
545 {
546   struct tar_sparse_file file;
547 
548   if (!tar_sparse_init (&file))
549     return false;
550   file.stat_info = st;
551   return tar_sparse_member_p (&file);
552 }
553 
554 bool
sparse_fixup_header(struct tar_stat_info * st)555 sparse_fixup_header (struct tar_stat_info *st)
556 {
557   struct tar_sparse_file file;
558 
559   if (!tar_sparse_init (&file))
560     return false;
561   file.stat_info = st;
562   return tar_sparse_fixup_header (&file);
563 }
564 
565 enum dump_status
sparse_extract_file(int fd,struct tar_stat_info * st,off_t * size)566 sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
567 {
568   bool rc = true;
569   struct tar_sparse_file file;
570   size_t i;
571 
572   if (!tar_sparse_init (&file))
573     return dump_status_not_implemented;
574 
575   file.stat_info = st;
576   file.fd = fd;
577   file.seekable = lseek (fd, 0, SEEK_SET) == 0;
578   file.offset = 0;
579 
580   rc = tar_sparse_decode_header (&file);
581   for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
582     rc = tar_sparse_extract_region (&file, i);
583   *size = file.stat_info->archive_file_size - file.dumped_size;
584   return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
585 }
586 
587 enum dump_status
sparse_skip_file(struct tar_stat_info * st)588 sparse_skip_file (struct tar_stat_info *st)
589 {
590   bool rc = true;
591   struct tar_sparse_file file;
592 
593   if (!tar_sparse_init (&file))
594     return dump_status_not_implemented;
595 
596   file.stat_info = st;
597   file.fd = -1;
598 
599   rc = tar_sparse_decode_header (&file);
600   skip_file (file.stat_info->archive_file_size - file.dumped_size);
601   return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
602 }
603 
604 
605 static bool
check_sparse_region(struct tar_sparse_file * file,off_t beg,off_t end)606 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
607 {
608   if (!lseek_or_error (file, beg))
609     return false;
610 
611   while (beg < end)
612     {
613       size_t bytes_read;
614       size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg;
615       char diff_buffer[BLOCKSIZE];
616 
617       bytes_read = safe_read (file->fd, diff_buffer, rdsize);
618       if (bytes_read == SAFE_READ_ERROR)
619 	{
620           read_diag_details (file->stat_info->orig_file_name,
621 	                     beg,
622 			     rdsize);
623 	  return false;
624 	}
625       else if (bytes_read == 0)
626 	{
627 	  report_difference (file->stat_info, _("Size differs"));
628 	  return false;
629 	}
630 
631       if (!zero_block_p (diff_buffer, bytes_read))
632 	{
633 	  char begbuf[INT_BUFSIZE_BOUND (off_t)];
634  	  report_difference (file->stat_info,
635 			     _("File fragment at %s is not a hole"),
636 			     offtostr (beg, begbuf));
637 	  return false;
638 	}
639 
640       beg += bytes_read;
641     }
642 
643   return true;
644 }
645 
646 static bool
check_data_region(struct tar_sparse_file * file,size_t i)647 check_data_region (struct tar_sparse_file *file, size_t i)
648 {
649   off_t size_left;
650 
651   if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
652     return false;
653   size_left = file->stat_info->sparse_map[i].numbytes;
654   mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
655 
656   while (size_left > 0)
657     {
658       size_t bytes_read;
659       size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
660       char diff_buffer[BLOCKSIZE];
661 
662       union block *blk = find_next_block ();
663       if (!blk)
664 	{
665 	  ERROR ((0, 0, _("Unexpected EOF in archive")));
666 	  return false;
667 	}
668       set_next_block_after (blk);
669       file->dumped_size += BLOCKSIZE;
670       bytes_read = safe_read (file->fd, diff_buffer, rdsize);
671       if (bytes_read == SAFE_READ_ERROR)
672 	{
673           read_diag_details (file->stat_info->orig_file_name,
674 			     (file->stat_info->sparse_map[i].offset
675 			      + file->stat_info->sparse_map[i].numbytes
676 			      - size_left),
677 			     rdsize);
678 	  return false;
679 	}
680       else if (bytes_read == 0)
681 	{
682 	  report_difference (&current_stat_info, _("Size differs"));
683 	  return false;
684 	}
685       size_left -= bytes_read;
686       mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
687       if (memcmp (blk->buffer, diff_buffer, bytes_read))
688 	{
689 	  report_difference (file->stat_info, _("Contents differ"));
690 	  return false;
691 	}
692     }
693   return true;
694 }
695 
696 bool
sparse_diff_file(int fd,struct tar_stat_info * st)697 sparse_diff_file (int fd, struct tar_stat_info *st)
698 {
699   bool rc = true;
700   struct tar_sparse_file file;
701   size_t i;
702   off_t offset = 0;
703 
704   if (!tar_sparse_init (&file))
705     return false;
706 
707   file.stat_info = st;
708   file.fd = fd;
709   file.seekable = true; /* File *must* be seekable for compare to work */
710 
711   rc = tar_sparse_decode_header (&file);
712   mv_begin_read (st);
713   for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
714     {
715       rc = check_sparse_region (&file,
716 				offset, file.stat_info->sparse_map[i].offset)
717 	    && check_data_region (&file, i);
718       offset = file.stat_info->sparse_map[i].offset
719 	        + file.stat_info->sparse_map[i].numbytes;
720     }
721 
722   if (!rc)
723     skip_file (file.stat_info->archive_file_size - file.dumped_size);
724   mv_end ();
725 
726   tar_sparse_done (&file);
727   return rc;
728 }
729 
730 
731 /* Old GNU Format. The sparse file information is stored in the
732    oldgnu_header in the following manner:
733 
734    The header is marked with type 'S'. Its 'size' field contains
735    the cumulative size of all non-empty blocks of the file. The
736    actual file size is stored in 'realsize' member of oldgnu_header.
737 
738    The map of the file is stored in a list of 'struct sparse'.
739    Each struct contains offset to the block of data and its
740    size (both as octal numbers). The first file header contains
741    at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
742    contains more structs, then the field 'isextended' of the main
743    header is set to 1 (binary) and the 'struct sparse_header'
744    header follows, containing at most 21 following structs
745    (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended'
746    field of the extended header is set and the next extension header
747    follows, etc... */
748 
/* Outcome of decoding one 'struct sparse' entry from a header.  */
enum oldgnu_add_status
{
  add_ok,       /* the entry was valid and has been added to the map */
  add_finish,   /* the entry is empty: the end of the map was reached */
  add_fail      /* the entry is invalid */
};
755 
756 static bool
oldgnu_sparse_member_p(struct tar_sparse_file * file)757 oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
758 {
759   return current_header->header.typeflag == GNUTYPE_SPARSE;
760 }
761 
762 /* Add a sparse item to the sparse file and its obstack */
763 static enum oldgnu_add_status
oldgnu_add_sparse(struct tar_sparse_file * file,struct sparse * s)764 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
765 {
766   struct sp_array sp;
767 
768   if (s->numbytes[0] == '\0')
769     return add_finish;
770   sp.offset = OFF_FROM_HEADER (s->offset);
771   sp.numbytes = OFF_FROM_HEADER (s->numbytes);
772   if (sp.offset < 0 || sp.numbytes < 0
773       || INT_ADD_OVERFLOW (sp.offset, sp.numbytes)
774       || file->stat_info->stat.st_size < sp.offset + sp.numbytes
775       || file->stat_info->archive_file_size < 0)
776     return add_fail;
777 
778   sparse_add_map (file->stat_info, &sp);
779   return add_ok;
780 }
781 
782 static bool
oldgnu_fixup_header(struct tar_sparse_file * file)783 oldgnu_fixup_header (struct tar_sparse_file *file)
784 {
785   /* NOTE! st_size was initialized from the header
786      which actually contains archived size. The following fixes it */
787   off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
788   file->stat_info->archive_file_size = file->stat_info->stat.st_size;
789   file->stat_info->stat.st_size = max (0, realsize);
790   return 0 <= realsize;
791 }
792 
793 /* Convert old GNU format sparse data to internal representation */
794 static bool
oldgnu_get_sparse_info(struct tar_sparse_file * file)795 oldgnu_get_sparse_info (struct tar_sparse_file *file)
796 {
797   size_t i;
798   union block *h = current_header;
799   int ext_p;
800   enum oldgnu_add_status rc;
801 
802   file->stat_info->sparse_map_avail = 0;
803   for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
804     {
805       rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
806       if (rc != add_ok)
807 	break;
808     }
809 
810   for (ext_p = h->oldgnu_header.isextended;
811        rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
812     {
813       h = find_next_block ();
814       if (!h)
815 	{
816 	  ERROR ((0, 0, _("Unexpected EOF in archive")));
817 	  return false;
818 	}
819       set_next_block_after (h);
820       for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
821 	rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
822     }
823 
824   if (rc == add_fail)
825     {
826       ERROR ((0, 0, _("%s: invalid sparse archive member"),
827 	      file->stat_info->orig_file_name));
828       return false;
829     }
830   return true;
831 }
832 
833 static void
oldgnu_store_sparse_info(struct tar_sparse_file * file,size_t * pindex,struct sparse * sp,size_t sparse_size)834 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
835 			  struct sparse *sp, size_t sparse_size)
836 {
837   for (; *pindex < file->stat_info->sparse_map_avail
838 	 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
839     {
840       OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
841 		    sp->offset);
842       OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
843 		    sp->numbytes);
844     }
845 }
846 
847 static bool
oldgnu_dump_header(struct tar_sparse_file * file)848 oldgnu_dump_header (struct tar_sparse_file *file)
849 {
850   off_t block_ordinal = current_block_ordinal ();
851   union block *blk;
852   size_t i;
853 
854   blk = start_header (file->stat_info);
855   blk->header.typeflag = GNUTYPE_SPARSE;
856   if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
857     blk->oldgnu_header.isextended = 1;
858 
859   /* Store the real file size */
860   OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
861   /* Store the effective (shrunken) file size */
862   OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
863 
864   i = 0;
865   oldgnu_store_sparse_info (file, &i,
866 			    blk->oldgnu_header.sp,
867 			    SPARSES_IN_OLDGNU_HEADER);
868   blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
869   finish_header (file->stat_info, blk, block_ordinal);
870 
871   while (i < file->stat_info->sparse_map_avail)
872     {
873       blk = find_next_block ();
874       memset (blk->buffer, 0, BLOCKSIZE);
875       oldgnu_store_sparse_info (file, &i,
876 				blk->sparse_header.sp,
877 				SPARSES_IN_SPARSE_HEADER);
878       if (i < file->stat_info->sparse_map_avail)
879 	blk->sparse_header.isextended = 1;
880       set_next_block_after (blk);
881     }
882   return true;
883 }
884 
885 static struct tar_sparse_optab const oldgnu_optab = {
886   NULL,  /* No init function */
887   NULL,  /* No done function */
888   oldgnu_sparse_member_p,
889   oldgnu_dump_header,
890   oldgnu_fixup_header,
891   oldgnu_get_sparse_info,
892   NULL,  /* No scan_block function */
893   sparse_dump_region,
894   sparse_extract_region,
895 };
896 
897 
898 /* Star */
899 
900 static bool
star_sparse_member_p(struct tar_sparse_file * file)901 star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
902 {
903   return current_header->header.typeflag == GNUTYPE_SPARSE;
904 }
905 
906 static bool
star_fixup_header(struct tar_sparse_file * file)907 star_fixup_header (struct tar_sparse_file *file)
908 {
909   /* NOTE! st_size was initialized from the header
910      which actually contains archived size. The following fixes it */
911   off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize);
912   file->stat_info->archive_file_size = file->stat_info->stat.st_size;
913   file->stat_info->stat.st_size = max (0, realsize);
914   return 0 <= realsize;
915 }
916 
917 /* Convert STAR format sparse data to internal representation */
918 static bool
star_get_sparse_info(struct tar_sparse_file * file)919 star_get_sparse_info (struct tar_sparse_file *file)
920 {
921   size_t i;
922   union block *h = current_header;
923   int ext_p;
924   enum oldgnu_add_status rc = add_ok;
925 
926   file->stat_info->sparse_map_avail = 0;
927 
928   if (h->star_in_header.prefix[0] == '\0'
929       && h->star_in_header.sp[0].offset[10] != '\0')
930     {
931       /* Old star format */
932       for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
933 	{
934 	  rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
935 	  if (rc != add_ok)
936 	    break;
937 	}
938       ext_p = h->star_in_header.isextended;
939     }
940   else
941     ext_p = 1;
942 
943   for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
944     {
945       h = find_next_block ();
946       if (!h)
947 	{
948 	  ERROR ((0, 0, _("Unexpected EOF in archive")));
949 	  return false;
950 	}
951       set_next_block_after (h);
952       for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
953 	rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
954       file->dumped_size += BLOCKSIZE;
955     }
956 
957   if (rc == add_fail)
958     {
959       ERROR ((0, 0, _("%s: invalid sparse archive member"),
960 	      file->stat_info->orig_file_name));
961       return false;
962     }
963   return true;
964 }
965 
966 
967 static struct tar_sparse_optab const star_optab = {
968   NULL,  /* No init function */
969   NULL,  /* No done function */
970   star_sparse_member_p,
971   NULL,
972   star_fixup_header,
973   star_get_sparse_info,
974   NULL,  /* No scan_block function */
975   NULL, /* No dump region function */
976   sparse_extract_region,
977 };
978 
979 
980 /* GNU PAX sparse file format. There are several versions:
981 
982    * 0.0
983 
984    The initial version of sparse format used by tar 1.14-1.15.1.
985    The sparse file map is stored in x header:
986 
987    GNU.sparse.size      Real size of the stored file
988    GNU.sparse.numblocks Number of blocks in the sparse map
989    repeat numblocks time
990      GNU.sparse.offset    Offset of the next data block
991      GNU.sparse.numbytes  Size of the next data block
992    end repeat
993 
994    This has been reported as conflicting with the POSIX specs. The reason is
995    that offsets and sizes of non-zero data blocks were stored in multiple
996    instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
997    POSIX requires the latest occurrence of the variable to override all
998    previous occurrences.
999 
   To avoid this incompatibility, the following two versions were introduced.
1001 
1002    * 0.1
1003 
1004    Used by tar 1.15.2 -- 1.15.91 (alpha releases).
1005 
1006    The sparse file map is stored in
1007    x header:
1008 
1009    GNU.sparse.size      Real size of the stored file
1010    GNU.sparse.numblocks Number of blocks in the sparse map
1011    GNU.sparse.map       Map of non-null data chunks. A string consisting
1012                        of comma-separated values "offset,size[,offset,size]..."
1013 
1014    The resulting GNU.sparse.map string can be *very* long. While POSIX does not
1015    impose any limit on the length of a x header variable, this can confuse some
1016    tars.
1017 
1018    * 1.0
1019 
1020    Starting from this version, the exact sparse format version is specified
   explicitly in the header using the following variables:
1022 
1023    GNU.sparse.major     Major version
1024    GNU.sparse.minor     Minor version
1025 
1026    X header keeps the following variables:
1027 
1028    GNU.sparse.name      Real file name of the sparse file
1029    GNU.sparse.realsize  Real size of the stored file (corresponds to the old
1030                         GNU.sparse.size variable)
1031 
1032    The name field of the ustar header is constructed using the pattern
1033    "%d/GNUSparseFile.%p/%f".
1034 
1035    The sparse map itself is stored in the file data block, preceding the actual
   file data. It consists of a series of decimal numbers of arbitrary length,
1037    delimited by newlines. The map is padded with nulls to the nearest block
1038    boundary.
1039 
1040    The first number gives the number of entries in the map. Following are map
1041    entries, each one consisting of two numbers giving the offset and size of
1042    the data block it describes.
1043 
1044    The format is designed in such a way that non-posix aware tars and tars not
1045    supporting GNU.sparse.* keywords will extract each sparse file in its
1046    condensed form with the file map attached and will place it into a separate
1047    directory. Then, using a simple program it would be possible to expand the
1048    file to its original form even without GNU tar.
1049 
   By default, v.1.0 archives are created. To use other formats,
1051    --sparse-version option is provided. Additionally, v.0.0 can be obtained
1052    by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
1053    --pax-option delete=GNU.sparse.map
1054 */
1055 
1056 static bool
pax_sparse_member_p(struct tar_sparse_file * file)1057 pax_sparse_member_p (struct tar_sparse_file *file)
1058 {
1059   return file->stat_info->sparse_map_avail > 0
1060           || file->stat_info->sparse_major > 0;
1061 }
1062 
1063 /* Start a header that uses the effective (shrunken) file size.  */
1064 static union block *
pax_start_header(struct tar_stat_info * st)1065 pax_start_header (struct tar_stat_info *st)
1066 {
1067   off_t realsize = st->stat.st_size;
1068   union block *blk;
1069   st->stat.st_size = st->archive_file_size;
1070   blk = start_header (st);
1071   st->stat.st_size = realsize;
1072   return blk;
1073 }
1074 
1075 static bool
pax_dump_header_0(struct tar_sparse_file * file)1076 pax_dump_header_0 (struct tar_sparse_file *file)
1077 {
1078   off_t block_ordinal = current_block_ordinal ();
1079   union block *blk;
1080   size_t i;
1081   char nbuf[UINTMAX_STRSIZE_BOUND];
1082   struct sp_array *map = file->stat_info->sparse_map;
1083   char *save_file_name = NULL;
1084 
1085   /* Store the real file size */
1086   xheader_store ("GNU.sparse.size", file->stat_info, NULL);
1087   xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
1088 
1089   if (xheader_keyword_deleted_p ("GNU.sparse.map")
1090       || tar_sparse_minor == 0)
1091     {
1092       for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1093 	{
1094 	  xheader_store ("GNU.sparse.offset", file->stat_info, &i);
1095 	  xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
1096 	}
1097     }
1098   else
1099     {
1100       xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1101       save_file_name = file->stat_info->file_name;
1102       file->stat_info->file_name = xheader_format_name (file->stat_info,
1103 					       "%d/GNUSparseFile.%p/%f", 0);
1104 
1105       xheader_string_begin (&file->stat_info->xhdr);
1106       for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1107 	{
1108 	  if (i)
1109 	    xheader_string_add (&file->stat_info->xhdr, ",");
1110 	  xheader_string_add (&file->stat_info->xhdr,
1111 			      umaxtostr (map[i].offset, nbuf));
1112 	  xheader_string_add (&file->stat_info->xhdr, ",");
1113 	  xheader_string_add (&file->stat_info->xhdr,
1114 			      umaxtostr (map[i].numbytes, nbuf));
1115 	}
1116       if (!xheader_string_end (&file->stat_info->xhdr,
1117 			       "GNU.sparse.map"))
1118 	{
1119 	  free (file->stat_info->file_name);
1120 	  file->stat_info->file_name = save_file_name;
1121 	  return false;
1122 	}
1123     }
1124   blk = pax_start_header (file->stat_info);
1125   finish_header (file->stat_info, blk, block_ordinal);
1126   if (save_file_name)
1127     {
1128       free (file->stat_info->file_name);
1129       file->stat_info->file_name = save_file_name;
1130     }
1131   return true;
1132 }
1133 
1134 static bool
pax_dump_header_1(struct tar_sparse_file * file)1135 pax_dump_header_1 (struct tar_sparse_file *file)
1136 {
1137   off_t block_ordinal = current_block_ordinal ();
1138   union block *blk;
1139   char *p, *q;
1140   size_t i;
1141   char nbuf[UINTMAX_STRSIZE_BOUND];
1142   off_t size = 0;
1143   struct sp_array *map = file->stat_info->sparse_map;
1144   char *save_file_name = file->stat_info->file_name;
1145 
1146 #define COPY_STRING(b,dst,src) do                \
1147  {                                               \
1148    char *endp = b->buffer + BLOCKSIZE;           \
1149    char const *srcp = src;                       \
1150    while (*srcp)                                 \
1151      {                                           \
1152        if (dst == endp)                          \
1153 	 {                                       \
1154 	   set_next_block_after (b);             \
1155 	   b = find_next_block ();               \
1156            dst = b->buffer;                      \
1157 	   endp = b->buffer + BLOCKSIZE;         \
1158 	 }                                       \
1159        *dst++ = *srcp++;                         \
1160      }                                           \
1161    } while (0)
1162 
1163   /* Compute stored file size */
1164   p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1165   size += strlen (p) + 1;
1166   for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1167     {
1168       p = umaxtostr (map[i].offset, nbuf);
1169       size += strlen (p) + 1;
1170       p = umaxtostr (map[i].numbytes, nbuf);
1171       size += strlen (p) + 1;
1172     }
1173   size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
1174   file->stat_info->archive_file_size += size * BLOCKSIZE;
1175   file->dumped_size += size * BLOCKSIZE;
1176 
1177   /* Store sparse file identification */
1178   xheader_store ("GNU.sparse.major", file->stat_info, NULL);
1179   xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
1180   xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1181   xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
1182 
1183   file->stat_info->file_name =
1184     xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0);
1185   /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */
1186   if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE)
1187     file->stat_info->file_name[NAME_FIELD_SIZE] = 0;
1188 
1189   blk = pax_start_header (file->stat_info);
1190   finish_header (file->stat_info, blk, block_ordinal);
1191   free (file->stat_info->file_name);
1192   file->stat_info->file_name = save_file_name;
1193 
1194   blk = find_next_block ();
1195   q = blk->buffer;
1196   p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1197   COPY_STRING (blk, q, p);
1198   COPY_STRING (blk, q, "\n");
1199   for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1200     {
1201       p = umaxtostr (map[i].offset, nbuf);
1202       COPY_STRING (blk, q, p);
1203       COPY_STRING (blk, q, "\n");
1204       p = umaxtostr (map[i].numbytes, nbuf);
1205       COPY_STRING (blk, q, p);
1206       COPY_STRING (blk, q, "\n");
1207     }
1208   memset (q, 0, BLOCKSIZE - (q - blk->buffer));
1209   set_next_block_after (blk);
1210   return true;
1211 }
1212 
1213 static bool
pax_dump_header(struct tar_sparse_file * file)1214 pax_dump_header (struct tar_sparse_file *file)
1215 {
1216   file->stat_info->sparse_major = tar_sparse_major;
1217   file->stat_info->sparse_minor = tar_sparse_minor;
1218 
1219   return (file->stat_info->sparse_major == 0) ?
1220            pax_dump_header_0 (file) : pax_dump_header_1 (file);
1221 }
1222 
/* Convert the string ARG, which must consist solely of decimal digits,
   to a number not greater than MAXVAL.

   On success store the value in *NUM and return true.  Return false,
   leaving *NUM untouched, if ARG does not begin with a digit, is
   followed by anything other than the terminating null, does not fit
   in uintmax_t, or exceeds MAXVAL.  */
static bool
decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
{
  char *end;
  uintmax_t value;

  /* Insist on a leading digit: strtoumax by itself would accept leading
     white space and a sign.  */
  if (*arg < '0' || *arg > '9')
    return false;

  errno = 0;
  value = strtoumax (arg, &end, 10);

  if (errno == ERANGE)
    return false;
  if (value > maxval)
    return false;
  if (*end != '\0')
    return false;

  *num = value;
  return true;
}
1241 
1242 static bool
pax_decode_header(struct tar_sparse_file * file)1243 pax_decode_header (struct tar_sparse_file *file)
1244 {
1245   if (file->stat_info->sparse_major > 0)
1246     {
1247       uintmax_t u;
1248       char nbuf[UINTMAX_STRSIZE_BOUND];
1249       union block *blk;
1250       char *p;
1251       size_t i;
1252       off_t start;
1253 
1254 #define COPY_BUF(b,buf,src) do                                     \
1255  {                                                                 \
1256    char *endp = b->buffer + BLOCKSIZE;                             \
1257    char *dst = buf;                                                \
1258    do                                                              \
1259      {                                                             \
1260        if (dst == buf + UINTMAX_STRSIZE_BOUND -1)                  \
1261          {                                                         \
1262            ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
1263 	          file->stat_info->orig_file_name));               \
1264            return false;                                           \
1265          }                                                         \
1266        if (src == endp)                                            \
1267 	 {                                                         \
1268 	   set_next_block_after (b);                               \
1269            b = find_next_block ();                                 \
1270            if (!b)                                                 \
1271              FATAL_ERROR ((0, 0, _("Unexpected EOF in archive"))); \
1272            src = b->buffer;                                        \
1273 	   endp = b->buffer + BLOCKSIZE;                           \
1274 	 }                                                         \
1275        *dst = *src++;                                              \
1276      }                                                             \
1277    while (*dst++ != '\n');                                         \
1278    dst[-1] = 0;                                                    \
1279  } while (0)
1280 
1281       start = current_block_ordinal ();
1282       set_next_block_after (current_header);
1283       blk = find_next_block ();
1284       if (!blk)
1285         FATAL_ERROR ((0, 0, _("Unexpected EOF in archive")));
1286       p = blk->buffer;
1287       COPY_BUF (blk,nbuf,p);
1288       if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1289 	{
1290 	  ERROR ((0, 0, _("%s: malformed sparse archive member"),
1291 		  file->stat_info->orig_file_name));
1292 	  return false;
1293 	}
1294       file->stat_info->sparse_map_size = u;
1295       file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
1296 					     sizeof (*file->stat_info->sparse_map));
1297       file->stat_info->sparse_map_avail = 0;
1298       for (i = 0; i < file->stat_info->sparse_map_size; i++)
1299 	{
1300 	  struct sp_array sp;
1301 
1302 	  COPY_BUF (blk,nbuf,p);
1303 	  if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1304 	    {
1305 	      ERROR ((0, 0, _("%s: malformed sparse archive member"),
1306 		      file->stat_info->orig_file_name));
1307 	      return false;
1308 	    }
1309 	  sp.offset = u;
1310 	  COPY_BUF (blk,nbuf,p);
1311 	  if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1312 	    {
1313 	      ERROR ((0, 0, _("%s: malformed sparse archive member"),
1314 		      file->stat_info->orig_file_name));
1315 	      return false;
1316 	    }
1317 	  sp.numbytes = u;
1318 	  sparse_add_map (file->stat_info, &sp);
1319 	}
1320       set_next_block_after (blk);
1321 
1322       file->dumped_size += BLOCKSIZE * (current_block_ordinal () - start);
1323     }
1324 
1325   return true;
1326 }
1327 
1328 static struct tar_sparse_optab const pax_optab = {
1329   NULL,  /* No init function */
1330   NULL,  /* No done function */
1331   pax_sparse_member_p,
1332   pax_dump_header,
1333   NULL,
1334   pax_decode_header,
1335   NULL,  /* No scan_block function */
1336   sparse_dump_region,
1337   sparse_extract_region,
1338 };
1339