1 /* Functions for dealing with sparse files
2
3 Copyright 2003-2021 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include <system.h>
19 #include <inttostr.h>
20 #include <quotearg.h>
21 #include "common.h"
22
/* Forward declarations.  sparse_select_optab is defined later in this
   file; it is needed earlier by tar_sparse_init.  */
struct tar_sparse_file;
static bool sparse_select_optab (struct tar_sparse_file *file);
25
/* Phase indicator handed to the optab scan_block hook while a file is
   being scanned for holes.  */
enum sparse_scan_state
  {
    scan_begin,   /* passed once, before the first block is read */
    scan_block,   /* passed while blocks are being analyzed */
    scan_end      /* passed once, after the last block was read */
  };
32
/* Table of format-specific operations.  Any member may be NULL; the
   tar_sparse_* wrappers below supply a default result in that case.  */
struct tar_sparse_optab
{
  /* Called from tar_sparse_init after the table was selected.  */
  bool (*init) (struct tar_sparse_file *);

  /* Called from tar_sparse_done when an operation is finished.  */
  bool (*done) (struct tar_sparse_file *);

  /* Tell whether the current member is a sparse one.  */
  bool (*sparse_member_p) (struct tar_sparse_file *);

  /* Write the member header(s) to the archive.  */
  bool (*dump_header) (struct tar_sparse_file *);

  /* Adjust the sizes stored in the stat info after reading a header.  */
  bool (*fixup_header) (struct tar_sparse_file *);

  /* Build the sparse map from the archive header(s).  */
  bool (*decode_header) (struct tar_sparse_file *);

  /* Observe the raw scan; see enum sparse_scan_state.  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
                      void *);

  /* Dump the map entry with the given index to the archive.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t);

  /* Extract the map entry with the given index from the archive.  */
  bool (*extract_region) (struct tar_sparse_file *, size_t);
};
46
/* State shared by all sparse-file operations on one archive member.  */
struct tar_sparse_file
{
  /* File descriptor.  */
  int fd;

  /* Is fd seekable?  */
  bool seekable;

  /* Current offset in fd if seekable==false.  Otherwise unused.  */
  off_t offset;

  /* Number of bytes actually written to the archive.  */
  off_t dumped_size;

  /* Information about the file.  */
  struct tar_stat_info *stat_info;

  /* Operation table.  */
  struct tar_sparse_optab const *optab;

  /* Any additional data optab calls might require.  */
  void *closure;
};
60
61 /* Dump zeros to file->fd until offset is reached. It is used instead of
62 lseek if the output file is not seekable */
63 static bool
dump_zeros(struct tar_sparse_file * file,off_t offset)64 dump_zeros (struct tar_sparse_file *file, off_t offset)
65 {
66 static char const zero_buf[BLOCKSIZE];
67
68 if (offset < file->offset)
69 {
70 errno = EINVAL;
71 return false;
72 }
73
74 while (file->offset < offset)
75 {
76 size_t size = (BLOCKSIZE < offset - file->offset
77 ? BLOCKSIZE
78 : offset - file->offset);
79 ssize_t wrbytes;
80
81 wrbytes = write (file->fd, zero_buf, size);
82 if (wrbytes <= 0)
83 {
84 if (wrbytes == 0)
85 errno = EINVAL;
86 return false;
87 }
88 file->offset += wrbytes;
89 }
90
91 return true;
92 }
93
94 static bool
tar_sparse_member_p(struct tar_sparse_file * file)95 tar_sparse_member_p (struct tar_sparse_file *file)
96 {
97 if (file->optab->sparse_member_p)
98 return file->optab->sparse_member_p (file);
99 return false;
100 }
101
102 static bool
tar_sparse_init(struct tar_sparse_file * file)103 tar_sparse_init (struct tar_sparse_file *file)
104 {
105 memset (file, 0, sizeof *file);
106
107 if (!sparse_select_optab (file))
108 return false;
109
110 if (file->optab->init)
111 return file->optab->init (file);
112
113 return true;
114 }
115
116 static bool
tar_sparse_done(struct tar_sparse_file * file)117 tar_sparse_done (struct tar_sparse_file *file)
118 {
119 if (file->optab->done)
120 return file->optab->done (file);
121 return true;
122 }
123
124 static bool
tar_sparse_scan(struct tar_sparse_file * file,enum sparse_scan_state state,void * block)125 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
126 void *block)
127 {
128 if (file->optab->scan_block)
129 return file->optab->scan_block (file, state, block);
130 return true;
131 }
132
133 static bool
tar_sparse_dump_region(struct tar_sparse_file * file,size_t i)134 tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
135 {
136 if (file->optab->dump_region)
137 return file->optab->dump_region (file, i);
138 return false;
139 }
140
141 static bool
tar_sparse_extract_region(struct tar_sparse_file * file,size_t i)142 tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
143 {
144 if (file->optab->extract_region)
145 return file->optab->extract_region (file, i);
146 return false;
147 }
148
149 static bool
tar_sparse_dump_header(struct tar_sparse_file * file)150 tar_sparse_dump_header (struct tar_sparse_file *file)
151 {
152 if (file->optab->dump_header)
153 return file->optab->dump_header (file);
154 return false;
155 }
156
157 static bool
tar_sparse_decode_header(struct tar_sparse_file * file)158 tar_sparse_decode_header (struct tar_sparse_file *file)
159 {
160 if (file->optab->decode_header)
161 return file->optab->decode_header (file);
162 return true;
163 }
164
165 static bool
tar_sparse_fixup_header(struct tar_sparse_file * file)166 tar_sparse_fixup_header (struct tar_sparse_file *file)
167 {
168 if (file->optab->fixup_header)
169 return file->optab->fixup_header (file);
170 return true;
171 }
172
173
174 static bool
lseek_or_error(struct tar_sparse_file * file,off_t offset)175 lseek_or_error (struct tar_sparse_file *file, off_t offset)
176 {
177 if (file->seekable
178 ? lseek (file->fd, offset, SEEK_SET) < 0
179 : ! dump_zeros (file, offset))
180 {
181 seek_diag_details (file->stat_info->orig_file_name, offset);
182 return false;
183 }
184 return true;
185 }
186
/* Return true if the SIZE bytes starting at BUFFER are all zero (an
   empty buffer counts as all zero).  The scan stops at the first
   nonzero byte, i.e. at the first byte of useful data.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  char const *p = buffer;
  char const *const limit = buffer + size;

  for (; p != limit; p++)
    if (*p != '\0')
      return false;

  return true;
}
198
199 static void
sparse_add_map(struct tar_stat_info * st,struct sp_array const * sp)200 sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
201 {
202 struct sp_array *sparse_map = st->sparse_map;
203 size_t avail = st->sparse_map_avail;
204 if (avail == st->sparse_map_size)
205 st->sparse_map = sparse_map =
206 x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map);
207 sparse_map[avail] = *sp;
208 st->sparse_map_avail = avail + 1;
209 }
210
211 /* Scan the sparse file byte-by-byte and create its map. */
212 static bool
sparse_scan_file_raw(struct tar_sparse_file * file)213 sparse_scan_file_raw (struct tar_sparse_file *file)
214 {
215 struct tar_stat_info *st = file->stat_info;
216 int fd = file->fd;
217 char buffer[BLOCKSIZE];
218 size_t count = 0;
219 off_t offset = 0;
220 struct sp_array sp = {0, 0};
221
222 st->archive_file_size = 0;
223
224 if (!tar_sparse_scan (file, scan_begin, NULL))
225 return false;
226
227 while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
228 && count != SAFE_READ_ERROR)
229 {
230 /* Analyze the block. */
231 if (zero_block_p (buffer, count))
232 {
233 if (sp.numbytes)
234 {
235 sparse_add_map (st, &sp);
236 sp.numbytes = 0;
237 if (!tar_sparse_scan (file, scan_block, NULL))
238 return false;
239 }
240 }
241 else
242 {
243 if (sp.numbytes == 0)
244 sp.offset = offset;
245 sp.numbytes += count;
246 st->archive_file_size += count;
247 if (!tar_sparse_scan (file, scan_block, buffer))
248 return false;
249 }
250
251 offset += count;
252 }
253
254 /* save one more sparse segment of length 0 to indicate that
255 the file ends with a hole */
256 if (sp.numbytes == 0)
257 sp.offset = offset;
258
259 sparse_add_map (st, &sp);
260 st->archive_file_size += count;
261 return tar_sparse_scan (file, scan_end, NULL);
262 }
263
264 static bool
sparse_scan_file_wholesparse(struct tar_sparse_file * file)265 sparse_scan_file_wholesparse (struct tar_sparse_file *file)
266 {
267 struct tar_stat_info *st = file->stat_info;
268 struct sp_array sp = {0, 0};
269
270 /* Note that this function is called only for truly sparse files of size >= 1
271 block size (checked via ST_IS_SPARSE before). See the thread
272 http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */
273 if (ST_NBLOCKS (st->stat) == 0)
274 {
275 st->archive_file_size = 0;
276 sp.offset = st->stat.st_size;
277 sparse_add_map (st, &sp);
278 return true;
279 }
280
281 return false;
282 }
283
#ifdef SEEK_HOLE
/* Try to engage SEEK_HOLE/SEEK_DATA feature to build the sparse map of
   FILE without reading its contents.

   Return true if the map was built; archive_file_size then holds the
   sum of the sizes of all data chunks, and the map ends with an empty
   entry located at st_size.

   Return false if this method cannot be used, so that the caller can
   try another one.  In that case any map entries added by this function
   are discarded, archive_file_size is reset to 0 and the descriptor is
   rewound to the beginning of the file, because lseek with
   SEEK_DATA/SEEK_HOLE moves the file position.  */
static bool
sparse_scan_file_seek (struct tar_sparse_file *file)
{
  struct tar_stat_info *st = file->stat_info;
  int fd = file->fd;
  size_t const saved_avail = st->sparse_map_avail;
  struct sp_array sp = {0, 0};
  off_t offset = 0;

  st->archive_file_size = 0;

  for (;;)
    {
      off_t data_offset;
      off_t hole_offset;

      /* Locate the next chunk of data.  */
      data_offset = lseek (fd, offset, SEEK_DATA);

      if (data_offset == (off_t) -1)
        {
          /* ENXIO == EOF; error otherwise.  */
          if (errno == ENXIO)
            {
              /* No more data: add one more empty chunk at the end.  */
              sp.numbytes = 0;
              sp.offset = st->stat.st_size;
              sparse_add_map (st, &sp);
              return true;
            }
          break;
        }

      hole_offset = lseek (fd, data_offset, SEEK_HOLE);

      /* A failed SEEK_HOLE must not be turned into a map entry with a
         bogus (negative) size.  */
      if (hole_offset == (off_t) -1)
        break;

      /* According to specs, if FS does not fully support
         SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper"
         around classic lseek() call.  We must detect it here and try to
         use other hole-detection methods.  */
      if (offset == 0 /* first loop */
          && data_offset == 0
          && hole_offset == st->stat.st_size)
        break;

      sp.offset = data_offset;
      sp.numbytes = hole_offset - data_offset;
      sparse_add_map (st, &sp);

      st->archive_file_size += sp.numbytes;
      offset = hole_offset;
    }

  /* Failure: leave everything as a fallback method expects to find it.  */
  st->sparse_map_avail = saved_avail;
  st->archive_file_size = 0;
  lseek (fd, 0, SEEK_SET);
  return false;
}
#endif
342
343 static bool
sparse_scan_file(struct tar_sparse_file * file)344 sparse_scan_file (struct tar_sparse_file *file)
345 {
346 /* always check for completely sparse files */
347 if (sparse_scan_file_wholesparse (file))
348 return true;
349
350 switch (hole_detection)
351 {
352 case HOLE_DETECTION_DEFAULT:
353 case HOLE_DETECTION_SEEK:
354 #ifdef SEEK_HOLE
355 if (sparse_scan_file_seek (file))
356 return true;
357 #else
358 if (hole_detection == HOLE_DETECTION_SEEK)
359 WARN((0, 0,
360 _("\"seek\" hole detection is not supported, using \"raw\".")));
361 /* fall back to "raw" for this and all other files */
362 hole_detection = HOLE_DETECTION_RAW;
363 #endif
364 FALLTHROUGH;
365 case HOLE_DETECTION_RAW:
366 if (sparse_scan_file_raw (file))
367 return true;
368 }
369
370 return false;
371 }
372
373 static struct tar_sparse_optab const oldgnu_optab;
374 static struct tar_sparse_optab const star_optab;
375 static struct tar_sparse_optab const pax_optab;
376
377 static bool
sparse_select_optab(struct tar_sparse_file * file)378 sparse_select_optab (struct tar_sparse_file *file)
379 {
380 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
381 {
382 case V7_FORMAT:
383 case USTAR_FORMAT:
384 return false;
385
386 case OLDGNU_FORMAT:
387 case GNU_FORMAT: /*FIXME: This one should disappear? */
388 file->optab = &oldgnu_optab;
389 break;
390
391 case POSIX_FORMAT:
392 file->optab = &pax_optab;
393 break;
394
395 case STAR_FORMAT:
396 file->optab = &star_optab;
397 break;
398
399 default:
400 return false;
401 }
402 return true;
403 }
404
405 static bool
sparse_dump_region(struct tar_sparse_file * file,size_t i)406 sparse_dump_region (struct tar_sparse_file *file, size_t i)
407 {
408 union block *blk;
409 off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
410
411 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
412 return false;
413
414 while (bytes_left > 0)
415 {
416 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
417 size_t bytes_read;
418
419 blk = find_next_block ();
420 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
421 if (bytes_read == SAFE_READ_ERROR)
422 {
423 read_diag_details (file->stat_info->orig_file_name,
424 (file->stat_info->sparse_map[i].offset
425 + file->stat_info->sparse_map[i].numbytes
426 - bytes_left),
427 bufsize);
428 return false;
429 }
430 else if (bytes_read == 0)
431 {
432 char buf[UINTMAX_STRSIZE_BOUND];
433 struct stat st;
434 size_t n;
435 if (fstat (file->fd, &st) == 0)
436 n = file->stat_info->stat.st_size - st.st_size;
437 else
438 n = file->stat_info->stat.st_size
439 - (file->stat_info->sparse_map[i].offset
440 + file->stat_info->sparse_map[i].numbytes
441 - bytes_left);
442
443 WARNOPT (WARN_FILE_SHRANK,
444 (0, 0,
445 ngettext ("%s: File shrank by %s byte; padding with zeros",
446 "%s: File shrank by %s bytes; padding with zeros",
447 n),
448 quotearg_colon (file->stat_info->orig_file_name),
449 STRINGIFY_BIGINT (n, buf)));
450 if (! ignore_failed_read_option)
451 set_exit_status (TAREXIT_DIFFERS);
452 return false;
453 }
454
455 memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
456 bytes_left -= bytes_read;
457 file->dumped_size += bytes_read;
458 set_next_block_after (blk);
459 }
460
461 return true;
462 }
463
464 static bool
sparse_extract_region(struct tar_sparse_file * file,size_t i)465 sparse_extract_region (struct tar_sparse_file *file, size_t i)
466 {
467 off_t write_size;
468
469 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
470 return false;
471
472 write_size = file->stat_info->sparse_map[i].numbytes;
473
474 if (write_size == 0)
475 {
476 /* Last block of the file is a hole */
477 if (file->seekable && sys_truncate (file->fd))
478 truncate_warn (file->stat_info->orig_file_name);
479 }
480 else while (write_size > 0)
481 {
482 size_t count;
483 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
484 union block *blk = find_next_block ();
485 if (!blk)
486 {
487 ERROR ((0, 0, _("Unexpected EOF in archive")));
488 return false;
489 }
490 set_next_block_after (blk);
491 file->dumped_size += BLOCKSIZE;
492 count = blocking_write (file->fd, blk->buffer, wrbytes);
493 write_size -= count;
494 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
495 file->offset += count;
496 if (count != wrbytes)
497 {
498 write_error_details (file->stat_info->orig_file_name,
499 count, wrbytes);
500 return false;
501 }
502 }
503 return true;
504 }
505
506
507
508 /* Interface functions */
509 enum dump_status
sparse_dump_file(int fd,struct tar_stat_info * st)510 sparse_dump_file (int fd, struct tar_stat_info *st)
511 {
512 bool rc;
513 struct tar_sparse_file file;
514
515 if (!tar_sparse_init (&file))
516 return dump_status_not_implemented;
517
518 file.stat_info = st;
519 file.fd = fd;
520 file.seekable = true; /* File *must* be seekable for dump to work */
521
522 rc = sparse_scan_file (&file);
523 if (rc && file.optab->dump_region)
524 {
525 tar_sparse_dump_header (&file);
526
527 if (fd >= 0)
528 {
529 size_t i;
530
531 mv_begin_write (file.stat_info->file_name,
532 file.stat_info->stat.st_size,
533 file.stat_info->archive_file_size - file.dumped_size);
534 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
535 rc = tar_sparse_dump_region (&file, i);
536 }
537 }
538
539 pad_archive (file.stat_info->archive_file_size - file.dumped_size);
540 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
541 }
542
543 bool
sparse_member_p(struct tar_stat_info * st)544 sparse_member_p (struct tar_stat_info *st)
545 {
546 struct tar_sparse_file file;
547
548 if (!tar_sparse_init (&file))
549 return false;
550 file.stat_info = st;
551 return tar_sparse_member_p (&file);
552 }
553
554 bool
sparse_fixup_header(struct tar_stat_info * st)555 sparse_fixup_header (struct tar_stat_info *st)
556 {
557 struct tar_sparse_file file;
558
559 if (!tar_sparse_init (&file))
560 return false;
561 file.stat_info = st;
562 return tar_sparse_fixup_header (&file);
563 }
564
565 enum dump_status
sparse_extract_file(int fd,struct tar_stat_info * st,off_t * size)566 sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
567 {
568 bool rc = true;
569 struct tar_sparse_file file;
570 size_t i;
571
572 if (!tar_sparse_init (&file))
573 return dump_status_not_implemented;
574
575 file.stat_info = st;
576 file.fd = fd;
577 file.seekable = lseek (fd, 0, SEEK_SET) == 0;
578 file.offset = 0;
579
580 rc = tar_sparse_decode_header (&file);
581 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
582 rc = tar_sparse_extract_region (&file, i);
583 *size = file.stat_info->archive_file_size - file.dumped_size;
584 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
585 }
586
587 enum dump_status
sparse_skip_file(struct tar_stat_info * st)588 sparse_skip_file (struct tar_stat_info *st)
589 {
590 bool rc = true;
591 struct tar_sparse_file file;
592
593 if (!tar_sparse_init (&file))
594 return dump_status_not_implemented;
595
596 file.stat_info = st;
597 file.fd = -1;
598
599 rc = tar_sparse_decode_header (&file);
600 skip_file (file.stat_info->archive_file_size - file.dumped_size);
601 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
602 }
603
604
605 static bool
check_sparse_region(struct tar_sparse_file * file,off_t beg,off_t end)606 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
607 {
608 if (!lseek_or_error (file, beg))
609 return false;
610
611 while (beg < end)
612 {
613 size_t bytes_read;
614 size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg;
615 char diff_buffer[BLOCKSIZE];
616
617 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
618 if (bytes_read == SAFE_READ_ERROR)
619 {
620 read_diag_details (file->stat_info->orig_file_name,
621 beg,
622 rdsize);
623 return false;
624 }
625 else if (bytes_read == 0)
626 {
627 report_difference (file->stat_info, _("Size differs"));
628 return false;
629 }
630
631 if (!zero_block_p (diff_buffer, bytes_read))
632 {
633 char begbuf[INT_BUFSIZE_BOUND (off_t)];
634 report_difference (file->stat_info,
635 _("File fragment at %s is not a hole"),
636 offtostr (beg, begbuf));
637 return false;
638 }
639
640 beg += bytes_read;
641 }
642
643 return true;
644 }
645
646 static bool
check_data_region(struct tar_sparse_file * file,size_t i)647 check_data_region (struct tar_sparse_file *file, size_t i)
648 {
649 off_t size_left;
650
651 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
652 return false;
653 size_left = file->stat_info->sparse_map[i].numbytes;
654 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
655
656 while (size_left > 0)
657 {
658 size_t bytes_read;
659 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
660 char diff_buffer[BLOCKSIZE];
661
662 union block *blk = find_next_block ();
663 if (!blk)
664 {
665 ERROR ((0, 0, _("Unexpected EOF in archive")));
666 return false;
667 }
668 set_next_block_after (blk);
669 file->dumped_size += BLOCKSIZE;
670 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
671 if (bytes_read == SAFE_READ_ERROR)
672 {
673 read_diag_details (file->stat_info->orig_file_name,
674 (file->stat_info->sparse_map[i].offset
675 + file->stat_info->sparse_map[i].numbytes
676 - size_left),
677 rdsize);
678 return false;
679 }
680 else if (bytes_read == 0)
681 {
682 report_difference (¤t_stat_info, _("Size differs"));
683 return false;
684 }
685 size_left -= bytes_read;
686 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
687 if (memcmp (blk->buffer, diff_buffer, bytes_read))
688 {
689 report_difference (file->stat_info, _("Contents differ"));
690 return false;
691 }
692 }
693 return true;
694 }
695
696 bool
sparse_diff_file(int fd,struct tar_stat_info * st)697 sparse_diff_file (int fd, struct tar_stat_info *st)
698 {
699 bool rc = true;
700 struct tar_sparse_file file;
701 size_t i;
702 off_t offset = 0;
703
704 if (!tar_sparse_init (&file))
705 return false;
706
707 file.stat_info = st;
708 file.fd = fd;
709 file.seekable = true; /* File *must* be seekable for compare to work */
710
711 rc = tar_sparse_decode_header (&file);
712 mv_begin_read (st);
713 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
714 {
715 rc = check_sparse_region (&file,
716 offset, file.stat_info->sparse_map[i].offset)
717 && check_data_region (&file, i);
718 offset = file.stat_info->sparse_map[i].offset
719 + file.stat_info->sparse_map[i].numbytes;
720 }
721
722 if (!rc)
723 skip_file (file.stat_info->archive_file_size - file.dumped_size);
724 mv_end ();
725
726 tar_sparse_done (&file);
727 return rc;
728 }
729
730
/* Old GNU Format. The sparse file information is stored in the
   oldgnu_header in the following manner:

   The header is marked with type 'S'. Its 'size' field contains
   the cumulative size of all non-empty blocks of the file. The
   actual file size is stored in 'realsize' member of oldgnu_header.

   The map of the file is stored in a list of 'struct sparse'.
   Each struct contains offset to the block of data and its
   size (both as octal numbers). The first file header contains
   at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
   contains more structs, then the field 'isextended' of the main
   header is set to 1 (binary) and the 'struct sparse_header'
   header follows, containing at most 21 following structs
   (SPARSES_IN_SPARSE_HEADER). If more structs follow, the 'isextended'
   field of the extended header is set and the next extension header
   follows, etc... */
748
/* Result of converting one 'struct sparse' from a header into a map
   entry (see oldgnu_add_sparse).  */
enum oldgnu_add_status
  {
    add_ok,       /* entry was valid and has been added to the map */
    add_finish,   /* an unused entry was found: the map is complete */
    add_fail      /* entry was invalid */
  };
755
756 static bool
oldgnu_sparse_member_p(struct tar_sparse_file * file)757 oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
758 {
759 return current_header->header.typeflag == GNUTYPE_SPARSE;
760 }
761
762 /* Add a sparse item to the sparse file and its obstack */
763 static enum oldgnu_add_status
oldgnu_add_sparse(struct tar_sparse_file * file,struct sparse * s)764 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
765 {
766 struct sp_array sp;
767
768 if (s->numbytes[0] == '\0')
769 return add_finish;
770 sp.offset = OFF_FROM_HEADER (s->offset);
771 sp.numbytes = OFF_FROM_HEADER (s->numbytes);
772 if (sp.offset < 0 || sp.numbytes < 0
773 || INT_ADD_OVERFLOW (sp.offset, sp.numbytes)
774 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
775 || file->stat_info->archive_file_size < 0)
776 return add_fail;
777
778 sparse_add_map (file->stat_info, &sp);
779 return add_ok;
780 }
781
782 static bool
oldgnu_fixup_header(struct tar_sparse_file * file)783 oldgnu_fixup_header (struct tar_sparse_file *file)
784 {
785 /* NOTE! st_size was initialized from the header
786 which actually contains archived size. The following fixes it */
787 off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
788 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
789 file->stat_info->stat.st_size = max (0, realsize);
790 return 0 <= realsize;
791 }
792
793 /* Convert old GNU format sparse data to internal representation */
794 static bool
oldgnu_get_sparse_info(struct tar_sparse_file * file)795 oldgnu_get_sparse_info (struct tar_sparse_file *file)
796 {
797 size_t i;
798 union block *h = current_header;
799 int ext_p;
800 enum oldgnu_add_status rc;
801
802 file->stat_info->sparse_map_avail = 0;
803 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
804 {
805 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
806 if (rc != add_ok)
807 break;
808 }
809
810 for (ext_p = h->oldgnu_header.isextended;
811 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
812 {
813 h = find_next_block ();
814 if (!h)
815 {
816 ERROR ((0, 0, _("Unexpected EOF in archive")));
817 return false;
818 }
819 set_next_block_after (h);
820 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
821 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
822 }
823
824 if (rc == add_fail)
825 {
826 ERROR ((0, 0, _("%s: invalid sparse archive member"),
827 file->stat_info->orig_file_name));
828 return false;
829 }
830 return true;
831 }
832
833 static void
oldgnu_store_sparse_info(struct tar_sparse_file * file,size_t * pindex,struct sparse * sp,size_t sparse_size)834 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
835 struct sparse *sp, size_t sparse_size)
836 {
837 for (; *pindex < file->stat_info->sparse_map_avail
838 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
839 {
840 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
841 sp->offset);
842 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
843 sp->numbytes);
844 }
845 }
846
847 static bool
oldgnu_dump_header(struct tar_sparse_file * file)848 oldgnu_dump_header (struct tar_sparse_file *file)
849 {
850 off_t block_ordinal = current_block_ordinal ();
851 union block *blk;
852 size_t i;
853
854 blk = start_header (file->stat_info);
855 blk->header.typeflag = GNUTYPE_SPARSE;
856 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
857 blk->oldgnu_header.isextended = 1;
858
859 /* Store the real file size */
860 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
861 /* Store the effective (shrunken) file size */
862 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
863
864 i = 0;
865 oldgnu_store_sparse_info (file, &i,
866 blk->oldgnu_header.sp,
867 SPARSES_IN_OLDGNU_HEADER);
868 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
869 finish_header (file->stat_info, blk, block_ordinal);
870
871 while (i < file->stat_info->sparse_map_avail)
872 {
873 blk = find_next_block ();
874 memset (blk->buffer, 0, BLOCKSIZE);
875 oldgnu_store_sparse_info (file, &i,
876 blk->sparse_header.sp,
877 SPARSES_IN_SPARSE_HEADER);
878 if (i < file->stat_info->sparse_map_avail)
879 blk->sparse_header.isextended = 1;
880 set_next_block_after (blk);
881 }
882 return true;
883 }
884
885 static struct tar_sparse_optab const oldgnu_optab = {
886 NULL, /* No init function */
887 NULL, /* No done function */
888 oldgnu_sparse_member_p,
889 oldgnu_dump_header,
890 oldgnu_fixup_header,
891 oldgnu_get_sparse_info,
892 NULL, /* No scan_block function */
893 sparse_dump_region,
894 sparse_extract_region,
895 };
896
897
898 /* Star */
899
900 static bool
star_sparse_member_p(struct tar_sparse_file * file)901 star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
902 {
903 return current_header->header.typeflag == GNUTYPE_SPARSE;
904 }
905
906 static bool
star_fixup_header(struct tar_sparse_file * file)907 star_fixup_header (struct tar_sparse_file *file)
908 {
909 /* NOTE! st_size was initialized from the header
910 which actually contains archived size. The following fixes it */
911 off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize);
912 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
913 file->stat_info->stat.st_size = max (0, realsize);
914 return 0 <= realsize;
915 }
916
917 /* Convert STAR format sparse data to internal representation */
918 static bool
star_get_sparse_info(struct tar_sparse_file * file)919 star_get_sparse_info (struct tar_sparse_file *file)
920 {
921 size_t i;
922 union block *h = current_header;
923 int ext_p;
924 enum oldgnu_add_status rc = add_ok;
925
926 file->stat_info->sparse_map_avail = 0;
927
928 if (h->star_in_header.prefix[0] == '\0'
929 && h->star_in_header.sp[0].offset[10] != '\0')
930 {
931 /* Old star format */
932 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
933 {
934 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
935 if (rc != add_ok)
936 break;
937 }
938 ext_p = h->star_in_header.isextended;
939 }
940 else
941 ext_p = 1;
942
943 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
944 {
945 h = find_next_block ();
946 if (!h)
947 {
948 ERROR ((0, 0, _("Unexpected EOF in archive")));
949 return false;
950 }
951 set_next_block_after (h);
952 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
953 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
954 file->dumped_size += BLOCKSIZE;
955 }
956
957 if (rc == add_fail)
958 {
959 ERROR ((0, 0, _("%s: invalid sparse archive member"),
960 file->stat_info->orig_file_name));
961 return false;
962 }
963 return true;
964 }
965
966
967 static struct tar_sparse_optab const star_optab = {
968 NULL, /* No init function */
969 NULL, /* No done function */
970 star_sparse_member_p,
971 NULL,
972 star_fixup_header,
973 star_get_sparse_info,
974 NULL, /* No scan_block function */
975 NULL, /* No dump region function */
976 sparse_extract_region,
977 };
978
979
980 /* GNU PAX sparse file format. There are several versions:
981
982 * 0.0
983
984 The initial version of sparse format used by tar 1.14-1.15.1.
985 The sparse file map is stored in x header:
986
987 GNU.sparse.size Real size of the stored file
988 GNU.sparse.numblocks Number of blocks in the sparse map
989 repeat numblocks time
990 GNU.sparse.offset Offset of the next data block
991 GNU.sparse.numbytes Size of the next data block
992 end repeat
993
994 This has been reported as conflicting with the POSIX specs. The reason is
995 that offsets and sizes of non-zero data blocks were stored in multiple
996 instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
997 POSIX requires the latest occurrence of the variable to override all
998 previous occurrences.
999
1000 To avoid this incompatibility two following versions were introduced.
1001
1002 * 0.1
1003
1004 Used by tar 1.15.2 -- 1.15.91 (alpha releases).
1005
1006 The sparse file map is stored in
1007 x header:
1008
1009 GNU.sparse.size Real size of the stored file
1010 GNU.sparse.numblocks Number of blocks in the sparse map
1011 GNU.sparse.map Map of non-null data chunks. A string consisting
1012 of comma-separated values "offset,size[,offset,size]..."
1013
1014 The resulting GNU.sparse.map string can be *very* long. While POSIX does not
1015 impose any limit on the length of a x header variable, this can confuse some
1016 tars.
1017
1018 * 1.0
1019
   Starting from this version, the exact sparse format version is specified
   explicitly in the header using the following variables:
1022
1023 GNU.sparse.major Major version
1024 GNU.sparse.minor Minor version
1025
1026 X header keeps the following variables:
1027
1028 GNU.sparse.name Real file name of the sparse file
1029 GNU.sparse.realsize Real size of the stored file (corresponds to the old
1030 GNU.sparse.size variable)
1031
1032 The name field of the ustar header is constructed using the pattern
1033 "%d/GNUSparseFile.%p/%f".
1034
1035 The sparse map itself is stored in the file data block, preceding the actual
1036 file data. It consists of a series of octal numbers of arbitrary length,
1037 delimited by newlines. The map is padded with nulls to the nearest block
1038 boundary.
1039
1040 The first number gives the number of entries in the map. Following are map
1041 entries, each one consisting of two numbers giving the offset and size of
1042 the data block it describes.
1043
1044 The format is designed in such a way that non-posix aware tars and tars not
1045 supporting GNU.sparse.* keywords will extract each sparse file in its
1046 condensed form with the file map attached and will place it into a separate
1047 directory. Then, using a simple program it would be possible to expand the
1048 file to its original form even without GNU tar.
1049
   By default, v.1.0 archives are created. To use other formats,
   --sparse-version option is provided. Additionally, v.0.0 can be obtained
   by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
   --pax-option delete=GNU.sparse.map
1054 */
1055
1056 static bool
pax_sparse_member_p(struct tar_sparse_file * file)1057 pax_sparse_member_p (struct tar_sparse_file *file)
1058 {
1059 return file->stat_info->sparse_map_avail > 0
1060 || file->stat_info->sparse_major > 0;
1061 }
1062
1063 /* Start a header that uses the effective (shrunken) file size. */
1064 static union block *
pax_start_header(struct tar_stat_info * st)1065 pax_start_header (struct tar_stat_info *st)
1066 {
1067 off_t realsize = st->stat.st_size;
1068 union block *blk;
1069 st->stat.st_size = st->archive_file_size;
1070 blk = start_header (st);
1071 st->stat.st_size = realsize;
1072 return blk;
1073 }
1074
1075 static bool
pax_dump_header_0(struct tar_sparse_file * file)1076 pax_dump_header_0 (struct tar_sparse_file *file)
1077 {
1078 off_t block_ordinal = current_block_ordinal ();
1079 union block *blk;
1080 size_t i;
1081 char nbuf[UINTMAX_STRSIZE_BOUND];
1082 struct sp_array *map = file->stat_info->sparse_map;
1083 char *save_file_name = NULL;
1084
1085 /* Store the real file size */
1086 xheader_store ("GNU.sparse.size", file->stat_info, NULL);
1087 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
1088
1089 if (xheader_keyword_deleted_p ("GNU.sparse.map")
1090 || tar_sparse_minor == 0)
1091 {
1092 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1093 {
1094 xheader_store ("GNU.sparse.offset", file->stat_info, &i);
1095 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
1096 }
1097 }
1098 else
1099 {
1100 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1101 save_file_name = file->stat_info->file_name;
1102 file->stat_info->file_name = xheader_format_name (file->stat_info,
1103 "%d/GNUSparseFile.%p/%f", 0);
1104
1105 xheader_string_begin (&file->stat_info->xhdr);
1106 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1107 {
1108 if (i)
1109 xheader_string_add (&file->stat_info->xhdr, ",");
1110 xheader_string_add (&file->stat_info->xhdr,
1111 umaxtostr (map[i].offset, nbuf));
1112 xheader_string_add (&file->stat_info->xhdr, ",");
1113 xheader_string_add (&file->stat_info->xhdr,
1114 umaxtostr (map[i].numbytes, nbuf));
1115 }
1116 if (!xheader_string_end (&file->stat_info->xhdr,
1117 "GNU.sparse.map"))
1118 {
1119 free (file->stat_info->file_name);
1120 file->stat_info->file_name = save_file_name;
1121 return false;
1122 }
1123 }
1124 blk = pax_start_header (file->stat_info);
1125 finish_header (file->stat_info, blk, block_ordinal);
1126 if (save_file_name)
1127 {
1128 free (file->stat_info->file_name);
1129 file->stat_info->file_name = save_file_name;
1130 }
1131 return true;
1132 }
1133
1134 static bool
pax_dump_header_1(struct tar_sparse_file * file)1135 pax_dump_header_1 (struct tar_sparse_file *file)
1136 {
1137 off_t block_ordinal = current_block_ordinal ();
1138 union block *blk;
1139 char *p, *q;
1140 size_t i;
1141 char nbuf[UINTMAX_STRSIZE_BOUND];
1142 off_t size = 0;
1143 struct sp_array *map = file->stat_info->sparse_map;
1144 char *save_file_name = file->stat_info->file_name;
1145
1146 #define COPY_STRING(b,dst,src) do \
1147 { \
1148 char *endp = b->buffer + BLOCKSIZE; \
1149 char const *srcp = src; \
1150 while (*srcp) \
1151 { \
1152 if (dst == endp) \
1153 { \
1154 set_next_block_after (b); \
1155 b = find_next_block (); \
1156 dst = b->buffer; \
1157 endp = b->buffer + BLOCKSIZE; \
1158 } \
1159 *dst++ = *srcp++; \
1160 } \
1161 } while (0)
1162
1163 /* Compute stored file size */
1164 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1165 size += strlen (p) + 1;
1166 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1167 {
1168 p = umaxtostr (map[i].offset, nbuf);
1169 size += strlen (p) + 1;
1170 p = umaxtostr (map[i].numbytes, nbuf);
1171 size += strlen (p) + 1;
1172 }
1173 size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
1174 file->stat_info->archive_file_size += size * BLOCKSIZE;
1175 file->dumped_size += size * BLOCKSIZE;
1176
1177 /* Store sparse file identification */
1178 xheader_store ("GNU.sparse.major", file->stat_info, NULL);
1179 xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
1180 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1181 xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
1182
1183 file->stat_info->file_name =
1184 xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0);
1185 /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */
1186 if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE)
1187 file->stat_info->file_name[NAME_FIELD_SIZE] = 0;
1188
1189 blk = pax_start_header (file->stat_info);
1190 finish_header (file->stat_info, blk, block_ordinal);
1191 free (file->stat_info->file_name);
1192 file->stat_info->file_name = save_file_name;
1193
1194 blk = find_next_block ();
1195 q = blk->buffer;
1196 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1197 COPY_STRING (blk, q, p);
1198 COPY_STRING (blk, q, "\n");
1199 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1200 {
1201 p = umaxtostr (map[i].offset, nbuf);
1202 COPY_STRING (blk, q, p);
1203 COPY_STRING (blk, q, "\n");
1204 p = umaxtostr (map[i].numbytes, nbuf);
1205 COPY_STRING (blk, q, p);
1206 COPY_STRING (blk, q, "\n");
1207 }
1208 memset (q, 0, BLOCKSIZE - (q - blk->buffer));
1209 set_next_block_after (blk);
1210 return true;
1211 }
1212
1213 static bool
pax_dump_header(struct tar_sparse_file * file)1214 pax_dump_header (struct tar_sparse_file *file)
1215 {
1216 file->stat_info->sparse_major = tar_sparse_major;
1217 file->stat_info->sparse_minor = tar_sparse_minor;
1218
1219 return (file->stat_info->sparse_major == 0) ?
1220 pax_dump_header_0 (file) : pax_dump_header_1 (file);
1221 }
1222
/* Parse ARG as an unsigned decimal number and store it in *NUM.

   ARG must start with a digit and consist of the number only: trailing
   characters, a value greater than MAXVAL, or a range error reported by
   strtoumax all make the function return false, in which case *NUM is
   left untouched.  Return true on success.  */
static bool
decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
{
  char *end;
  uintmax_t value;

  if (!ISDIGIT (*arg))
    return false;

  errno = 0;
  value = strtoumax (arg, &end, 10);

  if (maxval < value || errno == ERANGE || *end != '\0')
    return false;

  *num = value;
  return true;
}
1241
1242 static bool
pax_decode_header(struct tar_sparse_file * file)1243 pax_decode_header (struct tar_sparse_file *file)
1244 {
1245 if (file->stat_info->sparse_major > 0)
1246 {
1247 uintmax_t u;
1248 char nbuf[UINTMAX_STRSIZE_BOUND];
1249 union block *blk;
1250 char *p;
1251 size_t i;
1252 off_t start;
1253
1254 #define COPY_BUF(b,buf,src) do \
1255 { \
1256 char *endp = b->buffer + BLOCKSIZE; \
1257 char *dst = buf; \
1258 do \
1259 { \
1260 if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \
1261 { \
1262 ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
1263 file->stat_info->orig_file_name)); \
1264 return false; \
1265 } \
1266 if (src == endp) \
1267 { \
1268 set_next_block_after (b); \
1269 b = find_next_block (); \
1270 if (!b) \
1271 FATAL_ERROR ((0, 0, _("Unexpected EOF in archive"))); \
1272 src = b->buffer; \
1273 endp = b->buffer + BLOCKSIZE; \
1274 } \
1275 *dst = *src++; \
1276 } \
1277 while (*dst++ != '\n'); \
1278 dst[-1] = 0; \
1279 } while (0)
1280
1281 start = current_block_ordinal ();
1282 set_next_block_after (current_header);
1283 blk = find_next_block ();
1284 if (!blk)
1285 FATAL_ERROR ((0, 0, _("Unexpected EOF in archive")));
1286 p = blk->buffer;
1287 COPY_BUF (blk,nbuf,p);
1288 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1289 {
1290 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1291 file->stat_info->orig_file_name));
1292 return false;
1293 }
1294 file->stat_info->sparse_map_size = u;
1295 file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
1296 sizeof (*file->stat_info->sparse_map));
1297 file->stat_info->sparse_map_avail = 0;
1298 for (i = 0; i < file->stat_info->sparse_map_size; i++)
1299 {
1300 struct sp_array sp;
1301
1302 COPY_BUF (blk,nbuf,p);
1303 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1304 {
1305 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1306 file->stat_info->orig_file_name));
1307 return false;
1308 }
1309 sp.offset = u;
1310 COPY_BUF (blk,nbuf,p);
1311 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1312 {
1313 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1314 file->stat_info->orig_file_name));
1315 return false;
1316 }
1317 sp.numbytes = u;
1318 sparse_add_map (file->stat_info, &sp);
1319 }
1320 set_next_block_after (blk);
1321
1322 file->dumped_size += BLOCKSIZE * (current_block_ordinal () - start);
1323 }
1324
1325 return true;
1326 }
1327
1328 static struct tar_sparse_optab const pax_optab = {
1329 NULL, /* No init function */
1330 NULL, /* No done function */
1331 pax_sparse_member_p,
1332 pax_dump_header,
1333 NULL,
1334 pax_decode_header,
1335 NULL, /* No scan_block function */
1336 sparse_dump_region,
1337 sparse_extract_region,
1338 };
1339