1#!/usr/bin/perl
2#============================================================= -*-perl-*-
3#
4# BackupPC_tarPCCopy: create a tar archive of the PC directory
5# for copying the entire PC data directory.  The archive will
6# contain hardlinks to the pool directory, which should be copied
7# before BackupPC_tarPCCopy is run.
8#
9# See the documentation for use.
10#
11# DESCRIPTION
12#
13#   Usage: BackupPC_tarPCCopy [options] files/directories...
14#
15#   Flags:
16#       -c      don't cache inode data (reduces memory usage at the
17#                                       expense of longer run time)
18#
19# AUTHOR
20#   Craig Barratt  <cbarratt@users.sourceforge.net>
21#
22# COPYRIGHT
23#   Copyright (C) 2005-2017  Craig Barratt
24#
25#   This program is free software; you can redistribute it and/or modify
26#   it under the terms of the GNU General Public License as published by
27#   the Free Software Foundation; either version 2 of the License, or
28#   (at your option) any later version.
29#
30#   This program is distributed in the hope that it will be useful,
31#   but WITHOUT ANY WARRANTY; without even the implied warranty of
32#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33#   GNU General Public License for more details.
34#
35#   You should have received a copy of the GNU General Public License
36#   along with this program; if not, write to the Free Software
37#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
38#
39#========================================================================
40#
41# Version 3.3.2, released 25 Jan 2017.
42#
43# See http://backuppc.sourceforge.net.
44#
45#========================================================================
46
47use strict;
48no  utf8;
49use lib "__INSTALLDIR__/lib";
50use File::Find;
51use File::Path;
52use Getopt::Std;
53
54use BackupPC::Lib;
55use BackupPC::Attrib qw(:all);
56use BackupPC::FileZIO;
57use BackupPC::View;
58
use constant S_IFMT => 0170000;         # type of file

#
# Everything below needs a handle on the BackupPC installation, so
# give up right away if one can't be created.
#
my $bpc = BackupPC::Lib->new;
die("BackupPC::Lib->new failed\n") unless ( $bpc );

my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();

#
# Parse the command line: -c is the only flag, and at least one
# file/directory argument must remain once the flags are removed.
# Anything else prints the usage text and exits with status 1.
#
my %opts;

unless ( getopts("c", \%opts) && @ARGV >= 1 ) {
    print STDERR <<EOF;
usage: $0 [options] files/directories...
  Options:
     -c      don't cache inode data (reduces memory usage at the
                                     expense of longer run time)
EOF
    exit(1);
}
77
78#
79# This constant and the line of code below that uses it are borrowed
80# from Archive::Tar.  Thanks to Calle Dybedahl and Stephen Zander.
81# See www.cpan.org.
82#
83# Archive::Tar is Copyright 1997 Calle Dybedahl. All rights reserved.
84#                 Copyright 1998 Stephen Zander. All rights reserved.
85#
my $tar_pack_header = join(' ', qw(
    a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12
));
my $tar_header_length = 512;

#
# Output buffering: file data is read in $BufSize chunks, and TarWrite
# collects output in $WriteBuf until it has $WriteBufSz bytes to write.
#
# NOTE(review): getopts is only called with "c", so $opts{b} looks like
# it can never be set and the default of 20 blocks always applies.
#
my $BufSize    = 1 << 20;               # 1MB or 2^20
my $WriteBuf   = "";
my $WriteBufSz = $tar_header_length * ($opts{b} || 20);

#
# uid -> user name and gid -> group name, filled in lazily by
# UidLookup() and GidLookup().
#
my %UidCache;
my %GidCache;

#
# State remembered between archiveFile() calls: the client and backup
# number currently being walked, that client's backup list, and the
# attrib object for the directory most recently looked at.
#
my $ClientName;
my $ClientBackups;
my $ClientBkupNum;
my $ClientDirAttr;
my $ClientDir;

#
# Counters; all but $ErrorCnt are reset after each command-line argument.
#
my($FileCnt, $HLinkCnt, $ByteCnt, $DirCnt, $ErrorCnt) = (0, 0, 0, 0, 0);

#
# Compress and mangle settings of the current backup.
#
my($ClientBkupCompress, $ClientBkupMangle) = (1, 1);

#
# inode -> tar link name for files already matched to a pool file.
#
my %Inode2Path;
107
108#
109# Write out all the requested files/directories
110#
binmode(STDOUT);
my $fh = *STDOUT;

my $argMax = scalar(@ARGV);
my $argCnt = 1;

while ( @ARGV ) {
    my $path = shift(@ARGV);

    #
    # Each argument has to be an existing directory below $TopDir.
    #
    unless ( $path =~ m{^\Q$TopDir/\E} ) {
        print STDERR "Argument $path must be an absolute path starting with $TopDir\n";
        exit(1);
    }
    unless ( -d $path ) {
        print STDERR "Argument $path does not exist\n";
        exit(1);
    }

    #
    # Walk the tree; archiveFile() emits one tar entry per file visited.
    #
    find({ wanted => sub { archiveFile($fh) } }, $path);

    #
    # The inode cache is only kept for the duration of one top-level
    # directory, to avoid using too much memory.
    #
    %Inode2Path = ();

    #
    # Report the per-argument stats, then start counting afresh.
    #
    print STDERR "Done $path ($argCnt of $argMax): $DirCnt dirs,"
               . " $FileCnt files, $HLinkCnt hardlinks\n";

    ($FileCnt, $HLinkCnt, $ByteCnt, $DirCnt) = (0, 0, 0, 0);

    $argCnt++;
}

#
# End of archive: two all-zero 512 byte headers, followed by a flush
# that pads the output out to a full write block.
#
my $data = "\0" x ($tar_header_length * 2);
TarWrite($fh, \$data);
TarWrite($fh, undef);

#
# Any warnings/errors along the way give a non-zero exit status.
#
exit(0) if ( !$ErrorCnt );

print STDERR "Got $ErrorCnt warnings/errors\n";
exit(1);
167
168###########################################################################
169# Subroutines
170###########################################################################
171
#
# archiveFile($fh)
#
# File::Find "wanted" callback.  Builds a header hash for the file
# currently being visited ($_ and $File::Find::name) from its stat()
# data and passes it to TarWriteFile(), which writes the entry to $fh.
#
# The path relative to $TopDir/pc/ determines the client name and,
# when present, the backup number.  These are remembered between
# calls so the client's backup list is only read when the client
# changes, and the backup's compress/mangle settings are only looked
# up when the backup number changes.  For regular files whose name
# starts with "f" inside a numbered backup, the directory's attrib
# file supplies the file type and (for plain files) the real size.
#
# If the path is not below $TopDir/pc/ an error is printed, $ErrorCnt
# is incremented and nothing is written.
#
sub archiveFile
{
    my($fh) = @_;
    my($hdr);

    my @s = stat($_);

    #
    # Default type - we'll update later if it is a symlink, hardlink etc
    #
    $hdr->{type}     = -d _ ? BPC_FTYPE_DIR
                     : -f _ ? BPC_FTYPE_FILE
                     : -1;
    $hdr->{fullPath} = $File::Find::name;
    $hdr->{inode}    = $s[1];
    $hdr->{nlink}    = $s[3];
    $hdr->{size}     = $s[7];
    $hdr->{devmajor} = $s[6] >> 8;
    $hdr->{devminor} = $s[6] & 0xff;
    $hdr->{uid}      = $s[4];
    $hdr->{gid}      = $s[5];
    $hdr->{mode}     = $s[2];
    $hdr->{mtime}    = $s[9];
    $hdr->{compress} = 1;

    if ( $hdr->{fullPath} !~ m{\Q$TopDir\E/pc/(.*)} ) {
        print STDERR "Can't extract TopDir ($TopDir) from"
                   . " $hdr->{fullPath}\n";
        $ErrorCnt++;
        return;
    }
    $hdr->{relPath}  = $1;

    #
    # The name is the last path component (or the whole relative
    # path if it has no "/").
    #
    if ( $hdr->{relPath} =~ m{(.*)/(.*)} ) {
        $hdr->{name} = $2;
    } else {
        $hdr->{name} = $hdr->{relPath};
    }

    if ( $hdr->{relPath} =~ m{(.*?)/} ) {
        my $clientName = $1;
        if ( !defined($ClientName) || $ClientName ne $clientName ) {
            $ClientName    = $clientName;
            $ClientBackups = [ $bpc->BackupInfoRead($ClientName) ];
            #
            # Backup numbers are per-client, so forget the previous
            # client's backup number.  Otherwise a new client whose
            # first visited backup has the same number would reuse
            # the previous client's compress/mangle settings.
            #
            $ClientBkupNum = undef;
            #print STDERR "Setting Client to $ClientName\n";
        }
        if ( $hdr->{relPath} =~ m{(.*?)/(\d+)/}
                 || $hdr->{relPath} =~ m{(.*?)/(\d+)$} ) {
            my $backupNum = $2;
            #
            # The defined() test matters: undef compares numerically
            # equal to 0, so without it backup number 0 would never
            # trigger the lookup when no backup has been seen yet.
            #
            if ( !defined($ClientBkupNum) || $ClientBkupNum != $backupNum ) {
                my $i;
                $ClientBkupNum = $backupNum;
                # print STDERR "Setting ClientBkupNum to $ClientBkupNum\n";
                #
                # If the backup isn't in the client's backup list the
                # previous compress/mangle values are left in place.
                #
                for ( $i = 0 ; $i < @$ClientBackups ; $i++ ) {
                    if ( $ClientBackups->[$i]{num} == $ClientBkupNum ) {
                        $ClientBkupCompress = $ClientBackups->[$i]{compress};
                        $ClientBkupMangle   = $ClientBackups->[$i]{mangle};
                        # print STDERR "Setting $ClientBkupNum compress to $ClientBkupCompress, mangle to $ClientBkupMangle\n";
                        last;
                    }
                }
            }
            $hdr->{compress} = $ClientBkupCompress;
            if ( $hdr->{type} == BPC_FTYPE_FILE && $hdr->{name} =~ /^f/ ) {
                #
                # Load the attrib file for this directory (once per
                # directory) and use it to get the file's type and,
                # for plain files, its real size.
                #
                (my $dir = $hdr->{fullPath}) =~ s{(.*)/.*}{$1};
                if ( $ClientDir ne $dir ) {
                    $ClientDir = $dir;
                    $ClientDirAttr = BackupPC::Attrib->new(
                                          { compress => $ClientBkupCompress }
                                     );
                    if ( -f $ClientDirAttr->fileName($dir)
                                && !$ClientDirAttr->read($dir) ) {
                        print STDERR "Can't read attrib file in $dir\n";
                        $ErrorCnt++;
                    }
                }
                my $name = $hdr->{name};
                $name = $bpc->fileNameUnmangle($name) if ( $ClientBkupMangle );
                my $attr = $ClientDirAttr->get($name);
                if ( defined($attr) ) {
                    $hdr->{type}     = $attr->{type};
                    $hdr->{realSize} = $attr->{size}
                                if ( $attr->{type} == BPC_FTYPE_FILE );
                }
                #print STDERR "$hdr->{fullPath} has type $hdr->{type} and real size $hdr->{realSize}\n";
            }
        }
    } else {
        #
        # No "/" in the relative path, so the entry sits directly in
        # the pc directory: treat it as uncompressed, with its real
        # size being the size on disk.
        #
        $hdr->{compress} = 0;
        $hdr->{realSize} = $hdr->{size};
    }

    #print STDERR "$File::Find::name\n";

    TarWriteFile($hdr, $fh);
}
267
#
# UidLookup($uid)
#
# Returns the user name for $uid as reported by getpwuid(), or undef
# if there is none.  Every answer, including undef, is remembered in
# %UidCache so each uid is only looked up once.
#
sub UidLookup
{
    my($uid) = @_;

    unless ( exists($UidCache{$uid}) ) {
        my @pwEntry = getpwuid($uid);
        $UidCache{$uid} = $pwEntry[0];
    }
    return $UidCache{$uid};
}
275
#
# GidLookup($gid)
#
# Returns the group name for $gid as reported by getgrgid(), or undef
# if there is none.  Every answer, including undef, is remembered in
# %GidCache so each gid is only looked up once.
#
sub GidLookup
{
    my($gid) = @_;

    unless ( exists($GidCache{$gid}) ) {
        my @grEntry = getgrgid($gid);
        $GidCache{$gid} = $grEntry[0];
    }
    return $GidCache{$gid};
}
283
#
# TarWrite($fh, $dataRef)
#
# Buffered output.  The data referenced by $dataRef is appended to
# whatever is pending in $WriteBuf, and the result is written to $fh
# with syswrite() in chunks of exactly $WriteBufSz bytes.  Any
# remainder shorter than $WriteBufSz stays in $WriteBuf for next time.
#
# Calling with $dataRef undefined flushes: the pending data is padded
# with NUL bytes up to $WriteBufSz and written.
#
# If a syswrite() doesn't write a full chunk, an error is printed and
# the program exits with status 1.
#
sub TarWrite
{
    my($fh, $dataRef) = @_;

    unless ( defined($dataRef) ) {
        #
        # flush request: make up the padding that fills the buffer
        #
        my $padding = "\0" x ($WriteBufSz - length($WriteBuf));
        $dataRef = \$padding;
    }

    my $pending  = length($WriteBuf);
    my $incoming = length($$dataRef);

    if ( $pending + $incoming < $WriteBufSz ) {
        #
        # still less than one chunk in total: keep it for later
        #
        $WriteBuf .= $$dataRef;
        return;
    }

    #
    # First chunk: the pending data plus enough new data to fill it.
    #
    my $offset = $WriteBufSz - $pending;
    my $n = syswrite($fh, $WriteBuf . substr($$dataRef, 0, $offset));
    if ( $n != $WriteBufSz ) {
        print(STDERR "Unable to write to output file ($!) ($n vs $WriteBufSz)\n");
        exit(1);
    }

    #
    # Then as many whole chunks as the new data still contains.
    #
    for ( ; $offset + $WriteBufSz <= $incoming ; $offset += $WriteBufSz ) {
        my $n = syswrite($fh, substr($$dataRef, $offset, $WriteBufSz));
        if ( $n != $WriteBufSz ) {
            print(STDERR "Unable to write to output file ($!) ($n v $WriteBufSz)\n");
            exit(1);
        }
    }

    #
    # Whatever is left over becomes the new pending data.
    #
    $WriteBuf = substr($$dataRef, $offset);
}
318
#
# TarWritePad($fh, $size)
#
# After $size bytes of data have been written, emits the NUL bytes
# needed to bring the total up to a multiple of $tar_header_length.
# Writes nothing if $size is already a multiple.
#
sub TarWritePad
{
    my($fh, $size) = @_;

    my $partial = $size % $tar_header_length;
    if ( $partial ) {
        my $padding = "\0" x ($tar_header_length - $partial);
        TarWrite($fh, \$padding);
    }
}
328
#
# TarWriteHeader($fh, $hdr)
#
# Packs the fields of $hdr into a single tar header using
# $tar_pack_header and passes it to TarWrite().
#
# Fields read from $hdr: name, mode, uid, gid, size, mtime, type,
# linkname, magic, version, uname, gname, devmajor, devminor.
# uname and gname are filled in (in $hdr itself) from uid/gid via
# UidLookup()/GidLookup() when not already defined.  name and
# linkname are cut to 99 characters here; TarWriteFileInfo() is what
# emits the long-name entries for longer values.
#
sub TarWriteHeader
{
    my($fh, $hdr) = @_;

    $hdr->{uname} = UidLookup($hdr->{uid}) if ( !defined($hdr->{uname}) );
    $hdr->{gname} = GidLookup($hdr->{gid}) if ( !defined($hdr->{gname}) );
    my $devmajor = defined($hdr->{devmajor}) ? sprintf("%07o", $hdr->{devmajor})
                                             : "";
    my $devminor = defined($hdr->{devminor}) ? sprintf("%07o", $hdr->{devminor})
                                             : "";
    my $sizeStr;
    if ( $hdr->{size} >= 2 * 65536 * 65536 ) {
        #
        # GNU extension for files >= 8GB: send size in big-endian binary.
        # The leading 0x80 byte is packed with "C" (unsigned char): 0x80
        # is outside the range of the signed "c" format.
        #
        $sizeStr = pack("C4 N N", 0x80, 0, 0, 0,
                                  int($hdr->{size} / (65536 * 65536)),
                                  $hdr->{size} % (65536 * 65536));
    } elsif ( $hdr->{size} >= 1 * 65536 * 65536 ) {
        #
        # sprintf octal only handles up to 2^32 - 1, so print the
        # size as two groups of octal digits (upper part, then the
        # low 24 bits).
        #
        $sizeStr = sprintf("%03o", $hdr->{size} / (1 << 24))
                 . sprintf("%08o", $hdr->{size} % (1 << 24));
    } else {
        $sizeStr = sprintf("%011o", $hdr->{size});
    }
    my $data = pack($tar_pack_header,
                     substr($hdr->{name}, 0, 99),
                     sprintf("%07o", $hdr->{mode}),
                     sprintf("%07o", $hdr->{uid}),
                     sprintf("%07o", $hdr->{gid}),
                     $sizeStr,
                     sprintf("%011o", $hdr->{mtime}),
                     "",        #checksum field - space padded by pack("A8")
                     $hdr->{type},
                     substr($hdr->{linkname}, 0, 99),
                     $hdr->{magic} || 'ustar ',
                     $hdr->{version} || ' ',
                     $hdr->{uname},
                     $hdr->{gname},
                     $devmajor,
                     $devminor,
                     ""         # prefix is empty
                 );
    #
    # Now that every other field is in place, store the checksum: the
    # 16-bit sum of all header bytes (checksum field still spaces) as
    # six octal digits and a NUL.
    #
    substr($data, 148, 7) = sprintf("%06o\0", unpack("%16C*",$data));
    TarWrite($fh, \$data);
}
377
#
# TarWriteFileInfo($fh, $hdr)
#
# Writes the header for $hdr, preceded where needed by "././@LongLink"
# entries carrying values too long for the regular header fields:
# first a type "K" entry if linkname is longer than 99 characters,
# then a type "L" entry if name is longer than 99 characters.  Each
# such entry is a header followed by the NUL-terminated value, padded
# to a full tar block.
#
sub TarWriteFileInfo
{
    my($fh, $hdr) = @_;

    foreach my $longField ( [ "linkname", "K" ], [ "name", "L" ] ) {
        my($field, $typeFlag) = @$longField;

        #
        # values of up to 99 characters fit in the regular header
        #
        next if ( length($hdr->{$field}) <= 99 );

        my $payload = $hdr->{$field} . "\0";
        my %longHdr = (
            name => "././\@LongLink",
            type => $typeFlag,
            size => length($payload),
        );
        TarWriteHeader($fh, \%longHdr);
        TarWrite($fh, \$payload);
        TarWritePad($fh, length($payload));
    }
    TarWriteHeader($fh, $hdr);
}
410
my $Attr;
my $AttrDir;

#
# TarWriteFile($hdr, $fh)
#
# Writes the tar entry for the file described by $hdr to $fh.
#
#   - Directories get a header only (name forced to end in "/").
#
#   - For every other known type the file on disk is a regular file.
#     We try to find the pool file it is hardlinked to: either the
#     inode is already in %Inode2Path, or the pool path is computed
#     from the file's digest and the candidates (path, path_0,
#     path_1, ...) are checked for a matching inode.  On a match a
#     tar hardlink entry pointing at the pool file is written.
#
#   - If no pool file matches, the file as stored on disk is copied
#     into the archive.  This counts as an error when the file has
#     more than one link, since a pool file was expected to exist.
#
# Every problem reported on STDERR increments $ErrorCnt.  $DirCnt,
# $HLinkCnt, $FileCnt and $ByteCnt are updated according to what
# was written.
#
sub TarWriteFile
{
    my($hdr, $fh) = @_;

    #
    # The archive name is the relative path with a leading "./" and
    # without repeated slashes.
    #
    my $tarPath = $hdr->{relPath};

    $tarPath =~ s{//+}{/}g;
    $tarPath = "./" . $tarPath if ( $tarPath !~ /^\.\// );
    $tarPath =~ s{//+}{/}g;
    $hdr->{name} = $tarPath;

    if ( $hdr->{type} == BPC_FTYPE_DIR ) {
        #
        # Directory: just write the header
        #
        $hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
        TarWriteFileInfo($fh, $hdr);
        $DirCnt++;
    } elsif ( $hdr->{type} == BPC_FTYPE_FILE
            || $hdr->{type} == BPC_FTYPE_HARDLINK
            || $hdr->{type} == BPC_FTYPE_SYMLINK
            || $hdr->{type} == BPC_FTYPE_CHARDEV
            || $hdr->{type} == BPC_FTYPE_BLOCKDEV
            || $hdr->{type} == BPC_FTYPE_FIFO
            || $hdr->{type} == BPC_FTYPE_SOCKET ) {
        #
        # Underlying file is a regular file: write the header and file
        #
        my($data, $dataMD5, $size, $linkName);

        if ( defined($Inode2Path{$hdr->{inode}}) ) {
            $linkName = $Inode2Path{$hdr->{inode}};
            #print STDERR "Got cache hit for $linkName\n";
        } else {
            my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0,
                                            $hdr->{compress});
            if ( !defined($f) ) {
                print(STDERR "Unable to open file $hdr->{fullPath}\n");
                $ErrorCnt++;
                return;
            }
            #
            # Try to find the hardlink it points to by computing
            # the pool file digest.
            #
            $f->read(\$dataMD5, $BufSize);
            if ( !defined($hdr->{realSize}) ) {
                #
                # Need to get the real size
                #
                $size = length($dataMD5);
                while ( $f->read(\$data, $BufSize) > 0 ) {
                    $size += length($data);
                }
                $hdr->{realSize} = $size;
            }
            $f->close();
            my $md5 = Digest::MD5->new;
            my $len = length($dataMD5);
            if ( $hdr->{realSize} < 1048576
                        && length($dataMD5) != $hdr->{realSize} ) {
                #
                # The first read should have returned the whole file.
                # Count this like every other reported problem so it
                # is reflected in the exit status.
                #
                print(STDERR "File $hdr->{fullPath} has bad size"
                            . " (expect $hdr->{realSize}, got $len)\n");
                $ErrorCnt++;
            } else {
                my $digest = $bpc->Buffer2MD5($md5, $hdr->{realSize},
                                              \$dataMD5);
                my $path = $bpc->MD52Path($digest, $hdr->{compress});
                my $i = -1;

                # print(STDERR "Looking up $hdr->{fullPath} at $path\n");
                #
                # Walk the candidates $path, ${path}_0, ${path}_1, ...
                # until one has our inode or one doesn't exist.
                #
                while ( 1 ) {
                    my $testPath = $path;
                    $testPath .= "_$i" if ( $i >= 0 );
                    last if ( !-f $testPath );
                    my $inode = (stat(_))[1];
                    if ( $inode == $hdr->{inode} ) {
                        #
                        # Found it!  Just emit a tar hardlink, with
                        # $TopDir replaced by ".." in the link name.
                        #
                        $testPath =~ s{\Q$TopDir\E}{..};
                        $linkName = $testPath;
                        last;
                    }
                    $i++;
                }
            }
        }
        if ( defined($linkName) ) {
            $hdr->{type}     = BPC_FTYPE_HARDLINK;
            $hdr->{linkname} = $linkName;
            TarWriteFileInfo($fh, $hdr);
            $HLinkCnt++;
            #print STDERR "$hdr->{relPath} matches $testPath\n";
            if ( !$opts{c} && $hdr->{nlink} > 2 ) {
                #
                # add it to the cache if there are more
                # than 2 links (pool + current file),
                # since there are more to go
                #
                $Inode2Path{$hdr->{inode}} = $linkName;
            }
            return;
        }

        #
        # No pool file found: copy the file as it is stored on disk
        # (opened without compression, so the bytes go out unchanged
        # and the header carries the on-disk size).
        #
        $size = 0;
        if ( $hdr->{nlink} > 1 ) {
            print STDERR "Can't find $hdr->{relPath} in pool, will copy file\n";
            $ErrorCnt++;
        }
        $hdr->{type} = BPC_FTYPE_FILE;

        my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, 0);
        if ( !defined($f) ) {
            print(STDERR "Unable to open file $hdr->{fullPath}\n");
            $ErrorCnt++;
            return;
        }
        TarWriteFileInfo($fh, $hdr);
        while ( $f->read(\$data, $BufSize) > 0 ) {
            my $truncated = 0;
            if ( $size + length($data) > $hdr->{size} ) {
                #
                # More data than the header promised: only write up
                # to the size in the header.
                #
                print(STDERR "Error: truncating $hdr->{fullPath} to"
                           . " $hdr->{size} bytes\n");
                $data = substr($data, 0, $hdr->{size} - $size);
                $ErrorCnt++;
                $truncated = 1;
            }
            TarWrite($fh, \$data);
            $size += length($data);
            #
            # Nothing more can be written once we have truncated, so
            # stop reading rather than report the same error again
            # for every remaining chunk.
            #
            last if ( $truncated );
        }
        $f->close;
        if ( $size != $hdr->{size} ) {
            #
            # Less data than the header promised: make up the
            # difference with NUL bytes.
            #
            print(STDERR "Error: padding $hdr->{fullPath} to $hdr->{size}"
                       . " bytes from $size bytes\n");
            $ErrorCnt++;
            while ( $size < $hdr->{size} ) {
                my $len = $hdr->{size} - $size;
                $len = $BufSize if ( $len > $BufSize );
                $data = "\0" x $len;
                TarWrite($fh, \$data);
                $size += $len;
            }
        }
        TarWritePad($fh, $size);
        $FileCnt++;
        $ByteCnt += $size;
    } else {
        print(STDERR "Got unknown type $hdr->{type} for $hdr->{name}\n");
        $ErrorCnt++;
    }
}
562