1#!/usr/bin/perl
2#============================================================= -*-perl-*-
3#
4# BackupPC_fsck: Pool reference count file system check
5#
6# DESCRIPTION
7#
8#   BackupPC_fsck checks the pool reference counts
9#
10#   Usage: BackupPC_fsck
11#
12# AUTHOR
13#   Craig Barratt  <cbarratt@users.sourceforge.net>
14#
15# COPYRIGHT
16#   Copyright (C) 2001-2022  Craig Barratt
17#
18#   This program is free software: you can redistribute it and/or modify
19#   it under the terms of the GNU General Public License as published by
20#   the Free Software Foundation, either version 3 of the License, or
21#   (at your option) any later version.
22#
23#   This program is distributed in the hope that it will be useful,
24#   but WITHOUT ANY WARRANTY; without even the implied warranty of
25#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26#   GNU General Public License for more details.
27#
28#   You should have received a copy of the GNU General Public License
29#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
30#
31#========================================================================
32#
33# Version 4.3.3.20200531, released 3 May 2022.
34#
35# See http://backuppc.sourceforge.net.
36#
37#========================================================================
38
39use strict;
40no  utf8;
41use lib "__INSTALLDIR__/lib";
42use BackupPC::Lib;
43use BackupPC::XS;
44use BackupPC::Storage;
45use BackupPC::DirOps;
46use Getopt::Std;
47
48use File::Path;
49use Data::Dumper;
50
my(@Ref, $RefTotal);
my $ErrorCnt = 0;
my %opts;

#
# Binary MD5 digest of zero-length input.  poolCountUpdate() excludes this
# digest when computing the maximum link count statistic.
#
my $EmptyMD5 = pack("H*", "d41d8cd98f00b204e9800998ecf8427e");

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();
my $Hosts  = $bpc->HostInfoRead();
my $s      = $bpc->{storage};

if ( !getopts("fns", \%opts) || @ARGV >= 1 ) {
    print STDERR <<EOF;
usage: $0 [options]
  Options:
     -f              force regeneration of per-host reference counts
     -n              don't remove zero count pool files - print only
     -s              recompute pool stats
EOF
    exit(1);
}

#
# We can't run if BackupPC is running
#
CheckIfServerRunning();

my($Status, $Info);
if ( $opts{s} ) {
    ($Status, $Info) = $s->StatusDataRead();
    if ( !defined($Info) && ref($Status) ne "HASH" ) {
        print STDERR "$0: status.pl read failed: $Status\n";
        $Info   = {};
        $Status = {};
    }
    #
    # Zero out the per-chunk statistics (16 chunks per pool).  Every key
    # that poolCountUpdate() accumulates must be reset here, including
    # FileLinkTotal; otherwise values read from status.pl would be added
    # to again on every run.
    #
    my @statKeys = qw(FileCnt DirCnt KbRm Kb FileCntRm FileCntRep
                      FileRepMax FileLinkMax FileLinkTotal Time);
    for my $p ( qw(pool4 cpool4) ) {
        for my $i ( 0 .. 15 ) {
            for my $key ( @statKeys ) {
                $Info->{pool}{$p}[$i]{$key} = 0;
            }
        }
    }
}

#
# The helper is run via the list form of system() so that host names and
# the install path are passed as-is, without shell interpretation.
#
my $RefCountUpdate = "$BinDir/BackupPC_refCountUpdate";

if ( $opts{f} ) {
    #
    # Rebuild host count database
    #
    foreach my $host ( sort(keys(%$Hosts)) ) {
        print("BackupPC_fsck: Rebuilding count database for host $host\n");
        $ErrorCnt++ if ( system($RefCountUpdate, "-h", $host, "-f", "-p") );
    }
} else {
    #
    # Make sure each host count database is up to date
    # (ie: process any delta files)
    #
    foreach my $host ( sort(keys(%$Hosts)) ) {
        $ErrorCnt++ if ( system($RefCountUpdate, "-o", "0", "-h", $host, "-p") );
    }
}

CheckIfServerRunning();

print("BackupPC_fsck: building main count database\n");
$ErrorCnt++ if ( system($RefCountUpdate, "-m", "-p") );

CheckIfServerRunning();

print("BackupPC_fsck: Calling poolCountUpdate\n");
poolCountUpdate();

if ( $opts{s} ) {
    print("$0: Rewriting $s->{LogDir}/status.pl\n");
    $s->StatusDataWrite($Status, $Info);
}

#
# Exit status is 1 if any step reported an error, 0 otherwise.
#
print("$0: got $ErrorCnt errors\n");
exit($ErrorCnt ? 1 : 0);
141
#
# poolCountUpdate: walk each V4 pool directory (pool4, then cpool4), and
# for every pool file whose reference count is zero either remove it
# (last in a digest chain) or truncate it to an empty file (middle of a
# chain).  With -n nothing is modified.  With -s, per-chunk statistics
# and the cumulative pool4/cpool4 statistics are accumulated in $Info
# and a summary is printed.
#
# Takes no arguments and has no meaningful return value.  Uses the
# file-level $bpc, %Conf, %opts, $Info, $EmptyMD5 and $ErrorCnt.
#
sub poolCountUpdate
{
    for ( my $compress = 0 ; $compress < 2 ; $compress++ ) {
        my $poolName = $compress ? "cpool4" : "pool4";
        for ( my $refCntFile = 0 ; $refCntFile < 128 ; $refCntFile++ ) {
            my $fileCnt = 0;       # total number of pool files
            my $dirCnt = 0;        # total number of pool directories
            my $blkCnt = 0;        # total block size of pool files
            my $fileCntRm = 0;     # total number of removed files
            my $blkCntRm = 0;      # total block size of removed pool files
            my $fileCntRep = 0;    # total number of pool files with repeated md5 checksums
                                   # (ie: digest > 16 bytes; first instance isn't counted)
            my $fileRepMax = 0;    # worse case chain length of pool files that have repeated
                                   # checksums (ie: max(NNN) for all digests xxxxxxxxxxxxxxxxNNN)
            my $fileLinkMax = 0;   # maximum number of links on a pool file
            my $fileLinkTotal = 0; # total number of links on entire pool

            #
            # Top-level pool directories are the even two-digit hex
            # names 00, 02, ..., fe.
            #
            my $poolDir = sprintf("%s/%02x",
                                  $compress ? $bpc->{CPoolDir} : $bpc->{PoolDir},
                                  $refCntFile * 2);
            next if ( !-d $poolDir );
            $dirCnt++;

            my $count       = BackupPC::XS::PoolRefCnt::new();
            my $countValid  = 1;
            my $poolCntFile = "$poolDir/poolCnt";

            #
            # Count the number of pool directories
            #
            my $entries = BackupPC::DirOps::dirRead($bpc, $poolDir);
            foreach my $e ( @{$entries || []} ) {
                next if ( $e->{name} !~ /^[\da-f][\da-f]$/ );
                $dirCnt++;
            }

            #
            # Grab a lock to make sure BackupPC_dump won't unmark and use a pending
            # delete file.
            #
            my $lockFd = BackupPC::XS::DirOps::lockRangeFile("$poolDir/LOCK", 0, 1, 1);
            if ( -f $poolCntFile && $count->read($poolCntFile) ) {
                #
                # Without a trustworthy count every pool file would look
                # unreferenced, so don't remove anything below this directory.
                #
                print("Can't read pool count file $poolCntFile\n");
                print("Not removing any pool files below $poolDir\n");
                $countValid = 0;
                $ErrorCnt++;
            }

            #
            # Check that every file in the pool has a corresponding count.
            # There are 128 subdirectories below this level.
            #
            for ( my $subDir = 0 ; $subDir < 128 ; $subDir++ ) {
                my $subPoolDir = sprintf("%s/%02x", $poolDir, $subDir * 2);
                next if ( !-d $subPoolDir );

                my $subEntries = BackupPC::DirOps::dirRead($bpc, $subPoolDir);
                next if ( !defined($subEntries) );

                #
                # traverse the files in reverse name order, in case we can delete
                # multiple files in a single chain.  The entries are hash refs, so
                # the comparison has to be on the name field.
                #
                foreach my $e ( sort { $b->{name} cmp $a->{name} } @$subEntries ) {
                    next if ( $e->{name} eq "."
                            || $e->{name} eq ".."
                            || $e->{name} eq "LOCK" );
                    my $digest   = pack("H*", $e->{name});
                    my $poolFile = "$subPoolDir/$e->{name}";
                    #printf("Got %s, digest = %s\n", $e->{name}, unpack("H*", $digest));
                    my @s;
                    my $nBlks = 0;
                    if ( $opts{s} ) {
                        #
                        # stat field 12 is the number of allocated blocks
                        #
                        @s       = stat($poolFile);
                        $nBlks   = $s[12] if ( defined($s[12]) );
                        $blkCnt += $nBlks;
                    }
                    next if ( !$countValid || $count->get($digest) != 0 );

                    #
                    # figure out the next file in the chain to see how to
                    # handle this one.
                    #
                    @s = stat($poolFile) if ( !$opts{s} );
                    my $ext = $bpc->digestExtGet($digest);
                    my($nextDigest, $nextPoolFile) = $bpc->digestConcat($digest,
                                                                        $ext + 1, $compress);
                    if ( !-f $nextPoolFile ) {
                        #
                        # last in the chain (or no chain) - just delete it
                        #
                        print("Removing pool file $poolFile\n") if ( $Conf{XferLogLevel} >= 2 );
                        if ( !$opts{n} ) {
                            if ( unlink($poolFile) != 1 ) {
                                print("Can't remove $poolFile\n");
                                $ErrorCnt++;
                                next;
                            }
                            $count->delete($digest);
                            $fileCntRm++;
                            $blkCntRm += $nBlks;
                        }
                    } elsif ( $s[7] > 0 ) {
                        #
                        # in the middle of a chain of pool files, so
                        # we replace the file with an empty file.
                        # (stat field 7 is the size: already-empty
                        # files are left alone.)
                        #
                        print("Zeroing pool file $poolFile (next $nextPoolFile exists)\n") if ( $Conf{XferLogLevel} >= 2 );
                        if ( !$opts{n} ) {
                            if ( chmod(0644, $poolFile) != 1 ) {
                                print("Can't chmod 0644 $poolFile\n");
                                $ErrorCnt++;
                            }
                            if ( open(my $fh, ">", $poolFile) ) {
                                close($fh);
                            } else {
                                print("Can't truncate $poolFile\n");
                                $ErrorCnt++;
                                next;
                            }
                            $count->delete($digest);
                            $fileCntRm++;
                            $blkCntRm += $nBlks;
                        }
                    }
                }
            }

            #
            # Done modifying this pool directory: release the lock so we
            # don't hold one open descriptor per pool directory.
            # NOTE(review): the in-memory $count (with deleted entries) is
            # not written back to $poolCntFile here - confirm that is intended.
            #
            BackupPC::XS::DirOps::unlockRangeFile($lockFd)
                                if ( defined($lockFd) && $lockFd >= 0 );

            if ( $opts{s} ) {
                my($digest, $cnt);
                my $idx = 0;
                while ( 1 ) {
                    ($digest, $cnt, $idx) = $count->iterate($idx);
                    last if ( !defined($digest) );

                    $fileCnt++;
                    $fileLinkTotal += $cnt;
                    $fileLinkMax    = $cnt if ( $cnt > $fileLinkMax && $digest ne $EmptyMD5 );
                    next if ( length($digest) <= 16 );
                    my $ext = $bpc->digestExtGet($digest);
                    $fileCntRep += $ext;
                    $fileRepMax  = $ext if ( $ext > $fileRepMax );
                }
                #
                # Block counts are converted to kB by halving
                # (presumably 512-byte blocks), rounding to nearest.
                #
                my $kb   = int($blkCnt / 2 + 0.5);
                my $kbRm = int($blkCntRm / 2 + 0.5);
                #print("BackupPC_stats4 $refCntFile = $poolName,$fileCnt,$dirCnt,$kb,$kbRm,"
                #      . "$fileCntRm,$fileCntRep,$fileRepMax,$fileLinkMax,$fileLinkTotal\n");

                #
                # The 128 pool directories are grouped into 16 chunks of 8.
                #
                my $chunk = int($refCntFile / 8);
                $Info->{pool}{$poolName}[$chunk]{FileCnt}       += $fileCnt;
                $Info->{pool}{$poolName}[$chunk]{DirCnt}        += $dirCnt;
                $Info->{pool}{$poolName}[$chunk]{Kb}            += $kb;
                $Info->{pool}{$poolName}[$chunk]{KbRm}          += $kbRm;
                $Info->{pool}{$poolName}[$chunk]{FileCntRm}     += $fileCntRm;
                $Info->{pool}{$poolName}[$chunk]{FileCntRep}    += $fileCntRep;
                $Info->{pool}{$poolName}[$chunk]{FileRepMax}     = $fileRepMax
                        if ( $Info->{pool}{$poolName}[$chunk]{FileRepMax} < $fileRepMax );
                $Info->{pool}{$poolName}[$chunk]{FileLinkMax}    = $fileLinkMax
                        if ( $Info->{pool}{$poolName}[$chunk]{FileLinkMax} < $fileLinkMax );
                $Info->{pool}{$poolName}[$chunk]{FileLinkTotal} += $fileLinkTotal;
                $Info->{pool}{$poolName}[$chunk]{Time}           = time;
            }
        }
    }
    if ( $opts{s} ) {
        #
        # Update the cumulative statistics for pool4 and cpool4:
        # the first group of keys is summed over the 16 chunks, the
        # second group takes the maximum over the chunks.
        #
        my @sumKeys = qw(FileCnt DirCnt Kb KbRm FileCntRm FileCntRep);
        my @maxKeys = qw(FileRepMax FileLinkMax Time);

        for my $p ( qw(pool4 cpool4) ) {
            for my $key ( @sumKeys, @maxKeys ) {
                $Info->{"${p}${key}"} = 0;
            }
            delete $Info->{"${p}FileCntRename"};
            for ( my $i = 0 ; $i < 16 ; $i++ ) {
                for my $key ( @sumKeys ) {
                    $Info->{"${p}${key}"} += $Info->{pool}{$p}[$i]{$key};
                }
                for my $key ( @maxKeys ) {
                    $Info->{"${p}${key}"} = $Info->{pool}{$p}[$i]{$key}
                            if ( $Info->{"${p}${key}"} < $Info->{pool}{$p}[$i]{$key} );
                }
            }
            printf("%s%s BackupPC_fsck removed %d files of"
                   . " size %.2fGB\n",
                     $bpc->timeStamp, ucfirst($p),
                     $Info->{"${p}FileCntRm"},
                     $Info->{"${p}KbRm"} / (1000 * 1024));
            printf("%s%s is %.2fGB, %d files (%d repeated, "
                   . "%d max chain, %d max links), %d directories\n",
                     $bpc->timeStamp, ucfirst($p),
                     $Info->{"${p}Kb"} / (1000 * 1024),
                     $Info->{"${p}FileCnt"}, $Info->{"${p}FileCntRep"},
                     $Info->{"${p}FileRepMax"},
                     $Info->{"${p}FileLinkMax"}, $Info->{"${p}DirCnt"});
        }
    }
}
358
#
# CheckIfServerRunning: try to connect to the BackupPC server at the
# configured $Conf{ServerHost} / $Conf{ServerPort}.  An undefined result
# from ServerConnect() is treated as a successful connection, meaning a
# server is running; in that case print a message to STDERR and exit
# with status 1.  Otherwise simply return.
#
sub CheckIfServerRunning
{
    my $connectErr = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});

    #
    # A defined value means the connect attempt failed, so it is
    # safe to continue.
    #
    return if ( defined($connectErr) );

    print STDERR "$0: can't run since BackupPC is running\n";
    exit(1);
}
367