1# Copyright 2002 Ben Escoto
2#
3# This file is part of rdiff-backup.
4#
5# rdiff-backup is free software; you can redistribute it and/or modify
6# under the terms of the GNU General Public License as published by the
7# Free Software Foundation; either version 2 of the License, or (at your
8# option) any later version.
9#
10# rdiff-backup is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with rdiff-backup; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18# 02110-1301, USA
19"""Generate and process aggregated backup information"""
20
21import time
22from functools import reduce
23from . import Globals, Time, increment, log, metadata
24
25
class StatsException(Exception):
    """Raised when a statistics line or string cannot be parsed"""
28
29
class StatsObj:
    """Contains various statistics, provide string conversion functions

    Every name listed in stat_attrs is stored as an instance attribute
    and starts out as None.  The methods below convert the statistics
    to and from a one-line form (file attributes only) and a multi-line
    "Name value" form.

    """

    # Per-file counters and sizes, in the order used by the one-line format
    stat_file_attrs = ('SourceFiles', 'SourceFileSize', 'MirrorFiles',
                       'MirrorFileSize', 'NewFiles', 'NewFileSize',
                       'DeletedFiles', 'DeletedFileSize', 'ChangedFiles',
                       'ChangedSourceSize', 'ChangedMirrorSize',
                       'IncrementFiles', 'IncrementFileSize')
    stat_misc_attrs = ('Errors', 'TotalDestinationSizeChange')
    stat_time_attrs = ('StartTime', 'EndTime', 'ElapsedTime')
    # Every attribute name a StatsObj carries
    stat_attrs = (
        ('Filename', ) + stat_time_attrs + stat_misc_attrs + stat_file_attrs)

    # Below, the second value in each pair is true iff the value
    # indicates a number of bytes
    stat_file_pairs = (
        ('SourceFiles', None),
        ('SourceFileSize', 1),
        ('MirrorFiles', None),
        ('MirrorFileSize', 1),
        ('NewFiles', None),
        ('NewFileSize', 1),
        ('DeletedFiles', None),
        ('DeletedFileSize', 1),
        ('ChangedFiles', None),
        ('ChangedSourceSize', 1),
        ('ChangedMirrorSize', 1),
        ('IncrementFiles', None),
        ('IncrementFileSize', 1),
    )

    # This is used in get_byte_summary_string below; it must stay sorted
    # from the largest unit to the smallest because the first match wins
    byte_abbrev_list = (
        (1024 * 1024 * 1024 * 1024, "TB"),
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    )

    def __init__(self):
        """Set attributes to None"""
        for attr in self.stat_attrs:
            self.__dict__[attr] = None

    def get_stat(self, attribute):
        """Get a statistic

        Raises KeyError if the attribute was never set on this instance.

        """
        return self.__dict__[attribute]

    def set_stat(self, attr, value):
        """Set attribute to given value"""
        self.__dict__[attr] = value

    def increment_stat(self, attr):
        """Add 1 to value of attribute"""
        self.__dict__[attr] += 1

    def add_to_stat(self, attr, value):
        """Add value to given attribute"""
        self.__dict__[attr] += value

    def get_total_dest_size_change(self):
        """Return total destination size change

        This represents the total change in the size of the
        rdiff-backup destination directory.

        The result is NewFileSize + ChangedSourceSize + IncrementFileSize
        minus DeletedFileSize and ChangedMirrorSize, or None as soon as
        any one of these is None.  The result is also stored in
        self.TotalDestinationSizeChange.

        """
        addvals = [
            self.NewFileSize, self.ChangedSourceSize, self.IncrementFileSize
        ]
        subtractvals = [self.DeletedFileSize, self.ChangedMirrorSize]
        if any(val is None for val in addvals + subtractvals):
            result = None
        else:
            result = sum(addvals) - sum(subtractvals)
        self.TotalDestinationSizeChange = result
        return result

    def get_stats_line(self, index, quote_filename=1):
        """Return one line abbreviated version of full stats string

        index is a sequence of str path components; an empty index is
        written as ".".  The filename is followed by the values of
        stat_file_attrs, all separated by single spaces.  If
        quote_filename is true, newlines and spaces in the filename are
        escaped.

        """
        file_attrs = [
            str(self.get_stat(attr)) for attr in self.stat_file_attrs
        ]
        if not index:
            filename = "."
        else:
            filename = '/'.join(index)  # RORPath.path_join works only with bytes paths
            if quote_filename:
                # quote filename to make sure it doesn't have spaces
                # or newlines impeding proper parsing of the line
                filename = filename.replace('\n', '\\n').replace(' ', '\\x20')
        return " ".join([filename] + file_attrs)

    def set_stats_from_line(self, line):
        """Set statistics from given line

        The last len(stat_file_attrs) space separated fields of the
        line are parsed as int, or as float if that fails, and assigned
        to the file attributes in order.  Returns self.  Raises
        StatsException if the line has too few fields (this includes an
        empty line) or a field is not a number.

        """
        # endswith() rather than line[-1] so an empty line is reported
        # as a bad line instead of failing with an IndexError
        if line.endswith("\n"):
            line = line[:-1]
        lineparts = line.split(" ")
        attr_count = len(self.stat_file_attrs)
        if len(lineparts) < attr_count:
            raise StatsException("Bad line '%s'" % line)
        for attr, val_string in zip(self.stat_file_attrs,
                                    lineparts[-attr_count:]):
            try:
                val = int(val_string)
            except ValueError:
                try:
                    val = float(val_string)
                except ValueError:
                    raise StatsException("Bad line '%s'" % line)
            self.set_stat(attr, val)
        return self

    def get_stats_string(self):
        """Return extended string printing out statistics"""
        return "%s%s%s" % (self.get_timestats_string(),
                           self.get_filestats_string(),
                           self.get_miscstats_string())

    def get_timestats_string(self):
        """Return portion of statistics string dealing with time

        Only the time attributes which are set produce a line.  If
        ElapsedTime is None but both StartTime and EndTime are known,
        ElapsedTime is computed from them and stored on self.

        """
        timelist = []
        if self.StartTime is not None:
            timelist.append(
                "StartTime %.2f (%s)\n" % (self.StartTime,
                                           Time.timetopretty(self.StartTime)))
        if self.EndTime is not None:
            timelist.append("EndTime %.2f (%s)\n" %
                            (self.EndTime, Time.timetopretty(self.EndTime)))
        if self.ElapsedTime or (self.StartTime is not None
                                and self.EndTime is not None):
            if self.ElapsedTime is None:
                self.ElapsedTime = self.EndTime - self.StartTime
            timelist.append(
                "ElapsedTime %.2f (%s)\n" %
                (self.ElapsedTime, Time.inttopretty(self.ElapsedTime)))
        return "".join(timelist)

    def get_filestats_string(self):
        """Return portion of statistics string about files and bytes"""

        def fileline(stat_file_pair):
            """Return zero or one line of the string"""
            attr, in_bytes = stat_file_pair
            val = self.get_stat(attr)
            if val is None:
                return ""
            if in_bytes:
                return "%s %s (%s)\n" % (attr, val,
                                         self.get_byte_summary_string(val))
            return "%s %s\n" % (attr, val)

        return "".join(fileline(pair) for pair in self.stat_file_pairs)

    def get_miscstats_string(self):
        """Return portion of extended stat string about misc attributes

        As a side effect TotalDestinationSizeChange is recomputed.

        """
        misc_string = ""
        tdsc = self.get_total_dest_size_change()
        if tdsc is not None:
            misc_string += ("TotalDestinationSizeChange %s (%s)\n" %
                            (tdsc, self.get_byte_summary_string(tdsc)))
        if self.Errors is not None:
            misc_string += "Errors %d\n" % self.Errors
        return misc_string

    def get_byte_summary_string(self, byte_count):
        """Turn byte count into human readable string like "7.23GB" """
        if byte_count < 0:
            sign = "-"
            byte_count = -byte_count
        else:
            sign = ""

        for abbrev_bytes, abbrev_string in self.byte_abbrev_list:
            if byte_count >= abbrev_bytes:
                # Now get 3 significant figures
                abbrev_count = float(byte_count) / abbrev_bytes
                if abbrev_count >= 100:
                    precision = 0
                elif abbrev_count >= 10:
                    precision = 1
                else:
                    precision = 2
                return "%s%.*f %s" % (sign, precision, abbrev_count,
                                      abbrev_string)
        # Less than 1 KB: print a plain (rounded) number of bytes
        byte_count = round(byte_count)
        if byte_count == 1:
            return sign + "1 byte"
        return "%s%d bytes" % (sign, byte_count)

    def get_stats_logstring(self, title):
        """Like get_stats_string, but add header and footer"""
        header = "--------------[ %s ]--------------" % title
        footer = "-" * len(header)
        return "%s\n%s%s\n" % (header, self.get_stats_string(), footer)

    def set_stats_from_string(self, s):
        """Initialize attributes from string, return self for convenience

        Each non-empty line must start with an attribute name from
        stat_attrs followed by a number; anything after the second
        field is ignored.  Raises StatsException on a malformed line.

        """
        for line in s.split("\n"):
            if not line:
                continue
            line_parts = line.split()
            if len(line_parts) < 2:
                raise StatsException("Bad line '%s'" % line)
            attr, value_string = line_parts[:2]
            if attr not in self.stat_attrs:
                raise StatsException("Bad line '%s'" % line)
            # Prefer the exact integer whenever the text is an integer:
            # going through float would silently lose precision for
            # values above 2**53
            try:
                value = int(value_string)
            except ValueError:
                try:
                    value = float(value_string)
                except ValueError:
                    raise StatsException("Bad line '%s'" % line)
            self.set_stat(attr, value)
        return self

    def write_stats_to_rp(self, rp):
        """Write statistics string to given rpath"""
        fp = rp.open("w")  # statistics are a text file
        fp.write(self.get_stats_string())
        # close() must not be called inside the assert statement itself,
        # else the file would never be closed when running with -O
        close_result = fp.close()
        assert not close_result, close_result

    def read_stats_from_rp(self, rp):
        """Set statistics from rpath, return self for convenience"""
        fp = rp.open("r")  # statistics are a text file
        try:
            self.set_stats_from_string(fp.read())
        finally:
            # don't leak the file object if the content can't be parsed
            fp.close()
        return self

    def stats_equal(self, s):
        """Return true if s has same statistics as self

        Only the attributes in stat_file_attrs are compared.  Returns 1
        if they are all equal, None otherwise.

        """
        assert isinstance(s, StatsObj)
        for attr in self.stat_file_attrs:
            if self.get_stat(attr) != s.get_stat(attr):
                return None
        return 1

    def set_to_average(self, statobj_list):
        """Set self's attributes to average of those in statobj_list

        An attribute which is None in any of the objects is None in the
        average, as are StartTime and EndTime.  Returns self.  Raises
        ZeroDivisionError if statobj_list is empty.

        """
        for attr in self.stat_attrs:
            self.set_stat(attr, 0)
        for statobj in statobj_list:
            for attr in self.stat_attrs:
                other_val = statobj.get_stat(attr)
                own_val = self.get_stat(attr)
                if other_val is None:
                    self.set_stat(attr, None)
                elif own_val is not None:
                    self.set_stat(attr, other_val + own_val)

        # Don't compute average starting/stopping time
        self.StartTime = None
        self.EndTime = None

        for attr in self.stat_attrs:
            total = self.get_stat(attr)
            if total is not None:
                self.set_stat(attr, total / float(len(statobj_list)))
        return self

    def get_statsobj_copy(self):
        """Return new StatsObj object with same stats as self"""
        s = StatsObj()
        for attr in self.stat_attrs:
            s.set_stat(attr, self.get_stat(attr))
        return s
308
309
class StatFileObj(StatsObj):
    """StatsObj which accumulates statistics file by file"""

    def __init__(self, start_time=None):
        """Zero the file attributes and Errors, record the start time

        If start_time is None, Time.curtime is used instead.

        """
        super().__init__()
        for name in self.stat_file_attrs:
            self.set_stat(name, 0)
        self.StartTime = Time.curtime if start_time is None else start_time
        self.Errors = 0

    def add_source_file(self, src_rorp):
        """Count one source file, and its size if it is a regular file"""
        self.SourceFiles = self.SourceFiles + 1
        if not src_rorp.isreg():
            return
        self.SourceFileSize = self.SourceFileSize + src_rorp.getsize()

    def add_dest_file(self, dest_rorp):
        """Count one mirror file, and its size if it is a regular file"""
        self.MirrorFiles = self.MirrorFiles + 1
        if not dest_rorp.isreg():
            return
        self.MirrorFileSize = self.MirrorFileSize + dest_rorp.getsize()

    def add_changed(self, src_rorp, dest_rorp):
        """Update stats when src_rorp changes to dest_rorp

        Both present counts as a changed file, only the source present
        as a new file, only the destination present as a deleted file.
        Sizes are only added for regular files.

        """
        if src_rorp and src_rorp.lstat():
            if dest_rorp and dest_rorp.lstat():
                self.ChangedFiles = self.ChangedFiles + 1
                if src_rorp.isreg():
                    self.ChangedSourceSize = (
                        self.ChangedSourceSize + src_rorp.getsize())
                if dest_rorp.isreg():
                    self.ChangedMirrorSize = (
                        self.ChangedMirrorSize + dest_rorp.getsize())
            else:
                self.NewFiles = self.NewFiles + 1
                if src_rorp.isreg():
                    self.NewFileSize = self.NewFileSize + src_rorp.getsize()
        elif dest_rorp and dest_rorp.lstat():
            self.DeletedFiles = self.DeletedFiles + 1
            if dest_rorp.isreg():
                self.DeletedFileSize = (
                    self.DeletedFileSize + dest_rorp.getsize())

    def add_increment(self, inc_rorp):
        """Count one increment, and its size if it is a regular file"""
        self.IncrementFiles = self.IncrementFiles + 1
        if not inc_rorp.isreg():
            return
        self.IncrementFileSize = self.IncrementFileSize + inc_rorp.getsize()

    def add_error(self):
        """Count one more error"""
        self.Errors = self.Errors + 1

    def finish(self, end_time=None):
        """Record the end time, the current time if none is given"""
        self.EndTime = time.time() if end_time is None else end_time
367
368
# The StatFileObj currently collecting statistics, or None if there is none.
# Set by init_statfileobj() and reset by write_active_statfileobj().
_active_statfileobj = None
370
371
def init_statfileobj():
    """Create a StatFileObj, make it the active one and return it

    There must not already be an active stat file object.

    """
    global _active_statfileobj
    assert not _active_statfileobj, _active_statfileobj
    new_statfileobj = StatFileObj()
    _active_statfileobj = new_statfileobj
    return new_statfileobj
378
379
def get_active_statfileobj():
    """Return the active stat file object, or None if there is none"""
    return _active_statfileobj or None
386
387
def record_error():
    """Count an error on the active statfileobj; no-op if there is none"""
    if not _active_statfileobj:
        return
    _active_statfileobj.add_error()
392
393
def process_increment(inc_rorp):
    """Add increment inc_rorp to the active statfileobj, if there is one"""
    if not _active_statfileobj:
        return
    _active_statfileobj.add_increment(inc_rorp)
398
399
def write_active_statfileobj(end_time=None):
    """Finish the active StatFileObj and write it to the session statistics

    There must be an active stat file object; afterwards there is
    none anymore.

    """
    global _active_statfileobj
    assert _active_statfileobj
    stats_rp = increment.get_inc(
        Globals.rbdir.append(b"session_statistics"), 'data', Time.curtime)
    _active_statfileobj.finish(end_time)
    _active_statfileobj.write_stats_to_rp(stats_rp)
    _active_statfileobj = None
409
410
def print_active_stats(end_time=None):
    """Finish the active statobj, send its statistics to log and stdout

    There must be an active stat file object; it remains active.

    """
    assert _active_statfileobj
    active = _active_statfileobj
    active.finish(end_time)
    message = active.get_stats_logstring("Session statistics")
    log.Log.log_to_file(message)
    Globals.client_conn.sys.stdout.write(message)
419
420
class FileStats:
    """Keep track of less detailed stats on file-by-file basis

    All state is kept on the class itself: init() opens the statistics
    file, update() buffers one line per file, close() writes out what
    is left in the buffer and closes the file.

    """
    # Open binary file object and the rpath it was opened from; both
    # are None while no file statistics are being written
    _fileobj, _rp = None, None
    # Bytes separator written after each line, chosen in init()
    _line_sep = None

    @classmethod
    def init(cls):
        """Open file stats object and prepare to write"""
        assert not (cls._fileobj or cls._rp), (cls._fileobj, cls._rp)
        rpbase = Globals.rbdir.append(b"file_statistics")
        suffix = 'data.gz' if Globals.compression else 'data'
        cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
        assert not cls._rp.lstat()
        cls._fileobj = cls._rp.open("wb", compress=Globals.compression)

        cls._line_sep = b'\0' if Globals.null_separator else b'\n'
        cls.write_docstring()
        cls.line_buffer = []

    @classmethod
    def write_docstring(cls):
        """Write the first line (a documentation string) into file"""
        cls._fileobj.write(b"# Format of each line in file statistics file:")
        cls._fileobj.write(cls._line_sep)
        cls._fileobj.write(b"# Filename Changed SourceSize MirrorSize "
                           b"IncrementSize" + cls._line_sep)

    @classmethod
    def update(cls, source_rorp, dest_rorp, changed, inc):
        """Update file stats with given information

        The filename is taken from source_rorp, or from dest_rorp if
        source_rorp is false.  The line is only buffered; the buffer is
        written out once it holds 100 lines.

        """
        if source_rorp:
            filename = source_rorp.get_indexpath()
        else:
            filename = dest_rorp.get_indexpath()
        filename = metadata.quote_path(filename)

        size_list = [
            cls.get_size(rorp) for rorp in (source_rorp, dest_rorp, inc)
        ]
        line = b" ".join([filename, str(changed).encode()] + size_list)
        cls.line_buffer.append(line)
        if len(cls.line_buffer) >= 100:
            cls.write_buffer()

    @classmethod
    def get_size(cls, rorp):
        """Return the size of rorp as bytes, b"NA" if there is no rorp

        Anything which isn't a regular file has size b"0".

        """
        if not rorp:
            return b"NA"
        if rorp.isreg():
            return str(rorp.getsize()).encode()
        return b"0"

    @classmethod
    def write_buffer(cls):
        """Write buffer to file because buffer is full

        The buffer part is necessary because the GzipFile.write()
        method seems fairly slow.

        """
        assert cls.line_buffer and cls._fileobj
        cls.line_buffer.append(b'')  # have join add _line_sep to end also
        cls._fileobj.write(cls._line_sep.join(cls.line_buffer))
        cls.line_buffer = []

    @classmethod
    def close(cls):
        """Close file stats file"""
        assert cls._fileobj, cls._fileobj
        if cls.line_buffer:
            cls.write_buffer()
        # close() must not be called inside the assert statement itself,
        # else the file would never be closed when running with -O
        close_result = cls._fileobj.close()
        assert not close_result, close_result
        cls._fileobj = cls._rp = None
494