# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA
"""Generate and process aggregated backup information"""

import time
from functools import reduce  # noqa: F401 -- unused here now, kept for importers
from . import Globals, Time, increment, log, metadata


class StatsException(Exception):
    """Raised when a statistics line or string cannot be parsed"""


class StatsObj:
    """Contains various statistics, provide string conversion functions

    Every name listed in stat_attrs is stored as an instance attribute;
    a value of None means that the statistic has not been set.

    """

    stat_file_attrs = ('SourceFiles', 'SourceFileSize', 'MirrorFiles',
                       'MirrorFileSize', 'NewFiles', 'NewFileSize',
                       'DeletedFiles', 'DeletedFileSize', 'ChangedFiles',
                       'ChangedSourceSize', 'ChangedMirrorSize',
                       'IncrementFiles', 'IncrementFileSize')
    stat_misc_attrs = ('Errors', 'TotalDestinationSizeChange')
    stat_time_attrs = ('StartTime', 'EndTime', 'ElapsedTime')
    stat_attrs = (
        ('Filename', ) + stat_time_attrs + stat_misc_attrs + stat_file_attrs)

    # Below, the second value in each pair is true iff the value
    # indicates a number of bytes
    stat_file_pairs = (
        ('SourceFiles', None),
        ('SourceFileSize', 1),
        ('MirrorFiles', None),
        ('MirrorFileSize', 1),
        ('NewFiles', None),
        ('NewFileSize', 1),
        ('DeletedFiles', None),
        ('DeletedFileSize', 1),
        ('ChangedFiles', None),
        ('ChangedSourceSize', 1),
        ('ChangedMirrorSize', 1),
        ('IncrementFiles', None),
        ('IncrementFileSize', 1),
    )

    # This is used in get_byte_summary_string below; it must stay
    # ordered from the largest unit to the smallest one
    byte_abbrev_list = (
        (1024 * 1024 * 1024 * 1024, "TB"),
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    )

    def __init__(self):
        """Set attributes to None"""
        for attr in self.stat_attrs:
            self.__dict__[attr] = None

    def get_stat(self, attribute):
        """Get a statistic"""
        return self.__dict__[attribute]

    def set_stat(self, attr, value):
        """Set attribute to given value"""
        self.__dict__[attr] = value

    def increment_stat(self, attr):
        """Add 1 to value of attribute"""
        self.__dict__[attr] += 1

    def add_to_stat(self, attr, value):
        """Add value to given attribute"""
        self.__dict__[attr] += value

    def get_total_dest_size_change(self):
        """Return total destination size change

        This represents the total change in the size of the
        rdiff-backup destination directory.

        The result is also stored in TotalDestinationSizeChange. It is
        None as soon as one of the statistics it is computed from is
        None.

        """
        addvals = [
            self.NewFileSize, self.ChangedSourceSize, self.IncrementFileSize
        ]
        subtractvals = [self.DeletedFileSize, self.ChangedMirrorSize]
        if any(val is None for val in addvals + subtractvals):
            result = None
        else:
            result = sum(addvals) - sum(subtractvals)
        self.TotalDestinationSizeChange = result
        return result

    def get_stats_line(self, index, quote_filename=1):
        """Return one line abbreviated version of full stats string

        The line is made of the file name built from index (a sequence
        of str path components, "." if empty) followed by the values of
        stat_file_attrs, all separated by single spaces.

        """
        file_attrs = [
            str(self.get_stat(attr)) for attr in self.stat_file_attrs
        ]
        if not index:
            filename = "."
        else:
            # RORPath.path_join works only with bytes paths
            filename = '/'.join(index)
        if quote_filename:
            # quote filename to make sure it doesn't have spaces
            # or newlines impeding proper parsing of the line
            filename = filename.replace('\n', '\\n').replace(' ', '\\x20')
        return " ".join([filename] + file_attrs)

    def set_stats_from_line(self, line):
        """Set statistics from given line, return self for convenience

        The last len(stat_file_attrs) space separated fields of the line
        are parsed as numbers (int if possible, else float) and assigned
        to stat_file_attrs in order. Raise StatsException if there are
        too few fields or if one of them isn't a number.

        """

        def error():
            raise StatsException("Bad line '%s'" % line)

        # endswith instead of line[-1], so that an empty line is reported
        # as a StatsException instead of failing with an IndexError
        if line.endswith("\n"):
            line = line[:-1]
        lineparts = line.split(" ")
        nb_attrs = len(self.stat_file_attrs)
        if len(lineparts) < nb_attrs:
            error()
        for attr, val_string in zip(self.stat_file_attrs,
                                    lineparts[-nb_attrs:]):
            try:
                val = int(val_string)
            except ValueError:
                try:
                    val = float(val_string)
                except ValueError:
                    error()
            self.set_stat(attr, val)
        return self

    def get_stats_string(self):
        """Return extended string printing out statistics"""
        return "%s%s%s" % (self.get_timestats_string(),
                           self.get_filestats_string(),
                           self.get_miscstats_string())

    def get_timestats_string(self):
        """Return portion of statistics string dealing with time

        If ElapsedTime is None while StartTime and EndTime are both
        set, ElapsedTime is computed from them and stored on self.

        """
        timelist = []
        if self.StartTime is not None:
            timelist.append(
                "StartTime %.2f (%s)\n" % (self.StartTime,
                                           Time.timetopretty(self.StartTime)))
        if self.EndTime is not None:
            timelist.append("EndTime %.2f (%s)\n" %
                            (self.EndTime, Time.timetopretty(self.EndTime)))
        if self.ElapsedTime or (self.StartTime is not None
                                and self.EndTime is not None):
            if self.ElapsedTime is None:
                self.ElapsedTime = self.EndTime - self.StartTime
            timelist.append(
                "ElapsedTime %.2f (%s)\n" %
                (self.ElapsedTime, Time.inttopretty(self.ElapsedTime)))
        return "".join(timelist)

    def get_filestats_string(self):
        """Return portion of statistics string about files and bytes"""

        def fileline(stat_file_pair):
            """Return zero or one line of the string"""
            attr, in_bytes = stat_file_pair
            val = self.get_stat(attr)
            if val is None:
                return ""
            if in_bytes:
                return "%s %s (%s)\n" % (attr, val,
                                         self.get_byte_summary_string(val))
            return "%s %s\n" % (attr, val)

        return "".join(fileline(pair) for pair in self.stat_file_pairs)

    def get_miscstats_string(self):
        """Return portion of extended stat string about misc attributes"""
        misc_string = ""
        tdsc = self.get_total_dest_size_change()
        if tdsc is not None:
            misc_string += ("TotalDestinationSizeChange %s (%s)\n" %
                            (tdsc, self.get_byte_summary_string(tdsc)))
        if self.Errors is not None:
            misc_string += "Errors %d\n" % self.Errors
        return misc_string

    def get_byte_summary_string(self, byte_count):
        """Turn byte count into human readable string like "7.23GB" """
        if byte_count < 0:
            sign = "-"
            byte_count = -byte_count
        else:
            sign = ""

        for abbrev_bytes, abbrev_string in self.byte_abbrev_list:
            if byte_count >= abbrev_bytes:
                # Now get 3 significant figures
                abbrev_count = float(byte_count) / abbrev_bytes
                if abbrev_count >= 100:
                    precision = 0
                elif abbrev_count >= 10:
                    precision = 1
                else:
                    precision = 2
                # '*' takes the precision from the argument tuple
                return "%s%.*f %s" % (sign, precision, abbrev_count,
                                      abbrev_string)
        byte_count = round(byte_count)
        if byte_count == 1:
            return sign + "1 byte"
        return "%s%d bytes" % (sign, byte_count)

    def get_stats_logstring(self, title):
        """Like get_stats_string, but add header and footer"""
        header = "--------------[ %s ]--------------" % title
        footer = "-" * len(header)
        return "%s\n%s%s\n" % (header, self.get_stats_string(), footer)

    def set_stats_from_string(self, s):
        """Initialize attributes from string, return self for convenience

        Each non-empty line must start with an attribute name from
        stat_attrs followed by a number; anything after those two fields
        is ignored. Raise StatsException on any other line.

        """

        def error(line):
            raise StatsException("Bad line '%s'" % line)

        for line in s.split("\n"):
            if not line:
                continue
            line_parts = line.split()
            if len(line_parts) < 2:
                error(line)
            attr, value_string = line_parts[:2]
            if attr not in self.stat_attrs:
                error(line)
            # Prefer the exact integer value; going through float first
            # would lose precision for very large integers
            try:
                val = int(value_string)
            except ValueError:
                try:
                    val = float(value_string)
                except ValueError:
                    error(line)
            self.set_stat(attr, val)
        return self

    def write_stats_to_rp(self, rp):
        """Write statistics string to given rpath"""
        fp = rp.open("w")  # statistics are a text file
        try:
            fp.write(self.get_stats_string())
        finally:
            # close outside of any assert statement, else the file would
            # be left open when Python runs with assertions disabled (-O)
            close_result = fp.close()
        assert not close_result, close_result

    def read_stats_from_rp(self, rp):
        """Set statistics from rpath, return self for convenience"""
        fp = rp.open("r")  # statistics are a text file
        try:
            self.set_stats_from_string(fp.read())
        finally:
            fp.close()
        return self

    def stats_equal(self, s):
        """Return true if s has same statistics as self

        Only the attributes listed in stat_file_attrs are compared.
        The result is 1 if they are all equal, None otherwise.

        """
        assert isinstance(s, StatsObj)
        for attr in self.stat_file_attrs:
            if self.get_stat(attr) != s.get_stat(attr):
                return None
        return 1

    def set_to_average(self, statobj_list):
        """Set self's attributes to average of those in statobj_list

        An attribute which is None in at least one object of the list
        is set to None. StartTime and EndTime are always set to None.
        Return self for convenience. An empty statobj_list leads to a
        ZeroDivisionError.

        """
        for attr in self.stat_attrs:
            self.set_stat(attr, 0)
        for statobj in statobj_list:
            for attr in self.stat_attrs:
                if statobj.get_stat(attr) is None:
                    self.set_stat(attr, None)
                elif self.get_stat(attr) is not None:
                    self.set_stat(attr,
                                  statobj.get_stat(attr) + self.get_stat(attr))

        # Don't compute average starting/stopping time
        self.StartTime = None
        self.EndTime = None

        for attr in self.stat_attrs:
            if self.get_stat(attr) is not None:
                self.set_stat(attr,
                              self.get_stat(attr) / float(len(statobj_list)))
        return self

    def get_statsobj_copy(self):
        """Return new StatsObj object with same stats as self"""
        s = StatsObj()
        for attr in self.stat_attrs:
            s.set_stat(attr, self.get_stat(attr))
        return s


class StatFileObj(StatsObj):
    """Build on StatsObj, add functions for processing files"""

    def __init__(self, start_time=None):
        """StatFileObj initializer - zero out file attributes

        StartTime is set to start_time, or to Time.curtime if
        start_time is None. Errors is set to 0.

        """
        StatsObj.__init__(self)
        for attr in self.stat_file_attrs:
            self.set_stat(attr, 0)
        if start_time is None:
            start_time = Time.curtime
        self.StartTime = start_time
        self.Errors = 0

    def add_source_file(self, src_rorp):
        """Add stats of source file"""
        self.SourceFiles += 1
        if src_rorp.isreg():
            self.SourceFileSize += src_rorp.getsize()

    def add_dest_file(self, dest_rorp):
        """Add stats of destination size"""
        self.MirrorFiles += 1
        if dest_rorp.isreg():
            self.MirrorFileSize += dest_rorp.getsize()

    def add_changed(self, src_rorp, dest_rorp):
        """Update stats when src_rorp changes to dest_rorp

        The file is counted as changed if both exist, as new if only
        src_rorp exists and as deleted if only dest_rorp exists. Sizes
        are only added for regular files.

        """
        if src_rorp and src_rorp.lstat() and dest_rorp and dest_rorp.lstat():
            self.ChangedFiles += 1
            if src_rorp.isreg():
                self.ChangedSourceSize += src_rorp.getsize()
            if dest_rorp.isreg():
                self.ChangedMirrorSize += dest_rorp.getsize()
        elif src_rorp and src_rorp.lstat():
            self.NewFiles += 1
            if src_rorp.isreg():
                self.NewFileSize += src_rorp.getsize()
        elif dest_rorp and dest_rorp.lstat():
            self.DeletedFiles += 1
            if dest_rorp.isreg():
                self.DeletedFileSize += dest_rorp.getsize()

    def add_increment(self, inc_rorp):
        """Update stats with increment rorp"""
        self.IncrementFiles += 1
        if inc_rorp.isreg():
            self.IncrementFileSize += inc_rorp.getsize()

    def add_error(self):
        """Increment error stat by 1"""
        self.Errors += 1

    def finish(self, end_time=None):
        """Record end time and set other stats

        EndTime is set to end_time, or to the current time if end_time
        is None.

        """
        if end_time is None:
            end_time = time.time()
        self.EndTime = end_time


_active_statfileobj = None


def init_statfileobj():
    """Return new stat file object, record as active stat object"""
    global _active_statfileobj
    assert not _active_statfileobj, _active_statfileobj
    _active_statfileobj = StatFileObj()
    return _active_statfileobj


def get_active_statfileobj():
    """Return active stat file object if it exists, else None"""
    return _active_statfileobj or None


def record_error():
    """Record error on active statfileobj, if there is one"""
    if _active_statfileobj:
        _active_statfileobj.add_error()


def process_increment(inc_rorp):
    """Add statistics of increment rp incrp if there is active statfile"""
    if _active_statfileobj:
        _active_statfileobj.add_increment(inc_rorp)


def write_active_statfileobj(end_time=None):
    """Write active StatFileObj object to session statistics file

    The active object is finished with end_time, written, and then
    reset so that there is no active object anymore.

    """
    global _active_statfileobj
    assert _active_statfileobj
    rp_base = Globals.rbdir.append(b"session_statistics")
    session_stats_rp = increment.get_inc(rp_base, 'data', Time.curtime)
    _active_statfileobj.finish(end_time)
    _active_statfileobj.write_stats_to_rp(session_stats_rp)
    _active_statfileobj = None


def print_active_stats(end_time=None):
    """Print statistics of active statobj to stdout and log"""
    assert _active_statfileobj
    _active_statfileobj.finish(end_time)
    statmsg = _active_statfileobj.get_stats_logstring("Session statistics")
    log.Log.log_to_file(statmsg)
    Globals.client_conn.sys.stdout.write(statmsg)


class FileStats:
    """Keep track of less detailed stats on file-by-file basis"""
    _fileobj, _rp = None, None
    _line_sep = None

    @classmethod
    def init(cls):
        """Open file stats object and prepare to write"""
        assert not (cls._fileobj or cls._rp), (cls._fileobj, cls._rp)
        rpbase = Globals.rbdir.append(b"file_statistics")
        suffix = 'data.gz' if Globals.compression else 'data'
        cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
        assert not cls._rp.lstat()
        cls._fileobj = cls._rp.open("wb", compress=Globals.compression)

        cls._line_sep = b'\0' if Globals.null_separator else b'\n'
        cls.write_docstring()
        cls.line_buffer = []

    @classmethod
    def write_docstring(cls):
        """Write the first line (a documentation string) into file"""
        cls._fileobj.write(b"# Format of each line in file statistics file:")
        cls._fileobj.write(cls._line_sep)
        cls._fileobj.write(b"# Filename Changed SourceSize MirrorSize "
                           b"IncrementSize" + cls._line_sep)

    @classmethod
    def update(cls, source_rorp, dest_rorp, changed, inc):
        """Update file stats with given information

        The resulting line is only buffered; the buffer is written to
        the file once it holds at least 100 lines.

        """
        if source_rorp:
            filename = source_rorp.get_indexpath()
        else:
            filename = dest_rorp.get_indexpath()
        filename = metadata.quote_path(filename)

        size_list = [
            cls.get_size(rorp) for rorp in (source_rorp, dest_rorp, inc)
        ]
        line = b" ".join([filename, str(changed).encode()] + size_list)
        cls.line_buffer.append(line)
        if len(cls.line_buffer) >= 100:
            cls.write_buffer()

    @classmethod
    def get_size(cls, rorp):
        """Return the size of rorp as bytes, or "NA" if there is no rorp

        The size of anything but a regular file is given as "0".

        """
        if not rorp:
            return b"NA"
        if rorp.isreg():
            return str(rorp.getsize()).encode()
        return b"0"

    @classmethod
    def write_buffer(cls):
        """Write buffer to file because buffer is full

        The buffer part is necessary because the GzipFile.write()
        method seems fairly slow.

        """
        assert cls.line_buffer and cls._fileobj
        cls.line_buffer.append(b'')  # have join add _line_sep to end also
        cls._fileobj.write(cls._line_sep.join(cls.line_buffer))
        cls.line_buffer = []

    @classmethod
    def close(cls):
        """Close file stats file"""
        assert cls._fileobj, cls._fileobj
        if cls.line_buffer:
            cls.write_buffer()
        # close outside of any assert statement, else the file would be
        # left open when Python runs with assertions disabled (-O)
        close_result = cls._fileobj.close()
        assert not close_result, close_result
        cls._fileobj = cls._rp = None