# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA
"""Store and retrieve metadata in destination directory

The plan is to store metadata information for all files in the
destination directory in a special metadata file.  There are two
reasons for this:

1)  The filesystem of the mirror directory may not be able to handle
    types of metadata that the source filesystem can.  For instance,
    rdiff-backup may not have root access on the destination side, so
    cannot set uid/gid.  Or the source side may have ACLs and the
    destination side doesn't.

    Hopefully every file system can store binary data.  Storing
    metadata separately allows us to back up anything (ok, maybe
    strange filenames are still a problem).

2)  Metadata can be more quickly read from a file than it can by
    traversing the mirror directory over and over again.  In many
    cases most of rdiff-backup's time is spent comparing metadata (like
    file size and modtime), trying to find differences.  Reading this
    data sequentially from a file is significantly less taxing than
    listing directories and statting files all over the mirror
    directory.

The metadata is stored in a text file, which is a bunch of records
concatenated together.  Each record has the format:

File <filename>
  <field_name1> <value>
  <field_name2> <value>
  ...

Where the lines are separated by newlines.  See the code below for the
field names and values.

"""

import re
import os
import binascii
from . import log, Globals, rpath, Time, rorpiter


class ParsingError(Exception):
    """This is raised when bad or unparsable data is received"""
    pass


def carbonfile2string(cfile):
    """Convert CarbonFile data to a bytes line suitable for storing.

    Returns b"None" when there is no CarbonFile data.  Everything is
    kept as bytes (not str) because the record it is embedded into is
    assembled with b"%b" formatting; hexlify() already returns bytes,
    so formatting it with a str "%s" would store the repr "b'...'".
    """
    if not cfile:
        return b"None"
    retvalparts = []
    retvalparts.append(b'creator:%b' % binascii.hexlify(cfile['creator']))
    retvalparts.append(b'type:%b' % binascii.hexlify(cfile['type']))
    retvalparts.append(b'location:%d,%d' % cfile['location'])
    retvalparts.append(b'flags:%d' % cfile['flags'])
    try:
        retvalparts.append(b'createDate:%d' % cfile['createDate'])
    except KeyError:
        # Old (pre-1.1.6) archives have no creation date field
        log.Log("Writing pre-1.1.6 style metadata, without creation date", 9)
    return b'|'.join(retvalparts)


def string2carbonfile(data):
    """Re-constitute CarbonFile data from a bytes line stored by
    carbonfile2string.

    ``data`` is bytes (it comes from a regexp match on the binary
    metadata record), so all splitting and key comparison must use
    bytes literals as well.
    """
    retval = {}
    for component in data.split(b'|'):
        key, value = component.split(b':')
        if key == b'creator':
            retval['creator'] = binascii.unhexlify(value)
        elif key == b'type':
            retval['type'] = binascii.unhexlify(value)
        elif key == b'location':
            a, b = value.split(b',')
            retval['location'] = (int(a), int(b))
        elif key == b'flags':
            retval['flags'] = int(value)
        elif key == b'createDate':
            retval['createDate'] = int(value)
    return retval


def RORP2Record(rorpath):
    """From RORPath, return text record of file's metadata"""
    str_list = [b"File %s\n" % quote_path(rorpath.get_indexpath())]

    # Store file type, e.g. "dev", "reg", or "sym", and type-specific data
    type = rorpath.gettype()
    if type is None:
        type = "None"
    str_list.append(b"  Type %b\n" % type.encode('ascii'))
    if type == "reg":
        str_list.append(b"  Size %i\n" % rorpath.getsize())

        # If there is a resource fork, save it.
        if rorpath.has_resource_fork():
            if not rorpath.get_resource_fork():
                rf = b"None"
            else:
                rf = binascii.hexlify(rorpath.get_resource_fork())
            str_list.append(b"  ResourceFork %b\n" % (rf, ))

        # If there is Carbon data, save it.
        if rorpath.has_carbonfile():
            cfile = carbonfile2string(rorpath.get_carbonfile())
            str_list.append(b"  CarbonFile %b\n" % (cfile, ))

        # If file is hardlinked, add that information
        if Globals.preserve_hardlinks != 0:
            numlinks = rorpath.getnumlinks()
            if numlinks > 1:
                str_list.append(b"  NumHardLinks %i\n" % numlinks)
                str_list.append(b"  Inode %i\n" % rorpath.getinode())
                str_list.append(b"  DeviceLoc %i\n" % rorpath.getdevloc())

        # Save any hashes, if available
        if rorpath.has_sha1():
            str_list.append(
                b'  SHA1Digest %b\n' % rorpath.get_sha1().encode('ascii'))

    elif type == "None":
        return b"".join(str_list)
    elif type == "dir" or type == "sock" or type == "fifo":
        pass
    elif type == "sym":
        str_list.append(b"  SymData %b\n" % quote_path(rorpath.readlink()))
    elif type == "dev":
        devchar, major, minor = rorpath.getdevnums()
        str_list.append(
            b"  DeviceNum %b %i %i\n" % (devchar.encode('ascii'), major, minor))

    # Store time information
    if type != 'sym' and type != 'dev':
        str_list.append(b"  ModTime %i\n" % rorpath.getmtime())

    # Add user, group, and permission information
    uid, gid = rorpath.getuidgid()
    str_list.append(b"  Uid %i\n" % uid)
    str_list.append(b"  Uname %b\n" % (rorpath.getuname() or ":").encode())
    str_list.append(b"  Gid %i\n" % gid)
    str_list.append(b"  Gname %b\n" % (rorpath.getgname() or ":").encode())
    str_list.append(b"  Permissions %d\n" % rorpath.getperms())

    # Add long filename information
    if rorpath.has_alt_mirror_name():
        str_list.append(
            b"  AlternateMirrorName %b\n" % (rorpath.get_alt_mirror_name(), ))
    elif rorpath.has_alt_inc_name():
        str_list.append(
            b"  AlternateIncrementName %b\n" % (rorpath.get_alt_inc_name(), ))

    return b"".join(str_list)


line_parsing_regexp = re.compile(b"^ *([A-Za-z0-9]+) (.+)$", re.M)


def Record2RORP(record_string):
    """Given record_string, return RORPath

    For speed reasons, write the RORPath data dictionary directly
    instead of calling rorpath functions.  Profiling has shown this to
    be a time critical function.

    """
    data_dict = {}
    for field, data in line_parsing_regexp.findall(record_string):
        field = field.decode('ascii')
        if field == "File":
            index = quoted_filename_to_index(data)
        elif field == "Type":
            if data == b"None":
                data_dict['type'] = None
            else:
                data_dict['type'] = data.decode('ascii')
        elif field == "Size":
            data_dict['size'] = int(data)
        elif field == "ResourceFork":
            if data == b"None":
                data_dict['resourcefork'] = b""
            else:
                data_dict['resourcefork'] = binascii.unhexlify(data)
        elif field == "CarbonFile":
            if data == b"None":
                data_dict['carbonfile'] = None
            else:
                data_dict['carbonfile'] = string2carbonfile(data)
        elif field == "SHA1Digest":
            data_dict['sha1'] = data.decode('ascii')
        elif field == "NumHardLinks":
            data_dict['nlink'] = int(data)
        elif field == "Inode":
            data_dict['inode'] = int(data)
        elif field == "DeviceLoc":
            data_dict['devloc'] = int(data)
        elif field == "SymData":
            data_dict['linkname'] = unquote_path(data)
        elif field == "DeviceNum":
            devchar, major_str, minor_str = data.split(b" ")
            data_dict['devnums'] = (devchar.decode('ascii'), int(major_str),
                                    int(minor_str))
        elif field == "ModTime":
            data_dict['mtime'] = int(data)
        elif field == "Uid":
            data_dict['uid'] = int(data)
        elif field == "Gid":
            data_dict['gid'] = int(data)
        elif field == "Uname":
            if data == b":" or data == b'None':
                data_dict['uname'] = None
            else:
                data_dict['uname'] = data.decode()
        elif field == "Gname":
            if data == b':' or data == b'None':
                data_dict['gname'] = None
            else:
                data_dict['gname'] = data.decode()
        elif field == "Permissions":
            data_dict['perms'] = int(data)
        elif field == "AlternateMirrorName":
            data_dict['mirrorname'] = data
        elif field == "AlternateIncrementName":
            data_dict['incname'] = data
        else:
            log.Log("Unknown field in line '%s %s'" % (field, data), 2)
    return rpath.RORPath(index, data_dict)


chars_to_quote = re.compile(b"\\n|\\\\")


def quote_path(path_string):
    """Return quoted version of path_string

    Because newlines are used to separate fields in a record, they are
    replaced with \\n.  Backslashes become \\\\ and everything else is
    left the way it is.

    """

    def replacement_func(match_obj):
        """This is called on the match obj of any char that needs quoting"""
        char = match_obj.group(0)
        if char == b"\n":
            return b"\\n"
        elif char == b"\\":
            return b"\\\\"
        assert 0, "Bad char %s needs quoting" % char

    return chars_to_quote.sub(replacement_func, path_string)


def unquote_path(quoted_string):
    """Reverse what was done by quote_path"""

    def replacement_func(match_obj):
        """Unquote match obj of two character sequence"""
        two_chars = match_obj.group(0)
        if two_chars == b"\\n":
            return b"\n"
        elif two_chars == b"\\\\":
            return b"\\"
        log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
        return two_chars

    return re.sub(b"\\\\n|\\\\\\\\", replacement_func, quoted_string)


def quoted_filename_to_index(quoted_filename):
    """Return tuple index given quoted filename"""
    if quoted_filename == b'.':
        return ()
    else:
        return tuple(unquote_path(quoted_filename).split(b'/'))


class FlatExtractor:
    """Controls iterating objects from flat file"""

    # Set this in subclass.  record_boundary_regexp should match
    # beginning of next record.  The first group should start at the
    # beginning of the record.  The second group should contain the
    # (possibly quoted) filename.
    record_boundary_regexp = None

    # Set in subclass to function that converts text record to object
    record_to_object = None

    def __init__(self, fileobj):
        self.fileobj = fileobj  # holds file object we are reading from
        self.buf = b""  # holds the next part of the file
        self.at_end = 0  # True if we are at the end of the file
        self.blocksize = 32 * 1024

    def get_next_pos(self):
        """Return position of next record in buffer, or end pos if none"""
        while 1:
            # search from pos 1 so the record at the start of buf is skipped
            m = self.record_boundary_regexp.search(self.buf, 1)
            if m:
                return m.start(1)
            else:  # add next block to the buffer, loop again
                newbuf = self.fileobj.read(self.blocksize)
                if not newbuf:
                    self.at_end = 1
                    return len(self.buf)
                else:
                    self.buf += newbuf

    def iterate(self):
        """Return iterator that yields all objects with records"""
        for record in self.iterate_records():
            try:
                yield self.record_to_object(record)
            except (ParsingError, ValueError) as e:
                if self.at_end:
                    break  # Ignore whitespace/bad records at end
                log.Log(
                    "Error parsing flat file: %s [%s(%s)]" %
                    (e, type(self), self.fileobj.fileobj.name), 2)

    def iterate_records(self):
        """Yield all text records in order"""
        while 1:
            next_pos = self.get_next_pos()
            if self.at_end:
                if next_pos:
                    yield self.buf[:next_pos]
                break
            yield self.buf[:next_pos]
            self.buf = self.buf[next_pos:]
        assert not self.fileobj.close()

    def skip_to_index(self, index):
        """Scan through the file, set buffer to beginning of index record

        Here we make sure that the buffer always ends in a newline, so
        we will not be splitting lines in half.

        """
        assert not self.buf or self.buf.endswith(b"\n")
        while 1:
            self.buf = self.fileobj.read(self.blocksize)
            self.buf += self.fileobj.readline()
            if not self.buf:
                self.at_end = 1
                return
            while 1:
                m = self.record_boundary_regexp.search(self.buf)
                if not m:
                    break
                cur_index = self.filename_to_index(m.group(2))
                if cur_index >= index:
                    self.buf = self.buf[m.start(1):]
                    return
                else:
                    self.buf = self.buf[m.end(1):]

    def iterate_starting_with(self, index):
        """Iterate objects whose index starts with given index"""
        self.skip_to_index(index)
        if self.at_end:
            return
        while 1:
            next_pos = self.get_next_pos()
            try:
                obj = self.record_to_object(self.buf[:next_pos])
            except (ParsingError, ValueError) as e:
                log.Log("Error parsing metadata file: %s" % (e, ), 2)
            else:
                if obj.index[:len(index)] != index:
                    break
                yield obj
            if self.at_end:
                break
            self.buf = self.buf[next_pos:]
        assert not self.fileobj.close()

    def filename_to_index(self, filename):
        """Translate filename, possibly quoted, into an index tuple

        The filename is the first group matched by
        regexp_boundary_regexp.

        """
        assert 0  # subclass


class RorpExtractor(FlatExtractor):
    """Iterate rorps from metadata file"""
    record_boundary_regexp = re.compile(b"(?:\\n|^)(File (.*?))\\n")
    record_to_object = staticmethod(Record2RORP)
    filename_to_index = staticmethod(quoted_filename_to_index)


class FlatFile:
    """Manage a flat file containing info on various files

    This is used for metadata information, and possibly EAs and ACLs.
    The main read interface is as an iterator.  The storage format is
    a flat, probably compressed file, so random access is not
    recommended.

    Even if the file looks like a text file, it is actually a binary file,
    so that (especially) paths can be stored as bytes, without issue
    with encoding / decoding.
    """
    rp, fileobj, mode = None, None, None
    _buffering_on = 1  # Buffering may be useful because gzip writes are slow
    _record_buffer, _max_buffer_size = None, 100
    _extractor = FlatExtractor  # Override to class that iterates objects
    _object_to_record = None  # Set to function converting object to record
    _prefix = None  # Set to required prefix

    def __init__(self, rp_base, mode, check_path=1, compress=1, callback=None):
        """Open rp (or rp+'.gz') for reading ('r') or writing ('w')

        If callback is available, it will be called on the rp upon
        closing (because the rp may not be known in advance).

        """
        self.mode = mode
        self.callback = callback
        self._record_buffer = []
        if check_path:
            assert (rp_base.isincfile()
                    and rp_base.getincbase_bname() == self._prefix), rp_base
            compress = 1
        if mode == 'r' or mode == 'rb':
            self.rp = rp_base
            self.fileobj = self.rp.open("rb", compress)
        else:
            assert mode == 'w' or mode == 'wb', \
                "File opening mode must be one of r, rb, w or wb, and not %s." % mode
            if compress and check_path and not rp_base.isinccompressed():

                def callback(rp):
                    self.rp = rp

                self.fileobj = rpath.MaybeGzip(rp_base, callback)
            else:
                self.rp = rp_base
                assert not self.rp.lstat(), self.rp
                self.fileobj = self.rp.open("wb", compress=compress)

    def write_record(self, record):
        """Write a (text) record into the file"""
        if self._buffering_on:
            self._record_buffer.append(record)
            if len(self._record_buffer) >= self._max_buffer_size:
                self.fileobj.write(b"".join(self._record_buffer))
                self._record_buffer = []
        else:
            self.fileobj.write(record)

    def write_object(self, object):
        """Convert one object to record and write to file"""
        self.write_record(self._object_to_record(object))

    def get_objects(self, restrict_index=None):
        """Return iterator of objects records from file rp"""
        if not restrict_index:
            return self._extractor(self.fileobj).iterate()
        extractor = self._extractor(self.fileobj)
        return extractor.iterate_starting_with(restrict_index)

    def get_records(self):
        """Return iterator of text records"""
        return self._extractor(self.fileobj).iterate_records()

    def close(self):
        """Close file, for when any writing is done"""
        assert self.fileobj, "File already closed"
        if self._buffering_on and self._record_buffer:
            self.fileobj.write(b"".join(self._record_buffer))
            self._record_buffer = []
        result = self.fileobj.close()
        self.fileobj = None
        self.rp.fsync_with_dir()
        self.rp.setdata()
        if self.callback:
            self.callback(self.rp)
        return result


class MetadataFile(FlatFile):
    """Store/retrieve metadata from mirror_metadata as rorps"""
    _prefix = b"mirror_metadata"
    _extractor = RorpExtractor
    _object_to_record = staticmethod(RORP2Record)


class CombinedWriter:
    """Used for simultaneously writing metadata, eas, and acls"""

    def __init__(self, metawriter, eawriter, aclwriter, winaclwriter):
        self.metawriter = metawriter
        self.eawriter, self.aclwriter, self.winaclwriter = \
            eawriter, aclwriter, winaclwriter  # these can be None

    def write_object(self, rorp):
        """Write information in rorp to all the writers"""
        self.metawriter.write_object(rorp)
        if self.eawriter and not rorp.get_ea().empty():
            self.eawriter.write_object(rorp.get_ea())
        if self.aclwriter and not rorp.get_acl().is_basic():
            self.aclwriter.write_object(rorp.get_acl())
        if self.winaclwriter:
            self.winaclwriter.write_object(rorp.get_win_acl())

    def close(self):
        self.metawriter.close()
        if self.eawriter:
            self.eawriter.close()
        if self.aclwriter:
            self.aclwriter.close()
        if self.winaclwriter:
            self.winaclwriter.close()


class Manager:
    """Read/Combine/Write metadata files by time"""
    meta_prefix = b'mirror_metadata'
    acl_prefix = b'access_control_lists'
    ea_prefix = b'extended_attributes'
    wacl_prefix = b'win_access_control_lists'

    def __init__(self):
        """Set listing of rdiff-backup-data dir"""
        self.rplist = []
        self.timerpmap, self.prefixmap = {}, {}
        for filename in Globals.rbdir.listdir():
            rp = Globals.rbdir.append(filename)
            if rp.isincfile():
                self.add_incrp(rp)

    def add_incrp(self, rp):
        """Add rp to list of inc rps in the rbdir"""
        assert rp.isincfile(), rp
        self.rplist.append(rp)
        time = rp.getinctime()
        if time in self.timerpmap:
            self.timerpmap[time].append(rp)
        else:
            self.timerpmap[time] = [rp]

        incbase = rp.getincbase_bname()
        if incbase in self.prefixmap:
            self.prefixmap[incbase].append(rp)
        else:
            self.prefixmap[incbase] = [rp]

    def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
        """Used below to find the right kind of file by time"""
        if time not in self.timerpmap:
            return None
        for rp in self.timerpmap[time]:
            if rp.getincbase_bname() == prefix:
                return flatfileclass(rp, 'r').get_objects(restrict_index)
        return None

    def get_meta_at_time(self, time, restrict_index):
        """Return iter of metadata rorps at given time (or None)"""
        return self._iter_helper(self.meta_prefix, MetadataFile, time,
                                 restrict_index)

    def get_eas_at_time(self, time, restrict_index):
        """Return Extended Attributes iter at given time (or None)"""
        return self._iter_helper(self.ea_prefix,
                                 eas_acls.ExtendedAttributesFile, time,
                                 restrict_index)

    def get_acls_at_time(self, time, restrict_index):
        """Return ACLs iter at given time from recordfile (or None)"""
        return self._iter_helper(self.acl_prefix,
                                 eas_acls.AccessControlListFile, time,
                                 restrict_index)

    def get_win_acls_at_time(self, time, restrict_index):
        """Return WACLs iter at given time from recordfile (or None)"""
        return self._iter_helper(self.wacl_prefix,
                                 win_acls.WinAccessControlListFile, time,
                                 restrict_index)

    def GetAtTime(self, time, restrict_index=None):
        """Return combined metadata iter with ea/acl info if necessary"""
        cur_iter = self.get_meta_at_time(time, restrict_index)
        if not cur_iter:
            log.Log(
                "Warning, could not find mirror_metadata file.\n"
                "Metadata will be read from filesystem instead.", 2)
            return None

        if Globals.acls_active:
            acl_iter = self.get_acls_at_time(time, restrict_index)
            if not acl_iter:
                log.Log("Warning: Access Control List file not found", 2)
                acl_iter = iter([])
            cur_iter = eas_acls.join_acl_iter(cur_iter, acl_iter)
        if Globals.eas_active:
            ea_iter = self.get_eas_at_time(time, restrict_index)
            if not ea_iter:
                log.Log("Warning: Extended Attributes file not found", 2)
                ea_iter = iter([])
            cur_iter = eas_acls.join_ea_iter(cur_iter, ea_iter)
        if Globals.win_acls_active:
            wacl_iter = self.get_win_acls_at_time(time, restrict_index)
            if not wacl_iter:
                log.Log(
                    "Warning: Windows Access Control List file not"
                    " found.", 2)
                wacl_iter = iter([])
            cur_iter = win_acls.join_wacl_iter(cur_iter, wacl_iter)

        return cur_iter

    def _writer_helper(self, prefix, flatfileclass, typestr, time):
        """Used in the get_xx_writer functions, returns a writer class"""
        if time is None:
            timestr = Time.curtimestr
        else:
            timestr = Time.timetobytes(time)
        triple = map(os.fsencode, (prefix, timestr, typestr))
        filename = b'.'.join(triple)
        rp = Globals.rbdir.append(filename)
        assert not rp.lstat(), "File %s already exists!" % (rp.path, )
        assert rp.isincfile()
        return flatfileclass(rp, 'w', callback=self.add_incrp)

    def get_meta_writer(self, typestr, time):
        """Return MetadataFile object opened for writing at given time"""
        return self._writer_helper(self.meta_prefix, MetadataFile, typestr,
                                   time)

    def get_ea_writer(self, typestr, time):
        """Return ExtendedAttributesFile opened for writing"""
        return self._writer_helper(
            self.ea_prefix, eas_acls.ExtendedAttributesFile, typestr, time)

    def get_acl_writer(self, typestr, time):
        """Return AccessControlListFile opened for writing"""
        return self._writer_helper(
            self.acl_prefix, eas_acls.AccessControlListFile, typestr, time)

    def get_win_acl_writer(self, typestr, time):
        """Return WinAccessControlListFile opened for writing"""
        return self._writer_helper(
            self.wacl_prefix, win_acls.WinAccessControlListFile, typestr, time)

    def GetWriter(self, typestr=b'snapshot', time=None):
        """Get a writer object that can write meta and possibly acls/eas"""
        metawriter = self.get_meta_writer(typestr, time)
        if not Globals.eas_active and not Globals.acls_active and \
                not Globals.win_acls_active:
            return metawriter  # no need for a CombinedWriter

        if Globals.eas_active:
            ea_writer = self.get_ea_writer(typestr, time)
        else:
            ea_writer = None
        if Globals.acls_active:
            acl_writer = self.get_acl_writer(typestr, time)
        else:
            acl_writer = None
        if Globals.win_acls_active:
            win_acl_writer = self.get_win_acl_writer(typestr, time)
        else:
            win_acl_writer = None
        return CombinedWriter(metawriter, ea_writer, acl_writer,
                              win_acl_writer)


class PatchDiffMan(Manager):
    """Contains functions for patching and diffing metadata

    To save space, we can record a full list of only the most recent
    metadata, using the normal rdiff-backup reverse increment
    strategy.  Instead of using librsync to compute diffs, though, we
    use our own technique so that the diff files are still
    hand-editable.

    A mirror_metadata diff has the same format as a mirror_metadata
    snapshot.  If the record for an index is missing from the diff, it
    indicates no change from the original.  If it is present it
    replaces the mirror_metadata entry, unless it has Type None, which
    indicates the record should be deleted from the original.

    """
    max_diff_chain = 9  # After this many diffs, make a new snapshot

    def get_diffiter(self, new_iter, old_iter):
        """Iterate meta diffs of new_iter -> old_iter"""
        for new_rorp, old_rorp in rorpiter.Collate2Iters(new_iter, old_iter):
            if not old_rorp:
                yield rpath.RORPath(new_rorp.index)
            elif not new_rorp or new_rorp.data != old_rorp.data:
                # exact compare here, can't use == on rorps
                yield old_rorp

    def sorted_prefix_inclist(self, prefix, min_time=0):
        """Return reverse sorted (by time) list of incs with given prefix"""
        if prefix not in self.prefixmap:
            return []
        sortlist = [(rp.getinctime(), rp) for rp in self.prefixmap[prefix]]

        # we sort before we validate against duplicates so that we tell
        # first about the youngest case of duplication
        sortlist.sort(reverse=True, key=lambda x: x[0])

        # we had cases where the timestamp of the metadata files were
        # duplicates, we need to fail or at least warn about such cases
        unique_set = set()
        for (time, rp) in sortlist:
            if time in unique_set:
                if Globals.allow_duplicate_timestamps:
                    log.Log("Warning: metadata file '%s' has a duplicate "
                            "timestamp date, you might not be able to "
                            "recover files on or earlier than this date. "
                            "Assuming you're in the process of cleaning up "
                            "your repository." %
                            rp.get_safepath(), 2)
                else:
                    log.Log.FatalError(
                        "Metadata file '%s' has a duplicate timestamp date, "
                        "you might not be able to recover files on or earlier "
                        "than this date. "
                        "Check the man page on how to clean up your repository "
                        "using the '--allow-duplicate-timestamps' option." %
                        rp.get_safepath())
            else:
                unique_set.add(time)

        return [rp for (time, rp) in sortlist if time >= min_time]

    def check_needs_diff(self):
        """Check if we should diff, returns (new, old) rps, or (None, None)"""
        inclist = self.sorted_prefix_inclist(b'mirror_metadata')
        assert len(inclist) >= 1
        if len(inclist) == 1:
            return (None, None)
        newrp, oldrp = inclist[:2]
        assert newrp.getinctype() == oldrp.getinctype() == b'snapshot'

        chainlen = 1
        for rp in inclist[2:]:
            if rp.getinctype() != b'diff':
                break
            chainlen += 1
        if chainlen >= self.max_diff_chain:
            return (None, None)
        return (newrp, oldrp)

    def ConvertMetaToDiff(self):
        """Replace a mirror snapshot with a diff if it's appropriate"""
        newrp, oldrp = self.check_needs_diff()
        if not newrp:
            return
        log.Log("Writing mirror_metadata diff", 6)

        diff_writer = self.get_meta_writer(b'diff', oldrp.getinctime())
        new_iter = MetadataFile(newrp, 'r').get_objects()
        old_iter = MetadataFile(oldrp, 'r').get_objects()
        for diff_rorp in self.get_diffiter(new_iter, old_iter):
            diff_writer.write_object(diff_rorp)
        diff_writer.close()  # includes sync
        oldrp.delete()

    def get_meta_at_time(self, time, restrict_index):
        """Get metadata rorp iter, possibly by patching with diffs"""
        meta_iters = [
            MetadataFile(rp, 'r').get_objects(restrict_index)
            for rp in self.relevant_meta_incs(time)
        ]
        if not meta_iters:
            return None
        if len(meta_iters) == 1:
            return meta_iters[0]
        return self.iterate_patched_meta(meta_iters)

    def relevant_meta_incs(self, time):
        """Return list [snapshotrp, diffrps ...] time sorted"""
        inclist = self.sorted_prefix_inclist(b'mirror_metadata', min_time=time)
        if not inclist:
            return inclist
        assert inclist[-1].getinctime() == time, inclist[-1]
        for i in range(len(inclist) - 1, -1, -1):
            if inclist[i].getinctype() == b'snapshot':
                return inclist[i:]
        assert 0, "Inclist %s contains no snapshots" % (inclist, )

    def iterate_patched_meta(self, meta_iter_list):
        """Return an iter of metadata rorps by combining the given iters

        The iters should be given as a list/tuple in reverse
        chronological order.  The earliest rorp in each iter will
        supercede all the later ones.

        """
        for meta_tuple in rorpiter.CollateIterators(*meta_iter_list):
            for i in range(len(meta_tuple) - 1, -1, -1):
                if meta_tuple[i]:
                    if meta_tuple[i].lstat():
                        yield meta_tuple[i]
                    break  # move to next index
            else:
                assert 0, "No valid rorps"


ManagerObj = None  # Set this later to Manager instance


def SetManager():
    global ManagerObj
    ManagerObj = PatchDiffMan()
    return ManagerObj


from . import eas_acls, win_acls  # noqa: E402