# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from builtins import map
from builtins import next
from builtins import object
from builtins import range

import re
import sys
import tempfile

from duplicity import diffdir
from duplicity import config
from duplicity import librsync
from duplicity import log
from duplicity import selection
from duplicity import tarfile
from duplicity import tempdir
from duplicity import util
from duplicity.lazy import *  # pylint: disable=unused-wildcard-import,redefined-builtin
from duplicity.path import *  # pylint: disable=unused-wildcard-import,redefined-builtin

u"""Functions for patching of directories"""


class PatchDirException(Exception):
    pass


def Patch(base_path, difftar_fileobj):
    u"""Patch given base_path and file object containing delta"""
    diff_tarfile = tarfile.TarFile(u"arbitrary", u"r", difftar_fileobj)
    patch_diff_tarfile(base_path, diff_tarfile)
    assert not difftar_fileobj.close()


def Patch_from_iter(base_path, fileobj_iter, restrict_index=()):
    u"""Patch given base_path and iterator of delta file objects"""
    diff_tarfile = TarFile_FromFileobjs(fileobj_iter)
    patch_diff_tarfile(base_path, diff_tarfile, restrict_index)


def patch_diff_tarfile(base_path, diff_tarfile, restrict_index=()):
    u"""Patch given Path object using delta tarfile (as in tarfile.TarFile)

    If restrict_index is set, ignore any deltas in diff_tarfile that
    don't start with restrict_index.

    """
    if base_path.exists():
        path_iter = selection.Select(base_path).set_iter()
    else:
        path_iter = empty_iter()  # probably untarring full backup

    diff_path_iter = difftar2path_iter(diff_tarfile)
    if restrict_index:
        diff_path_iter = filter_path_iter(diff_path_iter, restrict_index)
    collated = diffdir.collate2iters(path_iter, diff_path_iter)

    ITR = IterTreeReducer(PathPatcher, [base_path])
    for basis_path, diff_ropath in collated:
        # Log against whichever side of the pair exists; the basis path is
        # preferred when both are present.
        if basis_path:
            log.Info(_(u"Patching %s") % (util.fsdecode(basis_path.get_relative_path())),
                     log.InfoCode.patch_file_patching,
                     util.escape(basis_path.get_relative_path()))
            ITR(basis_path.index, basis_path, diff_ropath)
        else:
            log.Info(_(u"Patching %s") % (util.fsdecode(diff_ropath.get_relative_path())),
                     log.InfoCode.patch_file_patching,
                     util.escape(diff_ropath.get_relative_path()))
            ITR(diff_ropath.index, basis_path, diff_ropath)
    ITR.Finish()
    base_path.setdata()


def empty_iter():
    u"""Return an empty generator (yields nothing)"""
    if 0:
        yield 1  # this never happens, but fools into generator treatment


def filter_path_iter(path_iter, index):
    u"""Rewrite path elements of path_iter so they start with index

    Discard any that doesn't start with index, and remove the index
    prefix from the rest.

    """
    assert isinstance(index, tuple) and index, index
    l = len(index)
    for path in path_iter:
        if path.index[:l] == index:
            path.index = path.index[l:]
            yield path


def difftar2path_iter(diff_tarfile):
    u"""Turn file-like difftarobj into iterator of ROPaths"""
    tar_iter = iter(diff_tarfile)
    multivol_fileobj = None

    # The next tar_info is stored in this one element list so
    # Multivol_Filelike below can update it.  Any StopIterations will
    # be passed upwards.
    try:
        tarinfo_list = [next(tar_iter)]
    except StopIteration:
        return

    while 1:
        # This section relevant when a multivol diff is last in tar
        if not tarinfo_list[0]:
            return
        if multivol_fileobj and not multivol_fileobj.at_end:
            multivol_fileobj.close()  # aborting in middle of multivol
            continue

        index, difftype, multivol = get_index_from_tarinfo(tarinfo_list[0])
        ropath = ROPath(index)
        ropath.init_from_tarinfo(tarinfo_list[0])
        ropath.difftype = difftype
        if difftype == u"deleted":
            ropath.type = None
        elif ropath.isreg():
            if multivol:
                # A multivol entry spans several tar members; hand the
                # consumer a file-like object that splices them together.
                multivol_fileobj = Multivol_Filelike(diff_tarfile, tar_iter,
                                                     tarinfo_list, index)
                ropath.setfileobj(multivol_fileobj)
                yield ropath
                continue  # Multivol_Filelike will reset tarinfo_list
            else:
                ropath.setfileobj(diff_tarfile.extractfile(tarinfo_list[0]))
        yield ropath
        try:
            tarinfo_list[0] = next(tar_iter)
        except StopIteration:
            return


def get_index_from_tarinfo(tarinfo):
    u"""Return (index, difftype, multivol) pair from tarinfo object"""
    for prefix in [u"snapshot/", u"diff/", u"deleted/",
                   u"multivol_diff/", u"multivol_snapshot/"]:
        tiname = util.get_tarinfo_name(tarinfo)
        if sys.version_info.major == 2 and isinstance(prefix, unicode):  # pylint: disable=undefined-variable
            prefix = prefix.encode()
        if tiname.startswith(prefix):
            name = tiname[len(prefix):]  # strip prefix
            if prefix.startswith(u"multivol"):
                if prefix == u"multivol_diff/":
                    difftype = u"diff"
                else:
                    difftype = u"snapshot"
                multivol = 1
                # Multivol names end in "/<volume number>"; strip both the
                # multivol_* prefix and the trailing volume number.
                name, num_subs = \
                    re.subn(u"(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
                            u"\\2", tiname)
                if num_subs != 1:
                    raise PatchDirException(u"Unrecognized diff entry %s" %
                                            tiname)
            else:
                difftype = prefix[:-1]  # strip trailing /
                name = tiname[len(prefix):]
                if name.endswith(r"/"):
                    name = name[:-1]  # strip trailing /'s
                multivol = 0
            break
    else:
        raise PatchDirException(u"Unrecognized diff entry %s" %
                                tiname)
    if name == r"." or name == r"":
        index = ()
    else:
        if sys.version_info.major >= 3:
            index = tuple(util.fsencode(name).split(b"/"))
        else:
            index = tuple(name.split(b"/"))
        if b'..' in index:
            # Refuse path traversal out of the restore root.
            raise PatchDirException(u"Tar entry %s contains '..'.  Security "
                                    u"violation" % util.fsdecode(tiname))
    return (index, difftype, multivol)


class Multivol_Filelike(object):
    u"""Emulate a file like object from multivols

    Maintains a buffer about the size of a volume.  When it is read()
    to the end, pull in more volumes as desired.

    """
    def __init__(self, tf, tar_iter, tarinfo_list, index):
        u"""Initializer.  tf is TarFile obj, tarinfo is first tarinfo"""
        self.tf, self.tar_iter = tf, tar_iter
        self.tarinfo_list = tarinfo_list  # must store as list for write access
        self.index = index
        self.buffer = b""
        self.at_end = 0

    def read(self, length=-1):
        u"""Read length bytes from file"""
        if length < 0:
            while self.addtobuffer():
                pass
            real_len = len(self.buffer)
        else:
            while len(self.buffer) < length:
                if not self.addtobuffer():
                    break
            real_len = min(len(self.buffer), length)

        result = self.buffer[:real_len]
        self.buffer = self.buffer[real_len:]
        return result

    def addtobuffer(self):
        u"""Add next chunk to buffer"""
        if self.at_end:
            return None
        index, difftype, multivol = get_index_from_tarinfo(self.tarinfo_list[0])
        if not multivol or index != self.index:
            # we've moved on
            # the following communicates next tarinfo to difftar2path_iter
            self.at_end = 1
            return None

        fp = self.tf.extractfile(self.tarinfo_list[0])
        self.buffer += fp.read()
        fp.close()

        try:
            self.tarinfo_list[0] = next(self.tar_iter)
        except StopIteration:
            self.tarinfo_list[0] = None
            self.at_end = 1
            return None
        return 1

    def close(self):
        u"""If not at end, read remaining data"""
        if not self.at_end:
            while 1:
                self.buffer = b""
                if not self.addtobuffer():
                    break
        self.at_end = 1


class PathPatcher(ITRBranch):
    u"""Used by DirPatch, process the given basis and diff"""
    def __init__(self, base_path):
        u"""Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None

    def start_process(self, index, basis_path, diff_ropath):
        u"""Start processing when diff_ropath is a directory"""
        if not (diff_ropath and diff_ropath.isdir()):
            assert index == (), util.uindex(index)  # should only happen for first elem
            self.fast_process(index, basis_path, diff_ropath)
            return

        if not basis_path:
            basis_path = self.base_path.new_index(index)
            assert not basis_path.exists()
            basis_path.mkdir()  # Need place for later files to go into
        elif not basis_path.isdir():
            basis_path.delete()
            basis_path.mkdir()
        self.dir_basis_path = basis_path
        self.dir_diff_ropath = diff_ropath

    def end_process(self):
        u"""Copy directory permissions when leaving tree"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs(self.dir_basis_path)

    def can_fast_process(self, index, basis_path, diff_ropath):  # pylint: disable=unused-argument
        u"""No need to recurse if diff_ropath isn't a directory"""
        return not (diff_ropath and diff_ropath.isdir())

    def fast_process(self, index, basis_path, diff_ropath):
        u"""For use when neither is a directory"""
        if not diff_ropath:
            return  # no change
        elif not basis_path:
            if diff_ropath.difftype == u"deleted":
                pass  # already deleted
            else:
                # just copy snapshot over
                diff_ropath.copy(self.base_path.new_index(index))
        elif diff_ropath.difftype == u"deleted":
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
        elif not basis_path.isreg() or (basis_path.isreg() and diff_ropath.difftype == u"snapshot"):
            # Either the basis can't be patched in place, or the diff is a
            # full snapshot: replace the basis outright.
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
            diff_ropath.copy(basis_path)
        else:
            assert diff_ropath.difftype == u"diff", diff_ropath.difftype
            basis_path.patch_with_attribs(diff_ropath)


class TarFile_FromFileobjs(object):
    u"""Like a tarfile.TarFile iterator, but read from multiple fileobjs"""
    def __init__(self, fileobj_iter):
        u"""Make new tarinfo iterator

        fileobj_iter should be an iterator of file objects opened for
        reading.  They will be closed at end of reading.

        """
        self.fileobj_iter = fileobj_iter
        self.tarfile, self.tar_iter = None, None
        self.current_fp = None

    def __iter__(self):  # pylint: disable=non-iterator-returned
        return self

    def set_tarfile(self):
        u"""Set tarfile from next file object, or raise StopIteration"""
        if self.current_fp:
            assert not self.current_fp.close()
        self.current_fp = next(self.fileobj_iter)
        self.tarfile = util.make_tarfile(u"r", self.current_fp)
        self.tar_iter = iter(self.tarfile)

    def __next__(self):
        if not self.tarfile:
            try:
                self.set_tarfile()
            except StopIteration:
                # NOTE(review): returns None rather than re-raising; callers
                # in this module check for a falsy tarinfo.
                return
        try:
            return next(self.tar_iter)
        except StopIteration:
            assert not self.tarfile.close()
            self.set_tarfile()
            return next(self.tar_iter)

    def extractfile(self, tarinfo):
        u"""Return data associated with given tarinfo"""
        return self.tarfile.extractfile(tarinfo)


def collate_iters(iter_list):
    u"""Collate iterators by index

    Input is a list of n iterators each of which must iterate elements
    with an index attribute.  The elements must come out in increasing
    order, and the index should be a tuple itself.

    The output is an iterator which yields tuples where all elements
    in the tuple have the same index, and the tuple has n elements in
    it.  If any iterator lacks an element with that index, the tuple
    will have None in that spot.

    """
    # overflow[i] means that iter_list[i] has been exhausted
    # elems[i] is None means that it is time to replenish it.
    iter_num = len(iter_list)
    if iter_num == 2:
        return diffdir.collate2iters(iter_list[0], iter_list[1])
    overflow = [None] * iter_num
    elems = overflow[:]

    def setrorps(overflow, elems):
        u"""Set the overflow and rorps list"""
        for i in range(iter_num):
            if not overflow[i] and elems[i] is None:
                try:
                    elems[i] = next(iter_list[i])
                except StopIteration:
                    overflow[i] = 1
                    elems[i] = None

    def getleastindex(elems):
        u"""Return the first index in elems, assuming elems isn't empty"""
        return min([elem.index for elem in [x for x in elems if x]])

    def yield_tuples(iter_num, overflow, elems):
        while 1:
            setrorps(overflow, elems)
            if None not in overflow:
                break

            index = getleastindex(elems)
            yieldval = []
            for i in range(iter_num):
                if elems[i] and elems[i].index == index:
                    yieldval.append(elems[i])
                    elems[i] = None
                else:
                    yieldval.append(None)
            yield tuple(yieldval)
    return yield_tuples(iter_num, overflow, elems)


class IndexedTuple(object):
    u"""Like a tuple, but has .index (used previously by collate_iters)"""
    def __init__(self, index, sequence):
        self.index = index
        self.data = tuple(sequence)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        u"""This only works for numerical keys (easier this way)"""
        return self.data[key]

    def __lt__(self, other):
        return self.__cmp__(other) == -1

    def __le__(self, other):
        return self.__cmp__(other) != 1

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return self.__cmp__(other) == 1

    def __ge__(self, other):
        return self.__cmp__(other) != -1

    def __cmp__(self, other):
        # Ordering compares indexes only; equality (below) also compares data.
        assert isinstance(other, IndexedTuple)
        if self.index < other.index:
            return -1
        elif self.index == other.index:
            return 0
        else:
            return 1

    def __eq__(self, other):
        if isinstance(other, IndexedTuple):
            return self.index == other.index and self.data == other.data
        elif isinstance(other, tuple):
            return self.data == other
        else:
            return None

    def __str__(self):
        return u"(%s).%s" % (u", ".join(map(str, self.data)), self.index)


def normalize_ps(patch_sequence):
    u"""Given an sequence of ROPath deltas, remove blank and unnecessary

    The sequence is assumed to be in patch order (later patches apply
    to earlier ones).  A patch is unnecessary if a later one doesn't
    require it (for instance, any patches before a "delete" are
    unnecessary).

    """
    result_list = []
    i = len(patch_sequence) - 1
    while i >= 0:
        delta = patch_sequence[i]
        if delta is not None:
            # skip blank entries
            result_list.insert(0, delta)
            if delta.difftype != u"diff":
                # snapshot/delete: nothing earlier is needed
                break
        i -= 1
    return result_list


def patch_seq2ropath(patch_seq):
    u"""Apply the patches in patch_seq, return single ropath"""
    first = patch_seq[0]
    assert first.difftype != u"diff", u"First patch in sequence " \
                                      u"%s was a diff" % patch_seq
    if not first.isreg():
        # No need to bother with data if not regular file
        assert len(patch_seq) == 1, u"Patch sequence isn't regular, but " \
                                    u"has %d entries" % len(patch_seq)
        return first.get_ropath()

    current_file = first.open(u"rb")

    for delta_ropath in patch_seq[1:]:
        assert delta_ropath.difftype == u"diff", delta_ropath.difftype
        # librsync insists on a real file object (one with a usable
        # fileno()), which we create manually by using the
        # duplicity.tempdir to tell us where.
        #
        # See https://bugs.launchpad.net/duplicity/+bug/670891 for discussion
        # of os.tmpfile() vs tempfile.TemporaryFile() w.r.t. Windows / Posix,
        # which is worked around in librsync.PatchedFile() now.
        try:
            # BUG FIX: this previously read ``cur_file.fileno()`` -- an
            # undefined name -- so a NameError was always raised and
            # swallowed by a bare ``except:``, forcing a tempfile copy on
            # every iteration even when current_file was a real file.
            current_file.fileno()
        except Exception:
            tempfp = tempfile.TemporaryFile(dir=tempdir.default().dir())
            util.copyfileobj(current_file, tempfp)
            assert not current_file.close()
            tempfp.seek(0)
            current_file = tempfp
        current_file = librsync.PatchedFile(current_file,
                                            delta_ropath.open(u"rb"))
    result = patch_seq[-1].get_ropath()
    result.setfileobj(current_file)
    return result


def integrate_patch_iters(iter_list):
    u"""Combine a list of iterators of ropath patches

    The iter_list should be sorted in patch order, and the elements in
    each iter_list need to be orderd by index.  The output will be an
    iterator of the final ROPaths in index order.

    """
    collated = collate_iters(iter_list)
    for patch_seq in collated:
        normalized = normalize_ps(patch_seq)
        try:
            final_ropath = patch_seq2ropath(normalized)
            if final_ropath.exists():
                # otherwise final patch was delete
                yield final_ropath
        except Exception as e:
            # Best-effort: warn and continue with the remaining paths.
            filename = normalized[-1].get_ropath().get_relative_path()
            log.Warn(_(u"Error '%s' patching %s") %
                     (util.uexc(e), util.fsdecode(filename)),
                     log.WarningCode.cannot_process,
                     util.escape(filename))


def tarfiles2rop_iter(tarfile_list, restrict_index=()):
    u"""Integrate tarfiles of diffs into single ROPath iter

    Then filter out all the diffs in that index which don't start with
    the restrict_index.

    """
    diff_iters = [difftar2path_iter(x) for x in tarfile_list]
    if restrict_index:
        # Apply filter before integration
        diff_iters = [filter_path_iter(x, restrict_index) for x in diff_iters]
    return integrate_patch_iters(diff_iters)


def Write_ROPaths(base_path, rop_iter):
    u"""Write out ropaths in rop_iter starting at base_path

    Returns 1 if something was actually written, 0 otherwise.

    """
    ITR = IterTreeReducer(ROPath_IterWriter, [base_path])
    return_val = 0
    for ropath in rop_iter:
        return_val = 1
        ITR(ropath.index, ropath)
    ITR.Finish()
    base_path.setdata()
    return return_val


class ROPath_IterWriter(ITRBranch):
    u"""Used in Write_ROPaths above

    We need to use an ITR because we have to update the
    permissions/times of directories after we write the files in them.

    """
    def __init__(self, base_path):
        u"""Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None
        self.dir_new_path = None

    def start_process(self, index, ropath):
        u"""Write ropath.  Only handles the directory case"""
        if not ropath.isdir():
            # Base may not be a directory, but rest should
            assert ropath.index == (), ropath.index
            new_path = self.base_path.new_index(index)
            if ropath.exists():
                if new_path.exists():
                    new_path.deltree()
                ropath.copy(new_path)

        self.dir_new_path = self.base_path.new_index(index)
        if self.dir_new_path.exists() and not config.force:
            # base may exist, but nothing else
            assert index == (), index
        else:
            self.dir_new_path.mkdir()
        self.dir_diff_ropath = ropath

    def end_process(self):
        u"""Update information of a directory when leaving it"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs(self.dir_new_path)

    def can_fast_process(self, index, ropath):  # pylint: disable=unused-argument
        u"""Can fast process (no recursion) if ropath isn't a directory"""
        log.Info(_(u"Writing %s of type %s") %
                 (util.fsdecode(ropath.get_relative_path()), ropath.type),
                 log.InfoCode.patch_file_writing,
                 u"%s %s" % (util.escape(ropath.get_relative_path()), ropath.type))
        return not ropath.isdir()

    def fast_process(self, index, ropath):
        u"""Write non-directory ropath to destination"""
        if ropath.exists():
            ropath.copy(self.base_path.new_index(index))