# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

u"""
Functions for producing signatures and deltas of directories

Note that the main processes of this module have two parts.  In the
first, the signature or delta is constructed of a ROPath iterator.  In
the second, the ROPath iterator is put into tar block form.
"""
from __future__ import division

from future import standard_library
standard_library.install_aliases()
from builtins import map
from builtins import next
from builtins import str
from builtins import range
from builtins import object

import io
import sys

from duplicity import statistics
from duplicity import util
from duplicity import config
from duplicity.path import *  # pylint: disable=unused-wildcard-import,redefined-builtin
from duplicity.lazy import *  # pylint: disable=unused-wildcard-import,redefined-builtin
from duplicity import progress

# A StatsObj will be written to this from DirDelta and DirDelta_WriteSig.
stats = None
tracker = None


class DiffDirException(Exception):
    u"""Raised for malformed signature tars and for file close errors."""
    pass


def DirSig(path_iter):
    u"""
    Alias for SigTarBlockIter below
    """
    return SigTarBlockIter(path_iter)


def DirFull(path_iter):
    u"""
    Return a tarblock full backup of items in path_iter

    A full backup is just a diff starting from nothing (it may be less
    elegant than using a standard tar file, but we can be sure that it
    will be easy to split up the tar and make the volumes the same
    sizes).
    """
    return DirDelta(path_iter, io.StringIO(u""))


def DirFull_WriteSig(path_iter, sig_outfp):
    u"""
    Return full backup like above, but also write signature to sig_outfp
    """
    return DirDelta_WriteSig(path_iter, io.StringIO(u""), sig_outfp)


def DirDelta(path_iter, dirsig_fileobj_list):
    u"""
    Produce tarblock diff given dirsig_fileobj_list and pathiter

    dirsig_fileobj_list should either be a tar fileobj or a list of
    those, sorted so the most recent is last.

    Resets the module-level ``stats`` object as a side effect.  Returns
    a DummyBlockIter on dry runs (or while progress has not collected
    evidence yet), otherwise a DeltaTarBlockIter.
    """
    global stats
    stats = statistics.StatsDeltaProcess()
    if isinstance(dirsig_fileobj_list, list):
        # Same combination DirDelta_WriteSig performs for a list input.
        sig_iter = get_combined_path_iter(dirsig_fileobj_list)
    else:
        sig_iter = sigtar2path_iter(dirsig_fileobj_list)
    delta_iter = get_delta_iter(path_iter, sig_iter)
    if config.dry_run or (config.progress and not progress.tracker.has_collected_evidence()):
        return DummyBlockIter(delta_iter)
    else:
        return DeltaTarBlockIter(delta_iter)


def delta_iter_error_handler(exc, new_path, sig_path, sig_tar=None):  # pylint: disable=unused-argument
    u"""
    Called by get_delta_iter, report error in getting delta

    Logs a warning naming whichever of new_path / sig_path is set and
    returns None.
    """
    if new_path:
        index_string = new_path.get_relative_path()
    elif sig_path:
        index_string = sig_path.get_relative_path()
    else:
        assert 0, u"Both new and sig are None for some reason"
    log.Warn(_(u"Error %s getting delta for %s")
             % (util.uexc(exc), util.fsdecode(index_string)))
    return None


def get_delta_path(new_path, sig_path, sigTarFile=None):
    u"""
    Return new delta_path which, when read, writes sig to sig_fileobj,
    if sigTarFile is not None

    The returned path has difftype "diff" when both new_path and
    sig_path are regular files and sig_path carries a signature,
    otherwise "snapshot".
    """
    assert new_path
    if sigTarFile:
        # ti/index are only needed (and only bound) when writing signatures
        ti = new_path.get_tarinfo()
        index = new_path.index
    delta_path = new_path.get_ropath()
    log.Debug(_(u"Getting delta of %s and %s") % (new_path, sig_path))

    def callback(sig_string):
        u"""
        Callback activated when FileWithSignature read to end
        """
        ti.size = len(sig_string)
        if sys.version_info.major >= 3:
            ti.name = u"signature/" + util.fsdecode(b"/".join(index))
        else:
            ti.name = b"signature/" + b"/".join(index)
        sigTarFile.addfile(ti, io.BytesIO(sig_string))

    if new_path.isreg() and sig_path and sig_path.isreg() and sig_path.difftype == u"signature":
        delta_path.difftype = u"diff"
        old_sigfp = sig_path.open(u"rb")
        newfp = FileWithReadCounter(new_path.open(u"rb"))
        if sigTarFile:
            newfp = FileWithSignature(newfp, callback,
                                      new_path.getsize())
        delta_path.setfileobj(librsync.DeltaFile(old_sigfp, newfp))
    else:
        delta_path.difftype = u"snapshot"
        if sigTarFile:
            if sys.version_info.major >= 3:
                ti.name = u"snapshot/" + util.fsdecode(b"/".join(index))
            else:
                ti.name = b"snapshot/" + b"/".join(index)
        if not new_path.isreg():
            if sigTarFile:
                sigTarFile.addfile(ti)
            if stats:
                stats.SourceFileSize += delta_path.getsize()
        else:
            newfp = FileWithReadCounter(new_path.open(u"rb"))
            if sigTarFile:
                newfp = FileWithSignature(newfp, callback,
                                          new_path.getsize())
            delta_path.setfileobj(newfp)
    new_path.copy_attribs(delta_path)
    delta_path.stat.st_size = new_path.stat.st_size
    return delta_path


def log_delta_path(delta_path, new_path=None, stats=None):
    u"""
    Look at delta path and log delta.  Add stats if new_path is set

    Note the ``stats`` parameter shadows the module-level ``stats``.
    """
    if delta_path.difftype == u"snapshot":
        if new_path and stats:
            stats.add_new_file(new_path)
        log.Info(_(u"A %s") %
                 (util.fsdecode(delta_path.get_relative_path())),
                 log.InfoCode.diff_file_new,
                 util.escape(delta_path.get_relative_path()))
    else:
        if new_path and stats:
            stats.add_changed_file(new_path)
        log.Info(_(u"M %s") %
                 (util.fsdecode(delta_path.get_relative_path())),
                 log.InfoCode.diff_file_changed,
                 util.escape(delta_path.get_relative_path()))


def get_delta_iter(new_iter, sig_iter, sig_fileobj=None):
    u"""
    Generate delta iter from new Path iter and sig Path iter.

    For each delta path of regular file type, path.difftype will be
    set to "snapshot", "diff".  sig_iter will probably iterate ROPaths
    instead of Paths.

    If sig_fileobj is not None, will also write signatures to sig_fileobj.

    The module-level ``stats`` object is updated while iterating and is
    closed (together with the signature tar, if any) only once the
    generator has been fully consumed.
    """
    collated = collate2iters(new_iter, sig_iter)
    if sig_fileobj:
        sigTarFile = util.make_tarfile(u"w", sig_fileobj)
    else:
        sigTarFile = None
    for new_path, sig_path in collated:
        log.Debug(_(u"Comparing %s and %s") % (new_path and util.uindex(new_path.index),
                                               sig_path and util.uindex(sig_path.index)))
        if not new_path or not new_path.type:
            # File doesn't exist (but ignore attempts to delete base dir;
            # old versions of duplicity could have written out the sigtar in
            # such a way as to fool us; LP: #929067)
            if sig_path and sig_path.exists() and sig_path.index != ():
                # but signature says it did
                log.Info(_(u"D %s") %
                         (util.fsdecode(sig_path.get_relative_path())),
                         log.InfoCode.diff_file_deleted,
                         util.escape(sig_path.get_relative_path()))
                if sigTarFile:
                    ti = ROPath(sig_path.index).get_tarinfo()
                    if sys.version_info.major >= 3:
                        ti.name = u"deleted/" + util.uindex(sig_path.index)
                    else:
                        ti.name = b"deleted/" + b"/".join(sig_path.index)
                    sigTarFile.addfile(ti)
                stats.add_deleted_file(sig_path)
                yield ROPath(sig_path.index)
        elif not sig_path or new_path != sig_path:
            # Must calculate new signature and create delta
            delta_path = robust.check_common_error(delta_iter_error_handler,
                                                   get_delta_path,
                                                   (new_path, sig_path, sigTarFile))
            if delta_path:
                # log and collect stats
                log_delta_path(delta_path, new_path, stats)
                yield delta_path
            else:
                # if not, an error must have occurred
                stats.Errors += 1
        else:
            stats.add_unchanged_file(new_path)
    stats.close()
    if sigTarFile:
        sigTarFile.close()


def sigtar2path_iter(sigtarobj):
    u"""
    Convert signature tar file object open for reading into path iter

    Yields one ROPath per tar member, with ``difftype`` taken from the
    member name prefix ("signature", "snapshot" or "deleted").  Raises
    DiffDirException for a member name with none of those prefixes.
    sigtarobj is closed once the tar has been fully iterated.
    """
    tf = util.make_tarfile(u"r", sigtarobj)
    tf.debug = 1
    for tarinfo in tf:
        tiname = util.get_tarinfo_name(tarinfo)
        for prefix in [r"signature/", r"snapshot/", r"deleted/"]:
            if tiname.startswith(prefix):
                # strip prefix and '/' from name and set it to difftype
                name, difftype = tiname[len(prefix):], prefix[:-1]
                break
        else:
            raise DiffDirException(u"Bad tarinfo name %s" % (tiname,))

        if sys.version_info.major >= 3:
            index = tuple(util.fsencode(name).split(b"/"))
        else:
            index = tuple(name.split(b"/"))
        if not index[-1]:
            index = index[:-1]  # deal with trailing /, ""

        ropath = ROPath(index)
        ropath.difftype = difftype
        if difftype == u"signature" or difftype == u"snapshot":
            ropath.init_from_tarinfo(tarinfo)
            if ropath.isreg():
                ropath.setfileobj(tf.extractfile(tarinfo))
        yield ropath
    sigtarobj.close()


def collate2iters(riter1, riter2):
    u"""
    Collate two iterators.

    The elements yielded by each iterator must have an index
    attribute, and both iterators must yield elements in order of
    increasing index.  This generator yields pairs (elem1, elem2),
    (elem1, None), or (None, elem2).  The two elements in a pair have
    the same index, and pairs with earlier indices are yielded before
    pairs with later indices.
    """
    relem1, relem2 = None, None
    while 1:
        if not relem1:
            try:
                relem1 = next(riter1)
            except StopIteration:
                # riter1 exhausted: flush whatever remains of riter2
                if relem2:
                    yield (None, relem2)
                for relem2 in riter2:
                    yield (None, relem2)
                break
            index1 = relem1.index
        if not relem2:
            try:
                relem2 = next(riter2)
            except StopIteration:
                # riter2 exhausted: flush whatever remains of riter1
                if relem1:
                    yield (relem1, None)
                for relem1 in riter1:
                    yield (relem1, None)
                break
            index2 = relem2.index

        if index1 < index2:
            yield (relem1, None)
            relem1 = None
        elif index1 == index2:
            yield (relem1, relem2)
            relem1, relem2 = None, None
        else:
            # index2 is less
            yield (None, relem2)
            relem2 = None


def combine_path_iters(path_iter_list):
    u"""
    Produce new iterator by combining the iterators in path_iter_list

    This new iter will iterate every path that is in path_iter_list in
    order of increasing index.  If multiple iterators in
    path_iter_list yield paths with the same index, combine_path_iters
    will discard all paths but the one yielded by the last path_iter.

    This is used to combine signature iters, as the output will be a
    full up-to-date signature iter.
    """
    path_iter_list = path_iter_list[:]  # copy before destructive reverse
    # After reversing, the last (most recent) iterator has iter_index 0
    # and therefore sorts first among triples with equal path index.
    path_iter_list.reverse()

    def get_triple(iter_index):
        u"""
        Represent the next element as a triple, to help sorting
        """
        try:
            path = next(path_iter_list[iter_index])
        except StopIteration:
            return None
        return (path.index, iter_index, path)

    def refresh_triple_list(triple_list):
        u"""
        Update all elements with path_index same as first element
        """
        path_index = triple_list[0][0]
        iter_index = 0
        while iter_index < len(triple_list):
            old_triple = triple_list[iter_index]
            if old_triple[0] == path_index:
                new_triple = get_triple(old_triple[1])
                if new_triple:
                    triple_list[iter_index] = new_triple
                    iter_index += 1
                else:
                    del triple_list[iter_index]
            else:
                break  # assumed triple_list sorted, so can exit now

    triple_list = [x for x in map(get_triple, range(len(path_iter_list))) if x]
    while triple_list:
        triple_list.sort()
        yield triple_list[0][2]
        refresh_triple_list(triple_list)


def DirDelta_WriteSig(path_iter, sig_infp_list, newsig_outfp):
    u"""
    Like DirDelta but also write signature into newsig_outfp

    Like DirDelta, sig_infp_list can be a tar fileobj or a sorted list
    of those.  A signature will only be written to newsig_outfp if it
    is different from (the combined) sig_infp_list.
    """
    global stats
    stats = statistics.StatsDeltaProcess()
    if isinstance(sig_infp_list, list):
        sig_path_iter = get_combined_path_iter(sig_infp_list)
    else:
        sig_path_iter = sigtar2path_iter(sig_infp_list)
    delta_iter = get_delta_iter(path_iter, sig_path_iter, newsig_outfp)
    if config.dry_run or (config.progress and not progress.tracker.has_collected_evidence()):
        return DummyBlockIter(delta_iter)
    else:
        return DeltaTarBlockIter(delta_iter)


def get_combined_path_iter(sig_infp_list):
    u"""
    Return path iter combining signatures in list of open sig files
    """
    return combine_path_iters([sigtar2path_iter(x) for x in sig_infp_list])


class FileWithReadCounter(object):
    u"""
    File-like object which also computes amount read as it is read
    """
    def __init__(self, infile):
        u"""FileWithReadCounter initializer"""
        self.infile = infile

    def read(self, length=-1):
        u"""
        Read from the wrapped file, adding the byte count to stats

        An IOError is logged as a warning and treated as end of data
        (an empty bytes object is returned).
        """
        try:
            buf = self.infile.read(length)
        except IOError as ex:
            buf = b""
            log.Warn(_(u"Error %s getting delta for %s")
                     % (util.uexc(ex), util.fsdecode(self.infile.name)))
        if stats:
            stats.SourceFileSize += len(buf)
        return buf

    def close(self):
        u"""Close the wrapped file and return its close() result"""
        return self.infile.close()


class FileWithSignature(object):
    u"""
    File-like object which also computes signature as it is read
    """
    blocksize = 32 * 1024

    def __init__(self, infile, callback, filelen, *extra_args):
        u"""
        FileWithSignature initializer

        The object will act like infile, but whenever it is read it
        add infile's data to a SigGenerator object.  When the file is
        closed the callback will be called with the calculated
        signature, and any extra_args if given.

        filelen is used to calculate the block size of the signature.
        """
        self.infile, self.callback = infile, callback
        self.sig_gen = librsync.SigGenerator(get_block_size(filelen))
        self.activated_callback = None
        self.extra_args = extra_args

    def read(self, length=-1):
        u"""Read from infile and feed the data to the signature generator"""
        buf = self.infile.read(length)
        self.sig_gen.update(buf)
        return buf

    def close(self):
        u"""
        Drain infile, fire the callback once, then close infile
        """
        # Make sure all of infile read
        if not self.activated_callback:
            while self.read(self.blocksize):
                pass
            self.activated_callback = 1
            self.callback(self.sig_gen.getsig(), *self.extra_args)
        return self.infile.close()


class TarBlock(object):
    u"""
    Contain information to add next file to tar
    """
    def __init__(self, index, data):
        u"""
        TarBlock initializer - just store data
        """
        self.index = index
        self.data = data


class TarBlockIter(object):
    u"""
    A bit like an iterator, yield tar blocks given input iterator

    Unlike an iterator, however, control over the maximum size of a
    tarblock is available by passing an argument to next().  Also the
    get_footer() is available.
    """
    def __init__(self, input_iter):
        u"""
        TarBlockIter initializer
        """
        self.input_iter = input_iter
        self.offset = 0  # total length of data read
        self.process_waiting = False  # process_continued has more blocks
        self.process_next_vol_number = None  # next volume number to write in multivol
        self.previous_index = None  # holds index of last block returned
        self.previous_block = None  # holds block of last block returned
        self.remember_next = False  # see remember_next_index()
        self.remember_value = None  # holds index of next block
        self.remember_block = None  # holds block of next block
        self.queued_data = None  # data to return in next next() call

    def tarinfo2tarblock(self, index, tarinfo, file_data=b""):
        u"""
        Make tarblock out of tarinfo and file data

        Sets tarinfo.size to len(file_data) and pads the data with NUL
        bytes up to a multiple of tarfile.BLOCKSIZE.
        """
        tarinfo.size = len(file_data)
        headers = tarinfo.tobuf(errors=u'replace', encoding=config.fsencoding)
        remainder = tarinfo.size % tarfile.BLOCKSIZE
        if remainder > 0:
            filler_data = b"\0" * (tarfile.BLOCKSIZE - remainder)
        else:
            filler_data = b""
        return TarBlock(index, b"%s%s%s" % (headers, file_data, filler_data))

    def process(self, val):  # pylint: disable=unused-argument
        u"""
        Turn next value of input_iter into a TarBlock

        Must be overridden in a subclass.
        """
        assert not self.process_waiting
        raise NotImplementedError(u"process must be overridden in a subclass")

    def process_continued(self):
        u"""
        Get more tarblocks

        If processing val above would produce more than one TarBlock,
        get the rest of them by calling process_continued.

        Must be overridden in a subclass that sets process_waiting.
        """
        assert self.process_waiting
        raise NotImplementedError(u"process_continued must be overridden in a subclass")

    def __next__(self):
        u"""
        Return next block and update offset
        """
        if self.queued_data is not None:
            result = self.queued_data
            self.queued_data = None
            # Keep rest of metadata as is (like previous_index)
            return result

        if self.process_waiting:
            result = self.process_continued()  # pylint: disable=assignment-from-no-return
        else:
            # Below a StopIteration exception will just be passed upwards
            result = self.process(next(self.input_iter))  # pylint: disable=assignment-from-no-return
        block_number = self.process_next_vol_number
        self.offset += len(result.data)
        self.previous_index = result.index
        self.previous_block = block_number
        if self.remember_next:
            self.remember_value = result.index
            self.remember_block = block_number
            self.remember_next = False
        return result

    def get_read_size(self):
        u"""Return the fixed number of bytes read per data block"""
        # read size must always be the same, because if we are restarting a
        # backup volume where the previous volume ended in a data block, we
        # have to be able to assume it's length in order to continue reading
        # the file from the right place.
        return 64 * 1024

    def get_previous_index(self):
        u"""
        Return (index, block number) of last tarblock returned

        Both are None if no block has been processed yet.
        """
        return self.previous_index, self.previous_block

    def queue_index_data(self, data):
        u"""
        Next time next() is called, we will return data instead of processing
        """
        self.queued_data = data

    def remember_next_index(self):
        u"""
        When called, remember the index of the next block iterated
        """
        self.remember_next = True
        self.remember_value = None
        self.remember_block = None

    def recall_index(self):
        u"""
        Retrieve (index, block number) remembered with remember_next_index
        """
        return self.remember_value, self.remember_block

    def get_footer(self):
        u"""
        Return closing string for tarfile, reset offset
        """
        remainder = self.offset % tarfile.RECORDSIZE
        self.offset = 0
        return b'\0' * (tarfile.RECORDSIZE - remainder)  # remainder can be 0

    def __iter__(self):  # pylint: disable=non-iterator-returned
        return self


class DummyBlockIter(TarBlockIter):
    u"""
    TarBlockIter that does no file reading
    """
    def process(self, delta_ropath):
        u"""
        Get a fake tarblock from delta_ropath
        """
        ti = delta_ropath.get_tarinfo()
        index = delta_ropath.index

        # Return blocks of deleted files or fileless snapshots
        if not delta_ropath.type or not delta_ropath.fileobj:
            return self.tarinfo2tarblock(index, ti)

        if stats:
            # Since we don't read the source files, we can't analyze them.
            # Best we can do is count them raw.
            stats.SourceFiles += 1
            stats.SourceFileSize += delta_ropath.getsize()
            log.Progress(None, stats.SourceFileSize)
        return self.tarinfo2tarblock(index, ti)


class SigTarBlockIter(TarBlockIter):
    u"""
    TarBlockIter that yields blocks of a signature tar from path_iter
    """
    def process(self, path):
        u"""
        Return associated signature TarBlock from path

        Regular files get a "signature/" entry holding their librsync
        signature; everything else gets a data-less "snapshot/" entry.
        """
        ti = path.get_tarinfo()
        if path.isreg():
            sfp = librsync.SigFile(path.open(u"rb"),
                                   get_block_size(path.getsize()))
            sigbuf = sfp.read()
            sfp.close()
            ti.name = b"signature/" + b"/".join(path.index)
            if sys.version_info.major >= 3:
                ti.name = util.fsdecode(ti.name)
            return self.tarinfo2tarblock(path.index, ti, sigbuf)
        else:
            ti.name = b"snapshot/" + b"/".join(path.index)
            if sys.version_info.major >= 3:
                ti.name = util.fsdecode(ti.name)
            return self.tarinfo2tarblock(path.index, ti)


class DeltaTarBlockIter(TarBlockIter):
    u"""
    TarBlockIter that yields parts of a deltatar file

    Unlike SigTarBlockIter, the argument to __init__ is a
    delta_path_iter, so the delta information has already been
    calculated.
    """
    def process(self, delta_ropath):
        u"""
        Get a tarblock from delta_ropath

        If the file data does not fit in one read, the first "multivol"
        block is returned and process_waiting is set so that
        process_continued() supplies the remaining volumes.
        """
        def add_prefix(tarinfo, prefix):
            u"""Add prefix to the name of a tarinfo file"""
            if tarinfo.name == r".":
                tarinfo.name = prefix + r"/"
            else:
                tarinfo.name = r"%s/%s" % (prefix, tarinfo.name)

        ti = delta_ropath.get_tarinfo()
        index = delta_ropath.index

        # Return blocks of deleted files or fileless snapshots
        if not delta_ropath.type or not delta_ropath.fileobj:
            if not delta_ropath.type:
                add_prefix(ti, r"deleted")
            else:
                assert delta_ropath.difftype == u"snapshot"
                add_prefix(ti, r"snapshot")
            return self.tarinfo2tarblock(index, ti)

        # Now handle single volume block case
        fp = delta_ropath.open(u"rb")
        data, last_block = self.get_data_block(fp)
        if stats:
            stats.RawDeltaSize += len(data)
        if last_block:
            if delta_ropath.difftype == u"snapshot":
                add_prefix(ti, r"snapshot")
            elif delta_ropath.difftype == u"diff":
                add_prefix(ti, r"diff")
            else:
                assert 0, u"Unknown difftype"
            return self.tarinfo2tarblock(index, ti, data)

        # Finally, do multivol snapshot or diff case
        full_name = r"multivol_%s/%s" % (delta_ropath.difftype, ti.name)
        ti.name = full_name + r"/1"
        self.process_prefix = full_name
        self.process_fp = fp
        self.process_ropath = delta_ropath
        self.process_waiting = 1
        self.process_next_vol_number = 2
        return self.tarinfo2tarblock(index, ti, data)

    def get_data_block(self, fp):
        u"""
        Return pair (next data block, boolean last data block)

        A short read marks the last block; fp is closed at that point
        and DiffDirException is raised if close() returns a true value.
        """
        read_size = self.get_read_size()
        buf = fp.read(read_size)
        if len(buf) < read_size:
            if fp.close():
                raise DiffDirException(u"Error closing file")
            return (buf, True)
        else:
            return (buf, False)

    def process_continued(self):
        u"""
        Return next volume in multivol diff or snapshot
        """
        assert self.process_waiting
        ropath = self.process_ropath
        ti, index = ropath.get_tarinfo(), ropath.index
        ti.name = u"%s/%d" % (self.process_prefix, self.process_next_vol_number)
        data, last_block = self.get_data_block(self.process_fp)
        if stats:
            stats.RawDeltaSize += len(data)
        if last_block:
            # multivol file finished: clear the continuation state
            self.process_prefix = None
            self.process_fp = None
            self.process_ropath = None
            self.process_waiting = None
            self.process_next_vol_number = None
        else:
            self.process_next_vol_number += 1
        return self.tarinfo2tarblock(index, ti, data)


def write_block_iter(block_iter, out_obj):
    u"""
    Write block_iter to filename, path, or file object

    All blocks are written, followed by block_iter.get_footer(), and the
    output file is then closed.  If out_obj is a Path its cached data is
    refreshed with setdata() afterwards.
    """
    if isinstance(out_obj, Path):
        fp = open(out_obj.name, u"wb")
    elif isinstance(out_obj, (str, u"".__class__)):
        fp = open(out_obj, u"wb")
    else:
        fp = out_obj
    for block in block_iter:
        fp.write(block.data)
    fp.write(block_iter.get_footer())
    # Close outside of the assert statement: asserts are stripped under
    # ``python -O``, which would otherwise leave the file unclosed.
    close_result = fp.close()
    assert not close_result
    if isinstance(out_obj, Path):
        out_obj.setdata()


def get_block_size(file_len):
    u"""
    Return a reasonable block size to use on files of length file_len

    If the block size is too big, deltas will be bigger than is
    necessary.  If the block size is too small, making deltas and
    patching can take a really long time.
    """
    if file_len < 1024000:
        return 512  # set minimum of 512 bytes
    else:
        # Split file into about 2000 pieces, rounding to 512
        file_blocksize = int((file_len / (2000 * 512))) * 512
        return min(file_blocksize, config.max_blocksize)