# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

u"""Create and edit manifest for session contents"""

from builtins import map
from builtins import range
from builtins import object

import re
import sys

from duplicity import config
from duplicity import log
from duplicity import config
from duplicity import util


class ManifestError(Exception):
    u"""
    Exception raised when there is a problem with a manifest
    """
    pass


class Manifest(object):
    u"""
    List of volumes and information about each one

    The manifest is handled as bytes: to_string() produces bytes and
    from_string() expects bytes.
    """
    def __init__(self, fh=None):
        u"""
        Create blank Manifest

        @param fh: fileobj for manifest; when set, information is written
                   to it as soon as it is added to the manifest
        @type fh: DupPath

        @rtype: Manifest
        @return: manifest
        """
        self.hostname = None
        self.local_dirname = None
        self.volume_info_dict = {}  # dictionary vol numbers -> vol infos
        self.fh = fh
        self.files_changed = []

    def set_dirinfo(self):
        u"""
        Set information about directory from config,
        and write to manifest file.

        @rtype: Manifest
        @return: manifest
        """
        self.hostname = config.hostname
        self.local_dirname = config.local_path.name
        if self.fh:
            if self.hostname:
                self.fh.write(b"Hostname %s\n" % self.hostname.encode())
            if self.local_dirname:
                self.fh.write(b"Localdir %s\n" % Quote(self.local_dirname))
        return self

    def check_dirinfo(self):
        u"""
        Check the recorded hostname and source directory against config

        Returns None when config.allow_source_mismatch is set, when the
        recorded values match the current ones, or when hostname and
        local_dirname are not available. On a mismatch the problem is
        reported through log.FatalError.

        @rtype: None
        @return: None
        """
        if config.allow_source_mismatch:
            return

        # Check both hostname and fqdn (we used to write the fqdn into the
        # manifest, so we want to keep comparing against that)
        if (self.hostname and
                self.hostname != config.hostname and
                self.hostname != config.fqdn):
            errmsg = _(u"Fatal Error: Backup source host has changed.\n"
                       u"Current hostname: %s\n"
                       u"Previous hostname: %s") % (config.hostname, self.hostname)
            code = log.ErrorCode.hostname_mismatch
            code_extra = u"%s %s" % (util.escape(config.hostname), util.escape(self.hostname))

        elif (self.local_dirname and self.local_dirname != config.local_path.name):
            errmsg = _(u"Fatal Error: Backup source directory has changed.\n"
                       u"Current directory: %s\n"
                       u"Previous directory: %s") % (config.local_path.name, self.local_dirname)
            code = log.ErrorCode.source_dir_mismatch
            code_extra = u"%s %s" % (util.escape(config.local_path.name),
                                     util.escape(self.local_dirname))
        else:
            return

        log.FatalError(errmsg + u"\n\n" +
                       _(u"Aborting because you may have accidentally tried to "
                         u"backup two different data sets to the same remote "
                         u"location, or using the same archive directory. If "
                         u"this is not a mistake, use the "
                         u"--allow-source-mismatch switch to avoid seeing this "
                         u"message"), code, code_extra)

    def set_files_changed_info(self, files_changed):
        u"""
        Record the list of changed files and write it to the manifest file

        @param files_changed: sequence of entries; element 0 of each entry
                              is passed through Quote() and element 1 is
                              written in front of it. A false value leaves
                              the current list untouched.

        @return: void
        """
        if files_changed:
            self.files_changed = files_changed

        if self.fh:
            self.fh.write(b"Filelist %d\n" % len(self.files_changed))
            for fileinfo in self.files_changed:
                self.fh.write(b" %-7s %s\n" % (fileinfo[1], Quote(fileinfo[0])))

    def add_volume_info(self, vi):
        u"""
        Add volume info vi to manifest and write to manifest

        @param vi: volume info to add
        @type vi: VolumeInfo

        @return: void
        """
        vol_num = vi.volume_number
        self.volume_info_dict[vol_num] = vi
        if self.fh:
            self.fh.write(vi.to_string() + b"\n")

    def del_volume_info(self, vol_num):
        u"""
        Remove volume vol_num from the manifest

        @param vol_num: volume number to delete
        @type vol_num: int

        @raise ManifestError: if vol_num is not in the manifest

        @return: void
        """
        try:
            del self.volume_info_dict[vol_num]
        except KeyError:
            # Only a missing key means "not present"; anything else is a
            # genuine programming error and should not be masked.
            raise ManifestError(u"Volume %d not present in manifest" % (vol_num,))

    def to_string(self):
        u"""
        Return bytes version of self (header, file list, then vi strings)

        @rtype: bytes
        @return: self in serialized form
        """
        result = b""
        if self.hostname:
            result += b"Hostname %s\n" % self.hostname.encode()
        if self.local_dirname:
            result += b"Localdir %s\n" % Quote(self.local_dirname)

        result += b"Filelist %d\n" % len(self.files_changed)
        for fileinfo in self.files_changed:
            result += b" %-7s %s\n" % (fileinfo[1], Quote(fileinfo[0]))

        # Volumes are emitted in ascending volume-number order.
        volume_strings = [self.volume_info_dict[vol_num].to_string()
                          for vol_num in sorted(self.volume_info_dict)]
        return b"%s%s\n" % (result, b"\n".join(volume_strings))

    __str__ = to_string

    def from_string(self, s):
        u"""
        Initialize self from bytes s, return self

        @param s: manifest contents as produced by to_string
        @type s: bytes

        @rtype: Manifest
        @return: self
        """

        def get_field(fieldname):
            u"""
            Return the value of a field by parsing s, or None if no field
            """
            if not isinstance(fieldname, bytes):
                fieldname = fieldname.encode()
            m = re.search(b"(^|\\n)%s\\s(.*?)\n" % fieldname, s, re.I)
            if not m:
                return None
            else:
                return Unquote(m.group(2))
        self.hostname = get_field(u"hostname")
        if self.hostname is not None:
            self.hostname = self.hostname.decode()
        self.local_dirname = get_field(u"localdir")

        highest_vol = 0
        latest_vol = 0
        vi_regexp = re.compile(b"(?:^|\\n)(volume\\s.*(?:\\n.*)*?)(?=\\nvolume\\s|$)", re.I)
        vi_iterator = vi_regexp.finditer(s)
        for match in vi_iterator:
            vi = VolumeInfo().from_string(match.group(1))
            self.add_volume_info(vi)
            latest_vol = vi.volume_number
            highest_vol = max(highest_vol, latest_vol)
            log.Debug(_(u"Found manifest volume %s") % latest_vol)
        # If we restarted after losing some remote volumes, the highest volume
        # seen may be higher than the last volume recorded. That is, the
        # manifest could contain "vol1, vol2, vol3, vol2." If so, we don't
        # want to keep vol3's info.
        for i in range(latest_vol + 1, highest_vol + 1):
            self.del_volume_info(i)
        log.Info(_(u"Found %s volumes in manifest") % latest_vol)

        # Get file changed list - not needed if --file-changed not present
        filecount = 0
        if config.file_changed is not None:
            filelist_regexp = re.compile(b"(^|\\n)filelist\\s([0-9]+)\\n(.*?)(\\nvolume\\s|$)", re.I | re.S)
            match = filelist_regexp.search(s)
            if match:
                filecount = int(match.group(2))
            if filecount > 0:
                def parse_fileinfo(line):
                    fileinfo = line.strip().split()
                    return (fileinfo[0], b''.join(fileinfo[1:]))

                self.files_changed = list(map(parse_fileinfo, match.group(3).split(b'\n')))

            if filecount != len(self.files_changed):
                # Look up the translation for the template first and only
                # then substitute the values; formatting inside _() would
                # look up the already-formatted text instead.
                log.Error(_(u"Manifest file '%s' is corrupt: File count says %d, File list contains %d") %
                          (self.fh.base if self.fh else u"", filecount, len(self.files_changed)))
                self.corrupt_filelist = True

        return self

    def get_files_changed(self):
        u"""
        Return the list of changed files held by this manifest
        """
        return self.files_changed

    def __eq__(self, other):
        u"""
        Two manifests are equal if they contain the same volume infos
        and were made for the same hostname and local directory
        """
        vol_nums = sorted(self.volume_info_dict)
        other_vol_nums = sorted(other.volume_info_dict)

        if vol_nums != other_vol_nums:
            log.Notice(_(u"Manifests not equal because different volume numbers"))
            return False

        # The volume numbers are known to match at this point, so compare
        # the VolumeInfo objects stored under them (comparing the numbers
        # again could never fail).
        for vol_num in vol_nums:
            if not self.volume_info_dict[vol_num] == other.volume_info_dict[vol_num]:
                log.Notice(_(u"Manifests not equal because volume lists differ"))
                return False

        if (self.hostname != other.hostname or
                self.local_dirname != other.local_dirname):
            log.Notice(_(u"Manifests not equal because hosts or directories differ"))
            return False

        return True

    def __ne__(self, other):
        u"""
        Defines !=. Not doing this always leads to annoying bugs...
        """
        return not self.__eq__(other)

    def write_to_path(self, path):
        u"""
        Write string version of manifest to given path

        @param path: destination; must not exist yet
        """
        assert not path.exists()
        fout = path.open(u"wb")
        fout.write(self.to_string())
        # Close outside of the assert statement: asserts are removed when
        # Python runs with -O, which would otherwise skip the close call.
        close_result = fout.close()
        assert not close_result
        path.setdata()

    def get_containing_volumes(self, index_prefix):
        u"""
        Return list of volume numbers that may contain index_prefix

        A one-element index_prefix holding a text string is encoded to
        bytes before the lookup.
        """
        if len(index_prefix) == 1 and isinstance(index_prefix[0], u"".__class__):
            index_prefix = (index_prefix[0].encode(),)
        return [vol_num for vol_num, vol_info in self.volume_info_dict.items()
                if vol_info.contains(index_prefix)]


class VolumeInfoError(Exception):
    u"""
    Raised when there is a problem initializing a VolumeInfo from string
    """
    pass


class VolumeInfo(object):
    u"""
    Information about a single volume
    """
    def __init__(self):
        u"""VolumeInfo initializer"""
        self.volume_number = None
        self.start_index = None
        self.start_block = None
        self.end_index = None
        self.end_block = None
        self.hashes = {}

    def set_info(self, vol_number,
                 start_index, start_block,
                 end_index, end_block):
        u"""
        Set essential VolumeInfo information, return self

        Call with starting and ending paths stored in the volume. If
        a multivol diff gets split between volumes, count it as being
        part of both volumes.
        """
        self.volume_number = vol_number
        self.start_index = start_index
        self.start_block = start_block
        self.end_index = end_index
        self.end_block = end_block

        return self

    def set_hash(self, hash_name, data):
        u"""
        Set the value of hash hash_name (e.g. "MD5") to data

        Both hash_name and data are decoded to text when given as bytes.
        """
        if isinstance(hash_name, bytes):
            hash_name = hash_name.decode()
        if isinstance(data, bytes):
            data = data.decode()
        self.hashes[hash_name] = data

    def get_best_hash(self):
        u"""
        Return pair (hash_type, hash_data)

        SHA1 is the best hash, and MD5 is the second best hash. If
        neither is present an arbitrary stored hash is returned. None
        is returned if no hash is available.
        """
        if not self.hashes:
            return None
        for hash_name in (u"SHA1", u"MD5"):
            if hash_name in self.hashes:
                return (hash_name, self.hashes[hash_name])
        return list(self.hashes.items())[0]

    def to_string(self):
        u"""
        Return nicely formatted bytes reporting all information
        """
        def index_to_string(index):
            u"""Return printable version of index without any whitespace"""
            if index:
                s = b"/".join(index)
                return Quote(s)
            else:
                return b"."

        def bfmt(x):
            u"""Return x rendered as bytes, or a blank when x is None"""
            if x is None:
                return b" "
            return str(x).encode()

        slist = [b"Volume %d:" % self.volume_number]
        whitespace = b" "
        slist.append(b"%sStartingPath %s %s" %
                     (whitespace, index_to_string(self.start_index), bfmt(self.start_block)))
        slist.append(b"%sEndingPath %s %s" %
                     (whitespace, index_to_string(self.end_index), bfmt(self.end_block)))
        for key in self.hashes:
            slist.append(b"%sHash %s %s" %
                         (whitespace, key.encode(), self.hashes[key].encode()))
        return b"\n".join(slist)

    __str__ = to_string

    def from_string(self, s):
        u"""
        Initialize self from bytes s as created by to_string

        @raise VolumeInfoError: if the first line is not a Volume line or
                                the start or end index is missing

        @rtype: VolumeInfo
        @return: self
        """
        def string_to_index(s):
            u"""
            Return tuple index from string
            """
            s = Unquote(s)
            if s == b".":
                return ()
            return tuple(s.split(b"/"))

        linelist = s.strip().split(b"\n")

        # Set volume number
        m = re.search(b"^Volume ([0-9]+):", linelist[0], re.I)
        if not m:
            raise VolumeInfoError(u"Bad first line '%s'" % (linelist[0],))
        self.volume_number = int(m.group(1))

        # Set other fields
        for line in linelist[1:]:
            line_split = line.split()
            if not line_split:
                # Blank or whitespace-only line: nothing to parse.
                continue
            field_name = line_split[0].lower()
            other_fields = line_split[1:]
            # field_name has been lower-cased, so compare in lower case.
            if field_name == b"volume":
                log.Warn(_(u"Warning, found extra Volume identifier"))
                break
            elif field_name == b"startingpath":
                self.start_index = string_to_index(other_fields[0])
                if len(other_fields) > 1:
                    self.start_block = int(other_fields[1])
                else:
                    self.start_block = None
            elif field_name == b"endingpath":
                self.end_index = string_to_index(other_fields[0])
                if len(other_fields) > 1:
                    self.end_block = int(other_fields[1])
                else:
                    self.end_block = None
            elif field_name == b"hash":
                self.set_hash(other_fields[0], other_fields[1])

        if self.start_index is None or self.end_index is None:
            raise VolumeInfoError(u"Start or end index not set")
        return self

    def __eq__(self, other):
        u"""
        Used in test suite

        Compares volume number, start and end index and the hashes;
        the reason for a mismatch is logged as a notice.
        """
        if not isinstance(other, VolumeInfo):
            log.Notice(_(u"Other is not VolumeInfo"))
            return False
        if self.volume_number != other.volume_number:
            log.Notice(_(u"Volume numbers don't match"))
            return False
        if self.start_index != other.start_index:
            log.Notice(_(u"start_indicies don't match"))
            return False
        if self.end_index != other.end_index:
            log.Notice(_(u"end_index don't match"))
            return False
        if sorted(self.hashes.items()) != sorted(other.hashes.items()):
            log.Notice(_(u"Hashes don't match"))
            return False
        return True

    def __ne__(self, other):
        u"""
        Defines !=
        """
        return not self.__eq__(other)

    def contains(self, index_prefix, recursive=1):
        u"""
        Return true if volume might contain index

        If recursive is true, then return true if any index starting
        with index_prefix could be contained. Otherwise, just check
        if index_prefix itself is between starting and ending
        indicies.
        """
        if recursive:
            return (self.start_index[:len(index_prefix)] <=
                    index_prefix <= self.end_index)
        else:
            return self.start_index <= index_prefix <= self.end_index


nonnormal_char_re = re.compile(b"(\\s|[\\\\\"'])")


def Quote(s):
    u"""
    Return quoted version of s safe to put in a manifest or volume info

    Bytes without whitespace, backslash or quote characters are returned
    unchanged. Otherwise each such character is replaced by a \\xNN
    escape and the result is wrapped in double quotes.
    """
    if not nonnormal_char_re.search(s):
        return s  # no quoting necessary
    slist = []
    for i in range(0, len(s)):
        # Slice instead of index so that a one-byte bytes object is
        # obtained on Python 3 as well.
        char = s[i:i + 1]
        if nonnormal_char_re.search(char):
            slist.append(b"\\x%02x" % ord(char))
        else:
            slist.append(char)
    return b'"%s"' % b"".join(slist)


def maybe_chr(ch):
    u"""
    Return chr(ch) on Python 3 and ch itself on Python 2
    """
    if sys.version_info.major >= 3:
        return chr(ch)
    else:
        return ch


def Unquote(quoted_string):
    u"""
    Return original string from quoted_string produced by Quote

    Input that does not start with a double quote, including empty
    input, is returned unchanged.
    """
    # Quote only ever emits double quotes. The previous condition also
    # mentioned single quotes, but operator precedence made that clause
    # ineffective; the check below states the effective rule directly and,
    # by slicing, is safe for empty input.
    if quoted_string[:1] != b'"':
        return quoted_string
    assert quoted_string[:1] == quoted_string[-1:]
    return_list = []
    i = 1  # skip initial char
    while i < len(quoted_string) - 1:
        char = quoted_string[i:i + 1]
        if char == b"\\":
            # quoted section
            assert quoted_string[i + 1:i + 2] == b"x"
            if sys.version_info.major >= 3:
                return_list.append(int(quoted_string[i + 2:i + 4].decode(), 16).to_bytes(1, byteorder=u'big'))
            else:
                return_list.append(chr(int(quoted_string[i + 2:i + 4], 16)))
            i += 4
        else:
            return_list.append(char)
            i += 1
    return b"".join(return_list)