# Copyright 2002, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA
"""Code for reverting the rdiff-backup directory to prev state

This module is used after an aborted session, and the rdiff-backup
destination directory may be in-between states.  In this situation we
need to bring back the directory as it was after the last successful
backup.  The basic strategy is to restore all the attributes from the
metadata file (which we assume is intact) and delete the extra
increments.  For regular files we examine the mirror file and use the
increment file to get the old data if the mirror file is out of date.

Currently this does not recover hard links.  This may make the
regressed directory take up more disk space, but hard links can still
be recovered.

"""

import re
import os
from . import Globals, restore, log, rorpiter, TempFile, metadata, rpath, C, \
    Time, robust, longname

# regress_time should be set to the time we want to regress back to
# (usually the time of the last successful backup)
regress_time = None

# This should be set to the latest unsuccessful backup time
unsuccessful_backup_time = None


class RegressException(Exception):
    """Raised on any exception in regress process"""
    pass


def Regress(mirror_rp):
    """Bring mirror and inc directory back to regress_time

    Also affects the rdiff-backup-data directory, so Globals.rbdir
    should be set.  Regress should only work one step at a time
    (i.e. don't "regress" through two separate backup sets).  This
    function should be run locally to the rdiff-backup-data directory.

    mirror_rp is the root rpath of the mirror; it must be a local
    directory with an empty index, as must the increments directory
    found under Globals.rbdir.

    """
    inc_rpath = Globals.rbdir.append_path(b"increments")
    assert mirror_rp.index == () and inc_rpath.index == ()
    assert mirror_rp.isdir() and inc_rpath.isdir()
    assert mirror_rp.conn is inc_rpath.conn is Globals.local_connection
    manager, former_current_mirror_rp = set_regress_time()
    set_restore_times()
    regress_rbdir(manager)
    ITR = rorpiter.IterTreeReducer(RegressITRB, [])
    for rf in iterate_meta_rfs(mirror_rp, inc_rpath):
        ITR(rf.index, rf)
    ITR.Finish()
    # The marker of the failed session is removed last, only once
    # everything else has been reverted.
    if former_current_mirror_rp:
        if Globals.do_fsync:
            C.sync()  # Sync first, since we are marking dest dir as good now
        former_current_mirror_rp.delete()


def set_regress_time():
    """Set global regress_time to previous successful backup

    Exactly two current_mirror markers are expected.  The first entry
    of the sorted list is taken as the marker of the failed session
    and the second as the marker of the last successful one.

    Sets the globals regress_time and unsuccessful_backup_time and
    returns the tuple (metadata manager, marker rp of the failed
    session), the latter to be deleted once the regress is complete.

    """
    global regress_time, unsuccessful_backup_time
    manager = metadata.SetManager()
    curmir_incs = manager.sorted_prefix_inclist(b'current_mirror')
    assert len(curmir_incs) == 2, \
        "Found %s current_mirror flags, expected 2" % len(curmir_incs)
    mirror_rp_to_delete = curmir_incs[0]
    regress_time = curmir_incs[1].getinctime()
    unsuccessful_backup_time = mirror_rp_to_delete.getinctime()
    log.Log("Regressing to %s" % Time.timetopretty(regress_time), 4)
    return manager, mirror_rp_to_delete


def set_restore_times():
    """Set _rest_time and _mirror_time in the restore module

    _rest_time (restore time) corresponds to the last successful
    backup time.  _mirror_time is the unsuccessful backup time.
    Both globals must have been set (see set_regress_time) before
    this is called.

    """
    restore.MirrorStruct._mirror_time = unsuccessful_backup_time
    restore.MirrorStruct._rest_time = regress_time


def regress_rbdir(meta_manager):
    """Delete the increments in the rdiff-backup-data directory

    Every file recorded at unsuccessful_backup_time is deleted, except
    the current_mirror marker, which is left in place so that the
    caller can delete it at the very end.  The other rp's should be
    deleted before the actual regress, to clear up disk space the rest
    of the procedure may need.

    Also, in case the previous session failed while diffing the
    metadata file, either recreate the mirror_metadata snapshot, or
    delete the extra regress_time diff.

    This function does not return anything.

    """
    has_meta_diff, has_meta_snap = False, False
    for old_rp in meta_manager.timerpmap[regress_time]:
        if old_rp.getincbase_bname() == b'mirror_metadata':
            if old_rp.getinctype() == b'snapshot':
                has_meta_snap = True
            else:
                assert old_rp.getinctype() == b'diff', old_rp
                has_meta_diff = True
    # Only a diff left at regress_time: rebuild the full snapshot first
    if has_meta_diff and not has_meta_snap:
        recreate_meta(meta_manager)

    for new_rp in meta_manager.timerpmap[unsuccessful_backup_time]:
        if new_rp.getincbase_bname() != b'current_mirror':
            log.Log("Deleting old diff at %s" % new_rp.get_safepath(), 5)
            new_rp.delete()
    # At most one regress_time metadata diff is removed
    for rp in meta_manager.timerpmap[regress_time]:
        if (rp.getincbase_bname() == b'mirror_metadata'
                and rp.getinctype() == b'diff'):
            rp.delete()
            break


def recreate_meta(meta_manager):
    """Make regress_time mirror_metadata snapshot by patching

    We write to a tempfile first.  Otherwise, in case of a crash, it
    would seem we would have an intact snapshot and partial diff, not
    the reverse.

    """
    # One-element list so that the callback below can rebind the rp
    temprp = [TempFile.new_in_dir(Globals.rbdir)]

    def callback(rp):
        # presumably the writer reports the rpath it actually wrote to;
        # confirm against metadata.MetadataFile
        temprp[0] = rp

    writer = metadata.MetadataFile(
        temprp[0], 'wb', check_path=0, callback=callback)
    for rorp in meta_manager.get_meta_at_time(regress_time, None):
        writer.write_object(rorp)
    writer.close()

    finalrp = Globals.rbdir.append(
        b"mirror_metadata.%b.snapshot.gz" % Time.timetobytes(regress_time))
    assert not finalrp.lstat(), finalrp
    rpath.rename(temprp[0], finalrp)
    if Globals.fsync_directories:
        Globals.rbdir.fsync()


def iterate_raw_rfs(mirror_rp, inc_rp):
    """Iterate all RegressFile objects in mirror/inc directory

    Also changes permissions of unreadable files.  We don't have to
    change them back later because regress will do that for us.

    Each RegressFile is yielded before the ones below it.

    """
    root_rf = RegressFile(mirror_rp, inc_rp, restore.get_inclist(inc_rp))

    def helper(rf):
        mirror_rp = rf.mirror_rp
        # Permissions are only widened when not running with uid 0
        if Globals.process_uid != 0:
            if mirror_rp.isreg() and not mirror_rp.readable():
                mirror_rp.chmod(0o400 | mirror_rp.getperms())
            elif mirror_rp.isdir() and not mirror_rp.hasfullperms():
                mirror_rp.chmod(0o700 | mirror_rp.getperms())
        yield rf
        if rf.mirror_rp.isdir() or rf.inc_rp.isdir():
            for sub_rf in rf.yield_sub_rfs():
                yield from helper(sub_rf)

    return helper(root_rf)


def yield_metadata():
    """Iterate rorps from metadata file, if any are available

    Logs a fatal error if no metadata is found for regress_time.

    """
    metadata.SetManager()
    metadata_iter = metadata.ManagerObj.GetAtTime(regress_time)
    if metadata_iter:
        return metadata_iter
    log.Log.FatalError("No metadata for time %s (%s) found,\ncannot regress" %
                       (Time.timetopretty(regress_time), regress_time))


def iterate_meta_rfs(mirror_rp, inc_rp):
    """Yield RegressFile objects with extra metadata information added

    Each RegressFile will have an extra object variable .metadata_rorp
    which will contain the metadata attributes of the mirror file at
    regress_time.

    Metadata entries for which no RegressFile can be obtained are
    skipped with a warning.

    """
    raw_rfs = iterate_raw_rfs(mirror_rp, inc_rp)
    collated = rorpiter.Collate2Iters(raw_rfs, yield_metadata())
    for raw_rf, metadata_rorp in collated:
        raw_rf = longname.update_regressfile(raw_rf, metadata_rorp, mirror_rp)
        if not raw_rf:
            log.Log(
                "Warning, metadata file has entry for %s,\n"
                "but there are no associated files." %
                (metadata_rorp.get_safeindexpath(), ), 2)
            continue
        raw_rf.set_metadata_rorp(metadata_rorp)
        yield raw_rf


class RegressFile(restore.RestoreFile):
    """Like RestoreFile but with metadata

    Hold mirror_rp and related incs, but also put metadata info for
    the mirror file at regress time in self.metadata_rorp.
    self.metadata_rorp is not set by the constructor; callers must use
    set_metadata_rorp() before relying on it.

    """

    def __init__(self, mirror_rp, inc_rp, inc_list):
        restore.RestoreFile.__init__(self, mirror_rp, inc_rp, inc_list)
        self.set_regress_inc()

    def set_metadata_rorp(self, metadata_rorp):
        """Set self.metadata_rorp, creating empty if given None"""
        if metadata_rorp:
            self.metadata_rorp = metadata_rorp
        else:
            self.metadata_rorp = rpath.RORPath(self.index)

    def isdir(self):
        """Return true if regress needs before/after processing"""
        return ((self.metadata_rorp and self.metadata_rorp.isdir())
                or (self.mirror_rp and self.mirror_rp.isdir()))

    def set_regress_inc(self):
        """Set self.regress_inc to increment to be removed (or None)"""
        newer_incs = self.get_newer_incs()
        assert len(newer_incs) <= 1, "Too many recent increments"
        if newer_incs:
            self.regress_inc = newer_incs[0]  # first is mirror_rp
        else:
            self.regress_inc = None


class RegressITRB(rorpiter.ITRBranch):
    """Turn back state of dest directory (use with IterTreeReducer)

    The arguments to the ITR will be RegressFiles.  There are two main
    assumptions this procedure makes (besides those mentioned above):

    1.  The mirror_rp and the metadata_rorp equal_loose correctly iff
        they contain the same data.  If this is the case, then the inc
        file is unnecessary and we can delete it.

    2.  If they don't match, then applying the inc file will
        successfully get us back to the previous state.

    Since the metadata file is required, the two above really only
    matter for regular files.

    """

    def __init__(self):
        """Just initialize some variables to None"""
        self.rf = None  # will hold RegressFile applying to a directory

    def can_fast_process(self, index, rf):
        """True if none of the rps is a directory"""
        return not rf.mirror_rp.isdir() and not rf.metadata_rorp.isdir()

    def fast_process(self, index, rf):
        """Process when nothing is a directory

        If the mirror doesn't loosely match the metadata it is brought
        back to the recorded state; the increment of the failed session
        (if any) is deleted afterwards.

        """
        if not rf.metadata_rorp.equal_loose(rf.mirror_rp):
            log.Log(
                "Regressing file %s" % (rf.metadata_rorp.get_safeindexpath()),
                5)
            if rf.metadata_rorp.isreg():
                self.restore_orig_regfile(rf)
            else:
                if rf.mirror_rp.lstat():
                    rf.mirror_rp.delete()
                if rf.metadata_rorp.isspecial():
                    # special files are copied through the robust wrapper
                    robust.check_common_error(None, rpath.copy_with_attribs,
                                              (rf.metadata_rorp, rf.mirror_rp))
                else:
                    rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp)
        if rf.regress_inc:
            log.Log("Deleting increment %s" % rf.regress_inc.get_safepath(), 5)
            rf.regress_inc.delete()

    def restore_orig_regfile(self, rf):
        """Restore original regular file

        This is the trickiest case for avoiding information loss,
        because we don't want to delete the increment before the
        mirror is fully written.

        """
        assert rf.metadata_rorp.isreg()
        if rf.mirror_rp.isreg():
            # Write to a temp file and rename over the mirror
            tf = TempFile.new(rf.mirror_rp)
            tf.write_from_fileobj(rf.get_restore_fp())
            tf.fsync_with_dir()  # make sure tf fully written before move
            rpath.copy_attribs(rf.metadata_rorp, tf)
            rpath.rename(tf, rf.mirror_rp)  # move is atomic
        else:
            if rf.mirror_rp.lstat():
                rf.mirror_rp.delete()
            rf.mirror_rp.write_from_fileobj(rf.get_restore_fp())
            rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp)
        if Globals.fsync_directories:
            # force move before inc delete
            rf.mirror_rp.get_parent_rp().fsync()

    def start_process(self, index, rf):
        """Start processing directory

        If the metadata says this is a directory, make sure the mirror
        is a directory with full permissions before descending.

        """
        if rf.metadata_rorp.isdir():
            # make sure mirror is a readable dir
            if not rf.mirror_rp.isdir():
                if rf.mirror_rp.lstat():
                    rf.mirror_rp.delete()
                rf.mirror_rp.mkdir()
            if not rf.mirror_rp.hasfullperms():
                rf.mirror_rp.chmod(0o700)
        self.rf = rf

    def end_process(self):
        """Finish processing a directory

        Runs on the RegressFile stored by start_process: its final
        attributes are restored, or the directory is replaced by what
        the metadata records, and the increment of the failed session
        (if any) is deleted.

        """
        rf = self.rf
        if rf.metadata_rorp.isdir():
            if rf.mirror_rp.isdir():
                rf.mirror_rp.setdata()
                if not rf.metadata_rorp.equal_loose(rf.mirror_rp):
                    log.Log(
                        "Regressing attributes of %s" %
                        rf.mirror_rp.get_safepath(), 5)
                    rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp)
            else:
                rf.mirror_rp.delete()
                log.Log("Regressing file %s" % rf.mirror_rp.get_safepath(), 5)
                rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp)
        else:  # replacing a dir with some other kind of file
            assert rf.mirror_rp.isdir()
            log.Log("Replacing directory %s" % rf.mirror_rp.get_safepath(), 5)
            if rf.metadata_rorp.isreg():
                self.restore_orig_regfile(rf)
            else:
                rf.mirror_rp.delete()
                rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp)
        if rf.regress_inc:
            log.Log("Deleting increment %s" % rf.regress_inc.get_safepath(), 5)
            rf.regress_inc.delete()


def check_pids(curmir_incs):
    """Check PIDs in curmir markers to make sure rdiff-backup not running

    curmir_incs is an iterable of local current_mirror marker rpaths.
    A fatal error is logged as soon as one of them names a process
    that is considered to be still running.

    """
    pid_re = re.compile(r"^PID\s*([0-9]+)", re.I | re.M)

    def extract_pid(curmir_rp):
        """Return process ID from a current mirror marker, if any"""
        match = pid_re.search(curmir_rp.get_string())
        if not match:
            return None
        else:
            return int(match.group(1))

    def pid_running(pid):
        """Return True if the process with the given pid may be running"""
        if os.name == 'nt':
            # os.kill() must not be used as a probe on Windows: any
            # signal other than CTRL_C_EVENT/CTRL_BREAK_EVENT makes it
            # call TerminateProcess, which would kill the process we
            # are only trying to detect.
            import win32api
            import win32con
            import pywintypes
            process = None
            try:
                process = win32api.OpenProcess(win32con.PROCESS_ALL_ACCESS, 0,
                                               pid)
            except pywintypes.error as error:
                # 87 is ERROR_INVALID_PARAMETER, reported by OpenProcess
                # when no process with this PID exists.  Exceptions are
                # not subscriptable in Python 3, hence .winerror.
                if error.winerror == 87:
                    return False
                else:
                    msg = "Warning: unable to check if PID %d still running"
                    log.Log(msg % pid, 2)
            if process:
                win32api.CloseHandle(process)
                return True
            # NOTE(review): an inconclusive check counts as "not running"
            # here, whereas the POSIX branch below assumes "running".
            return False
        try:
            os.kill(pid, 0)  # signal 0 only checks, nothing is delivered
        except ProcessLookupError:  # errno.ESRCH - pid doesn't exist
            return False
        except OSError:  # any other OS error
            log.Log(
                "Warning: unable to check if PID %d still running" % (pid, ),
                2)
        return True

    for curmir_rp in curmir_incs:
        assert Globals.local_connection is curmir_rp.conn
        pid = extract_pid(curmir_rp)
        if pid is not None and pid_running(pid):
            log.Log.FatalError(
                """It appears that a previous rdiff-backup session with process
id %d is still running. If two different rdiff-backup processes write
the same repository simultaneously, data corruption will probably
result. To proceed with regress anyway, rerun rdiff-backup with the
--force option.""" % (pid, ))