1# Copyright 2002, 2005 Ben Escoto
2#
3# This file is part of rdiff-backup.
4#
5# rdiff-backup is free software; you can redistribute it and/or modify
6# under the terms of the GNU General Public License as published by the
7# Free Software Foundation; either version 2 of the License, or (at your
8# option) any later version.
9#
10# rdiff-backup is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with rdiff-backup; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18# 02110-1301, USA
19"""Code for reverting the rdiff-backup directory to prev state
20
21This module is used after an aborted session, and the rdiff-backup
22destination directory may be in-between states.  In this situation we
23need to bring back the directory as it was after the last successful
24backup.  The basic strategy is to restore all the attributes from the
25metadata file (which we assume is intact) and delete the extra
26increments.  For regular files we examine the mirror file and use the
27increment file to get the old data if the mirror file is out of date.
28
29Currently this does not recover hard links.  This may make the
30regressed directory take up more disk space, but hard links can still
31be recovered.
32
33"""
34
35import re
36import os
37from . import Globals, restore, log, rorpiter, TempFile, metadata, rpath, C, \
38    Time, robust, longname
39
# regress_time should be set to the time we want to regress back to
# (usually the time of the last successful backup).  It stays None
# until set_regress_time() assigns it.
regress_time = None

# This should be set to the latest unsuccessful backup time.  Like
# regress_time, it is assigned by set_regress_time().
unsuccessful_backup_time = None
46
47
class RegressException(Exception):
    """Exception type for any failure occurring in the regress process"""
51
52
def Regress(mirror_rp):
    """Roll the mirror and increments directories back to regress_time

    The rdiff-backup-data directory is modified as well, which is why
    Globals.rbdir has to be set beforehand.  Only one step is undone
    per call (i.e. don't "regress" through two separate backup sets).
    This function should be run locally to the rdiff-backup-data
    directory.

    mirror_rp is the root of the mirror; it must be a directory with
    an empty index and live on the local connection.

    """
    increments_root = Globals.rbdir.append_path(b"increments")
    assert mirror_rp.index == () and increments_root.index == ()
    assert mirror_rp.isdir() and increments_root.isdir()
    assert mirror_rp.conn is increments_root.conn is Globals.local_connection

    # Work out the two relevant times, then clean up rdiff-backup-data
    # before touching the mirror itself.
    meta_manager, failed_marker_rp = set_regress_time()
    set_restore_times()
    regress_rbdir(meta_manager)

    reducer = rorpiter.IterTreeReducer(RegressITRB, [])
    for regress_file in iterate_meta_rfs(mirror_rp, increments_root):
        reducer(regress_file.index, regress_file)
    reducer.Finish()

    if not failed_marker_rp:
        return
    if Globals.do_fsync:
        # Sync first, since removing the marker declares the
        # destination directory good again.
        C.sync()
    failed_marker_rp.delete()
77
78
def set_regress_time():
    """Fill in the globals regress_time and unsuccessful_backup_time

    Exactly two current_mirror markers are expected.  The first entry
    of the sorted marker list is taken to belong to the failed
    session, the second one to the previous successful backup.

    Returns a tuple (metadata manager, marker rp of the failed
    session); the marker is meant to be deleted by the caller once the
    regress is complete.

    """
    global regress_time, unsuccessful_backup_time
    meta_manager = metadata.SetManager()
    markers = meta_manager.sorted_prefix_inclist(b'current_mirror')
    marker_count = len(markers)
    assert marker_count == 2, \
        "Found %s current_mirror flags, expected 2" % marker_count

    failed_marker_rp = markers[0]
    regress_time = markers[1].getinctime()
    unsuccessful_backup_time = failed_marker_rp.getinctime()

    pretty_time = Time.timetopretty(regress_time)
    log.Log("Regressing to %s" % pretty_time, 4)
    return meta_manager, failed_marker_rp
96
97
def set_restore_times():
    """Copy the regress times into restore.MirrorStruct

    MirrorStruct._rest_time (the restore time) becomes the time of the
    last successful backup, and MirrorStruct._mirror_time becomes the
    time of the unsuccessful backup.  Both module globals must have
    been set (see set_regress_time) before this is called.

    """
    mirror_struct = restore.MirrorStruct
    mirror_struct._mirror_time = unsuccessful_backup_time
    mirror_struct._rest_time = regress_time
107
108
def regress_rbdir(meta_manager):
    """Remove the failed session's files from rdiff-backup-data

    Everything recorded at unsuccessful_backup_time is deleted, except
    the current_mirror marker, which is left in place (the caller
    removes it at the very end).  Doing this before the actual regress
    frees disk space the rest of the procedure may need.

    In case the previous session failed while diffing the metadata
    file, the mirror_metadata snapshot for regress_time is recreated
    if only a diff exists, and the regress_time mirror_metadata diff
    is then deleted.

    Nothing is returned.

    """
    # Find out which kinds of mirror_metadata files exist at regress_time.
    found_snapshot = found_diff = False
    for old_rp in meta_manager.timerpmap[regress_time]:
        if old_rp.getincbase_bname() != b'mirror_metadata':
            continue
        if old_rp.getinctype() == b'snapshot':
            found_snapshot = True
            continue
        assert old_rp.getinctype() == b'diff', old_rp
        found_diff = True
    if found_diff and not found_snapshot:
        recreate_meta(meta_manager)

    # Drop everything written by the failed session but its marker.
    for new_rp in meta_manager.timerpmap[unsuccessful_backup_time]:
        if new_rp.getincbase_bname() == b'current_mirror':
            continue
        log.Log("Deleting old diff at %s" % new_rp.get_safepath(), 5)
        new_rp.delete()

    # Remove the (first) mirror_metadata diff at regress_time, if any.
    for rp in meta_manager.timerpmap[regress_time]:
        is_metadata_diff = (rp.getincbase_bname() == b'mirror_metadata'
                            and rp.getinctype() == b'diff')
        if is_metadata_diff:
            rp.delete()
            break
141
142
def recreate_meta(meta_manager):
    """Build the regress_time mirror_metadata snapshot by patching

    The snapshot is written to a temporary file in Globals.rbdir and
    only renamed to its final name once complete.  Otherwise a crash
    could leave what looks like an intact snapshot next to a partial
    diff, instead of the other way round.

    """
    snapshot_tmp = TempFile.new_in_dir(Globals.rbdir)

    def remember_rp(rp):
        # The writer reports the rp it ends up with through this
        # callback; keep it so the rename below uses the right file.
        # NOTE(review): when exactly MetadataFile invokes the callback
        # is not visible here -- confirm in the metadata module.
        nonlocal snapshot_tmp
        snapshot_tmp = rp

    meta_writer = metadata.MetadataFile(
        snapshot_tmp, 'wb', check_path=0, callback=remember_rp)
    for rorp in meta_manager.get_meta_at_time(regress_time, None):
        meta_writer.write_object(rorp)
    meta_writer.close()

    snapshot_name = (b"mirror_metadata.%b.snapshot.gz"
                     % Time.timetobytes(regress_time))
    finalrp = Globals.rbdir.append(snapshot_name)
    assert not finalrp.lstat(), finalrp
    rpath.rename(snapshot_tmp, finalrp)
    if Globals.fsync_directories:
        Globals.rbdir.fsync()
168
169
def iterate_raw_rfs(mirror_rp, inc_rp):
    """Return an iterator over all RegressFiles below mirror/inc

    The tree is walked depth first, each RegressFile being yielded
    before the ones underneath it.  When not running as uid 0,
    unreadable regular files and directories without full permissions
    get the missing owner bits added on the way.  The permissions are
    not changed back here because regress will do that for us.

    """

    def walk(rf):
        mirror = rf.mirror_rp
        if Globals.process_uid != 0:
            if mirror.isreg() and not mirror.readable():
                mirror.chmod(0o400 | mirror.getperms())
            elif mirror.isdir() and not mirror.hasfullperms():
                mirror.chmod(0o700 | mirror.getperms())
        yield rf
        if rf.mirror_rp.isdir() or rf.inc_rp.isdir():
            for child_rf in rf.yield_sub_rfs():
                yield from walk(child_rf)

    top_rf = RegressFile(mirror_rp, inc_rp, restore.get_inclist(inc_rp))
    return walk(top_rf)
193
194
def yield_metadata():
    """Return the iterator of metadata rorps stored for regress_time

    If no metadata is available for that time, a fatal error is
    reported through log.Log.FatalError.

    """
    metadata.SetManager()
    metadata_iter = metadata.ManagerObj.GetAtTime(regress_time)
    if not metadata_iter:
        log.Log.FatalError(
            "No metadata for time %s (%s) found,\ncannot regress" %
            (Time.timetopretty(regress_time), regress_time))
        return None
    return metadata_iter
203
204
def iterate_meta_rfs(mirror_rp, inc_rp):
    """Generate RegressFiles carrying their regress_time metadata

    The raw RegressFiles found on disk are collated with the rorps
    from the metadata file.  Every yielded RegressFile has the extra
    attribute .metadata_rorp holding the attributes the mirror file
    had at regress_time.  A metadata entry without any matching file
    is reported with a warning and skipped.

    """
    pairs = rorpiter.Collate2Iters(
        iterate_raw_rfs(mirror_rp, inc_rp), yield_metadata())
    for found_rf, metadata_rorp in pairs:
        regress_file = longname.update_regressfile(
            found_rf, metadata_rorp, mirror_rp)
        if regress_file:
            regress_file.set_metadata_rorp(metadata_rorp)
            yield regress_file
        else:
            log.Log(
                "Warning, metadata file has entry for %s,\n"
                "but there are no associated files." %
                (metadata_rorp.get_safeindexpath(), ), 2)
225
226
class RegressFile(restore.RestoreFile):
    """RestoreFile variant that also carries metadata

    Besides mirror_rp and the related increments, an instance holds
    the metadata of the mirror file at regress time in
    self.metadata_rorp.  That attribute is not set by the constructor;
    call set_metadata_rorp() before using it.

    """

    def __init__(self, mirror_rp, inc_rp, inc_list):
        super().__init__(mirror_rp, inc_rp, inc_list)
        self.set_regress_inc()

    def set_metadata_rorp(self, metadata_rorp):
        """Store metadata_rorp, or an empty RORPath if it is false"""
        if not metadata_rorp:
            metadata_rorp = rpath.RORPath(self.index)
        self.metadata_rorp = metadata_rorp

    def isdir(self):
        """Return true if regress needs before/after processing"""
        meta_is_dir = self.metadata_rorp and self.metadata_rorp.isdir()
        return meta_is_dir or (self.mirror_rp and self.mirror_rp.isdir())

    def set_regress_inc(self):
        """Set self.regress_inc to the increment to remove (or None)"""
        recent_incs = self.get_newer_incs()
        assert len(recent_incs) <= 1, "Too many recent increments"
        # At most one newer increment can exist; it is the one the
        # failed session created.
        self.regress_inc = recent_incs[0] if recent_incs else None
260
261
class RegressITRB(rorpiter.ITRBranch):
    """Branch rolling back the dest directory (use with IterTreeReducer)

    Every argument handed over by the ITR is a RegressFile.  Two main
    assumptions are made on top of those from the module docstring:

    1.  metadata_rorp.equal_loose(mirror_rp) holds iff both contain
        the same data.  In that case the inc file is unnecessary and
        can be deleted.

    2.  If they don't match, applying the inc file successfully gets
        us back to the previous state.

    Since the metadata file is required anyway, these two points
    really only matter for regular files.

    """

    def __init__(self):
        """Start out without any directory RegressFile"""
        self.rf = None  # RegressFile of the directory being processed

    def _drop_regress_inc(self, rf):
        """Delete the increment written by the failed session, if any"""
        if rf.regress_inc:
            log.Log("Deleting increment %s" % rf.regress_inc.get_safepath(), 5)
            rf.regress_inc.delete()

    def can_fast_process(self, index, rf):
        """True if neither the mirror nor the metadata is a directory"""
        return not (rf.mirror_rp.isdir() or rf.metadata_rorp.isdir())

    def fast_process(self, index, rf):
        """Regress a single non-directory entry"""
        meta, mirror = rf.metadata_rorp, rf.mirror_rp
        if not meta.equal_loose(mirror):
            log.Log("Regressing file %s" % (meta.get_safeindexpath()), 5)
            if meta.isreg():
                self.restore_orig_regfile(rf)
            else:
                if mirror.lstat():
                    mirror.delete()
                if meta.isspecial():
                    # Special files are recreated through
                    # robust.check_common_error instead of directly.
                    robust.check_common_error(None, rpath.copy_with_attribs,
                                              (meta, mirror))
                else:
                    rpath.copy_with_attribs(meta, mirror)
        self._drop_regress_inc(rf)

    def restore_orig_regfile(self, rf):
        """Bring back the original regular file

        Avoiding information loss is trickiest here: the increment
        must not be deleted before the mirror is fully written.

        """
        assert rf.metadata_rorp.isreg()
        if not rf.mirror_rp.isreg():
            # Whatever is in the way gets removed, then the old
            # content is written straight to the mirror path.
            if rf.mirror_rp.lstat():
                rf.mirror_rp.delete()
            rf.mirror_rp.write_from_fileobj(rf.get_restore_fp())
            rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp)
        else:
            tmp_rp = TempFile.new(rf.mirror_rp)
            tmp_rp.write_from_fileobj(rf.get_restore_fp())
            tmp_rp.fsync_with_dir()  # tmp_rp must be complete before the move
            rpath.copy_attribs(rf.metadata_rorp, tmp_rp)
            rpath.rename(tmp_rp, rf.mirror_rp)  # move is atomic
        if Globals.fsync_directories:
            # force the move to disk before the inc gets deleted
            parent_rp = rf.mirror_rp.get_parent_rp()
            parent_rp.fsync()

    def start_process(self, index, rf):
        """Begin processing a directory"""
        meta, mirror = rf.metadata_rorp, rf.mirror_rp
        if meta.isdir():
            # the mirror has to be a directory we can fully access
            if not mirror.isdir():
                if mirror.lstat():
                    mirror.delete()
                mirror.mkdir()
            if not mirror.hasfullperms():
                mirror.chmod(0o700)
        self.rf = rf

    def end_process(self):
        """Finish a directory after all its content has been handled"""
        rf = self.rf
        meta, mirror = rf.metadata_rorp, rf.mirror_rp
        if not meta.isdir():
            # replacing a dir with some other kind of file
            assert mirror.isdir()
            log.Log("Replacing directory %s" % mirror.get_safepath(), 5)
            if meta.isreg():
                self.restore_orig_regfile(rf)
            else:
                mirror.delete()
                rpath.copy_with_attribs(meta, mirror)
        elif mirror.isdir():
            # refresh the cached data before comparing attributes
            mirror.setdata()
            if not meta.equal_loose(mirror):
                log.Log(
                    "Regressing attributes of %s" % mirror.get_safepath(), 5)
                rpath.copy_attribs(meta, mirror)
        else:
            mirror.delete()
            log.Log("Regressing file %s" % mirror.get_safepath(), 5)
            rpath.copy_with_attribs(meta, mirror)
        self._drop_regress_inc(rf)
370
371
def check_pids(curmir_incs):
    """Check PIDs in curmir markers to make sure rdiff-backup not running

    Each rp in curmir_incs (current_mirror markers, all on the local
    connection) is read and searched for a line starting with "PID"
    followed by a number.  If such a process still appears to be
    running, log.Log.FatalError is called.  Markers without a PID line
    are ignored.

    """
    pid_re = re.compile(r"^PID\s*([0-9]+)", re.I | re.M)

    def extract_pid(curmir_rp):
        """Return process ID from a current mirror marker, if any"""
        match = pid_re.search(curmir_rp.get_string())
        return int(match.group(1)) if match else None

    def pid_running_windows(pid):
        """True if a process handle for pid can be opened (Windows only)"""
        import win32api
        import win32con
        import pywintypes
        try:
            process = win32api.OpenProcess(win32con.PROCESS_ALL_ACCESS, 0,
                                           pid)
        except pywintypes.error as error:
            # Exceptions cannot be indexed in Python 3, so the error
            # code has to be read from the winerror attribute.
            if error.winerror != 87:  # 87: invalid parameter, no such PID
                # NOTE(review): an access-denied error presumably means
                # the process exists; it is still reported as "not
                # running" here, as the previous code intended.
                msg = "Warning: unable to check if PID %d still running"
                log.Log(msg % pid, 2)
            return False
        if process:
            win32api.CloseHandle(process)
            return True
        return False

    def pid_running(pid):
        """True if we know if process with pid is currently running"""
        if os.name == 'nt':
            # os.kill() exists on Windows, so waiting for an
            # AttributeError never selected this path; and signal 0 is
            # no harmless existence probe there (it is CTRL_C_EVENT).
            return pid_running_windows(pid)
        try:
            os.kill(pid, 0)
        except ProcessLookupError:  # errno.ESRCH - pid doesn't exist
            return False
        except OSError:  # any other OS error
            # We cannot tell for sure; stay on the safe side and treat
            # the process as running.
            log.Log(
                "Warning: unable to check if PID %d still running" % (pid, ),
                2)
        return True

    for curmir_rp in curmir_incs:
        assert Globals.local_connection is curmir_rp.conn
        pid = extract_pid(curmir_rp)
        if pid is not None and pid_running(pid):
            log.Log.FatalError(
                """It appears that a previous rdiff-backup session with process
id %d is still running.  If two different rdiff-backup processes write
the same repository simultaneously, data corruption will probably
result.  To proceed with regress anyway, rerun rdiff-backup with the
--force option.""" % (pid, ))
425