1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5#
6# This file is part of duplicity.
7#
8# Duplicity is free software; you can redistribute it and/or modify it
9# under the terms of the GNU General Public License as published by the
10# Free Software Foundation; either version 2 of the License, or (at your
11# option) any later version.
12#
13# Duplicity is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with duplicity; if not, write to the Free Software Foundation,
20# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22from builtins import map
23from builtins import next
24from builtins import object
25from builtins import range
26
27import re
28import sys
29import tempfile
30
31from duplicity import diffdir
32from duplicity import config
33from duplicity import librsync
34from duplicity import log
35from duplicity import selection
36from duplicity import tarfile
37from duplicity import tempdir
38from duplicity import util
39from duplicity.lazy import *  # pylint: disable=unused-wildcard-import,redefined-builtin
40from duplicity.path import *  # pylint: disable=unused-wildcard-import,redefined-builtin
41
42u"""Functions for patching of directories"""
43
44
class PatchDirException(Exception):
    u"""Raised for unrecognized or unsafe entries in a delta tarfile"""
    pass
47
48
def Patch(base_path, difftar_fileobj):
    u"""Apply the delta in file object difftar_fileobj to base_path"""
    delta_tarfile = tarfile.TarFile(u"arbitrary", u"r", difftar_fileobj)
    patch_diff_tarfile(base_path, delta_tarfile)
    # close() is expected to return a falsy value on success
    assert not difftar_fileobj.close()
54
55
def Patch_from_iter(base_path, fileobj_iter, restrict_index=()):
    u"""Apply deltas from an iterator of delta file objects to base_path"""
    combined_tarfile = TarFile_FromFileobjs(fileobj_iter)
    patch_diff_tarfile(base_path, combined_tarfile, restrict_index)
60
61
def patch_diff_tarfile(base_path, diff_tarfile, restrict_index=()):
    u"""Patch given Path object using delta tarfile (as in tarfile.TarFile)

    If restrict_index is set, ignore any deltas in diff_tarfile that
    don't start with restrict_index.

    """
    if base_path.exists():
        path_iter = selection.Select(base_path).set_iter()
    else:
        path_iter = empty_iter()  # probably untarring full backup

    diff_path_iter = difftar2path_iter(diff_tarfile)
    if restrict_index:
        diff_path_iter = filter_path_iter(diff_path_iter, restrict_index)
    collated = diffdir.collate2iters(path_iter, diff_path_iter)

    ITR = IterTreeReducer(PathPatcher, [base_path])
    for basis_path, diff_ropath in collated:
        # Log and index via the basis path when it exists; otherwise the
        # diff entry is a new path with no corresponding basis.  (The two
        # branches previously duplicated this logging call verbatim.)
        path = basis_path or diff_ropath
        log.Info(_(u"Patching %s") % (util.fsdecode(path.get_relative_path())),
                 log.InfoCode.patch_file_patching,
                 util.escape(path.get_relative_path()))
        ITR(path.index, basis_path, diff_ropath)
    ITR.Finish()
    base_path.setdata()
93
94
def empty_iter():
    u"""Return a generator that yields nothing"""
    return
    yield  # unreachable; its presence makes this a generator function
98
99
def filter_path_iter(path_iter, index):
    u"""Rewrite path elements of path_iter so they start with index

    Discard any that doesn't start with index, and remove the index
    prefix from the rest.

    """
    assert isinstance(index, tuple) and index, index
    prefix_len = len(index)
    for path in path_iter:
        if path.index[:prefix_len] != index:
            continue  # not under the restricted index; drop it
        path.index = path.index[prefix_len:]
        yield path
113
114
def difftar2path_iter(diff_tarfile):
    u"""Turn file-like difftarobj into iterator of ROPaths"""
    tar_iter = iter(diff_tarfile)
    multivol_fileobj = None

    # The next tar_info is stored in this one element list so
    # Multivol_Filelike below can update it.  Any StopIterations will
    # be passed upwards.
    try:
        tarinfo_list = [next(tar_iter)]
    except StopIteration:
        # Empty tarfile: nothing to yield
        return

    while 1:
        # This section relevant when a multivol diff is last in tar
        # (a None tarinfo is the shared end-of-tar sentinel)
        if not tarinfo_list[0]:
            return
        if multivol_fileobj and not multivol_fileobj.at_end:
            multivol_fileobj.close()  # aborting in middle of multivol
            continue

        index, difftype, multivol = get_index_from_tarinfo(tarinfo_list[0])
        ropath = ROPath(index)
        ropath.init_from_tarinfo(tarinfo_list[0])
        ropath.difftype = difftype
        if difftype == u"deleted":
            ropath.type = None
        elif ropath.isreg():
            if multivol:
                # Regular file split over several tar members: hand the
                # consumer a file-like object spanning all its volumes
                multivol_fileobj = Multivol_Filelike(diff_tarfile, tar_iter,
                                                     tarinfo_list, index)
                ropath.setfileobj(multivol_fileobj)
                yield ropath
                continue  # Multivol_Filelike will reset tarinfo_list
            else:
                ropath.setfileobj(diff_tarfile.extractfile(tarinfo_list[0]))
        yield ropath
        try:
            tarinfo_list[0] = next(tar_iter)
        except StopIteration:
            return
156
157
def get_index_from_tarinfo(tarinfo):
    u"""Return (index, difftype, multivol) pair from tarinfo object"""
    for prefix in [u"snapshot/", u"diff/", u"deleted/",
                   u"multivol_diff/", u"multivol_snapshot/"]:
        tiname = util.get_tarinfo_name(tarinfo)
        # Python 2: entry names are bytes, so the prefix must be encoded
        # to match (the unicode check is short-circuited away on Python 3)
        if sys.version_info.major == 2 and isinstance(prefix, unicode):
            prefix = prefix.encode()
        if tiname.startswith(prefix):
            name = tiname[len(prefix):]  # strip prefix
            if prefix.startswith(u"multivol"):
                if prefix == u"multivol_diff/":
                    difftype = u"diff"
                else:
                    difftype = u"snapshot"
                multivol = 1
                # Strip both the multivol prefix and the trailing volume
                # number component from the full entry name
                name, num_subs = \
                    re.subn(u"(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
                            u"\\2", tiname)
                if num_subs != 1:
                    raise PatchDirException(u"Unrecognized diff entry %s" %
                                            tiname)
            else:
                difftype = prefix[:-1]  # strip trailing /
                name = tiname[len(prefix):]
                if name.endswith(r"/"):
                    name = name[:-1]  # strip trailing /'s
                multivol = 0
            break
    else:
        # No known prefix matched the entry name
        raise PatchDirException(u"Unrecognized diff entry %s" %
                                tiname)
    if name == r"." or name == r"":
        index = ()  # the archive root itself
    else:
        if sys.version_info.major >= 3:
            index = tuple(util.fsencode(name).split(b"/"))
        else:
            index = tuple(name.split(b"/"))
        if b'..' in index:
            # Reject entries that could escape the restore root
            raise PatchDirException(u"Tar entry %s contains '..'.  Security "
                                    u"violation" % util.fsdecode(tiname))
    return (index, difftype, multivol)
200
201
class Multivol_Filelike(object):
    u"""Emulate a file like object from multivols

    Maintains a buffer about the size of a volume.  When it is read()
    to the end, pull in more volumes as desired.

    """
    def __init__(self, tf, tar_iter, tarinfo_list, index):
        u"""Initializer.  tf is TarFile obj, tarinfo is first tarinfo"""
        self.tf, self.tar_iter = tf, tar_iter
        self.tarinfo_list = tarinfo_list  # must store as list for write access
        self.index = index  # path index this multivol file belongs to
        self.buffer = b""  # data extracted but not yet consumed by read()
        self.at_end = 0  # set once all volumes of this file are consumed

    def read(self, length=-1):
        u"""Read length bytes from file"""
        if length < 0:
            # Negative length means read to EOF: drain all volumes
            while self.addtobuffer():
                pass
            real_len = len(self.buffer)
        else:
            while len(self.buffer) < length:
                if not self.addtobuffer():
                    break
            real_len = min(len(self.buffer), length)

        result = self.buffer[:real_len]
        self.buffer = self.buffer[real_len:]
        return result

    def addtobuffer(self):
        u"""Add next chunk to buffer"""
        if self.at_end:
            return None
        index, difftype, multivol = get_index_from_tarinfo(self.tarinfo_list[0])
        if not multivol or index != self.index:
            # we've moved on
            # the following communicates next tarinfo to difftar2path_iter
            self.at_end = 1
            return None

        fp = self.tf.extractfile(self.tarinfo_list[0])
        self.buffer += fp.read()
        fp.close()

        try:
            self.tarinfo_list[0] = next(self.tar_iter)
        except StopIteration:
            # None in the shared list tells difftar2path_iter the tar
            # is exhausted
            self.tarinfo_list[0] = None
            self.at_end = 1
            return None
        return 1

    def close(self):
        u"""If not at end, read remaining data"""
        if not self.at_end:
            # Drain remaining volumes so tar_iter ends up positioned at
            # the next entry for difftar2path_iter
            while 1:
                self.buffer = b""
                if not self.addtobuffer():
                    break
        self.at_end = 1
264
265
class PathPatcher(ITRBranch):
    u"""Used by DirPatch, process the given basis and diff"""
    def __init__(self, base_path):
        u"""Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None  # set by start_process for directories

    def start_process(self, index, basis_path, diff_ropath):
        u"""Start processing when diff_ropath is a directory"""
        if not (diff_ropath and diff_ropath.isdir()):
            assert index == (), util.uindex(index)  # should only happen for first elem
            self.fast_process(index, basis_path, diff_ropath)
            return

        if not basis_path:
            basis_path = self.base_path.new_index(index)
            assert not basis_path.exists()
            basis_path.mkdir()  # Need place for later files to go into
        elif not basis_path.isdir():
            # Basis exists but isn't a directory; replace it with one
            basis_path.delete()
            basis_path.mkdir()
        self.dir_basis_path = basis_path
        self.dir_diff_ropath = diff_ropath

    def end_process(self):
        u"""Copy directory permissions when leaving tree"""
        if self.dir_diff_ropath:
            self.dir_diff_ropath.copy_attribs(self.dir_basis_path)

    def can_fast_process(self, index, basis_path, diff_ropath):  # pylint: disable=unused-argument
        u"""No need to recurse if diff_ropath isn't a directory"""
        return not (diff_ropath and diff_ropath.isdir())

    def fast_process(self, index, basis_path, diff_ropath):
        u"""For use when neither is a directory"""
        if not diff_ropath:
            return  # no change
        elif not basis_path:
            if diff_ropath.difftype == u"deleted":
                pass  # already deleted
            else:
                # just copy snapshot over
                diff_ropath.copy(self.base_path.new_index(index))
        elif diff_ropath.difftype == u"deleted":
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
        elif not basis_path.isreg() or (basis_path.isreg() and diff_ropath.difftype == u"snapshot"):
            # Snapshot, or basis isn't a regular file: replace wholesale
            if basis_path.isdir():
                basis_path.deltree()
            else:
                basis_path.delete()
            diff_ropath.copy(basis_path)
        else:
            # Remaining case: regular file patched with an rdiff-style delta
            assert diff_ropath.difftype == u"diff", diff_ropath.difftype
            basis_path.patch_with_attribs(diff_ropath)
323
324
class TarFile_FromFileobjs(object):
    u"""Like a tarfile.TarFile iterator, but read from multiple fileobjs"""
    def __init__(self, fileobj_iter):
        u"""Make new tarinfo iterator

        fileobj_iter should be an iterator of file objects opened for
        reading.  They will be closed at end of reading.

        """
        self.fileobj_iter = fileobj_iter
        self.tarfile, self.tar_iter = None, None  # current volume and its iterator
        self.current_fp = None  # file object backing self.tarfile

    def __iter__(self):  # pylint: disable=non-iterator-returned
        return self

    def set_tarfile(self):
        u"""Set tarfile from next file object, or raise StopIteration"""
        if self.current_fp:
            # close() is expected to return a falsy value on success
            assert not self.current_fp.close()
        self.current_fp = next(self.fileobj_iter)
        self.tarfile = util.make_tarfile(u"r", self.current_fp)
        self.tar_iter = iter(self.tarfile)

    def __next__(self):
        if not self.tarfile:
            try:
                self.set_tarfile()
            except StopIteration:
                # NOTE(review): returns None instead of raising
                # StopIteration when no file objects are available;
                # difftar2path_iter treats a falsy tarinfo as end-of-tar,
                # so this appears intentional -- confirm before changing.
                return
        try:
            return next(self.tar_iter)
        except StopIteration:
            # Current volume exhausted; close it and advance to the next
            assert not self.tarfile.close()
            self.set_tarfile()
            return next(self.tar_iter)

    def extractfile(self, tarinfo):
        u"""Return data associated with given tarinfo"""
        return self.tarfile.extractfile(tarinfo)
365
366
def collate_iters(iter_list):
    u"""Collate iterators by index

    Input is a list of n iterators each of which must iterate elements
    with an index attribute.  The elements must come out in increasing
    order, and the index should be a tuple itself.

    The output is an iterator which yields tuples where all elements
    in the tuple have the same index, and the tuple has n elements in
    it.  If any iterator lacks an element with that index, the tuple
    will have None in that spot.

    """
    # overflow[i] means that iter_list[i] has been exhausted
    # elems[i] is None means that it is time to replenish it.
    iter_num = len(iter_list)
    if iter_num == 2:
        # Two-iterator case delegates to diffdir's dedicated collator
        return diffdir.collate2iters(iter_list[0], iter_list[1])
    overflow = [None] * iter_num
    elems = overflow[:]

    def setrorps(overflow, elems):
        u"""Set the overflow and rorps list"""
        for i in range(iter_num):
            if not overflow[i] and elems[i] is None:
                try:
                    elems[i] = next(iter_list[i])
                except StopIteration:
                    overflow[i] = 1
                    elems[i] = None

    def getleastindex(elems):
        u"""Return the first index in elems, assuming elems isn't empty"""
        return min([elem.index for elem in [x for x in elems if x]])

    def yield_tuples(iter_num, overflow, elems):
        while 1:
            setrorps(overflow, elems)
            # All overflow slots set (no None left) means every iterator
            # has been exhausted
            if None not in overflow:
                break

            index = getleastindex(elems)
            yieldval = []
            for i in range(iter_num):
                if elems[i] and elems[i].index == index:
                    yieldval.append(elems[i])
                    elems[i] = None  # consumed; replenish on next pass
                else:
                    yieldval.append(None)
            yield tuple(yieldval)
    return yield_tuples(iter_num, overflow, elems)
418
419
class IndexedTuple(object):
    u"""Tuple-like sequence that carries an .index attribute

    (Used previously by collate_iters.)  Ordering is determined solely
    by the index; equality also compares the data.
    """
    def __init__(self, index, sequence):
        self.index = index
        self.data = tuple(sequence)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        u"""This only works for numerical keys (easier this way)"""
        return self.data[key]

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __cmp__(self, other):
        # Three-way comparison on the index alone
        assert isinstance(other, IndexedTuple)
        if self.index == other.index:
            return 0
        return -1 if self.index < other.index else 1

    def __eq__(self, other):
        if isinstance(other, IndexedTuple):
            return self.index == other.index and self.data == other.data
        if isinstance(other, tuple):
            return self.data == other
        return None  # legacy behavior for unrelated types

    def __str__(self):
        joined = u", ".join(str(item) for item in self.data)
        return u"(%s).%s" % (joined, self.index)
467
468
def normalize_ps(patch_sequence):
    u"""Given an sequence of ROPath deltas, remove blank and unnecessary

    The sequence is assumed to be in patch order (later patches apply
    to earlier ones).  A patch is unnecessary if a later one doesn't
    require it (for instance, any patches before a "delete" are
    unnecessary).

    """
    result_list = []
    # Walk backwards from the newest patch; once a non-"diff" delta
    # (snapshot or delete) is found, nothing earlier can matter.
    for delta in reversed(patch_sequence):
        if delta is None:
            continue  # skip blank entries
        result_list.insert(0, delta)
        if delta.difftype != u"diff":
            break
    return result_list
489
490
def patch_seq2ropath(patch_seq):
    u"""Apply the patches in patch_seq, return single ropath

    patch_seq[0] must be a non-diff delta (snapshot or deleted); any
    following entries must be "diff" deltas applied on top of it.
    """
    first = patch_seq[0]
    assert first.difftype != u"diff", u"First patch in sequence " \
                                      u"%s was a diff" % patch_seq
    if not first.isreg():
        # No need to bother with data if not regular file
        assert len(patch_seq) == 1, u"Patch sequence isn't regular, but " \
                                    u"has %d entries" % len(patch_seq)
        return first.get_ropath()

    current_file = first.open(u"rb")

    for delta_ropath in patch_seq[1:]:
        assert delta_ropath.difftype == u"diff", delta_ropath.difftype
        # librsync insists on a real file object, which we create manually
        # by using the duplicity.tempdir to tell us where.
        #
        # See https://bugs.launchpad.net/duplicity/+bug/670891 for discussion
        # of os.tmpfile() vs tempfile.TemporaryFile() w.r.t. Windows / Posix,
        # which is worked around in librsync.PatchedFile() now.
        try:
            # BUG FIX: this previously read "cur_file.fileno()" -- an
            # undefined name whose NameError was swallowed by a bare
            # except, forcing the tempfile copy on every iteration.
            current_file.fileno()
        except Exception:
            # No usable file descriptor: spool into a real temp file
            tempfp = tempfile.TemporaryFile(dir=tempdir.default().dir())
            util.copyfileobj(current_file, tempfp)
            assert not current_file.close()
            tempfp.seek(0)
            current_file = tempfp
        current_file = librsync.PatchedFile(current_file,
                                            delta_ropath.open(u"rb"))
    result = patch_seq[-1].get_ropath()
    result.setfileobj(current_file)
    return result
527
528
def integrate_patch_iters(iter_list):
    u"""Combine a list of iterators of ropath patches

    The iter_list should be sorted in patch order, and the elements in
    each iter_list need to be orderd by index.  The output will be an
    iterator of the final ROPaths in index order.

    """
    collated = collate_iters(iter_list)
    for patch_seq in collated:
        # Drop blank entries and patches made obsolete by later ones
        normalized = normalize_ps(patch_seq)
        try:
            final_ropath = patch_seq2ropath(normalized)
            if final_ropath.exists():
                # otherwise final patch was delete
                yield final_ropath
        except Exception as e:
            # Deliberately broad: warn and continue so one bad file
            # doesn't abort processing of the remaining paths
            filename = normalized[-1].get_ropath().get_relative_path()
            log.Warn(_(u"Error '%s' patching %s") %
                     (util.uexc(e), util.fsdecode(filename)),
                     log.WarningCode.cannot_process,
                     util.escape(filename))
551
552
def tarfiles2rop_iter(tarfile_list, restrict_index=()):
    u"""Integrate tarfiles of diffs into single ROPath iter

    Then filter out all the diffs in that index which don't start with
    the restrict_index.

    """
    diff_iters = []
    for diff_tarfile in tarfile_list:
        path_iter = difftar2path_iter(diff_tarfile)
        if restrict_index:
            # Apply filter before integration
            path_iter = filter_path_iter(path_iter, restrict_index)
        diff_iters.append(path_iter)
    return integrate_patch_iters(diff_iters)
565
566
def Write_ROPaths(base_path, rop_iter):
    u"""Write out ropaths in rop_iter starting at base_path

    Returns 1 if something was actually written, 0 otherwise.

    """
    tree_reducer = IterTreeReducer(ROPath_IterWriter, [base_path])
    wrote_something = 0
    for ropath in rop_iter:
        wrote_something = 1
        tree_reducer(ropath.index, ropath)
    tree_reducer.Finish()
    base_path.setdata()
    return wrote_something
581
582
class ROPath_IterWriter(ITRBranch):
    u"""Used in Write_ROPaths above

    We need to use an ITR because we have to update the
    permissions/times of directories after we write the files in them.

    """
    def __init__(self, base_path):
        u"""Set base_path, Path of root of tree"""
        self.base_path = base_path
        self.dir_diff_ropath = None  # ropath of directory currently being written
        self.dir_new_path = None  # destination Path for that directory

    def start_process(self, index, ropath):
        u"""Write ropath.  Only handles the directory case"""
        if not ropath.isdir():
            # Base may not be a directory, but rest should
            assert ropath.index == (), ropath.index
            new_path = self.base_path.new_index(index)
            if ropath.exists():
                if new_path.exists():
                    new_path.deltree()
                ropath.copy(new_path)

        self.dir_new_path = self.base_path.new_index(index)
        if self.dir_new_path.exists() and not config.force:
            # base may exist, but nothing else
            assert index == (), index
        else:
            self.dir_new_path.mkdir()
        self.dir_diff_ropath = ropath

    def end_process(self):
        u"""Update information of a directory when leaving it"""
        if self.dir_diff_ropath:
            # Deferred until the directory's files are written, per the
            # class docstring, so writes don't clobber the attributes
            self.dir_diff_ropath.copy_attribs(self.dir_new_path)

    def can_fast_process(self, index, ropath):  # pylint: disable=unused-argument
        u"""Can fast process (no recursion) if ropath isn't a directory"""
        log.Info(_(u"Writing %s of type %s") %
                 (util.fsdecode(ropath.get_relative_path()), ropath.type),
                 log.InfoCode.patch_file_writing,
                 u"%s %s" % (util.escape(ropath.get_relative_path()), ropath.type))
        return not ropath.isdir()

    def fast_process(self, index, ropath):
        u"""Write non-directory ropath to destination"""
        if ropath.exists():
            ropath.copy(self.base_path.new_index(index))
632