1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5#
6# This file is part of duplicity.
7#
8# Duplicity is free software; you can redistribute it and/or modify it
9# under the terms of the GNU General Public License as published by the
10# Free Software Foundation; either version 2 of the License, or (at your
11# option) any later version.
12#
13# Duplicity is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with duplicity; if not, write to the Free Software Foundation,
20# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22u"""Create and edit manifest for session contents"""
23
24from builtins import map
25from builtins import range
26from builtins import object
27
28import re
29import sys
30
31from duplicity import config
32from duplicity import log
33from duplicity import config
34from duplicity import util
35
36
class ManifestError(Exception):
    u"""
    Signals a problem with a manifest, e.g. deleting a volume number
    that the manifest does not contain.
    """
42
43
class Manifest(object):
    u"""
    List of volumes and information about each one

    A manifest records the hostname and local directory of the backup
    source, the list of changed files and one volume info object per
    volume number.  When a file object was given to the constructor,
    the set_*/add_* methods also append their data to that file.
    """
    def __init__(self, fh=None):
        u"""
        Create blank Manifest

        @param fh: fileobj for manifest
        @type fh: DupPath

        @rtype: Manifest
        @return: manifest
        """
        self.hostname = None
        self.local_dirname = None
        self.volume_info_dict = {}  # dictionary vol numbers -> vol infos
        self.fh = fh
        self.files_changed = []

    def set_dirinfo(self):
        u"""
        Set information about directory from config,
        and write to manifest file.

        @rtype: Manifest
        @return: manifest
        """
        self.hostname = config.hostname
        self.local_dirname = config.local_path.name
        if self.fh:
            if self.hostname:
                self.fh.write(b"Hostname %s\n" % self.hostname.encode())
            if self.local_dirname:
                self.fh.write(b"Localdir %s\n" % Quote(self.local_dirname))
        return self

    def check_dirinfo(self):
        u"""
        Check that the backup source recorded here matches the config

        Nothing is checked when config.allow_source_mismatch is set.
        A hostname or local_dirname that is not available (empty or
        None) is not compared.  On a mismatch the problem is reported
        through log.FatalError together with the matching error code.

        @rtype: None
        @return: None when the source matches or checking is disabled
        """
        if config.allow_source_mismatch:
            return

        # Check both hostname and fqdn (we used to write the fqdn into the
        # manifest, so we want to keep comparing against that)
        if (self.hostname and
                self.hostname != config.hostname and
                self.hostname != config.fqdn):
            errmsg = _(u"Fatal Error: Backup source host has changed.\n"
                       u"Current hostname: %s\n"
                       u"Previous hostname: %s") % (config.hostname, self.hostname)
            code = log.ErrorCode.hostname_mismatch
            code_extra = u"%s %s" % (util.escape(config.hostname), util.escape(self.hostname))

        elif (self.local_dirname and self.local_dirname != config.local_path.name):
            errmsg = _(u"Fatal Error: Backup source directory has changed.\n"
                       u"Current directory: %s\n"
                       u"Previous directory: %s") % (config.local_path.name, self.local_dirname)
            code = log.ErrorCode.source_dir_mismatch
            code_extra = u"%s %s" % (util.escape(config.local_path.name),
                                     util.escape(self.local_dirname))
        else:
            return

        log.FatalError(errmsg + u"\n\n" +
                       _(u"Aborting because you may have accidentally tried to "
                         u"backup two different data sets to the same remote "
                         u"location, or using the same archive directory.  If "
                         u"this is not a mistake, use the "
                         u"--allow-source-mismatch switch to avoid seeing this "
                         u"message"), code, code_extra)

    def set_files_changed_info(self, files_changed):
        u"""
        Store the list of changed files and write it to the manifest file

        A false value (None, empty list) leaves the current list
        untouched; the "Filelist" section is written either way when a
        file object is attached.

        @param files_changed: sequence of entries; entry[0] is written
            quoted as the name, entry[1] as the (bytes) change type
        @type files_changed: list

        @return: void
        """
        if files_changed:
            self.files_changed = files_changed

        if self.fh:
            self.fh.write(b"Filelist %d\n" % len(self.files_changed))
            for fileinfo in self.files_changed:
                self.fh.write(b"    %-7s  %s\n" % (fileinfo[1], Quote(fileinfo[0])))

    def add_volume_info(self, vi):
        u"""
        Add volume info vi to manifest and write to manifest

        An existing entry with the same volume number is replaced.

        @param vi: volume info to add
        @type vi: VolumeInfo

        @return: void
        """
        vol_num = vi.volume_number
        self.volume_info_dict[vol_num] = vi
        if self.fh:
            self.fh.write(vi.to_string() + b"\n")

    def del_volume_info(self, vol_num):
        u"""
        Remove volume vol_num from the manifest

        @param vol_num: volume number to delete
        @type vol_num: int

        @return: void
        @raise ManifestError: if vol_num is not in the manifest
        """
        try:
            del self.volume_info_dict[vol_num]
        except KeyError:
            # Only a missing key means "not present"; anything else is a
            # programming error and should surface unchanged.
            raise ManifestError(u"Volume %d not present in manifest" % (vol_num,))

    def to_string(self):
        u"""
        Return string version of self (just concatenate vi strings)

        The layout is: optional Hostname and Localdir lines, the
        Filelist section, then every volume info in ascending volume
        number order, terminated by a newline.

        @rtype: bytes
        @return: self in string form
        """
        header = []
        if self.hostname:
            header.append(b"Hostname %s\n" % self.hostname.encode())
        if self.local_dirname:
            header.append(b"Localdir %s\n" % Quote(self.local_dirname))

        header.append(b"Filelist %d\n" % len(self.files_changed))
        for fileinfo in self.files_changed:
            header.append(b"    %-7s  %s\n" % (fileinfo[1], Quote(fileinfo[0])))

        volumes = b"\n".join(self.volume_info_dict[vol_num].to_string()
                             for vol_num in sorted(self.volume_info_dict))
        return b"%s%s\n" % (b"".join(header), volumes)

    __str__ = to_string

    def from_string(self, s):
        u"""
        Initialize self from string s, return self

        @param s: manifest contents as produced by to_string
        @type s: bytes

        @rtype: Manifest
        @return: self
        """

        def get_field(fieldname):
            u"""
            Return the value of a field by parsing s, or None if no field
            """
            if not isinstance(fieldname, bytes):
                fieldname = fieldname.encode()
            m = re.search(b"(^|\\n)%s\\s(.*?)\n" % fieldname, s, re.I)
            if not m:
                return None
            else:
                return Unquote(m.group(2))
        self.hostname = get_field(u"hostname")
        if self.hostname is not None:
            self.hostname = self.hostname.decode()
        self.local_dirname = get_field(u"localdir")

        highest_vol = 0
        latest_vol = 0
        vi_regexp = re.compile(b"(?:^|\\n)(volume\\s.*(?:\\n.*)*?)(?=\\nvolume\\s|$)", re.I)
        vi_iterator = vi_regexp.finditer(s)
        for match in vi_iterator:
            vi = VolumeInfo().from_string(match.group(1))
            self.add_volume_info(vi)
            latest_vol = vi.volume_number
            highest_vol = max(highest_vol, latest_vol)
            log.Debug(_(u"Found manifest volume %s") % latest_vol)
        # If we restarted after losing some remote volumes, the highest volume
        # seen may be higher than the last volume recorded.  That is, the
        # manifest could contain "vol1, vol2, vol3, vol2."  If so, we don't
        # want to keep vol3's info.
        for i in range(latest_vol + 1, highest_vol + 1):
            self.del_volume_info(i)
        log.Info(_(u"Found %s volumes in manifest") % latest_vol)

        # Get file changed list - not needed if --file-changed not present
        filecount = 0
        if config.file_changed is not None:
            filelist_regexp = re.compile(b"(^|\\n)filelist\\s([0-9]+)\\n(.*?)(\\nvolume\\s|$)", re.I | re.S)
            match = filelist_regexp.search(s)
            if match:
                filecount = int(match.group(2))
            if filecount > 0:
                def parse_fileinfo(line):
                    fileinfo = line.strip().split()
                    return (fileinfo[0], b''.join(fileinfo[1:]))

                # Blank lines carry no entry; skipping them lets a damaged
                # list be reported as corrupt below instead of crashing
                # with an IndexError here.
                self.files_changed = [parse_fileinfo(line)
                                      for line in match.group(3).split(b'\n')
                                      if line.strip()]

            if filecount != len(self.files_changed):
                # Interpolate after the translation lookup so the message
                # id passed to _() is the constant template.
                log.Error(_(u"Manifest file '%s' is corrupt: File count says %d, File list contains %d") %
                          (self.fh.base if self.fh else u"", filecount, len(self.files_changed)))
                self.corrupt_filelist = True

        return self

    def get_files_changed(self):
        u"""
        Return the list of changed files held by this manifest
        """
        return self.files_changed

    def __eq__(self, other):
        u"""
        Two manifests are equal if they contain the same volume infos
        and were taken from the same host and directory
        """
        if not isinstance(other, Manifest):
            return NotImplemented

        vol_nums = sorted(self.volume_info_dict)
        if vol_nums != sorted(other.volume_info_dict):
            log.Notice(_(u"Manifests not equal because different volume numbers"))
            return False

        # Compare the volume infos themselves; the volume numbers are
        # already known to be identical at this point.
        for vol_num in vol_nums:
            if not self.volume_info_dict[vol_num] == other.volume_info_dict[vol_num]:
                log.Notice(_(u"Manifests not equal because volume lists differ"))
                return False

        if (self.hostname != other.hostname or
                self.local_dirname != other.local_dirname):
            log.Notice(_(u"Manifests not equal because hosts or directories differ"))
            return False

        return True

    def __ne__(self, other):
        u"""
        Defines !=.  Not doing this always leads to annoying bugs...
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def write_to_path(self, path):
        u"""
        Write string version of manifest to given path

        The path must not exist yet.  After writing, path.setdata() is
        called.

        @param path: destination providing exists(), open() and setdata()

        @return: void
        """
        assert not path.exists()
        fout = path.open(u"wb")
        try:
            fout.write(self.to_string())
        finally:
            # Close outside of the assert statement: asserts are removed
            # under "python -O", which would leave the file open.
            close_status = fout.close()
        assert not close_status
        path.setdata()

    def get_containing_volumes(self, index_prefix):
        u"""
        Return list of volume numbers that may contain index_prefix
        """
        # A single unicode component is converted to bytes before it is
        # handed to the volume infos.
        if len(index_prefix) == 1 and isinstance(index_prefix[0], u"".__class__):
            index_prefix = (index_prefix[0].encode(),)
        return [vol_num for vol_num, vi in self.volume_info_dict.items()
                if vi.contains(index_prefix)]
300
301
class VolumeInfoError(Exception):
    u"""
    Signals that a VolumeInfo could not be initialized from a string
    """
307
308
class VolumeInfo(object):
    u"""
    Information about a single volume

    Holds the volume number, the first and last index (tuple of path
    components) stored in the volume with their block numbers, and a
    dictionary of hash name -> hash value.
    """
    def __init__(self):
        u"""VolumeInfo initializer"""
        self.volume_number = None
        self.start_index = None
        self.start_block = None
        self.end_index = None
        self.end_block = None
        self.hashes = {}

    def set_info(self, vol_number,
                 start_index, start_block,
                 end_index, end_block):
        u"""
        Set essential VolumeInfo information, return self

        Call with starting and ending paths stored in the volume.  If
        a multivol diff gets split between volumes, count it as being
        part of both volumes.
        """
        self.volume_number = vol_number
        self.start_index = start_index
        self.start_block = start_block
        self.end_index = end_index
        self.end_block = end_block

        return self

    def set_hash(self, hash_name, data):
        u"""
        Set the value of hash hash_name (e.g. "MD5") to data

        Both arguments are stored as text; bytes are decoded first.
        """
        if isinstance(hash_name, bytes):
            hash_name = hash_name.decode()
        if isinstance(data, bytes):
            data = data.decode()
        self.hashes[hash_name] = data

    def get_best_hash(self):
        u"""
        Return pair (hash_type, hash_data)

        SHA1 is the best hash, and MD5 is the second best hash.  If
        neither is present the first stored hash is returned.  None
        is returned if no hash is available.
        """
        if not self.hashes:
            return None
        for preferred in (u"SHA1", u"MD5"):
            if preferred in self.hashes:
                return (preferred, self.hashes[preferred])
        return next(iter(self.hashes.items()))

    def to_string(self):
        u"""
        Return nicely formatted string reporting all information

        @rtype: bytes
        @return: "Volume N:" line followed by the indented StartingPath,
            EndingPath and Hash lines
        """
        def index_to_string(index):
            u"""Return printable version of index without any whitespace"""
            if index:
                s = b"/".join(index)
                return Quote(s)
            else:
                return b"."

        def bfmt(x):
            u"""Return block number as bytes, a single space if unset"""
            if x is None:
                return b" "
            return str(x).encode()

        slist = [b"Volume %d:" % self.volume_number]
        whitespace = b"    "
        slist.append(b"%sStartingPath   %s %s" %
                     (whitespace, index_to_string(self.start_index), bfmt(self.start_block)))
        slist.append(b"%sEndingPath     %s %s" %
                     (whitespace, index_to_string(self.end_index), bfmt(self.end_block)))
        for key in self.hashes:
            slist.append(b"%sHash %s %s" %
                         (whitespace, key.encode(), self.hashes[key].encode()))
        return b"\n".join(slist)

    __str__ = to_string

    def from_string(self, s):
        u"""
        Initialize self from string s as created by to_string

        @param s: volume info text, first line "Volume N:"
        @type s: bytes

        @rtype: VolumeInfo
        @return: self
        @raise VolumeInfoError: if the first line is malformed or the
            starting or ending path is missing
        """
        def string_to_index(s):
            u"""
            Return tuple index from string
            """
            s = Unquote(s)
            if s == b".":
                return ()
            return tuple(s.split(b"/"))

        linelist = s.strip().split(b"\n")

        # Set volume number
        m = re.search(b"^Volume ([0-9]+):", linelist[0], re.I)
        if not m:
            raise VolumeInfoError(u"Bad first line '%s'" % (linelist[0],))
        self.volume_number = int(m.group(1))

        # Set other fields
        for line in linelist[1:]:
            line_split = line.split()
            if not line_split:
                # empty or whitespace-only line
                continue
            field_name = line_split[0].lower()
            other_fields = line_split[1:]
            # field_name has been lower-cased, so compare in lower case
            if field_name == b"volume":
                log.Warn(_(u"Warning, found extra Volume identifier"))
                break
            elif field_name == b"startingpath":
                self.start_index = string_to_index(other_fields[0])
                if len(other_fields) > 1:
                    self.start_block = int(other_fields[1])
                else:
                    self.start_block = None
            elif field_name == b"endingpath":
                self.end_index = string_to_index(other_fields[0])
                if len(other_fields) > 1:
                    self.end_block = int(other_fields[1])
                else:
                    self.end_block = None
            elif field_name == b"hash":
                self.set_hash(other_fields[0], other_fields[1])

        if self.start_index is None or self.end_index is None:
            raise VolumeInfoError(u"Start or end index not set")
        return self

    def __eq__(self, other):
        u"""
        Used in test suite

        Compares volume number, start and end index and the hashes;
        the block numbers are not compared.  The first difference
        found is reported through log.Notice.

        @rtype: bool
        """
        if not isinstance(other, VolumeInfo):
            log.Notice(_(u"Other is not VolumeInfo"))
            return False
        if self.volume_number != other.volume_number:
            log.Notice(_(u"Volume numbers don't match"))
            return False
        if self.start_index != other.start_index:
            log.Notice(_(u"start_indicies don't match"))
            return False
        if self.end_index != other.end_index:
            log.Notice(_(u"end_index don't match"))
            return False
        if self.hashes != other.hashes:
            log.Notice(_(u"Hashes don't match"))
            return False
        return True

    def __ne__(self, other):
        u"""
        Defines !=
        """
        return not self.__eq__(other)

    def contains(self, index_prefix, recursive=1):
        u"""
        Return true if volume might contain index

        If recursive is true, then return true if any index starting
        with index_prefix could be contained.  Otherwise, just check
        if index_prefix itself is between starting and ending
        indicies.
        """
        if recursive:
            # Truncate the start index to the prefix length so that a
            # prefix of the first stored index still counts as contained.
            return (self.start_index[:len(index_prefix)] <=
                    index_prefix <= self.end_index)
        else:
            return self.start_index <= index_prefix <= self.end_index
494
495
# Bytes that must be escaped: whitespace, backslash and both quote characters.
nonnormal_char_re = re.compile(b"(\\s|[\\\\\"'])")


def Quote(s):
    u"""
    Return quoted version of s safe to put in a manifest or volume info

    Input without whitespace, backslashes or quote characters is
    returned unchanged.  Otherwise each such byte is replaced by a
    \\xNN hex escape and the result is wrapped in double quotes.
    """
    def hex_escape(match):
        return b"\\x%02x" % ord(match.group(0))

    escaped, replaced = nonnormal_char_re.subn(hex_escape, s)
    if not replaced:
        return s  # no quoting necessary
    return b'"%s"' % escaped
513
514
def maybe_chr(ch):
    u"""
    Return chr(ch) on Python 3 and ch unchanged on older versions

    Indexing a bytes object gives an int on Python 3 but a one
    character string on Python 2; this evens out the difference.
    """
    return chr(ch) if sys.version_info.major >= 3 else ch
520
521
def Unquote(quoted_string):
    u"""
    Return original string from quoted_string produced by Quote

    Input that does not start with a double or single quote (this
    includes the empty string) was never quoted and is returned
    unchanged.  Otherwise the surrounding quotes are dropped and every
    \\xNN escape is replaced by the byte it stands for.

    @param quoted_string: possibly quoted value
    @type quoted_string: bytes

    @rtype: bytes
    @return: unquoted value
    """
    # Slices instead of indexing: they yield bytes on Python 2 and 3 alike
    # and do not fail on empty input.
    opening = quoted_string[:1]
    if opening not in (b'"', b"'"):
        return quoted_string
    assert opening == quoted_string[-1:]
    return_list = []
    i = 1  # skip initial char
    last = len(quoted_string) - 1  # position of the closing quote
    while i < last:
        char = quoted_string[i:i + 1]
        if char == b"\\":
            # quoted section: backslash, "x", two hex digits
            assert quoted_string[i + 1:i + 2] == b"x"
            code = int(quoted_string[i + 2:i + 4], 16)
            return_list.append(bytes(bytearray((code,))))
            i += 4
        else:
            return_list.append(char)
            i += 1
    return b"".join(return_list)
545