1# Copyright 2002 Ben Escoto
2#
3# This file is part of rdiff-backup.
4#
5# rdiff-backup is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
7# Free Software Foundation; either version 2 of the License, or (at your
8# option) any later version.
9#
10# rdiff-backup is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with rdiff-backup; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18# 02110-1301, USA
19"""Store and retrieve metadata in destination directory
20
21The plan is to store metadata information for all files in the
22destination directory in a special metadata file.  There are two
23reasons for this:
24
251)  The filesystem of the mirror directory may not be able to handle
26    types of metadata that the source filesystem can.  For instance,
27    rdiff-backup may not have root access on the destination side, so
28    cannot set uid/gid.  Or the source side may have ACLs and the
29    destination side doesn't.
30
31    Hopefully every file system can store binary data.  Storing
32    metadata separately allows us to back up anything (ok, maybe
33    strange filenames are still a problem).
34
2)  Metadata can be read more quickly from a file than by
    traversing the mirror directory over and over again.  In many
37    cases most of rdiff-backup's time is spent comparing metadata (like
38    file size and modtime), trying to find differences.  Reading this
39    data sequentially from a file is significantly less taxing than
40    listing directories and statting files all over the mirror
41    directory.
42
43The metadata is stored in a text file, which is a bunch of records
44concatenated together.  Each record has the format:
45
46File <filename>
47  <field_name1> <value>
48  <field_name2> <value>
49  ...
50
51Where the lines are separated by newlines.  See the code below for the
52field names and values.
53
54"""
55
56import re
57import os
58import binascii
59from . import log, Globals, rpath, Time, rorpiter
60
61
class ParsingError(Exception):
    """Raised when bad or unparsable metadata is encountered"""
65
66
def carbonfile2string(cfile):
    """Convert CarbonFile data to a string suitable for storing.

    Returns the literal string "None" when cfile is empty/None.
    Otherwise the fields are joined with '|' as key:value pairs; the
    binary creator and type codes are stored hex-encoded so the result
    is plain text that string2carbonfile can parse back.
    """
    if not cfile:
        return "None"
    retvalparts = []
    # hexlify returns bytes in Python 3; decode so we store '41424344'
    # and not the unparsable repr "b'41424344'"
    retvalparts.append(
        'creator:%s' % binascii.hexlify(cfile['creator']).decode('ascii'))
    retvalparts.append(
        'type:%s' % binascii.hexlify(cfile['type']).decode('ascii'))
    retvalparts.append('location:%d,%d' % cfile['location'])
    retvalparts.append('flags:%d' % cfile['flags'])
    try:
        retvalparts.append('createDate:%d' % cfile['createDate'])
    except KeyError:
        # Old (pre-1.1.6) metadata carried no creation date; still valid
        log.Log("Writing pre-1.1.6 style metadata, without creation date", 9)
    return '|'.join(retvalparts)
81
82
def string2carbonfile(data):
    """Re-constitute CarbonFile data from a string stored by
    carbonfile2string.

    Fields with unrecognized keys are silently skipped.
    """

    def _parse_location(value):
        """Parse 'a,b' into a pair of ints"""
        first, second = value.split(',')
        return (int(first), int(second))

    converters = {
        'creator': binascii.unhexlify,
        'type': binascii.unhexlify,
        'location': _parse_location,
        'flags': int,
        'createDate': int,
    }
    retval = {}
    for component in data.split('|'):
        key, value = component.split(':')
        if key in converters:
            retval[key] = converters[key](value)
    return retval
101
102
def RORP2Record(rorpath):
    """From RORPath, return text record of file's metadata

    The record is bytes: one "File <quoted index>" line followed by
    indented "  <Field> <value>" lines, as described in the module
    docstring.
    """
    str_list = [b"File %s\n" % quote_path(rorpath.get_indexpath())]

    # Store file type, e.g. "dev", "reg", or "sym", and type-specific data
    file_type = rorpath.gettype()
    if file_type is None:
        file_type = "None"
    str_list.append(b"  Type %b\n" % file_type.encode('ascii'))
    if file_type == "reg":
        str_list.append(b"  Size %i\n" % rorpath.getsize())

        # If there is a resource fork, save it hex-encoded.
        if rorpath.has_resource_fork():
            if not rorpath.get_resource_fork():
                rf = b"None"
            else:
                rf = binascii.hexlify(rorpath.get_resource_fork())
            str_list.append(b"  ResourceFork %b\n" % (rf, ))

        # If there is Carbon data, save it.  carbonfile2string returns a
        # str, so encode it: %b on a bytes format raises TypeError for str.
        if rorpath.has_carbonfile():
            cfile = carbonfile2string(rorpath.get_carbonfile())
            str_list.append(b"  CarbonFile %b\n" % (cfile.encode('ascii'), ))

        # If file is hardlinked, add that information
        if Globals.preserve_hardlinks != 0:
            numlinks = rorpath.getnumlinks()
            if numlinks > 1:
                str_list.append(b"  NumHardLinks %i\n" % numlinks)
                str_list.append(b"  Inode %i\n" % rorpath.getinode())
                str_list.append(b"  DeviceLoc %i\n" % rorpath.getdevloc())

        # Save any hashes, if available
        if rorpath.has_sha1():
            str_list.append(
                b'  SHA1Digest %b\n' % rorpath.get_sha1().encode('ascii'))

    elif file_type == "None":
        # Deletion marker used by metadata diffs: no further fields
        return b"".join(str_list)
    elif file_type == "dir" or file_type == "sock" or file_type == "fifo":
        pass
    elif file_type == "sym":
        str_list.append(b"  SymData %b\n" % quote_path(rorpath.readlink()))
    elif file_type == "dev":
        devchar, major, minor = rorpath.getdevnums()
        str_list.append(
            b"  DeviceNum %b %i %i\n" % (devchar.encode('ascii'), major, minor))

    # Store time information
    if file_type != 'sym' and file_type != 'dev':
        str_list.append(b"  ModTime %i\n" % rorpath.getmtime())

    # Add user, group, and permission information; ":" marks a missing name
    uid, gid = rorpath.getuidgid()
    str_list.append(b"  Uid %i\n" % uid)
    str_list.append(b"  Uname %b\n" % (rorpath.getuname() or ":").encode())
    str_list.append(b"  Gid %i\n" % gid)
    str_list.append(b"  Gname %b\n" % (rorpath.getgname() or ":").encode())
    str_list.append(b"  Permissions %d\n" % rorpath.getperms())

    # Add long filename information
    if rorpath.has_alt_mirror_name():
        str_list.append(
            b"  AlternateMirrorName %b\n" % (rorpath.get_alt_mirror_name(), ))
    elif rorpath.has_alt_inc_name():
        str_list.append(
            b"  AlternateIncrementName %b\n" % (rorpath.get_alt_inc_name(), ))

    return b"".join(str_list)
173
174
# Matches one "  <FieldName> <value>" metadata line; group 1 is the field
# name, group 2 the raw value.  re.M so it applies per line of a record.
line_parsing_regexp = re.compile(b"^ *([A-Za-z0-9]+) (.+)$", re.M)
176
177
def Record2RORP(record_string):
    """Given record_string, return RORPath

    For speed reasons, write the RORPath data dictionary directly
    instead of calling rorpath functions.  Profiling has shown this to
    be a time critical function.

    Raises ParsingError if the record contains no "File" line, since
    without one no index can be built for the RORPath.
    """
    data_dict = {}
    index = None  # only set by the mandatory "File" field
    for field, data in line_parsing_regexp.findall(record_string):
        field = field.decode('ascii')
        if field == "File":
            index = quoted_filename_to_index(data)
        elif field == "Type":
            if data == b"None":
                data_dict['type'] = None
            else:
                data_dict['type'] = data.decode('ascii')
        elif field == "Size":
            data_dict['size'] = int(data)
        elif field == "ResourceFork":
            if data == b"None":
                data_dict['resourcefork'] = b""
            else:
                data_dict['resourcefork'] = binascii.unhexlify(data)
        elif field == "CarbonFile":
            if data == b"None":
                data_dict['carbonfile'] = None
            else:
                data_dict['carbonfile'] = string2carbonfile(data)
        elif field == "SHA1Digest":
            data_dict['sha1'] = data.decode('ascii')
        elif field == "NumHardLinks":
            data_dict['nlink'] = int(data)
        elif field == "Inode":
            data_dict['inode'] = int(data)
        elif field == "DeviceLoc":
            data_dict['devloc'] = int(data)
        elif field == "SymData":
            data_dict['linkname'] = unquote_path(data)
        elif field == "DeviceNum":
            devchar, major_str, minor_str = data.split(b" ")
            data_dict['devnums'] = (devchar.decode('ascii'), int(major_str),
                                    int(minor_str))
        elif field == "ModTime":
            data_dict['mtime'] = int(data)
        elif field == "Uid":
            data_dict['uid'] = int(data)
        elif field == "Gid":
            data_dict['gid'] = int(data)
        elif field == "Uname":
            # ":" is the placeholder RORP2Record writes for a missing name
            if data == b":" or data == b'None':
                data_dict['uname'] = None
            else:
                data_dict['uname'] = data.decode()
        elif field == "Gname":
            if data == b':' or data == b'None':
                data_dict['gname'] = None
            else:
                data_dict['gname'] = data.decode()
        elif field == "Permissions":
            data_dict['perms'] = int(data)
        elif field == "AlternateMirrorName":
            data_dict['mirrorname'] = data
        elif field == "AlternateIncrementName":
            data_dict['incname'] = data
        else:
            log.Log("Unknown field in line '%s %s'" % (field, data), 2)
    if index is None:
        # Previously a malformed record fell through to a NameError on
        # 'index'; raise the module's exception so callers that catch
        # ParsingError can skip the record instead of crashing.
        raise ParsingError(
            "Record is missing the File field: %r" % (record_string, ))
    return rpath.RORPath(index, data_dict)
247
248
chars_to_quote = re.compile(b"\\n|\\\\")


def quote_path(path_string):
    """Return quoted version of path_string

    Newlines separate the fields of a record, so a literal newline in a
    path is stored as \\n and a backslash as \\\\.  Every other byte is
    left untouched.

    """
    # The regexp above matches exactly these two characters, so a plain
    # lookup table covers every possible match.
    quote_map = {b"\n": b"\\n", b"\\": b"\\\\"}

    def replacement_func(match_obj):
        """Return the escape sequence for one matched special char"""
        return quote_map[match_obj.group(0)]

    return chars_to_quote.sub(replacement_func, path_string)
271
272
def unquote_path(quoted_string):
    """Reverse what was done by quote_path"""
    unquote_map = {b"\\n": b"\n", b"\\\\": b"\\"}

    def replacement_func(match_obj):
        """Translate one two-character escape back to its raw byte"""
        two_chars = match_obj.group(0)
        try:
            return unquote_map[two_chars]
        except KeyError:
            # Unreachable with the current pattern, kept for safety
            log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
            return two_chars

    return re.sub(b"\\\\n|\\\\\\\\", replacement_func, quoted_string)
287
288
def quoted_filename_to_index(quoted_filename):
    """Return tuple index given quoted filename

    The filename b'.' denotes the repository root and maps to the
    empty tuple; anything else is unquoted and split on b'/'.
    """
    if quoted_filename == b'.':
        return ()
    return tuple(unquote_path(quoted_filename).split(b'/'))
295
296
class FlatExtractor:
    """Controls iterating objects from flat file"""

    # Set this in subclass.  record_boundary_regexp should match
    # beginning of next record.  The first group should start at the
    # beginning of the record.  The second group should contain the
    # (possibly quoted) filename.
    record_boundary_regexp = None

    # Set in subclass to function that converts text record to object
    record_to_object = None

    def __init__(self, fileobj):
        self.fileobj = fileobj  # holds file object we are reading from
        self.buf = b""  # holds the next part of the file
        self.at_end = 0  # True if we are at the end of the file
        self.blocksize = 32 * 1024  # bytes read from fileobj at a time

    def get_next_pos(self):
        """Return position of next record in buffer, or end pos if none

        The search starts at offset 1 so the record currently at the
        head of the buffer does not match itself.
        """
        while 1:
            m = self.record_boundary_regexp.search(self.buf, 1)
            if m:
                return m.start(1)
            else:  # add next block to the buffer, loop again
                newbuf = self.fileobj.read(self.blocksize)
                if not newbuf:
                    self.at_end = 1
                    return len(self.buf)
                else:
                    self.buf += newbuf

    def iterate(self):
        """Return iterator that yields all objects with records"""
        for record in self.iterate_records():
            try:
                yield self.record_to_object(record)
            except (ParsingError, ValueError) as e:
                if self.at_end:
                    break  # Ignore whitespace/bad records at end
                log.Log(
                    "Error parsing flat file: %s [%s(%s)]" %
                    (e, type(self), self.fileobj.fileobj.name), 2)

    def iterate_records(self):
        """Yield all text records in order"""
        while 1:
            next_pos = self.get_next_pos()
            if self.at_end:
                if next_pos:
                    yield self.buf[:next_pos]
                break
            yield self.buf[:next_pos]
            self.buf = self.buf[next_pos:]
        # Close outside the assert: "assert not f.close()" would skip
        # the close() call entirely when running under python -O.
        result = self.fileobj.close()
        assert not result, result

    def skip_to_index(self, index):
        """Scan through the file, set buffer to beginning of index record

        Here we make sure that the buffer always ends in a newline, so
        we will not be splitting lines in half.

        """
        assert not self.buf or self.buf.endswith(b"\n")
        while 1:
            self.buf = self.fileobj.read(self.blocksize)
            self.buf += self.fileobj.readline()  # keep buffer newline-aligned
            if not self.buf:
                self.at_end = 1
                return
            while 1:
                m = self.record_boundary_regexp.search(self.buf)
                if not m:
                    break
                cur_index = self.filename_to_index(m.group(2))
                if cur_index >= index:
                    # Found it (or passed it); leave record at buffer head
                    self.buf = self.buf[m.start(1):]
                    return
                else:
                    self.buf = self.buf[m.end(1):]

    def iterate_starting_with(self, index):
        """Iterate objects whose index starts with given index"""
        self.skip_to_index(index)
        if self.at_end:
            return
        while 1:
            next_pos = self.get_next_pos()
            try:
                obj = self.record_to_object(self.buf[:next_pos])
            except (ParsingError, ValueError) as e:
                log.Log("Error parsing metadata file: %s" % (e, ), 2)
            else:
                if obj.index[:len(index)] != index:
                    break  # past the subtree we were asked for
                yield obj
            if self.at_end:
                break
            self.buf = self.buf[next_pos:]
        # Same -O-safe close as in iterate_records above
        result = self.fileobj.close()
        assert not result, result

    def filename_to_index(self, filename):
        """Translate filename, possibly quoted, into an index tuple

        The filename is the second group matched by
        record_boundary_regexp.

        """
        raise NotImplementedError("must be overridden by subclass")
406
407
class RorpExtractor(FlatExtractor):
    """Iterate rorps from metadata file"""
    # Group 1 is the whole "File <name>" line starting a record; group 2
    # is the (possibly quoted) filename itself
    record_boundary_regexp = re.compile(b"(?:\\n|^)(File (.*?))\\n")
    record_to_object = staticmethod(Record2RORP)
    filename_to_index = staticmethod(quoted_filename_to_index)
413
414
class FlatFile:
    """Manage a flat file containing info on various files

    This is used for metadata information, and possibly EAs and ACLs.
    The main read interface is as an iterator.  The storage format is
    a flat, probably compressed file, so random access is not
    recommended.

    Even if the file looks like a text file, it is actually a binary file,
    so that (especially) paths can be stored as bytes, without issue
    with encoding / decoding.
    """
    # rp: the rpath of the open file; fileobj: its file object;
    # mode: 'r'/'rb' or 'w'/'wb'
    rp, fileobj, mode = None, None, None
    _buffering_on = 1  # Buffering may be useful because gzip writes are slow
    # _record_buffer: pending records; flushed every _max_buffer_size records
    _record_buffer, _max_buffer_size = None, 100
    _extractor = FlatExtractor  # Override to class that iterates objects
    _object_to_record = None  # Set to function converting object to record
    _prefix = None  # Set to required prefix

    def __init__(self, rp_base, mode, check_path=1, compress=1, callback=None):
        """Open rp (or rp+'.gz') for reading ('r') or writing ('w')

        If callback is available, it will be called on the rp upon
        closing (because the rp may not be known in advance).

        """
        self.mode = mode
        self.callback = callback
        self._record_buffer = []
        if check_path:
            # Require the rp to be an increment file with our prefix
            assert (rp_base.isincfile()
                    and rp_base.getincbase_bname() == self._prefix), rp_base
            compress = 1
        if mode == 'r' or mode == 'rb':
            self.rp = rp_base
            self.fileobj = self.rp.open("rb", compress)
        else:
            assert mode == 'w' or mode == 'wb', \
                "File opening mode must be one of r, rb, w or wb, and not %s." % mode
            if compress and check_path and not rp_base.isinccompressed():

                # MaybeGzip decides the final name (.gz or not) only at
                # first write, so capture the real rp via this local
                # callback.  It shadows the constructor argument, but
                # self.callback was already saved above.
                def callback(rp):
                    self.rp = rp

                self.fileobj = rpath.MaybeGzip(rp_base, callback)
            else:
                self.rp = rp_base
                assert not self.rp.lstat(), self.rp  # must not already exist
                self.fileobj = self.rp.open("wb", compress=compress)

    def write_record(self, record):
        """Write a (text) record into the file"""
        if self._buffering_on:
            # Accumulate records and write them in batches; each write
            # to a gzip stream is comparatively expensive
            self._record_buffer.append(record)
            if len(self._record_buffer) >= self._max_buffer_size:
                self.fileobj.write(b"".join(self._record_buffer))
                self._record_buffer = []
        else:
            self.fileobj.write(record)

    def write_object(self, object):
        """Convert one object to record and write to file"""
        self.write_record(self._object_to_record(object))

    def get_objects(self, restrict_index=None):
        """Return iterator of objects records from file rp

        If restrict_index is given, only yield objects whose index
        starts with it.
        """
        if not restrict_index:
            return self._extractor(self.fileobj).iterate()
        extractor = self._extractor(self.fileobj)
        return extractor.iterate_starting_with(restrict_index)

    def get_records(self):
        """Return iterator of text records"""
        return self._extractor(self.fileobj).iterate_records()

    def close(self):
        """Close file, for when any writing is done"""
        assert self.fileobj, "File already closed"
        if self._buffering_on and self._record_buffer:
            # Flush any records still sitting in the write buffer
            self.fileobj.write(b"".join(self._record_buffer))
            self._record_buffer = []
        result = self.fileobj.close()
        self.fileobj = None
        self.rp.fsync_with_dir()  # make the new file durable on disk
        self.rp.setdata()
        if self.callback:
            # Report the final rp (may differ from rp_base with MaybeGzip)
            self.callback(self.rp)
        return result
503
504
class MetadataFile(FlatFile):
    """Store/retrieve metadata from mirror_metadata as rorps"""
    _prefix = b"mirror_metadata"  # required base name of the inc files
    _extractor = RorpExtractor  # parses records back into RORPaths
    _object_to_record = staticmethod(RORP2Record)
510
511
class CombinedWriter:
    """Used for simultaneously writing metadata, eas, and acls"""

    def __init__(self, metawriter, eawriter, aclwriter, winaclwriter):
        self.metawriter = metawriter
        # Each of the following writers may be None when inactive
        self.eawriter = eawriter
        self.aclwriter = aclwriter
        self.winaclwriter = winaclwriter

    def write_object(self, rorp):
        """Write information in rorp to all the writers"""
        self.metawriter.write_object(rorp)
        if self.eawriter:
            ea = rorp.get_ea()
            if not ea.empty():
                self.eawriter.write_object(ea)
        if self.aclwriter:
            acl = rorp.get_acl()
            if not acl.is_basic():
                self.aclwriter.write_object(acl)
        if self.winaclwriter:
            self.winaclwriter.write_object(rorp.get_win_acl())

    def close(self):
        """Close the metadata writer and whichever others are active"""
        self.metawriter.close()
        for writer in (self.eawriter, self.aclwriter, self.winaclwriter):
            if writer:
                writer.close()
538
539
class Manager:
    """Read/Combine/Write metadata files by time"""
    # Base names of the increment files kept in rdiff-backup-data
    meta_prefix = b'mirror_metadata'
    acl_prefix = b'access_control_lists'
    ea_prefix = b'extended_attributes'
    wacl_prefix = b'win_access_control_lists'

    def __init__(self):
        """Set listing of rdiff-backup-data dir"""
        self.rplist = []  # all increment rps found in rbdir
        # timerpmap: inc time -> [rps]; prefixmap: inc base name -> [rps]
        self.timerpmap, self.prefixmap = {}, {}
        for filename in Globals.rbdir.listdir():
            rp = Globals.rbdir.append(filename)
            if rp.isincfile():
                self.add_incrp(rp)

    def add_incrp(self, rp):
        """Add rp to list of inc rps in the rbdir"""
        assert rp.isincfile(), rp
        self.rplist.append(rp)
        time = rp.getinctime()
        if time in self.timerpmap:
            self.timerpmap[time].append(rp)
        else:
            self.timerpmap[time] = [rp]

        incbase = rp.getincbase_bname()
        if incbase in self.prefixmap:
            self.prefixmap[incbase].append(rp)
        else:
            self.prefixmap[incbase] = [rp]

    def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
        """Used below to find the right kind of file by time

        Returns an iterator over the objects stored with the given
        prefix at the given time, or None when no such file exists.
        """
        if time not in self.timerpmap:
            return None
        for rp in self.timerpmap[time]:
            if rp.getincbase_bname() == prefix:
                return flatfileclass(rp, 'r').get_objects(restrict_index)
        return None

    def get_meta_at_time(self, time, restrict_index):
        """Return iter of metadata rorps at given time (or None)"""
        return self._iter_helper(self.meta_prefix, MetadataFile, time,
                                 restrict_index)

    def get_eas_at_time(self, time, restrict_index):
        """Return Extended Attributes iter at given time (or None)"""
        return self._iter_helper(self.ea_prefix,
                                 eas_acls.ExtendedAttributesFile, time,
                                 restrict_index)

    def get_acls_at_time(self, time, restrict_index):
        """Return ACLs iter at given time from recordfile (or None)"""
        return self._iter_helper(self.acl_prefix,
                                 eas_acls.AccessControlListFile, time,
                                 restrict_index)

    def get_win_acls_at_time(self, time, restrict_index):
        """Return WACLs iter at given time from recordfile (or None)"""
        return self._iter_helper(self.wacl_prefix,
                                 win_acls.WinAccessControlListFile, time,
                                 restrict_index)

    def GetAtTime(self, time, restrict_index=None):
        """Return combined metadata iter with ea/acl info if necessary

        Returns None when no mirror_metadata file exists at that time,
        in which case the caller falls back to reading the filesystem.
        """
        cur_iter = self.get_meta_at_time(time, restrict_index)
        if not cur_iter:
            log.Log(
                "Warning, could not find mirror_metadata file.\n"
                "Metadata will be read from filesystem instead.", 2)
            return None

        # Join in each optional attribute stream; a missing file only
        # triggers a warning since the base metadata is still usable
        if Globals.acls_active:
            acl_iter = self.get_acls_at_time(time, restrict_index)
            if not acl_iter:
                log.Log("Warning: Access Control List file not found", 2)
                acl_iter = iter([])
            cur_iter = eas_acls.join_acl_iter(cur_iter, acl_iter)
        if Globals.eas_active:
            ea_iter = self.get_eas_at_time(time, restrict_index)
            if not ea_iter:
                log.Log("Warning: Extended Attributes file not found", 2)
                ea_iter = iter([])
            cur_iter = eas_acls.join_ea_iter(cur_iter, ea_iter)
        if Globals.win_acls_active:
            wacl_iter = self.get_win_acls_at_time(time, restrict_index)
            if not wacl_iter:
                log.Log(
                    "Warning: Windows Access Control List file not"
                    " found.", 2)
                wacl_iter = iter([])
            cur_iter = win_acls.join_wacl_iter(cur_iter, wacl_iter)

        return cur_iter

    def _writer_helper(self, prefix, flatfileclass, typestr, time):
        """Used in the get_xx_writer functions, returns a writer class

        The new file is named <prefix>.<timestamp>.<typestr> inside the
        rbdir; time=None means use the current backup session's time.
        """
        if time is None:
            timestr = Time.curtimestr
        else:
            timestr = Time.timetobytes(time)
        triple = map(os.fsencode, (prefix, timestr, typestr))
        filename = b'.'.join(triple)
        rp = Globals.rbdir.append(filename)
        assert not rp.lstat(), "File %s already exists!" % (rp.path, )
        assert rp.isincfile()
        # add_incrp as callback keeps our maps up to date once the
        # final (possibly .gz) name of the file is known
        return flatfileclass(rp, 'w', callback=self.add_incrp)

    def get_meta_writer(self, typestr, time):
        """Return MetadataFile object opened for writing at given time"""
        return self._writer_helper(self.meta_prefix, MetadataFile, typestr,
                                   time)

    def get_ea_writer(self, typestr, time):
        """Return ExtendedAttributesFile opened for writing"""
        return self._writer_helper(
            self.ea_prefix, eas_acls.ExtendedAttributesFile, typestr, time)

    def get_acl_writer(self, typestr, time):
        """Return AccessControlListFile opened for writing"""
        return self._writer_helper(
            self.acl_prefix, eas_acls.AccessControlListFile, typestr, time)

    def get_win_acl_writer(self, typestr, time):
        """Return WinAccessControlListFile opened for writing"""
        return self._writer_helper(
            self.wacl_prefix, win_acls.WinAccessControlListFile, typestr, time)

    def GetWriter(self, typestr=b'snapshot', time=None):
        """Get a writer object that can write meta and possibly acls/eas"""
        metawriter = self.get_meta_writer(typestr, time)
        if not Globals.eas_active and not Globals.acls_active and \
           not Globals.win_acls_active:
            return metawriter  # no need for a CombinedWriter

        if Globals.eas_active:
            ea_writer = self.get_ea_writer(typestr, time)
        else:
            ea_writer = None
        if Globals.acls_active:
            acl_writer = self.get_acl_writer(typestr, time)
        else:
            acl_writer = None
        if Globals.win_acls_active:
            win_acl_writer = self.get_win_acl_writer(typestr, time)
        else:
            win_acl_writer = None
        return CombinedWriter(metawriter, ea_writer, acl_writer,
                              win_acl_writer)
690
691
class PatchDiffMan(Manager):
    """Contains functions for patching and diffing metadata

    To save space, we can record a full list of only the most recent
    metadata, using the normal rdiff-backup reverse increment
    strategy.  Instead of using librsync to compute diffs, though, we
    use our own technique so that the diff files are still
    hand-editable.

    A mirror_metadata diff has the same format as a mirror_metadata
    snapshot.  If the record for an index is missing from the diff, it
    indicates no change from the original.  If it is present it
    replaces the mirror_metadata entry, unless it has Type None, which
    indicates the record should be deleted from the original.

    """
    max_diff_chain = 9  # After this many diffs, make a new snapshot

    def get_diffiter(self, new_iter, old_iter):
        """Iterate meta diffs of new_iter -> old_iter"""
        for new_rorp, old_rorp in rorpiter.Collate2Iters(new_iter, old_iter):
            if not old_rorp:
                # Index only exists in the new set: emit an empty (Type
                # None) record so patching deletes it from the old set
                yield rpath.RORPath(new_rorp.index)
            elif not new_rorp or new_rorp.data != old_rorp.data:
                # exact compare here, can't use == on rorps
                yield old_rorp

    def sorted_prefix_inclist(self, prefix, min_time=0):
        """Return reverse sorted (by time) list of incs with given prefix"""
        if prefix not in self.prefixmap:
            return []
        sortlist = [(rp.getinctime(), rp) for rp in self.prefixmap[prefix]]

        # we sort before we validate against duplicates so that we tell
        # first about the youngest case of duplication
        sortlist.sort(reverse=True, key=lambda x: x[0])

        # we had cases where the timestamp of the metadata files were
        # duplicates, we need to fail or at least warn about such cases
        unique_set = set()
        for (time, rp) in sortlist:
            if time in unique_set:
                if Globals.allow_duplicate_timestamps:
                    log.Log("Warning: metadata file '%s' has a duplicate "
                            "timestamp date, you might not be able to "
                            "recover files on or earlier than this date. "
                            "Assuming you're in the process of cleaning up "
                            "your repository." %
                            rp.get_safepath(), 2)
                else:
                    log.Log.FatalError(
                        "Metadata file '%s' has a duplicate timestamp date, "
                        "you might not be able to recover files on or earlier "
                        "than this date. "
                        "Check the man page on how to clean up your repository "
                        "using the '--allow-duplicate-timestamps' option." %
                        rp.get_safepath())
            else:
                unique_set.add(time)

        return [rp for (time, rp) in sortlist if time >= min_time]

    def check_needs_diff(self):
        """Check if we should diff, returns (new, old) rps, or (None, None)"""
        inclist = self.sorted_prefix_inclist(b'mirror_metadata')
        assert len(inclist) >= 1
        if len(inclist) == 1:
            # only one snapshot exists; nothing to diff against
            return (None, None)
        newrp, oldrp = inclist[:2]
        assert newrp.getinctype() == oldrp.getinctype() == b'snapshot'

        # Count the unbroken run of diffs following the two snapshots;
        # once it reaches max_diff_chain we keep the snapshot instead
        chainlen = 1
        for rp in inclist[2:]:
            if rp.getinctype() != b'diff':
                break
            chainlen += 1
        if chainlen >= self.max_diff_chain:
            return (None, None)
        return (newrp, oldrp)

    def ConvertMetaToDiff(self):
        """Replace a mirror snapshot with a diff if it's appropriate"""
        newrp, oldrp = self.check_needs_diff()
        if not newrp:
            return
        log.Log("Writing mirror_metadata diff", 6)

        # Write the diff at the old snapshot's time, then remove the
        # now-redundant old snapshot
        diff_writer = self.get_meta_writer(b'diff', oldrp.getinctime())
        new_iter = MetadataFile(newrp, 'r').get_objects()
        old_iter = MetadataFile(oldrp, 'r').get_objects()
        for diff_rorp in self.get_diffiter(new_iter, old_iter):
            diff_writer.write_object(diff_rorp)
        diff_writer.close()  # includes sync
        oldrp.delete()

    def get_meta_at_time(self, time, restrict_index):
        """Get metadata rorp iter, possibly by patching with diffs"""
        meta_iters = [
            MetadataFile(rp, 'r').get_objects(restrict_index)
            for rp in self.relevant_meta_incs(time)
        ]
        if not meta_iters:
            return None
        if len(meta_iters) == 1:
            # requested time is a snapshot: no patching necessary
            return meta_iters[0]
        return self.iterate_patched_meta(meta_iters)

    def relevant_meta_incs(self, time):
        """Return list [snapshotrp, diffrps ...] time sorted

        The returned list starts with the youngest snapshot at or after
        `time` and continues with the diffs leading back down to `time`.
        """
        inclist = self.sorted_prefix_inclist(b'mirror_metadata', min_time=time)
        if not inclist:
            return inclist
        assert inclist[-1].getinctime() == time, inclist[-1]
        for i in range(len(inclist) - 1, -1, -1):
            if inclist[i].getinctype() == b'snapshot':
                return inclist[i:]
        assert 0, "Inclist %s contains no snapshots" % (inclist, )

    def iterate_patched_meta(self, meta_iter_list):
        """Return an iter of metadata rorps by combining the given iters

        The iters should be given as a list/tuple in reverse
        chronological order.  The earliest rorp in each iter will
        supercede all the later ones.

        """
        for meta_tuple in rorpiter.CollateIterators(*meta_iter_list):
            # Scan from oldest to newest; the first present entry wins.
            # An entry without lstat data is a deletion marker: skip it.
            for i in range(len(meta_tuple) - 1, -1, -1):
                if meta_tuple[i]:
                    if meta_tuple[i].lstat():
                        yield meta_tuple[i]
                    break  # move to next index
            else:
                assert 0, "No valid rorps"
826
827
ManagerObj = None  # Set this later to Manager instance


def SetManager():
    """Create the module-wide PatchDiffMan and return it"""
    global ManagerObj
    ManagerObj = PatchDiffMan()
    return ManagerObj
835
836
837from . import eas_acls, win_acls  # noqa: E402
838