1
2from __future__ import absolute_import, print_function
3import errno, os, stat, struct, tempfile
4
5from bup import compat, metadata, xstat
6from bup._helpers import UINT_MAX, bytescmp
7from bup.compat import range
8from bup.helpers import (add_error, log, merge_iter, mmap_readwrite,
9                         progress, qprogress, resolve_parent, slashappend)
10
11EMPTY_SHA = b'\0' * 20
12FAKE_SHA = b'\x01' * 20
13
14INDEX_HDR = b'BUPI\0\0\0\7'
15
16# Time values are handled as integer nanoseconds since the epoch in
17# memory, but are written as xstat/metadata timespecs.  This behavior
18# matches the existing metadata/xstat/.bupm code.
19
20# Record times (mtime, ctime, atime) as xstat/metadata timespecs, and
21# store all of the times in the index so they won't interfere with the
22# forthcoming metadata cache.
23INDEX_SIG = ('!'
24             'Q'                # dev
25             'Q'                # ino
26             'Q'                # nlink
27             'qQ'               # ctime_s, ctime_ns
28             'qQ'               # mtime_s, mtime_ns
29             'qQ'               # atime_s, atime_ns
30             'Q'                # size
31             'I'                # mode
32             'I'                # gitmode
33             '20s'              # sha
34             'H'                # flags
35             'Q'                # children_ofs
36             'I'                # children_n
37             'Q')               # meta_ofs
38
39ENTLEN = struct.calcsize(INDEX_SIG)
40FOOTER_SIG = '!Q'
41FOOTLEN = struct.calcsize(FOOTER_SIG)
42
43IX_EXISTS = 0x8000        # file exists on filesystem
44IX_HASHVALID = 0x4000     # the stored sha1 matches the filesystem
45IX_SHAMISSING = 0x2000    # the stored sha1 object doesn't seem to exist
46
47class Error(Exception):
48    pass
49
50
51class MetaStoreReader:
52    def __init__(self, filename):
53        self._file = None
54        self._file = open(filename, 'rb')
55
56    def close(self):
57        if self._file:
58            self._file.close()
59            self._file = None
60
61    def __del__(self):
62        self.close()
63
64    def metadata_at(self, ofs):
65        self._file.seek(ofs)
66        return metadata.Metadata.read(self._file)
67
68
69class MetaStoreWriter:
70    # For now, we just append to the file, and try to handle any
71    # truncation or corruption somewhat sensibly.
72
73    def __init__(self, filename):
74        # Map metadata hashes to bupindex.meta offsets.
75        self._offsets = {}
76        self._filename = filename
77        self._file = None
78        # FIXME: see how slow this is; does it matter?
79        m_file = open(filename, 'ab+')
80        try:
81            m_file.seek(0)
82            try:
83                m_off = m_file.tell()
84                m = metadata.Metadata.read(m_file)
85                while m:
86                    m_encoded = m.encode()
87                    self._offsets[m_encoded] = m_off
88                    m_off = m_file.tell()
89                    m = metadata.Metadata.read(m_file)
90            except EOFError:
91                pass
92            except:
93                log('index metadata in %r appears to be corrupt' % filename)
94                raise
95        finally:
96            m_file.close()
97        self._file = open(filename, 'ab')
98
99    def close(self):
100        if self._file:
101            self._file.close()
102            self._file = None
103
104    def __del__(self):
105        # Be optimistic.
106        self.close()
107
108    def store(self, metadata):
109        meta_encoded = metadata.encode(include_path=False)
110        ofs = self._offsets.get(meta_encoded)
111        if ofs:
112            return ofs
113        ofs = self._file.tell()
114        self._file.write(meta_encoded)
115        self._offsets[meta_encoded] = ofs
116        return ofs
117
118
119class Level:
120    def __init__(self, ename, parent):
121        self.parent = parent
122        self.ename = ename
123        self.list = []
124        self.count = 0
125
126    def write(self, f):
127        (ofs,n) = (f.tell(), len(self.list))
128        if self.list:
129            count = len(self.list)
130            #log('popping %r with %d entries\n'
131            #    % (''.join(self.ename), count))
132            for e in self.list:
133                e.write(f)
134            if self.parent:
135                self.parent.count += count + self.count
136        return (ofs,n)
137
138
139def _golevel(level, f, ename, newentry, metastore, tmax):
140    # close nodes back up the tree
141    assert(level)
142    default_meta_ofs = metastore.store(metadata.Metadata())
143    while ename[:len(level.ename)] != level.ename:
144        n = BlankNewEntry(level.ename[-1], default_meta_ofs, tmax)
145        n.flags |= IX_EXISTS
146        (n.children_ofs,n.children_n) = level.write(f)
147        level.parent.list.append(n)
148        level = level.parent
149
150    # create nodes down the tree
151    while len(level.ename) < len(ename):
152        level = Level(ename[:len(level.ename)+1], level)
153
154    # are we in precisely the right place?
155    assert(ename == level.ename)
156    n = newentry or \
157        BlankNewEntry(ename and level.ename[-1] or None, default_meta_ofs, tmax)
158    (n.children_ofs,n.children_n) = level.write(f)
159    if level.parent:
160        level.parent.list.append(n)
161    level = level.parent
162
163    return level
164
165
166class Entry:
167    def __init__(self, basename, name, meta_ofs, tmax):
168        assert basename is None or type(basename) == bytes
169        assert name is None or type(name) == bytes
170        self.basename = basename
171        self.name = name
172        self.meta_ofs = meta_ofs
173        self.tmax = tmax
174        self.children_ofs = 0
175        self.children_n = 0
176
177    def __repr__(self):
178        return ("(%r,0x%04x,%d,%d,%d,%d,%d,%d,%s/%s,0x%04x,%d,0x%08x/%d)"
179                % (self.name, self.dev, self.ino, self.nlink,
180                   self.ctime, self.mtime, self.atime,
181                   self.size, self.mode, self.gitmode,
182                   self.flags, self.meta_ofs,
183                   self.children_ofs, self.children_n))
184
185    def packed(self):
186        try:
187            ctime = xstat.nsecs_to_timespec(self.ctime)
188            mtime = xstat.nsecs_to_timespec(self.mtime)
189            atime = xstat.nsecs_to_timespec(self.atime)
190            return struct.pack(INDEX_SIG,
191                               self.dev, self.ino, self.nlink,
192                               ctime[0], ctime[1],
193                               mtime[0], mtime[1],
194                               atime[0], atime[1],
195                               self.size, self.mode,
196                               self.gitmode, self.sha, self.flags,
197                               self.children_ofs, self.children_n,
198                               self.meta_ofs)
199        except (DeprecationWarning, struct.error) as e:
200            log('pack error: %s (%r)\n' % (e, self))
201            raise
202
203    def stale(self, st, check_device=True):
204        if self.size != st.st_size:
205            return True
206        if self.mtime != st.st_mtime:
207            return True
208        if self.sha == EMPTY_SHA:
209            return True
210        if not self.gitmode:
211            return True
212        if self.ctime != st.st_ctime:
213            return True
214        if self.ino != st.st_ino:
215            return True
216        if self.nlink != st.st_nlink:
217            return True
218        if not (self.flags & IX_EXISTS):
219            return True
220        if check_device and (self.dev != st.st_dev):
221            return True
222        return False
223
224    def update_from_stat(self, st, meta_ofs):
225        # Should only be called when the entry is stale(), and
226        # invalidate() should almost certainly be called afterward.
227        self.dev = st.st_dev
228        self.ino = st.st_ino
229        self.nlink = st.st_nlink
230        self.ctime = st.st_ctime
231        self.mtime = st.st_mtime
232        self.atime = st.st_atime
233        self.size = st.st_size
234        self.mode = st.st_mode
235        self.flags |= IX_EXISTS
236        self.meta_ofs = meta_ofs
237        self._fixup()
238
239    def _fixup(self):
240        self.mtime = self._fixup_time(self.mtime)
241        self.ctime = self._fixup_time(self.ctime)
242
243    def _fixup_time(self, t):
244        if self.tmax != None and t > self.tmax:
245            return self.tmax
246        else:
247            return t
248
249    def is_valid(self):
250        f = IX_HASHVALID|IX_EXISTS
251        return (self.flags & f) == f
252
253    def invalidate(self):
254        self.flags &= ~IX_HASHVALID
255
256    def validate(self, gitmode, sha):
257        assert(sha)
258        assert(gitmode)
259        assert(gitmode+0 == gitmode)
260        self.gitmode = gitmode
261        self.sha = sha
262        self.flags |= IX_HASHVALID|IX_EXISTS
263
264    def exists(self):
265        return not self.is_deleted()
266
267    def sha_missing(self):
268        return (self.flags & IX_SHAMISSING) or not (self.flags & IX_HASHVALID)
269
270    def is_deleted(self):
271        return (self.flags & IX_EXISTS) == 0
272
273    def set_deleted(self):
274        if self.flags & IX_EXISTS:
275            self.flags &= ~(IX_EXISTS | IX_HASHVALID)
276
277    def is_real(self):
278        return not self.is_fake()
279
280    def is_fake(self):
281        return not self.ctime
282
283    def _cmp(self, other):
284        # Note reversed name ordering
285        bc = bytescmp(other.name, self.name)
286        if bc != 0:
287            return bc
288        vc = self.is_valid() - other.is_valid()
289        if vc != 0:
290            return vc
291        fc = self.is_fake() - other.is_fake()
292        if fc != 0:
293            return fc
294        return 0
295
296    def __eq__(self, other):
297        return self._cmp(other) == 0
298
299    def __ne__(self, other):
300        return self._cmp(other) != 0
301
302    def __lt__(self, other):
303        return self._cmp(other) < 0
304
305    def __gt__(self, other):
306        return self._cmp(other) > 0
307
308    def __le__(self, other):
309        return self._cmp(other) <= 0
310
311    def __ge__(self, other):
312        return self._cmp(other) >= 0
313
314    def write(self, f):
315        f.write(self.basename + b'\0' + self.packed())
316
317
318class NewEntry(Entry):
319    def __init__(self, basename, name, tmax, dev, ino, nlink,
320                 ctime, mtime, atime,
321                 size, mode, gitmode, sha, flags, meta_ofs,
322                 children_ofs, children_n):
323        Entry.__init__(self, basename, name, meta_ofs, tmax)
324        (self.dev, self.ino, self.nlink, self.ctime, self.mtime, self.atime,
325         self.size, self.mode, self.gitmode, self.sha,
326         self.flags, self.children_ofs, self.children_n
327         ) = (dev, ino, nlink, ctime, mtime, atime,
328              size, mode, gitmode, sha, flags, children_ofs, children_n)
329        self._fixup()
330
331
332class BlankNewEntry(NewEntry):
333    def __init__(self, basename, meta_ofs, tmax):
334        NewEntry.__init__(self, basename, basename, tmax,
335                          0, 0, 0, 0, 0, 0, 0, 0,
336                          0, EMPTY_SHA, 0, meta_ofs, 0, 0)
337
338
339class ExistingEntry(Entry):
340    def __init__(self, parent, basename, name, m, ofs):
341        Entry.__init__(self, basename, name, None, None)
342        self.parent = parent
343        self._m = m
344        self._ofs = ofs
345        (self.dev, self.ino, self.nlink,
346         self.ctime, ctime_ns, self.mtime, mtime_ns, self.atime, atime_ns,
347         self.size, self.mode, self.gitmode, self.sha,
348         self.flags, self.children_ofs, self.children_n, self.meta_ofs
349         ) = struct.unpack(INDEX_SIG, m[ofs : ofs + ENTLEN])
350        self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns))
351        self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns))
352        self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns))
353
354    # effectively, we don't bother messing with IX_SHAMISSING if
355    # not IX_HASHVALID, since it's redundant, and repacking is more
356    # expensive than not repacking.
357    # This is implemented by having sha_missing() check IX_HASHVALID too.
358    def set_sha_missing(self, val):
359        val = val and 1 or 0
360        oldval = self.sha_missing() and 1 or 0
361        if val != oldval:
362            flag = val and IX_SHAMISSING or 0
363            newflags = (self.flags & (~IX_SHAMISSING)) | flag
364            self.flags = newflags
365            self.repack()
366
367    def unset_sha_missing(self, flag):
368        if self.flags & IX_SHAMISSING:
369            self.flags &= ~IX_SHAMISSING
370            self.repack()
371
372    def repack(self):
373        self._m[self._ofs:self._ofs+ENTLEN] = self.packed()
374        if self.parent and not self.is_valid():
375            self.parent.invalidate()
376            self.parent.repack()
377
378    def iter(self, name=None, wantrecurse=None):
379        dname = name
380        if dname and not dname.endswith(b'/'):
381            dname += b'/'
382        ofs = self.children_ofs
383        assert(ofs <= len(self._m))
384        assert(self.children_n <= UINT_MAX)  # i.e. python struct 'I'
385        for i in range(self.children_n):
386            eon = self._m.find(b'\0', ofs)
387            assert(eon >= 0)
388            assert(eon >= ofs)
389            assert(eon > ofs)
390            basename = self._m[ofs : ofs + (eon - ofs)]
391            child = ExistingEntry(self, basename, self.name + basename,
392                                  self._m, eon+1)
393            if (not dname
394                 or child.name.startswith(dname)
395                 or child.name.endswith(b'/') and dname.startswith(child.name)):
396                if not wantrecurse or wantrecurse(child):
397                    for e in child.iter(name=name, wantrecurse=wantrecurse):
398                        yield e
399            if not name or child.name == name or child.name.startswith(dname):
400                yield child
401            ofs = eon + 1 + ENTLEN
402
403    def __iter__(self):
404        return self.iter()
405
406
407class Reader:
408    def __init__(self, filename):
409        self.filename = filename
410        self.m = b''
411        self.writable = False
412        self.count = 0
413        f = None
414        try:
415            f = open(filename, 'rb+')
416        except IOError as e:
417            if e.errno == errno.ENOENT:
418                pass
419            else:
420                raise
421        if f:
422            b = f.read(len(INDEX_HDR))
423            if b != INDEX_HDR:
424                log('warning: %s: header: expected %r, got %r\n'
425                                 % (filename, INDEX_HDR, b))
426            else:
427                st = os.fstat(f.fileno())
428                if st.st_size:
429                    self.m = mmap_readwrite(f)
430                    self.writable = True
431                    self.count = struct.unpack(FOOTER_SIG,
432                                               self.m[st.st_size - FOOTLEN
433                                                      : st.st_size])[0]
434
435    def __del__(self):
436        self.close()
437
438    def __len__(self):
439        return int(self.count)
440
441    def forward_iter(self):
442        ofs = len(INDEX_HDR)
443        while ofs+ENTLEN <= len(self.m)-FOOTLEN:
444            eon = self.m.find(b'\0', ofs)
445            assert(eon >= 0)
446            assert(eon >= ofs)
447            assert(eon > ofs)
448            basename = self.m[ofs : ofs + (eon - ofs)]
449            yield ExistingEntry(None, basename, basename, self.m, eon+1)
450            ofs = eon + 1 + ENTLEN
451
452    def iter(self, name=None, wantrecurse=None):
453        if len(self.m) > len(INDEX_HDR)+ENTLEN:
454            dname = name
455            if dname and not dname.endswith(b'/'):
456                dname += b'/'
457            root = ExistingEntry(None, b'/', b'/',
458                                 self.m, len(self.m)-FOOTLEN-ENTLEN)
459            for sub in root.iter(name=name, wantrecurse=wantrecurse):
460                yield sub
461            if not dname or dname == root.name:
462                yield root
463
464    def __iter__(self):
465        return self.iter()
466
467    def find(self, name):
468        return next((e for e in self.iter(name, wantrecurse=lambda x : True)
469                     if e.name == name),
470                    None)
471
472    def exists(self):
473        return self.m
474
475    def save(self):
476        if self.writable and self.m:
477            self.m.flush()
478
479    def close(self):
480        self.save()
481        if self.writable and self.m:
482            self.m.close()
483            self.m = None
484            self.writable = False
485
486    def filter(self, prefixes, wantrecurse=None):
487        for (rp, path) in reduce_paths(prefixes):
488            any_entries = False
489            for e in self.iter(rp, wantrecurse=wantrecurse):
490                any_entries = True
491                assert(e.name.startswith(rp))
492                name = path + e.name[len(rp):]
493                yield (name, e)
494            if not any_entries:
495                # Always return at least the top for each prefix.
496                # Otherwise something like "save x/y" will produce
497                # nothing if x is up to date.
498                pe = self.find(rp)
499                assert(pe)
500                name = path + pe.name[len(rp):]
501                yield (name, pe)
502
503# FIXME: this function isn't very generic, because it splits the filename
504# in an odd way and depends on a terminating '/' to indicate directories.
505def pathsplit(p):
506    """Split a path into a list of elements of the file system hierarchy."""
507    l = p.split(b'/')
508    l = [i + b'/' for i in l[:-1]] + l[-1:]
509    if l[-1] == b'':
510        l.pop()  # extra blank caused by terminating '/'
511    return l
512
513
514class Writer:
515    def __init__(self, filename, metastore, tmax):
516        self.rootlevel = self.level = Level([], None)
517        self.f = None
518        self.count = 0
519        self.lastfile = None
520        self.filename = None
521        self.filename = filename = resolve_parent(filename)
522        self.metastore = metastore
523        self.tmax = tmax
524        (dir,name) = os.path.split(filename)
525        ffd, self.tmpname = tempfile.mkstemp(b'.tmp', filename, dir)
526        self.f = os.fdopen(ffd, 'wb', 65536)
527        self.f.write(INDEX_HDR)
528
529    def __del__(self):
530        self.abort()
531
532    def abort(self):
533        f = self.f
534        self.f = None
535        if f:
536            f.close()
537            os.unlink(self.tmpname)
538
539    def flush(self):
540        if self.level:
541            self.level = _golevel(self.level, self.f, [], None,
542                                  self.metastore, self.tmax)
543            self.count = self.rootlevel.count
544            if self.count:
545                self.count += 1
546            self.f.write(struct.pack(FOOTER_SIG, self.count))
547            self.f.flush()
548        assert(self.level == None)
549
550    def close(self):
551        self.flush()
552        f = self.f
553        self.f = None
554        if f:
555            f.close()
556            os.rename(self.tmpname, self.filename)
557
558    def _add(self, ename, entry):
559        if self.lastfile and self.lastfile <= ename:
560            raise Error('%r must come before %r'
561                             % (''.join(ename), ''.join(self.lastfile)))
562        self.lastfile = ename
563        self.level = _golevel(self.level, self.f, ename, entry,
564                              self.metastore, self.tmax)
565
566    def add(self, name, st, meta_ofs, hashgen = None):
567        endswith = name.endswith(b'/')
568        ename = pathsplit(name)
569        basename = ename[-1]
570        #log('add: %r %r\n' % (basename, name))
571        flags = IX_EXISTS
572        sha = None
573        if hashgen:
574            (gitmode, sha) = hashgen(name)
575            flags |= IX_HASHVALID
576        else:
577            (gitmode, sha) = (0, EMPTY_SHA)
578        if st:
579            isdir = stat.S_ISDIR(st.st_mode)
580            assert(isdir == endswith)
581            e = NewEntry(basename, name, self.tmax,
582                         st.st_dev, st.st_ino, st.st_nlink,
583                         st.st_ctime, st.st_mtime, st.st_atime,
584                         st.st_size, st.st_mode, gitmode, sha, flags,
585                         meta_ofs, 0, 0)
586        else:
587            assert(endswith)
588            meta_ofs = self.metastore.store(metadata.Metadata())
589            e = BlankNewEntry(basename, meta_ofs, self.tmax)
590            e.gitmode = gitmode
591            e.sha = sha
592            e.flags = flags
593        self._add(ename, e)
594
595    def add_ixentry(self, e):
596        e.children_ofs = e.children_n = 0
597        self._add(pathsplit(e.name), e)
598
599    def new_reader(self):
600        self.flush()
601        return Reader(self.tmpname)
602
603
604def _slashappend_or_add_error(p, caller):
605    """Return p, after ensuring it has a single trailing slash if it names
606    a directory, unless there's an OSError, in which case, call
607    add_error() and return None."""
608    try:
609        st = os.lstat(p)
610    except OSError as e:
611        add_error('%s: %s' % (caller, e))
612        return None
613    else:
614        if stat.S_ISDIR(st.st_mode):
615            return slashappend(p)
616        return p
617
618
619def unique_resolved_paths(paths):
620    "Return a collection of unique resolved paths."
621    rps = (_slashappend_or_add_error(resolve_parent(p), 'unique_resolved_paths')
622           for p in paths)
623    return frozenset((x for x in rps if x is not None))
624
625
626def reduce_paths(paths):
627    xpaths = []
628    for p in paths:
629        rp = _slashappend_or_add_error(resolve_parent(p), 'reduce_paths')
630        if rp:
631            xpaths.append((rp, slashappend(p) if rp.endswith(b'/') else p))
632    xpaths.sort()
633
634    paths = []
635    prev = None
636    for (rp, p) in xpaths:
637        if prev and (prev == rp
638                     or (prev.endswith(b'/') and rp.startswith(prev))):
639            continue # already superceded by previous path
640        paths.append((rp, p))
641        prev = rp
642    paths.sort(reverse=True)
643    return paths
644
645
646def merge(*iters):
647    def pfunc(count, total):
648        qprogress('bup: merging indexes (%d/%d)\r' % (count, total))
649    def pfinal(count, total):
650        progress('bup: merging indexes (%d/%d), done.\n' % (count, total))
651    return merge_iter(iters, 1024, pfunc, pfinal, key='name')
652