1#!/bin/sh
2"""": # -*-python-*-
3# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4export "BUP_ARGV_0"="$0"
5arg_i=1
6for arg in "$@"; do
7    export "BUP_ARGV_${arg_i}"="$arg"
8    shift
9    arg_i=$((arg_i + 1))
10done
11# Here to end of preamble replaced during install
12bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
13exec "$bup_python" "$0"
14"""
15# end of bup preamble
16
17from __future__ import absolute_import, print_function
18from binascii import hexlify
19import errno, os, re, stat, sys, time
20
21sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
22
23from bup import compat, metadata, options, git, index, drecurse, hlinkdb
24from bup.compat import argv_bytes
25from bup.drecurse import recursive_dirlist
26from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
27from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
28                         progress, qprogress, saved_errors)
29from bup.io import byte_stream, path_msg
30
31
32class IterHelper:
33    def __init__(self, l):
34        self.i = iter(l)
35        self.cur = None
36        self.next()
37
38    def __next__(self):
39        self.cur = next(self.i, None)
40        return self.cur
41
42    next = __next__
43
44def check_index(reader):
45    try:
46        log('check: checking forward iteration...\n')
47        e = None
48        d = {}
49        for e in reader.forward_iter():
50            if e.children_n:
51                if opt.verbose:
52                    log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
53                                            path_msg(e.name)))
54                assert(e.children_ofs)
55                assert e.name.endswith(b'/')
56                assert(not d.get(e.children_ofs))
57                d[e.children_ofs] = 1
58            if e.flags & index.IX_HASHVALID:
59                assert(e.sha != index.EMPTY_SHA)
60                assert(e.gitmode)
61        assert not e or bytes(e.name) == b'/'  # last entry is *always* /
62        log('check: checking normal iteration...\n')
63        last = None
64        for e in reader:
65            if last:
66                assert(last > e.name)
67            last = e.name
68    except:
69        log('index error! at %r\n' % e)
70        raise
71    log('check: passed.\n')
72
73
74def clear_index(indexfile):
75    indexfiles = [indexfile, indexfile + b'.meta', indexfile + b'.hlink']
76    for indexfile in indexfiles:
77        path = git.repo(indexfile)
78        try:
79            os.remove(path)
80            if opt.verbose:
81                log('clear: removed %s\n' % path_msg(path))
82        except OSError as e:
83            if e.errno != errno.ENOENT:
84                raise
85
86
87def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions, out=None):
88    # tmax must be epoch nanoseconds.
89    tmax = (time.time() - 1) * 10**9
90    ri = index.Reader(indexfile)
91    msw = index.MetaStoreWriter(indexfile + b'.meta')
92    wi = index.Writer(indexfile, msw, tmax)
93    rig = IterHelper(ri.iter(name=top))
94
95    hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink')
96
97    fake_hash = None
98    if opt.fake_valid:
99        def fake_hash(name):
100            return (GIT_MODE_FILE, index.FAKE_SHA)
101
102    total = 0
103    bup_dir = os.path.abspath(git.repo())
104    index_start = time.time()
105    for path, pst in recursive_dirlist([top],
106                                       xdev=opt.xdev,
107                                       bup_dir=bup_dir,
108                                       excluded_paths=excluded_paths,
109                                       exclude_rxs=exclude_rxs,
110                                       xdev_exceptions=xdev_exceptions):
111        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
112            out.write(b'%s\n' % path)
113            out.flush()
114            elapsed = time.time() - index_start
115            paths_per_sec = total / elapsed if elapsed else 0
116            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
117        elif not (total % 128):
118            elapsed = time.time() - index_start
119            paths_per_sec = total / elapsed if elapsed else 0
120            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
121        total += 1
122
123        while rig.cur and rig.cur.name > path:  # deleted paths
124            if rig.cur.exists():
125                rig.cur.set_deleted()
126                rig.cur.repack()
127                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
128                    hlinks.del_path(rig.cur.name)
129            rig.next()
130
131        if rig.cur and rig.cur.name == path:    # paths that already existed
132            need_repack = False
133            if(rig.cur.stale(pst, check_device=opt.check_device)):
134                try:
135                    meta = metadata.from_path(path, statinfo=pst)
136                except (OSError, IOError) as e:
137                    add_error(e)
138                    rig.next()
139                    continue
140                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
141                    hlinks.del_path(rig.cur.name)
142                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
143                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
144                # Clear these so they don't bloat the store -- they're
145                # already in the index (since they vary a lot and they're
146                # fixed length).  If you've noticed "tmax", you might
147                # wonder why it's OK to do this, since that code may
148                # adjust (mangle) the index mtime and ctime -- producing
149                # fake values which must not end up in a .bupm.  However,
150                # it looks like that shouldn't be possible:  (1) When
151                # "save" validates the index entry, it always reads the
152                # metadata from the filesytem. (2) Metadata is only
153                # read/used from the index if hashvalid is true. (3)
154                # "faked" entries will be stale(), and so we'll invalidate
155                # them below.
156                meta.ctime = meta.mtime = meta.atime = 0
157                meta_ofs = msw.store(meta)
158                rig.cur.update_from_stat(pst, meta_ofs)
159                rig.cur.invalidate()
160                need_repack = True
161            if not (rig.cur.flags & index.IX_HASHVALID):
162                if fake_hash:
163                    if rig.cur.sha == index.EMPTY_SHA:
164                        rig.cur.gitmode, rig.cur.sha = fake_hash(path)
165                    rig.cur.flags |= index.IX_HASHVALID
166                    need_repack = True
167            if opt.fake_invalid:
168                rig.cur.invalidate()
169                need_repack = True
170            if need_repack:
171                rig.cur.repack()
172            rig.next()
173        else:  # new paths
174            try:
175                meta = metadata.from_path(path, statinfo=pst)
176            except (OSError, IOError) as e:
177                add_error(e)
178                continue
179            # See same assignment to 0, above, for rationale.
180            meta.atime = meta.mtime = meta.ctime = 0
181            meta_ofs = msw.store(meta)
182            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
183            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
184                hlinks.add_path(path, pst.st_dev, pst.st_ino)
185
186    elapsed = time.time() - index_start
187    paths_per_sec = total / elapsed if elapsed else 0
188    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))
189
190    hlinks.prepare_save()
191
192    if ri.exists():
193        ri.save()
194        wi.flush()
195        if wi.count:
196            wr = wi.new_reader()
197            if opt.check:
198                log('check: before merging: oldfile\n')
199                check_index(ri)
200                log('check: before merging: newfile\n')
201                check_index(wr)
202            mi = index.Writer(indexfile, msw, tmax)
203
204            for e in index.merge(ri, wr):
205                # FIXME: shouldn't we remove deleted entries eventually?  When?
206                mi.add_ixentry(e)
207
208            ri.close()
209            mi.close()
210            wr.close()
211        wi.abort()
212    else:
213        wi.close()
214
215    msw.close()
216    hlinks.commit_save()
217
218
219optspec = """
220bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
221--
222 Modes:
223p,print    print the index entries for the given names (also works with -u)
224m,modified print only added/deleted/modified files (implies -p)
225s,status   print each filename with a status char (A/M/D) (implies -p)
226u,update   recursively update the index entries for the given file/dir names (default if no mode is specified)
227check      carefully check index file integrity
228clear      clear the default index
229 Options:
230H,hash     print the hash for each object next to its name
231l,long     print more information about each file
232no-check-device don't invalidate an entry if the containing device changes
233fake-valid mark all index entries as up-to-date even if they aren't
234fake-invalid mark all index entries as invalid
235f,indexfile=  the name of the index file (normally BUP_DIR/bupindex)
236exclude= a path to exclude from the backup (may be repeated)
237exclude-from= skip --exclude paths in file (may be repeated)
238exclude-rx= skip paths matching the unanchored regex (may be repeated)
239exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
240v,verbose  increase log output (can be used more than once)
241x,xdev,one-file-system  don't cross filesystem boundaries
242"""
243o = options.Options(optspec)
244opt, flags, extra = o.parse(compat.argv[1:])
245
246if not (opt.modified or \
247        opt['print'] or \
248        opt.status or \
249        opt.update or \
250        opt.check or \
251        opt.clear):
252    opt.update = 1
253if (opt.fake_valid or opt.fake_invalid) and not opt.update:
254    o.fatal('--fake-{in,}valid are meaningless without -u')
255if opt.fake_valid and opt.fake_invalid:
256    o.fatal('--fake-valid is incompatible with --fake-invalid')
257if opt.clear and opt.indexfile:
258    o.fatal('cannot clear an external index (via -f)')
259
260# FIXME: remove this once we account for timestamp races, i.e. index;
261# touch new-file; index.  It's possible for this to happen quickly
262# enough that new-file ends up with the same timestamp as the first
263# index, and then bup will ignore it.
264tick_start = time.time()
265time.sleep(1 - (tick_start - int(tick_start)))
266
267git.check_repo_or_die()
268
269handle_ctrl_c()
270
271if opt.verbose is None:
272    opt.verbose = 0
273
274if opt.indexfile:
275    indexfile = argv_bytes(opt.indexfile)
276else:
277    indexfile = git.repo(b'bupindex')
278
279if opt.check:
280    log('check: starting initial check.\n')
281    check_index(index.Reader(indexfile))
282
283if opt.clear:
284    log('clear: clearing index.\n')
285    clear_index(indexfile)
286
287sys.stdout.flush()
288out = byte_stream(sys.stdout)
289
290if opt.update:
291    if not extra:
292        o.fatal('update mode (-u) requested but no paths given')
293    extra = [argv_bytes(x) for x in extra]
294    excluded_paths = parse_excludes(flags, o.fatal)
295    exclude_rxs = parse_rx_excludes(flags, o.fatal)
296    xexcept = index.unique_resolved_paths(extra)
297    for rp, path in index.reduce_paths(extra):
298        update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept,
299                     out=out)
300
301if opt['print'] or opt.status or opt.modified:
302    extra = [argv_bytes(x) for x in extra]
303    for name, ent in index.Reader(indexfile).filter(extra or [b'']):
304        if (opt.modified
305            and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
306            continue
307        line = b''
308        if opt.status:
309            if ent.is_deleted():
310                line += b'D '
311            elif not ent.is_valid():
312                if ent.sha == index.EMPTY_SHA:
313                    line += b'A '
314                else:
315                    line += b'M '
316            else:
317                line += b'  '
318        if opt.hash:
319            line += hexlify(ent.sha) + b' '
320        if opt.long:
321            line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
322                                   oct(ent.gitmode).encode('ascii'))
323        out.write(line + (name or b'./') + b'\n')
324
325if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
326    log('check: starting final check.\n')
327    check_index(index.Reader(indexfile))
328
329if saved_errors:
330    log('WARNING: %d errors encountered.\n' % len(saved_errors))
331    sys.exit(1)
332