#!/bin/sh
"""": # -*-python-*-
# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
export "BUP_ARGV_0"="$0"
arg_i=1
for arg in "$@"; do
    export "BUP_ARGV_${arg_i}"="$arg"
    shift
    arg_i=$((arg_i + 1))
done
# Here to end of preamble replaced during install
bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
exec "$bup_python" "$0"
"""
# end of bup preamble

from __future__ import absolute_import, print_function
from binascii import hexlify
import errno, os, re, stat, sys, time

sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']

from bup import compat, metadata, options, git, index, drecurse, hlinkdb
from bup.compat import argv_bytes
from bup.drecurse import recursive_dirlist
from bup.hashsplit import GIT_MODE_TREE, GIT_MODE_FILE
from bup.helpers import (add_error, handle_ctrl_c, log, parse_excludes, parse_rx_excludes,
                         progress, qprogress, saved_errors)
from bup.io import byte_stream, path_msg


class IterHelper:
    """Iterator wrapper that remembers the most recently fetched item.

    `cur` holds the current item, or None once the wrapped iterator is
    exhausted.  The first item is fetched during construction.
    """

    def __init__(self, l):
        self.i = iter(l)
        self.cur = None
        self.next()

    def __next__(self):
        """Advance to the next item, store it in `cur` and return it.

        Returns None (instead of raising StopIteration) at the end.
        """
        self.cur = next(self.i, None)
        return self.cur

    # Python 2 spelling of the same method.
    next = __next__


def check_index(reader):
    """Run consistency assertions over the index exposed by `reader`.

    Two passes are made: one over reader.forward_iter() and one over
    the reader's normal iteration order.  Progress is logged; on any
    failure the entry being examined is logged and the exception is
    re-raised.  Reads the module-level `opt` for verbosity.
    """
    try:
        log('check: checking forward iteration...\n')
        e = None
        d = {}
        for e in reader.forward_iter():
            if e.children_n:
                if opt.verbose:
                    log('%08x+%-4d %r\n' % (e.children_ofs, e.children_n,
                                            path_msg(e.name)))
                assert(e.children_ofs)
                assert e.name.endswith(b'/')
                # Each children offset may be claimed by one entry only.
                assert(not d.get(e.children_ofs))
                d[e.children_ofs] = 1
            if e.flags & index.IX_HASHVALID:
                assert(e.sha != index.EMPTY_SHA)
                assert(e.gitmode)
        assert not e or bytes(e.name) == b'/'  # last entry is *always* /
        log('check: checking normal iteration...\n')
        last = None
        for e in reader:
            # Normal iteration must yield strictly decreasing names.
            if last:
                assert(last > e.name)
            last = e.name
    except:
        # Deliberately broad: report where we were, then re-raise unchanged.
        log('index error! at %r\n' % e)
        raise
    log('check: passed.\n')


def clear_index(indexfile):
    """Remove the index file and its '.meta' and '.hlink' companions.

    Files that do not exist are skipped silently; any other OSError is
    propagated.  Reads the module-level `opt` for verbosity.
    """
    targets = [indexfile, indexfile + b'.meta', indexfile + b'.hlink']
    for target in targets:
        path = git.repo(target)
        try:
            os.remove(path)
            if opt.verbose:
                log('clear: removed %s\n' % path_msg(path))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise


def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions, out=None):
    """Refresh the index entries for everything found under `top`.

    Walks recursive_dirlist([top], ...) and, in step with the existing
    index entries for `top`:
      - marks existing entries that the walk has passed as deleted,
      - re-stats and invalidates entries that are stale,
      - adds entries for paths not yet in the index.
    New entries are written through a separate index.Writer and merged
    with the existing index at the end.

    top: path (bytes) to index.
    excluded_paths, exclude_rxs, xdev_exceptions: passed straight
        through to recursive_dirlist().
    out: binary stream that receives one path per line when verbose
        output is enabled; it is written to unconditionally in that
        case, so it must be provided whenever opt.verbose is nonzero.

    Reads the module-level `opt` and `indexfile`.  Metadata read
    failures (OSError/IOError) are recorded with add_error() and the
    affected path is skipped.
    """
    # tmax must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + b'.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))

    hlinks = hlinkdb.HLinkDB(indexfile + b'.hlink')

    fake_hash = None
    if opt.fake_valid:
        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()

    def paths_per_sec(count):
        # Average indexing rate so far; 0 while no time has elapsed.
        elapsed = time.time() - index_start
        return count / elapsed if elapsed else 0

    for path, pst in recursive_dirlist([top],
                                       xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        # -v prints directories only, -vv prints every path.
        show_path = (opt.verbose >= 2
                     or (opt.verbose == 1 and stat.S_ISDIR(pst.st_mode)))
        if show_path:
            out.write(b'%s\n' % path)
            out.flush()
        # Update the progress line with every printed path, otherwise
        # only once per 128 paths.
        if show_path or not (total % 128):
            qprogress('Indexing: %d (%d paths/s)\r'
                      % (total, paths_per_sec(total)))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:    # paths that already existed
            need_repack = False
            if rig.cur.stale(pst, check_device=opt.check_device):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesystem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    if rig.cur.sha == index.EMPTY_SHA:
                        rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    progress('Indexing: %d, done (%d paths/s).\n'
             % (total, paths_per_sec(total)))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            # New paths were added: merge them with the existing index
            # into a fresh writer, then discard the temporary one.
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()


optspec = """
bup index <-p|-m|-s|-u|--clear|--check> [options...] <filenames...>
--
 Modes:
p,print print the index entries for the given names (also works with -u)
m,modified print only added/deleted/modified files (implies -p)
s,status print each filename with a status char (A/M/D) (implies -p)
u,update recursively update the index entries for the given file/dir names (default if no mode is specified)
check carefully check index file integrity
clear clear the default index
 Options:
H,hash print the hash for each object next to its name
l,long print more information about each file
no-check-device don't invalidate an entry if the containing device changes
fake-valid mark all index entries as up-to-date even if they aren't
fake-invalid mark all index entries as invalid
f,indexfile= the name of the index file (normally BUP_DIR/bupindex)
exclude= a path to exclude from the backup (may be repeated)
exclude-from= skip --exclude paths in file (may be repeated)
exclude-rx= skip paths matching the unanchored regex (may be repeated)
exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
v,verbose increase log output (can be used more than once)
x,xdev,one-file-system don't cross filesystem boundaries
"""
o = options.Options(optspec)
opt, flags, extra = o.parse(compat.argv[1:])

# Default to update mode when no mode was requested.
if not (opt.modified or \
        opt['print'] or \
        opt.status or \
        opt.update or \
        opt.check or \
        opt.clear):
    opt.update = 1
if (opt.fake_valid or opt.fake_invalid) and not opt.update:
    o.fatal('--fake-{in,}valid are meaningless without -u')
if opt.fake_valid and opt.fake_invalid:
    o.fatal('--fake-valid is incompatible with --fake-invalid')
if opt.clear and opt.indexfile:
    o.fatal('cannot clear an external index (via -f)')

# FIXME: remove this once we account for timestamp races, i.e. index;
# touch new-file; index.  It's possible for this to happen quickly
# enough that new-file ends up with the same timestamp as the first
# index, and then bup will ignore it.
tick_start = time.time()
time.sleep(1 - (tick_start - int(tick_start)))

git.check_repo_or_die()

handle_ctrl_c()

if opt.verbose is None:
    opt.verbose = 0

if opt.indexfile:
    indexfile = argv_bytes(opt.indexfile)
else:
    indexfile = git.repo(b'bupindex')

if opt.check:
    log('check: starting initial check.\n')
    check_index(index.Reader(indexfile))

if opt.clear:
    log('clear: clearing index.\n')
    clear_index(indexfile)

sys.stdout.flush()
out = byte_stream(sys.stdout)

# Convert the path arguments once; both the update and the print modes
# below consume them.
extra = [argv_bytes(x) for x in extra]

if opt.update:
    if not extra:
        o.fatal('update mode (-u) requested but no paths given')
    excluded_paths = parse_excludes(flags, o.fatal)
    exclude_rxs = parse_rx_excludes(flags, o.fatal)
    xexcept = index.unique_resolved_paths(extra)
    for rp, path in index.reduce_paths(extra):
        update_index(rp, excluded_paths, exclude_rxs, xdev_exceptions=xexcept,
                     out=out)

if opt['print'] or opt.status or opt.modified:
    for name, ent in index.Reader(indexfile).filter(extra or [b'']):
        if (opt.modified
            and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
            continue
        line = b''
        if opt.status:
            # One status byte plus one separator, so the column is the
            # same width whether or not the entry carries a marker.
            if ent.is_deleted():
                status = b'D'
            elif not ent.is_valid():
                if ent.sha == index.EMPTY_SHA:
                    status = b'A'
                else:
                    status = b'M'
            else:
                status = b' '
            line += status + b' '
        if opt.hash:
            line += hexlify(ent.sha) + b' '
        if opt.long:
            line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
                                   oct(ent.gitmode).encode('ascii'))
        out.write(line + (name or b'./') + b'\n')

if opt.check and (opt['print'] or opt.status or opt.modified or opt.update):
    log('check: starting final check.\n')
    check_index(index.Reader(indexfile))

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)