1 2from __future__ import absolute_import, print_function 3import errno, os, stat, struct, tempfile 4 5from bup import compat, metadata, xstat 6from bup._helpers import UINT_MAX, bytescmp 7from bup.compat import range 8from bup.helpers import (add_error, log, merge_iter, mmap_readwrite, 9 progress, qprogress, resolve_parent, slashappend) 10 11EMPTY_SHA = b'\0' * 20 12FAKE_SHA = b'\x01' * 20 13 14INDEX_HDR = b'BUPI\0\0\0\7' 15 16# Time values are handled as integer nanoseconds since the epoch in 17# memory, but are written as xstat/metadata timespecs. This behavior 18# matches the existing metadata/xstat/.bupm code. 19 20# Record times (mtime, ctime, atime) as xstat/metadata timespecs, and 21# store all of the times in the index so they won't interfere with the 22# forthcoming metadata cache. 23INDEX_SIG = ('!' 24 'Q' # dev 25 'Q' # ino 26 'Q' # nlink 27 'qQ' # ctime_s, ctime_ns 28 'qQ' # mtime_s, mtime_ns 29 'qQ' # atime_s, atime_ns 30 'Q' # size 31 'I' # mode 32 'I' # gitmode 33 '20s' # sha 34 'H' # flags 35 'Q' # children_ofs 36 'I' # children_n 37 'Q') # meta_ofs 38 39ENTLEN = struct.calcsize(INDEX_SIG) 40FOOTER_SIG = '!Q' 41FOOTLEN = struct.calcsize(FOOTER_SIG) 42 43IX_EXISTS = 0x8000 # file exists on filesystem 44IX_HASHVALID = 0x4000 # the stored sha1 matches the filesystem 45IX_SHAMISSING = 0x2000 # the stored sha1 object doesn't seem to exist 46 47class Error(Exception): 48 pass 49 50 51class MetaStoreReader: 52 def __init__(self, filename): 53 self._file = None 54 self._file = open(filename, 'rb') 55 56 def close(self): 57 if self._file: 58 self._file.close() 59 self._file = None 60 61 def __del__(self): 62 self.close() 63 64 def metadata_at(self, ofs): 65 self._file.seek(ofs) 66 return metadata.Metadata.read(self._file) 67 68 69class MetaStoreWriter: 70 # For now, we just append to the file, and try to handle any 71 # truncation or corruption somewhat sensibly. 72 73 def __init__(self, filename): 74 # Map metadata hashes to bupindex.meta offsets. 75 self._offsets = {} 76 self._filename = filename 77 self._file = None 78 # FIXME: see how slow this is; does it matter? 79 m_file = open(filename, 'ab+') 80 try: 81 m_file.seek(0) 82 try: 83 m_off = m_file.tell() 84 m = metadata.Metadata.read(m_file) 85 while m: 86 m_encoded = m.encode() 87 self._offsets[m_encoded] = m_off 88 m_off = m_file.tell() 89 m = metadata.Metadata.read(m_file) 90 except EOFError: 91 pass 92 except: 93 log('index metadata in %r appears to be corrupt' % filename) 94 raise 95 finally: 96 m_file.close() 97 self._file = open(filename, 'ab') 98 99 def close(self): 100 if self._file: 101 self._file.close() 102 self._file = None 103 104 def __del__(self): 105 # Be optimistic. 106 self.close() 107 108 def store(self, metadata): 109 meta_encoded = metadata.encode(include_path=False) 110 ofs = self._offsets.get(meta_encoded) 111 if ofs: 112 return ofs 113 ofs = self._file.tell() 114 self._file.write(meta_encoded) 115 self._offsets[meta_encoded] = ofs 116 return ofs 117 118 119class Level: 120 def __init__(self, ename, parent): 121 self.parent = parent 122 self.ename = ename 123 self.list = [] 124 self.count = 0 125 126 def write(self, f): 127 (ofs,n) = (f.tell(), len(self.list)) 128 if self.list: 129 count = len(self.list) 130 #log('popping %r with %d entries\n' 131 # % (''.join(self.ename), count)) 132 for e in self.list: 133 e.write(f) 134 if self.parent: 135 self.parent.count += count + self.count 136 return (ofs,n) 137 138 139def _golevel(level, f, ename, newentry, metastore, tmax): 140 # close nodes back up the tree 141 assert(level) 142 default_meta_ofs = metastore.store(metadata.Metadata()) 143 while ename[:len(level.ename)] != level.ename: 144 n = BlankNewEntry(level.ename[-1], default_meta_ofs, tmax) 145 n.flags |= IX_EXISTS 146 (n.children_ofs,n.children_n) = level.write(f) 147 level.parent.list.append(n) 148 level = level.parent 149 150 # create nodes down the tree 151 while len(level.ename) < len(ename): 152 level = Level(ename[:len(level.ename)+1], level) 153 154 # are we in precisely the right place? 155 assert(ename == level.ename) 156 n = newentry or \ 157 BlankNewEntry(ename and level.ename[-1] or None, default_meta_ofs, tmax) 158 (n.children_ofs,n.children_n) = level.write(f) 159 if level.parent: 160 level.parent.list.append(n) 161 level = level.parent 162 163 return level 164 165 166class Entry: 167 def __init__(self, basename, name, meta_ofs, tmax): 168 assert basename is None or type(basename) == bytes 169 assert name is None or type(name) == bytes 170 self.basename = basename 171 self.name = name 172 self.meta_ofs = meta_ofs 173 self.tmax = tmax 174 self.children_ofs = 0 175 self.children_n = 0 176 177 def __repr__(self): 178 return ("(%r,0x%04x,%d,%d,%d,%d,%d,%d,%s/%s,0x%04x,%d,0x%08x/%d)" 179 % (self.name, self.dev, self.ino, self.nlink, 180 self.ctime, self.mtime, self.atime, 181 self.size, self.mode, self.gitmode, 182 self.flags, self.meta_ofs, 183 self.children_ofs, self.children_n)) 184 185 def packed(self): 186 try: 187 ctime = xstat.nsecs_to_timespec(self.ctime) 188 mtime = xstat.nsecs_to_timespec(self.mtime) 189 atime = xstat.nsecs_to_timespec(self.atime) 190 return struct.pack(INDEX_SIG, 191 self.dev, self.ino, self.nlink, 192 ctime[0], ctime[1], 193 mtime[0], mtime[1], 194 atime[0], atime[1], 195 self.size, self.mode, 196 self.gitmode, self.sha, self.flags, 197 self.children_ofs, self.children_n, 198 self.meta_ofs) 199 except (DeprecationWarning, struct.error) as e: 200 log('pack error: %s (%r)\n' % (e, self)) 201 raise 202 203 def stale(self, st, check_device=True): 204 if self.size != st.st_size: 205 return True 206 if self.mtime != st.st_mtime: 207 return True 208 if self.sha == EMPTY_SHA: 209 return True 210 if not self.gitmode: 211 return True 212 if self.ctime != st.st_ctime: 213 return True 214 if self.ino != st.st_ino: 215 return True 216 if self.nlink != st.st_nlink: 217 return True 218 if not (self.flags & IX_EXISTS): 219 return True 220 if check_device and (self.dev != st.st_dev): 221 return True 222 return False 223 224 def update_from_stat(self, st, meta_ofs): 225 # Should only be called when the entry is stale(), and 226 # invalidate() should almost certainly be called afterward. 227 self.dev = st.st_dev 228 self.ino = st.st_ino 229 self.nlink = st.st_nlink 230 self.ctime = st.st_ctime 231 self.mtime = st.st_mtime 232 self.atime = st.st_atime 233 self.size = st.st_size 234 self.mode = st.st_mode 235 self.flags |= IX_EXISTS 236 self.meta_ofs = meta_ofs 237 self._fixup() 238 239 def _fixup(self): 240 self.mtime = self._fixup_time(self.mtime) 241 self.ctime = self._fixup_time(self.ctime) 242 243 def _fixup_time(self, t): 244 if self.tmax != None and t > self.tmax: 245 return self.tmax 246 else: 247 return t 248 249 def is_valid(self): 250 f = IX_HASHVALID|IX_EXISTS 251 return (self.flags & f) == f 252 253 def invalidate(self): 254 self.flags &= ~IX_HASHVALID 255 256 def validate(self, gitmode, sha): 257 assert(sha) 258 assert(gitmode) 259 assert(gitmode+0 == gitmode) 260 self.gitmode = gitmode 261 self.sha = sha 262 self.flags |= IX_HASHVALID|IX_EXISTS 263 264 def exists(self): 265 return not self.is_deleted() 266 267 def sha_missing(self): 268 return (self.flags & IX_SHAMISSING) or not (self.flags & IX_HASHVALID) 269 270 def is_deleted(self): 271 return (self.flags & IX_EXISTS) == 0 272 273 def set_deleted(self): 274 if self.flags & IX_EXISTS: 275 self.flags &= ~(IX_EXISTS | IX_HASHVALID) 276 277 def is_real(self): 278 return not self.is_fake() 279 280 def is_fake(self): 281 return not self.ctime 282 283 def _cmp(self, other): 284 # Note reversed name ordering 285 bc = bytescmp(other.name, self.name) 286 if bc != 0: 287 return bc 288 vc = self.is_valid() - other.is_valid() 289 if vc != 0: 290 return vc 291 fc = self.is_fake() - other.is_fake() 292 if fc != 0: 293 return fc 294 return 0 295 296 def __eq__(self, other): 297 return self._cmp(other) == 0 298 299 def __ne__(self, other): 300 return self._cmp(other) != 0 301 302 def __lt__(self, other): 303 return self._cmp(other) < 0 304 305 def __gt__(self, other): 306 return self._cmp(other) > 0 307 308 def __le__(self, other): 309 return self._cmp(other) <= 0 310 311 def __ge__(self, other): 312 return self._cmp(other) >= 0 313 314 def write(self, f): 315 f.write(self.basename + b'\0' + self.packed()) 316 317 318class NewEntry(Entry): 319 def __init__(self, basename, name, tmax, dev, ino, nlink, 320 ctime, mtime, atime, 321 size, mode, gitmode, sha, flags, meta_ofs, 322 children_ofs, children_n): 323 Entry.__init__(self, basename, name, meta_ofs, tmax) 324 (self.dev, self.ino, self.nlink, self.ctime, self.mtime, self.atime, 325 self.size, self.mode, self.gitmode, self.sha, 326 self.flags, self.children_ofs, self.children_n 327 ) = (dev, ino, nlink, ctime, mtime, atime, 328 size, mode, gitmode, sha, flags, children_ofs, children_n) 329 self._fixup() 330 331 332class BlankNewEntry(NewEntry): 333 def __init__(self, basename, meta_ofs, tmax): 334 NewEntry.__init__(self, basename, basename, tmax, 335 0, 0, 0, 0, 0, 0, 0, 0, 336 0, EMPTY_SHA, 0, meta_ofs, 0, 0) 337 338 339class ExistingEntry(Entry): 340 def __init__(self, parent, basename, name, m, ofs): 341 Entry.__init__(self, basename, name, None, None) 342 self.parent = parent 343 self._m = m 344 self._ofs = ofs 345 (self.dev, self.ino, self.nlink, 346 self.ctime, ctime_ns, self.mtime, mtime_ns, self.atime, atime_ns, 347 self.size, self.mode, self.gitmode, self.sha, 348 self.flags, self.children_ofs, self.children_n, self.meta_ofs 349 ) = struct.unpack(INDEX_SIG, m[ofs : ofs + ENTLEN]) 350 self.atime = xstat.timespec_to_nsecs((self.atime, atime_ns)) 351 self.mtime = xstat.timespec_to_nsecs((self.mtime, mtime_ns)) 352 self.ctime = xstat.timespec_to_nsecs((self.ctime, ctime_ns)) 353 354 # effectively, we don't bother messing with IX_SHAMISSING if 355 # not IX_HASHVALID, since it's redundant, and repacking is more 356 # expensive than not repacking. 357 # This is implemented by having sha_missing() check IX_HASHVALID too. 358 def set_sha_missing(self, val): 359 val = val and 1 or 0 360 oldval = self.sha_missing() and 1 or 0 361 if val != oldval: 362 flag = val and IX_SHAMISSING or 0 363 newflags = (self.flags & (~IX_SHAMISSING)) | flag 364 self.flags = newflags 365 self.repack() 366 367 def unset_sha_missing(self, flag): 368 if self.flags & IX_SHAMISSING: 369 self.flags &= ~IX_SHAMISSING 370 self.repack() 371 372 def repack(self): 373 self._m[self._ofs:self._ofs+ENTLEN] = self.packed() 374 if self.parent and not self.is_valid(): 375 self.parent.invalidate() 376 self.parent.repack() 377 378 def iter(self, name=None, wantrecurse=None): 379 dname = name 380 if dname and not dname.endswith(b'/'): 381 dname += b'/' 382 ofs = self.children_ofs 383 assert(ofs <= len(self._m)) 384 assert(self.children_n <= UINT_MAX) # i.e. python struct 'I' 385 for i in range(self.children_n): 386 eon = self._m.find(b'\0', ofs) 387 assert(eon >= 0) 388 assert(eon >= ofs) 389 assert(eon > ofs) 390 basename = self._m[ofs : ofs + (eon - ofs)] 391 child = ExistingEntry(self, basename, self.name + basename, 392 self._m, eon+1) 393 if (not dname 394 or child.name.startswith(dname) 395 or child.name.endswith(b'/') and dname.startswith(child.name)): 396 if not wantrecurse or wantrecurse(child): 397 for e in child.iter(name=name, wantrecurse=wantrecurse): 398 yield e 399 if not name or child.name == name or child.name.startswith(dname): 400 yield child 401 ofs = eon + 1 + ENTLEN 402 403 def __iter__(self): 404 return self.iter() 405 406 407class Reader: 408 def __init__(self, filename): 409 self.filename = filename 410 self.m = b'' 411 self.writable = False 412 self.count = 0 413 f = None 414 try: 415 f = open(filename, 'rb+') 416 except IOError as e: 417 if e.errno == errno.ENOENT: 418 pass 419 else: 420 raise 421 if f: 422 b = f.read(len(INDEX_HDR)) 423 if b != INDEX_HDR: 424 log('warning: %s: header: expected %r, got %r\n' 425 % (filename, INDEX_HDR, b)) 426 else: 427 st = os.fstat(f.fileno()) 428 if st.st_size: 429 self.m = mmap_readwrite(f) 430 self.writable = True 431 self.count = struct.unpack(FOOTER_SIG, 432 self.m[st.st_size - FOOTLEN 433 : st.st_size])[0] 434 435 def __del__(self): 436 self.close() 437 438 def __len__(self): 439 return int(self.count) 440 441 def forward_iter(self): 442 ofs = len(INDEX_HDR) 443 while ofs+ENTLEN <= len(self.m)-FOOTLEN: 444 eon = self.m.find(b'\0', ofs) 445 assert(eon >= 0) 446 assert(eon >= ofs) 447 assert(eon > ofs) 448 basename = self.m[ofs : ofs + (eon - ofs)] 449 yield ExistingEntry(None, basename, basename, self.m, eon+1) 450 ofs = eon + 1 + ENTLEN 451 452 def iter(self, name=None, wantrecurse=None): 453 if len(self.m) > len(INDEX_HDR)+ENTLEN: 454 dname = name 455 if dname and not dname.endswith(b'/'): 456 dname += b'/' 457 root = ExistingEntry(None, b'/', b'/', 458 self.m, len(self.m)-FOOTLEN-ENTLEN) 459 for sub in root.iter(name=name, wantrecurse=wantrecurse): 460 yield sub 461 if not dname or dname == root.name: 462 yield root 463 464 def __iter__(self): 465 return self.iter() 466 467 def find(self, name): 468 return next((e for e in self.iter(name, wantrecurse=lambda x : True) 469 if e.name == name), 470 None) 471 472 def exists(self): 473 return self.m 474 475 def save(self): 476 if self.writable and self.m: 477 self.m.flush() 478 479 def close(self): 480 self.save() 481 if self.writable and self.m: 482 self.m.close() 483 self.m = None 484 self.writable = False 485 486 def filter(self, prefixes, wantrecurse=None): 487 for (rp, path) in reduce_paths(prefixes): 488 any_entries = False 489 for e in self.iter(rp, wantrecurse=wantrecurse): 490 any_entries = True 491 assert(e.name.startswith(rp)) 492 name = path + e.name[len(rp):] 493 yield (name, e) 494 if not any_entries: 495 # Always return at least the top for each prefix. 496 # Otherwise something like "save x/y" will produce 497 # nothing if x is up to date. 498 pe = self.find(rp) 499 assert(pe) 500 name = path + pe.name[len(rp):] 501 yield (name, pe) 502 503# FIXME: this function isn't very generic, because it splits the filename 504# in an odd way and depends on a terminating '/' to indicate directories. 505def pathsplit(p): 506 """Split a path into a list of elements of the file system hierarchy.""" 507 l = p.split(b'/') 508 l = [i + b'/' for i in l[:-1]] + l[-1:] 509 if l[-1] == b'': 510 l.pop() # extra blank caused by terminating '/' 511 return l 512 513 514class Writer: 515 def __init__(self, filename, metastore, tmax): 516 self.rootlevel = self.level = Level([], None) 517 self.f = None 518 self.count = 0 519 self.lastfile = None 520 self.filename = None 521 self.filename = filename = resolve_parent(filename) 522 self.metastore = metastore 523 self.tmax = tmax 524 (dir,name) = os.path.split(filename) 525 ffd, self.tmpname = tempfile.mkstemp(b'.tmp', filename, dir) 526 self.f = os.fdopen(ffd, 'wb', 65536) 527 self.f.write(INDEX_HDR) 528 529 def __del__(self): 530 self.abort() 531 532 def abort(self): 533 f = self.f 534 self.f = None 535 if f: 536 f.close() 537 os.unlink(self.tmpname) 538 539 def flush(self): 540 if self.level: 541 self.level = _golevel(self.level, self.f, [], None, 542 self.metastore, self.tmax) 543 self.count = self.rootlevel.count 544 if self.count: 545 self.count += 1 546 self.f.write(struct.pack(FOOTER_SIG, self.count)) 547 self.f.flush() 548 assert(self.level == None) 549 550 def close(self): 551 self.flush() 552 f = self.f 553 self.f = None 554 if f: 555 f.close() 556 os.rename(self.tmpname, self.filename) 557 558 def _add(self, ename, entry): 559 if self.lastfile and self.lastfile <= ename: 560 raise Error('%r must come before %r' 561 % (''.join(ename), ''.join(self.lastfile))) 562 self.lastfile = ename 563 self.level = _golevel(self.level, self.f, ename, entry, 564 self.metastore, self.tmax) 565 566 def add(self, name, st, meta_ofs, hashgen = None): 567 endswith = name.endswith(b'/') 568 ename = pathsplit(name) 569 basename = ename[-1] 570 #log('add: %r %r\n' % (basename, name)) 571 flags = IX_EXISTS 572 sha = None 573 if hashgen: 574 (gitmode, sha) = hashgen(name) 575 flags |= IX_HASHVALID 576 else: 577 (gitmode, sha) = (0, EMPTY_SHA) 578 if st: 579 isdir = stat.S_ISDIR(st.st_mode) 580 assert(isdir == endswith) 581 e = NewEntry(basename, name, self.tmax, 582 st.st_dev, st.st_ino, st.st_nlink, 583 st.st_ctime, st.st_mtime, st.st_atime, 584 st.st_size, st.st_mode, gitmode, sha, flags, 585 meta_ofs, 0, 0) 586 else: 587 assert(endswith) 588 meta_ofs = self.metastore.store(metadata.Metadata()) 589 e = BlankNewEntry(basename, meta_ofs, self.tmax) 590 e.gitmode = gitmode 591 e.sha = sha 592 e.flags = flags 593 self._add(ename, e) 594 595 def add_ixentry(self, e): 596 e.children_ofs = e.children_n = 0 597 self._add(pathsplit(e.name), e) 598 599 def new_reader(self): 600 self.flush() 601 return Reader(self.tmpname) 602 603 604def _slashappend_or_add_error(p, caller): 605 """Return p, after ensuring it has a single trailing slash if it names 606 a directory, unless there's an OSError, in which case, call 607 add_error() and return None.""" 608 try: 609 st = os.lstat(p) 610 except OSError as e: 611 add_error('%s: %s' % (caller, e)) 612 return None 613 else: 614 if stat.S_ISDIR(st.st_mode): 615 return slashappend(p) 616 return p 617 618 619def unique_resolved_paths(paths): 620 "Return a collection of unique resolved paths." 621 rps = (_slashappend_or_add_error(resolve_parent(p), 'unique_resolved_paths') 622 for p in paths) 623 return frozenset((x for x in rps if x is not None)) 624 625 626def reduce_paths(paths): 627 xpaths = [] 628 for p in paths: 629 rp = _slashappend_or_add_error(resolve_parent(p), 'reduce_paths') 630 if rp: 631 xpaths.append((rp, slashappend(p) if rp.endswith(b'/') else p)) 632 xpaths.sort() 633 634 paths = [] 635 prev = None 636 for (rp, p) in xpaths: 637 if prev and (prev == rp 638 or (prev.endswith(b'/') and rp.startswith(prev))): 639 continue # already superceded by previous path 640 paths.append((rp, p)) 641 prev = rp 642 paths.sort(reverse=True) 643 return paths 644 645 646def merge(*iters): 647 def pfunc(count, total): 648 qprogress('bup: merging indexes (%d/%d)\r' % (count, total)) 649 def pfinal(count, total): 650 progress('bup: merging indexes (%d/%d), done.\n' % (count, total)) 651 return merge_iter(iters, 1024, pfunc, pfinal, key='name') 652