# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2021 Edgewall Software
# Copyright (C) 2006-2011, Herbert Valerio Riedel <hvr@gnu.org>
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at https://trac.edgewall.org/log/.

import codecs
import contextlib
import io
import os
import re
import subprocess
import tempfile
import weakref
from collections import deque
from functools import partial
from subprocess import DEVNULL, PIPE
from threading import Lock

from trac.core import TracBaseError
from trac.util import terminate
from trac.util.compat import close_fds
from trac.util.datefmt import time_now
from trac.util.text import exception_to_unicode, to_unicode

__all__ = ['GitError', 'GitErrorSha', 'Storage', 'StorageFactory']


class GitError(TracBaseError):
    """Generic error raised by the git wrapper classes of this module."""
    pass


class GitErrorSha(GitError):
    """Error raised when a revision/object id is unknown or unparsable."""
    pass


# Helper functions

def parse_commit(raw):
    """Parse the raw content of a commit (as given by `git cat-file -p <rev>`).

    The header part consists of `<key> <value>` lines; a line starting with
    a single space continues the value of the preceding header line (as used
    for e.g. `gpgsig` or `mergetag`).  The first empty line separates the
    header from the commit message.

    Return the commit message and a dict of properties, mapping each header
    key to the list of its values (continuation lines are joined with
    newlines).

    Raise `GitErrorSha` if `raw` is empty or if the header starts with a
    continuation line.
    """
    if not raw:
        raise GitErrorSha
    lines = raw.splitlines()
    if not lines:
        raise GitErrorSha

    props = {}
    values = None  # list of values of the most recently seen header key
    idx = 0
    count = len(lines)
    while idx < count:
        line = lines[idx]
        idx += 1
        if not line:
            # empty line terminates the header; the rest is the message
            break
        if line[0] == ' ':
            if values is None:
                # continuation line without a header line to continue
                raise GitErrorSha
            # the continuation always extends the *latest* value, so that
            # repeated multi-line headers with the same key stay separate
            values[-1] = '\n'.join((values[-1], line[1:]))
        else:
            fields = line.split(None, 1)
            key = fields[0]
            value = fields[1] if len(fields) > 1 else ''
            values = props.setdefault(key, [])
            values.append(value.strip())
    # a header which is not followed by an empty line has an empty message
    return '\n'.join(lines[idx:]), props


# matches the escape sequences used by git when quoting pathnames
_unquote_re = re.compile(r'\\(?:[abtnvfr"\\]|[0-7]{3})'.encode('utf-8'))
# lookup table: escape character -> byte value it stands for
_unquote_chars = bytearray(128)
for _key, _val in zip(b'abtnvfr"\\', b'\a\b\t\n\v\f\r"\\'):
    _unquote_chars[_key] = _val
del _key, _val
_unquote_chars = bytes(_unquote_chars)


def _unquote(path):
    """Undo the C-style quoting of a pathname given as bytes.

    A path which is not enclosed in double quotes is returned unchanged.
    """
    if path.startswith(b'"') and path.endswith(b'"'):
        def replace(match):
            match = match.group(0)
            if len(match) == 4:
                code = int(match[1:], 8)  # \ooo
            else:
                code = _unquote_chars[match[1]]
            return b'%c' % code
        path = _unquote_re.sub(replace, path[1:-1])
    return path


def _rev_u(rev):
    """Convert a revision given as ascii bytes to `str` (`None` is kept)."""
    if rev is not None:
        rev = str(rev, 'ascii')
    return rev


def _rev_b(rev):
    """Convert a revision given as `str` to ascii bytes (`None` is kept)."""
    if rev is not None:
        rev = rev.encode('ascii')
    return rev


class GitCore(object):
    """Low-level wrapper around git executable"""

    def __init__(self, git_dir=None, git_bin='git', log=None,
                 fs_encoding=None):
        self.__git_bin = git_bin
        self.__git_dir = git_dir
        self.__log = log
        self.__fs_encoding = fs_encoding

    def __repr__(self):
        return '<GitCore bin="%s" dir="%s">' % (self.__git_bin,
                                                self.__git_dir)

    def __build_git_cmd(self, gitcmd, *args):
        """construct command tuple for git call suitable for Popen()"""

        cmd = [self.__git_bin]
        if self.__git_dir:
            cmd.append('--git-dir=%s' % self.__git_dir)
        cmd.append(gitcmd)
        cmd.extend(args)

        fs_encoding = self.__fs_encoding
        if fs_encoding is not None:
            if os.name == 'nt':
                # If Python 3 for Windows, Popen() accepts only str instances
                def to_cmd_encoding(arg):
                    if isinstance(arg, bytes):
                        arg = arg.decode(fs_encoding, 'replace')
                    return arg
            else:
                def to_cmd_encoding(arg):
                    if isinstance(arg, str):
                        arg = arg.encode(fs_encoding, 'replace')
                    return arg
            cmd = list(map(to_cmd_encoding, cmd))
        return cmd

    def __pipe(self, git_cmd, *cmd_args, **kw):
        """start a git command and return the `Popen` object; stdin, stdout
        and stderr are pipes unless overridden by keyword arguments"""
        kw.setdefault('stdin', PIPE)
        kw.setdefault('stdout', PIPE)
        kw.setdefault('stderr', PIPE)
        return subprocess.Popen(self.__build_git_cmd(git_cmd, *cmd_args),
                                close_fds=close_fds, **kw)

    def __execute(self, *args):
        """execute git command and return the content of its stdout (bytes)

        A non-zero exit status or output on stderr is only logged at debug
        level, it does not raise an exception.
        """

        #print("DEBUG:", args, file=sys.stderr)

        with self.__pipe(*args, stdin=DEVNULL) as p:
            stdout_data, stderr_data = p.communicate()
        if self.__log and (p.returncode != 0 or stderr_data):
            self.__log.debug('%s exits with %d, dir: %r, args: %r, stderr: %r',
                             self.__git_bin, p.returncode, self.__git_dir,
                             args, stderr_data)

        return stdout_data

    def cat_file_batch(self):
        """start `git cat-file --batch` and return the `Popen` object"""
        return self.__pipe('cat-file', '--batch')

    def log_pipe(self, *cmd_args):
        """start `git log` with the given arguments and return the `Popen`
        object"""
        return self.__pipe('log', *cmd_args)

    def diff_tree_pipe(self):
        """start `git diff-tree --stdin` and return the `Popen` object"""
        return self.__pipe('diff-tree', '--stdin', '--root', '-z', '-r', '-M')

    def __getattr__(self, name):
        # Any other public attribute is mapped to the git sub-command of the
        # same name (with '_' replaced by '-'), e.g. `rev_list(...)` executes
        # `git rev-list ...` and returns its stdout.
        if name.startswith('_') or \
                name in ('cat_file_batch', 'log_pipe', 'diff_tree_pipe'):
            raise AttributeError(name)
        return partial(self.__execute, name.replace('_', '-'))

    __is_sha_pat = re.compile(b'[0-9A-Fa-f]{4,40}$')

    @classmethod
    def is_sha(cls, sha):
        """returns whether sha is a potential sha id
        (i.e. proper hexstring between 4 and 40 characters)
        """

        # quick test before starting up regexp matcher
        if not (4 <= len(sha) <= 40):
            return False

        return bool(cls.__is_sha_pat.match(sha))


class SizedDict(dict):
    """Size-bounded dictionary with FIFO replacement strategy"""

    def __init__(self, max_size=0):
        dict.__init__(self)
        self.__max_size = max_size
        self.__key_fifo = deque()  # keys in order of their first insertion
        self.__lock = Lock()

    def __setitem__(self, name, value):
        with self.__lock:
            assert len(self) == len(self.__key_fifo)  # invariant

            if not self.__contains__(name):
                self.__key_fifo.append(name)

            rc = dict.__setitem__(self, name, value)

            # evict the oldest entries until the size limit is met again
            while len(self.__key_fifo) > self.__max_size:
                self.__delitem__(self.__key_fifo.popleft())

            assert len(self) == len(self.__key_fifo)  # invariant

            return rc

    def setdefault(self, *_):
        raise NotImplementedError("SizedDict has no setdefault() method")


class StorageFactory(object):
    """Provide `Storage` instances which are shared per repository path.

    Instances are kept in a weak-value dictionary; requesting an instance
    with `weak=False` additionally keeps a strong reference to it.
    """

    __dict = weakref.WeakValueDictionary()
    __dict_nonweak = {}
    __dict_rev_cache = {}
    __dict_lock = Lock()

    def __init__(self, repo, log, weak=True, git_bin='git',
                 git_fs_encoding=None):
        self.logger = log

        with self.__dict_lock:
            if weak:
                # remove additional reference which is created
                # with non-weak argument
                try:
                    del self.__dict_nonweak[repo]
                except KeyError:
                    pass
            try:
                i = self.__dict[repo]
            except KeyError:
                rev_cache = self.__dict_rev_cache.get(repo)
                i = Storage(repo, log, git_bin, git_fs_encoding, rev_cache)
                self.__dict[repo] = i

            # create additional reference depending on 'weak' argument
            if not weak:
                self.__dict_nonweak[repo] = i

        self.__inst = i
        self.logger.debug("requested %s PyGIT.Storage instance for '%s'",
                          'weak' if weak else 'non-weak', repo)

    def getInstance(self):
        """Return the `Storage` instance selected by the constructor."""
        return self.__inst

    @classmethod
    def set_rev_cache(cls, repo, rev_cache):
        """Remember `rev_cache` for `repo`; it is handed over to the next
        `Storage` instance created for that repository."""
        with cls.__dict_lock:
            cls.__dict_rev_cache[repo] = rev_cache

    @classmethod
    def _clean(cls):
        """For testing purpose only"""
        with cls.__dict_lock:
            cls.__dict.clear()
            cls.__dict_nonweak.clear()
            cls.__dict_rev_cache.clear()


class Storage(object):
    """High-level wrapper around GitCore with in-memory caching"""

    __SREV_MIN = 4  # minimum short-rev length

    class RevCache(object):
        """Snapshot of the commit graph and the refs of a repository.

        Either all fields are empty or all fields are non-empty, anything
        else is rejected with a `ValueError`.
        """

        __slots__ = ('youngest_rev', 'oldest_rev', 'rev_dict', 'refs_dict',
                     'srev_dict')

        def __init__(self, youngest_rev, oldest_rev, rev_dict, refs_dict,
                     srev_dict):
            self.youngest_rev = youngest_rev
            self.oldest_rev = oldest_rev
            self.rev_dict = rev_dict
            self.refs_dict = refs_dict
            self.srev_dict = srev_dict
            if youngest_rev is not None and oldest_rev is not None and \
                    rev_dict and refs_dict and srev_dict:
                pass  # all fields are not empty
            elif not youngest_rev and not oldest_rev and \
                    not rev_dict and not refs_dict and not srev_dict:
                pass  # all fields are empty
            else:
                raise ValueError('Invalid RevCache fields: %r' % self)

        @classmethod
        def empty(cls):
            """Return a cache without any entries."""
            return cls(None, None, {}, {}, {})

        def __repr__(self):
            return 'RevCache(youngest_rev=%r, oldest_rev=%r, ' \
                   'rev_dict=%d entries, refs_dict=%d entries, ' \
                   'srev_dict=%d entries)' % \
                   (self.youngest_rev, self.oldest_rev, len(self.rev_dict),
                    len(self.refs_dict), len(self.srev_dict))

        def iter_branches(self):
            """Yield `(name, rev, is_head)` for each `refs/heads/` ref."""
            head = self.refs_dict.get(b'HEAD')
            for refname, rev in self.refs_dict.items():
                if refname.startswith(b'refs/heads/'):
                    yield refname[11:], rev, refname == head

        def iter_tags(self):
            """Yield `(name, rev)` for each `refs/tags/` ref."""
            for refname, rev in self.refs_dict.items():
                if refname.startswith(b'refs/tags/'):
                    yield refname[10:], rev

    @staticmethod
    def __rev_key(rev):
        """Return the integer value of the first 4 hex digits of `rev`,
        which is the key used for the short-rev lookup table."""
        assert len(rev) >= 4
        #assert GitCore.is_sha(rev)
        srev_key = int(rev[:4], 16)
        assert 0 <= srev_key <= 0xffff
        return srev_key

    @staticmethod
    def git_version(git_bin='git'):
        """Execute `<git_bin> version` and return a dict describing the
        installed version.

        The dict has the keys `v_str`, `v_tuple`, `v_min_tuple`, `v_min_str`
        and `v_compatible`.  Raise `GitError` if the version cannot be
        retrieved or parsed.
        """
        GIT_VERSION_MIN_REQUIRED = (1, 5, 6)
        try:
            g = GitCore(git_bin=git_bin)
            [v] = g.version().splitlines()
            version = v.strip().split()[2]
            # 'version' has usually at least 3 numeric version
            # components, e.g.::
            #  1.5.4.2
            #  1.5.4.3.230.g2db511
            #  1.5.4.GIT

            def try_int(s):
                try:
                    return int(s)
                except ValueError:
                    return s

            split_version = tuple(map(try_int, version.split(b'.')))

            # Only the leading numeric components take part in the
            # comparison, as comparing a non-numeric component (e.g. b'GIT')
            # with an int raises a TypeError.
            numeric_version = []
            for component in split_version:
                if not isinstance(component, int):
                    break
                numeric_version.append(component)

            result = {}
            result['v_str'] = version
            result['v_tuple'] = split_version
            result['v_min_tuple'] = GIT_VERSION_MIN_REQUIRED
            result['v_min_str'] = ".".join(map(str, GIT_VERSION_MIN_REQUIRED))
            result['v_compatible'] = \
                tuple(numeric_version) >= GIT_VERSION_MIN_REQUIRED
            return result

        except Exception as e:
            raise GitError("Could not retrieve GIT version (tried to "
                           "execute/parse '%s --version' but got %s)"
                           % (git_bin, repr(e))) from e

    def __init__(self, git_dir, log, git_bin='git', git_fs_encoding=None,
                 rev_cache=None):
        """Initialize PyGit.Storage instance

        `git_dir`: path to .git folder;
                this setting is not affected by the `git_fs_encoding` setting

        `log`: logger instance

        `git_bin`: path to executable
                this setting is not affected by the `git_fs_encoding` setting

        `git_fs_encoding`: encoding used for paths stored in git repository;
                if `None`, no implicit decoding/encoding to/from
                unicode objects is performed, and bytestrings are
                returned instead

        `rev_cache`: optional `RevCache` used as initial revision cache
        """

        self.logger = log

        self.commit_encoding = None

        # caches
        self.__rev_cache = rev_cache or self.RevCache.empty()
        self.__rev_cache_refresh = True
        self.__rev_cache_lock = Lock()

        # cache the last 200 commit messages
        self.__commit_msg_cache = SizedDict(200)
        self.__commit_msg_lock = Lock()

        self.__cat_file_pipe = None
        self.__cat_file_pipe_lock = Lock()
        self.__diff_tree_pipe = None
        self.__diff_tree_pipe_lock = Lock()

        if git_fs_encoding is not None:
            # validate encoding name
            codecs.lookup(git_fs_encoding)

            # setup conversion functions
            self._fs_to_unicode = lambda s: s.decode(git_fs_encoding,
                                                     'replace')
            self._fs_from_unicode = lambda s: s.encode(git_fs_encoding)
        else:
            # pass bytestrings as-is w/o any conversion
            self._fs_to_unicode = self._fs_from_unicode = lambda s: s

        # simple sanity checking
        try:
            os.listdir(git_dir)
        except EnvironmentError as e:
            self._raise_not_readable(git_dir, e)
        if not self._control_files_exist(git_dir):
            # maybe the path of a working copy was given, try its .git folder
            dot_git_dir = os.path.join(git_dir, '.git')
            try:
                os.listdir(dot_git_dir)
            except EnvironmentError:
                missing = True
            else:
                if self._control_files_exist(dot_git_dir):
                    missing = False
                    git_dir = dot_git_dir
                else:
                    missing = True
            if missing:
                raise GitError("Git control files not found in '%s'" % git_dir)

        # at least, check that the HEAD file is readable
        try:
            with open(os.path.join(git_dir, 'HEAD'), 'rb'):
                pass
        except EnvironmentError as e:
            self._raise_not_readable(git_dir, e)

        self.repo = GitCore(git_dir, git_bin, log, git_fs_encoding)
        self.repo_path = git_dir

        self.logger.debug("PyGIT.Storage instance for '%s' is constructed",
                          git_dir)

    def _cleanup_proc(self, proc):
        """Close the pipes of `proc` (if any) and terminate the process."""
        if proc:
            for f in (proc.stdin, proc.stdout, proc.stderr):
                if f:
                    f.close()
            terminate(proc)
            proc.wait()

    def __del__(self):
        with self.__cat_file_pipe_lock:
            self._cleanup_proc(self.__cat_file_pipe)
        with self.__diff_tree_pipe_lock:
            self._cleanup_proc(self.__diff_tree_pipe)

    #
    # cache handling
    #

    def invalidate_rev_cache(self):
        """Request a check for changes on next access of the rev cache."""
        with self.__rev_cache_lock:
            self.__rev_cache_refresh = True

    @property
    def rev_cache(self):
        """Retrieve revision cache

        may rebuild cache on the fly if required

        returns RevCache tuple
        """
        with self.__rev_cache_lock:
            self._refresh_rev_cache()
            return self.__rev_cache

    def _refresh_rev_cache(self, force=False):
        """Rebuild the revision cache if a refresh is pending (or `force` is
        true) and the refs of the repository have changed.

        Return whether the cache has been rebuilt.  The callers in this
        class hold the rev cache lock while calling this method.
        """
        refreshed = False
        if force or self.__rev_cache_refresh:
            self.__rev_cache_refresh = False
            refs = self._get_refs()
            if self.__rev_cache.refs_dict != refs:
                self.logger.debug("Detected changes in git repository "
                                  "'%s'", self.repo_path)
                rev_cache = self._build_rev_cache(refs)
                self.__rev_cache = rev_cache
                StorageFactory.set_rev_cache(self.repo_path, rev_cache)
                refreshed = True
            else:
                self.logger.debug("Detected no changes in git repository "
                                  "'%s'", self.repo_path)
        return refreshed

    def _build_rev_cache(self, refs):
        """Build and return a new `RevCache` from the output of
        `git rev-list --parents --topo-order --all` and the given `refs`."""
        self.logger.debug("triggered rebuild of commit tree db for '%s'",
                          self.repo_path)
        ts0 = time_now()

        new_db = {}  # db
        new_sdb = {}  # short_rev db

        # helper for reusing strings
        revs_seen = {}

        def _rev_reuse(rev):
            return revs_seen.setdefault(rev, rev)

        refs = {refname: _rev_reuse(rev) for refname, rev in refs.items()}
        head_revs = {rev for refname, rev in refs.items()
                     if refname.startswith(b'refs/heads/')}
        rev_list = [list(map(_rev_reuse, line.split()))
                    for line in self.repo.rev_list('--parents', '--topo-order',
                                                   '--all').splitlines()]
        revs_seen = None

        if rev_list:
            # first rev seen is assumed to be the youngest one
            youngest = rev_list[0][0]
            # last rev seen is assumed to be the oldest one
            oldest = rev_list[-1][0]
        else:
            youngest = oldest = None

        # helper for sharing equal sets of reachable heads
        rheads_seen = {}

        def _rheads_reuse(rheads):
            rheads = frozenset(rheads)
            return rheads_seen.setdefault(rheads, rheads)

        __rev_key = self.__rev_key
        for ord_rev, revs in enumerate(rev_list):
            rev = revs[0]
            parents = revs[1:]

            # shortrev "hash" map
            new_sdb.setdefault(__rev_key(rev), []).append(rev)

            # new_db[rev] = (children(rev), parents(rev),
            #                ordinal_id(rev), rheads(rev))
            if rev in new_db:
                # (incomplete) entry was already created by children
                _children, _parents, _ord_rev, _rheads = new_db[rev]
                assert _children
                assert not _parents
                assert _ord_rev == 0
            else:  # new entry
                _children = set()
                _rheads = set()
            if rev in head_revs:
                _rheads.add(rev)

            # create/update entry
            # transform into frozenset and tuple since entry will be final
            new_db[rev] = (frozenset(_children), tuple(parents), ord_rev + 1,
                           _rheads_reuse(_rheads))

            # update parents(rev)s
            for parent in parents:
                # by default, a dummy ordinal_id is used for the mean-time
                _children, _parents, _ord_rev, _rheads2 = \
                    new_db.setdefault(parent, (set(), [], 0, set()))

                # update parent(rev)'s children
                _children.add(rev)

                # update parent(rev)'s rheads
                _rheads2.update(_rheads)

        rheads_seen = None

        # convert sdb either to dict or array depending on size
        tmp = [()] * (max(new_sdb) + 1) if len(new_sdb) > 5000 else {}
        try:
            while True:
                k, v = new_sdb.popitem()
                tmp[k] = tuple(v)
        except KeyError:
            pass
        assert len(new_sdb) == 0
        new_sdb = tmp

        rev_cache = self.RevCache(youngest, oldest, new_db, refs, new_sdb)
        self.logger.debug("rebuilt commit tree db for '%s' with %d entries "
                          "(took %.1f ms)", self.repo_path, len(new_db),
                          1000 * (time_now() - ts0))
        return rev_cache

    def _get_refs(self):
        """Return a dict mapping ref names to revisions (both as bytes).

        Tags are mapped to the revision they dereference to, and `HEAD` is
        mapped to the *name* of the ref it points to (if that ref exists).
        """
        refs = {}
        tags = {}

        for line in self.repo.show_ref('--dereference').splitlines():
            if b' ' not in line:
                continue
            rev, refname = line.split(b' ', 1)
            if refname.endswith(b'^{}'):  # derefered tag
                tags[refname[:-3]] = rev
            else:
                refs[refname] = rev
        refs.update(tags)

        if refs:
            refname = (self.repo.symbolic_ref('-q', 'HEAD') or b'').strip()
            if refname in refs:
                refs[b'HEAD'] = refname

        return refs

    def get_branches(self):
        """returns list of (local) branches, with active (= HEAD) one being
        the first item
        """
        def fn(args):
            name, rev, head = args
            return not head, name
        _fs_to_unicode = self._fs_to_unicode
        branches = sorted(((_fs_to_unicode(name), _rev_u(rev), head)
                           for name, rev, head
                           in self.rev_cache.iter_branches()), key=fn)
        return [(name, rev) for name, rev, head in branches]

    def get_refs(self):
        """yield (refname, rev) pairs for all refs except HEAD"""
        _fs_to_unicode = self._fs_to_unicode
        for refname, rev in self.rev_cache.refs_dict.items():
            if refname != b'HEAD':
                yield _fs_to_unicode(refname), _rev_u(rev)

    def get_commits(self):
        """return the dict of all commits, keyed by sha id given as bytes;
        the values are (children, parents, ordinal_id, rheads) tuples
        """
        return self.rev_cache.rev_dict

    def oldest_rev(self):
        return _rev_u(self.rev_cache.oldest_rev)

    def youngest_rev(self):
        return _rev_u(self.rev_cache.youngest_rev)

    def get_branch_contains(self, sha, resolve=False):
        """return list of reachable head sha ids or (names, sha) pairs if
        resolve is true

        see also get_branches()
        """

        sha = _rev_b(sha)
        _rev_cache = self.rev_cache

        try:
            rheads = _rev_cache.rev_dict[sha][3]
        except KeyError:
            return []

        if resolve:
            _fs_to_unicode = self._fs_to_unicode
            rv = [(_fs_to_unicode(name), _rev_u(rev))
                  for name, rev, head in _rev_cache.iter_branches()
                  if rev in rheads]
            rv.sort(key=lambda v: v[0])
            return rv
        else:
            return list(map(_rev_u, rheads))

    def history_relative_rev(self, sha, rel_pos):
        """return the sha id of the commit which is `rel_pos` positions away
        from `sha` in the linearized history (ordinal ids of the rev cache),
        or None if there is no such commit

        Raise `GitErrorSha` if `sha` is unknown.
        """

        def get_history_relative_rev(sha, rel_pos):
            rev_dict = self.get_commits()

            if sha not in rev_dict:
                raise GitErrorSha()

            if rel_pos == 0:
                return sha

            lin_rev = rev_dict[sha][2] + rel_pos

            if lin_rev < 1 or lin_rev > len(rev_dict):
                return None

            for k, v in rev_dict.items():
                if v[2] == lin_rev:
                    return k

            # should never be reached if rev_dict is consistent
            raise GitError("internal inconsistency detected")

        result = get_history_relative_rev(_rev_b(sha), rel_pos)
        return _rev_u(result)

    def hist_next_revision(self, sha):
        return self.history_relative_rev(sha, -1)

    def hist_prev_revision(self, sha):
        return self.history_relative_rev(sha, +1)

    def get_commit_encoding(self):
        """return the name (str) of the encoding of the commit messages

        The value of `i18n.commitEncoding` is used if it is set and names a
        known codec, otherwise 'utf-8'.  The result is cached.
        """
        if self.commit_encoding is None:
            encoding = 'utf-8'
            value = self.repo.config('--get', 'i18n.commitEncoding').strip()
            if value:
                # git's output is bytes, but str() and codecs expect the
                # name of the encoding to be a str
                if isinstance(value, bytes):
                    value = value.decode('ascii', 'replace')
                try:
                    codecs.lookup(value)
                except LookupError:
                    self.logger.warning("Unknown i18n.commitEncoding %r in "
                                        "'%s', using utf-8 instead", value,
                                        self.repo_path)
                else:
                    encoding = value
            self.commit_encoding = encoding

        return self.commit_encoding

    def head(self):
        """get current HEAD commit id"""
        return self.verifyrev('HEAD')

    def cat_file(self, kind, sha):
        """return the content (bytes) of the object `sha` of type `kind`"""
        return self._cat_file_reader(kind, sha).read()

    def _cat_file_reader(self, kind, sha):
        """return a file-like object with the content of the object `sha`

        `kind` and `sha` are bytes; the object is retrieved using a
        long-running `git cat-file --batch` process.

        Raise `GitError` if the object is missing, is not of type `kind` or
        if the communication with the git process fails.  Whenever the
        output stream of the process may be out of sync with the requests,
        the process is discarded and a new one is started on next call.
        """
        with self.__cat_file_pipe_lock:
            if self.__cat_file_pipe is None:
                self.__cat_file_pipe = self.repo.cat_file_batch()
            proc = self.__cat_file_pipe

            buf = None
            in_sync = False  # whether the reply has been consumed entirely
            try:
                proc.stdin.write(sha + b'\n')
                proc.stdin.flush()

                split_stdout_line = proc.stdout.readline().split()
                if len(split_stdout_line) != 3:
                    # a "<sha> missing" reply isn't followed by any content,
                    # the pipe can be reused in that case
                    in_sync = split_stdout_line[-1:] == [b'missing']
                    raise GitError("internal error (could not split line %s)" %
                                   repr(split_stdout_line))

                _sha, _type, _size = split_stdout_line

                if _type != kind:
                    raise GitError("internal error (got unexpected object "
                                   "kind %r, expected %r)" % (_type, kind))

                size = int(_size)

                # stdout.read() can return fewer bytes than requested,
                # especially if a pipe buffers because the contents are
                # larger than 64k.
                stdout_read = proc.stdout.read
                if size > 32 * 1024 * 1024:
                    buf = tempfile.TemporaryFile()
                else:
                    buf = io.BytesIO()
                # the content is followed by a newline which is dropped
                remaining = size + 1
                while remaining > 0:
                    chunk = stdout_read(min(remaining, 65536))
                    if not chunk:
                        # No new data, let's abort
                        raise GitError("internal error (expected to read %d "
                                       "bytes, but only got %d)" %
                                       (size + 1, size + 1 - remaining))
                    remaining -= len(chunk)
                    buf.write(chunk if remaining > 0 else chunk[:-1])

                in_sync = True
                buf.seek(0)
                return buf

            except EnvironmentError as e:
                self.logger.warning("closing cat_file pipe: %s",
                                    exception_to_unicode(e))
                raise GitError("internal error (cat_file pipe failed: %s)" %
                               exception_to_unicode(e)) from e

            finally:
                if not in_sync:
                    # There was an error, we should close the pipe to get to
                    # a consistent state (Otherwise it happens that next time
                    # we call cat_file we get payload from previous call)
                    if buf is not None:
                        buf.close()
                    self._cleanup_proc(proc)
                    self.__cat_file_pipe = None

    def verifyrev(self, rev):
        """verify/lookup given revision object and return a sha id or None
        if lookup failed
        """

        def get_verifyrev(rev):
            _rev_cache = self.rev_cache

            if GitCore.is_sha(rev):
                # maybe it's a short or full rev
                fullrev = self.fullrev(rev)
                if fullrev:
                    return fullrev

            refs = _rev_cache.refs_dict
            if rev == b'HEAD':  # resolve HEAD
                refname = refs.get(rev)
                if refname in refs:
                    return refs[refname]
            resolved = refs.get(b'refs/heads/' + rev)  # resolve branch
            if resolved:
                return resolved
            resolved = refs.get(b'refs/tags/' + rev)  # resolve tag
            if resolved:
                return resolved

            # fall back to external git calls
            rc = self.repo.rev_parse('--verify', rev).strip()
            if not rc:
                return None
            if rc in _rev_cache.rev_dict:
                return rc

            return None

        result = get_verifyrev(self._fs_from_unicode(rev))
        return _rev_u(result)

    def shortrev(self, rev, min_len=7):
        """return an unambiguous abbreviation of `rev` having at least
        `min_len` characters, or None if `rev` is unknown
        """

        def get_shortrev(rev, min_len):
            """try to shorten sha id"""
            #try to emulate the following:
            #return self.repo.rev_parse("--short", rev).strip()

            if min_len < self.__SREV_MIN:
                min_len = self.__SREV_MIN

            _rev_cache = self.rev_cache

            if rev not in _rev_cache.rev_dict:
                return None

            srev = rev[:min_len]
            srevs = set(_rev_cache.srev_dict[self.__rev_key(rev)])

            if len(srevs) == 1:
                return srev  # we already got a unique id

            # find a shortened id for which rev doesn't conflict with
            # the other ones from srevs
            crevs = srevs - {rev}

            for l in range(min_len+1, 40):
                srev = rev[:l]
                if srev not in [r[:l] for r in crevs]:
                    return srev

            return rev  # worst-case, all except the last character match

        return _rev_u(get_shortrev(_rev_b(rev), min_len))

    def fullrev(self, rev):
        """try to reverse shortrev()

        `rev` is given as bytes; return the full sha id (bytes), or None if
        `rev` is unknown or ambiguous.
        """

        _rev_cache = self.rev_cache

        # short-cut
        if len(rev) == 40 and rev in _rev_cache.rev_dict:
            return rev

        if not GitCore.is_sha(rev):
            return None

        try:
            srevs = _rev_cache.srev_dict[self.__rev_key(rev)]
        except (KeyError, IndexError):
            # srev_dict is a list for big repositories, in which case a
            # key beyond its end raises an IndexError
            return None

        resolved = None
        for s in srevs:
            if s.startswith(rev):
                if resolved is not None:
                    return None
                resolved = s
        return resolved

    def get_tags(self, rev=None):
        """return the sorted list of tag names, restricted to the tags
        pointing to `rev` if it is given
        """
        if rev is not None:
            rev = _rev_b(rev)
        return sorted(self._fs_to_unicode(name)
                      for name, rev_ in self.rev_cache.iter_tags()
                      if rev is None or rev == rev_)

    def ls_tree(self, rev, path='', recursive=False):
        """calls `git ls-tree` and returns a list of tuples of the kind
        (mode, type, sha, size, fname)"""
        rev = self._fs_from_unicode(rev) if rev else b'HEAD'  # paranoia
        path = self._fs_from_unicode(path).lstrip(b'/') or b'.'
        tree = self.repo.ls_tree('-zlr' if recursive else '-zl',
                                 rev, '--', path).split(b'\0')

        def split_ls_tree_line(l):
            """split according to '<mode> <type> <sha> <size>\t<fname>'"""

            meta, fname = l.split(b'\t', 1)
            _mode, _type, _sha, _size = meta.split()
            _type = str(_type, 'utf-8')
            _sha = _rev_u(_sha)
            _mode = int(_mode, 8)
            _size = None if _size == b'-' else int(_size)
            fname = self._fs_to_unicode(fname)
            return _mode, _type, _sha, _size, fname

        return [split_ls_tree_line(e) for e in tree if e]

    def read_commit(self, commit_id):
        """return the (message, properties) pair of the given commit, see
        `parse_commit()`

        Raise `GitError` if `commit_id` is empty and `GitErrorSha` if the
        commit is unknown.
        """
        if not commit_id:
            raise GitError("read_commit called with empty commit_id")

        commit_id_orig = commit_id
        commit_id = self.fullrev(_rev_b(commit_id))

        rev_dict = self.get_commits()
        if commit_id not in rev_dict:
            self.logger.info("read_commit failed for %r (%r)",
                             commit_id, commit_id_orig)
            raise GitErrorSha

        with self.__commit_msg_lock:
            if commit_id in self.__commit_msg_cache:
                # cache hit
                result = self.__commit_msg_cache[commit_id]
                return result[0], dict(result[1])

        # cache miss
        raw = self.cat_file(b'commit', commit_id)
        raw = str(raw, self.get_commit_encoding(), 'replace')
        result = parse_commit(raw)
        with self.__commit_msg_lock:
            self.__commit_msg_cache[commit_id] = result
        # a copy of the properties is returned, the cached ones stay intact
        return result[0], dict(result[1])

    def get_file(self, sha):
        """return a file-like object with the content of the blob `sha`"""
        sha = _rev_b(sha)
        return self._cat_file_reader(b'blob', sha)

    def get_obj_size(self, sha):
        """return the size of the object `sha` as reported by
        `git cat-file -s`; raise `GitErrorSha` if it cannot be determined
        """
        sha = _rev_b(sha)
        try:
            obj_size = int(self.repo.cat_file(b'-s', sha).strip())
        except ValueError:
            raise GitErrorSha("object '%s' not found" % sha)
        return obj_size

    def children(self, sha):
        """return the sorted sha ids of the children of the given commit"""
        sha = _rev_b(sha)
        rev_dict = self.get_commits()
        try:
            item = rev_dict[sha]
        except KeyError:
            return ()
        return sorted(map(_rev_u, item[0]))

    def children_recursive(self, sha, rev_dict=None):
        """Recursively traverse children in breadth-first order

        `sha` and the yielded sha ids are keys of `rev_dict` (bytes); a
        `KeyError` is raised on first iteration if `sha` is unknown.
        """

        if rev_dict is None:
            rev_dict = self.get_commits()

        work_list = deque()
        seen = set()

        _children = rev_dict[sha][0]
        seen.update(_children)
        work_list.extend(_children)

        while work_list:
            p = work_list.popleft()
            yield p

            _children = rev_dict[p][0] - seen
            seen.update(_children)
            work_list.extend(_children)

        assert len(work_list) == 0

    def parents(self, sha):
        """return the list of the sha ids of the parents of the commit"""
        sha = _rev_b(sha)
        rev_dict = self.get_commits()
        try:
            item = rev_dict[sha]
        except KeyError:
            return []
        return list(map(_rev_u, item[1]))

    def all_revs(self):
        """yield the sha ids of all commits"""
        for rev in self.get_commits():
            yield _rev_u(rev)

    def sync(self):
        """check the repository for changes and rebuild the revision cache
        if needed; return whether the cache has been rebuilt
        """
        with self.__rev_cache_lock:
            return self._refresh_rev_cache(force=True)

    @contextlib.contextmanager
    def get_historian(self, sha, base_path):
        """context manager providing a function `historian(path)` which
        returns the revision of the last change of `path` as seen from `sha`,
        or None if there is none

        The history is read incrementally from one `git log` process, which
        is terminated when the context is left.
        """
        p = []  # the running `git log` process, if any
        change = {}  # path -> revision of its last change
        next_path = []  # the path the historian is currently looking for
        base_path = self._fs_from_unicode(base_path) or '.'

        def name_status_gen():
            p[:] = [self.repo.log_pipe('--pretty=format:%n%H', '--no-renames',
                                       '--name-status', sha, '--', base_path)]
            f = p[0].stdout
            for l in f:
                if l == b'\n':
                    continue
                old_sha = l.rstrip(b'\n')
                for l in f:
                    if l == b'\n':
                        break
                    _, path = l.rstrip(b'\n').split(b'\t', 1)
                    # git-log without -z option quotes each pathname
                    path = _unquote(path)
                    # a change of a file is a change of its parent
                    # directories as well
                    while path not in change:
                        change[path] = old_sha
                        if next_path == [path]:
                            yield old_sha
                        try:
                            path, _ = path.rsplit(b'/', 1)
                        except ValueError:
                            break
            if p:
                self._cleanup_proc(p[0])
            p[:] = []
            # the log is exhausted, no change is known for any other path
            while True:
                yield None
        gen = name_status_gen()

        def historian(path):
            path = self._fs_from_unicode(path)
            try:
                rev = change[path]
            except KeyError:
                next_path[:] = [path]
                rev = next(gen)
            return _rev_u(rev)

        try:
            yield historian
        finally:
            if p:
                self._cleanup_proc(p[0])

    def last_change(self, sha, path, historian=None):
        """return the revision of the last change of `path` as seen from
        `sha`, or None; `historian` is used for the lookup if it is given
        """
        if historian is not None:
            return historian(path)
        for entry in self.history(sha, path, limit=1):
            return entry
        return None

    def history(self, sha, path, limit=None):
        """yield the revisions listed by `git rev-list` for `sha`, restricted
        to `path` if it is not empty and to `limit` entries if given
        """
        if limit is None:
            limit = -1

        args = ['--max-count=%d' % limit, str(sha)]
        if path:
            args.extend(('--', self._fs_from_unicode(path)))
        tmp = self.repo.rev_list(*args)
        for rev in tmp.splitlines():
            yield _rev_u(rev)

    def history_timerange(self, start, stop):
        """return the list of revisions with start <= committer-time < stop
        """
        # retrieve start <= committer-time < stop,
        # see CachedRepository.get_changesets()
        output = self.repo.rev_list('--all', '--date-order',
                                    '--max-age=%d' % start,
                                    '--min-age=%d' % (stop - 1))
        return [_rev_u(rev) for rev in output.splitlines()]

    def rev_is_anchestor_of(self, rev1, rev2):
        """return True if rev2 is successor of rev1

        Return False if one of the revisions is unknown.
        """

        rev1 = _rev_b(rev1)
        rev2 = _rev_b(rev2)
        rev_dict = self.get_commits()
        # children_recursive() raises a KeyError for an unknown revision
        return (rev1 in rev_dict and rev2 in rev_dict and
                rev2 in self.children_recursive(rev1, rev_dict))

    def blame(self, commit_sha, path):
        """calls `git blame -p` and yields a (sha, lineno) pair for each
        line of the file; `lineno` is yielded as given by git (bytes)
        """
        in_metadata = False

        commit_sha = _rev_b(commit_sha)
        path = self._fs_from_unicode(path)

        for line in self.repo.blame('-p', '--', path, commit_sha) \
                             .splitlines():
            assert line
            if in_metadata:
                # the metadata of an entry ends with the line's content,
                # which is prefixed by a tab
                in_metadata = not line.startswith(b'\t')
            else:
                split_line = line.split()
                if len(split_line) == 4:
                    (sha, orig_lineno, lineno, group_size) = split_line
                else:
                    (sha, orig_lineno, lineno) = split_line

                assert len(sha) == 40
                yield _rev_u(sha), lineno
                in_metadata = True

        assert not in_metadata

    def get_changes(self, tree1, tree2):
        """yield the changes between `tree1` and `tree2` as tuples of the
        kind (mode1,mode2,obj1,obj2,action,path1,path2)

        A long-running `git diff-tree --stdin` process is used; it is
        discarded if the communication with it fails.
        """
        with self.__diff_tree_pipe_lock:
            if self.__diff_tree_pipe is None:
                self.__diff_tree_pipe = self.repo.diff_tree_pipe()
            proc = self.__diff_tree_pipe
            try:
                # the additional empty line makes git emit a lone newline,
                # which marks the end of the entries
                proc.stdin.write(b'%s %s\n\n' % (_rev_b(tree2), _rev_b(tree1))
                                 if tree1 else
                                 b'%s\n\n' % _rev_b(tree2))
                proc.stdin.flush()
                read = proc.stdout.read
                entries = []
                c = read(1)
                if not c:
                    raise EOFError()
                while c != b'\n':
                    entry = bytearray()
                    while c != b'\0':
                        entry.append(c[0])
                        c = read(1)
                        if not c:
                            raise EOFError()
                    entries.append(bytes(entry))
                    c = read(1)
                    if not c:
                        raise EOFError()
            except BaseException:
                # the state of the pipe is unknown, don't reuse it
                self.__diff_tree_pipe = None
                self._cleanup_proc(proc)
                raise
        if not entries:
            return
        # skip first entry as a sha
        assert not entries[0].startswith(b':')
        entries = entries[1:]

        yield from self._iter_diff_tree(entries)

    def diff_tree(self, tree1, tree2, path='', find_renames=False):
        """calls `git diff-tree` and returns tuples of the kind
        (mode1,mode2,obj1,obj2,action,path1,path2)"""

        # diff-tree returns records with the following structure:
        # :<old-mode> <new-mode> <old-sha> <new-sha> <change> NUL <old-path> NUL [ <new-path> NUL ]

        path = self._fs_from_unicode(path).strip(b'/') or b'.'
        diff_tree_args = ['-z', '-r']
        if find_renames:
            diff_tree_args.append('-M')
        diff_tree_args.extend([tree1 if tree1 else '--root',
                               tree2, '--', path])
        result = self.repo.diff_tree(*diff_tree_args)
        if not result:
            return

        def iter_entry(result):
            start = 0
            while True:
                idx = result.find(b'\0', start)
                if idx == -1:
                    return
                yield result[start:idx]
                start = idx + 1

        entries = list(iter_entry(result))
        if not tree1:
            # if only one tree-sha is given on commandline,
            # the first line is just the redundant tree-sha itself...
            entry = entries.pop(0)
            assert not entry.startswith(b':')

        yield from self._iter_diff_tree(entries)

    def _iter_diff_tree(self, entries):
        """yield a (mode1,mode2,obj1,obj2,action,path1,path2) tuple for each
        record found in `entries`, the NUL-separated fields of the output of
        `git diff-tree -z`
        """

        def next_entry():
            return next(iter_entry)

        iter_entry = iter(entries)
        while True:
            try:
                entry = next_entry()
            except StopIteration:
                return
            assert entry.startswith(b':')
            values = entry[1:].split(b' ')
            assert len(values) == 5
            old_mode, new_mode, old_sha, new_sha, change = values
            old_mode = int(old_mode, 8)
            new_mode = int(new_mode, 8)
            old_sha = _rev_u(old_sha)
            new_sha = _rev_u(new_sha)
            # only the first character is kept, which drops the score
            # following the letter for renames and copies
            change = str(change[:1], 'utf-8')
            old_path = self._fs_to_unicode(next_entry())
            new_path = None
            if change in ('R', 'C'):  # renamed or copied
                new_path = self._fs_to_unicode(next_entry())
            yield (old_mode, new_mode, old_sha, new_sha, change, old_path,
                   new_path)

    def _raise_not_readable(self, git_dir, e):
        raise GitError("Make sure the Git repository '%s' is readable: %s"
                       % (git_dir, to_unicode(e)))

    def _control_files_exist(self, git_dir):
        """return whether `HEAD`, `objects` and `refs` exist in `git_dir`"""
        for name in ('HEAD', 'objects', 'refs'):
            if not os.path.exists(os.path.join(git_dir, name)):
                self.logger.debug("Missing Git control file '%s' in '%s'",
                                  name, git_dir)
                return False
        return True