# cmd.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php

from contextlib import contextmanager
import io
import logging
import os
import signal
from subprocess import (
    call,
    Popen,
    PIPE
)
import subprocess
import sys
import threading
from collections import OrderedDict
from textwrap import dedent

from git.compat import (
    string_types,
    defenc,
    force_bytes,
    PY3,
    # just to satisfy flake8 on py3
    unicode,
    safe_decode,
    is_posix,
    is_win,
)
from git.exc import CommandError
from git.util import is_cygwin_git, cygpath, expand_path

from .exc import (
    GitCommandError,
    GitCommandNotFound
)
from .util import (
    LazyMixin,
    stream_copy,
)


# Keyword arguments of Git.execute(); _call_process() uses this set to separate
# them from the keyword arguments that are turned into git command line options.
execute_kwargs = {'istream', 'with_extended_output',
                  'with_exceptions', 'as_process', 'stdout_as_string',
                  'output_stream', 'with_stdout', 'kill_after_timeout',
                  'universal_newlines', 'shell', 'env', 'max_chunk_size'}

log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())

__all__ = ('Git',)


# ==============================================================================
## @name Utilities
# ------------------------------------------------------------------------------
# Documentation
## @{

def handle_process_output(process, stdout_handler, stderr_handler,
                          finalizer=None, decode_streams=True):
    """Registers for notifications to learn that process output is ready to read, and dispatches lines to
    the respective line handlers.
    This function returns once the finalizer returns

    :return: result of finalizer, or None if no finalizer was given
    :param process: subprocess.Popen instance
    :param stdout_handler: f(stdout_line_string), or None
    :param stderr_handler: f(stderr_line_string), or None
    :param finalizer: f(proc) - wait for proc to finish
    :param decode_streams:
        Assume stdout/stderr streams are binary and decode them before pushing \
        their contents to handlers.
        Set it to False if `universal_newlines == True` (then streams are in text-mode)
        or if decoding must happen later (i.e. for Diffs).
    """
    # Use 2 "pump" threads and wait for both to finish.
    def pump_stream(cmdline, name, stream, is_decode, handler):
        try:
            for line in stream:
                if handler:
                    if is_decode:
                        line = line.decode(defenc)
                    handler(line)
        except Exception as ex:
            log.error("Pumping %r of cmd(%s) failed due to: %r", name, cmdline, ex)
            raise CommandError(['<%s-pump>' % name] + cmdline, ex)
        finally:
            stream.close()

    cmdline = getattr(process, 'args', '')  # PY3+ only
    if not isinstance(cmdline, (tuple, list)):
        cmdline = cmdline.split()
    # Always hand a list to the pumps: they concatenate it with a list when
    # reporting errors, which would raise TypeError for a tuple.
    cmdline = list(cmdline)

    pumps = []
    if process.stdout:
        pumps.append(('stdout', process.stdout, stdout_handler))
    if process.stderr:
        pumps.append(('stderr', process.stderr, stderr_handler))

    threads = []

    for name, stream, handler in pumps:
        t = threading.Thread(target=pump_stream,
                             args=(cmdline, name, stream, decode_streams, handler))
        t.daemon = True
        t.start()
        threads.append(t)

    ## FIXME: Why Join??  Will block if `stdin` needs feeding...
    #
    for t in threads:
        t.join()

    if finalizer:
        return finalizer(process)


def dashify(string):
    """:return: `string` with every underscore replaced by a dash"""
    return string.replace('_', '-')


def slots_to_dict(self, exclude=()):
    """:return: dict mapping each name in ``self.__slots__`` that is not in `exclude` to its current value"""
    return {s: getattr(self, s) for s in self.__slots__ if s not in exclude}


def dict_to_slots_and__excluded_are_none(self, d, excluded=()):
    """Set every key/value pair of `d` as attribute on `self`, then set each attribute named in
    `excluded` to None."""
    for k, v in d.items():
        setattr(self, k, v)
    for k in excluded:
        setattr(self, k, None)


def _strip_trailing_newline(value):
    """:return: `value` without a single trailing newline. Works for bytes as well as for text
        (as produced with `universal_newlines`); None is passed through unchanged."""
    if value is None:
        return value
    newline = b"\n" if isinstance(value, bytes) else u"\n"
    if value.endswith(newline):
        return value[:-1]
    return value

## -- End Utilities -- @}


# value of Windows process creation flag taken from MSDN
CREATE_NO_WINDOW = 0x08000000

## CREATE_NEW_PROCESS_GROUP is needed to allow killing it afterwards,
# see https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal
PROC_CREATIONFLAGS = (CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP
                      if is_win else 0)


class Git(LazyMixin):

    """
    The Git class manages communication with the Git binary.

    It provides a convenient interface to calling the Git binary, such as in::

     g = Git( git_dir )
     g.init()                   # calls 'git init' program
     rval = g.ls_files()        # calls 'git ls-files' program

    ``Debugging``
        Set the GIT_PYTHON_TRACE environment variable to log each invocation
        of the command.
        Set its value to 'full' to see details about the returned values.
    """
    __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info",
                 "_git_options", "_persistent_git_options", "_environment")

    # slots which are not part of the pickled state
    _excluded_ = ('cat_file_all', 'cat_file_header', '_version_info')

    def __getstate__(self):
        return slots_to_dict(self, exclude=self._excluded_)

    def __setstate__(self, d):
        dict_to_slots_and__excluded_are_none(self, d, excluded=self._excluded_)

    # CONFIGURATION

    git_exec_name = "git"           # default that should work on linux and windows

    # Enables debugging of GitPython's git commands
    GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)

    # If True, a shell will be used when executing git commands.
    # This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126
    # and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required.
    # Override this value using `Git.USE_SHELL = True`
    USE_SHELL = False

    # Provide the full path to the git executable. Otherwise it assumes git is in the path
    _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE"
    _refresh_env_var = "GIT_PYTHON_REFRESH"
    GIT_PYTHON_GIT_EXECUTABLE = None
    # note that the git executable is actually found during the refresh step in
    # the top level __init__

    @classmethod
    def refresh(cls, path=None):
        """This gets called by the refresh function (see the top level
        __init__).

        :param path:
            Path to the git executable to use. If None, the value of the
            GIT_PYTHON_GIT_EXECUTABLE environment variable is used, falling back
            to `git_exec_name`.
        :return: True if the git executable could be run, False otherwise
        :raise ImportError:
            on the first refresh, if git cannot be run and GIT_PYTHON_REFRESH
            selects the error mode (the default) or has an invalid value
        :raise GitCommandNotFound:
            on any later refresh, if git cannot be run
        """
        # discern which path to refresh with
        if path is not None:
            new_git = os.path.expanduser(path)
            new_git = os.path.abspath(new_git)
        else:
            new_git = os.environ.get(cls._git_exec_env_var, cls.git_exec_name)

        # keep track of the old and new git executable path
        old_git = cls.GIT_PYTHON_GIT_EXECUTABLE
        cls.GIT_PYTHON_GIT_EXECUTABLE = new_git

        # test if the new git executable path is valid

        if sys.version_info < (3,):
            # - a GitCommandNotFound error is spawned by ourselves
            # - a OSError is spawned if the git executable provided
            #   cannot be executed for whatever reason
            exceptions = (GitCommandNotFound, OSError)
        else:
            # - a GitCommandNotFound error is spawned by ourselves
            # - a PermissionError is spawned if the git executable provided
            #   cannot be executed for whatever reason
            exceptions = (GitCommandNotFound, PermissionError)

        has_git = False
        try:
            cls().version()
            has_git = True
        except exceptions:
            pass

        # warn or raise exception if test failed
        if not has_git:
            err = dedent("""\
                Bad git executable.
                The git executable must be specified in one of the following ways:
                    - be included in your $PATH
                    - be set via $%s
                    - explicitly set via git.refresh()
                """) % cls._git_exec_env_var

            # revert to whatever the old_git was
            cls.GIT_PYTHON_GIT_EXECUTABLE = old_git

            if old_git is None:
                # on the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is
                # None) we only are quiet, warn, or error depending on the
                # GIT_PYTHON_REFRESH value

                # determine what the user wants to happen during the initial
                # refresh we expect GIT_PYTHON_REFRESH to either be unset or
                # be one of the following values:
                #   0|q|quiet|s|silence|n|none
                #   1|w|warn|warning
                #   2|r|raise|e|error

                mode = os.environ.get(cls._refresh_env_var, "raise").lower()

                quiet = ["quiet", "q", "silence", "s", "none", "n", "0"]
                warn = ["warn", "w", "warning", "1"]
                error = ["error", "e", "raise", "r", "2"]

                if mode in quiet:
                    pass
                elif mode in warn or mode in error:
                    err = dedent("""\
                        %s
                        All git commands will error until this is rectified.

                        This initial warning can be silenced or aggravated in the future by setting the
                        $%s environment variable. Use one of the following values:
                            - %s: for no warning or exception
                            - %s: for a printed warning
                            - %s: for a raised exception

                        Example:
                            export %s=%s
                        """) % (
                        err,
                        cls._refresh_env_var,
                        "|".join(quiet),
                        "|".join(warn),
                        "|".join(error),
                        cls._refresh_env_var,
                        quiet[0])

                    if mode in warn:
                        print("WARNING: %s" % err)
                    else:
                        raise ImportError(err)
                else:
                    err = dedent("""\
                        %s environment variable has been set but it has been set with an invalid value.

                        Use only the following values:
                            - %s: for no warning or exception
                            - %s: for a printed warning
                            - %s: for a raised exception
                        """) % (
                        cls._refresh_env_var,
                        "|".join(quiet),
                        "|".join(warn),
                        "|".join(error))
                    raise ImportError(err)

                # we get here if this was the init refresh and the refresh mode
                # was not error, go ahead and set the GIT_PYTHON_GIT_EXECUTABLE
                # such that we discern the difference between a first import
                # and a second import
                cls.GIT_PYTHON_GIT_EXECUTABLE = cls.git_exec_name
            else:
                # after the first refresh (when GIT_PYTHON_GIT_EXECUTABLE
                # is no longer None) we raise an exception
                raise GitCommandNotFound("git", err)

        return has_git

    @classmethod
    def is_cygwin(cls):
        """:return: result of `is_cygwin_git` for the currently configured git executable"""
        return is_cygwin_git(cls.GIT_PYTHON_GIT_EXECUTABLE)

    @classmethod
    def polish_url(cls, url, is_cygwin=None):
        """Normalize `url` so it can be written to config files.

        :param url: the url or path to normalize
        :param is_cygwin:
            If True, the url is converted with `cygpath`. If None, the value is
            determined by :meth:`is_cygwin`.
        :return: the normalized url"""
        if is_cygwin is None:
            is_cygwin = cls.is_cygwin()

        if is_cygwin:
            url = cygpath(url)
        else:
            # Remove any backslashes from urls to be written in config files.
            #
            # Windows might create config-files containing paths with backslashes,
            # but git stops liking them as it will escape the backslashes.
            # Hence we undo the escaping just to be sure.
            url = url.replace("\\\\", "\\").replace("\\", "/")

        return url

    class AutoInterrupt(object):
        """Kill/Interrupt the stored process instance once this instance goes out of scope. It is
        used to prevent processes piling up in case iterators stop reading.
        Besides all attributes are wired through to the contained process object.

        The wait method was overridden to perform automatic status code checking
        and possibly raise."""

        __slots__ = ("proc", "args")

        def __init__(self, proc, args):
            self.proc = proc
            self.args = args

        def __del__(self):
            if self.proc is None:
                return

            proc = self.proc
            self.proc = None
            if proc.stdin:
                proc.stdin.close()
            if proc.stdout:
                proc.stdout.close()
            if proc.stderr:
                proc.stderr.close()

            # did the process finish already so we have a return code ?
            if proc.poll() is not None:
                return

            # can be that nothing really exists anymore ...
            if os is None or getattr(os, 'kill', None) is None:
                return

            # try to kill it
            try:
                proc.terminate()
                proc.wait()    # ensure process goes away
            except OSError as ex:
                # ignore error when process already died
                log.info("Ignored error after process had died: %r", ex)
            except AttributeError:
                # try windows
                # for some reason, providing None for stdout/stderr still prints something. This is why
                # we simply use the shell and redirect to nul. Its slower than CreateProcess, question
                # is whether we really want to see all these messages. Its annoying no matter what.
                if is_win:
                    call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(proc.pid)), shell=True)
            # END exception handling

        def __getattr__(self, attr):
            return getattr(self.proc, attr)

        def wait(self, stderr=b''):  # TODO: Bad choice to mimic `proc.wait()` but with different args.
            """Wait for the process and return its status code.

            :param stderr: Previously read value of stderr, in case stderr is already closed.
            :warn: may deadlock if output or error pipes are used and not handled separately.
            :raise GitCommandError: if the return status is not 0"""
            if stderr is None:
                stderr = b''
            stderr = force_bytes(stderr)

            status = self.proc.wait()

            def read_all_from_possibly_closed_stream(stream):
                # there may be no stderr stream at all
                if stream is None:
                    return stderr or b''
                try:
                    return stderr + force_bytes(stream.read())
                except ValueError:
                    # stream was closed already
                    return stderr or b''

            if status != 0:
                errstr = read_all_from_possibly_closed_stream(self.proc.stderr)
                log.debug('AutoInterrupt wait stderr: %r', errstr)
                raise GitCommandError(self.args, status, errstr)
            # END status handling
            return status
    # END auto interrupt

    class CatFileContentStream(object):

        """Object representing a sized read-only stream returning the contents of
        an object.
        It behaves like a stream, but counts the data read and simulates an empty
        stream once our sized content region is empty.
        If not all data is read to the end of the objects's lifetime, we read the
        rest to assure the underlying stream continues to work"""

        __slots__ = ('_stream', '_nbr', '_size')

        def __init__(self, size, stream):
            self._stream = stream
            self._size = size
            self._nbr = 0           # num bytes read

            # special case: if the object is empty, has null bytes, get the
            # final newline right away.
            if size == 0:
                stream.read(1)
            # END handle empty streams

        def read(self, size=-1):
            bytes_left = self._size - self._nbr
            if bytes_left == 0:
                return b''
            if size > -1:
                # assure we don't try to read past our limit
                size = min(bytes_left, size)
            else:
                # they try to read all, make sure its not more than what remains
                size = bytes_left
            # END check early depletion
            data = self._stream.read(size)
            self._nbr += len(data)

            # check for depletion, read our final byte to make the stream usable by others
            if self._size - self._nbr == 0:
                self._stream.read(1)    # final newline
            # END finish reading
            return data

        def readline(self, size=-1):
            if self._nbr == self._size:
                return b''

            # clamp size to lowest allowed value
            bytes_left = self._size - self._nbr
            if size > -1:
                size = min(bytes_left, size)
            else:
                size = bytes_left
            # END handle size

            data = self._stream.readline(size)
            self._nbr += len(data)

            # handle final byte
            if self._size - self._nbr == 0:
                self._stream.read(1)
            # END finish reading

            return data

        def readlines(self, size=-1):
            if self._nbr == self._size:
                return []

            # leave all additional logic to our readline method, we just check the size
            out = []
            nbr = 0
            while True:
                line = self.readline()
                if not line:
                    break
                out.append(line)
                if size > -1:
                    nbr += len(line)
                    if nbr > size:
                        break
                # END handle size constraint
            # END readline loop
            return out

        def __iter__(self):
            return self

        def __next__(self):
            # python 3 iterator protocol, delegates to the python 2 style method
            return self.next()

        def next(self):
            line = self.readline()
            if not line:
                raise StopIteration

            return line

        def __del__(self):
            bytes_left = self._size - self._nbr
            if bytes_left:
                # read and discard - seeking is impossible within a stream
                # includes terminating newline
                self._stream.read(bytes_left + 1)
            # END handle incomplete read

    def __init__(self, working_dir=None):
        """Initialize this instance with:

        :param working_dir:
           Git directory we should work in. If None, we always work in the current
           directory as returned by os.getcwd().
           It is meant to be the working tree directory if available, or the
           .git directory in case of bare repositories."""
        super(Git, self).__init__()
        self._working_dir = expand_path(working_dir)
        self._git_options = ()
        self._persistent_git_options = []

        # Extra environment variables to pass to git commands
        self._environment = {}

        # cached command slots
        self.cat_file_header = None
        self.cat_file_all = None

    def __getattr__(self, name):
        """A convenience method as it allows to call the command as if it was
        an object.
        :return: Callable object that will execute call _call_process with your arguments."""
        if name[0] == '_':
            return LazyMixin.__getattr__(self, name)
        return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)

    def set_persistent_git_options(self, **kwargs):
        """Specify command line options to the git executable
        for subsequent subcommand calls

        :param kwargs:
            is a dict of keyword arguments.
            these arguments are passed as in _call_process
            but will be passed to the git command rather than
            the subcommand.
        """

        self._persistent_git_options = self.transform_kwargs(
            split_single_char_options=True, **kwargs)

    def _set_cache_(self, attr):
        if attr == '_version_info':
            # We only use the first 4 numbers, as everything else could be strings in fact (on windows)
            version_numbers = self._call_process('version').split(' ')[2]
            self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4] if n.isdigit())
        else:
            super(Git, self)._set_cache_(attr)
        # END handle version info

    @property
    def working_dir(self):
        """:return: Git directory we are working on"""
        return self._working_dir

    @property
    def version_info(self):
        """
        :return: tuple(int, int, int, int) tuple with integers representing the major, minor
            and additional version numbers as parsed from git version.
            This value is generated on demand and is cached"""
        return self._version_info

    def execute(self, command,
                istream=None,
                with_extended_output=False,
                with_exceptions=True,
                as_process=False,
                output_stream=None,
                stdout_as_string=True,
                kill_after_timeout=None,
                with_stdout=True,
                universal_newlines=False,
                shell=None,
                env=None,
                max_chunk_size=io.DEFAULT_BUFFER_SIZE,
                **subprocess_kwargs
                ):
        """Handles executing the command on the shell and consumes and returns
        the returned information (stdout)

        :param command:
            The command argument list to execute.
            It should be a string, or a sequence of program arguments. The
            program to execute is the first item in the args sequence or string.

        :param istream:
            Standard input filehandle passed to subprocess.Popen.

        :param with_extended_output:
            Whether to return a (status, stdout, stderr) tuple.

        :param with_exceptions:
            Whether to raise an exception when git returns a non-zero status.

        :param as_process:
            Whether to return the created process instance directly from which
            streams can be read on demand. This will render with_extended_output and
            with_exceptions ineffective - the caller will have
            to deal with the details himself.
            It is important to note that the process will be placed into an AutoInterrupt
            wrapper that will interrupt the process once it goes out of scope. If you
            use the command in iterators, you should pass the whole process instance
            instead of a single stream.

        :param output_stream:
            If set to a file-like object, data produced by the git command will be
            output to the given stream directly.
            This feature only has any effect if as_process is False. Processes will
            always be created with a pipe due to issues with subprocess.
            This merely is a workaround as data will be copied from the
            output pipe to the given output stream directly.
            Judging from the implementation, you shouldn't use this flag !

        :param stdout_as_string:
            if False, the commands standard output will be bytes. Otherwise, it will be
            decoded into a string using the default encoding (usually utf-8).
            The latter can fail, if the output contains binary data.

        :param env:
            A dictionary of environment variables to be passed to `subprocess.Popen`.

        :param max_chunk_size:
            Maximum number of bytes in one chunk of data passed to the output_stream in
            one invocation of write() method. If the given number is not positive then
            the default value is used.

        :param subprocess_kwargs:
            Keyword arguments to be passed to subprocess.Popen. Please note that
            some of the valid kwargs are already set by this method, the ones you
            specify may not be the same ones.

        :param with_stdout: If True, default True, we open stdout on the created process
        :param universal_newlines:
            if True, pipes will be opened as text, and lines are split at
            all known line endings.
        :param shell:
            Whether to invoke commands through a shell (see `Popen(..., shell=True)`).
            It overrides :attr:`USE_SHELL` if it is not `None`.
        :param kill_after_timeout:
            To specify a timeout in seconds for the git command, after which the process
            should be killed. This will have no effect if as_process is set to True. It is
            set to None by default and will let the process run until the timeout is
            explicitly specified. This feature is not supported on Windows. It's also worth
            noting that kill_after_timeout uses SIGKILL, which can have negative side
            effects on a repository. For example, stale locks in case of git gc could
            render the repository incapable of accepting changes until the lock is manually
            removed.

        :return:
            * str(output) if extended_output = False (Default)
            * tuple(int(status), str(stdout), str(stderr)) if extended_output = True

            if output_stream is True, the stdout value will be your output stream:
            * output_stream if extended_output = False
            * tuple(int(status), output_stream, str(stderr)) if extended_output = True

            Note git is executed with LANGUAGE="C" and LC_ALL="C" to ensure consistent
            output regardless of system language.

        :raise GitCommandError:
            if with_exceptions is True and git returned a non-zero status, or if
            kill_after_timeout is used on Windows

        :raise GitCommandNotFound: if the executable could not be started

        :note:
           If you add additional keyword arguments to the signature of this method,
           you must update the execute_kwargs set housed in this module."""
        if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != 'full' or as_process):
            log.info(' '.join(command))

        # Allow the user to have the command executed in their working dir.
        cwd = self._working_dir or os.getcwd()

        # Start the process
        inline_env = env
        env = os.environ.copy()
        # Attempt to force all output to plain ascii english, which is what some parsing code
        # may expect.
        # According to stackoverflow (http://goo.gl/l74GC8), we are setting LANGUAGE as well
        # just to be sure.
        env["LANGUAGE"] = "C"
        env["LC_ALL"] = "C"
        env.update(self._environment)
        if inline_env is not None:
            env.update(inline_env)

        if is_win:
            cmd_not_found_exception = OSError
            if kill_after_timeout:
                raise GitCommandError(command, '"kill_after_timeout" feature is not supported on Windows.')
        else:
            if sys.version_info[0] > 2:
                cmd_not_found_exception = FileNotFoundError  # NOQA # exists, flake8 unknown @UndefinedVariable
            else:
                cmd_not_found_exception = OSError
        # end handle

        # An explicitly given shell value - including False - takes precedence over USE_SHELL
        if shell is None:
            shell = self.USE_SHELL

        # Without stdout we write to the null device. If subprocess does not provide DEVNULL,
        # we open the device ourselves and have to close our handle once the process was spawned.
        devnull = None
        if with_stdout:
            stdout_sink = PIPE
        else:
            stdout_sink = getattr(subprocess, 'DEVNULL', None)
            if stdout_sink is None:
                devnull = stdout_sink = open(os.devnull, 'wb')
        # END handle stdout sink

        log.debug("Popen(%s, cwd=%s, universal_newlines=%s, shell=%s)",
                  command, cwd, universal_newlines, shell)
        try:
            proc = Popen(command,
                         env=env,
                         cwd=cwd,
                         bufsize=-1,
                         stdin=istream,
                         stderr=PIPE,
                         stdout=stdout_sink,
                         shell=shell,
                         close_fds=is_posix,  # unsupported on windows
                         universal_newlines=universal_newlines,
                         creationflags=PROC_CREATIONFLAGS,
                         **subprocess_kwargs
                         )
        except cmd_not_found_exception as err:
            raise GitCommandNotFound(command, err)
        finally:
            # the child has its own handle by now (or was never started)
            if devnull is not None:
                devnull.close()

        if as_process:
            return self.AutoInterrupt(proc, command)

        def _kill_process(pid):
            """ Callback method to kill a process. """
            p = Popen(['ps', '--ppid', str(pid)], stdout=PIPE,
                      creationflags=PROC_CREATIONFLAGS)
            child_pids = []
            try:
                for line in p.stdout:
                    fields = line.split()
                    if fields and fields[0].isdigit():
                        child_pids.append(int(fields[0]))
            finally:
                # don't leave the pipe or the helper process behind
                p.stdout.close()
                p.wait()
            try:
                # Windows does not have SIGKILL, so use SIGTERM instead
                sig = getattr(signal, 'SIGKILL', signal.SIGTERM)
                os.kill(pid, sig)
                for child_pid in child_pids:
                    try:
                        os.kill(child_pid, sig)
                    except OSError:
                        pass
                kill_check.set()    # tell the main routine that the process was killed
            except OSError:
                # It is possible that the process gets completed in the duration after timeout
                # happens and before we try to kill the process.
                pass
            return
        # end

        if kill_after_timeout:
            kill_check = threading.Event()
            watchdog = threading.Timer(kill_after_timeout, _kill_process, args=(proc.pid,))

        # Wait for the process to return
        status = 0
        stdout_value = b''
        stderr_value = b''
        try:
            if output_stream is None:
                if kill_after_timeout:
                    watchdog.start()
                stdout_value, stderr_value = proc.communicate()
                if kill_after_timeout:
                    watchdog.cancel()
                    if kill_check.is_set():
                        stderr_value = ('Timeout: the command "%s" did not complete in %d '
                                        'secs.' % (" ".join(command), kill_after_timeout)).encode(defenc)
                # communicate() yields None for streams which are not pipes (with_stdout=False)
                if stdout_value is None:
                    stdout_value = b''
                # strip trailing "\n"
                stdout_value = _strip_trailing_newline(stdout_value)
                stderr_value = _strip_trailing_newline(stderr_value)
                status = proc.returncode
            else:
                max_chunk_size = max_chunk_size if max_chunk_size and max_chunk_size > 0 else io.DEFAULT_BUFFER_SIZE
                # there is nothing to copy if stdout was not opened
                if proc.stdout is not None:
                    stream_copy(proc.stdout, output_stream, max_chunk_size)
                stdout_value = output_stream
                stderr_value = proc.stderr.read()
                # strip trailing "\n"
                stderr_value = _strip_trailing_newline(stderr_value)
                status = proc.wait()
            # END stdout handling
        finally:
            # stdout is None if with_stdout was False
            if proc.stdout is not None:
                proc.stdout.close()
            if proc.stderr is not None:
                proc.stderr.close()

        if self.GIT_PYTHON_TRACE == 'full':
            cmdstr = " ".join(command)

            def as_text(stdout_value):
                return '<OUTPUT_STREAM>' if output_stream else safe_decode(stdout_value)
            # end

            if stderr_value:
                log.info("%s -> %d; stdout: '%s'; stderr: '%s'",
                         cmdstr, status, as_text(stdout_value), safe_decode(stderr_value))
            elif stdout_value:
                log.info("%s -> %d; stdout: '%s'", cmdstr, status, as_text(stdout_value))
            else:
                log.info("%s -> %d", cmdstr, status)
        # END handle debug printing

        if with_exceptions and status != 0:
            raise GitCommandError(command, status, stderr_value, stdout_value)

        if isinstance(stdout_value, bytes) and stdout_as_string:  # could also be output_stream
            stdout_value = safe_decode(stdout_value)

        # Allow access to the command's status code
        if with_extended_output:
            return (status, stdout_value, safe_decode(stderr_value))
        else:
            return stdout_value

    def environment(self):
        """:return: dict of the extra environment variables passed to git commands"""
        return self._environment

    def update_environment(self, **kwargs):
        """
        Set environment variables for future git invocations. Return all changed
        values in a format that can be passed back into this function to revert
        the changes:

        ``Examples``::

            old_env = self.update_environment(PWD='/tmp')
            self.update_environment(**old_env)

        :param kwargs: environment variables to use for git processes
        :return: dict that maps environment variables to their old values
        """
        old_env = {}
        for key, value in kwargs.items():
            # set value if it is not None
            if value is not None:
                old_env[key] = self._environment.get(key)
                self._environment[key] = value
            # remove key from environment if its value is None
            elif key in self._environment:
                old_env[key] = self._environment[key]
                del self._environment[key]
        return old_env

    @contextmanager
    def custom_environment(self, **kwargs):
        """
        A context manager around the above ``update_environment`` method to restore the
        environment back to its previous state after operation.

        ``Examples``::

            with self.custom_environment(GIT_SSH='/bin/ssh_wrapper'):
                repo.remotes.origin.fetch()

        :param kwargs: see update_environment
        """
        old_env = self.update_environment(**kwargs)
        try:
            yield
        finally:
            self.update_environment(**old_env)

    def transform_kwarg(self, name, value, split_single_char_options):
        """Transform a single keyword argument into a list of git command line options.

        :param name: option name; single characters become short options (`-x`), longer
            names become long options with underscores converted to dashes
        :param value: True yields just the flag, False and None yield nothing, any other
            value is passed as the option's value
        :param split_single_char_options: if True, a short option and its value are
            returned as two separate items, otherwise as a single one
        :return: list of command line arguments, possibly empty"""
        if value is None or value is False:
            return []

        is_short = len(name) == 1
        if value is True:
            return ["-%s" % name] if is_short else ["--%s" % dashify(name)]

        if is_short:
            if split_single_char_options:
                return ["-%s" % name, "%s" % value]
            return ["-%s%s" % (name, value)]
        return ["--%s=%s" % (dashify(name), value)]

    def transform_kwargs(self, split_single_char_options=True, **kwargs):
        """Transforms Python style kwargs into git command line options.

        Options are emitted sorted by their keyword name; list or tuple values
        yield one option per item."""
        args = []
        kwargs = OrderedDict(sorted(kwargs.items(), key=lambda x: x[0]))
        for k, v in kwargs.items():
            if isinstance(v, (list, tuple)):
                for value in v:
                    args += self.transform_kwarg(k, value, split_single_char_options)
            else:
                args += self.transform_kwarg(k, v, split_single_char_options)
        return args

    @classmethod
    def __unpack_args(cls, arg_list):
        """:return: flat list of strings built from `arg_list`, which may be a single value
            or an arbitrarily nested list/tuple of values"""
        if not isinstance(arg_list, (list, tuple)):
            # This is just required for unicode conversion, as subprocess can't handle it
            # However, in any other case, passing strings (usually utf-8 encoded) is totally fine
            if not PY3 and isinstance(arg_list, unicode):
                return [arg_list.encode(defenc)]
            return [str(arg_list)]

        outlist = []
        for arg in arg_list:
            if isinstance(arg, (list, tuple)):
                outlist.extend(cls.__unpack_args(arg))
            elif not PY3 and isinstance(arg, unicode):
                outlist.append(arg.encode(defenc))
            # END recursion
            else:
                outlist.append(str(arg))
        # END for each arg
        return outlist

    def __call__(self, **kwargs):
        """Specify command line options to the git executable
        for a subcommand call

        :param kwargs:
            is a dict of keyword arguments.
            these arguments are passed as in _call_process
            but will be passed to the git command rather than
            the subcommand.

        ``Examples``::
            git(work_tree='/tmp').difftool()"""
        self._git_options = self.transform_kwargs(
            split_single_char_options=True, **kwargs)
        return self

    def _call_process(self, method, *args, **kwargs):
        """Run the given git command with the specified arguments and return
        the result as a String

        :param method:
            is the command. Contained "_" characters will be converted to dashes,
            such as in 'ls_files' to call 'ls-files'.

        :param args:
            is the list of arguments. If None is included, it will be pruned.
            This allows your commands to call git more conveniently as None
            is realized as non-existent

        :param kwargs:
            It contains key-values for the following:
            - the :meth:`execute()` kwds, as listed in :var:`execute_kwargs`;
            - "command options" to be converted by :meth:`transform_kwargs()`;
            - the `'insert_kwargs_after'` key which its value must match one of ``*args``,
              and any cmd-options will be appended after the matched arg.

        Examples::

            git.rev_list('master', max_count=10, header=True)

        turns into::

           git rev-list --header --max-count=10 master

        :return: Same as ``execute``"""
        # Handle optional arguments prior to calling transform_kwargs
        # otherwise these'll end up in args, which is bad.
        exec_kwargs = {k: v for k, v in kwargs.items() if k in execute_kwargs}
        opts_kwargs = {k: v for k, v in kwargs.items() if k not in execute_kwargs}

        insert_after_this_arg = opts_kwargs.pop('insert_kwargs_after', None)

        # Prepare the argument list
        opt_args = self.transform_kwargs(**opts_kwargs)
        ext_args = self.__unpack_args([a for a in args if a is not None])

        if insert_after_this_arg is None:
            args = opt_args + ext_args
        else:
            try:
                index = ext_args.index(insert_after_this_arg)
            except ValueError:
                raise ValueError("Couldn't find argument '%s' in args %s to insert cmd options after"
                                 % (insert_after_this_arg, str(ext_args)))
            # end handle error
            args = ext_args[:index + 1] + opt_args + ext_args[index + 1:]
        # end handle opts_kwargs

        call = [self.GIT_PYTHON_GIT_EXECUTABLE]

        # add persistent git options
        call.extend(self._persistent_git_options)

        # add the git options, then reset to empty
        # to avoid side_effects
        call.extend(self._git_options)
        self._git_options = ()

        call.append(dashify(method))
        call.extend(args)

        return self.execute(call, **exec_kwargs)

    def _parse_object_header(self, header_line):
        """
        :param header_line:
            <hex_sha> type_string size_as_int

        :return: (hex_sha, type_string, size_as_int)

        :raise ValueError: if the header contains indication for an error due to
            incorrect input sha"""
        tokens = header_line.split()
        if len(tokens) != 3:
            if not tokens:
                raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip()))
            else:
                raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()))
            # END handle actual return value
        # END error handling

        if len(tokens[0]) != 40:
            raise ValueError("Failed to parse header: %r" % header_line)
        return (tokens[0], tokens[1], int(tokens[2]))

    def _prepare_ref(self, ref):
        """:return: `ref` as newline-terminated bytes, ready to be written to the stdin of a
            persistent command"""
        # required for command to separate refs on stdin, as bytes
        refstr = ref
        if isinstance(ref, bytes):
            # Assume 40 bytes hexsha - bin-to-ascii for some reason returns bytes, not text
            refstr = ref.decode('ascii')
        elif not isinstance(ref, string_types):
            refstr = str(ref)               # could be ref-object

        if not refstr.endswith("\n"):
            refstr += "\n"
        return refstr.encode(defenc)

    def _get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs):
        """:return: the command cached in attribute `attr_name`. If there is none yet, it is
            created by calling `cmd_name` as process with a stdin pipe, and cached."""
        cur_val = getattr(self, attr_name)
        if cur_val is not None:
            return cur_val

        options = {"istream": PIPE, "as_process": True}
        options.update(kwargs)

        cmd = self._call_process(cmd_name, *args, **options)
        setattr(self, attr_name, cmd)
        return cmd

    def __get_object_header(self, cmd, ref):
        # feed the ref to the persistent command and parse the header line it answers with
        cmd.stdin.write(self._prepare_ref(ref))
        cmd.stdin.flush()
        return self._parse_object_header(cmd.stdout.readline())

    def get_object_header(self, ref):
        """ Use this method to quickly examine the type and size of the object behind
        the given ref.

        :note: The method will only suffer from the costs of command invocation
            once and reuses the command in subsequent calls.

        :return: (hexsha, type_string, size_as_int)"""
        cmd = self._get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
        return self.__get_object_header(cmd, ref)

    def get_object_data(self, ref):
        """ As get_object_header, but returns object data as well
        :return: (hexsha, type_string, size_as_int,data_string)
        :note: not threadsafe"""
        hexsha, typename, size, stream = self.stream_object_data(ref)
        data = stream.read(size)
        del stream
        return (hexsha, typename, size, data)

    def stream_object_data(self, ref):
        """ As get_object_header, but returns the data as a stream

        :return: (hexsha, type_string, size_as_int, stream)
        :note: This method is not threadsafe, you need one independent Command instance per thread to be safe !"""
        cmd = self._get_persistent_cmd("cat_file_all", "cat_file", batch=True)
        hexsha, typename, size = self.__get_object_header(cmd, ref)
        return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout))

    def clear_cache(self):
        """Clear all kinds of internal caches to release resources.

        Currently persistent commands will be interrupted.

        :return: self"""
        for cmd in (self.cat_file_all, self.cat_file_header):
            if cmd:
                cmd.__del__()

        self.cat_file_all = None
        self.cat_file_header = None
        return self