1# cmd.py
2# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
3#
4# This module is part of GitPython and is released under
5# the BSD License: http://www.opensource.org/licenses/bsd-license.php
6
7from contextlib import contextmanager
8import io
9import logging
10import os
11import signal
12from subprocess import (
13    call,
14    Popen,
15    PIPE
16)
17import subprocess
18import sys
19import threading
20from collections import OrderedDict
21from textwrap import dedent
22
23from git.compat import (
24    string_types,
25    defenc,
26    force_bytes,
27    PY3,
28    # just to satisfy flake8 on py3
29    unicode,
30    safe_decode,
31    is_posix,
32    is_win,
33)
34from git.exc import CommandError
35from git.util import is_cygwin_git, cygpath, expand_path
36
37from .exc import (
38    GitCommandError,
39    GitCommandNotFound
40)
41from .util import (
42    LazyMixin,
43    stream_copy,
44)
45
46
# Names of the keyword arguments that `Git.execute()` accepts for itself.
# Keep this set in sync with the signature of that method.
execute_kwargs = {'istream', 'with_extended_output',
                  'with_exceptions', 'as_process', 'stdout_as_string',
                  'output_stream', 'with_stdout', 'kill_after_timeout',
                  'universal_newlines', 'shell', 'env', 'max_chunk_size'}

# Module-level logger with a no-op handler attached
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())

# Public names of this module
__all__ = ('Git',)
56
57
58# ==============================================================================
59## @name Utilities
60# ------------------------------------------------------------------------------
61# Documentation
62## @{
63
def handle_process_output(process, stdout_handler, stderr_handler,
                          finalizer=None, decode_streams=True):
    """Registers for notifications to learn that process output is ready to read, and dispatches
    lines to the respective line handlers.
    This function returns once both streams are exhausted and the finalizer (if any) returned.

    :return: result of finalizer, or None if no finalizer was given
    :param process: subprocess.Popen instance
    :param stdout_handler: f(stdout_line_string), or None
    :param stderr_handler: f(stderr_line_string), or None
    :param finalizer: f(proc) - wait for proc to finish
    :param decode_streams:
        Assume stdout/stderr streams are binary and decode them before pushing
        their contents to handlers.
        Set it to False if `universal_newline == True` (then streams are in text-mode)
        or if decoding must happen later (i.e. for Diffs).
    """
    # Use 2 "pump" threads and wait for both to finish.
    def pump_stream(cmdline, name, stream, is_decode, handler):
        try:
            # The stream is always drained, even without a handler
            for line in stream:
                if handler:
                    if is_decode:
                        line = line.decode(defenc)
                    handler(line)
        except Exception as ex:
            log.error("Pumping %r of cmd(%s) failed due to: %r", name, cmdline, ex)
            # cmdline may be a tuple; normalise it so the concatenation cannot fail
            raise CommandError(['<%s-pump>' % name] + list(cmdline), ex)
        finally:
            stream.close()

    cmdline = getattr(process, 'args', '')  # PY3+ only
    if not isinstance(cmdline, (tuple, list)):
        cmdline = cmdline.split()

    # Only streams which actually exist on the process get a pump
    pumps = []
    if process.stdout:
        pumps.append(('stdout', process.stdout, stdout_handler))
    if process.stderr:
        pumps.append(('stderr', process.stderr, stderr_handler))

    threads = []

    for name, stream, handler in pumps:
        t = threading.Thread(target=pump_stream,
                             args=(cmdline, name, stream, decode_streams, handler))
        # attribute form instead of the deprecated Thread.setDaemon()
        t.daemon = True
        t.start()
        threads.append(t)

    ## FIXME: Why Join??  Will block if `stdin` needs feeding...
    #
    for t in threads:
        t.join()

    if finalizer:
        return finalizer(process)
121
122
def dashify(string):
    """:return: ``string`` with every underscore turned into a dash"""
    return '-'.join(string.split('_'))
125
126
def slots_to_dict(self, exclude=()):
    """Collect the values of the slots of ``self`` into a dictionary.

    :param exclude: names of slots to leave out of the result
    :return: dict mapping each remaining slot name to its current value"""
    state = {}
    for slot in self.__slots__:
        if slot in exclude:
            continue
        state[slot] = getattr(self, slot)
    return state
129
130
def dict_to_slots_and__excluded_are_none(self, d, excluded=()):
    """Assign every item of ``d`` as an attribute of ``self``, then set each
    attribute named in ``excluded`` to None.

    :param d: mapping of attribute name to value
    :param excluded: names of attributes to reset to None afterwards"""
    for entry in d.items():
        setattr(self, *entry)
    for name in excluded:
        setattr(self, name, None)
136
137## -- End Utilities -- @}
138
139
# value of Windows process creation flag taken from MSDN
CREATE_NO_WINDOW = 0x08000000

## CREATE_NEW_PROCESS_GROUP is needed to allow killing it afterwards,
# see https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal
# The combined flags only apply on Windows; everywhere else the value is 0.
# It is passed as `creationflags` to the Popen calls in this module.
PROC_CREATIONFLAGS = (CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP
                      if is_win else 0)
147
148
149class Git(LazyMixin):
150
151    """
152    The Git class manages communication with the Git binary.
153
154    It provides a convenient interface to calling the Git binary, such as in::
155
156     g = Git( git_dir )
157     g.init()                   # calls 'git init' program
158     rval = g.ls_files()        # calls 'git ls-files' program
159
160    ``Debugging``
        Set the GIT_PYTHON_TRACE environment variable to have each invocation
        of the command logged.
163        Set its value to 'full' to see details about the returned values.
164    """
    # Attributes an instance may carry
    __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info",
                 "_git_options", "_persistent_git_options", "_environment")

    # Slots which __getstate__ leaves out of the state and __setstate__ resets to None
    _excluded_ = ('cat_file_all', 'cat_file_header', '_version_info')
169
170    def __getstate__(self):
171        return slots_to_dict(self, exclude=self._excluded_)
172
173    def __setstate__(self, d):
174        dict_to_slots_and__excluded_are_none(self, d, excluded=self._excluded_)
175
    # CONFIGURATION

    git_exec_name = "git"           # default that should work on linux and windows

    # Enables debugging of GitPython's git commands.
    # The environment is read once, when this class body is executed; False if the variable is unset.
    GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)

    # If True, a shell will be used when executing git commands.
    # This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126
    # and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required.
    # Override this value using `Git.USE_SHELL = True`
    USE_SHELL = False

    # Name of the environment variable which may provide the full path to the git executable.
    # If it is unset, `refresh()` falls back to `git_exec_name`, i.e. it assumes git is in the path
    _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE"
    # Name of the environment variable which `refresh()` reads to decide whether a failing
    # initial refresh is silent, a printed warning, or an exception
    _refresh_env_var = "GIT_PYTHON_REFRESH"
    # The git executable in use; None until `refresh()` assigned it
    GIT_PYTHON_GIT_EXECUTABLE = None
    # note that the git executable is actually found during the refresh step in
    # the top level __init__
195
196    @classmethod
197    def refresh(cls, path=None):
198        """This gets called by the refresh function (see the top level
199        __init__).
200        """
201        # discern which path to refresh with
202        if path is not None:
203            new_git = os.path.expanduser(path)
204            new_git = os.path.abspath(new_git)
205        else:
206            new_git = os.environ.get(cls._git_exec_env_var, cls.git_exec_name)
207
208        # keep track of the old and new git executable path
209        old_git = cls.GIT_PYTHON_GIT_EXECUTABLE
210        cls.GIT_PYTHON_GIT_EXECUTABLE = new_git
211
212        # test if the new git executable path is valid
213
214        if sys.version_info < (3,):
215            # - a GitCommandNotFound error is spawned by ourselves
216            # - a OSError is spawned if the git executable provided
217            #   cannot be executed for whatever reason
218            exceptions = (GitCommandNotFound, OSError)
219        else:
220            # - a GitCommandNotFound error is spawned by ourselves
221            # - a PermissionError is spawned if the git executable provided
222            #   cannot be executed for whatever reason
223            exceptions = (GitCommandNotFound, PermissionError)
224
225        has_git = False
226        try:
227            cls().version()
228            has_git = True
229        except exceptions:
230            pass
231
232        # warn or raise exception if test failed
233        if not has_git:
234            err = dedent("""\
235                Bad git executable.
236                The git executable must be specified in one of the following ways:
237                    - be included in your $PATH
238                    - be set via $%s
239                    - explicitly set via git.refresh()
240                """) % cls._git_exec_env_var
241
242            # revert to whatever the old_git was
243            cls.GIT_PYTHON_GIT_EXECUTABLE = old_git
244
245            if old_git is None:
246                # on the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is
247                # None) we only are quiet, warn, or error depending on the
248                # GIT_PYTHON_REFRESH value
249
250                # determine what the user wants to happen during the initial
251                # refresh we expect GIT_PYTHON_REFRESH to either be unset or
252                # be one of the following values:
253                #   0|q|quiet|s|silence
254                #   1|w|warn|warning
255                #   2|r|raise|e|error
256
257                mode = os.environ.get(cls._refresh_env_var, "raise").lower()
258
259                quiet = ["quiet", "q", "silence", "s", "none", "n", "0"]
260                warn = ["warn", "w", "warning", "1"]
261                error = ["error", "e", "raise", "r", "2"]
262
263                if mode in quiet:
264                    pass
265                elif mode in warn or mode in error:
266                    err = dedent("""\
267                        %s
268                        All git commands will error until this is rectified.
269
270                        This initial warning can be silenced or aggravated in the future by setting the
271                        $%s environment variable. Use one of the following values:
272                            - %s: for no warning or exception
273                            - %s: for a printed warning
274                            - %s: for a raised exception
275
276                        Example:
277                            export %s=%s
278                        """) % (
279                        err,
280                        cls._refresh_env_var,
281                        "|".join(quiet),
282                        "|".join(warn),
283                        "|".join(error),
284                        cls._refresh_env_var,
285                        quiet[0])
286
287                    if mode in warn:
288                        print("WARNING: %s" % err)
289                    else:
290                        raise ImportError(err)
291                else:
292                    err = dedent("""\
293                        %s environment variable has been set but it has been set with an invalid value.
294
295                        Use only the following values:
296                            - %s: for no warning or exception
297                            - %s: for a printed warning
298                            - %s: for a raised exception
299                        """) % (
300                        cls._refresh_env_var,
301                        "|".join(quiet),
302                        "|".join(warn),
303                        "|".join(error))
304                    raise ImportError(err)
305
306                # we get here if this was the init refresh and the refresh mode
307                # was not error, go ahead and set the GIT_PYTHON_GIT_EXECUTABLE
308                # such that we discern the difference between a first import
309                # and a second import
310                cls.GIT_PYTHON_GIT_EXECUTABLE = cls.git_exec_name
311            else:
312                # after the first refresh (when GIT_PYTHON_GIT_EXECUTABLE
313                # is no longer None) we raise an exception
314                raise GitCommandNotFound("git", err)
315
316        return has_git
317
318    @classmethod
319    def is_cygwin(cls):
320        return is_cygwin_git(cls.GIT_PYTHON_GIT_EXECUTABLE)
321
322    @classmethod
323    def polish_url(cls, url, is_cygwin=None):
324        if is_cygwin is None:
325            is_cygwin = cls.is_cygwin()
326
327        if is_cygwin:
328            url = cygpath(url)
329        else:
330            """Remove any backslahes from urls to be written in config files.
331
332            Windows might create config-files containing paths with backslashed,
333            but git stops liking them as it will escape the backslashes.
334            Hence we undo the escaping just to be sure.
335            """
336            url = url.replace("\\\\", "\\").replace("\\", "/")
337
338        return url
339
340    class AutoInterrupt(object):
341        """Kill/Interrupt the stored process instance once this instance goes out of scope. It is
342        used to prevent processes piling up in case iterators stop reading.
343        Besides all attributes are wired through to the contained process object.
344
345        The wait method was overridden to perform automatic status code checking
346        and possibly raise."""
347
348        __slots__ = ("proc", "args")
349
350        def __init__(self, proc, args):
351            self.proc = proc
352            self.args = args
353
354        def __del__(self):
355            if self.proc is None:
356                return
357
358            proc = self.proc
359            self.proc = None
360            if proc.stdin:
361                proc.stdin.close()
362            if proc.stdout:
363                proc.stdout.close()
364            if proc.stderr:
365                proc.stderr.close()
366
367            # did the process finish already so we have a return code ?
368            if proc.poll() is not None:
369                return
370
371            # can be that nothing really exists anymore ...
372            if os is None or getattr(os, 'kill', None) is None:
373                return
374
375            # try to kill it
376            try:
377                proc.terminate()
378                proc.wait()    # ensure process goes away
379            except OSError as ex:
380                log.info("Ignored error after process had died: %r", ex)
381                pass  # ignore error when process already died
382            except AttributeError:
383                # try windows
384                # for some reason, providing None for stdout/stderr still prints something. This is why
385                # we simply use the shell and redirect to nul. Its slower than CreateProcess, question
386                # is whether we really want to see all these messages. Its annoying no matter what.
387                if is_win:
388                    call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(proc.pid)), shell=True)
389            # END exception handling
390
391        def __getattr__(self, attr):
392            return getattr(self.proc, attr)
393
394        def wait(self, stderr=b''):  # TODO: Bad choice to mimic `proc.wait()` but with different args.
395            """Wait for the process and return its status code.
396
397            :param stderr: Previously read value of stderr, in case stderr is already closed.
398            :warn: may deadlock if output or error pipes are used and not handled separately.
399            :raise GitCommandError: if the return status is not 0"""
400            if stderr is None:
401                stderr = b''
402            stderr = force_bytes(stderr)
403
404            status = self.proc.wait()
405
406            def read_all_from_possibly_closed_stream(stream):
407                try:
408                    return stderr + force_bytes(stream.read())
409                except ValueError:
410                    return stderr or b''
411
412            if status != 0:
413                errstr = read_all_from_possibly_closed_stream(self.proc.stderr)
414                log.debug('AutoInterrupt wait stderr: %r' % (errstr,))
415                raise GitCommandError(self.args, status, errstr)
416            # END status handling
417            return status
418    # END auto interrupt
419
420    class CatFileContentStream(object):
421
422        """Object representing a sized read-only stream returning the contents of
423        an object.
424        It behaves like a stream, but counts the data read and simulates an empty
425        stream once our sized content region is empty.
426        If not all data is read to the end of the objects's lifetime, we read the
427        rest to assure the underlying stream continues to work"""
428
429        __slots__ = ('_stream', '_nbr', '_size')
430
431        def __init__(self, size, stream):
432            self._stream = stream
433            self._size = size
434            self._nbr = 0           # num bytes read
435
436            # special case: if the object is empty, has null bytes, get the
437            # final newline right away.
438            if size == 0:
439                stream.read(1)
440            # END handle empty streams
441
442        def read(self, size=-1):
443            bytes_left = self._size - self._nbr
444            if bytes_left == 0:
445                return b''
446            if size > -1:
447                # assure we don't try to read past our limit
448                size = min(bytes_left, size)
449            else:
450                # they try to read all, make sure its not more than what remains
451                size = bytes_left
452            # END check early depletion
453            data = self._stream.read(size)
454            self._nbr += len(data)
455
456            # check for depletion, read our final byte to make the stream usable by others
457            if self._size - self._nbr == 0:
458                self._stream.read(1)    # final newline
459            # END finish reading
460            return data
461
462        def readline(self, size=-1):
463            if self._nbr == self._size:
464                return b''
465
466            # clamp size to lowest allowed value
467            bytes_left = self._size - self._nbr
468            if size > -1:
469                size = min(bytes_left, size)
470            else:
471                size = bytes_left
472            # END handle size
473
474            data = self._stream.readline(size)
475            self._nbr += len(data)
476
477            # handle final byte
478            if self._size - self._nbr == 0:
479                self._stream.read(1)
480            # END finish reading
481
482            return data
483
484        def readlines(self, size=-1):
485            if self._nbr == self._size:
486                return []
487
488            # leave all additional logic to our readline method, we just check the size
489            out = []
490            nbr = 0
491            while True:
492                line = self.readline()
493                if not line:
494                    break
495                out.append(line)
496                if size > -1:
497                    nbr += len(line)
498                    if nbr > size:
499                        break
500                # END handle size constraint
501            # END readline loop
502            return out
503
504        def __iter__(self):
505            return self
506
507        def next(self):
508            line = self.readline()
509            if not line:
510                raise StopIteration
511
512            return line
513
514        def __del__(self):
515            bytes_left = self._size - self._nbr
516            if bytes_left:
517                # read and discard - seeking is impossible within a stream
518                # includes terminating newline
519                self._stream.read(bytes_left + 1)
520            # END handle incomplete read
521
522    def __init__(self, working_dir=None):
523        """Initialize this instance with:
524
525        :param working_dir:
526           Git directory we should work in. If None, we always work in the current
527           directory as returned by os.getcwd().
528           It is meant to be the working tree directory if available, or the
529           .git directory in case of bare repositories."""
530        super(Git, self).__init__()
531        self._working_dir = expand_path(working_dir)
532        self._git_options = ()
533        self._persistent_git_options = []
534
535        # Extra environment variables to pass to git commands
536        self._environment = {}
537
538        # cached command slots
539        self.cat_file_header = None
540        self.cat_file_all = None
541
542    def __getattr__(self, name):
543        """A convenience method as it allows to call the command as if it was
544        an object.
545        :return: Callable object that will execute call _call_process with your arguments."""
546        if name[0] == '_':
547            return LazyMixin.__getattr__(self, name)
548        return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
549
550    def set_persistent_git_options(self, **kwargs):
551        """Specify command line options to the git executable
552        for subsequent subcommand calls
553
554        :param kwargs:
555            is a dict of keyword arguments.
556            these arguments are passed as in _call_process
557            but will be passed to the git command rather than
558            the subcommand.
559        """
560
561        self._persistent_git_options = self.transform_kwargs(
562            split_single_char_options=True, **kwargs)
563
564    def _set_cache_(self, attr):
565        if attr == '_version_info':
566            # We only use the first 4 numbers, as everything else could be strings in fact (on windows)
567            version_numbers = self._call_process('version').split(' ')[2]
568            self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4] if n.isdigit())
569        else:
570            super(Git, self)._set_cache_(attr)
571        # END handle version info
572
573    @property
574    def working_dir(self):
575        """:return: Git directory we are working on"""
576        return self._working_dir
577
578    @property
579    def version_info(self):
580        """
581        :return: tuple(int, int, int, int) tuple with integers representing the major, minor
582            and additional version numbers as parsed from git version.
583            This value is generated on demand and is cached"""
584        return self._version_info
585
586    def execute(self, command,
587                istream=None,
588                with_extended_output=False,
589                with_exceptions=True,
590                as_process=False,
591                output_stream=None,
592                stdout_as_string=True,
593                kill_after_timeout=None,
594                with_stdout=True,
595                universal_newlines=False,
596                shell=None,
597                env=None,
598                max_chunk_size=io.DEFAULT_BUFFER_SIZE,
599                **subprocess_kwargs
600                ):
601        """Handles executing the command on the shell and consumes and returns
602        the returned information (stdout)
603
604        :param command:
605            The command argument list to execute.
606            It should be a string, or a sequence of program arguments. The
607            program to execute is the first item in the args sequence or string.
608
609        :param istream:
610            Standard input filehandle passed to subprocess.Popen.
611
612        :param with_extended_output:
613            Whether to return a (status, stdout, stderr) tuple.
614
615        :param with_exceptions:
616            Whether to raise an exception when git returns a non-zero status.
617
618        :param as_process:
619            Whether to return the created process instance directly from which
620            streams can be read on demand. This will render with_extended_output and
621            with_exceptions ineffective - the caller will have
622            to deal with the details himself.
623            It is important to note that the process will be placed into an AutoInterrupt
624            wrapper that will interrupt the process once it goes out of scope. If you
625            use the command in iterators, you should pass the whole process instance
626            instead of a single stream.
627
628        :param output_stream:
629            If set to a file-like object, data produced by the git command will be
630            output to the given stream directly.
631            This feature only has any effect if as_process is False. Processes will
632            always be created with a pipe due to issues with subprocess.
633            This merely is a workaround as data will be copied from the
634            output pipe to the given output stream directly.
635            Judging from the implementation, you shouldn't use this flag !
636
637        :param stdout_as_string:
638            if False, the commands standard output will be bytes. Otherwise, it will be
639            decoded into a string using the default encoding (usually utf-8).
640            The latter can fail, if the output contains binary data.
641
642        :param env:
643            A dictionary of environment variables to be passed to `subprocess.Popen`.
644
645        :param max_chunk_size:
646            Maximum number of bytes in one chunk of data passed to the output_stream in
647            one invocation of write() method. If the given number is not positive then
648            the default value is used.
649
650        :param subprocess_kwargs:
651            Keyword arguments to be passed to subprocess.Popen. Please note that
652            some of the valid kwargs are already set by this method, the ones you
653            specify may not be the same ones.
654
655        :param with_stdout: If True, default True, we open stdout on the created process
656        :param universal_newlines:
657            if True, pipes will be opened as text, and lines are split at
658            all known line endings.
659        :param shell:
660            Whether to invoke commands through a shell (see `Popen(..., shell=True)`).
661            It overrides :attr:`USE_SHELL` if it is not `None`.
662        :param kill_after_timeout:
663            To specify a timeout in seconds for the git command, after which the process
664            should be killed. This will have no effect if as_process is set to True. It is
665            set to None by default and will let the process run until the timeout is
666            explicitly specified. This feature is not supported on Windows. It's also worth
667            noting that kill_after_timeout uses SIGKILL, which can have negative side
668            effects on a repository. For example, stale locks in case of git gc could
669            render the repository incapable of accepting changes until the lock is manually
670            removed.
671
672        :return:
673            * str(output) if extended_output = False (Default)
674            * tuple(int(status), str(stdout), str(stderr)) if extended_output = True
675
676            if output_stream is True, the stdout value will be your output stream:
677            * output_stream if extended_output = False
678            * tuple(int(status), output_stream, str(stderr)) if extended_output = True
679
680            Note git is executed with LC_MESSAGES="C" to ensure consistent
681            output regardless of system language.
682
683        :raise GitCommandError:
684
685        :note:
           If you add additional keyword arguments to the signature of this method,
           you must update the execute_kwargs set housed in this module."""
688        if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != 'full' or as_process):
689            log.info(' '.join(command))
690
691        # Allow the user to have the command executed in their working dir.
692        cwd = self._working_dir or os.getcwd()
693
694        # Start the process
695        inline_env = env
696        env = os.environ.copy()
697        # Attempt to force all output to plain ascii english, which is what some parsing code
698        # may expect.
699        # According to stackoverflow (http://goo.gl/l74GC8), we are setting LANGUAGE as well
700        # just to be sure.
701        env["LANGUAGE"] = "C"
702        env["LC_ALL"] = "C"
703        env.update(self._environment)
704        if inline_env is not None:
705            env.update(inline_env)
706
707        if is_win:
708            cmd_not_found_exception = OSError
709            if kill_after_timeout:
710                raise GitCommandError(command, '"kill_after_timeout" feature is not supported on Windows.')
711        else:
712            if sys.version_info[0] > 2:
713                cmd_not_found_exception = FileNotFoundError  # NOQA # exists, flake8 unknown @UndefinedVariable
714            else:
715                cmd_not_found_exception = OSError
716        # end handle
717
718        stdout_sink = (PIPE
719                       if with_stdout
720                       else getattr(subprocess, 'DEVNULL', None) or open(os.devnull, 'wb'))
721        log.debug("Popen(%s, cwd=%s, universal_newlines=%s, shell=%s)",
722                  command, cwd, universal_newlines, shell)
723        try:
724            proc = Popen(command,
725                         env=env,
726                         cwd=cwd,
727                         bufsize=-1,
728                         stdin=istream,
729                         stderr=PIPE,
730                         stdout=stdout_sink,
731                         shell=shell is not None and shell or self.USE_SHELL,
732                         close_fds=is_posix,  # unsupported on windows
733                         universal_newlines=universal_newlines,
734                         creationflags=PROC_CREATIONFLAGS,
735                         **subprocess_kwargs
736                         )
737        except cmd_not_found_exception as err:
738            raise GitCommandNotFound(command, err)
739
740        if as_process:
741            return self.AutoInterrupt(proc, command)
742
743        def _kill_process(pid):
744            """ Callback method to kill a process. """
745            p = Popen(['ps', '--ppid', str(pid)], stdout=PIPE,
746                      creationflags=PROC_CREATIONFLAGS)
747            child_pids = []
748            for line in p.stdout:
749                if len(line.split()) > 0:
750                    local_pid = (line.split())[0]
751                    if local_pid.isdigit():
752                        child_pids.append(int(local_pid))
753            try:
754                # Windows does not have SIGKILL, so use SIGTERM instead
755                sig = getattr(signal, 'SIGKILL', signal.SIGTERM)
756                os.kill(pid, sig)
757                for child_pid in child_pids:
758                    try:
759                        os.kill(child_pid, sig)
760                    except OSError:
761                        pass
762                kill_check.set()    # tell the main routine that the process was killed
763            except OSError:
764                # It is possible that the process gets completed in the duration after timeout
765                # happens and before we try to kill the process.
766                pass
767            return
768        # end
769
770        if kill_after_timeout:
771            kill_check = threading.Event()
772            watchdog = threading.Timer(kill_after_timeout, _kill_process, args=(proc.pid,))
773
774        # Wait for the process to return
775        status = 0
776        stdout_value = b''
777        stderr_value = b''
778        try:
779            if output_stream is None:
780                if kill_after_timeout:
781                    watchdog.start()
782                stdout_value, stderr_value = proc.communicate()
783                if kill_after_timeout:
784                    watchdog.cancel()
785                    if kill_check.isSet():
786                        stderr_value = ('Timeout: the command "%s" did not complete in %d '
787                                        'secs.' % (" ".join(command), kill_after_timeout)).encode(defenc)
788                # strip trailing "\n"
789                if stdout_value.endswith(b"\n"):
790                    stdout_value = stdout_value[:-1]
791                if stderr_value.endswith(b"\n"):
792                    stderr_value = stderr_value[:-1]
793                status = proc.returncode
794            else:
795                max_chunk_size = max_chunk_size if max_chunk_size and max_chunk_size > 0 else io.DEFAULT_BUFFER_SIZE
796                stream_copy(proc.stdout, output_stream, max_chunk_size)
797                stdout_value = output_stream
798                stderr_value = proc.stderr.read()
799                # strip trailing "\n"
800                if stderr_value.endswith(b"\n"):
801                    stderr_value = stderr_value[:-1]
802                status = proc.wait()
803            # END stdout handling
804        finally:
805            proc.stdout.close()
806            proc.stderr.close()
807
808        if self.GIT_PYTHON_TRACE == 'full':
809            cmdstr = " ".join(command)
810
811            def as_text(stdout_value):
812                return not output_stream and safe_decode(stdout_value) or '<OUTPUT_STREAM>'
813            # end
814
815            if stderr_value:
816                log.info("%s -> %d; stdout: '%s'; stderr: '%s'",
817                         cmdstr, status, as_text(stdout_value), safe_decode(stderr_value))
818            elif stdout_value:
819                log.info("%s -> %d; stdout: '%s'", cmdstr, status, as_text(stdout_value))
820            else:
821                log.info("%s -> %d", cmdstr, status)
822        # END handle debug printing
823
824        if with_exceptions and status != 0:
825            raise GitCommandError(command, status, stderr_value, stdout_value)
826
827        if isinstance(stdout_value, bytes) and stdout_as_string:  # could also be output_stream
828            stdout_value = safe_decode(stdout_value)
829
830        # Allow access to the command's status code
831        if with_extended_output:
832            return (status, stdout_value, safe_decode(stderr_value))
833        else:
834            return stdout_value
835
836    def environment(self):
837        return self._environment
838
839    def update_environment(self, **kwargs):
840        """
841        Set environment variables for future git invocations. Return all changed
842        values in a format that can be passed back into this function to revert
843        the changes:
844
845        ``Examples``::
846
847            old_env = self.update_environment(PWD='/tmp')
848            self.update_environment(**old_env)
849
850        :param kwargs: environment variables to use for git processes
851        :return: dict that maps environment variables to their old values
852        """
853        old_env = {}
854        for key, value in kwargs.items():
855            # set value if it is None
856            if value is not None:
857                old_env[key] = self._environment.get(key)
858                self._environment[key] = value
859            # remove key from environment if its value is None
860            elif key in self._environment:
861                old_env[key] = self._environment[key]
862                del self._environment[key]
863        return old_env
864
865    @contextmanager
866    def custom_environment(self, **kwargs):
867        """
868        A context manager around the above ``update_environment`` method to restore the
869        environment back to its previous state after operation.
870
871        ``Examples``::
872
873            with self.custom_environment(GIT_SSH='/bin/ssh_wrapper'):
874                repo.remotes.origin.fetch()
875
876        :param kwargs: see update_environment
877        """
878        old_env = self.update_environment(**kwargs)
879        try:
880            yield
881        finally:
882            self.update_environment(**old_env)
883
884    def transform_kwarg(self, name, value, split_single_char_options):
885        if len(name) == 1:
886            if value is True:
887                return ["-%s" % name]
888            elif type(value) is not bool:
889                if split_single_char_options:
890                    return ["-%s" % name, "%s" % value]
891                else:
892                    return ["-%s%s" % (name, value)]
893        else:
894            if value is True:
895                return ["--%s" % dashify(name)]
896            elif type(value) is not bool:
897                return ["--%s=%s" % (dashify(name), value)]
898        return []
899
900    def transform_kwargs(self, split_single_char_options=True, **kwargs):
901        """Transforms Python style kwargs into git command line options."""
902        args = []
903        kwargs = OrderedDict(sorted(kwargs.items(), key=lambda x: x[0]))
904        for k, v in kwargs.items():
905            if isinstance(v, (list, tuple)):
906                for value in v:
907                    args += self.transform_kwarg(k, value, split_single_char_options)
908            else:
909                args += self.transform_kwarg(k, v, split_single_char_options)
910        return args
911
912    @classmethod
913    def __unpack_args(cls, arg_list):
914        if not isinstance(arg_list, (list, tuple)):
915            # This is just required for unicode conversion, as subprocess can't handle it
916            # However, in any other case, passing strings (usually utf-8 encoded) is totally fine
917            if not PY3 and isinstance(arg_list, unicode):
918                return [arg_list.encode(defenc)]
919            return [str(arg_list)]
920
921        outlist = []
922        for arg in arg_list:
923            if isinstance(arg_list, (list, tuple)):
924                outlist.extend(cls.__unpack_args(arg))
925            elif not PY3 and isinstance(arg_list, unicode):
926                outlist.append(arg_list.encode(defenc))
927            # END recursion
928            else:
929                outlist.append(str(arg))
930        # END for each arg
931        return outlist
932
933    def __call__(self, **kwargs):
934        """Specify command line options to the git executable
935        for a subcommand call
936
937        :param kwargs:
938            is a dict of keyword arguments.
939            these arguments are passed as in _call_process
940            but will be passed to the git command rather than
941            the subcommand.
942
943        ``Examples``::
944            git(work_tree='/tmp').difftool()"""
945        self._git_options = self.transform_kwargs(
946            split_single_char_options=True, **kwargs)
947        return self
948
949    def _call_process(self, method, *args, **kwargs):
950        """Run the given git command with the specified arguments and return
951        the result as a String
952
953        :param method:
954            is the command. Contained "_" characters will be converted to dashes,
955            such as in 'ls_files' to call 'ls-files'.
956
957        :param args:
958            is the list of arguments. If None is included, it will be pruned.
959            This allows your commands to call git more conveniently as None
960            is realized as non-existent
961
962        :param kwargs:
963            It contains key-values for the following:
964            - the :meth:`execute()` kwds, as listed in :var:`execute_kwargs`;
965            - "command options" to be converted by :meth:`transform_kwargs()`;
966            - the `'insert_kwargs_after'` key which its value must match one of ``*args``,
967              and any cmd-options will be appended after the matched arg.
968
969        Examples::
970
971            git.rev_list('master', max_count=10, header=True)
972
973        turns into::
974
975           git rev-list max-count 10 --header master
976
977        :return: Same as ``execute``"""
978        # Handle optional arguments prior to calling transform_kwargs
979        # otherwise these'll end up in args, which is bad.
980        exec_kwargs = {k: v for k, v in kwargs.items() if k in execute_kwargs}
981        opts_kwargs = {k: v for k, v in kwargs.items() if k not in execute_kwargs}
982
983        insert_after_this_arg = opts_kwargs.pop('insert_kwargs_after', None)
984
985        # Prepare the argument list
986        opt_args = self.transform_kwargs(**opts_kwargs)
987        ext_args = self.__unpack_args([a for a in args if a is not None])
988
989        if insert_after_this_arg is None:
990            args = opt_args + ext_args
991        else:
992            try:
993                index = ext_args.index(insert_after_this_arg)
994            except ValueError:
995                raise ValueError("Couldn't find argument '%s' in args %s to insert cmd options after"
996                                 % (insert_after_this_arg, str(ext_args)))
997            # end handle error
998            args = ext_args[:index + 1] + opt_args + ext_args[index + 1:]
999        # end handle opts_kwargs
1000
1001        call = [self.GIT_PYTHON_GIT_EXECUTABLE]
1002
1003        # add persistent git options
1004        call.extend(self._persistent_git_options)
1005
1006        # add the git options, then reset to empty
1007        # to avoid side_effects
1008        call.extend(self._git_options)
1009        self._git_options = ()
1010
1011        call.append(dashify(method))
1012        call.extend(args)
1013
1014        return self.execute(call, **exec_kwargs)
1015
1016    def _parse_object_header(self, header_line):
1017        """
1018        :param header_line:
1019            <hex_sha> type_string size_as_int
1020
1021        :return: (hex_sha, type_string, size_as_int)
1022
1023        :raise ValueError: if the header contains indication for an error due to
1024            incorrect input sha"""
1025        tokens = header_line.split()
1026        if len(tokens) != 3:
1027            if not tokens:
1028                raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip()))
1029            else:
1030                raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()))
1031            # END handle actual return value
1032        # END error handling
1033
1034        if len(tokens[0]) != 40:
1035            raise ValueError("Failed to parse header: %r" % header_line)
1036        return (tokens[0], tokens[1], int(tokens[2]))
1037
1038    def _prepare_ref(self, ref):
1039        # required for command to separate refs on stdin, as bytes
1040        refstr = ref
1041        if isinstance(ref, bytes):
1042            # Assume 40 bytes hexsha - bin-to-ascii for some reason returns bytes, not text
1043            refstr = ref.decode('ascii')
1044        elif not isinstance(ref, string_types):
1045            refstr = str(ref)               # could be ref-object
1046
1047        if not refstr.endswith("\n"):
1048            refstr += "\n"
1049        return refstr.encode(defenc)
1050
1051    def _get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs):
1052        cur_val = getattr(self, attr_name)
1053        if cur_val is not None:
1054            return cur_val
1055
1056        options = {"istream": PIPE, "as_process": True}
1057        options.update(kwargs)
1058
1059        cmd = self._call_process(cmd_name, *args, **options)
1060        setattr(self, attr_name, cmd)
1061        return cmd
1062
1063    def __get_object_header(self, cmd, ref):
1064        cmd.stdin.write(self._prepare_ref(ref))
1065        cmd.stdin.flush()
1066        return self._parse_object_header(cmd.stdout.readline())
1067
1068    def get_object_header(self, ref):
1069        """ Use this method to quickly examine the type and size of the object behind
1070        the given ref.
1071
1072        :note: The method will only suffer from the costs of command invocation
1073            once and reuses the command in subsequent calls.
1074
1075        :return: (hexsha, type_string, size_as_int)"""
1076        cmd = self._get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
1077        return self.__get_object_header(cmd, ref)
1078
1079    def get_object_data(self, ref):
1080        """ As get_object_header, but returns object data as well
1081        :return: (hexsha, type_string, size_as_int,data_string)
1082        :note: not threadsafe"""
1083        hexsha, typename, size, stream = self.stream_object_data(ref)
1084        data = stream.read(size)
1085        del(stream)
1086        return (hexsha, typename, size, data)
1087
1088    def stream_object_data(self, ref):
1089        """ As get_object_header, but returns the data as a stream
1090
1091        :return: (hexsha, type_string, size_as_int, stream)
1092        :note: This method is not threadsafe, you need one independent Command instance per thread to be safe !"""
1093        cmd = self._get_persistent_cmd("cat_file_all", "cat_file", batch=True)
1094        hexsha, typename, size = self.__get_object_header(cmd, ref)
1095        return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout))
1096
1097    def clear_cache(self):
1098        """Clear all kinds of internal caches to release resources.
1099
1100        Currently persistent commands will be interrupted.
1101
1102        :return: self"""
1103        for cmd in (self.cat_file_all, self.cat_file_header):
1104            if cmd:
1105                cmd.__del__()
1106
1107        self.cat_file_all = None
1108        self.cat_file_header = None
1109        return self
1110