1"""
2Classes which provide the shared base for GitFS, git_pillar, and winrepo
3"""
4
5
6import contextlib
7import copy
8import errno
9import fnmatch
10import glob
11import hashlib
12import io
13import logging
14import os
15import shlex
16import shutil
17import stat
18import subprocess
19import time
20import weakref
21from datetime import datetime
22
23import salt.ext.tornado.ioloop
24import salt.fileserver
25import salt.utils.configparser
26import salt.utils.data
27import salt.utils.files
28import salt.utils.gzip_util
29import salt.utils.hashutils
30import salt.utils.itertools
31import salt.utils.path
32import salt.utils.platform
33import salt.utils.stringutils
34import salt.utils.url
35import salt.utils.user
36import salt.utils.versions
37from salt.config import DEFAULT_MASTER_OPTS as _DEFAULT_MASTER_OPTS
38from salt.exceptions import FileserverConfigError, GitLockError, get_error_message
39from salt.utils.event import tagify
40from salt.utils.odict import OrderedDict
41from salt.utils.process import os_is_running as pid_exists
42from salt.utils.versions import LooseVersion as _LooseVersion
43
VALID_REF_TYPES = _DEFAULT_MASTER_OPTS["gitfs_ref_types"]

# Optional per-remote params that can only be used on a per-remote basis, and
# thus do not have defaults in salt/config.py.
PER_REMOTE_ONLY = ("name",)
# Params which are global only and cannot be overridden for a single remote.
GLOBAL_ONLY = ()

# Maximum depth when resolving chains of symlinks in a checkout
SYMLINK_RECURSE_DEPTH = 100

# Auth support (auth params can be global or per-remote, too)
AUTH_PROVIDERS = ("pygit2",)
AUTH_PARAMS = ("user", "password", "pubkey", "privkey", "passphrase", "insecure_auth")

# GitFS only: params which can be overridden for a single saltenv. Aside from
# 'ref', this must be a subset of the per-remote params passed to the
# constructor for the GitProvider subclasses.
PER_SALTENV_PARAMS = ("mountpoint", "root", "ref")

_RECOMMEND_GITPYTHON = (
    "GitPython is installed, you may wish to set %s_provider to "
    "'gitpython' to use GitPython for %s support."
)

# NOTE: fixed duplicated word ("for for") in this message
_RECOMMEND_PYGIT2 = (
    "pygit2 is installed, you may wish to set %s_provider to "
    "'pygit2' to use pygit2 for %s support."
)

_INVALID_REPO = (
    "Cache path %s (corresponding remote: %s) exists but is not a valid "
    "git repository. You will need to manually delete this directory on the "
    "master to continue to use this %s remote."
)

log = logging.getLogger(__name__)
80
# pylint: disable=import-error
try:
    if (
        salt.utils.platform.is_darwin()
        and salt.utils.path.which("git") == "/usr/bin/git"
    ):
        # On a freshly installed macOS, if we proceed a GUI dialog box
        # will be opened. Instead, we can see if it's safe to check
        # first. If git is a stub, git is _not_ present.
        from salt.utils.mac_utils import git_is_stub

        if git_is_stub():
            raise ImportError("Git is not present.")

    import git
    import gitdb

    GITPYTHON_VERSION = _LooseVersion(git.__version__)
except Exception:  # pylint: disable=broad-except
    # A GITPYTHON_VERSION of None signals that GitPython is not usable
    GITPYTHON_VERSION = None

try:
    # Squelch warning on cent7 due to them upgrading cffi
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        import pygit2
    PYGIT2_VERSION = _LooseVersion(pygit2.__version__)
    LIBGIT2_VERSION = _LooseVersion(pygit2.LIBGIT2_VERSION)

    # Work around upstream bug where bytestrings were being decoded using the
    # default encoding (which is usually ascii on Python 2). This was fixed
    # on 2 Feb 2018, so releases prior to 0.26.3 will need a workaround.
    if PYGIT2_VERSION <= _LooseVersion("0.26.3"):
        try:
            import pygit2.ffi
            import pygit2.remote
        except ImportError:
            # If we couldn't import these, then we're using an old enough
            # version where ffi isn't in use and this workaround would be
            # useless.
            pass
        else:

            def __maybe_string(ptr):
                # Decode a cffi char pointer into str, or None for a NULL ptr
                if not ptr:
                    return None
                return pygit2.ffi.string(ptr).decode("utf-8")

            # Monkey-patch the fixed decoder into pygit2
            pygit2.remote.maybe_string = __maybe_string

    # Older pygit2 releases did not raise a specific exception class, this
    # try/except makes Salt's exception catching work on any supported release.
    try:
        GitError = pygit2.errors.GitError
    except AttributeError:
        GitError = Exception
except Exception as exc:  # pylint: disable=broad-except
    # Exceptions other than ImportError can be raised in cases where there is a
    # problem with cffi (such as when python-cffi is upgraded and pygit2 tries
    # to rebuild itself against the newer cffi). Therefore, we simply will
    # catch a generic exception, and log the exception if it is anything other
    # than an ImportError.
    PYGIT2_VERSION = None
    LIBGIT2_VERSION = None
    if not isinstance(exc, ImportError):
        log.exception("Failed to import pygit2")

# pylint: enable=import-error
151
# Minimum versions for backend providers (presumably enforced where the
# provider is selected/verified -- that code is not visible in this section)
GITPYTHON_MINVER = _LooseVersion("0.3")
PYGIT2_MINVER = _LooseVersion("0.20.3")
LIBGIT2_MINVER = _LooseVersion("0.20.0")
156
157
def enforce_types(key, val):
    """
    Force params to be strings unless they should remain a different type

    key
        Config param name; may be either the bare name (e.g. 'ssl_verify')
        or a role-prefixed global name (e.g. 'gitfs_ssl_verify')
    val
        The configured value; coerced to str unless the key is listed in
        non_string_params below
    """
    # Params which should keep a non-string type, mapped either to the
    # desired type or to the sentinel "stringlist" (comma-separated string
    # or list, normalized to a list of strings).
    non_string_params = {
        "ssl_verify": bool,
        "insecure_auth": bool,
        "disable_saltenv_mapping": bool,
        "saltenv_whitelist": "stringlist",
        "saltenv_blacklist": "stringlist",
        "refspecs": "stringlist",
        "ref_types": "stringlist",
        "update_interval": int,
    }

    def _find_global(key):
        # Match role-prefixed keys (e.g. 'gitfs_ssl_verify') against the
        # bare param names; return the bare name or None if no match
        for item in non_string_params:
            try:
                if key.endswith("_" + item):
                    ret = item
                    break
            except TypeError:
                # key is not a str; retry against a stringified suffix
                if key.endswith("_" + str(item)):
                    ret = item
                    break
        else:
            ret = None
        return ret

    if key not in non_string_params:
        key = _find_global(key)
        if key is None:
            # Not a special param; default behavior is string coercion
            return str(val)

    expected = non_string_params[key]
    if expected == "stringlist":
        # Fix: flattened the accidental nested tuple ((str,), list)
        if not isinstance(val, (str, list)):
            val = str(val)
        if isinstance(val, str):
            return [x.strip() for x in val.split(",")]
        return [str(x) for x in val]
    else:
        try:
            return expected(val)
        except Exception:  # pylint: disable=broad-except
            log.error(
                "Failed to enforce type for key=%s with val=%s, falling back "
                "to a string",
                key,
                val,
            )
            return str(val)
210
211
def failhard(role):
    """
    Fatal configuration issue, raise an exception

    role
        The role (gitfs/git_pillar/winrepo) whose config failed to load
    """
    raise FileserverConfigError(f"Failed to load {role}")
217
218
219class GitProvider:
220    """
221    Base class for gitfs/git_pillar provider classes. Should never be used
222    directly.
223
224    self.provider should be set in the sub-class' __init__ function before
225    invoking the parent class' __init__.
226    """
227
228    def __init__(
229        self,
230        opts,
231        remote,
232        per_remote_defaults,
233        per_remote_only,
234        override_params,
235        cache_root,
236        role="gitfs",
237    ):
238        self.opts = opts
239        self.role = role
240        self.global_saltenv = salt.utils.data.repack_dictlist(
241            self.opts.get("{}_saltenv".format(self.role), []),
242            strict=True,
243            recurse=True,
244            key_cb=str,
245            val_cb=lambda x, y: str(y),
246        )
247        self.conf = copy.deepcopy(per_remote_defaults)
248
249        # Remove the 'salt://' from the beginning of any globally-defined
250        # per-saltenv mountpoints
251        for saltenv, saltenv_conf in self.global_saltenv.items():
252            if "mountpoint" in saltenv_conf:
253                self.global_saltenv[saltenv]["mountpoint"] = salt.utils.url.strip_proto(
254                    self.global_saltenv[saltenv]["mountpoint"]
255                )
256
257        per_remote_collisions = [x for x in override_params if x in per_remote_only]
258        if per_remote_collisions:
259            log.critical(
260                "The following parameter names are restricted to per-remote "
261                "use only: %s. This is a bug, please report it.",
262                ", ".join(per_remote_collisions),
263            )
264
265        try:
266            valid_per_remote_params = override_params + per_remote_only
267        except TypeError:
268            valid_per_remote_params = list(override_params) + list(per_remote_only)
269
270        if isinstance(remote, dict):
271            self.id = next(iter(remote))
272            self.get_url()
273
274            per_remote_conf = salt.utils.data.repack_dictlist(
275                remote[self.id],
276                strict=True,
277                recurse=True,
278                key_cb=str,
279                val_cb=enforce_types,
280            )
281
282            if not per_remote_conf:
283                log.critical(
284                    "Invalid per-remote configuration for %s remote '%s'. "
285                    "If no per-remote parameters are being specified, there "
286                    "may be a trailing colon after the URL, which should be "
287                    "removed. Check the master configuration file.",
288                    self.role,
289                    self.id,
290                )
291                failhard(self.role)
292
293            if (
294                self.role == "git_pillar"
295                and self.branch != "__env__"
296                and "base" in per_remote_conf
297            ):
298                log.critical(
299                    "Invalid per-remote configuration for %s remote '%s'. base can only"
300                    " be specified if __env__ is specified as the branch name.",
301                    self.role,
302                    self.id,
303                )
304                failhard(self.role)
305
306            per_remote_errors = False
307            for param in (
308                x for x in per_remote_conf if x not in valid_per_remote_params
309            ):
310                per_remote_errors = True
311                if param in AUTH_PARAMS and self.provider not in AUTH_PROVIDERS:
312                    msg = (
313                        "{0} authentication parameter '{1}' (from remote "
314                        "'{2}') is only supported by the following "
315                        "provider(s): {3}. Current {0}_provider is '{4}'.".format(
316                            self.role,
317                            param,
318                            self.id,
319                            ", ".join(AUTH_PROVIDERS),
320                            self.provider,
321                        )
322                    )
323                    if self.role == "gitfs":
324                        msg += (
325                            "See the GitFS Walkthrough in the Salt "
326                            "documentation for further information."
327                        )
328                    log.critical(msg)
329                else:
330                    msg = (
331                        "Invalid {} configuration parameter '{}' in "
332                        "remote '{}'. Valid parameters are: {}.".format(
333                            self.role,
334                            param,
335                            self.id,
336                            ", ".join(valid_per_remote_params),
337                        )
338                    )
339                    if self.role == "gitfs":
340                        msg += (
341                            " See the GitFS Walkthrough in the Salt "
342                            "documentation for further information."
343                        )
344                    log.critical(msg)
345
346            if per_remote_errors:
347                failhard(self.role)
348
349            self.conf.update(per_remote_conf)
350        else:
351            self.id = remote
352            self.get_url()
353
354        # Winrepo doesn't support the 'root' option, but it still must be part
355        # of the GitProvider object because other code depends on it. Add it as
356        # an empty string.
357        if "root" not in self.conf:
358            self.conf["root"] = ""
359
360        if self.role == "winrepo" and "name" not in self.conf:
361            # Ensure that winrepo has the 'name' parameter set if it wasn't
362            # provided. Default to the last part of the URL, minus the .git if
363            # it is present.
364            self.conf["name"] = self.url.rsplit("/", 1)[-1]
365            # Remove trailing .git from name
366            if self.conf["name"].lower().endswith(".git"):
367                self.conf["name"] = self.conf["name"][:-4]
368
369        if "mountpoint" in self.conf:
370            # Remove the 'salt://' from the beginning of the mountpoint, as
371            # well as any additional leading/trailing slashes
372            self.conf["mountpoint"] = salt.utils.url.strip_proto(
373                self.conf["mountpoint"]
374            ).strip("/")
375        else:
376            # For providers which do not use a mountpoint, assume the
377            # filesystem is mounted at the root of the fileserver.
378            self.conf["mountpoint"] = ""
379
380        if "saltenv" not in self.conf:
381            self.conf["saltenv"] = {}
382        else:
383            for saltenv, saltenv_conf in self.conf["saltenv"].items():
384                if "mountpoint" in saltenv_conf:
385                    saltenv_ptr = self.conf["saltenv"][saltenv]
386                    saltenv_ptr["mountpoint"] = salt.utils.url.strip_proto(
387                        saltenv_ptr["mountpoint"]
388                    )
389
390        for key, val in self.conf.items():
391            if key not in PER_SALTENV_PARAMS and not hasattr(self, key):
392                setattr(self, key, val)
393
394        for key in PER_SALTENV_PARAMS:
395            if key != "ref":
396                setattr(self, "_" + key, self.conf[key])
397            self.add_conf_overlay(key)
398
399        if not hasattr(self, "refspecs"):
400            # This was not specified as a per-remote overrideable parameter
401            # when instantiating an instance of a GitBase subclass. Make sure
402            # that we set this attribute so we at least have a sane default and
403            # are able to fetch.
404            key = "{}_refspecs".format(self.role)
405            try:
406                default_refspecs = _DEFAULT_MASTER_OPTS[key]
407            except KeyError:
408                log.critical(
409                    "The '%s' option has no default value in salt/config/__init__.py.",
410                    key,
411                )
412                failhard(self.role)
413
414            setattr(self, "refspecs", default_refspecs)
415            log.debug(
416                "The 'refspecs' option was not explicitly defined as a "
417                "configurable parameter. Falling back to %s for %s remote "
418                "'%s'.",
419                default_refspecs,
420                self.role,
421                self.id,
422            )
423
424        # Discard the conf dictionary since we have set all of the config
425        # params as attributes
426        delattr(self, "conf")
427
428        # Normalize components of the ref_types configuration and check for
429        # invalid configuration.
430        if hasattr(self, "ref_types"):
431            self.ref_types = [x.lower() for x in self.ref_types]
432            invalid_ref_types = [x for x in self.ref_types if x not in VALID_REF_TYPES]
433            if invalid_ref_types:
434                log.critical(
435                    "The following ref_types for %s remote '%s' are "
436                    "invalid: %s. The supported values are: %s",
437                    self.role,
438                    self.id,
439                    ", ".join(invalid_ref_types),
440                    ", ".join(VALID_REF_TYPES),
441                )
442                failhard(self.role)
443
444        if not isinstance(self.url, str):
445            log.critical(
446                "Invalid %s remote '%s'. Remotes must be strings, you "
447                "may need to enclose the URL in quotes",
448                self.role,
449                self.id,
450            )
451            failhard(self.role)
452
453        hash_type = getattr(hashlib, self.opts.get("hash_type", "md5"))
454        # We loaded this data from yaml configuration files, so, its safe
455        # to use UTF-8
456        self.hash = hash_type(self.id.encode("utf-8")).hexdigest()
457        self.cachedir_basename = getattr(self, "name", self.hash)
458        self.cachedir = salt.utils.path.join(cache_root, self.cachedir_basename)
459        self.linkdir = salt.utils.path.join(cache_root, "links", self.cachedir_basename)
460
461        if not os.path.isdir(self.cachedir):
462            os.makedirs(self.cachedir)
463
464        try:
465            self.new = self.init_remote()
466        except Exception as exc:  # pylint: disable=broad-except
467            msg = "Exception caught while initializing {} remote '{}': {}".format(
468                self.role, self.id, exc
469            )
470            if isinstance(self, GitPython):
471                msg += " Perhaps git is not available."
472            log.critical(msg, exc_info=True)
473            failhard(self.role)
474
475    def _get_envs_from_ref_paths(self, refs):
476        """
477        Return the names of remote refs (stripped of the remote name) and tags
478        which are map to the branches and tags.
479        """
480
481        def _check_ref(env_set, rname):
482            """
483            Add the appropriate saltenv(s) to the set
484            """
485            if rname in self.saltenv_revmap:
486                env_set.update(self.saltenv_revmap[rname])
487            else:
488                if rname == self.base:
489                    env_set.add("base")
490                elif not self.disable_saltenv_mapping:
491                    env_set.add(rname)
492
493        use_branches = "branch" in self.ref_types
494        use_tags = "tag" in self.ref_types
495
496        ret = set()
497        if salt.utils.stringutils.is_hex(self.base):
498            # gitfs_base or per-saltenv 'base' may point to a commit ID, which
499            # would not show up in the refs. Make sure we include it.
500            ret.add("base")
501        for ref in salt.utils.data.decode(refs):
502            if ref.startswith("refs/"):
503                ref = ref[5:]
504            rtype, rname = ref.split("/", 1)
505            if rtype == "remotes" and use_branches:
506                parted = rname.partition("/")
507                rname = parted[2] if parted[2] else parted[0]
508                _check_ref(ret, rname)
509            elif rtype == "tags" and use_tags:
510                _check_ref(ret, rname)
511
512        return ret
513
514    def _get_lock_file(self, lock_type="update"):
515        return salt.utils.path.join(self.gitdir, lock_type + ".lk")
516
    @classmethod
    def add_conf_overlay(cls, name):
        """
        Programmatically determine config value based on the desired saltenv

        Installs an accessor function as class attribute ``name``. For
        non-gitfs roles the accessor simply returns the per-remote value; for
        gitfs the lookup order is: per-remote per-saltenv config, then global
        per-saltenv config, then the per-remote default. The 'ref' param gets
        additional handling for all_saltenvs / disable_saltenv_mapping.
        """

        def _getconf(self, tgt_env="base"):
            # 'root' and 'mountpoint' hold path fragments; strip any trailing
            # path separator from their values
            strip_sep = (
                lambda x: x.rstrip(os.sep) if name in ("root", "mountpoint") else x
            )
            if self.role != "gitfs":
                # Per-saltenv overrides are a gitfs-only feature
                return strip_sep(getattr(self, "_" + name))
            # Get saltenv-specific configuration
            saltenv_conf = self.saltenv.get(tgt_env, {})
            if name == "ref":

                def _get_per_saltenv(tgt_env):
                    # Per-remote saltenv config wins over the global
                    # (e.g. gitfs_saltenv) config; None if neither is set
                    if name in saltenv_conf:
                        return saltenv_conf[name]
                    elif (
                        tgt_env in self.global_saltenv
                        and name in self.global_saltenv[tgt_env]
                    ):
                        return self.global_saltenv[tgt_env][name]
                    else:
                        return None

                # Return the all_saltenvs branch/tag if it is configured
                per_saltenv_ref = _get_per_saltenv(tgt_env)
                try:
                    all_saltenvs_ref = self.all_saltenvs
                    if per_saltenv_ref and all_saltenvs_ref != per_saltenv_ref:
                        # all_saltenvs overrides any conflicting per-saltenv
                        # mapping; warn the user via a debug log entry
                        log.debug(
                            "The per-saltenv configuration has mapped the "
                            "'%s' branch/tag to saltenv '%s' for %s "
                            "remote '%s', but this remote has "
                            "all_saltenvs set to '%s'. The per-saltenv "
                            "mapping will be ignored in favor of '%s'.",
                            per_saltenv_ref,
                            tgt_env,
                            self.role,
                            self.id,
                            all_saltenvs_ref,
                            all_saltenvs_ref,
                        )
                    return all_saltenvs_ref
                except AttributeError:
                    # all_saltenvs not configured for this remote
                    pass

                if tgt_env == "base":
                    return self.base
                elif self.disable_saltenv_mapping:
                    if per_saltenv_ref is None:
                        log.debug(
                            "saltenv mapping is disabled for %s remote '%s' "
                            "and saltenv '%s' is not explicitly mapped",
                            self.role,
                            self.id,
                            tgt_env,
                        )
                    return per_saltenv_ref
                else:
                    # Fall back to using the saltenv name itself as the ref
                    return per_saltenv_ref or tgt_env

            if name in saltenv_conf:
                return strip_sep(saltenv_conf[name])
            elif (
                tgt_env in self.global_saltenv and name in self.global_saltenv[tgt_env]
            ):
                return strip_sep(self.global_saltenv[tgt_env][name])
            else:
                return strip_sep(getattr(self, "_" + name))

        setattr(cls, name, _getconf)
592
593    def check_root(self):
594        """
595        Check if the relative root path exists in the checked-out copy of the
596        remote. Return the full path to that relative root if it does exist,
597        otherwise return None.
598        """
599        # No need to pass an environment to self.root() here since per-saltenv
600        # configuration is a gitfs-only feature and check_root() is not used
601        # for gitfs.
602        root_dir = salt.utils.path.join(self.cachedir, self.root()).rstrip(os.sep)
603        if os.path.isdir(root_dir):
604            return root_dir
605        log.error(
606            "Root path '%s' not present in %s remote '%s', skipping.",
607            self.root(),
608            self.role,
609            self.id,
610        )
611        return None
612
613    def clean_stale_refs(self):
614        """
615        Remove stale refs so that they are no longer seen as fileserver envs
616        """
617        cleaned = []
618        cmd_str = "git remote prune origin"
619
620        # Attempt to force all output to plain ascii english, which is what some parsing code
621        # may expect.
622        # According to stackoverflow (http://goo.gl/l74GC8), we are setting LANGUAGE as well
623        # just to be sure.
624        env = os.environ.copy()
625        if not salt.utils.platform.is_windows():
626            env[b"LANGUAGE"] = b"C"
627            env[b"LC_ALL"] = b"C"
628
629        cmd = subprocess.Popen(
630            shlex.split(cmd_str),
631            close_fds=not salt.utils.platform.is_windows(),
632            cwd=os.path.dirname(self.gitdir),
633            env=env,
634            stdout=subprocess.PIPE,
635            stderr=subprocess.STDOUT,
636        )
637        output = cmd.communicate()[0]
638        output = output.decode(__salt_system_encoding__)
639        if cmd.returncode != 0:
640            log.warning(
641                "Failed to prune stale branches for %s remote '%s'. "
642                "Output from '%s' follows:\n%s",
643                self.role,
644                self.id,
645                cmd_str,
646                output,
647            )
648        else:
649            marker = " * [pruned] "
650            for line in salt.utils.itertools.split(output, "\n"):
651                if line.startswith(marker):
652                    cleaned.append(line[len(marker) :].strip())
653            if cleaned:
654                log.debug(
655                    "%s pruned the following stale refs: %s",
656                    self.role,
657                    ", ".join(cleaned),
658                )
659        return cleaned
660
661    def clear_lock(self, lock_type="update"):
662        """
663        Clear update.lk
664        """
665        lock_file = self._get_lock_file(lock_type=lock_type)
666
667        def _add_error(errlist, exc):
668            msg = "Unable to remove update lock for {} ({}): {} ".format(
669                self.url, lock_file, exc
670            )
671            log.debug(msg)
672            errlist.append(msg)
673
674        success = []
675        failed = []
676
677        try:
678            os.remove(lock_file)
679        except OSError as exc:
680            if exc.errno == errno.ENOENT:
681                # No lock file present
682                pass
683            elif exc.errno == errno.EISDIR:
684                # Somehow this path is a directory. Should never happen
685                # unless some wiseguy manually creates a directory at this
686                # path, but just in case, handle it.
687                try:
688                    shutil.rmtree(lock_file)
689                except OSError as exc:
690                    _add_error(failed, exc)
691            else:
692                _add_error(failed, exc)
693        else:
694            msg = "Removed {} lock for {} remote '{}'".format(
695                lock_type, self.role, self.id
696            )
697            log.debug(msg)
698            success.append(msg)
699        return success, failed
700
    def enforce_git_config(self):
        """
        For the config options which need to be maintained in the git config,
        ensure that the git config file is configured as desired.
        """
        git_config = os.path.join(self.gitdir, "config")
        conf = salt.utils.configparser.GitConfigParser()
        if not conf.read(git_config):
            log.error("Failed to read from git config file %s", git_config)
        else:
            # We are currently enforcing the following git config items:
            # 1. Fetch URL
            # 2. refspecs used in fetch
            # 3. http.sslVerify
            conf_changed = False
            remote_section = 'remote "origin"'

            # 1. URL
            try:
                url = conf.get(remote_section, "url")
            except salt.utils.configparser.NoSectionError:
                # First time we've init'ed this repo, we need to add the
                # section for the remote to the git config
                conf.add_section(remote_section)
                conf_changed = True
                url = None
            log.debug(
                "Current fetch URL for %s remote '%s': %s (desired: %s)",
                self.role,
                self.id,
                url,
                self.url,
            )
            if url != self.url:
                conf.set(remote_section, "url", self.url)
                log.debug(
                    "Fetch URL for %s remote '%s' set to %s",
                    self.role,
                    self.id,
                    self.url,
                )
                conf_changed = True

            # 2. refspecs
            try:
                # Compare in sorted order so ordering differences don't
                # trigger a rewrite
                refspecs = sorted(conf.get(remote_section, "fetch", as_list=True))
            except salt.utils.configparser.NoOptionError:
                # No 'fetch' option present in the remote section. Should never
                # happen, but if it does for some reason, don't let it cause a
                # traceback.
                refspecs = []
            desired_refspecs = sorted(self.refspecs)
            log.debug(
                "Current refspecs for %s remote '%s': %s (desired: %s)",
                self.role,
                self.id,
                refspecs,
                desired_refspecs,
            )
            if refspecs != desired_refspecs:
                conf.set_multivar(remote_section, "fetch", self.refspecs)
                log.debug(
                    "Refspecs for %s remote '%s' set to %s",
                    self.role,
                    self.id,
                    desired_refspecs,
                )
                conf_changed = True

            # 3. http.sslVerify
            try:
                ssl_verify = conf.get("http", "sslVerify")
            except salt.utils.configparser.NoSectionError:
                conf.add_section("http")
                ssl_verify = None
            except salt.utils.configparser.NoOptionError:
                ssl_verify = None
            # git expects a lowercase true/false string here
            desired_ssl_verify = str(self.ssl_verify).lower()
            log.debug(
                "Current http.sslVerify for %s remote '%s': %s (desired: %s)",
                self.role,
                self.id,
                ssl_verify,
                desired_ssl_verify,
            )
            if ssl_verify != desired_ssl_verify:
                conf.set("http", "sslVerify", desired_ssl_verify)
                log.debug(
                    "http.sslVerify for %s remote '%s' set to %s",
                    self.role,
                    self.id,
                    desired_ssl_verify,
                )
                conf_changed = True

            # Write changes, if necessary
            if conf_changed:
                with salt.utils.files.fopen(git_config, "w") as fp_:
                    conf.write(fp_)
                    log.debug(
                        "Config updates for %s remote '%s' written to %s",
                        self.role,
                        self.id,
                        git_config,
                    )
806
807    def fetch(self):
808        """
809        Fetch the repo. If the local copy was updated, return True. If the
810        local copy was already up-to-date, return False.
811
812        This function requires that a _fetch() function be implemented in a
813        sub-class.
814        """
815        try:
816            with self.gen_lock(lock_type="update"):
817                log.debug("Fetching %s remote '%s'", self.role, self.id)
818                # Run provider-specific fetch code
819                return self._fetch()
820        except GitLockError as exc:
821            if exc.errno == errno.EEXIST:
822                log.warning(
823                    "Update lock file is present for %s remote '%s', "
824                    "skipping. If this warning persists, it is possible that "
825                    "the update process was interrupted, but the lock could "
826                    "also have been manually set. Removing %s or running "
827                    "'salt-run cache.clear_git_lock %s type=update' will "
828                    "allow updates to continue for this remote.",
829                    self.role,
830                    self.id,
831                    self._get_lock_file(lock_type="update"),
832                    self.role,
833                )
834            return False
835
836    def _lock(self, lock_type="update", failhard=False):
837        """
838        Place a lock file if (and only if) it does not already exist.
839        """
840        try:
841            fh_ = os.open(
842                self._get_lock_file(lock_type), os.O_CREAT | os.O_EXCL | os.O_WRONLY
843            )
844            with os.fdopen(fh_, "wb"):
845                # Write the lock file and close the filehandle
846                os.write(fh_, salt.utils.stringutils.to_bytes(str(os.getpid())))
847        except OSError as exc:
848            if exc.errno == errno.EEXIST:
849                with salt.utils.files.fopen(self._get_lock_file(lock_type), "r") as fd_:
850                    try:
851                        pid = int(
852                            salt.utils.stringutils.to_unicode(fd_.readline()).rstrip()
853                        )
854                    except ValueError:
855                        # Lock file is empty, set pid to 0 so it evaluates as
856                        # False.
857                        pid = 0
858                global_lock_key = self.role + "_global_lock"
859                lock_file = self._get_lock_file(lock_type=lock_type)
860                if self.opts[global_lock_key]:
861                    msg = (
862                        "{} is enabled and {} lockfile {} is present for "
863                        "{} remote '{}'.".format(
864                            global_lock_key,
865                            lock_type,
866                            lock_file,
867                            self.role,
868                            self.id,
869                        )
870                    )
871                    if pid:
872                        msg += " Process {} obtained the lock".format(pid)
873                        if not pid_exists(pid):
874                            msg += (
875                                " but this process is not running. The "
876                                "update may have been interrupted. If "
877                                "using multi-master with shared gitfs "
878                                "cache, the lock may have been obtained "
879                                "by another master."
880                            )
881                    log.warning(msg)
882                    if failhard:
883                        raise
884                    return
885                elif pid and pid_exists(pid):
886                    log.warning(
887                        "Process %d has a %s %s lock (%s)",
888                        pid,
889                        self.role,
890                        lock_type,
891                        lock_file,
892                    )
893                    if failhard:
894                        raise
895                    return
896                else:
897                    if pid:
898                        log.warning(
899                            "Process %d has a %s %s lock (%s), but this "
900                            "process is not running. Cleaning up lock file.",
901                            pid,
902                            self.role,
903                            lock_type,
904                            lock_file,
905                        )
906                    success, fail = self.clear_lock()
907                    if success:
908                        return self._lock(lock_type="update", failhard=failhard)
909                    elif failhard:
910                        raise
911                    return
912            else:
913                msg = "Unable to set {} lock for {} ({}): {} ".format(
914                    lock_type, self.id, self._get_lock_file(lock_type), exc
915                )
916                log.error(msg, exc_info=True)
917                raise GitLockError(exc.errno, msg)
918        msg = "Set {} lock for {} remote '{}'".format(lock_type, self.role, self.id)
919        log.debug(msg)
920        return msg
921
922    def lock(self):
923        """
924        Place an lock file and report on the success/failure. This is an
925        interface to be used by the fileserver runner, so it is hard-coded to
926        perform an update lock. We aren't using the gen_lock()
927        contextmanager here because the lock is meant to stay and not be
928        automatically removed.
929        """
930        success = []
931        failed = []
932        try:
933            result = self._lock(lock_type="update")
934        except GitLockError as exc:
935            failed.append(exc.strerror)
936        else:
937            if result is not None:
938                success.append(result)
939        return success, failed
940
941    @contextlib.contextmanager
942    def gen_lock(self, lock_type="update", timeout=0, poll_interval=0.5):
943        """
944        Set and automatically clear a lock
945        """
946        if not isinstance(lock_type, str):
947            raise GitLockError(errno.EINVAL, "Invalid lock_type '{}'".format(lock_type))
948
949        # Make sure that we have a positive integer timeout, otherwise just set
950        # it to zero.
951        try:
952            timeout = int(timeout)
953        except ValueError:
954            timeout = 0
955        else:
956            if timeout < 0:
957                timeout = 0
958
959        if not isinstance(poll_interval, ((int,), float)) or poll_interval < 0:
960            poll_interval = 0.5
961
962        if poll_interval > timeout:
963            poll_interval = timeout
964
965        lock_set = False
966        try:
967            time_start = time.time()
968            while True:
969                try:
970                    self._lock(lock_type=lock_type, failhard=True)
971                    lock_set = True
972                    yield
973                    # Break out of his loop once we've yielded the lock, to
974                    # avoid continued attempts to iterate and establish lock
975                    break
976                except (OSError, GitLockError) as exc:
977                    if not timeout or time.time() - time_start > timeout:
978                        raise GitLockError(exc.errno, exc.strerror)
979                    else:
980                        log.debug(
981                            "A %s lock is already present for %s remote "
982                            "'%s', sleeping %f second(s)",
983                            lock_type,
984                            self.role,
985                            self.id,
986                            poll_interval,
987                        )
988                        time.sleep(poll_interval)
989                        continue
990        finally:
991            if lock_set:
992                self.clear_lock(lock_type=lock_type)
993
994    def init_remote(self):
995        """
996        This function must be overridden in a sub-class
997        """
998        raise NotImplementedError()
999
1000    def checkout(self):
1001        """
1002        This function must be overridden in a sub-class
1003        """
1004        raise NotImplementedError()
1005
1006    def dir_list(self, tgt_env):
1007        """
1008        This function must be overridden in a sub-class
1009        """
1010        raise NotImplementedError()
1011
1012    def env_is_exposed(self, tgt_env):
1013        """
1014        Check if an environment is exposed by comparing it against a whitelist
1015        and blacklist.
1016        """
1017        return salt.utils.stringutils.check_whitelist_blacklist(
1018            tgt_env,
1019            whitelist=self.saltenv_whitelist,
1020            blacklist=self.saltenv_blacklist,
1021        )
1022
1023    def _fetch(self):
1024        """
1025        Provider-specific code for fetching, must be implemented in a
1026        sub-class.
1027        """
1028        raise NotImplementedError()
1029
1030    def envs(self):
1031        """
1032        This function must be overridden in a sub-class
1033        """
1034        raise NotImplementedError()
1035
1036    def file_list(self, tgt_env):
1037        """
1038        This function must be overridden in a sub-class
1039        """
1040        raise NotImplementedError()
1041
1042    def find_file(self, path, tgt_env):
1043        """
1044        This function must be overridden in a sub-class
1045        """
1046        raise NotImplementedError()
1047
1048    def get_checkout_target(self):
1049        """
1050        Resolve dynamically-set branch
1051        """
1052        if self.role == "git_pillar" and self.branch == "__env__":
1053            try:
1054                return self.all_saltenvs
1055            except AttributeError:
1056                # all_saltenvs not configured for this remote
1057                pass
1058            target = self.opts.get("pillarenv") or self.opts.get("saltenv") or "base"
1059            return self.base if target == "base" else str(target)
1060        return self.branch
1061
1062    def get_tree(self, tgt_env):
1063        """
1064        Return a tree object for the specified environment
1065        """
1066        if not self.env_is_exposed(tgt_env):
1067            return None
1068
1069        tgt_ref = self.ref(tgt_env)
1070        if tgt_ref is None:
1071            return None
1072
1073        for ref_type in self.ref_types:
1074            try:
1075                func_name = "get_tree_from_{}".format(ref_type)
1076                func = getattr(self, func_name)
1077            except AttributeError:
1078                log.error(
1079                    "%s class is missing function '%s'",
1080                    self.__class__.__name__,
1081                    func_name,
1082                )
1083            else:
1084                candidate = func(tgt_ref)
1085                if candidate is not None:
1086                    return candidate
1087
1088        if self.fallback:
1089            for ref_type in self.ref_types:
1090                try:
1091                    func_name = "get_tree_from_{}".format(ref_type)
1092                    func = getattr(self, func_name)
1093                except AttributeError:
1094                    log.error(
1095                        "%s class is missing function '%s'",
1096                        self.__class__.__name__,
1097                        func_name,
1098                    )
1099                else:
1100                    candidate = func(self.fallback)
1101                    if candidate is not None:
1102                        return candidate
1103
1104        # No matches found
1105        return None
1106
1107    def get_url(self):
1108        """
1109        Examine self.id and assign self.url (and self.branch, for git_pillar)
1110        """
1111        if self.role in ("git_pillar", "winrepo"):
1112            # With winrepo and git_pillar, the remote is specified in the
1113            # format '<branch> <url>', so that we can get a unique identifier
1114            # to hash for each remote.
1115            try:
1116                self.branch, self.url = self.id.split(None, 1)
1117            except ValueError:
1118                self.branch = self.conf["branch"]
1119                self.url = self.id
1120        else:
1121            self.url = self.id
1122
1123    @property
1124    def linkdir_walk(self):
1125        """
1126        Return the expected result of an os.walk on the linkdir, based on the
1127        mountpoint value.
1128        """
1129        try:
1130            # Use cached linkdir_walk if we've already run this
1131            return self._linkdir_walk
1132        except AttributeError:
1133            self._linkdir_walk = []
1134            try:
1135                parts = self._mountpoint.split("/")
1136            except AttributeError:
1137                log.error(
1138                    "%s class is missing a '_mountpoint' attribute",
1139                    self.__class__.__name__,
1140                )
1141            else:
1142                for idx, item in enumerate(parts[:-1]):
1143                    try:
1144                        dirs = [parts[idx + 1]]
1145                    except IndexError:
1146                        dirs = []
1147                    self._linkdir_walk.append(
1148                        (
1149                            salt.utils.path.join(self.linkdir, *parts[: idx + 1]),
1150                            dirs,
1151                            [],
1152                        )
1153                    )
1154                try:
1155                    # The linkdir itself goes at the beginning
1156                    self._linkdir_walk.insert(0, (self.linkdir, [parts[0]], []))
1157                except IndexError:
1158                    pass
1159            return self._linkdir_walk
1160
    def setup_callbacks(self):
        """
        Only needed in pygit2, included in the base class for simplicity of
        use. This base implementation is intentionally a no-op.
        """
1165
1166    def verify_auth(self):
1167        """
1168        Override this function in a sub-class to implement auth checking.
1169        """
1170        self.credentials = None
1171        return True
1172
1173    def write_file(self, blob, dest):
1174        """
1175        This function must be overridden in a sub-class
1176        """
1177        raise NotImplementedError()
1178
1179
class GitPython(GitProvider):
    """
    Interface to GitPython

    Implements the abstract GitProvider hooks (init_remote, checkout,
    dir_list, envs, _fetch, file_list, find_file, write_file and the
    get_tree_from_* resolvers) using the GitPython library's ``git``
    module (imported elsewhere in this module).
    """

    def __init__(
        self,
        opts,
        remote,
        per_remote_defaults,
        per_remote_only,
        override_params,
        cache_root,
        role="gitfs",
    ):
        # Tag the provider name before delegating setup to the base class
        self.provider = "gitpython"
        super().__init__(
            opts,
            remote,
            per_remote_defaults,
            per_remote_only,
            override_params,
            cache_root,
            role,
        )

    def checkout(self):
        """
        Checkout the configured branch/tag. We catch an "Exception" class here
        instead of a specific exception class because the exceptions raised by
        GitPython when running these functions vary in different versions of
        GitPython.

        Returns the result of check_root() on success, or None when the
        target ref does not exist or the checkout lock cannot be obtained.
        """
        tgt_ref = self.get_checkout_target()
        try:
            head_sha = self.repo.rev_parse("HEAD").hexsha
        except Exception:  # pylint: disable=broad-except
            # Should only happen the first time we are checking out, since
            # we fetch first before ever checking anything out.
            head_sha = None

        # 'origin/' + tgt_ref ==> matches a branch head
        # 'tags/' + tgt_ref + '@{commit}' ==> matches tag's commit
        checkout_refs = [
            ("origin/" + tgt_ref, False),
            ("tags/" + tgt_ref, False),
        ]
        if self.fallback:
            # Fallback refs are tried only after both primary forms fail
            checkout_refs += [
                ("origin/" + self.fallback, True),
                ("tags/" + self.fallback, True),
            ]
        for checkout_ref, fallback in checkout_refs:
            try:
                target_sha = self.repo.rev_parse(checkout_ref).hexsha
            except Exception:  # pylint: disable=broad-except
                # ref does not exist
                continue
            else:
                if head_sha == target_sha:
                    # No need to checkout, we're already up-to-date
                    return self.check_root()

            try:
                with self.gen_lock(lock_type="checkout"):
                    self.repo.git.checkout(checkout_ref)
                    log.debug(
                        "%s remote '%s' has been checked out to %s%s",
                        self.role,
                        self.id,
                        checkout_ref,
                        " as fallback" if fallback else "",
                    )
            except GitLockError as exc:
                if exc.errno == errno.EEXIST:
                    # Re-raise with a different strerror containing a
                    # more meaningful error message for the calling
                    # function.
                    raise GitLockError(
                        exc.errno,
                        "Checkout lock exists for {} remote '{}'".format(
                            self.role, self.id
                        ),
                    )
                else:
                    log.error(
                        "Error %d encountered obtaining checkout lock "
                        "for %s remote '%s'",
                        exc.errno,
                        self.role,
                        self.id,
                    )
                    return None
            except Exception:  # pylint: disable=broad-except
                # Checkout failed for this ref; try the next candidate
                continue
            return self.check_root()
        log.error(
            "Failed to checkout %s from %s remote '%s': remote ref does not exist",
            tgt_ref,
            self.role,
            self.id,
        )
        return None

    def init_remote(self):
        """
        Initialize/attach to a remote using GitPython. Return a boolean
        which will let the calling function know whether or not a new repo was
        initialized by this function.
        """
        new = False
        if not os.listdir(self.cachedir):
            # Repo cachedir is empty, initialize a new repo there
            self.repo = git.Repo.init(self.cachedir)
            new = True
        else:
            # Repo cachedir exists, try to attach
            try:
                self.repo = git.Repo(self.cachedir)
            except git.exc.InvalidGitRepositoryError:
                # _INVALID_REPO is a module-level log format string
                log.error(_INVALID_REPO, self.cachedir, self.url, self.role)
                return new

        self.gitdir = salt.utils.path.join(self.repo.working_dir, ".git")
        self.enforce_git_config()

        return new

    def dir_list(self, tgt_env):
        """
        Get list of directories for the target environment using GitPython.
        Returns a set of directory paths, adjusted for the configured root
        and mountpoint.
        """
        ret = set()
        tree = self.get_tree(tgt_env)
        if not tree:
            return ret
        if self.root(tgt_env):
            try:
                # Descend into the configured root subdirectory
                tree = tree / self.root(tgt_env)
            except KeyError:
                return ret
            relpath = lambda path: os.path.relpath(path, self.root(tgt_env))
        else:
            relpath = lambda path: path
        # Prefix each path with the per-saltenv mountpoint
        add_mountpoint = lambda path: salt.utils.path.join(
            self.mountpoint(tgt_env), path, use_posixpath=True
        )
        for blob in tree.traverse():
            if isinstance(blob, git.Tree):
                ret.add(add_mountpoint(relpath(blob.path)))
        if self.mountpoint(tgt_env):
            ret.add(self.mountpoint(tgt_env))
        return ret

    def envs(self):
        """
        Check the refs and return a list of the ones which can be used as salt
        environments.
        """
        ref_paths = [x.path for x in self.repo.refs]
        return self._get_envs_from_ref_paths(ref_paths)

    def _fetch(self):
        """
        Fetch the repo. If the local copy was updated, return True. If the
        local copy was already up-to-date, return False.

        NOTE(review): despite the docstring, the up-to-date case returns
        None (also falsey) rather than False — callers appear to rely only
        on truthiness.
        """
        origin = self.repo.remotes[0]
        try:
            fetch_results = origin.fetch()
        except AssertionError:
            # Retry the fetch once on AssertionError — presumably a
            # transient GitPython issue; TODO confirm against the supported
            # GitPython versions.
            fetch_results = origin.fetch()

        new_objs = False
        for fetchinfo in fetch_results:
            if fetchinfo.old_commit is not None:
                # An existing ref moved to a new commit
                log.debug(
                    "%s has updated '%s' for remote '%s' from %s to %s",
                    self.role,
                    fetchinfo.name,
                    self.id,
                    fetchinfo.old_commit.hexsha[:7],
                    fetchinfo.commit.hexsha[:7],
                )
                new_objs = True
            elif fetchinfo.flags in (fetchinfo.NEW_TAG, fetchinfo.NEW_HEAD):
                # A brand-new tag or branch head was fetched
                log.debug(
                    "%s has fetched new %s '%s' for remote '%s'",
                    self.role,
                    "tag" if fetchinfo.flags == fetchinfo.NEW_TAG else "head",
                    fetchinfo.name,
                    self.id,
                )
                new_objs = True

        cleaned = self.clean_stale_refs()
        return True if (new_objs or cleaned) else None

    def file_list(self, tgt_env):
        """
        Get file list for the target environment using GitPython.
        Returns a 2-tuple of (files, symlinks): a set of file paths and a
        dict mapping symlink paths to their targets.
        """
        files = set()
        symlinks = {}
        tree = self.get_tree(tgt_env)
        if not tree:
            # Not found, return empty objects
            return files, symlinks
        if self.root(tgt_env):
            try:
                # Descend into the configured root subdirectory
                tree = tree / self.root(tgt_env)
            except KeyError:
                return files, symlinks
            relpath = lambda path: os.path.relpath(path, self.root(tgt_env))
        else:
            relpath = lambda path: path
        # Prefix each path with the per-saltenv mountpoint
        add_mountpoint = lambda path: salt.utils.path.join(
            self.mountpoint(tgt_env), path, use_posixpath=True
        )
        for file_blob in tree.traverse():
            if not isinstance(file_blob, git.Blob):
                continue
            file_path = add_mountpoint(relpath(file_blob.path))
            files.add(file_path)
            if stat.S_ISLNK(file_blob.mode):
                # Symlink blobs store the link target as their content
                stream = io.BytesIO()
                file_blob.stream_data(stream)
                stream.seek(0)
                link_tgt = salt.utils.stringutils.to_str(stream.read())
                stream.close()
                symlinks[file_path] = link_tgt
        return files, symlinks

    def find_file(self, path, tgt_env):
        """
        Find the specified file in the specified environment.
        Returns a 3-tuple of (blob, blob SHA, blob mode), or
        (None, None, None) if the file cannot be resolved.
        """
        tree = self.get_tree(tgt_env)
        if not tree:
            # Branch/tag/SHA not found in repo
            return None, None, None
        blob = None
        depth = 0
        while True:
            # Bound symlink resolution to avoid infinite loops on cyclic links
            depth += 1
            if depth > SYMLINK_RECURSE_DEPTH:
                blob = None
                break
            try:
                file_blob = tree / path
                if stat.S_ISLNK(file_blob.mode):
                    # Path is a symlink. The blob data corresponding to
                    # this path's object ID will be the target of the
                    # symlink. Follow the symlink and set path to the
                    # location indicated in the blob data.
                    stream = io.BytesIO()
                    file_blob.stream_data(stream)
                    stream.seek(0)
                    link_tgt = salt.utils.stringutils.to_str(stream.read())
                    stream.close()
                    path = salt.utils.path.join(
                        os.path.dirname(path), link_tgt, use_posixpath=True
                    )
                else:
                    blob = file_blob
                    if isinstance(blob, git.Tree):
                        # Path is a directory, not a file.
                        blob = None
                    break
            except KeyError:
                # File not found or repo_path points to a directory
                blob = None
                break
        if isinstance(blob, git.Blob):
            return blob, blob.hexsha, blob.mode
        return None, None, None

    def get_tree_from_branch(self, ref):
        """
        Return a git.Tree object matching a head ref fetched into
        refs/remotes/origin/, or None if the ref does not resolve.
        """
        try:
            return git.RemoteReference(
                self.repo, "refs/remotes/origin/{}".format(ref)
            ).commit.tree
        except ValueError:
            return None

    def get_tree_from_tag(self, ref):
        """
        Return a git.Tree object matching a tag ref fetched into refs/tags/,
        or None if the ref does not resolve.
        """
        try:
            return git.TagReference(self.repo, "refs/tags/{}".format(ref)).commit.tree
        except ValueError:
            return None

    def get_tree_from_sha(self, ref):
        """
        Return a git.Tree object matching a SHA, or None if the SHA cannot
        be resolved.
        """
        try:
            return self.repo.rev_parse(ref).tree
        except (gitdb.exc.ODBError, AttributeError):
            return None

    def write_file(self, blob, dest):
        """
        Using the blob object, write the file to the destination path
        """
        with salt.utils.files.fopen(dest, "wb+") as fp_:
            blob.stream_data(fp_)
1493
1494
1495class Pygit2(GitProvider):
1496    """
1497    Interface to Pygit2
1498    """
1499
1500    def __init__(
1501        self,
1502        opts,
1503        remote,
1504        per_remote_defaults,
1505        per_remote_only,
1506        override_params,
1507        cache_root,
1508        role="gitfs",
1509    ):
1510        self.provider = "pygit2"
1511        super().__init__(
1512            opts,
1513            remote,
1514            per_remote_defaults,
1515            per_remote_only,
1516            override_params,
1517            cache_root,
1518            role,
1519        )
1520
1521    def peel(self, obj):
1522        """
1523        Compatibility function for pygit2.Reference objects. Older versions of
1524        pygit2 use .get_object() to return the object to which the reference
1525        points, while newer versions use .peel(). In pygit2 0.27.4,
1526        .get_object() was removed. This function will try .peel() first and
1527        fall back to .get_object().
1528        """
1529        try:
1530            return obj.peel()
1531        except AttributeError:
1532            return obj.get_object()
1533
1534    def checkout(self):
1535        """
1536        Checkout the configured branch/tag
1537        """
1538        tgt_ref = self.get_checkout_target()
1539        local_ref = "refs/heads/" + tgt_ref
1540        remote_ref = "refs/remotes/origin/" + tgt_ref
1541        tag_ref = "refs/tags/" + tgt_ref
1542
1543        try:
1544            local_head = self.repo.lookup_reference("HEAD")
1545        except KeyError:
1546            log.warning("HEAD not present in %s remote '%s'", self.role, self.id)
1547            return None
1548
1549        try:
1550            head_sha = self.peel(local_head).hex
1551        except AttributeError:
1552            # Shouldn't happen, but just in case a future pygit2 API change
1553            # breaks things, avoid a traceback and log an error.
1554            log.error(
1555                "Unable to get SHA of HEAD for %s remote '%s'", self.role, self.id
1556            )
1557            return None
1558        except KeyError:
1559            head_sha = None
1560
1561        refs = self.repo.listall_references()
1562
1563        def _perform_checkout(checkout_ref, branch=True):
1564            """
1565            DRY function for checking out either a branch or a tag
1566            """
1567            try:
1568                with self.gen_lock(lock_type="checkout"):
1569                    # Checkout the local branch corresponding to the
1570                    # remote ref.
1571                    self.repo.checkout(checkout_ref)
1572                    if branch:
1573                        self.repo.reset(oid, pygit2.GIT_RESET_HARD)
1574                return True
1575            except GitLockError as exc:
1576                if exc.errno == errno.EEXIST:
1577                    # Re-raise with a different strerror containing a
1578                    # more meaningful error message for the calling
1579                    # function.
1580                    raise GitLockError(
1581                        exc.errno,
1582                        "Checkout lock exists for {} remote '{}'".format(
1583                            self.role, self.id
1584                        ),
1585                    )
1586                else:
1587                    log.error(
1588                        "Error %d encountered obtaining checkout lock "
1589                        "for %s remote '%s'",
1590                        exc.errno,
1591                        self.role,
1592                        self.id,
1593                    )
1594            return False
1595
1596        try:
1597            if remote_ref not in refs and tag_ref not in refs and self.fallback:
1598                tgt_ref = self.fallback
1599                local_ref = "refs/heads/" + tgt_ref
1600                remote_ref = "refs/remotes/origin/" + tgt_ref
1601                tag_ref = "refs/tags/" + tgt_ref
1602            if remote_ref in refs:
1603                # Get commit id for the remote ref
1604                oid = self.peel(self.repo.lookup_reference(remote_ref)).id
1605                if local_ref not in refs:
1606                    # No local branch for this remote, so create one and point
1607                    # it at the commit id of the remote ref
1608                    self.repo.create_reference(local_ref, oid)
1609
1610                try:
1611                    target_sha = self.peel(self.repo.lookup_reference(remote_ref)).hex
1612                except KeyError:
1613                    log.error(
1614                        "pygit2 was unable to get SHA for %s in %s remote '%s'",
1615                        local_ref,
1616                        self.role,
1617                        self.id,
1618                        exc_info=True,
1619                    )
1620                    return None
1621
1622                # Only perform a checkout if HEAD and target are not pointing
1623                # at the same SHA1.
1624                if head_sha != target_sha:
1625                    # Check existence of the ref in refs/heads/ which
1626                    # corresponds to the local HEAD. Checking out local_ref
1627                    # below when no local ref for HEAD is missing will raise an
1628                    # exception in pygit2 >= 0.21. If this ref is not present,
1629                    # create it. The "head_ref != local_ref" check ensures we
1630                    # don't try to add this ref if it is not necessary, as it
1631                    # would have been added above already. head_ref would be
1632                    # the same as local_ref if the branch name was changed but
1633                    # the cachedir was not (for example if a "name" parameter
1634                    # was used in a git_pillar remote, or if we are using
1635                    # winrepo which takes the basename of the repo as the
1636                    # cachedir).
1637                    head_ref = local_head.target
1638                    # If head_ref is not a string, it will point to a
1639                    # pygit2.Oid object and we are in detached HEAD mode.
1640                    # Therefore, there is no need to add a local reference. If
1641                    # head_ref == local_ref, then the local reference for HEAD
1642                    # in refs/heads/ already exists and again, no need to add.
1643                    if (
1644                        isinstance(head_ref, str)
1645                        and head_ref not in refs
1646                        and head_ref != local_ref
1647                    ):
1648                        branch_name = head_ref.partition("refs/heads/")[-1]
1649                        if not branch_name:
1650                            # Shouldn't happen, but log an error if it does
1651                            log.error(
1652                                "pygit2 was unable to resolve branch name from "
1653                                "HEAD ref '%s' in %s remote '%s'",
1654                                head_ref,
1655                                self.role,
1656                                self.id,
1657                            )
1658                            return None
1659                        remote_head = "refs/remotes/origin/" + branch_name
1660                        if remote_head not in refs:
1661                            # No remote ref for HEAD exists. This can happen in
1662                            # the first-time git_pillar checkout when when the
1663                            # remote repo does not have a master branch. Since
1664                            # we need a HEAD reference to keep pygit2 from
1665                            # throwing an error, and none exists in
1666                            # refs/remotes/origin, we'll just point HEAD at the
1667                            # remote_ref.
1668                            remote_head = remote_ref
1669                        self.repo.create_reference(
1670                            head_ref, self.repo.lookup_reference(remote_head).target
1671                        )
1672
1673                    if not _perform_checkout(local_ref, branch=True):
1674                        return None
1675
1676                # Return the relative root, if present
1677                return self.check_root()
1678
1679            elif tag_ref in refs:
1680                tag_obj = self.repo.revparse_single(tag_ref)
1681                if not isinstance(tag_obj, (pygit2.Commit, pygit2.Tag)):
1682                    log.error(
1683                        "%s does not correspond to pygit2 Commit or Tag object. It is"
1684                        " of type %s",
1685                        tag_ref,
1686                        type(tag_obj),
1687                    )
1688                else:
1689                    try:
1690                        # If no AttributeError raised, this is an annotated tag
1691                        tag_sha = tag_obj.target.hex
1692                    except AttributeError:
1693                        try:
1694                            tag_sha = tag_obj.hex
1695                        except AttributeError:
1696                            # Shouldn't happen, but could if a future pygit2
1697                            # API change breaks things.
1698                            log.error(
1699                                "Unable to resolve %s from %s remote '%s' "
1700                                "to either an annotated or non-annotated tag",
1701                                tag_ref,
1702                                self.role,
1703                                self.id,
1704                                exc_info=True,
1705                            )
1706                            return None
1707                    log.debug("SHA of tag %s: %s", tgt_ref, tag_sha)
1708
1709                    if head_sha != tag_sha:
1710                        if not _perform_checkout(tag_ref, branch=False):
1711                            return None
1712
1713                    # Return the relative root, if present
1714                    return self.check_root()
1715        except GitLockError:
1716            raise
1717        except Exception as exc:  # pylint: disable=broad-except
1718            log.error(
1719                "Failed to checkout %s from %s remote '%s': %s",
1720                tgt_ref,
1721                self.role,
1722                self.id,
1723                exc,
1724                exc_info=True,
1725            )
1726            return None
1727        log.error(
1728            "Failed to checkout %s from %s remote '%s': remote ref does not exist",
1729            tgt_ref,
1730            self.role,
1731            self.id,
1732        )
1733        return None
1734
1735    def clean_stale_refs(self, local_refs=None):  # pylint: disable=arguments-differ
1736        """
1737        Clean stale local refs so they don't appear as fileserver environments
1738        """
1739        try:
1740            if pygit2.GIT_FETCH_PRUNE:
1741                # Don't need to clean anything, pygit2 can do it by itself
1742                return []
1743        except AttributeError:
1744            # However, only in 0.26.2 and newer
1745            pass
1746        if self.credentials is not None:
1747            log.debug(
1748                "The installed version of pygit2 (%s) does not support "
1749                "detecting stale refs for authenticated remotes, saltenvs "
1750                "will not reflect branches/tags removed from remote '%s'",
1751                PYGIT2_VERSION,
1752                self.id,
1753            )
1754            return []
1755        return super().clean_stale_refs()
1756
    def init_remote(self):
        """
        Initialize/attach to a remote using pygit2. Return a boolean which
        will let the calling function know whether or not a new repo was
        initialized by this function.
        """
        # Point the global git config search path at the expanded home
        # directory, working around libgit2 failing to locate the global
        # config on its own:
        # https://github.com/libgit2/pygit2/issues/339
        # https://github.com/libgit2/libgit2/issues/2122
        home = os.path.expanduser("~")
        pygit2.settings.search_path[pygit2.GIT_CONFIG_LEVEL_GLOBAL] = home
        new = False
        if not os.listdir(self.cachedir):
            # Repo cachedir is empty, initialize a new repo there
            self.repo = pygit2.init_repository(self.cachedir)
            new = True
        else:
            # Repo cachedir exists, try to attach
            try:
                self.repo = pygit2.Repository(self.cachedir)
            except KeyError:
                # Cachedir is non-empty but not a usable git repository
                log.error(_INVALID_REPO, self.cachedir, self.url, self.role)
                return new

        self.gitdir = salt.utils.path.join(self.repo.workdir, ".git")
        self.enforce_git_config()
        git_config = os.path.join(self.gitdir, "config")
        if os.path.exists(git_config) and PYGIT2_VERSION >= _LooseVersion("0.28.0"):
            # Explicitly add the repo's config file on pygit2 >= 0.28.0 --
            # presumably newer pygit2 no longer picks it up implicitly;
            # TODO confirm against pygit2 changelog
            self.repo.config.add_file(git_config)

        return new
1787
1788    def dir_list(self, tgt_env):
1789        """
1790        Get a list of directories for the target environment using pygit2
1791        """
1792
1793        def _traverse(tree, blobs, prefix):
1794            """
1795            Traverse through a pygit2 Tree object recursively, accumulating all
1796            the empty directories within it in the "blobs" list
1797            """
1798            for entry in iter(tree):
1799                if entry.oid not in self.repo:
1800                    # Entry is a submodule, skip it
1801                    continue
1802                blob = self.repo[entry.oid]
1803                if not isinstance(blob, pygit2.Tree):
1804                    continue
1805                blobs.append(
1806                    salt.utils.path.join(prefix, entry.name, use_posixpath=True)
1807                )
1808                if blob:
1809                    _traverse(
1810                        blob,
1811                        blobs,
1812                        salt.utils.path.join(prefix, entry.name, use_posixpath=True),
1813                    )
1814
1815        ret = set()
1816        tree = self.get_tree(tgt_env)
1817        if not tree:
1818            return ret
1819        if self.root(tgt_env):
1820            try:
1821                oid = tree[self.root(tgt_env)].oid
1822                tree = self.repo[oid]
1823            except KeyError:
1824                return ret
1825            if not isinstance(tree, pygit2.Tree):
1826                return ret
1827            relpath = lambda path: os.path.relpath(path, self.root(tgt_env))
1828        else:
1829            relpath = lambda path: path
1830        blobs = []
1831        if tree:
1832            _traverse(tree, blobs, self.root(tgt_env))
1833        add_mountpoint = lambda path: salt.utils.path.join(
1834            self.mountpoint(tgt_env), path, use_posixpath=True
1835        )
1836        for blob in blobs:
1837            ret.add(add_mountpoint(relpath(blob)))
1838        if self.mountpoint(tgt_env):
1839            ret.add(self.mountpoint(tgt_env))
1840        return ret
1841
1842    def envs(self):
1843        """
1844        Check the refs and return a list of the ones which can be used as salt
1845        environments.
1846        """
1847        ref_paths = self.repo.listall_references()
1848        return self._get_envs_from_ref_paths(ref_paths)
1849
    def _fetch(self):
        """
        Fetch the repo. If the local copy was updated, return True. If the
        local copy was already up-to-date, return False.

        NOTE: in practice an up-to-date repo with no ref changes yields None
        and a fetch error yields False -- both falsy to callers, but not the
        literal False/True pair the summary above suggests.
        """
        origin = self.repo.remotes[0]
        # Snapshot the refs before fetching so that ref additions/removals
        # can be detected even when no new objects are received.
        refs_pre = self.repo.listall_references()
        fetch_kwargs = {}
        # pygit2 radically changed fetching in 0.23.2: auth moved from the
        # Remote object to a RemoteCallbacks instance (prepared by
        # setup_callbacks()).
        if self.remotecallbacks is not None:
            fetch_kwargs["callbacks"] = self.remotecallbacks
        else:
            if self.credentials is not None:
                # Pre-0.23.2 pygit2: credentials are assigned directly to
                # the remote object.
                origin.credentials = self.credentials
        try:
            fetch_kwargs["prune"] = pygit2.GIT_FETCH_PRUNE
        except AttributeError:
            # pruning only available in pygit2 >= 0.26.2
            pass
        try:
            fetch_results = origin.fetch(**fetch_kwargs)
        except GitError as exc:  # pylint: disable=broad-except
            exc_str = get_error_message(exc).lower()
            if "unsupported url protocol" in exc_str and isinstance(
                self.credentials, pygit2.Keypair
            ):
                # Misleading libgit2 error seen when libgit2 is built
                # without libssh2 and an SSH URL is configured.
                log.error(
                    "Unable to fetch SSH-based %s remote '%s'. "
                    "You may need to add ssh:// to the repo string or "
                    "libgit2 must be compiled with libssh2 to support "
                    "SSH authentication.",
                    self.role,
                    self.id,
                    exc_info=True,
                )
            elif "authentication required but no callback set" in exc_str:
                log.error(
                    "%s remote '%s' requires authentication, but no "
                    "authentication configured",
                    self.role,
                    self.id,
                    exc_info=True,
                )
            else:
                log.error(
                    "Error occurred fetching %s remote '%s': %s",
                    self.role,
                    self.id,
                    exc,
                    exc_info=True,
                )
            return False
        try:
            # pygit2.Remote.fetch() returns a dict in pygit2 < 0.21.0
            received_objects = fetch_results["received_objects"]
        except (AttributeError, TypeError):
            # pygit2.Remote.fetch() returns a class instance in
            # pygit2 >= 0.21.0
            received_objects = fetch_results.received_objects
        if received_objects != 0:
            log.debug(
                "%s received %s objects for remote '%s'",
                self.role,
                received_objects,
                self.id,
            )
        else:
            log.debug("%s remote '%s' is up-to-date", self.role, self.id)
        refs_post = self.repo.listall_references()
        cleaned = self.clean_stale_refs(local_refs=refs_post)
        # Truthy when new objects arrived, refs changed, or stale refs were
        # removed; None otherwise.
        return True if (received_objects or refs_pre != refs_post or cleaned) else None
1921
1922    def file_list(self, tgt_env):
1923        """
1924        Get file list for the target environment using pygit2
1925        """
1926
1927        def _traverse(tree, blobs, prefix):
1928            """
1929            Traverse through a pygit2 Tree object recursively, accumulating all
1930            the file paths and symlink info in the "blobs" dict
1931            """
1932            for entry in iter(tree):
1933                if entry.oid not in self.repo:
1934                    # Entry is a submodule, skip it
1935                    continue
1936                obj = self.repo[entry.oid]
1937                if isinstance(obj, pygit2.Blob):
1938                    repo_path = salt.utils.path.join(
1939                        prefix, entry.name, use_posixpath=True
1940                    )
1941                    blobs.setdefault("files", []).append(repo_path)
1942                    if stat.S_ISLNK(tree[entry.name].filemode):
1943                        link_tgt = self.repo[tree[entry.name].oid].data
1944                        blobs.setdefault("symlinks", {})[repo_path] = link_tgt
1945                elif isinstance(obj, pygit2.Tree):
1946                    _traverse(
1947                        obj,
1948                        blobs,
1949                        salt.utils.path.join(prefix, entry.name, use_posixpath=True),
1950                    )
1951
1952        files = set()
1953        symlinks = {}
1954        tree = self.get_tree(tgt_env)
1955        if not tree:
1956            # Not found, return empty objects
1957            return files, symlinks
1958        if self.root(tgt_env):
1959            try:
1960                # This might need to be changed to account for a root that
1961                # spans more than one directory
1962                oid = tree[self.root(tgt_env)].oid
1963                tree = self.repo[oid]
1964            except KeyError:
1965                return files, symlinks
1966            if not isinstance(tree, pygit2.Tree):
1967                return files, symlinks
1968            relpath = lambda path: os.path.relpath(path, self.root(tgt_env))
1969        else:
1970            relpath = lambda path: path
1971        blobs = {}
1972        if tree:
1973            _traverse(tree, blobs, self.root(tgt_env))
1974        add_mountpoint = lambda path: salt.utils.path.join(
1975            self.mountpoint(tgt_env), path, use_posixpath=True
1976        )
1977        for repo_path in blobs.get("files", []):
1978            files.add(add_mountpoint(relpath(repo_path)))
1979        for repo_path, link_tgt in blobs.get("symlinks", {}).items():
1980            symlinks[add_mountpoint(relpath(repo_path))] = link_tgt
1981        return files, symlinks
1982
1983    def find_file(self, path, tgt_env):
1984        """
1985        Find the specified file in the specified environment
1986        """
1987        tree = self.get_tree(tgt_env)
1988        if not tree:
1989            # Branch/tag/SHA not found in repo
1990            return None, None, None
1991        blob = None
1992        mode = None
1993        depth = 0
1994        while True:
1995            depth += 1
1996            if depth > SYMLINK_RECURSE_DEPTH:
1997                blob = None
1998                break
1999            try:
2000                entry = tree[path]
2001                mode = entry.filemode
2002                if stat.S_ISLNK(mode):
2003                    # Path is a symlink. The blob data corresponding to this
2004                    # path's object ID will be the target of the symlink. Follow
2005                    # the symlink and set path to the location indicated
2006                    # in the blob data.
2007                    link_tgt = self.repo[entry.oid].data
2008                    path = salt.utils.path.join(
2009                        os.path.dirname(path), link_tgt, use_posixpath=True
2010                    )
2011                else:
2012                    blob = self.repo[entry.oid]
2013                    if isinstance(blob, pygit2.Tree):
2014                        # Path is a directory, not a file.
2015                        blob = None
2016                    break
2017            except KeyError:
2018                blob = None
2019                break
2020        if isinstance(blob, pygit2.Blob):
2021            return blob, blob.hex, mode
2022        return None, None, None
2023
2024    def get_tree_from_branch(self, ref):
2025        """
2026        Return a pygit2.Tree object matching a head ref fetched into
2027        refs/remotes/origin/
2028        """
2029        try:
2030            return self.peel(
2031                self.repo.lookup_reference("refs/remotes/origin/{}".format(ref))
2032            ).tree
2033        except KeyError:
2034            return None
2035
2036    def get_tree_from_tag(self, ref):
2037        """
2038        Return a pygit2.Tree object matching a tag ref fetched into refs/tags/
2039        """
2040        try:
2041            return self.peel(
2042                self.repo.lookup_reference("refs/tags/{}".format(ref))
2043            ).tree
2044        except KeyError:
2045            return None
2046
2047    def get_tree_from_sha(self, ref):
2048        """
2049        Return a pygit2.Tree object matching a SHA
2050        """
2051        try:
2052            return self.repo.revparse_single(ref).tree
2053        except (KeyError, TypeError, ValueError, AttributeError):
2054            return None
2055
2056    def setup_callbacks(self):
2057        """
2058        Assign attributes for pygit2 callbacks
2059        """
2060        if PYGIT2_VERSION >= _LooseVersion("0.23.2"):
2061            self.remotecallbacks = pygit2.RemoteCallbacks(credentials=self.credentials)
2062            if not self.ssl_verify:
2063                # Override the certificate_check function with a lambda that
2064                # just returns True, thus skipping the cert check.
2065                self.remotecallbacks.certificate_check = lambda *args, **kwargs: True
2066        else:
2067            self.remotecallbacks = None
2068            if not self.ssl_verify:
2069                warnings.warn(
2070                    "pygit2 does not support disabling the SSL certificate "
2071                    "check in versions prior to 0.23.2 (installed: {}). "
2072                    "Fetches for self-signed certificates will fail.".format(
2073                        PYGIT2_VERSION
2074                    )
2075                )
2076
    def verify_auth(self):
        """
        Check the username and password/keypair info for validity. If valid,
        set a 'credentials' attribute consisting of the appropriate Pygit2
        credentials object. Return False if a required auth param is not
        present. Return True if the required auth parameters are present (or
        auth is not configured), otherwise failhard if there is a problem with
        authentication.
        """
        self.credentials = None

        if os.path.isabs(self.url):
            # If the URL is an absolute file path, there is no authentication.
            return True
        elif not any(getattr(self, x, None) for x in AUTH_PARAMS):
            # Auth information not configured for this remote
            return True

        def _incomplete_auth(missing):
            """
            Helper function to log errors about missing auth parameters
            """
            log.critical(
                "Incomplete authentication information for %s remote "
                "'%s'. Missing parameters: %s",
                self.role,
                self.id,
                ", ".join(missing),
            )
            failhard(self.role)

        def _key_does_not_exist(key_type, path):
            """
            Helper function to log errors about missing key file
            """
            log.critical(
                "SSH %s (%s) for %s remote '%s' could not be found, path "
                "may be incorrect. Note that it may be necessary to clear "
                "git_pillar locks to proceed once this is resolved and the "
                "master has been started back up. A warning will be logged "
                "if this is the case, with instructions.",
                key_type,
                path,
                self.role,
                self.id,
            )
            failhard(self.role)

        # Split the URL into transport and address; a URL without "://" is
        # treated as scp-like SSH syntax below.
        transport, _, address = self.url.partition("://")
        if not address:
            # Assume scp-like SSH syntax (user@domain.tld:relative/path.git)
            transport = "ssh"
            address = self.url

        transport = transport.lower()

        if transport in ("git", "file"):
            # These transports do not use auth
            return True

        elif "ssh" in transport:
            required_params = ("pubkey", "privkey")
            user = address.split("@")[0]
            if user == address:
                # No '@' sign == no user. This is a problem.
                log.critical(
                    "Keypair specified for %s remote '%s', but remote URL "
                    "is missing a username",
                    self.role,
                    self.id,
                )
                failhard(self.role)

            self.user = user
            if all(bool(getattr(self, x, None)) for x in required_params):
                # Order matches the positional args of pygit2.Keypair:
                # (username, pubkey, privkey, passphrase)
                keypair_params = [
                    getattr(self, x, None)
                    for x in ("user", "pubkey", "privkey", "passphrase")
                ]
                # Check pubkey and privkey to make sure file exists
                for idx, key_type in ((1, "pubkey"), (2, "privkey")):
                    key_path = keypair_params[idx]
                    if key_path is not None:
                        try:
                            if not os.path.isfile(key_path):
                                _key_does_not_exist(key_type, key_path)
                        except TypeError:
                            # Non-path value (bad config type) -- treat the
                            # same as a missing key file
                            _key_does_not_exist(key_type, key_path)
                self.credentials = pygit2.Keypair(*keypair_params)
                return True
            else:
                missing_auth = [
                    x for x in required_params if not bool(getattr(self, x, None))
                ]
                _incomplete_auth(missing_auth)

        elif "http" in transport:
            required_params = ("user", "password")
            password_ok = all(bool(getattr(self, x, None)) for x in required_params)
            no_password_auth = not any(
                bool(getattr(self, x, None)) for x in required_params
            )
            if no_password_auth:
                # No auth params were passed, assuming this is unauthenticated
                # http(s).
                return True
            if password_ok:
                if transport == "http" and not self.insecure_auth:
                    # Refuse to send credentials over plaintext HTTP unless
                    # the user explicitly opted in via insecure_auth
                    log.critical(
                        "Invalid configuration for %s remote '%s'. "
                        "Authentication is disabled by default on http "
                        "remotes. Either set %s_insecure_auth to True in the "
                        "master configuration file, set a per-remote config "
                        "option named 'insecure_auth' to True, or use https "
                        "or ssh-based authentication.",
                        self.role,
                        self.id,
                        self.role,
                    )
                    failhard(self.role)
                self.credentials = pygit2.UserPass(self.user, self.password)
                return True
            else:
                missing_auth = [
                    x for x in required_params if not bool(getattr(self, x, None))
                ]
                _incomplete_auth(missing_auth)
        else:
            log.critical(
                "Invalid configuration for %s remote '%s'. Unsupported transport '%s'.",
                self.role,
                self.id,
                transport,
            )
            failhard(self.role)
2212
2213    def write_file(self, blob, dest):
2214        """
2215        Using the blob object, write the file to the destination path
2216        """
2217        with salt.utils.files.fopen(dest, "wb+") as fp_:
2218            fp_.write(blob.data)
2219
2220
# Map of provider name (as selected via the <role>_provider config option)
# to the class implementing that provider. GitBase subclasses may override
# this mapping via the git_providers constructor argument.
GIT_PROVIDERS = {
    "pygit2": Pygit2,
    "gitpython": GitPython,
}
2225
2226
2227class GitBase:
2228    """
2229    Base class for gitfs/git_pillar
2230    """
2231
    def __init__(
        self,
        opts,
        remotes=None,
        per_remote_overrides=(),
        per_remote_only=PER_REMOTE_ONLY,
        global_only=GLOBAL_ONLY,
        git_providers=None,
        cache_root=None,
        init_remotes=True,
    ):
        """
        IMPORTANT: If specifying a cache_root, understand that this is also
        where the remotes will be cloned. A non-default cache_root is only
        really designed right now for winrepo, as its repos need to be checked
        out into the winrepo locations and not within the cachedir.

        As of the 2018.3 release cycle, the classes used to interface with
        Pygit2 and GitPython can be overridden by passing the git_providers
        argument when spawning a class instance. This allows for one to write
        classes which inherit from salt.utils.gitfs.Pygit2 or
        salt.utils.gitfs.GitPython, and then direct one of the GitBase
        subclasses (GitFS, GitPillar, WinRepo) to use the custom class. For
        example:

        .. code-block:: Python

            import salt.utils.gitfs
            from salt.fileserver.gitfs import PER_REMOTE_OVERRIDES, PER_REMOTE_ONLY

            class CustomPygit2(salt.utils.gitfs.Pygit2):
                def fetch_remotes(self):
                    ...
                    Alternate fetch behavior here
                    ...

            git_providers = {
                'pygit2': CustomPygit2,
                'gitpython': salt.utils.gitfs.GitPython,
            }

            gitfs = salt.utils.gitfs.GitFS(
                __opts__,
                __opts__['gitfs_remotes'],
                per_remote_overrides=PER_REMOTE_OVERRIDES,
                per_remote_only=PER_REMOTE_ONLY,
                git_providers=git_providers)

            gitfs.fetch_remotes()
        """
        self.opts = opts
        # Allow custom provider classes (see docstring example); fall back
        # to the module-level map of stock providers.
        self.git_providers = (
            git_providers if git_providers is not None else GIT_PROVIDERS
        )
        # Validate the configured provider (defined on subclasses/elsewhere
        # in this module -- presumably aborts on misconfiguration)
        self.verify_provider()
        if cache_root is not None:
            # Custom cache root (winrepo use case): remotes are cloned
            # directly into this directory.
            self.cache_root = self.remote_root = cache_root
        else:
            self.cache_root = salt.utils.path.join(self.opts["cachedir"], self.role)
            self.remote_root = salt.utils.path.join(self.cache_root, "remotes")
        # Cache-file/dir locations under the cache root
        self.env_cache = salt.utils.path.join(self.cache_root, "envs.p")
        self.hash_cachedir = salt.utils.path.join(self.cache_root, "hash")
        self.file_list_cachedir = salt.utils.path.join(
            self.opts["cachedir"], "file_lists", self.role
        )
        if init_remotes:
            self.init_remotes(
                remotes if remotes is not None else [],
                per_remote_overrides,
                per_remote_only,
                global_only,
            )
2304
2305    def init_remotes(
2306        self,
2307        remotes,
2308        per_remote_overrides=(),
2309        per_remote_only=PER_REMOTE_ONLY,
2310        global_only=GLOBAL_ONLY,
2311    ):
2312        """
2313        Initialize remotes
2314        """
2315        # The global versions of the auth params (gitfs_user,
2316        # gitfs_password, etc.) default to empty strings. If any of them
2317        # are defined and the provider is not one that supports auth, then
2318        # error out and do not proceed.
2319        override_params = copy.deepcopy(per_remote_overrides)
2320        global_auth_params = [
2321            "{}_{}".format(self.role, x)
2322            for x in AUTH_PARAMS
2323            if self.opts["{}_{}".format(self.role, x)]
2324        ]
2325        if self.provider in AUTH_PROVIDERS:
2326            override_params += AUTH_PARAMS
2327        elif global_auth_params:
2328            msg = (
2329                "{0} authentication was configured, but the '{1}' "
2330                "{0}_provider does not support authentication. The "
2331                "providers for which authentication is supported in {0} "
2332                "are: {2}.".format(self.role, self.provider, ", ".join(AUTH_PROVIDERS))
2333            )
2334            if self.role == "gitfs":
2335                msg += (
2336                    " See the GitFS Walkthrough in the Salt documentation "
2337                    "for further information."
2338                )
2339            log.critical(msg)
2340            failhard(self.role)
2341
2342        per_remote_defaults = {}
2343        global_values = set(override_params)
2344        global_values.update(set(global_only))
2345        for param in global_values:
2346            key = "{}_{}".format(self.role, param)
2347            if key not in self.opts:
2348                log.critical(
2349                    "Key '%s' not present in global configuration. This is "
2350                    "a bug, please report it.",
2351                    key,
2352                )
2353                failhard(self.role)
2354            per_remote_defaults[param] = enforce_types(key, self.opts[key])
2355
2356        self.remotes = []
2357        for remote in remotes:
2358            repo_obj = self.git_providers[self.provider](
2359                self.opts,
2360                remote,
2361                per_remote_defaults,
2362                per_remote_only,
2363                override_params,
2364                self.cache_root,
2365                self.role,
2366            )
2367            if hasattr(repo_obj, "repo"):
2368                # Sanity check and assign the credential parameter
2369                repo_obj.verify_auth()
2370                repo_obj.setup_callbacks()
2371                if self.opts["__role"] == "minion" and repo_obj.new:
2372                    # Perform initial fetch on masterless minion
2373                    repo_obj.fetch()
2374
2375                # Reverse map to be used when running envs() to detect the
2376                # available envs.
2377                repo_obj.saltenv_revmap = {}
2378
2379                for saltenv, saltenv_conf in repo_obj.saltenv.items():
2380                    if "ref" in saltenv_conf:
2381                        ref = saltenv_conf["ref"]
2382                        repo_obj.saltenv_revmap.setdefault(ref, []).append(saltenv)
2383
2384                        if saltenv == "base":
2385                            # Remove redundant 'ref' config for base saltenv
2386                            repo_obj.saltenv[saltenv].pop("ref")
2387                            if ref != repo_obj.base:
2388                                log.warning(
2389                                    "The 'base' environment has been "
2390                                    "defined in the 'saltenv' param for %s "
2391                                    "remote %s and will override the "
2392                                    "branch/tag specified by %s_base (or a "
2393                                    "per-remote 'base' parameter).",
2394                                    self.role,
2395                                    repo_obj.id,
2396                                    self.role,
2397                                )
2398                                # Rewrite 'base' config param
2399                                repo_obj.base = ref
2400
2401                # Build list of all envs defined by ref mappings in the
2402                # per-remote 'saltenv' param. We won't add any matching envs
2403                # from the global saltenv map to the revmap.
2404                all_envs = []
2405                for env_names in repo_obj.saltenv_revmap.values():
2406                    all_envs.extend(env_names)
2407
2408                # Add the global saltenv map to the reverse map, skipping envs
2409                # explicitly mapped in the per-remote 'saltenv' param.
2410                for key, conf in repo_obj.global_saltenv.items():
2411                    if key not in all_envs and "ref" in conf:
2412                        repo_obj.saltenv_revmap.setdefault(conf["ref"], []).append(key)
2413
2414                self.remotes.append(repo_obj)
2415
2416        # Don't allow collisions in cachedir naming
2417        cachedir_map = {}
2418        for repo in self.remotes:
2419            cachedir_map.setdefault(repo.cachedir, []).append(repo.id)
2420
2421        collisions = [x for x in cachedir_map if len(cachedir_map[x]) > 1]
2422        if collisions:
2423            for dirname in collisions:
2424                log.critical(
2425                    "The following %s remotes have conflicting cachedirs: "
2426                    "%s. Resolve this using a per-remote parameter called "
2427                    "'name'.",
2428                    self.role,
2429                    ", ".join(cachedir_map[dirname]),
2430                )
2431                failhard(self.role)
2432
2433        if any(x.new for x in self.remotes):
2434            self.write_remote_map()
2435
2436    def clear_old_remotes(self):
2437        """
2438        Remove cache directories for remotes no longer configured
2439        """
2440        try:
2441            cachedir_ls = os.listdir(self.cache_root)
2442        except OSError:
2443            cachedir_ls = []
2444        # Remove actively-used remotes from list
2445        for repo in self.remotes:
2446            try:
2447                cachedir_ls.remove(repo.cachedir_basename)
2448            except ValueError:
2449                pass
2450        to_remove = []
2451        for item in cachedir_ls:
2452            if item in ("hash", "refs"):
2453                continue
2454            path = salt.utils.path.join(self.cache_root, item)
2455            if os.path.isdir(path):
2456                to_remove.append(path)
2457        failed = []
2458        if to_remove:
2459            for rdir in to_remove:
2460                try:
2461                    shutil.rmtree(rdir)
2462                except OSError as exc:
2463                    log.error(
2464                        "Unable to remove old %s remote cachedir %s: %s",
2465                        self.role,
2466                        rdir,
2467                        exc,
2468                    )
2469                    failed.append(rdir)
2470                else:
2471                    log.debug("%s removed old cachedir %s", self.role, rdir)
2472        for fdir in failed:
2473            to_remove.remove(fdir)
2474        ret = bool(to_remove)
2475        if ret:
2476            self.write_remote_map()
2477        return ret
2478
2479    def clear_cache(self):
2480        """
2481        Completely clear cache
2482        """
2483        errors = []
2484        for rdir in (self.cache_root, self.file_list_cachedir):
2485            if os.path.exists(rdir):
2486                try:
2487                    shutil.rmtree(rdir)
2488                except OSError as exc:
2489                    errors.append("Unable to delete {}: {}".format(rdir, exc))
2490        return errors
2491
2492    def clear_lock(self, remote=None, lock_type="update"):
2493        """
2494        Clear update.lk for all remotes
2495        """
2496        cleared = []
2497        errors = []
2498        for repo in self.remotes:
2499            if remote:
2500                # Specific remote URL/pattern was passed, ensure that the URL
2501                # matches or else skip this one
2502                try:
2503                    if not fnmatch.fnmatch(repo.url, remote):
2504                        continue
2505                except TypeError:
2506                    # remote was non-string, try again
2507                    if not fnmatch.fnmatch(repo.url, str(remote)):
2508                        continue
2509            success, failed = repo.clear_lock(lock_type=lock_type)
2510            cleared.extend(success)
2511            errors.extend(failed)
2512        return cleared, errors
2513
2514    def fetch_remotes(self, remotes=None):
2515        """
2516        Fetch all remotes and return a boolean to let the calling function know
2517        whether or not any remotes were updated in the process of fetching
2518        """
2519        if remotes is None:
2520            remotes = []
2521        elif isinstance(remotes, str):
2522            remotes = remotes.split(",")
2523        elif not isinstance(remotes, list):
2524            log.error(
2525                "Invalid 'remotes' argument (%s) for fetch_remotes. "
2526                "Must be a list of strings",
2527                remotes,
2528            )
2529            remotes = []
2530
2531        changed = False
2532        for repo in self.remotes:
2533            name = getattr(repo, "name", None)
2534            if not remotes or (repo.id, name) in remotes or name in remotes:
2535                try:
2536                    if repo.fetch():
2537                        # We can't just use the return value from repo.fetch()
2538                        # because the data could still have changed if old
2539                        # remotes were cleared above. Additionally, we're
2540                        # running this in a loop and later remotes without
2541                        # changes would override this value and make it
2542                        # incorrect.
2543                        changed = True
2544                except Exception as exc:  # pylint: disable=broad-except
2545                    log.error(
2546                        "Exception caught while fetching %s remote '%s': %s",
2547                        self.role,
2548                        repo.id,
2549                        exc,
2550                        exc_info=True,
2551                    )
2552        return changed
2553
2554    def lock(self, remote=None):
2555        """
2556        Place an update.lk
2557        """
2558        locked = []
2559        errors = []
2560        for repo in self.remotes:
2561            if remote:
2562                # Specific remote URL/pattern was passed, ensure that the URL
2563                # matches or else skip this one
2564                try:
2565                    if not fnmatch.fnmatch(repo.url, remote):
2566                        continue
2567                except TypeError:
2568                    # remote was non-string, try again
2569                    if not fnmatch.fnmatch(repo.url, str(remote)):
2570                        continue
2571            success, failed = repo.lock()
2572            locked.extend(success)
2573            errors.extend(failed)
2574        return locked, errors
2575
2576    def update(self, remotes=None):
2577        """
2578        .. versionchanged:: 2018.3.0
2579            The remotes argument was added. This being a list of remote URLs,
2580            it will only update matching remotes. This actually matches on
2581            repo.id
2582
2583        Execute a git fetch on all of the repos and perform maintenance on the
2584        fileserver cache.
2585        """
2586        # data for the fileserver event
2587        data = {"changed": False, "backend": "gitfs"}
2588
2589        data["changed"] = self.clear_old_remotes()
2590        if self.fetch_remotes(remotes=remotes):
2591            data["changed"] = True
2592
2593        # A masterless minion will need a new env cache file even if no changes
2594        # were fetched.
2595        refresh_env_cache = self.opts["__role"] == "minion"
2596
2597        if data["changed"] is True or not os.path.isfile(self.env_cache):
2598            env_cachedir = os.path.dirname(self.env_cache)
2599            if not os.path.exists(env_cachedir):
2600                os.makedirs(env_cachedir)
2601            refresh_env_cache = True
2602
2603        if refresh_env_cache:
2604            new_envs = self.envs(ignore_cache=True)
2605            with salt.utils.files.fopen(self.env_cache, "wb+") as fp_:
2606                fp_.write(salt.payload.dumps(new_envs))
2607                log.trace("Wrote env cache data to %s", self.env_cache)
2608
2609        # if there is a change, fire an event
2610        if self.opts.get("fileserver_events", False):
2611            with salt.utils.event.get_event(
2612                "master",
2613                self.opts["sock_dir"],
2614                self.opts["transport"],
2615                opts=self.opts,
2616                listen=False,
2617            ) as event:
2618                event.fire_event(data, tagify(["gitfs", "update"], prefix="fileserver"))
2619        try:
2620            salt.fileserver.reap_fileserver_cache_dir(
2621                self.hash_cachedir, self.find_file
2622            )
2623        except OSError:
2624            # Hash file won't exist if no files have yet been served up
2625            pass
2626
2627    def update_intervals(self):
2628        """
2629        Returns a dictionary mapping remote IDs to their intervals, designed to
2630        be used for variable update intervals in salt.master.FileserverUpdate.
2631
2632        A remote's ID is defined here as a tuple of the GitPython/Pygit2
2633        object's "id" and "name" attributes, with None being assumed as the
2634        "name" value if the attribute is not present.
2635        """
2636        return {
2637            (repo.id, getattr(repo, "name", None)): repo.update_interval
2638            for repo in self.remotes
2639        }
2640
2641    def verify_provider(self):
2642        """
2643        Determine which provider to use
2644        """
2645        if "verified_{}_provider".format(self.role) in self.opts:
2646            self.provider = self.opts["verified_{}_provider".format(self.role)]
2647        else:
2648            desired_provider = self.opts.get("{}_provider".format(self.role))
2649            if not desired_provider:
2650                if self.verify_pygit2(quiet=True):
2651                    self.provider = "pygit2"
2652                elif self.verify_gitpython(quiet=True):
2653                    self.provider = "gitpython"
2654            else:
2655                # Ensure non-lowercase providers work
2656                try:
2657                    desired_provider = desired_provider.lower()
2658                except AttributeError:
2659                    # Should only happen if someone does something silly like
2660                    # set the provider to a numeric value.
2661                    desired_provider = str(desired_provider).lower()
2662                if desired_provider not in self.git_providers:
2663                    log.critical(
2664                        "Invalid %s_provider '%s'. Valid choices are: %s",
2665                        self.role,
2666                        desired_provider,
2667                        ", ".join(self.git_providers),
2668                    )
2669                    failhard(self.role)
2670                elif desired_provider == "pygit2" and self.verify_pygit2():
2671                    self.provider = "pygit2"
2672                elif desired_provider == "gitpython" and self.verify_gitpython():
2673                    self.provider = "gitpython"
2674        if not hasattr(self, "provider"):
2675            log.critical("No suitable %s provider module is installed.", self.role)
2676            failhard(self.role)
2677
2678    def verify_gitpython(self, quiet=False):
2679        """
2680        Check if GitPython is available and at a compatible version (>= 0.3.0)
2681        """
2682
2683        def _recommend():
2684            if PYGIT2_VERSION and "pygit2" in self.git_providers:
2685                log.error(_RECOMMEND_PYGIT2, self.role, self.role)
2686
2687        if not GITPYTHON_VERSION:
2688            if not quiet:
2689                log.error(
2690                    "%s is configured but could not be loaded, is GitPython installed?",
2691                    self.role,
2692                )
2693                _recommend()
2694            return False
2695        elif "gitpython" not in self.git_providers:
2696            return False
2697
2698        errors = []
2699        if GITPYTHON_VERSION < GITPYTHON_MINVER:
2700            errors.append(
2701                "{} is configured, but the GitPython version is earlier than "
2702                "{}. Version {} detected.".format(
2703                    self.role, GITPYTHON_MINVER, GITPYTHON_VERSION
2704                )
2705            )
2706        if not salt.utils.path.which("git"):
2707            errors.append(
2708                "The git command line utility is required when using the "
2709                "'gitpython' {}_provider.".format(self.role)
2710            )
2711
2712        if errors:
2713            for error in errors:
2714                log.error(error)
2715            if not quiet:
2716                _recommend()
2717            return False
2718
2719        self.opts["verified_{}_provider".format(self.role)] = "gitpython"
2720        log.debug("gitpython %s_provider enabled", self.role)
2721        return True
2722
2723    def verify_pygit2(self, quiet=False):
2724        """
2725        Check if pygit2/libgit2 are available and at a compatible version.
2726        Pygit2 must be at least 0.20.3 and libgit2 must be at least 0.20.0.
2727        """
2728
2729        def _recommend():
2730            if GITPYTHON_VERSION and "gitpython" in self.git_providers:
2731                log.error(_RECOMMEND_GITPYTHON, self.role, self.role)
2732
2733        if not PYGIT2_VERSION:
2734            if not quiet:
2735                log.error(
2736                    "%s is configured but could not be loaded, are pygit2 "
2737                    "and libgit2 installed?",
2738                    self.role,
2739                )
2740                _recommend()
2741            return False
2742        elif "pygit2" not in self.git_providers:
2743            return False
2744
2745        errors = []
2746        if PYGIT2_VERSION < PYGIT2_MINVER:
2747            errors.append(
2748                "{} is configured, but the pygit2 version is earlier than "
2749                "{}. Version {} detected.".format(
2750                    self.role, PYGIT2_MINVER, PYGIT2_VERSION
2751                )
2752            )
2753        if LIBGIT2_VERSION < LIBGIT2_MINVER:
2754            errors.append(
2755                "{} is configured, but the libgit2 version is earlier than "
2756                "{}. Version {} detected.".format(
2757                    self.role, LIBGIT2_MINVER, LIBGIT2_VERSION
2758                )
2759            )
2760        if not getattr(pygit2, "GIT_FETCH_PRUNE", False) and not salt.utils.path.which(
2761            "git"
2762        ):
2763            errors.append(
2764                "The git command line utility is required when using the "
2765                "'pygit2' {}_provider.".format(self.role)
2766            )
2767
2768        if errors:
2769            for error in errors:
2770                log.error(error)
2771            if not quiet:
2772                _recommend()
2773            return False
2774
2775        self.opts["verified_{}_provider".format(self.role)] = "pygit2"
2776        log.debug("pygit2 %s_provider enabled", self.role)
2777        return True
2778
2779    def write_remote_map(self):
2780        """
2781        Write the remote_map.txt
2782        """
2783        remote_map = salt.utils.path.join(self.cache_root, "remote_map.txt")
2784        try:
2785            with salt.utils.files.fopen(remote_map, "w+") as fp_:
2786                timestamp = datetime.now().strftime("%d %b %Y %H:%M:%S.%f")
2787                fp_.write("# {}_remote map as of {}\n".format(self.role, timestamp))
2788                for repo in self.remotes:
2789                    fp_.write(
2790                        salt.utils.stringutils.to_str(
2791                            "{} = {}\n".format(repo.cachedir_basename, repo.id)
2792                        )
2793                    )
2794        except OSError:
2795            pass
2796        else:
2797            log.info("Wrote new %s remote map to %s", self.role, remote_map)
2798
2799    def do_checkout(self, repo):
2800        """
2801        Common code for git_pillar/winrepo to handle locking and checking out
2802        of a repo.
2803        """
2804        time_start = time.time()
2805        while time.time() - time_start <= 5:
2806            try:
2807                return repo.checkout()
2808            except GitLockError as exc:
2809                if exc.errno == errno.EEXIST:
2810                    time.sleep(0.1)
2811                    continue
2812                else:
2813                    log.error(
2814                        "Error %d encountered while obtaining checkout "
2815                        "lock for %s remote '%s': %s",
2816                        exc.errno,
2817                        repo.role,
2818                        repo.id,
2819                        exc,
2820                        exc_info=True,
2821                    )
2822                    break
2823        else:
2824            log.error(
2825                "Timed out waiting for checkout lock to be released for "
2826                "%s remote '%s'. If this error persists, run 'salt-run "
2827                "cache.clear_git_lock %s type=checkout' to clear it.",
2828                self.role,
2829                repo.id,
2830                self.role,
2831            )
2832        return None
2833
2834
2835class GitFS(GitBase):
2836    """
2837    Functionality specific to the git fileserver backend
2838    """
2839
2840    role = "gitfs"
2841    instance_map = weakref.WeakKeyDictionary()
2842
    def __new__(
        cls,
        opts,
        remotes=None,
        per_remote_overrides=(),
        per_remote_only=PER_REMOTE_ONLY,
        git_providers=None,
        cache_root=None,
        init_remotes=True,
    ):
        """
        If we are not initializing remotes (such as in cases where we just want
        to load the config so that we can run clear_cache), then just return a
        new __init__'ed object. Otherwise, check the instance map and re-use an
        instance if one exists for the current process. Weak references are
        used to ensure that we garbage collect instances for threads which have
        exited.
        """
        # No need to get the ioloop reference if we're not initializing remotes
        io_loop = salt.ext.tornado.ioloop.IOLoop.current() if init_remotes else None
        if not init_remotes or io_loop not in cls.instance_map:
            # We only evaluate the second condition in this if statement if
            # we're initializing remotes, so we won't get here unless io_loop
            # is something other than None.
            obj = object.__new__(cls)
            # All real initialization happens here (GitFS.__init__ is a no-op)
            # so that the cached instance returned below is not re-initialized
            # by Python's implicit __init__ call on every GitFS(...) invocation.
            super(GitFS, obj).__init__(
                opts,
                remotes if remotes is not None else [],
                per_remote_overrides=per_remote_overrides,
                per_remote_only=per_remote_only,
                git_providers=git_providers
                if git_providers is not None
                else GIT_PROVIDERS,
                cache_root=cache_root,
                init_remotes=init_remotes,
            )
            if not init_remotes:
                # Uninitialized objects are never cached in instance_map
                log.debug("Created gitfs object with uninitialized remotes")
            else:
                log.debug("Created gitfs object for process %s", os.getpid())
                # Add to the instance map so we can re-use later
                cls.instance_map[io_loop] = obj
            return obj
        log.debug("Re-using gitfs object for process %s", os.getpid())
        return cls.instance_map[io_loop]
2888
2889    # pylint: disable=super-init-not-called
    def __init__(
        self,
        opts,
        remotes,
        per_remote_overrides=(),
        per_remote_only=PER_REMOTE_ONLY,
        git_providers=None,
        cache_root=None,
        init_remotes=True,
    ):
        """
        No-op initializer.

        All real setup is performed in __new__() above, which may return a
        cached instance from the instance map; doing work here would cause
        that cached instance to be re-initialized on every GitFS(...) call.
        The signature must mirror __new__() because Python passes the same
        arguments to both.
        """
        # Initialization happens above in __new__(), so don't do anything here
        pass
2902
2903    # pylint: enable=super-init-not-called
2904
2905    def dir_list(self, load):
2906        """
2907        Return a list of all directories on the master
2908        """
2909        return self._file_lists(load, "dirs")
2910
2911    def envs(self, ignore_cache=False):
2912        """
2913        Return a list of refs that can be used as environments
2914        """
2915        if not ignore_cache:
2916            cache_match = salt.fileserver.check_env_cache(self.opts, self.env_cache)
2917            if cache_match is not None:
2918                return cache_match
2919        ret = set()
2920        for repo in self.remotes:
2921            repo_envs = repo.envs()
2922            for env_list in repo.saltenv_revmap.values():
2923                repo_envs.update(env_list)
2924            ret.update([x for x in repo_envs if repo.env_is_exposed(x)])
2925        return sorted(ret)
2926
    def find_file(self, path, tgt_env="base", **kwargs):  # pylint: disable=W0613
        """
        Find the first file to match the path and ref, read the file out of git
        and send the path to the newly cached file
        """
        fnd = {"path": "", "rel": ""}
        if os.path.isabs(path):
            # Fileserver paths are always relative
            return fnd

        # On-disk location the blob will be cached at
        dest = salt.utils.path.join(self.cache_root, "refs", tgt_env, path)
        # Glob matching every cached hash file for this path, any hash type
        hashes_glob = salt.utils.path.join(
            self.hash_cachedir, tgt_env, "{}.hash.*".format(path)
        )
        # Cached blob SHA1, used to decide whether the cached copy is current
        blobshadest = salt.utils.path.join(
            self.hash_cachedir, tgt_env, "{}.hash.blob_sha1".format(path)
        )
        # Lock file serializing concurrent writes of this cached file
        lk_fn = salt.utils.path.join(self.hash_cachedir, tgt_env, "{}.lk".format(path))
        destdir = os.path.dirname(dest)
        hashdir = os.path.dirname(blobshadest)
        if not os.path.isdir(destdir):
            try:
                os.makedirs(destdir)
            except OSError:
                # Path exists and is a file, remove it and retry
                os.remove(destdir)
                os.makedirs(destdir)
        if not os.path.isdir(hashdir):
            try:
                os.makedirs(hashdir)
            except OSError:
                # Path exists and is a file, remove it and retry
                os.remove(hashdir)
                os.makedirs(hashdir)

        for repo in self.remotes:
            # Skip remotes whose mountpoint does not prefix the requested path
            # (note: a path exactly equal to the mountpoint is also skipped,
            # since the separator-suffixed prefix will not match)
            if repo.mountpoint(tgt_env) and not path.startswith(
                repo.mountpoint(tgt_env) + os.sep
            ):
                continue
            # Unless tgt_env is a hex string or this remote has 'fallback'
            # configured, the env must be one of the exposed environments
            if (
                not salt.utils.stringutils.is_hex(tgt_env)
                and tgt_env not in self.envs()
                and not repo.fallback
            ):
                continue
            # Strip the mountpoint and prepend the per-remote root to obtain
            # the path of the file within the git repo itself
            repo_path = path[len(repo.mountpoint(tgt_env)) :].lstrip(os.sep)
            if repo.root(tgt_env):
                repo_path = salt.utils.path.join(repo.root(tgt_env), repo_path)

            blob, blob_hexsha, blob_mode = repo.find_file(repo_path, tgt_env)
            if blob is None:
                # Not in this remote; try the next one
                continue

            def _add_file_stat(fnd, mode):
                """
                Add a the mode to the return dict. In other fileserver backends
                we stat the file to get its mode, and add the stat result
                (passed through list() for better serialization) to the 'stat'
                key in the return dict. However, since we aren't using the
                stat result for anything but the mode at this time, we can
                avoid unnecessary work by just manually creating the list and
                not running an os.stat() on all files in the repo.
                """
                if mode is not None:
                    fnd["stat"] = [mode]
                return fnd

            # Wait for any other process currently writing this cache file
            salt.fileserver.wait_lock(lk_fn, dest)
            try:
                with salt.utils.files.fopen(blobshadest, "r") as fp_:
                    sha = salt.utils.stringutils.to_unicode(fp_.read())
                    if sha == blob_hexsha:
                        # Cached copy matches the blob; serve it as-is
                        fnd["rel"] = path
                        fnd["path"] = dest
                        return _add_file_stat(fnd, blob_mode)
            except OSError as exc:
                # ENOENT just means no cached hash yet; anything else is real
                if exc.errno != errno.ENOENT:
                    raise

            # Take the write lock before refreshing the cached file
            with salt.utils.files.fopen(lk_fn, "w"):
                pass

            # Invalidate stale hash files (all hash types) for this path
            for filename in glob.glob(hashes_glob):
                try:
                    os.remove(filename)
                except Exception:  # pylint: disable=broad-except
                    pass
            # Write contents of file to their destination in the FS cache
            repo.write_file(blob, dest)
            with salt.utils.files.fopen(blobshadest, "w+") as fp_:
                fp_.write(blob_hexsha)
            try:
                os.remove(lk_fn)
            except OSError:
                pass
            fnd["rel"] = path
            fnd["path"] = dest
            return _add_file_stat(fnd, blob_mode)

        # No matching file was found in tgt_env. Return a dict with empty paths
        # so the calling function knows the file could not be found.
        return fnd
3029
3030    def serve_file(self, load, fnd):
3031        """
3032        Return a chunk from a file based on the data received
3033        """
3034        if "env" in load:
3035            # "env" is not supported; Use "saltenv".
3036            load.pop("env")
3037
3038        ret = {"data": "", "dest": ""}
3039        required_load_keys = {"path", "loc", "saltenv"}
3040        if not all(x in load for x in required_load_keys):
3041            log.debug(
3042                "Not all of the required keys present in payload. Missing: %s",
3043                ", ".join(required_load_keys.difference(load)),
3044            )
3045            return ret
3046        if not fnd["path"]:
3047            return ret
3048        ret["dest"] = fnd["rel"]
3049        gzip = load.get("gzip", None)
3050        fpath = os.path.normpath(fnd["path"])
3051        with salt.utils.files.fopen(fpath, "rb") as fp_:
3052            fp_.seek(load["loc"])
3053            data = fp_.read(self.opts["file_buffer_size"])
3054            if data and not salt.utils.files.is_binary(fpath):
3055                data = data.decode(__salt_system_encoding__)
3056            if gzip and data:
3057                data = salt.utils.gzip_util.compress(data, gzip)
3058                ret["gzip"] = gzip
3059            ret["data"] = data
3060        return ret
3061
3062    def file_hash(self, load, fnd):
3063        """
3064        Return a file hash, the hash type is set in the master config file
3065        """
3066        if "env" in load:
3067            # "env" is not supported; Use "saltenv".
3068            load.pop("env")
3069
3070        if not all(x in load for x in ("path", "saltenv")):
3071            return "", None
3072        ret = {"hash_type": self.opts["hash_type"]}
3073        relpath = fnd["rel"]
3074        path = fnd["path"]
3075        hashdest = salt.utils.path.join(
3076            self.hash_cachedir,
3077            load["saltenv"],
3078            "{}.hash.{}".format(relpath, self.opts["hash_type"]),
3079        )
3080        try:
3081            with salt.utils.files.fopen(hashdest, "rb") as fp_:
3082                ret["hsum"] = fp_.read()
3083            return ret
3084        except OSError as exc:
3085            if exc.errno != errno.ENOENT:
3086                raise
3087
3088        try:
3089            os.makedirs(os.path.dirname(hashdest))
3090        except OSError as exc:
3091            if exc.errno != errno.EEXIST:
3092                raise
3093
3094        ret["hsum"] = salt.utils.hashutils.get_hash(path, self.opts["hash_type"])
3095        with salt.utils.files.fopen(hashdest, "w+") as fp_:
3096            fp_.write(ret["hsum"])
3097        return ret
3098
3099    def _file_lists(self, load, form):
3100        """
3101        Return a dict containing the file lists for files and dirs
3102        """
3103        if "env" in load:
3104            # "env" is not supported; Use "saltenv".
3105            load.pop("env")
3106
3107        if not os.path.isdir(self.file_list_cachedir):
3108            try:
3109                os.makedirs(self.file_list_cachedir)
3110            except os.error:
3111                log.error("Unable to make cachedir %s", self.file_list_cachedir)
3112                return []
3113        list_cache = salt.utils.path.join(
3114            self.file_list_cachedir,
3115            "{}.p".format(load["saltenv"].replace(os.path.sep, "_|-")),
3116        )
3117        w_lock = salt.utils.path.join(
3118            self.file_list_cachedir,
3119            ".{}.w".format(load["saltenv"].replace(os.path.sep, "_|-")),
3120        )
3121        cache_match, refresh_cache, save_cache = salt.fileserver.check_file_list_cache(
3122            self.opts, form, list_cache, w_lock
3123        )
3124        if cache_match is not None:
3125            return cache_match
3126        if refresh_cache:
3127            log.trace("Start rebuilding gitfs file_list cache")
3128            ret = {"files": set(), "symlinks": {}, "dirs": set()}
3129            for repo in self.remotes:
3130                if (
3131                    salt.utils.stringutils.is_hex(load["saltenv"])
3132                    or load["saltenv"] in self.envs()
3133                    or repo.fallback
3134                ):
3135                    start = time.time()
3136                    repo_files, repo_symlinks = repo.file_list(load["saltenv"])
3137                    ret["files"].update(repo_files)
3138                    ret["symlinks"].update(repo_symlinks)
3139                    ret["dirs"].update(repo.dir_list(load["saltenv"]))
3140                    log.profile(
3141                        "gitfs file_name cache rebuild repo=%s duration=%s seconds",
3142                        repo.id,
3143                        time.time() - start,
3144                    )
3145            ret["files"] = sorted(ret["files"])
3146            ret["dirs"] = sorted(ret["dirs"])
3147
3148            if save_cache:
3149                salt.fileserver.write_file_list_cache(
3150                    self.opts, ret, list_cache, w_lock
3151                )
3152            # NOTE: symlinks are organized in a dict instead of a list, however
3153            # the 'symlinks' key will be defined above so it will never get to
3154            # the default value in the call to ret.get() below.
3155            log.trace("Finished rebuilding gitfs file_list cache")
3156            return ret.get(form, [])
3157        # Shouldn't get here, but if we do, this prevents a TypeError
3158        return {} if form == "symlinks" else []
3159
3160    def file_list(self, load):
3161        """
3162        Return a list of all files on the file server in a specified
3163        environment
3164        """
3165        return self._file_lists(load, "files")
3166
3167    def file_list_emptydirs(self, load):  # pylint: disable=W0613
3168        """
3169        Return a list of all empty directories on the master
3170        """
3171        # Cannot have empty dirs in git
3172        return []
3173
3174    def symlink_list(self, load):
3175        """
3176        Return a dict of all symlinks based on a given path in the repo
3177        """
3178        if "env" in load:
3179            # "env" is not supported; Use "saltenv".
3180            load.pop("env")
3181
3182        if (
3183            not salt.utils.stringutils.is_hex(load["saltenv"])
3184            and load["saltenv"] not in self.envs()
3185        ):
3186            return {}
3187        if "prefix" in load:
3188            prefix = load["prefix"].strip("/")
3189        else:
3190            prefix = ""
3191        symlinks = self._file_lists(load, "symlinks")
3192        return {key: val for key, val in symlinks.items() if key.startswith(prefix)}
3193
3194
class GitPillar(GitBase):
    """
    Functionality specific to the git external pillar
    """

    role = "git_pillar"

    def checkout(self):
        """
        Checkout the targeted branches/tags from the git_pillar remotes

        Populates ``self.pillar_dirs`` (an OrderedDict mapping checkout
        directory -> pillar environment) and ``self.pillar_linked_dirs``
        (the subset of those directories that are mountpoint link dirs).
        """
        self.pillar_dirs = OrderedDict()
        self.pillar_linked_dirs = []
        for repo in self.remotes:
            cachedir = self.do_checkout(repo)
            if cachedir is not None:
                # Figure out which environment this remote should be assigned
                if repo.branch == "__env__" and hasattr(repo, "all_saltenvs"):
                    # Dynamic branch mapping: use the active pillarenv (or
                    # saltenv), defaulting to "base" when neither is set.
                    env = (
                        self.opts.get("pillarenv") or self.opts.get("saltenv") or "base"
                    )
                elif repo.env:
                    # An explicit per-remote env takes precedence over
                    # branch-based mapping.
                    env = repo.env
                else:
                    if repo.branch == repo.base:
                        env = "base"
                    else:
                        # Resolve the actual checkout target; it still maps
                        # to "base" when it matches the repo's base branch.
                        tgt = repo.get_checkout_target()
                        env = "base" if tgt == repo.base else tgt
                if repo._mountpoint:
                    # Mountpointed remotes are exposed through a symlink dir;
                    # only record it if the link was set up successfully.
                    if self.link_mountpoint(repo):
                        self.pillar_dirs[repo.linkdir] = env
                        self.pillar_linked_dirs.append(repo.linkdir)
                else:
                    self.pillar_dirs[cachedir] = env

    def link_mountpoint(self, repo):
        """
        Ensure that the mountpoint is present in the correct location and
        points at the correct path

        Returns True when the symlink exists (or was created) and points at
        the expected cachedir path, False on failure or lock timeout.
        """
        # Path of the symlink inside the linkdir, and the cachedir path it
        # should point at (trailing separator stripped for comparison).
        lcachelink = salt.utils.path.join(repo.linkdir, repo._mountpoint)
        lcachedest = salt.utils.path.join(repo.cachedir, repo.root()).rstrip(os.sep)
        wipe_linkdir = False
        create_link = False
        try:
            # Serialize mountpoint maintenance via the repo's lock mechanism
            with repo.gen_lock(lock_type="mountpoint", timeout=10):
                # Compare the linkdir's actual on-disk layout against the
                # expected layout recorded on the repo object.
                walk_results = list(os.walk(repo.linkdir, followlinks=False))
                if walk_results != repo.linkdir_walk:
                    log.debug(
                        "Results of walking %s differ from expected results",
                        repo.linkdir,
                    )
                    log.debug("Walk results: %s", walk_results)
                    log.debug("Expected results: %s", repo.linkdir_walk)
                    wipe_linkdir = True
                else:
                    # Every parent of the link must be a real (non-symlink)
                    # directory; the last walk entry is the link's own dir.
                    if not all(
                        not salt.utils.path.islink(x[0]) and os.path.isdir(x[0])
                        for x in walk_results[:-1]
                    ):
                        log.debug(
                            "Linkdir parents of %s are not all directories", lcachelink
                        )
                        wipe_linkdir = True
                    elif not salt.utils.path.islink(lcachelink):
                        # Expected symlink is missing; rebuild from scratch
                        wipe_linkdir = True
                    else:
                        try:
                            ldest = salt.utils.path.readlink(lcachelink)
                        except Exception:  # pylint: disable=broad-except
                            log.debug("Failed to read destination of %s", lcachelink)
                            wipe_linkdir = True
                        else:
                            if ldest != lcachedest:
                                log.debug(
                                    "Destination of %s (%s) does not match "
                                    "the expected value (%s)",
                                    lcachelink,
                                    ldest,
                                    lcachedest,
                                )
                                # Since we know that the parent dirs of the
                                # link are set up properly, all we need to do
                                # is remove the symlink and let it be created
                                # below.
                                try:
                                    if (
                                        salt.utils.platform.is_windows()
                                        and not ldest.startswith("\\\\")
                                        and os.path.isdir(ldest)
                                    ):
                                        # On Windows, symlinks to directories
                                        # must be removed as if they were
                                        # themselves directories.
                                        shutil.rmtree(lcachelink)
                                    else:
                                        os.remove(lcachelink)
                                except Exception as exc:  # pylint: disable=broad-except
                                    log.exception(
                                        "Failed to remove existing git_pillar "
                                        "mountpoint link %s: %s",
                                        lcachelink,
                                        exc.__str__(),
                                    )
                                wipe_linkdir = False
                                create_link = True

                if wipe_linkdir:
                    # Wiping implies that we need to create the link
                    create_link = True
                    try:
                        shutil.rmtree(repo.linkdir)
                    except OSError:
                        # Nothing to remove (or already gone); best-effort
                        pass
                    try:
                        ldirname = os.path.dirname(lcachelink)
                        os.makedirs(ldirname)
                        log.debug("Successfully made linkdir parent %s", ldirname)
                    except OSError as exc:
                        log.error(
                            "Failed to os.makedirs() linkdir parent %s: %s",
                            ldirname,
                            exc.__str__(),
                        )
                        return False

                if create_link:
                    try:
                        os.symlink(lcachedest, lcachelink)
                        log.debug(
                            "Successfully linked %s to cachedir %s",
                            lcachelink,
                            lcachedest,
                        )
                        return True
                    except OSError as exc:
                        log.error(
                            "Failed to create symlink to %s at path %s: %s",
                            lcachedest,
                            lcachelink,
                            exc.__str__(),
                        )
                        return False
        except GitLockError:
            log.error(
                "Timed out setting mountpoint lock for %s remote '%s'. If "
                "this error persists, it may be because an earlier %s "
                "checkout was interrupted. The lock can be cleared by running "
                "'salt-run cache.clear_git_lock %s type=mountpoint', or by "
                "manually removing %s.",
                self.role,
                repo.id,
                self.role,
                self.role,
                repo._get_lock_file(lock_type="mountpoint"),
            )
            return False
        # Link already existed and pointed at the right place
        return True
3354
3355
class WinRepo(GitBase):
    """
    Functionality specific to the winrepo runner
    """

    role = "winrepo"
    # Need to define this in case we try to reference it before checking
    # out the repos.
    winrepo_dirs = {}

    def checkout(self):
        """
        Checkout the targeted branches/tags from the winrepo remotes

        Rebuilds ``self.winrepo_dirs``, mapping each remote's id to the
        directory its checkout landed in.
        """
        self.winrepo_dirs = {}
        for remote in self.remotes:
            checkout_dir = self.do_checkout(remote)
            if checkout_dir is None:
                # Checkout failed for this remote; leave it out of the map
                continue
            self.winrepo_dirs[remote.id] = checkout_dir
3375