1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5import os
6
7from mozpack import path as mozpath
8from mozpack.files import FileFinder
9
10
class FilterPath(object):
    """Helper class to make comparing and matching file paths easier.

    The wrapped path is normalized with ``os.path.normpath`` on construction
    and exposed as the ``path`` attribute.
    """

    def __init__(self, path):
        self.path = os.path.normpath(path)
        # Built lazily by the `finder` property.
        self._finder = None

    @property
    def finder(self):
        """A ``FileFinder`` based at this path, created on first access."""
        if self._finder is None:
            self._finder = FileFinder(mozpath.normsep(self.path))
        return self._finder

    @property
    def ext(self):
        """The final file extension with surrounding dots stripped ('' if none)."""
        return os.path.splitext(self.path)[1].strip(".")

    @property
    def exists(self):
        """Whether the path currently exists on disk."""
        return os.path.exists(self.path)

    @property
    def isfile(self):
        """Whether the path is an existing regular file."""
        return os.path.isfile(self.path)

    @property
    def isdir(self):
        """Whether the path is an existing directory."""
        return os.path.isdir(self.path)

    def join(self, *args):
        """Return a new FilterPath with ``args`` joined onto this path."""
        return FilterPath(os.path.join(self.path, *args))

    def match(self, patterns):
        """Return True if this path matches at least one of ``patterns``.

        :param patterns: An iterable of patterns, each either a string or a
                         FilterPath. Separators are normalized before the
                         pattern is handed to ``mozpath.match``.
        """
        a = mozpath.normsep(self.path)
        for p in patterns:
            if isinstance(p, FilterPath):
                p = p.path
            p = mozpath.normsep(p)
            if mozpath.match(a, p):
                return True
        return False

    def contains(self, other):
        """Return True if other is a subdirectory of self or equals self.

        Both paths are made absolute and compared one path component at a
        time, so 'a/b' contains 'a/b/c' but not 'a/bc'.

        :param other: A string path or a FilterPath.
        """
        if isinstance(other, FilterPath):
            other = other.path
        a = os.path.abspath(self.path)
        b = os.path.normpath(os.path.abspath(other))

        # A filesystem root keeps its trailing separator after normalization
        # ('/' or 'C:\\'). Splitting it as-is leaves an empty last component
        # that never equals a real directory name, which would make the root
        # contain nothing. Strip the separator first.
        parts_a = a.rstrip(os.sep).split(os.sep)
        parts_b = b.rstrip(os.sep).split(os.sep)

        # `self` contains `other` when all of its components are a leading
        # run of `other`'s components (a shorter `other` can never match).
        return parts_a == parts_b[: len(parts_a)]

    def __repr__(self):
        return repr(self.path)
74
75
def collapse(paths, base=None, dotfiles=False):
    """Given an iterable of paths, collapse them into the smallest possible set
    of paths that contain the original set (without containing any extra paths).

    For example, if directory 'a' contains two files b.txt and c.txt, calling:

        collapse(['a/b.txt', 'a/c.txt'])

    returns ['a']. But if a third file d.txt also exists, then it will return
    ['a/b.txt', 'a/c.txt'] since ['a'] would also include that extra file.

    :param paths: An iterable of paths (files and directories) to collapse.
    :param base: Directory whose entries are examined. When omitted, the paths
                 are made absolute and the base is derived from their common
                 prefix.
    :param dotfiles: If False (the default), directory entries whose name
                     starts with '.' are skipped when deciding whether a
                     directory is fully covered.
    :returns: The smallest set of paths (files and directories) that contain
              the original set of paths and only the original set.
    """
    # Materialize once: a generator is always truthy and would be consumed by
    # the membership tests below.
    paths = list(paths or [])

    if not paths:
        if not base:
            return []

        # Need to test whether directory chain is empty. If it is then bubble
        # the base back up so that it counts as 'covered'.
        for _, _, names in os.walk(base):
            if names:
                return []
        return [base]

    if not base:
        paths = list(map(mozpath.abspath, paths))
        base = mozpath.commonprefix(paths).rstrip("/")

        # Make sure `commonprefix` factors in sibling directories that have the
        # same prefix in their basenames.
        parent = mozpath.dirname(base)
        same_prefix = [
            p for p in os.listdir(parent) if p.startswith(mozpath.basename(base))
        ]
        if not os.path.isdir(base) or len(same_prefix) > 1:
            base = parent

    # Constant-time lookups for the repeated membership tests.
    requested = set(paths)

    if base in requested:
        return [base]

    covered = set()
    full = set()
    for name in os.listdir(base):
        if not dotfiles and name[0] == ".":
            continue

        path = mozpath.join(base, name)
        full.add(path)

        if path in requested:
            # This path was explicitly specified, so just bubble it back up
            # without recursing down into it (if it was a directory).
            covered.add(path)
        elif os.path.isdir(path):
            # Only hand down paths that really live below this directory. A
            # bare `startswith(path)` would also pick up siblings that merely
            # share a name prefix (e.g. 'foo' vs 'foobar').
            prefix = path + "/"
            new_paths = [p for p in paths if p.startswith(prefix)]
            covered.update(collapse(new_paths, base=path, dotfiles=dotfiles))

    if full == covered:
        # Every file under this base was covered, so we can collapse them all
        # up into the base path.
        return [base]
    return list(covered)
140
141
def filterpaths(root, paths, include, exclude=None, extensions=None):
    """Filters a list of paths.

    Applies the include/exclude rules to the candidate paths and works out
    which of them should be linted.

    :param root: Directory that relative, glob-free entries of `paths`,
                 `include` and `exclude` are joined onto.
    :param paths: A starting list of paths to possibly lint.
    :param include: A list of paths that should be included (required).
    :param exclude: A list of paths that should be excluded (optional).
    :param extensions: A list of file extensions which should be considered (optional).
    :returns: A tuple containing a list of file paths to lint and a list of
              paths to exclude.
    """

    def to_filterpath(raw):
        # Globs and absolute paths are wrapped untouched, everything else is
        # taken to be relative to the root.
        if "*" in raw or os.path.isabs(raw):
            return FilterPath(raw)
        return FilterPath(os.path.join(root, raw))

    # Includes are always paths and should always exist.
    included = [to_filterpath(entry) for entry in include]

    # Excludes that exist on disk are compared as paths, the remainder are
    # treated as glob patterns. Separate the two kinds up front.
    existing_excludes = []
    glob_excludes = []
    for entry in [to_filterpath(e) for e in exclude or []]:
        if entry.exists:
            existing_excludes.append(entry)
        else:
            glob_excludes.append(entry.path)

    kept = set()
    discarded = set()
    for candidate in [to_filterpath(p) for p in paths]:
        # Skip files with an unwanted extension as well as anything matching
        # an exclude glob.
        wrong_ext = extensions and candidate.isfile and candidate.ext not in extensions
        if wrong_ext or candidate.match(glob_excludes):
            continue

        # Apply the include/exclude directives that exist on disk first.
        for inc in included:
            # Excludes outside of this include path are irrelevant to it.
            nested = [e for e in existing_excludes if inc.contains(e)]

            if candidate.contains(inc):
                # The candidate is an ancestor of the include path, so the
                # include path itself is what gets linted. Its excludes can't
                # be applied here without listing every sibling file, so they
                # are handed back for the underlying linter to deal with.
                kept.add(inc)
                discarded.update(nested)
                continue

            if not inc.contains(candidate):
                continue

            # The include path is an ancestor of the candidate: keep the
            # candidate unless an exclude path sits in between the two.
            if any(e.contains(candidate) for e in nested):
                continue
            kept.add(candidate)
            discarded.update(e for e in nested if candidate.contains(e))

        # Then expand the exclude globs beneath the candidate so the matches
        # can be added to the discard set.
        for pattern in glob_excludes:
            for relpath, _ in candidate.finder.find(pattern):
                discarded.add(candidate.join(relpath))

    to_lint = [fp.path for fp in kept if fp.exists]
    to_exclude = collapse([fp.path for fp in discarded if fp.exists])
    return (to_lint, to_exclude)
216
217
def findobject(path):
    """
    Resolve a string of the form <modulepath>:<objectpath> to a Python object.

    The module is imported and the (possibly dotted) object path is then
    looked up on it one attribute at a time, i.e. roughly

        import <modulepath> as mod
        return mod.<objectpath>

    Raises ValueError unless `path` contains exactly one ':'.
    """
    pieces = path.split(":")
    if len(pieces) != 2:
        raise ValueError(
            'python path {!r} does not have the form "module:object"'.format(path)
        )
    modulepath, objectpath = pieces

    # `__import__` hands back the top-level package of a dotted module path,
    # so the remaining module components are walked as attributes, exactly
    # like the components of the object path that follow them.
    target = __import__(modulepath)
    for attr in modulepath.split(".")[1:] + objectpath.split("."):
        target = getattr(target, attr)
    return target
239
240
def ancestors(path):
    """Yield `path` followed by each of its parents, nearest first.

    Iteration ends once there is nothing left of the path, or as soon as
    `os.path.split` produces an empty final component (which is the case for
    a filesystem root and for a path ending in a separator).
    """
    remaining = path
    while remaining:
        yield remaining
        parent, leaf = os.path.split(remaining)
        if leaf == "":
            return
        remaining = parent
247
248
def get_ancestors_by_name(name, path, root):
    """Collect the files called `name` found in `path` and its ancestors.

    The search starts at `path` and moves upwards, so results are ordered
    from closest to furthest. It stops after the directory equal to `root`
    has been checked, or when there are no more ancestors. This can be useful
    for finding relevant configuration files.
    """
    found = []
    for directory in ancestors(path):
        candidate = os.path.join(directory, name)
        if os.path.isfile(candidate):
            found.append(candidate)
        if directory == root:
            # Never look above the root.
            return found
    return found
262
263
def expand_exclusions(paths, config, root):
    """Returns all files that match patterns and aren't excluded.

    This is used by some external linters who receive 'batch' files (e.g dirs)
    but aren't capable of applying their own exclusions. There is an argument
    to be made that this step should just apply to all linters no matter what.

    Paths that are existing files are yielded as-is unless they are excluded;
    their extension is not checked. Any other path is searched recursively
    for files with one of the configured extensions.

    Args:
        paths (list): List of candidate paths to lint.
        config (dict): Linter's config object. The keys read are
            "extensions", "find-dotfiles" and "exclude".
        root (str): Root of the repository. Relative exclude entries are
            joined onto it.

    Returns:
        Generator which generates list of paths that weren't excluded.
    """
    extensions = [e.lstrip(".") for e in config.get("extensions", [])]
    find_dotfiles = config.get("find-dotfiles", False)

    def normalize(path):
        path = mozpath.normpath(path)
        if os.path.isabs(path):
            return path
        return mozpath.join(root, path)

    def is_within(candidate, directory):
        # True if `candidate` equals `directory` or lies below it. Comparing
        # against `directory + "/"` keeps the test on whole path components;
        # a bare character-prefix test would also let 'foo' match 'foobar'.
        directory = directory.rstrip("/")
        return candidate == directory or candidate.startswith(directory + "/")

    exclude = list(map(normalize, config.get("exclude", [])))
    # Plain paths and glob patterns are matched differently for files, so
    # split them once instead of re-filtering for every candidate.
    exclude_paths = [e for e in exclude if "*" not in e]
    exclude_globs = [e for e in exclude if "*" in e]

    for path in paths:
        path = mozpath.normsep(path)
        if os.path.isfile(path):
            if any(is_within(path, e) for e in exclude_paths):
                continue

            if any(mozpath.match(path, e) for e in exclude_globs):
                continue

            yield path
            continue

        # Excludes located at or below this path are rewritten relative to it
        # and passed to the finder as ignore patterns.
        ignore = [
            e[len(path) :].lstrip("/") for e in exclude if is_within(e, path)
        ]
        finder = FileFinder(path, ignore=ignore, find_dotfiles=find_dotfiles)

        for ext in extensions:
            for p, f in finder.find("**/*.{}".format(ext)):
                yield os.path.join(path, p)
314