1"""File wrangling."""
2
3from coverage.backward import to_string
4from coverage.misc import CoverageException
5import fnmatch, os, os.path, re, sys
6import ntpath, posixpath
7
class FileLocator(object):
    """Understand how filenames work.

    Holds the directory that was current when the locator was built (used
    by `relative_filename`) and a cache of canonicalized filenames (used by
    `canonical_filename`).

    """

    def __init__(self):
        # The absolute path to our current directory, case-normalized and
        # ending with a separator so it can be used as a string prefix.
        self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep)

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.  Keys are the filenames exactly as the
        # caller passed them.
        self.canonical_filename_cache = {}

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  Filenames outside that directory
        are returned unchanged.

        """
        # Compare case-normalized forms, but slice the original string so
        # the caller's spelling of the remainder is preserved.
        fnorm = os.path.normcase(filename)
        if fnorm.startswith(self.relative_dir):
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.

        A relative `filename` is looked for in the current directory and
        then in each `sys.path` entry; the first existing candidate is the
        one that gets canonicalized.  If none exists, `filename` itself is
        canonicalized.

        """
        if filename not in self.canonical_filename_cache:
            # Keep the caller's name untouched: it is the cache key.  Storing
            # the result under the located name instead would make every
            # later call with the same relative name miss the cache.
            located = filename
            if not os.path.isabs(filename):
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    candidate = os.path.join(path, filename)
                    if os.path.exists(candidate):
                        located = candidate
                        break
            self.canonical_filename_cache[filename] = abs_file(located)
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split at the first marker only, so that an inner path which
                # itself contains the marker text is passed on intact.
                archive, _, inner = filename.partition(marker)
                try:
                    # marker[:-1] drops the separator, leaving ".zip"/".egg".
                    zi = zipimport.zipimporter(archive + marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(inner)
                except IOError:
                    continue
                return to_string(data)
        return None
73
74
if sys.platform == 'win32':

    def actual_path(path):
        """Return `path` spelled with the case found in directory listings.

        Each component is looked up, ignoring case, in the listing of its
        parent directory; a component that can't be found is kept as given.
        Both the final answers and the directory listings are memoized on
        the function object.

        """
        try:
            return actual_path.cache[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: the head is the answer.
            resolved = parent
        elif not parent:
            # A bare name with no directory part is left alone.
            resolved = leaf
        else:
            parent = actual_path(parent)
            entries = actual_path.list_cache.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    entries = []
                actual_path.list_cache[parent] = entries
            wanted = os.path.normcase(leaf)
            for entry in entries:
                if os.path.normcase(entry) == wanted:
                    leaf = entry
                    break
            resolved = os.path.join(parent, leaf)

        actual_path.cache[path] = resolved
        return resolved

    # path -> resolved path, and directory -> list of its entries.
    actual_path.cache = {}
    actual_path.list_cache = {}

else:
    def actual_path(filename):
        """On non-Windows platforms, `filename` is returned untouched."""
        return filename
113
114
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    User (`~`) and environment-variable references are expanded, the
    result is passed through `realpath` and `abspath`, and finally through
    `actual_path`.

    """
    expanded = os.path.expanduser(filename)
    expanded = os.path.expandvars(expanded)
    resolved = os.path.realpath(expanded)
    return actual_path(os.path.abspath(resolved))
121
122
def isabs_anywhere(filename):
    """Is `filename` absolute by either Windows or POSIX rules?"""
    if ntpath.isabs(filename):
        return True
    return posixpath.isabs(filename)
126
127
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern that begins with a wildcard (`*` or `?`) is kept exactly as
    given.  Any other pattern is run through `abs_file`, which makes it
    absolute.

    If `patterns` is None (or otherwise empty), an empty list is returned.

    """
    if not patterns:
        return []
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns
    ]
145
146
class TreeMatcher(object):
    """A matcher for files in a tree.

    `directories` is a list of directory paths; it is copied, so later
    changes to the caller's list don't affect the matcher.

    """
    def __init__(self, directories):
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        `fpath` matches if it is exactly one of our directories, or if it
        continues one of them past a path separator.  A mere string prefix
        (`/proj` vs. `/project/x.py`) is not a match.

        """
        for d in self.dirs:
            if not fpath.startswith(d):
                continue
            if fpath == d:
                # This is the same file!
                return True
            # From here on, fpath is strictly longer than d.
            if d.endswith(os.sep):
                # The directory already carries its separator (for example
                # the file system root), so the prefix test was enough.
                return True
            if fpath[len(d)] == os.sep:
                # This is a file in the directory
                return True
        return False
174
175
class FnmatchMatcher(object):
    """A matcher for files by filename pattern.

    `pats` is a list of `fnmatch`-style patterns; the matcher keeps its own
    copy of it.

    """
    def __init__(self, pats):
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pattern) for pattern in self.pats)
194
195
def sep(s):
    """Return the first path separator appearing in `s`, or os.sep if none."""
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
204
205
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # Each alias is a tuple:
        # (compiled regex, result root, pattern separator, result separator).
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always adds a terminator to
        # match the whole string, which we don't want.  Depending on the
        # Python version that terminator is "$", "\Z" followed by a flags
        # group, or a trailing "\Z" (or "\z").  Only strip "\Z" in those
        # positions so a literal "\Z" inside the pattern survives.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r'\\[Zz](\(\?|$)', r'\1', regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  Some Pythons emit the slash escaped ("\/"),
        # others bare ("/").  Scanning escape pairs as units keeps an
        # escaped backslash that happens to precede a slash intact.
        def either_sep(match):
            token = match.group(0)
            if token in ("/", r"\/"):
                return r"[\\/]"
            return token

        regex_pat = re.sub(r"(?s)\\.|/", either_sep, regex_pat)
        # We want case-insensitive matching, so add that flag.
        regex = re.compile(r"(?i)" + regex_pat)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.  If a locator was given, the mapped path is also
        canonicalized with it.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched root.  The same text appearing again
                # deeper in the path is part of the file's location and must
                # not be rewritten.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                if self.locator:
                    new = self.locator.canonical_filename(new)
                return new
        return path
286
287
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    `dirname` itself is always searched, on the assumption that the user
    named it deliberately.  Any directory below it is searched only if it
    contains a __init__.py; otherwise it and everything beneath it are
    skipped.

    """
    # We're only interested in files that look like reasonable Python files:
    # they must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    for index, (folder, subfolders, names) in enumerate(os.walk(dirname)):
        is_start = (index == 0)
        if not is_start and '__init__.py' not in names:
            # Not a package: emptying the list in place stops os.walk from
            # descending any further here.
            subfolders[:] = []
            continue
        for name in names:
            if looks_like_python(name):
                yield os.path.join(folder, name)
310