1# -*- coding: utf-8 -*-
2
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7# We don't import all modules at the top for performance reasons. See Bug 1008943
8
9from __future__ import absolute_import, print_function
10
11import errno
12import os
13import re
14import stat
15import sys
16import time
17import warnings
18from contextlib import contextmanager
19
20from six.moves import urllib
21
22
# Names exported by a star-import of this module. Helpers defined in this
# file but not listed here (for example ``depth`` and the underscore-prefixed
# retry helper) are not part of the star-import surface.
__all__ = [
    "extract_tarball",
    "extract_zip",
    "extract",
    "is_url",
    "load",
    "copy_contents",
    "match",
    "move",
    "remove",
    "rmtree",
    "tree",
    "which",
    "NamedTemporaryFile",
    "TemporaryDirectory",
]
39
40# utilities for extracting archives
41
42
def extract_tarball(src, dest, ignore=None):
    """Unpack the tar archive ``src`` into the directory ``dest``.

    :param src: path of the tar file to open
    :param dest: directory the members are extracted into
    :param ignore: optional iterable of patterns; a member whose name
                   matches any of them (see ``match``) is skipped
    :returns: list of the member names that were extracted, in archive order
    """

    import tarfile

    extracted = []

    with tarfile.open(src) as archive:
        for member in archive:
            skip = bool(ignore) and any(
                match(member.name, pattern) for pattern in ignore
            )
            if not skip:
                archive.extract(member, path=dest)
                extracted.append(member.name)

    return extracted
58
59
def extract_zip(src, dest, ignore=None):
    """Extract a zip archive into the directory ``dest``.

    :param src: path of a zip file, or an already opened ``zipfile.ZipFile``
    :param dest: directory the members are extracted into
    :param ignore: optional iterable of patterns; a member whose name
                   matches any of them (see ``match``) is not extracted
    :returns: the archive's complete name list (ignored members included)

    The archive is always closed before returning or raising, including when
    the caller passed in an open ``ZipFile``.
    """

    import zipfile

    if isinstance(src, zipfile.ZipFile):
        bundle = src
    else:
        try:
            bundle = zipfile.ZipFile(src)
        except Exception:
            print("src: %s" % src)
            raise

    # try/finally so the archive handle is released even if extracting a
    # member or changing its permissions fails part-way through.
    try:
        namelist = bundle.namelist()

        for name in namelist:
            if ignore and any(match(name, i) for i in ignore):
                continue

            bundle.extract(name, dest)
            filename = os.path.realpath(os.path.join(dest, name))
            # The upper 16 bits of external_attr carry the Unix mode; keep
            # only the nine permission bits.
            mode = bundle.getinfo(name).external_attr >> 16 & 0x1FF
            # Only update permissions if attributes are set. Otherwise
            # fall back to the defaults.
            if mode:
                os.chmod(filename, mode)
    finally:
        bundle.close()
    return namelist
88
89
def extract(src, dest=None, ignore=None):
    """
    Unpack the tar or zip archive ``src`` into ``dest``.

    When ``dest`` is omitted the archive is unpacked next to itself, i.e.
    into os.path.dirname(src); a ``dest`` that does not exist yet is created.
    ``ignore`` is handed through to the format-specific extractor.

    Returns the paths (joined onto ``dest``) of the top level entries found
    in the archive's name list.
    """

    import zipfile
    import tarfile

    assert os.path.exists(src), "'%s' does not exist" % src

    if dest is None:
        dest = os.path.dirname(src)
    elif not os.path.isdir(dest):
        os.makedirs(dest)
    assert not os.path.isfile(dest), "dest cannot be a file"

    # Pick the extractor first, then run it once.
    if tarfile.is_tarfile(src):
        extractor = extract_tarball
    elif zipfile.is_zipfile(src):
        extractor = extract_zip
    else:
        raise Exception("mozfile.extract: no archive format found for '%s'" % src)
    namelist = extractor(src, dest, ignore=ignore)

    # Archive names use forward slashes even on Windows; an entry is top
    # level when nothing but an optional trailing slash separates it.
    top_level_files = []
    for name in namelist:
        stripped = name.rstrip("/")
        if "/" not in stripped:
            top_level_files.append(os.path.join(dest, stripped))

    # Directories may be absent from the name list, so also record the
    # first path component of every nested entry.
    for name in namelist:
        head, slash, _ = name.partition("/")
        if slash:
            root = os.path.join(dest, head)
            if root not in top_level_files:
                top_level_files.append(root)

    return top_level_files
133
134
135# utilities for removal of files and directories
136
137
def rmtree(dir):
    """Deprecated alias that removes a directory tree via ``remove()``.

    Emits a PendingDeprecationWarning attributed to the caller; new code
    should call mozfile.remove() directly.

    :param dir: directory to be removed
    """

    message = "mozfile.rmtree() is deprecated in favor of mozfile.remove()"
    # stacklevel=2 points the warning at the code calling rmtree().
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    return remove(dir)
152
153
def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
    """
    Call ``func(*args)``, retrying when it fails with a known transient error.

    On Windows other processes (explorer, virus scanners, etc) may keep a
    handle to a path open and cause spurious failures. An OSError whose
    errno is EACCES or ENOTEMPTY is retried up to ``retry_max`` times; any
    other OSError, or one raised after the last retry, propagates. The pause
    before each retry is ``retry_delay`` multiplied by the number of failed
    attempts so far. The return value of ``func`` is discarded.
    """
    # Error codes are defined in:
    # http://docs.python.org/2/library/errno.html#module-errno
    retryable_errnos = (errno.EACCES, errno.ENOTEMPTY)
    failures = 0

    while True:
        try:
            func(*args)
        except OSError as err:
            out_of_retries = failures == retry_max
            if err.errno not in retryable_errnos or out_of_retries:
                raise

            failures += 1
            print(
                '%s() failed for "%s". Reason: %s (%s). Retrying...'
                % (func.__name__, args, err.strerror, err.errno)
            )
            time.sleep(failures * retry_delay)
            continue

        # No exception was raised, so the call went through.
        return
185
186
def remove(path):
    """Removes the specified file, link, or directory tree.

    This is a replacement for shutil.rmtree that works better under
    windows. It does the following things:

     - check path access for the current user before trying to remove
     - retry operations on some known errors due to various things keeping
       a handle on file paths - like explorer, virus scanners, etc. The
       known errors are errno.EACCES and errno.ENOTEMPTY, and it will
       retry up to 5 times with a delay of (failed_attempts * 0.5) seconds
       between each attempt.

    Note that no error will be raised if the given path does not exist.
    Symbolic links are removed themselves (never their targets), including
    links whose target no longer exists.

    :param path: path to be removed
    """

    import shutil

    def _call_with_windows_retry(*args, **kwargs):
        """Run _call_windows_retry, ignoring paths that vanished meanwhile."""
        try:
            _call_windows_retry(*args, **kwargs)
        except OSError as e:
            # The file or directory to be removed doesn't exist anymore
            if e.errno != errno.ENOENT:
                raise

    def _update_permissions(path):
        """Sets specified permissions depending on filetype"""
        if os.path.islink(path):
            # Path is a symlink which we don't have to modify
            # because it should already have all the needed permissions
            return

        stats = os.stat(path)

        if os.path.isfile(path):
            mode = stats.st_mode | stat.S_IWUSR
        elif os.path.isdir(path):
            mode = stats.st_mode | stat.S_IWUSR | stat.S_IXUSR
        else:
            # Not supported type
            return

        _call_with_windows_retry(os.chmod, (path, mode))

    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, which would then never be removed.
    if not os.path.lexists(path):
        return

    # On Windows, adds '\\?\' to paths which match ^[A-Za-z]:\.* to access
    # files or directories that exceed the MAX_PATH (260) limitation or that
    # end with a period.
    if (
        sys.platform in ("win32", "cygwin")
        and len(path) >= 3
        and path[1] == ":"
        and path[2] == "\\"
    ):
        path = u"\\\\?\\%s" % path

    if os.path.isfile(path) or os.path.islink(path):
        # Verify the file or link is read/write for the current user
        _update_permissions(path)
        _call_with_windows_retry(os.remove, (path,))

    elif os.path.isdir(path):
        # Verify the directory is read/write/execute for the current user
        _update_permissions(path)

        # We're ensuring that every nested item has writable permission.
        for root, dirs, files in os.walk(path):
            for entry in dirs + files:
                _update_permissions(os.path.join(root, entry))
        _call_with_windows_retry(shutil.rmtree, (path,))
264
265
def copy_contents(srcdir, dstdir):
    """
    Copy the contents of the srcdir into the dstdir, preserving
    subdirectories.

    If an existing file of the same name exists in dstdir, it will be overwritten.

    On Python 3.8+ (except on Windows) this delegates to
    ``shutil.copytree(..., dirs_exist_ok=True)``. Otherwise the tree is
    copied entry by entry with Windows retries; failures are collected and
    raised together as ``Exception(errors)``, where ``errors`` is a list of
    ``(source, destination, message)`` tuples.
    """
    import shutil

    # dirs_exist_ok was introduced in Python 3.8
    # On earlier versions, or Windows, use the verbose mechanism.
    # We use it on Windows because _call_with_windows_retry doesn't allow
    # named arguments to be passed.
    if sys.version_info < (3, 8) or os.name == "nt":
        names = os.listdir(srcdir)
        if not os.path.isdir(dstdir):
            os.makedirs(dstdir)
        errors = []
        for name in names:
            srcname = os.path.join(srcdir, name)
            dstname = os.path.join(dstdir, name)
            try:
                if os.path.islink(srcname):
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                elif os.path.isdir(srcname):
                    copy_contents(srcname, dstname)
                else:
                    _call_windows_retry(shutil.copy2, (srcname, dstname))
            except OSError as why:
                errors.append((srcname, dstname, str(why)))
            except Exception as err:
                # A recursive call reports its failures as Exception(list);
                # merge that list so the final report stays flat. Anything
                # else is recorded as a single failure for this entry.
                nested = err.args[0] if err.args else None
                if isinstance(nested, list):
                    errors.extend(nested)
                else:
                    errors.append((srcname, dstname, str(err)))
        try:
            _call_windows_retry(shutil.copystat, (srcdir, dstdir))
        except OSError as why:
            # Copying directory times/permissions may fail on Windows; only
            # report the failure when it is not a Windows-specific error.
            # ``winerror`` does not exist on other platforms, hence getattr.
            if getattr(why, "winerror", None) is None:
                errors.append((srcdir, dstdir, str(why)))
        if errors:
            raise Exception(errors)
    else:
        shutil.copytree(srcdir, dstdir, dirs_exist_ok=True)
308
309
def move(src, dst):
    """
    Move the file or directory ``src`` to ``dst``.

    Wraps shutil.move with the Windows retry helper, so known transient
    errors caused by other processes holding a handle on the path are
    retried instead of failing immediately.
    """
    import shutil

    endpoints = (src, dst)
    _call_windows_retry(shutil.move, endpoints)
321
322
def depth(directory):
    """Return how many path components the absolute form of ``directory`` has.

    The filesystem root counts as one level, so on POSIX ``/`` is 1 and
    ``/a/b`` is 3.
    """

    current = os.path.abspath(directory)
    level = 0
    tail = True  # sentinel so the first split always happens
    while tail:
        # Peel one component off the end; the root splits to an empty tail.
        current, tail = os.path.split(current)
        level += 1
    return level
334
335
def tree(directory, sort_key=lambda x: x.lower()):
    """Return a text rendering of the directory structure under `directory`.

    Directories and files are walked top-down and ordered with `sort_key`
    (case-insensitive by default). The result is a single string with one
    entry per line, drawn with box characters; the first line is
    `directory` exactly as it was passed in.
    """
    vertical_line = u"│"
    item_marker = u"├"
    last_child = u"└"

    lines = []
    indent = []
    # absolute directory path -> name of its last (sorted) subdirectory
    last_subdir = {}
    top = depth(directory)

    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
        abspath = os.path.abspath(dirpath)
        parent, basename = os.path.split(abspath)
        level = depth(abspath) - top

        # Sort in place so os.walk descends in the same order we print.
        dirnames[:] = sorted(dirnames, key=sort_key)
        filenames[:] = sorted(filenames, key=sort_key)

        # Grow the indentation by one column when descending, then trim it
        # back to the current level when coming back up.
        if level > len(indent):
            indent.append(vertical_line)
        indent = indent[:level]

        # With subdirectories still to come, the last file is not the last
        # child of this directory.
        if dirnames:
            files_end = item_marker
            last_subdir[abspath] = dirnames[-1]
        else:
            files_end = last_child

        if last_subdir.get(parent) == basename:
            # last directory of parent: close the branch and stop drawing
            # the vertical line below it
            dirpath_mark = last_child
            indent[-1] = " "
        elif not indent:
            dirpath_mark = ""
        else:
            dirpath_mark = item_marker

        # The top-level entry is printed as passed; every other directory
        # is printed by its base name behind the tree prefix.
        label = basename if lines else directory
        lines.append("%s%s%s" % ("".join(indent[:-1]), dirpath_mark, label))

        # Files of this directory, one line each.
        prefix = "".join(indent)
        for filename in filenames:
            marker = files_end if filename == filenames[-1] else item_marker
            lines.append("%s%s%s" % (prefix, marker, filename))

    return "\n".join(lines)
401
402
def which(cmd, mode=os.F_OK | os.X_OK, path=None, exts=None, extra_search_dirs=()):
    """A wrapper around `shutil.which` to make the behavior on Windows
    consistent with other platforms.

    On non-Windows platforms, this is a direct call to `shutil.which`. On
    Windows, this:

    * Ensures that `cmd` without an extension will be found. Previously it was
      only found if it had an extension in `PATHEXT`.
    * Ensures the absolute path to the binary is returned. Previously if the
      binary was found in `cwd`, a relative path was returned.
    * Checks the Windows registry if shutil.which doesn't come up with anything.

    The arguments are the same as the ones in `shutil.which`, except that
    `path` may also be given as a list or tuple of directories. In addition
    there is an `exts` argument that only has an effect on Windows. This is
    used to set a custom value for PATHEXT and is formatted as a list of file
    extensions; the sequence passed in is never modified.

    extra_search_dirs is a convenience argument. If provided, the strings in
    the sequence will be appended to the END of the given `path`.

    Returns the path of the executable, or None when nothing was found.
    """
    from shutil import which as shutil_which

    if isinstance(path, (list, tuple)):
        path = os.pathsep.join(path)

    if not path:
        path = os.environ.get("PATH", os.defpath)

    if extra_search_dirs:
        path = os.pathsep.join([path] + list(extra_search_dirs))

    if sys.platform != "win32":
        return shutil_which(cmd, mode=mode, path=path)

    oldexts = os.environ.get("PATHEXT", "")
    # Work on a private list: appending "." below must not leak into the
    # caller's sequence, and a tuple has no append() at all.
    exts = list(exts) if exts else oldexts.split(os.pathsep)

    # This ensures that `cmd` without any extensions will be found.
    # See: https://bugs.python.org/issue31405
    if "." not in exts:
        exts.append(".")

    # PATHEXT is overridden only for the duration of the lookup and restored
    # (or removed again) afterwards.
    os.environ["PATHEXT"] = os.pathsep.join(exts)
    try:
        found = shutil_which(cmd, mode=mode, path=path)
        if found:
            # A match through the bare "." extension carries a trailing dot.
            return os.path.abspath(found.rstrip("."))
    finally:
        if oldexts:
            os.environ["PATHEXT"] = oldexts
        else:
            del os.environ["PATHEXT"]

    # If we've gotten this far, we need to check for registered executables
    # before giving up.
    try:
        import winreg
    except ImportError:
        import _winreg as winreg
    if not cmd.lower().endswith(".exe"):
        cmd += ".exe"
    try:
        ret = winreg.QueryValue(
            winreg.HKEY_LOCAL_MACHINE,
            r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\%s" % cmd,
        )
        return os.path.abspath(ret) if ret else None
    except winreg.error:
        return None
474
475
476# utilities for temporary resources
477
478
class NamedTemporaryFile(object):
    """
    Like tempfile.NamedTemporaryFile except it works on Windows
    in the case where you open the created file a second time.

    This behaves very similarly to tempfile.NamedTemporaryFile but may
    not behave exactly the same. For example, this function does not
    prevent fd inheritance by children.

    Attribute access and iteration are forwarded to the underlying file
    object. With ``delete=True`` the file is removed when the context
    manager exits, or when the object is finalized if it was never exited.

    Example usage:

    with NamedTemporaryFile() as fh:
        fh.write(b'foobar')

        print('Filename: %s' % fh.name)

    see https://bugzilla.mozilla.org/show_bug.cgi?id=821362
    """

    def __init__(
        self, mode="w+b", bufsize=-1, suffix="", prefix="tmp", dir=None, delete=True
    ):
        """Create the temporary file and open it with ``mode``.

        ``suffix``, ``prefix`` and ``dir`` are passed to tempfile.mkstemp.
        ``bufsize`` is accepted for signature compatibility but is not used.
        """

        import tempfile

        fd, path = tempfile.mkstemp(suffix, prefix, dir, "t" in mode)
        os.close(fd)

        # Record the bookkeeping state before opening, so that __del__ can
        # always tell how far construction got.
        self._path = path
        self._delete = delete
        self._unlinked = False
        try:
            self.file = open(path, mode)
        except Exception:
            # Don't leave the freshly created file behind when it cannot be
            # opened (e.g. invalid mode).
            os.unlink(path)
            self._unlinked = True
            raise

    def __getattr__(self, k):
        # Only reached for names not found on the wrapper itself. Raise
        # AttributeError (not KeyError) when there is no wrapped file yet so
        # hasattr()/getattr() with a default keep working.
        try:
            wrapped = self.__dict__["file"]
        except KeyError:
            raise AttributeError(k)
        return getattr(wrapped, k)

    def __iter__(self):
        return self.__dict__["file"]

    def __enter__(self):
        self.file.__enter__()
        return self

    def __exit__(self, exc, value, tb):
        self.file.__exit__(exc, value, tb)
        if self.__dict__["_delete"]:
            os.unlink(self.__dict__["_path"])
            self._unlinked = True

    def __del__(self):
        state = self.__dict__
        # Nothing to clean up if the file is already gone or if __init__
        # never got as far as creating it.
        if state.get("_unlinked", True):
            return
        wrapped = state.get("file")
        if wrapped is not None:
            wrapped.__exit__(None, None, None)
        if state.get("_delete"):
            try:
                os.unlink(state["_path"])
            except OSError as e:
                # Somebody else already removed the file; that is fine.
                if e.errno != errno.ENOENT:
                    raise
            state["_unlinked"] = True
534
535
@contextmanager
def TemporaryDirectory():
    """
    Context manager yielding the path of a fresh directory made with
    tempfile.mkdtemp; the directory and its contents are deleted on exit,
    whether or not the body raised.

    Example usage:
    with TemporaryDirectory() as tmp:
       with open(os.path.join(tmp, "a_temp_file"), "w") as fh:
           fh.write("data")

    """

    from shutil import rmtree as remove_tree
    from tempfile import mkdtemp

    scratch = mkdtemp()
    try:
        yield scratch
    finally:
        remove_tree(scratch)
555
556
557# utilities dealing with URLs
558
559
def is_url(thing):
    """
    Return True if thing looks like a URL.

    A string counts as a URL when its parsed scheme is at least two
    characters long; the minimum length keeps Windows drive letters such as
    ``C:\\path`` from being mistaken for a scheme.
    """

    # The parse result always exposes ``scheme``; the former
    # ``"scheme" in parsed`` check tested the tuple's *values* and both of
    # its branches computed the same thing.
    return len(urllib.parse.urlparse(thing).scheme) >= 2
570
571
def load(resource):
    """
    Open a file path or URL for reading.

    A leading 'file://' is stripped first. If what remains is not a URL
    (see ``is_url``) it is opened with ``open()`` and the file object is
    returned; otherwise the result of urllib.request.urlopen() is returned.
    """

    # file URLs are reduced to plain paths due to python stdlib limitations
    file_prefix = "file://"
    if resource.startswith(file_prefix):
        resource = resource[len(file_prefix) :]

    if is_url(resource):
        return urllib.request.urlopen(resource)

    # no scheme is given, so it is a file path
    return open(resource)
588
589
590# We can't depend on mozpack.path here, so copy the 'match' function over.
591
# pattern string -> compiled regular expression, filled lazily by match()
re_cache = {}
# The ``**`` helpers run on the output of re.escape(), so they must look for
# the escaped spelling. Python versions < 3.7 return r'\/' for
# re.escape('/'), hence the two variants.
if re.escape("/") != "/":
    MATCH_STAR_STAR_RE = re.compile(r"(^|\\\/)\\\*\\\*\\\/")
    MATCH_STAR_STAR_END_RE = re.compile(r"(^|\\\/)\\\*\\\*$")
else:
    MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/")
    MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$")


def match(path, pattern):
    """
    Tell whether ``path`` matches ``pattern``.

    An empty pattern matches everything. A single asterisk matches any
    string, including the empty one, within one path component:

        ``foo`` matches ``*``, ``f*`` or ``fo*o``

    It never stands in for a missing subdirectory:

        ``foo/bar`` does *not* match ``foo/*/bar``

    A pattern that matches an ancestor directory of the path matches the
    path too:

        ``foo/bar`` matches ``foo``

    Two adjacent asterisks match files and zero or more levels of
    directories:

        ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
    """
    if not pattern:
        return True

    compiled = re_cache.get(pattern)
    if compiled is None:
        # Translate the glob into a regex: escape everything, then give the
        # escaped ``**`` and ``*`` their wildcard meaning back.
        regex = re.escape(pattern)
        regex = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", regex)
        regex = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", regex)
        regex = regex.replace(r"\*", "[^/]*")
        # The optional tail lets a matching ancestor directory match.
        regex += "(?:/.*)?$"
        compiled = re_cache[pattern] = re.compile(regex)

    return compiled.match(path) is not None
633