#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import absolute_import, division, print_function, unicode_literals

import errno
import hashlib
import os
import re
import shutil
import stat
import subprocess
import sys
import tarfile
import time
import zipfile
from datetime import datetime
from typing import Dict, NamedTuple

from .copytree import prefetch_dir_if_eden
from .envfuncs import Env
from .errors import TransientFailure
from .platform import is_windows
from .runcmd import run_cmd


try:
    from urllib import urlretrieve
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse
    from urllib.request import urlretrieve


def file_name_is_cmake_file(file_name):
    file_name = file_name.lower()
    base = os.path.basename(file_name)
    return (
        base.endswith(".cmake")
        or base.endswith(".cmake.in")
        or base == "cmakelists.txt"
    )
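
# A quick illustration (hypothetical paths) of what the matcher above treats
# as a cmake build file:
#
#   file_name_is_cmake_file("proj/CMakeLists.txt")  # True
#   file_name_is_cmake_file("cmake/FindFoo.cmake")  # True
#   file_name_is_cmake_file("config.h.cmake.in")    # True
#   file_name_is_cmake_file("folly/io/IOBuf.cpp")   # False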


class ChangeStatus(object):
    """Indicates the nature of changes that happened while updating
    the source directory.  There are two broad uses:
    * When extracting archives for third party software we want to
      know that we did something (eg: we either extracted code or
      we didn't do anything)
    * For 1st party code where we use shipit to transform the code,
      we want to know if we changed anything so that we can perform
      a build, but we generally want to be a little more nuanced
      and be able to distinguish between just changing a source file
      and whether we might need to reconfigure the build system.
    """

    def __init__(self, all_changed=False):
        """Construct a ChangeStatus object.  The default is to create
        a status that indicates no changes, but passing all_changed=True
        will create one that indicates that everything changed"""
        if all_changed:
            self.source_files = 1
            self.make_files = 1
        else:
            self.source_files = 0
            self.make_files = 0

    def record_change(self, file_name):
        """Used by the shipit fetcher to record changes as it updates
        files in the destination.  If the file name might be one used
        in the cmake build system that we use for 1st party code, then
        record that as a "make file" change.  We could broaden this
        to match any file used by various build systems, but it is
        only really useful for our internal cmake stuff at this time.
        If the file isn't a build file and is under the `fbcode_builder`
        dir then we don't class that as an interesting change that
        might require a rebuild, so we ignore it.
        Otherwise we record the file as a source file change."""

        file_name = file_name.lower()
        if file_name_is_cmake_file(file_name):
            self.make_files += 1
        elif "/fbcode_builder/cmake" in file_name:
            self.source_files += 1
        elif "/fbcode_builder/" not in file_name:
            self.source_files += 1

    def sources_changed(self):
        """Returns true if any source files were changed during
        an update operation.  This will typically be used to decide
        whether the build system should be run on the source dir in
        an incremental mode"""
        return self.source_files > 0

    def build_changed(self):
        """Returns true if any build files were changed during
        an update operation.  This will typically be used to decide
        whether the build system should be reconfigured and re-run
        as a full build"""
        return self.make_files > 0
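
# A minimal sketch (hypothetical file names) of how record_change classifies
# updates:
#
#   st = ChangeStatus()
#   st.record_change("folly/CMakeLists.txt")             # make file change
#   st.record_change("folly/io/IOBuf.cpp")               # source file change
#   st.record_change("build/fbcode_builder/getdeps.py")  # ignored
#   st.sources_changed()  # True
#   st.build_changed()    # True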


class Fetcher(object):
    """The Fetcher is responsible for fetching and extracting the
    sources for a project.  The Fetcher instance defines where the
    extracted data resides and reports this to the consumer via
    its `get_src_dir` method."""

    def update(self):
        """Brings the src dir up to date, ideally minimizing
        changes so that a subsequent build doesn't over-build.
        Returns a ChangeStatus object that helps the caller to
        understand the nature of the changes required during
        the update."""
        return ChangeStatus()

    def clean(self):
        """Reverts any changes that might have been made to
        the src dir"""
        pass

    def hash(self):
        """Returns a hash that identifies the version of the code in the
        working copy.  For a git repo this is the commit hash for the
        working copy.  For other Fetchers this should relate to the version
        of the code in the src dir.  The intent is that if a manifest
        changes the version/rev of a project, the hash will be different.
        Importantly, this should be computable without actually fetching
        the code, as we want this to factor into a hash used to download
        a pre-built version of the code, without having to first download
        and extract its sources (eg: boost on windows is pretty painful).
        """
        pass

    def get_src_dir(self):
        """Returns the source directory that the project was
        extracted into"""
        pass
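
# A hedged sketch of how a caller might drive a Fetcher; the build helpers
# named here are illustrative, not part of this module:
#
#   status = fetcher.update()
#   if status.build_changed():
#       reconfigure_and_build(fetcher.get_src_dir())  # hypothetical helper
#   elif status.sources_changed():
#       incremental_build(fetcher.get_src_dir())      # hypothetical helper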


class LocalDirFetcher(object):
    """This class exists to override the normal fetching behavior, and
    use an explicit user-specified directory for the project sources.

    This fetcher cannot update or track changes.  It always reports that the
    project has changed, forcing it to always be built."""

    def __init__(self, path):
        self.path = os.path.realpath(path)

    def update(self):
        return ChangeStatus(all_changed=True)

    def hash(self):
        return "0" * 40

    def get_src_dir(self):
        return self.path


class SystemPackageFetcher(object):
    def __init__(self, build_options, packages):
        self.manager = build_options.host_type.get_package_manager()
        self.packages = packages.get(self.manager)
        if self.packages:
            self.installed = None
        else:
            self.installed = False

    def packages_are_installed(self):
        if self.installed is not None:
            return bool(self.installed)

        cmd = None
        if self.manager == "rpm":
            cmd = ["rpm", "-q"] + sorted(self.packages)
        elif self.manager == "deb":
            cmd = ["dpkg", "-s"] + sorted(self.packages)

        if cmd:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if proc.returncode == 0:
                # captured as binary as we will hash this later
                self.installed = proc.stdout
            else:
                # Need all packages to be present to consider us installed
                self.installed = False
        else:
            self.installed = False

        return bool(self.installed)

    def update(self):
        assert self.installed
        return ChangeStatus(all_changed=False)

    def hash(self):
        if self.packages_are_installed():
            return hashlib.sha256(self.installed).hexdigest()
        else:
            return "0" * 40

    def get_src_dir(self):
        return None
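
# The `packages` argument is keyed by package manager name; a hypothetical
# example of the expected shape:
#
#   SystemPackageFetcher(opts, {"rpm": ["openssl-devel"], "deb": ["libssl-dev"]})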


class PreinstalledNopFetcher(SystemPackageFetcher):
    def __init__(self):
        self.installed = True


class GitFetcher(Fetcher):
    DEFAULT_DEPTH = 1

    def __init__(self, build_options, manifest, repo_url, rev, depth):
        # Extract the host/path portions of the URL and generate a flattened
        # directory name.  eg:
        # github.com/facebook/folly.git -> github.com-facebook-folly.git
        url = urlparse(repo_url)
        directory = "%s%s" % (url.netloc, url.path)
        for s in ["/", "\\", ":"]:
            directory = directory.replace(s, "-")

        # Place it in a repos dir in the scratch space
        repos_dir = os.path.join(build_options.scratch_dir, "repos")
        if not os.path.exists(repos_dir):
            os.makedirs(repos_dir)
        self.repo_dir = os.path.join(repos_dir, directory)

        if not rev and build_options.project_hashes:
            hash_file = os.path.join(
                build_options.project_hashes,
                re.sub("\\.git$", "-rev.txt", url.path[1:]),
            )
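            # The pinned-rev file is expected to contain a line of the form
            # matched below, eg (this hash is illustrative):
            #   Subproject commit 0123456789abcdef0123456789abcdef01234567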
            if os.path.exists(hash_file):
                with open(hash_file, "r") as f:
                    data = f.read()
                    m = re.match("Subproject commit ([a-fA-F0-9]{40})", data)
                    if not m:
                        raise Exception("Failed to parse rev from %s" % hash_file)
                    rev = m.group(1)
                    print("Using pinned rev %s for %s" % (rev, repo_url))

        self.rev = rev or "main"
        self.origin_repo = repo_url
        self.manifest = manifest
        self.depth = depth if depth else GitFetcher.DEFAULT_DEPTH

    def _update(self):
        current_hash = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=self.repo_dir)
            .strip()
            .decode("utf-8")
        )
        target_hash = (
            subprocess.check_output(["git", "rev-parse", self.rev], cwd=self.repo_dir)
            .strip()
            .decode("utf-8")
        )
        if target_hash == current_hash:
            # It's up to date, so there are no changes.  This doesn't detect eg:
            # if origin/main moved and rev='main', but that's ok for our purposes;
            # we should be using explicit hashes or eg: a stable branch for the cases
            # that we care about, and it isn't unreasonable to require that the user
            # explicitly perform a clean build if those have moved.  For the most
            # part we prefer that folks build using a release tarball from github
            # rather than use the git protocol, as it is generally a bit quicker
            # to fetch and easier to hash and verify tarball downloads.
            return ChangeStatus()

        print("Updating %s -> %s" % (self.repo_dir, self.rev))
        run_cmd(["git", "fetch", "origin", self.rev], cwd=self.repo_dir)
        run_cmd(["git", "checkout", self.rev], cwd=self.repo_dir)
        run_cmd(["git", "submodule", "update", "--init"], cwd=self.repo_dir)

        return ChangeStatus(True)

    def update(self):
        if os.path.exists(self.repo_dir):
            return self._update()
        self._clone()
        return ChangeStatus(True)

    def _clone(self):
        print("Cloning %s..." % self.origin_repo)
        # The basename/dirname stuff allows us to dance around issues where
        # eg: this python process is native win32, but the git.exe is cygwin
        # or msys and doesn't like the absolute windows path that we'd otherwise
        # pass to it.  Careful use of cwd helps avoid headaches with cygpath.
        run_cmd(
            [
                "git",
                "clone",
                "--depth=" + str(self.depth),
                "--",
                self.origin_repo,
                os.path.basename(self.repo_dir),
            ],
            cwd=os.path.dirname(self.repo_dir),
        )
        self._update()

    def clean(self):
        if os.path.exists(self.repo_dir):
            run_cmd(["git", "clean", "-fxd"], cwd=self.repo_dir)

    def hash(self):
        return self.rev

    def get_src_dir(self):
        return self.repo_dir


def does_file_need_update(src_name, src_st, dest_name):
    try:
        target_st = os.lstat(dest_name)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise
        return True

    if src_st.st_size != target_st.st_size:
        return True

    if stat.S_IFMT(src_st.st_mode) != stat.S_IFMT(target_st.st_mode):
        return True
    if stat.S_ISLNK(src_st.st_mode):
        return os.readlink(src_name) != os.readlink(dest_name)
    if not stat.S_ISREG(src_st.st_mode):
        return True

    # They might have the same content; compare.
    with open(src_name, "rb") as sf, open(dest_name, "rb") as df:
        chunk_size = 8192
        while True:
            src_data = sf.read(chunk_size)
            dest_data = df.read(chunk_size)
            if src_data != dest_data:
                return True
            if len(src_data) < chunk_size:
                # EOF
                break
    return False


def copy_if_different(src_name, dest_name):
    """Copy src_name -> dest_name, but only touch dest_name
    if src_name is different from dest_name, making this a
    more build-system-friendly way to copy."""
    src_st = os.lstat(src_name)
    if not does_file_need_update(src_name, src_st, dest_name):
        return False

    dest_parent = os.path.dirname(dest_name)
    if not os.path.exists(dest_parent):
        os.makedirs(dest_parent)
    if stat.S_ISLNK(src_st.st_mode):
        try:
            os.unlink(dest_name)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
        target = os.readlink(src_name)
        print("Symlinking %s -> %s" % (dest_name, target))
        os.symlink(target, dest_name)
    else:
        print("Copying %s -> %s" % (src_name, dest_name))
        shutil.copy2(src_name, dest_name)

    return True
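
# Example (hypothetical paths): repeated copies are no-ops, which keeps
# mtimes stable for the benefit of the build system:
#
#   copy_if_different("src/a.h", "dest/a.h")  # True; dest written
#   copy_if_different("src/a.h", "dest/a.h")  # False; dest left untouched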


def list_files_under_dir_newer_than_timestamp(dir_to_scan, ts):
    for root, _dirs, files in os.walk(dir_to_scan):
        for src_file in files:
            full_name = os.path.join(root, src_file)
            st = os.lstat(full_name)
            if st.st_mtime > ts:
                yield full_name
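
# A small sketch (hypothetical usage) of finding files touched since some
# point in time:
#
#   start = time.time()
#   run_build(src_dir)  # hypothetical step that may touch files
#   for name in list_files_under_dir_newer_than_timestamp(src_dir, start):
#       print("changed: %s" % name)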


class ShipitPathMap(object):
    def __init__(self):
        self.roots = []
        self.mapping = []
        self.exclusion = []

    def add_mapping(self, fbsource_dir, target_dir):
        """Add a posix path or pattern.  We cannot normpath the input
        here because that would change the paths from posix to windows
        form and break the logic throughout this class."""
        self.roots.append(fbsource_dir)
        self.mapping.append((fbsource_dir, target_dir))

    def add_exclusion(self, pattern):
        self.exclusion.append(re.compile(pattern))

    def _minimize_roots(self):
        """Compute the de-duplicated set of roots within fbsource.
        We take the shortest common directory prefix to make this
        determination"""
        self.roots.sort(key=len)
        minimized = []

        for r in self.roots:
            add_this_entry = True
            for existing in minimized:
                if r.startswith(existing + "/"):
                    add_this_entry = False
                    break
            if add_this_entry:
                minimized.append(r)

        self.roots = minimized
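        # eg (illustrative): ["fbcode/folly", "fbcode/folly/io", "fbcode/wangle"]
        # minimizes to ["fbcode/folly", "fbcode/wangle"]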

    def _sort_mapping(self):
        self.mapping.sort(reverse=True, key=lambda x: len(x[0]))

    def _map_name(self, norm_name, dest_root):
        if norm_name.endswith(".pyc") or norm_name.endswith(".swp"):
            # Ignore some incidental garbage while iterating
            return None

        for excl in self.exclusion:
            if excl.match(norm_name):
                return None

        for src_name, dest_name in self.mapping:
            if norm_name == src_name or norm_name.startswith(src_name + "/"):
                rel_name = os.path.relpath(norm_name, src_name)
                # We can have "." as a component of some paths, depending
                # on the contents of the shipit transformation section.
                # normpath doesn't always remove `.` as the final component
                # of the path, which can be problematic when we later mkdir
                # the dirname of the path that we return.  Take care to avoid
                # returning a path with a `.` in it.
                rel_name = os.path.normpath(rel_name)
                if dest_name == ".":
                    return os.path.normpath(os.path.join(dest_root, rel_name))
                dest_name = os.path.normpath(dest_name)
                return os.path.normpath(os.path.join(dest_root, dest_name, rel_name))

        raise Exception("%s did not match any rules" % norm_name)
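
    # An illustrative trace of _map_name: given a mapping of
    # ("fbcode/folly", ".") and dest_root "/scratch/shipit/folly",
    # _map_name("fbcode/folly/io/IOBuf.cpp", dest_root) returns
    # "/scratch/shipit/folly/io/IOBuf.cpp".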

    def mirror(self, fbsource_root, dest_root):
        self._minimize_roots()
        self._sort_mapping()

        change_status = ChangeStatus()

        # Record the full set of files that should be in the tree
        full_file_list = set()

        for fbsource_subdir in self.roots:
            dir_to_mirror = os.path.join(fbsource_root, fbsource_subdir)
            prefetch_dir_if_eden(dir_to_mirror)
            if not os.path.exists(dir_to_mirror):
                raise Exception(
                    "%s doesn't exist; check your sparse profile!" % dir_to_mirror
                )
            for root, _dirs, files in os.walk(dir_to_mirror):
                for src_file in files:
                    full_name = os.path.join(root, src_file)
                    rel_name = os.path.relpath(full_name, fbsource_root)
                    norm_name = rel_name.replace("\\", "/")

                    target_name = self._map_name(norm_name, dest_root)
                    if target_name:
                        full_file_list.add(target_name)
                        if copy_if_different(full_name, target_name):
                            change_status.record_change(target_name)

        # Compare the list of previously shipped files; if a file is
        # in the old list but not the new list then it has been
        # removed from the source and should be removed from the
        # destination.
        # Why don't we simply create this list by walking dest_root?
        # Some builds currently have to be in-source builds and
        # may legitimately need to keep some state in the source tree :-/
        installed_name = os.path.join(dest_root, ".shipit_shipped")
        if os.path.exists(installed_name):
            with open(installed_name, "rb") as f:
                for name in f.read().decode("utf-8").splitlines():
                    name = name.strip()
                    if name not in full_file_list:
                        print("Remove %s" % name)
                        os.unlink(name)
                        change_status.record_change(name)

        with open(installed_name, "wb") as f:
            for name in sorted(list(full_file_list)):
                f.write(("%s\n" % name).encode("utf-8"))

        return change_status


class FbsourceRepoData(NamedTuple):
    hash: str
    date: str


FBSOURCE_REPO_DATA: Dict[str, FbsourceRepoData] = {}


def get_fbsource_repo_data(build_options):
    """Returns the commit metadata for the fbsource repo.
    Since we may have multiple first party projects to
    hash, and because we don't mutate the repo, we cache
    this hash in a global."""
    cached_data = FBSOURCE_REPO_DATA.get(build_options.fbsource_dir)
    if cached_data:
        return cached_data

    cmd = ["hg", "log", "-r.", "-T{node}\n{date|hgdate}"]
    env = Env()
    env.set("HGPLAIN", "1")
    log_data = subprocess.check_output(
        cmd, cwd=build_options.fbsource_dir, env=dict(env.items())
    ).decode("ascii")

    (hash, datestr) = log_data.split("\n")

    # datestr is like "seconds fractionalseconds"
    # We want "20200324.113140"
    (unixtime, _fractional) = datestr.split(" ")
    date = datetime.fromtimestamp(int(unixtime)).strftime("%Y%m%d.%H%M%S")
    cached_data = FbsourceRepoData(hash=hash, date=date)

    FBSOURCE_REPO_DATA[build_options.fbsource_dir] = cached_data

    return cached_data


class SimpleShipitTransformerFetcher(Fetcher):
    def __init__(self, build_options, manifest):
        self.build_options = build_options
        self.manifest = manifest
        self.repo_dir = os.path.join(build_options.scratch_dir, "shipit", manifest.name)

    def clean(self):
        if os.path.exists(self.repo_dir):
            shutil.rmtree(self.repo_dir)

    def update(self):
        mapping = ShipitPathMap()
        for src, dest in self.manifest.get_section_as_ordered_pairs("shipit.pathmap"):
            mapping.add_mapping(src, dest)
        if self.manifest.shipit_fbcode_builder:
            mapping.add_mapping(
                "fbcode/opensource/fbcode_builder", "build/fbcode_builder"
            )
        for pattern in self.manifest.get_section_as_args("shipit.strip"):
            mapping.add_exclusion(pattern)

        return mapping.mirror(self.build_options.fbsource_dir, self.repo_dir)
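
    # The manifest sections consumed in update() are expected to look roughly
    # like this ini-style sketch (section names come from the calls above;
    # the folly values are illustrative):
    #
    #   [shipit.pathmap]
    #   fbcode/folly = folly
    #
    #   [shipit.strip]
    #   ^fbcode/folly/folly-config\.h$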

    def hash(self):
        # We return a fixed non-hash string for in-fbsource builds.
        # We're relying on the `update` logic to correctly invalidate
        # the build in the case that files have changed.
        return "fbsource"

    def get_src_dir(self):
        return self.repo_dir


class ShipitTransformerFetcher(Fetcher):
    SHIPIT = "/var/www/scripts/opensource/shipit/run_shipit.php"

    def __init__(self, build_options, project_name):
        self.build_options = build_options
        self.project_name = project_name
        self.repo_dir = os.path.join(build_options.scratch_dir, "shipit", project_name)

    def update(self):
        if os.path.exists(self.repo_dir):
            return ChangeStatus()
        self.run_shipit()
        return ChangeStatus(True)

    def clean(self):
        if os.path.exists(self.repo_dir):
            shutil.rmtree(self.repo_dir)

    @classmethod
    def available(cls):
        return os.path.exists(cls.SHIPIT)

    def run_shipit(self):
        tmp_path = self.repo_dir + ".new"
        try:
            if os.path.exists(tmp_path):
                shutil.rmtree(tmp_path)

            # Run shipit
            run_cmd(
                [
                    "php",
                    ShipitTransformerFetcher.SHIPIT,
                    "--project=" + self.project_name,
                    "--create-new-repo",
                    "--source-repo-dir=" + self.build_options.fbsource_dir,
                    "--source-branch=.",
                    "--skip-source-init",
                    "--skip-source-pull",
                    "--skip-source-clean",
                    "--skip-push",
                    "--skip-reset",
                    "--destination-use-anonymous-https",
                    "--create-new-repo-output-path=" + tmp_path,
                ]
            )

            # Remove the .git directory from the repository it generated.
            # There is no need to commit this.
            repo_git_dir = os.path.join(tmp_path, ".git")
            shutil.rmtree(repo_git_dir)
            os.rename(tmp_path, self.repo_dir)
        except Exception:
            # Clean up after a failed extraction
            if os.path.exists(tmp_path):
                shutil.rmtree(tmp_path)
            self.clean()
            raise

    def hash(self):
        # We return a fixed non-hash string for in-fbsource builds.
        return "fbsource"

    def get_src_dir(self):
        return self.repo_dir


def download_url_to_file_with_progress(url, file_name):
    print("Download %s -> %s ..." % (url, file_name))

    class Progress(object):
        last_report = 0

        def progress(self, count, block, total):
            if total == -1:
                total = "(Unknown)"
            amount = count * block

            if sys.stdout.isatty():
                sys.stdout.write("\r downloading %s of %s " % (amount, total))
            else:
                # When logging to CI logs, avoid spamming the logs: print
                # status only every few seconds
                now = time.time()
                if now - self.last_report > 5:
                    sys.stdout.write(".. %s of %s " % (amount, total))
                    self.last_report = now
            sys.stdout.flush()

    progress = Progress()
    start = time.time()
    try:
        (_filename, headers) = urlretrieve(url, file_name, reporthook=progress.progress)
    except (OSError, IOError) as exc:  # noqa: B014
        raise TransientFailure(
            "Failed to download %s to %s: %s" % (url, file_name, str(exc))
        )

    end = time.time()
    sys.stdout.write(" [Complete in %f seconds]\n" % (end - start))
    sys.stdout.flush()
    print(f"{headers}")


class ArchiveFetcher(Fetcher):
    def __init__(self, build_options, manifest, url, sha256):
        self.manifest = manifest
        self.url = url
        self.sha256 = sha256
        self.build_options = build_options

        url = urlparse(self.url)
        basename = "%s-%s" % (manifest.name, os.path.basename(url.path))
        self.file_name = os.path.join(build_options.scratch_dir, "downloads", basename)
        self.src_dir = os.path.join(build_options.scratch_dir, "extracted", basename)
        self.hash_file = self.src_dir + ".hash"

    def _verify_hash(self):
        h = hashlib.sha256()
        with open(self.file_name, "rb") as f:
            while True:
                block = f.read(8192)
                if not block:
                    break
                h.update(block)
        digest = h.hexdigest()
        if digest != self.sha256:
            os.unlink(self.file_name)
            raise Exception(
                "%s: expected sha256 %s but got %s" % (self.url, self.sha256, digest)
            )

    def _download_dir(self):
        """Returns the download dir, creating it if it doesn't already exist"""
        download_dir = os.path.dirname(self.file_name)
        if not os.path.exists(download_dir):
            os.makedirs(download_dir)
        return download_dir

    def _download(self):
        self._download_dir()
        download_url_to_file_with_progress(self.url, self.file_name)
        self._verify_hash()

    def clean(self):
        if os.path.exists(self.src_dir):
            shutil.rmtree(self.src_dir)

    def update(self):
        try:
            with open(self.hash_file, "r") as f:
                saved_hash = f.read().strip()
                if saved_hash == self.sha256 and os.path.exists(self.src_dir):
                    # Everything is up to date
                    return ChangeStatus()
                print(
                    "saved hash %s doesn't match expected hash %s, re-validating"
                    % (saved_hash, self.sha256)
                )
                os.unlink(self.hash_file)
        except EnvironmentError:
            pass

        # If we got here we know the contents of src_dir are either missing
        # or wrong, so blow away whatever happened to be there first.
        if os.path.exists(self.src_dir):
            shutil.rmtree(self.src_dir)

        # If we already have a file here, make sure it looks legit before
        # proceeding: any errors and we just remove it and re-download
        if os.path.exists(self.file_name):
            try:
                self._verify_hash()
            except Exception:
                if os.path.exists(self.file_name):
                    os.unlink(self.file_name)

        if not os.path.exists(self.file_name):
            self._download()

        if tarfile.is_tarfile(self.file_name):
            opener = tarfile.open
        elif zipfile.is_zipfile(self.file_name):
            opener = zipfile.ZipFile
        else:
            raise Exception("don't know how to extract %s" % self.file_name)
        os.makedirs(self.src_dir)
        print("Extract %s -> %s" % (self.file_name, self.src_dir))
        t = opener(self.file_name)
        if is_windows():
            # Ensure that we don't fall over when dealing with long paths
            # on windows
            src = r"\\?\%s" % os.path.normpath(self.src_dir)
        else:
            src = self.src_dir
        # The `str` here is necessary to ensure that we don't pass a unicode
        # object down to tarfile.extractall on python2.  When extracting
        # the boost tarball it makes some assumptions and tries to convert
        # a non-ascii path to ascii and throws.
        src = str(src)
        t.extractall(src)

        with open(self.hash_file, "w") as f:
            f.write(self.sha256)

        return ChangeStatus(True)

    def hash(self):
        return self.sha256

    def get_src_dir(self):
        return self.src_dir
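
# A closing sketch (URL and digest are placeholders) of the ArchiveFetcher
# flow: download, verify the sha256, extract, then record the hash so the
# next update() is a no-op:
#
#   fetcher = ArchiveFetcher(
#       opts, manifest, "https://example.com/proj-1.0.tar.gz", sha256="<digest>"
#   )
#   fetcher.update()            # downloads + extracts on first run
#   src = fetcher.get_src_dir()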