#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import absolute_import, division, print_function, unicode_literals

import errno
import hashlib
import os
import re
import shutil
import stat
import subprocess
import sys
import tarfile
import time
import zipfile
from datetime import datetime
from typing import Dict, NamedTuple

from .copytree import prefetch_dir_if_eden
from .envfuncs import Env
from .errors import TransientFailure
from .platform import is_windows
from .runcmd import run_cmd


try:
    from urllib import urlretrieve
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse
    from urllib.request import urlretrieve


def file_name_is_cmake_file(file_name):
    file_name = file_name.lower()
    base = os.path.basename(file_name)
    return (
        base.endswith(".cmake")
        or base.endswith(".cmake.in")
        or base == "cmakelists.txt"
    )


class ChangeStatus(object):
    """Indicates the nature of changes that happened while updating
    the source directory.  There are two broad uses:
    * When extracting archives for third party software we want to
      know that we did something (eg: we either extracted code or
      we didn't do anything)
    * For 1st party code where we use shipit to transform the code,
      we want to know if we changed anything so that we can perform
      a build, but we generally want to be a little more nuanced
      and be able to distinguish between just changing a source file
      and whether we might need to reconfigure the build system.
    """

    def __init__(self, all_changed=False):
        """Construct a ChangeStatus object.  The default is to create
        a status that indicates no changes, but passing all_changed=True
        will create one that indicates that everything changed."""
        if all_changed:
            self.source_files = 1
            self.make_files = 1
        else:
            self.source_files = 0
            self.make_files = 0

    def record_change(self, file_name):
        """Used by the shipit fetcher to record changes as it updates
        files in the destination.  If the file name might be one used
        in the cmake build system that we use for 1st party code, then
        record that as a "make file" change.  We could broaden this
        to match any file used by various build systems, but it is
        only really useful for our internal cmake stuff at this time.
        If the file isn't a build file and is under the `fbcode_builder`
        dir then we don't class that as an interesting change that we
        might need to rebuild, so we ignore it.
        Otherwise we record the file as a source file change."""

        file_name = file_name.lower()
        if file_name_is_cmake_file(file_name):
            self.make_files += 1
        elif "/fbcode_builder/cmake" in file_name:
            self.source_files += 1
        elif "/fbcode_builder/" not in file_name:
            self.source_files += 1

    def sources_changed(self):
        """Returns true if any source files were changed during
        an update operation.  This will typically be used to decide
        that the build system should be run on the source dir in
        incremental mode."""
        return self.source_files > 0

    def build_changed(self):
        """Returns true if any build files were changed during
        an update operation.  This will typically be used to decide
        that the build system should be reconfigured and re-run
        as a full build."""
        return self.make_files > 0
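

# A hypothetical illustration of how record_change() classifies paths
# (exposition only; the paths are made up):
#
#   status = ChangeStatus()
#   status.record_change("folly/CMakeLists.txt")             # make file
#   status.record_change("folly/fbcode_builder/getdeps.py")  # ignored
#   status.record_change("folly/io/async/AsyncSocket.cpp")   # source file
#   status.build_changed()    # -> True
#   status.sources_changed()  # -> True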


class Fetcher(object):
    """The Fetcher is responsible for fetching and extracting the
    sources for a project.  The Fetcher instance defines where the
    extracted data resides and reports this to the consumer via
    its `get_src_dir` method."""

    def update(self):
        """Brings the src dir up to date, ideally minimizing
        changes so that a subsequent build doesn't over-build.
        Returns a ChangeStatus object that helps the caller to
        understand the nature of the changes required during
        the update."""
        return ChangeStatus()

    def clean(self):
        """Reverts any changes that might have been made to
        the src dir"""
        pass

    def hash(self):
        """Returns a hash that identifies the version of the code in the
        working copy.  For a git repo this is the commit hash for the
        working copy.  For other Fetchers this should relate to the version
        of the code in the src dir.  The intent is that if a manifest
        changes the version/rev of a project then the hash should be
        different.  Importantly, this should be computable without actually
        fetching the code, as we want this to factor into a hash used to
        download a pre-built version of the code, without having to first
        download and extract its sources (eg: boost on windows is pretty
        painful).
        """
        pass

    def get_src_dir(self):
        """Returns the source directory that the project was
        extracted into"""
        pass
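

# A minimal sketch of how a caller might drive the Fetcher contract
# (hypothetical driver code; make_fetcher, reconfigure and
# incremental_build are not part of this module):
#
#   fetcher = make_fetcher(manifest)
#   cache_key = fetcher.hash()  # computable without fetching
#   status = fetcher.update()
#   src_dir = fetcher.get_src_dir()
#   if status.build_changed():
#       reconfigure(src_dir)       # full reconfigure and rebuild
#   elif status.sources_changed():
#       incremental_build(src_dir)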


class LocalDirFetcher(object):
    """This class exists to override the normal fetching behavior, and
    use an explicit user-specified directory for the project sources.

    This fetcher cannot update or track changes.  It always reports that
    the project has changed, forcing it to always be built."""

    def __init__(self, path):
        self.path = os.path.realpath(path)

    def update(self):
        return ChangeStatus(all_changed=True)

    def hash(self):
        return "0" * 40

    def get_src_dir(self):
        return self.path


class SystemPackageFetcher(object):
    def __init__(self, build_options, packages):
        self.manager = build_options.host_type.get_package_manager()
        self.packages = packages.get(self.manager)
        if self.packages:
            self.installed = None
        else:
            self.installed = False

    def packages_are_installed(self):
        if self.installed is not None:
            return self.installed

        cmd = None
        if self.manager == "rpm":
            cmd = ["rpm", "-q"] + sorted(self.packages)
        elif self.manager == "deb":
            cmd = ["dpkg", "-s"] + sorted(self.packages)

        if cmd:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if proc.returncode == 0:
                # captured as binary as we will hash this later
                self.installed = proc.stdout
            else:
                # Need all packages to be present to consider us installed
                self.installed = False
        else:
            self.installed = False

        return bool(self.installed)

    def update(self):
        assert self.installed
        return ChangeStatus(all_changed=False)

    def hash(self):
        if self.packages_are_installed():
            return hashlib.sha256(self.installed).hexdigest()
        else:
            return "0" * 40

    def get_src_dir(self):
        return None


class PreinstalledNopFetcher(SystemPackageFetcher):
    def __init__(self):
        self.installed = True
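

# Hypothetical illustration: if the host's package manager is "deb" and
# the manifest maps {"deb": ["libssl-dev"]}, packages_are_installed()
# runs `dpkg -s libssl-dev` and caches its stdout, so:
#
#   fetcher = SystemPackageFetcher(build_options, {"deb": ["libssl-dev"]})
#   fetcher.packages_are_installed()  # True if dpkg reports it installed
#   fetcher.hash()  # sha256 of the dpkg output; changes when the
#                   # installed package version changes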


class GitFetcher(Fetcher):
    DEFAULT_DEPTH = 1

    def __init__(self, build_options, manifest, repo_url, rev, depth):
        # Extract the host/path portions of the URL and generate a flattened
        # directory name.  eg:
        # github.com/facebook/folly.git -> github.com-facebook-folly.git
        url = urlparse(repo_url)
        directory = "%s%s" % (url.netloc, url.path)
        for s in ["/", "\\", ":"]:
            directory = directory.replace(s, "-")

        # Place it in a repos dir in the scratch space
        repos_dir = os.path.join(build_options.scratch_dir, "repos")
        if not os.path.exists(repos_dir):
            os.makedirs(repos_dir)
        self.repo_dir = os.path.join(repos_dir, directory)

        if not rev and build_options.project_hashes:
            hash_file = os.path.join(
                build_options.project_hashes,
                re.sub("\\.git$", "-rev.txt", url.path[1:]),
            )
            if os.path.exists(hash_file):
                with open(hash_file, "r") as f:
                    data = f.read()
                    m = re.match("Subproject commit ([a-fA-F0-9]{40})", data)
                    if not m:
                        raise Exception("Failed to parse rev from %s" % hash_file)
                    rev = m.group(1)
                    print("Using pinned rev %s for %s" % (rev, repo_url))

        self.rev = rev or "main"
        self.origin_repo = repo_url
        self.manifest = manifest
        self.depth = depth if depth else GitFetcher.DEFAULT_DEPTH

    def _update(self):
        current_hash = (
            subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=self.repo_dir)
            .strip()
            .decode("utf-8")
        )
        target_hash = (
            subprocess.check_output(["git", "rev-parse", self.rev], cwd=self.repo_dir)
            .strip()
            .decode("utf-8")
        )
        if target_hash == current_hash:
            # It's up to date, so there are no changes.  This doesn't detect eg:
            # if origin/main moved and rev='main', but that's ok for our purposes;
            # we should be using explicit hashes or eg: a stable branch for the cases
            # that we care about, and it isn't unreasonable to require that the user
            # explicitly perform a clean build if those have moved.  For the most
            # part we prefer that folks build using a release tarball from github
            # rather than use the git protocol, as it is generally a bit quicker
            # to fetch and easier to hash and verify tarball downloads.
            return ChangeStatus()

        print("Updating %s -> %s" % (self.repo_dir, self.rev))
        run_cmd(["git", "fetch", "origin", self.rev], cwd=self.repo_dir)
        run_cmd(["git", "checkout", self.rev], cwd=self.repo_dir)
        run_cmd(["git", "submodule", "update", "--init"], cwd=self.repo_dir)

        return ChangeStatus(True)

    def update(self):
        if os.path.exists(self.repo_dir):
            return self._update()
        self._clone()
        return ChangeStatus(True)

    def _clone(self):
        print("Cloning %s..." % self.origin_repo)
        # The basename/dirname stuff allows us to dance around issues where
        # eg: this python process is native win32, but the git.exe is cygwin
        # or msys and doesn't like the absolute windows path that we'd otherwise
        # pass to it.  Careful use of cwd helps avoid headaches with cygpath.
        run_cmd(
            [
                "git",
                "clone",
                "--depth=" + str(self.depth),
                "--",
                self.origin_repo,
                os.path.basename(self.repo_dir),
            ],
            cwd=os.path.dirname(self.repo_dir),
        )
        self._update()

    def clean(self):
        if os.path.exists(self.repo_dir):
            run_cmd(["git", "clean", "-fxd"], cwd=self.repo_dir)

    def hash(self):
        return self.rev

    def get_src_dir(self):
        return self.repo_dir
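

# The pinned-rev files consulted by GitFetcher above use git's submodule
# status format.  A hypothetical example: for repo_url
# "https://github.com/facebook/folly.git", url.path[1:] is
# "facebook/folly.git", so the fetcher looks for
# "<project_hashes>/facebook/folly-rev.txt" containing a line like:
#
#   Subproject commit <40 hex chars>
#
# and pins that commit instead of defaulting to "main".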


def does_file_need_update(src_name, src_st, dest_name):
    try:
        target_st = os.lstat(dest_name)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise
        return True

    if src_st.st_size != target_st.st_size:
        return True

    if stat.S_IFMT(src_st.st_mode) != stat.S_IFMT(target_st.st_mode):
        return True
    if stat.S_ISLNK(src_st.st_mode):
        return os.readlink(src_name) != os.readlink(dest_name)
    if not stat.S_ISREG(src_st.st_mode):
        return True

    # They might have the same content; compare.
    with open(src_name, "rb") as sf, open(dest_name, "rb") as df:
        chunk_size = 8192
        while True:
            src_data = sf.read(chunk_size)
            dest_data = df.read(chunk_size)
            if src_data != dest_data:
                return True
            if len(src_data) < chunk_size:
                # EOF
                break
    return False


def copy_if_different(src_name, dest_name):
    """Copy src_name -> dest_name, but only touch dest_name
    if src_name is different from dest_name, making this a
    more build-system-friendly way to copy."""
    src_st = os.lstat(src_name)
    if not does_file_need_update(src_name, src_st, dest_name):
        return False

    dest_parent = os.path.dirname(dest_name)
    if not os.path.exists(dest_parent):
        os.makedirs(dest_parent)
    if stat.S_ISLNK(src_st.st_mode):
        try:
            os.unlink(dest_name)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
        target = os.readlink(src_name)
        print("Symlinking %s -> %s" % (dest_name, target))
        os.symlink(target, dest_name)
    else:
        print("Copying %s -> %s" % (src_name, dest_name))
        shutil.copy2(src_name, dest_name)

    return True


def list_files_under_dir_newer_than_timestamp(dir_to_scan, ts):
    for root, _dirs, files in os.walk(dir_to_scan):
        for src_file in files:
            full_name = os.path.join(root, src_file)
            st = os.lstat(full_name)
            if st.st_mtime > ts:
                yield full_name
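

# Hypothetical usage: mirror recently-touched files into a destination
# tree without disturbing files whose content is unchanged, so that a
# downstream build system doesn't over-rebuild:
#
#   for src in list_files_under_dir_newer_than_timestamp(src_root, last_ts):
#       rel = os.path.relpath(src, src_root)
#       copy_if_different(src, os.path.join(dest_root, rel))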


class ShipitPathMap(object):
    def __init__(self):
        self.roots = []
        self.mapping = []
        self.exclusion = []

    def add_mapping(self, fbsource_dir, target_dir):
        """Add a posix path or pattern.  We cannot normpath the input
        here because that would change the paths from posix to windows
        form and break the logic throughout this class."""
        self.roots.append(fbsource_dir)
        self.mapping.append((fbsource_dir, target_dir))

    def add_exclusion(self, pattern):
        self.exclusion.append(re.compile(pattern))

    def _minimize_roots(self):
        """Compute the de-duplicated set of roots within fbsource.
        We take the shortest common directory prefix to make this
        determination."""
        self.roots.sort(key=len)
        minimized = []

        for r in self.roots:
            add_this_entry = True
            for existing in minimized:
                if r.startswith(existing + "/"):
                    add_this_entry = False
                    break
            if add_this_entry:
                minimized.append(r)

        self.roots = minimized

    def _sort_mapping(self):
        self.mapping.sort(reverse=True, key=lambda x: len(x[0]))

    def _map_name(self, norm_name, dest_root):
        if norm_name.endswith(".pyc") or norm_name.endswith(".swp"):
            # Ignore some incidental garbage while iterating
            return None

        for excl in self.exclusion:
            if excl.match(norm_name):
                return None

        for src_name, dest_name in self.mapping:
            if norm_name == src_name or norm_name.startswith(src_name + "/"):
                rel_name = os.path.relpath(norm_name, src_name)
                # We can have "." as a component of some paths, depending
                # on the contents of the shipit transformation section.
                # normpath doesn't always remove `.` as the final component
                # of the path, which can be problematic when we later mkdir
                # the dirname of the path that we return.  Take care to avoid
                # returning a path with a `.` in it.
                rel_name = os.path.normpath(rel_name)
                if dest_name == ".":
                    return os.path.normpath(os.path.join(dest_root, rel_name))
                dest_name = os.path.normpath(dest_name)
                return os.path.normpath(os.path.join(dest_root, dest_name, rel_name))

        raise Exception("%s did not match any rules" % norm_name)

    def mirror(self, fbsource_root, dest_root):
        self._minimize_roots()
        self._sort_mapping()

        change_status = ChangeStatus()

        # Record the full set of files that should be in the tree
        full_file_list = set()

        for fbsource_subdir in self.roots:
            dir_to_mirror = os.path.join(fbsource_root, fbsource_subdir)
            prefetch_dir_if_eden(dir_to_mirror)
            if not os.path.exists(dir_to_mirror):
                raise Exception(
                    "%s doesn't exist; check your sparse profile!" % dir_to_mirror
                )
            for root, _dirs, files in os.walk(dir_to_mirror):
                for src_file in files:
                    full_name = os.path.join(root, src_file)
                    rel_name = os.path.relpath(full_name, fbsource_root)
                    norm_name = rel_name.replace("\\", "/")

                    target_name = self._map_name(norm_name, dest_root)
                    if target_name:
                        full_file_list.add(target_name)
                        if copy_if_different(full_name, target_name):
                            change_status.record_change(target_name)

        # Compare the list of previously shipped files; if a file is
        # in the old list but not the new list then it has been
        # removed from the source and should be removed from the
        # destination.
        # Why don't we simply create this list by walking dest_root?
        # Some builds currently have to be in-source builds and
        # may legitimately need to keep some state in the source tree :-/
        installed_name = os.path.join(dest_root, ".shipit_shipped")
        if os.path.exists(installed_name):
            with open(installed_name, "rb") as f:
                for name in f.read().decode("utf-8").splitlines():
                    name = name.strip()
                    if name not in full_file_list:
                        print("Remove %s" % name)
                        os.unlink(name)
                        change_status.record_change(name)

        with open(installed_name, "wb") as f:
            for name in sorted(list(full_file_list)):
                f.write(("%s\n" % name).encode("utf-8"))

        return change_status
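

# Hypothetical illustration of the mapping logic (the mappings are
# made up for exposition):
#
#   m = ShipitPathMap()
#   m.add_mapping("fbcode/folly", ".")
#   m.add_mapping("fbcode/folly/public_tld", ".")
#
# _minimize_roots() keeps only "fbcode/folly" (the shared prefix), so
# the tree is walked once; _sort_mapping() puts the longest source
# prefix first so the most specific rule wins.  A file
# "fbcode/folly/io/IOBuf.cpp" then maps to "<dest_root>/io/IOBuf.cpp".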


class FbsourceRepoData(NamedTuple):
    hash: str
    date: str


FBSOURCE_REPO_DATA: Dict[str, FbsourceRepoData] = {}


def get_fbsource_repo_data(build_options):
    """Returns the commit metadata for the fbsource repo.
    Since we may have multiple first-party projects to
    hash, and because we don't mutate the repo, we cache
    this data in a global."""
    cached_data = FBSOURCE_REPO_DATA.get(build_options.fbsource_dir)
    if cached_data:
        return cached_data

    cmd = ["hg", "log", "-r.", "-T{node}\n{date|hgdate}"]
    env = Env()
    env.set("HGPLAIN", "1")
    log_data = subprocess.check_output(
        cmd, cwd=build_options.fbsource_dir, env=dict(env.items())
    ).decode("ascii")

    (hash, datestr) = log_data.split("\n")

    # datestr is like "seconds fractionalseconds"
    # We want "20200324.113140"
    (unixtime, _fractional) = datestr.split(" ")
    date = datetime.fromtimestamp(int(unixtime)).strftime("%Y%m%d.%H%M%S")
    cached_data = FbsourceRepoData(hash=hash, date=date)

    FBSOURCE_REPO_DATA[build_options.fbsource_dir] = cached_data

    return cached_data
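

# A hypothetical trace of the parsing above, given the
# "-T{node}\n{date|hgdate}" template (the node value is made up):
#
#   log_data = "<40-hex-node>\n1585053100 0"
#   hash, datestr = log_data.split("\n")        # node, then the hgdate
#   unixtime, _fractional = datestr.split(" ")  # "1585053100", "0"
#   # strftime("%Y%m%d.%H%M%S") then yields something like "20200324.113140"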


class SimpleShipitTransformerFetcher(Fetcher):
    def __init__(self, build_options, manifest):
        self.build_options = build_options
        self.manifest = manifest
        self.repo_dir = os.path.join(build_options.scratch_dir, "shipit", manifest.name)

    def clean(self):
        if os.path.exists(self.repo_dir):
            shutil.rmtree(self.repo_dir)

    def update(self):
        mapping = ShipitPathMap()
        for src, dest in self.manifest.get_section_as_ordered_pairs("shipit.pathmap"):
            mapping.add_mapping(src, dest)
        if self.manifest.shipit_fbcode_builder:
            mapping.add_mapping(
                "fbcode/opensource/fbcode_builder", "build/fbcode_builder"
            )
        for pattern in self.manifest.get_section_as_args("shipit.strip"):
            mapping.add_exclusion(pattern)

        return mapping.mirror(self.build_options.fbsource_dir, self.repo_dir)

    def hash(self):
        # We return a fixed non-hash string for in-fbsource builds.
        # We're relying on the `update` logic to correctly invalidate
        # the build in the case that files have changed.
        return "fbsource"

    def get_src_dir(self):
        return self.repo_dir


class ShipitTransformerFetcher(Fetcher):
    SHIPIT = "/var/www/scripts/opensource/shipit/run_shipit.php"

    def __init__(self, build_options, project_name):
        self.build_options = build_options
        self.project_name = project_name
        self.repo_dir = os.path.join(build_options.scratch_dir, "shipit", project_name)

    def update(self):
        if os.path.exists(self.repo_dir):
            return ChangeStatus()
        self.run_shipit()
        return ChangeStatus(True)

    def clean(self):
        if os.path.exists(self.repo_dir):
            shutil.rmtree(self.repo_dir)

    @classmethod
    def available(cls):
        return os.path.exists(cls.SHIPIT)

    def run_shipit(self):
        tmp_path = self.repo_dir + ".new"
        try:
            if os.path.exists(tmp_path):
                shutil.rmtree(tmp_path)

            # Run shipit
            run_cmd(
                [
                    "php",
                    ShipitTransformerFetcher.SHIPIT,
                    "--project=" + self.project_name,
                    "--create-new-repo",
                    "--source-repo-dir=" + self.build_options.fbsource_dir,
                    "--source-branch=.",
                    "--skip-source-init",
                    "--skip-source-pull",
                    "--skip-source-clean",
                    "--skip-push",
                    "--skip-reset",
                    "--destination-use-anonymous-https",
                    "--create-new-repo-output-path=" + tmp_path,
                ]
            )

            # Remove the .git directory from the repository it generated.
            # There is no need to commit this.
            repo_git_dir = os.path.join(tmp_path, ".git")
            shutil.rmtree(repo_git_dir)
            os.rename(tmp_path, self.repo_dir)
        except Exception:
            # Clean up after a failed extraction
            if os.path.exists(tmp_path):
                shutil.rmtree(tmp_path)
            self.clean()
            raise

    def hash(self):
        # We return a fixed non-hash string for in-fbsource builds.
        return "fbsource"

    def get_src_dir(self):
        return self.repo_dir


def download_url_to_file_with_progress(url, file_name):
    print("Download %s -> %s ..." % (url, file_name))

    class Progress(object):
        last_report = 0

        def progress(self, count, block, total):
            if total == -1:
                total = "(Unknown)"
            amount = count * block

            if sys.stdout.isatty():
                sys.stdout.write("\r downloading %s of %s " % (amount, total))
            else:
                # When logging to CI logs, avoid spamming the logs and print
                # status every few seconds
                now = time.time()
                if now - self.last_report > 5:
                    sys.stdout.write(".. %s of %s " % (amount, total))
                    self.last_report = now
            sys.stdout.flush()

    progress = Progress()
    start = time.time()
    try:
        (_filename, headers) = urlretrieve(url, file_name, reporthook=progress.progress)
    except (OSError, IOError) as exc:  # noqa: B014
        raise TransientFailure(
            "Failed to download %s to %s: %s" % (url, file_name, str(exc))
        )

    end = time.time()
    sys.stdout.write(" [Complete in %f seconds]\n" % (end - start))
    sys.stdout.flush()
    print(f"{headers}")
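

# urlretrieve invokes the reporthook as reporthook(block_count,
# block_size, total_size); a hypothetical trace for a 1 MiB download
# fetched in 8 KiB blocks:
#
#   progress.progress(0, 8192, 1048576)    # " downloading 0 of 1048576"
#   progress.progress(1, 8192, 1048576)    # " downloading 8192 of 1048576"
#   ...
#   progress.progress(128, 8192, 1048576)  # " downloading 1048576 of 1048576"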


class ArchiveFetcher(Fetcher):
    def __init__(self, build_options, manifest, url, sha256):
        self.manifest = manifest
        self.url = url
        self.sha256 = sha256
        self.build_options = build_options

        url = urlparse(self.url)
        basename = "%s-%s" % (manifest.name, os.path.basename(url.path))
        self.file_name = os.path.join(build_options.scratch_dir, "downloads", basename)
        self.src_dir = os.path.join(build_options.scratch_dir, "extracted", basename)
        self.hash_file = self.src_dir + ".hash"

    def _verify_hash(self):
        h = hashlib.sha256()
        with open(self.file_name, "rb") as f:
            while True:
                block = f.read(8192)
                if not block:
                    break
                h.update(block)
        digest = h.hexdigest()
        if digest != self.sha256:
            os.unlink(self.file_name)
            raise Exception(
                "%s: expected sha256 %s but got %s" % (self.url, self.sha256, digest)
            )

    def _download_dir(self):
        """returns the download dir, creating it if it doesn't already exist"""
        download_dir = os.path.dirname(self.file_name)
        if not os.path.exists(download_dir):
            os.makedirs(download_dir)
        return download_dir

    def _download(self):
        self._download_dir()
        download_url_to_file_with_progress(self.url, self.file_name)
        self._verify_hash()

    def clean(self):
        if os.path.exists(self.src_dir):
            shutil.rmtree(self.src_dir)

    def update(self):
        try:
            with open(self.hash_file, "r") as f:
                saved_hash = f.read().strip()
                if saved_hash == self.sha256 and os.path.exists(self.src_dir):
                    # Everything is up to date
                    return ChangeStatus()
                print(
                    "saved hash %s doesn't match expected hash %s, re-validating"
                    % (saved_hash, self.sha256)
                )
                os.unlink(self.hash_file)
        except EnvironmentError:
            pass

        # If we got here we know the contents of src_dir are either missing
        # or wrong, so blow away whatever happened to be there first.
        if os.path.exists(self.src_dir):
            shutil.rmtree(self.src_dir)

        # If we already have a file here, make sure it looks legit before
        # proceeding: any errors and we just remove it and re-download
        if os.path.exists(self.file_name):
            try:
                self._verify_hash()
            except Exception:
                if os.path.exists(self.file_name):
                    os.unlink(self.file_name)

        if not os.path.exists(self.file_name):
            self._download()

        if tarfile.is_tarfile(self.file_name):
            opener = tarfile.open
        elif zipfile.is_zipfile(self.file_name):
            opener = zipfile.ZipFile
        else:
            raise Exception("don't know how to extract %s" % self.file_name)
        os.makedirs(self.src_dir)
        print("Extract %s -> %s" % (self.file_name, self.src_dir))
        t = opener(self.file_name)
        if is_windows():
            # Ensure that we don't fall over when dealing with long paths
            # on windows
            src = r"\\?\%s" % os.path.normpath(self.src_dir)
        else:
            src = self.src_dir
        # The `str` here is necessary to ensure that we don't pass a unicode
        # object down to tarfile.extractall on python2.  When extracting
        # the boost tarball it makes some assumptions and tries to convert
        # a non-ascii path to ascii and throws.
        src = str(src)
        t.extractall(src)

        with open(self.hash_file, "w") as f:
            f.write(self.sha256)

        return ChangeStatus(True)

    def hash(self):
        return self.sha256

    def get_src_dir(self):
        return self.src_dir
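

# Hypothetical usage of ArchiveFetcher (the URL and digest are
# illustrative; real values come from a manifest):
#
#   fetcher = ArchiveFetcher(
#       build_options,
#       manifest,
#       url="https://example.com/zstd-1.4.4.tar.gz",
#       sha256="<expected digest from the manifest>",
#   )
#   fetcher.update()  # downloads, verifies sha256 and extracts on the
#                     # first run; later runs are no-ops while the
#                     # saved .hash file still matches
#   src = fetcher.get_src_dir()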