1#!/usr/bin/env python
2
3# Copyright (c) 2015 Ted Mielczarek <ted@mielczarek.org>
4# and Michael R. Miller <michaelrmmiller@gmail.com>
5#
6# Permission is hereby granted, free of charge, to any person obtaining a copy
7# of this software and associated documentation files (the "Software"), to deal
8# in the Software without restriction, including without limitation the rights
9# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10# copies of the Software, and to permit persons to whom the Software is
11# furnished to do so, subject to the following conditions:
12#
13# The above copyright notice and this permission notice shall be included in
14# all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22# SOFTWARE.
23
24from __future__ import absolute_import
25
26import argparse
27import concurrent.futures
28import logging
29import os
30import re
31import requests
32import shutil
33import subprocess
34import tempfile
35import urlparse
36
37from PackageSymbolDumper import process_packages, find_packages
38
# Matches an OS X version number of the form 10.<minor>.<patch>.
# The pattern is unanchored and is applied with search() to download titles.
OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+")
40
41
def extract_dmg(dmg_path, dest):
    """Extract the contents of a .dmg image using external tools.

    Runs ``dmg extract <dmg_path> <tmpfile>`` and then
    ``hfsplus <tmpfile> extractall`` with ``dest`` as the working directory.
    The intermediate temporary file is deleted when the function returns.

    :param dmg_path: path of the .dmg file to extract.
    :param dest: directory used as the working directory for ``hfsplus``.
    :raises subprocess.CalledProcessError: if either tool exits non-zero.
    """
    logging.info("extract_dmg({}, {})".format(dmg_path, dest))
    with tempfile.NamedTemporaryFile() as f:
        # Discard the tool's stdout. The null device is opened in a
        # with-block so its handle is closed deterministically instead of
        # being left for the garbage collector.
        with open(os.devnull, "wb") as devnull:
            subprocess.check_call(
                ["dmg", "extract", dmg_path, f.name], stdout=devnull
            )
        subprocess.check_call(["hfsplus", f.name, "extractall"], cwd=dest)
49
50
def get_update_packages():
    """Yield the ``fileurl`` of matching entries from Apple's downloads list.

    Requests up to 16 pages (``offset`` 0 through 15) of the Apple support
    downloads listing. For each entry whose title matches ``OSX_RE`` and does
    not contain "Combo", the entry's ``fileurl`` value is yielded; entries
    without a ``fileurl`` are logged and skipped.

    Paging stops early when a response status is not 200 or when a page
    contains no ``downloads`` entries.

    :return: generator over ``fileurl`` values.
    """
    for i in range(16):
        logging.info("get_update_packages: page " + str(i))
        url = (
            "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency"
            "&facet=all&category=PF6&locale=en_US&offset=%d" % i
        )
        res = requests.get(url)
        if res.status_code != 200:
            break
        data = res.json()
        downloads = data.get("downloads", [])
        if not downloads:
            break
        for d in downloads:
            title = d.get("title", "")
            if OSX_RE.search(title) and "Combo" not in title:
                logging.info("Title: " + title)
                if "fileurl" in d:
                    yield d["fileurl"]
                else:
                    # logging.warn is a deprecated alias; use warning().
                    logging.warning("No fileurl in download!")
73
74
def fetch_url_to_file(url, download_dir):
    """Download ``url`` into ``download_dir`` unless the file already exists.

    The local file name is the basename of the URL's path component.

    :param url: URL to download.
    :param download_dir: directory the file is written into.
    :return: path of the downloaded file, or None when a file with that name
        already exists and the download is skipped.
    :raises requests.HTTPError: if the server responds with a 4xx/5xx status.
    """
    filename = os.path.basename(urlparse.urlsplit(url).path)
    local_filename = os.path.join(download_dir, filename)
    if os.path.isfile(local_filename):
        logging.info("{} already exists, skipping".format(local_filename))
        return None
    r = requests.get(url, stream=True)
    try:
        # Fail before creating the local file so that an HTTP error body is
        # never saved under the package's file name.
        r.raise_for_status()
        res_len = int(r.headers.get("content-length", "0"))
        logging.info(
            "Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len)
        )
        with open(local_filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection checked out until it is
        # closed, so release it on success and on failure alike.
        r.close()
    return local_filename
89
90
def fetch_and_extract_dmg(url, tmpdir):
    """Download the .dmg at ``url`` and list the packages found inside it.

    The image is fetched into ``tmpdir`` and extracted into a fresh
    subdirectory of ``tmpdir``; that subdirectory is then passed to
    ``find_packages``.

    :param url: URL of the .dmg to fetch.
    :param tmpdir: directory used for the download and the extraction.
    :return: list of the items produced by ``find_packages``, or an empty
        list when ``fetch_url_to_file`` returned no file name.
    """
    logging.info("fetch_and_extract_dmg: " + url)
    dmg_path = fetch_url_to_file(url, tmpdir)
    if dmg_path:
        # Each image gets its own extraction directory under tmpdir.
        extract_root = tempfile.mkdtemp(dir=tmpdir)
        extract_dmg(dmg_path, extract_root)
        found = list(find_packages(extract_root))
        message = "fetch_and_extract_dmg({}): found packages: {}".format(
            url, str(found)
        )
        logging.info(message)
        return found
    return []
104
105
def find_update_packages(tmpdir):
    """Yield packages from every update image, fetching two at a time.

    Submits one ``fetch_and_extract_dmg`` job per URL produced by
    ``get_update_packages`` to a two-worker thread pool, then yields the
    packages of each job as it completes. A job that raised is logged and
    contributes no packages.

    :param tmpdir: working directory handed to ``fetch_and_extract_dmg``.
    :return: generator over the packages returned by the completed jobs.
    """
    logging.info("find_update_packages")
    # Only download 2 packages at a time.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
        url_by_job = {
            pool.submit(fetch_and_extract_dmg, link, tmpdir): link
            for link in get_update_packages()
        }
        for done in concurrent.futures.as_completed(url_by_job):
            source_url = url_by_job[done]
            error = done.exception()
            if error is None:
                for pkg in done.result():
                    yield pkg
                continue
            logging.error(
                "exception downloading {}: {}".format(source_url, error)
            )
123
124
def main():
    """Command-line entry point.

    Parses ``--dump_syms`` and the positional ``to`` destination, configures
    logging at DEBUG level (with the urllib3 loggers limited to ERROR),
    creates a temporary working directory, and calls ``process_packages``
    with a finder that yields packages from ``find_update_packages``. The
    temporary directory is removed afterwards, whether or not processing
    succeeded.
    """
    parser = argparse.ArgumentParser(
        description="Download OS X update packages and dump symbols from them"
    )
    parser.add_argument(
        "--dump_syms",
        default="dump_syms",
        type=str,
        help="path to the Breakpad dump_syms executable",
    )
    parser.add_argument("to", type=str, help="destination path for the symbols")
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    # Keep per-connection chatter from the HTTP stack out of the DEBUG log.
    for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
        urllib3_logger = logging.getLogger(p)
        urllib3_logger.setLevel(logging.ERROR)
    # Create the scratch directory before entering the try block: if
    # mkdtemp() itself fails, the finally clause must not run against an
    # unassigned name and mask the real error.
    tmpdir = tempfile.mkdtemp(suffix=".osxupdates")
    try:

        def finder():
            return find_update_packages(tmpdir)

        process_packages(finder, args.to, None, args.dump_syms)
    finally:
        shutil.rmtree(tmpdir)
153
154
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
157