1"""
2Compare the speed of downloading URLs sequentially vs. using futures.
3"""
4
5import sys
6import time
7import functools
8
9try:
10    from urllib.request import urlopen
11except ImportError:
12    from urllib2 import urlopen
13
14try:
15    from concurrent.futures import ThreadPoolExecutor
16except ImportError:
17    ThreadPoolExecutor = lambda n: None
18try:
19    from concurrent.futures import ProcessPoolExecutor
20except ImportError:
21    ProcessPoolExecutor = lambda n: None
22
23from mpi4py.futures import MPIPoolExecutor, as_completed
24
# Addresses fetched by every download strategy in this script.
# NOTE(review): the "thisurlprobablydoesnotexist" entry looks deliberate --
# presumably it is there so that at least one download fails; confirm.
URLS = [
    'http://www.google.com/',
    'http://www.apple.com/',
    'http://www.ibm.com',
    'http://www.thisurlprobablydoesnotexist.com',
    'http://www.slashdot.org/',
    'http://www.python.org/',
    'http://www.bing.com/',
    'http://www.facebook.com/',
    'http://www.yahoo.com/',
    'http://www.youtube.com/',
    'http://www.blogger.com/',
]
38
def load_url(url, timeout):
    """Fetch *url* and return the complete response body.

    Args:
        url: Address to open; passed unchanged to ``urlopen``.
        timeout: Timeout value forwarded to ``urlopen`` as ``timeout=``.
            It is only forwarded when running on Python 2.6 or newer.

    Returns:
        Whatever ``read()`` on the opened response returns.

    Raises:
        Any exception raised by ``urlopen`` or ``read()`` propagates
        unchanged to the caller.
    """
    kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
    response = urlopen(url, **kwargs)
    try:
        return response.read()
    finally:
        # Release the connection right away instead of leaving it to the
        # garbage collector; try/finally keeps this working on Python 2,
        # where the response object is not a context manager.
        response.close()
42
def download_urls_sequential(urls, timeout=60):
    """Download every URL one after another.

    Args:
        urls: Iterable of URLs to fetch.
        timeout: Timeout value handed to ``load_url`` for each URL.

    Returns:
        A dict mapping each successfully fetched URL to its content.
        URLs whose download raised an exception are left out.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            # Best effort: a failed download is simply skipped.  Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit still stop the run.
            pass
    return url_to_content
51
def download_urls_with_executor(executor, urls, timeout=60):
    """Download every URL by submitting ``load_url`` calls to *executor*.

    Args:
        executor: Object providing ``submit`` and ``shutdown``, or ``None``
            when that executor type is unavailable.
        urls: Iterable of URLs to fetch.
        timeout: Timeout value passed to each ``load_url`` call.

    Returns:
        A dict mapping each successfully fetched URL to its content.
        It is empty when *executor* is ``None``; URLs whose download
        raised an exception are left out.

    The executor is always shut down before returning, including when
    submitting or collecting the work raises.
    """
    if executor is None:
        return {}
    try:
        url_to_content = {}
        # Remember which URL each future belongs to so results can be
        # matched up as they complete, in whatever order that happens.
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)
        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                # Best effort: a failed download is simply skipped.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt and SystemExit still stop the run.
                pass
        return url_to_content
    finally:
        executor.shutdown()
66
def main():
    """Time each download strategy over URLS and print one line per strategy.

    Every executor is created up front, before any timing starts, so the
    measured interval covers only the call that performs the downloads.
    Each output line shows the strategy name, the elapsed seconds and how
    many of the URLs were downloaded.
    """
    benchmarks = (
        ('sequential',
         functools.partial(download_urls_sequential, URLS)),
        ('threads',
         functools.partial(download_urls_with_executor,
                           ThreadPoolExecutor(10), URLS)),
        ('processes',
         functools.partial(download_urls_with_executor,
                           ProcessPoolExecutor(10), URLS)),
        ('mpi4py',
         functools.partial(download_urls_with_executor,
                           MPIPoolExecutor(10), URLS)),
    )
    out = sys.stdout
    for label, run in benchmarks:
        # Print the label first and flush, so it is visible while the
        # downloads are still in progress.
        out.write('%s: ' % label.ljust(11))
        out.flush()

        began = time.time()
        fetched = run()
        took = time.time() - began

        report = (took, len(fetched), len(URLS))
        out.write('%5.2f seconds (%2d of %d downloaded)\n' % report)
        out.flush()
88
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
91