"""
Compare the speed of downloading URLs sequentially vs. using futures.
"""

import sys
import time
import functools
from contextlib import closing

try:
    from urllib.request import urlopen          # Python 3
except ImportError:
    from urllib2 import urlopen                 # Python 2

# Each executor flavor is optional: where the import fails, the name is bound
# to a factory returning None, and download_urls_with_executor() skips it.
try:
    from concurrent.futures import ThreadPoolExecutor
except ImportError:
    ThreadPoolExecutor = lambda n: None
try:
    from concurrent.futures import ProcessPoolExecutor
except ImportError:
    ProcessPoolExecutor = lambda n: None
try:
    from mpi4py.futures import MPIPoolExecutor, as_completed
except ImportError:
    # Keep the demo runnable without mpi4py, consistent with the other
    # optional executors above; as_completed has a stdlib equivalent.
    MPIPoolExecutor = lambda n: None
    from concurrent.futures import as_completed

URLS = [
    'http://www.google.com/',
    'http://www.apple.com/',
    'http://www.ibm.com',
    'http://www.thisurlprobablydoesnotexist.com',
    'http://www.slashdot.org/',
    'http://www.python.org/',
    'http://www.bing.com/',
    'http://www.facebook.com/',
    'http://www.yahoo.com/',
    'http://www.youtube.com/',
    'http://www.blogger.com/',
]

def load_url(url, timeout):
    """Download *url* and return the response body as bytes.

    The *timeout* keyword is only passed on Python >= 2.6, where
    urlopen() grew support for it.
    """
    kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
    # closing() guarantees the response object is released even if
    # read() raises (the original leaked it).
    with closing(urlopen(url, **kwargs)) as response:
        return response.read()

def download_urls_sequential(urls, timeout=60):
    """Download *urls* one at a time; return a {url: content} dict.

    Best-effort: URLs that fail to download are silently skipped.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            pass  # deliberate best-effort: skip failed downloads
    return url_to_content

def download_urls_with_executor(executor, urls, timeout=60):
    """Download *urls* concurrently via *executor*; return {url: content}.

    Returns {} when *executor* is None (that flavor unavailable).
    The executor is always shut down before returning.
    """
    if executor is None:
        return {}
    try:
        url_to_content = {}
        # dict((k, v) for ...) instead of a dict comprehension keeps
        # compatibility with Python 2.6, which this file still targets.
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)
        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                pass  # deliberate best-effort: skip failed downloads
        return url_to_content
    finally:
        executor.shutdown()

def main():
    """Time each download strategy against URLS and print a summary line."""
    for meth, fn in [('sequential',
                      functools.partial(download_urls_sequential,
                                        URLS)),
                     ('threads',
                      functools.partial(download_urls_with_executor,
                                        ThreadPoolExecutor(10), URLS)),
                     ('processes',
                      functools.partial(download_urls_with_executor,
                                        ProcessPoolExecutor(10), URLS)),
                     ('mpi4py',
                      functools.partial(download_urls_with_executor,
                                        MPIPoolExecutor(10), URLS))]:
        sys.stdout.write('%s: ' % meth.ljust(11))
        sys.stdout.flush()
        start = time.time()
        url_map = fn()
        elapsed = time.time() - start
        sys.stdout.write('%5.2f seconds (%2d of %d downloaded)\n' %
                         (elapsed, len(url_map), len(URLS)))
        sys.stdout.flush()

if __name__ == '__main__':
    main()