"""Utilities using NDG HTTPS Client, including a main module that can be used to
fetch from a URL.
"""
__author__ = "R B Wilkinson"
__date__ = "09/12/11"
__copyright__ = "(C) 2011 Science and Technology Facilities Council"
__license__ = "BSD - see LICENSE file in top-level directory"
__contact__ = "Philip.Kershaw@stfc.ac.uk"
__revision__ = '$Id$'

import logging
from optparse import OptionParser
import os
import sys

if sys.version_info[0] > 2:
    import http.cookiejar as cookiejar_
    import http.client as http_client_
    from urllib.request import Request as Request_
    from urllib.request import HTTPHandler as HTTPHandler_
    from urllib.request import HTTPCookieProcessor as HTTPCookieProcessor_
    from urllib.request import HTTPBasicAuthHandler as HTTPBasicAuthHandler_
    from urllib.request import HTTPPasswordMgrWithDefaultRealm as \
                                            HTTPPasswordMgrWithDefaultRealm_
    from urllib.request import ProxyHandler as ProxyHandler_
    from urllib.error import HTTPError as HTTPError_
    import urllib.parse as urlparse_
else:
    import cookielib as cookiejar_
    import httplib as http_client_
    from urllib2 import Request as Request_
    from urllib2 import HTTPHandler as HTTPHandler_
    from urllib2 import HTTPCookieProcessor as HTTPCookieProcessor_
    from urllib2 import HTTPBasicAuthHandler as HTTPBasicAuthHandler_
    from urllib2 import HTTPPasswordMgrWithDefaultRealm as \
                                            HTTPPasswordMgrWithDefaultRealm_
    from urllib2 import ProxyHandler as ProxyHandler_
    from urllib2 import HTTPError as HTTPError_
    import urlparse as urlparse_

from ndg.httpsclient.urllib2_build_opener import build_opener
from ndg.httpsclient.https import HTTPSContextHandler
from ndg.httpsclient import ssl_context_util

log = logging.getLogger(__name__)


class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor_):
    """Cookie processor that adds new cookies (instead of replacing the existing
    ones as HTTPCookieProcessor does)
    """
    def http_request(self, request):
        """Processes cookies for a HTTP request.

        Cookies from the cookie jar that apply to the request are appended to
        any Cookie header already present on the request rather than
        overwriting it.

        @param request: request to process
        @type request: urllib2.Request
        @return: request
        @rtype: urllib2.Request
        """
        COOKIE_HEADER_NAME = "Cookie"
        # Let the cookie jar populate a scratch request with no headers so that
        # the cookies it contributes can be read back in isolation.
        tmp_request = Request_(request.get_full_url(), request.data, {},
                               request.origin_req_host,
                               request.unverifiable)
        self.cookiejar.add_cookie_header(tmp_request)
        # Combine existing and new cookies.
        new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
        if new_cookies:
            if request.has_header(COOKIE_HEADER_NAME):
                # Merge new cookies with existing ones.
                old_cookies = request.get_header(COOKIE_HEADER_NAME)
                merged_cookies = '; '.join([old_cookies, new_cookies])
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                merged_cookies)
            else:
                # No existing cookies so just set new ones.
                request.add_unredirected_header(COOKIE_HEADER_NAME, new_cookies)
        return request

    # Process cookies for HTTPS in the same way.
    https_request = http_request


class URLFetchError(Exception):
    """Error fetching content from URL"""


def fetch_from_url(url, config, data=None, handlers=None):
    """Returns data retrieved from a URL.

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: data retrieved from URL
    @raise URLFetchError: if the response status is not 200 (OK) or the URL
    could not be opened
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code != http_client_.OK:
        raise URLFetchError(return_message)

    # Close the response even if reading the body fails part way through.
    try:
        return response.read()
    finally:
        response.close()


def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
    """Writes data retrieved from a URL to a file.

    The file is only written when the response status is 200 (OK).

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: output file
    @type output_file: basestring
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    succeeded = return_code == http_client_.OK
    if succeeded:
        try:
            return_data = response.read()
        finally:
            response.close()

        # The response body is bytes on Python 3, so the file has to be opened
        # in binary mode (text mode would raise TypeError on write).
        with open(output_file, "wb") as outfile:
            outfile.write(return_data)

    return return_code, return_message, succeeded


def fetch_stream_from_url(url, config, data=None, handlers=None):
    """Returns the open response for a URL so that the caller can stream its
    content. The caller is responsible for closing the returned object.

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: response object for the URL
    @rtype: file derived type
    @raise URLFetchError: if the response status is not 200 (OK) or the URL
    could not be opened
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code != http_client_.OK:
        raise URLFetchError(return_message)
    return response


def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.

    Errors are not raised to the caller: they are reported through the
    returned status code and message instead.

    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request;
    the passed-in object is not modified
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object, or None if the URL could not be opened)
    """
    debuglevel = 1 if config.debug else 0

    # Set up handlers for URL opener.
    if config.cookie:
        cj = config.cookie
    else:
        cj = cookiejar_.CookieJar()

    # Use a cookie processor that accumulates cookies when redirects occur so
    # that an application can redirect for authentication and retain both any
    # cookies for the application and the security system (c.f.,
    # urllib2.HTTPCookieProcessor which replaces cookies).
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)

    # Work on a copy so that the caller's list is not extended with the
    # handlers added here (which would accumulate across repeated calls).
    handlers = list(handlers) if handlers else []

    handlers.append(cookie_handler)

    if config.debug:
        http_handler = HTTPHandler_(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # currently only supports http basic auth
        auth_handler = HTTPBasicAuthHandler_(HTTPPasswordMgrWithDefaultRealm_())
        auth_handler.add_password(realm=None, uri=url,
                                  user=config.http_basicauth[0],
                                  passwd=config.http_basicauth[1])
        handlers.append(auth_handler)

    # Explicitly remove proxy handling if the host is one listed in the value of
    # the no_proxy environment variable because urllib2 does use proxy settings
    # set via http_proxy and https_proxy, but does not take the no_proxy value
    # into account.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(ProxyHandler_({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(ProxyHandler_(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}

    request = Request_(url, data, headers)

    # Open the URL and check the response.
    return_code = 0
    return_message = ''
    response = None

    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)

    except HTTPError_ as exc:
        # The server answered, but with an error status: report its code.
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)

    except Exception as exc:
        # Any other failure (e.g. connection or SSL error) is reported with a
        # status code of 0 rather than propagated.
        return_message = "Error: %s" % str(exc)
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())

    return (return_code, return_message, response)


def _should_use_proxy(url, no_proxy=None):
    """Determines whether a proxy should be used to open a connection to the
    specified URL, based on the value of the no_proxy environment variable.

    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma separated list of hosts for which a proxy should not
    be used; if None, the value of the no_proxy environment variable is used
    @type no_proxy: basestring
    @return: False if the URL's host is listed in no_proxy, otherwise True
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy_effective = os.environ.get('no_proxy', '')
    else:
        no_proxy_effective = no_proxy

    hostname = urlparse_.urlparse(_url_as_string(url)).hostname
    # urlparse returns the hostname in lower case, so the configured entries
    # are lower-cased too; otherwise mixed-case entries would never match.
    excluded_hosts = [h.strip().lower() for h in no_proxy_effective.split(',')]
    return hostname not in excluded_hosts


def _url_as_string(url):
    """Returns the URL string from a URL value that is either a string or
    urllib2.Request.

    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    @raise TypeError: if url is neither a string nor a request object
    """
    if isinstance(url, Request_):
        return url.get_full_url()
    elif isinstance(url, str):
        return url
    else:
        raise TypeError("Expected type %r or %r" %
                        (str, Request_))


class Configuration(object):
    """Connection configuration.
    """
    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None, http_basicauth=None, headers=None):
        """
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for the connection
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar (python 3 - http.cookiejar)
        @param http_basicauth: http authentication, or None
        @type http_basicauth: tuple of (username,password)
        @param headers: http headers
        @type headers: dict
        """
        self.ssl_context = ssl_context
        self.debug = debug
        self.proxies = proxies
        self.no_proxy = no_proxy
        self.cookie = cookie
        self.http_basicauth = http_basicauth
        self.headers = headers


def main():
    '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.
    If a POST data file is given, its content is sent as the request body.
    '''
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    parser.add_option("-a", "--basicauth", dest="basicauth",
                      metavar="USER:PASSWD",
                      default=None,
                      help="HTTP authentication credentials")
    parser.add_option("--header", action="append", dest="headers",
                      metavar="HEADER: VALUE",
                      help="Add HTTP header to request")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Paths that do not exist are treated as if they had not been given.
    if options.key_file and os.path.exists(options.key_file):
        key_file = options.key_file
    else:
        key_file = None

    if options.cert_file and os.path.exists(options.cert_file):
        cert_file = options.cert_file
    else:
        cert_file = None

    if options.ca_dir and os.path.exists(options.ca_dir):
        ca_dir = options.ca_dir
    else:
        ca_dir = None

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        # Read as bytes: urllib on Python 3 rejects str POST data.
        with open(options.data_file, "rb") as data_file:
            data = data_file.read()
    else:
        data = None

    if options.basicauth:
        http_basicauth = options.basicauth.split(':', 1)
        if len(http_basicauth) != 2:
            parser.error("Basic auth credentials must be given as USER:PASSWD")
    else:
        http_basicauth = None

    headers = {}
    for header in options.headers or []:
        key, separator, val = header.partition(':')
        if not separator:
            parser.error("Invalid header %r - expected 'HEADER: VALUE'"
                         % header)
        headers[key.strip()] = val.lstrip()

    # If a private key file is not specified, the key is assumed to be stored in
    # the certificate file.
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context,
                           options.debug,
                           http_basicauth=http_basicauth,
                           headers=headers)
    if options.output_file:
        return_code, return_message = fetch_from_url_to_file(
                                                      url,
                                                      config,
                                                      options.output_file,
                                                      data)[:2]
        # NOTE(review): SystemExit is given two arguments, so its exit value is
        # the (code, message) tuple rather than an integer; the interpreter
        # prints it and exits with status 1 even when the fetch succeeded.
        # Confirm whether a zero exit status on success is wanted.
        raise SystemExit(return_code, return_message)
    else:
        # Pass the POST data through so that -p is honoured without -f too.
        content = fetch_from_url(url, config, data=data)
        print(content)


if __name__=='__main__':
    logging.basicConfig()
    main()