"""Make HTTP requests. Thin wrapper around urllib2"""


__all__ = ['HTTPClient', 'HTTPResponse']


import httplib
import socket
import urllib
import urllib2


class Address(str):
    """A host address string carrying a reference count.

    The string value is set by ``str.__new__``; ``refcount`` tracks how many
    outstanding ``HostCache.lookup`` calls refer to this entry.
    """

    def __init__(self, value):
        self.refcount = 0


class HostCache(object):
    """Reference-counted map from host name to a resolved address.

    ``lookup`` resolves a host (once) and increments its reference count,
    ``release`` decrements it and drops the entry when it reaches zero, and
    ``get`` returns the address that the connection classes below connect to.
    """

    def __init__(self):
        self.hostmap = {}

    def get(self, host):
        """Return the cached address for ``host``.

        If ``host`` has no cache entry (for example a connection opened to a
        host other than the one registered through ``lookup``), ``host``
        itself is returned so the caller can still connect and let the socket
        layer resolve it, instead of failing with ``KeyError``.
        """
        try:
            return str(self.hostmap[host])
        except KeyError:
            return host

    def lookup(self, host):
        """Resolve ``host``, take a reference on its entry, return the address.

        The first address returned by ``socket.getaddrinfo`` is cached. If
        resolution fails with ``socket.gaierror`` the host name itself is
        cached instead, leaving the error to surface at connect time.
        """
        try:
            address = self.hostmap[host]
        except KeyError:
            try:
                infos = socket.getaddrinfo(host, 0, 0, 0, socket.SOL_TCP)
                # Each entry is (family, socktype, proto, canonname, sockaddr);
                # sockaddr[0] is the address string.
                resolved = next(info[4][0] for info in infos)
            except socket.gaierror:
                resolved = host
            address = self.hostmap.setdefault(host, Address(resolved))
        address.refcount += 1
        return str(address)

    def release(self, host):
        """Drop one reference on ``host``; remove the entry when unused.

        Raises ``KeyError`` if ``host`` has no entry.
        """
        address = self.hostmap[host]
        address.refcount -= 1
        if address.refcount == 0:
            del self.hostmap[host]


# The module-level name is deliberately rebound to a single shared instance;
# everything below uses ``HostCache`` as that instance.
HostCache = HostCache()


class HTTPConnection(httplib.HTTPConnection):
    """``httplib.HTTPConnection`` that connects to the HostCache address."""

    def connect(self):
        address = HostCache.get(self.host)
        # self.source_address is only present in 2.7 and is not set by urllib2
        self.sock = socket.create_connection((address, self.port), self.timeout)
        if hasattr(self, '_tunnel') and self._tunnel_host:
            self._tunnel()


class HTTPSConnection(httplib.HTTPSConnection):
    """``httplib.HTTPSConnection`` that connects to the HostCache address."""

    def connect(self):
        import ssl
        address = HostCache.get(self.host)
        # self.source_address is only present in 2.7 and is not set by urllib2
        sock = socket.create_connection((address, self.port), self.timeout)
        if hasattr(self, '_tunnel') and self._tunnel_host:
            # _tunnel() talks over self.sock, so expose the plain socket
            # before wrapping it in SSL.
            self.sock = sock
            self._tunnel()
        self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)


class HTTPHandler(urllib2.HTTPHandler):
    """urllib2 handler that opens http URLs with ``HTTPConnection`` above."""

    def http_open(self, req):
        return self.do_open(HTTPConnection, req)


class HTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 handler that opens https URLs with ``HTTPSConnection`` above."""

    def https_open(self, req):
        return self.do_open(HTTPSConnection, req)


class HTTPRequest(urllib2.Request):
    """Hack urllib2.Request to support PUT and DELETE methods."""

    def __init__(self, url, method="GET", data=None, headers=None, origin_req_host=None, unverifiable=False):
        # ``headers`` defaults to None rather than a shared ``{}`` literal;
        # urllib2.Request itself requires a mapping.
        if headers is None:
            headers = {}
        urllib2.Request.__init__(self, url, data, headers, origin_req_host, unverifiable)
        self.url = url
        self.method = method

    def get_method(self):
        """Return the HTTP method given at construction time."""
        return self.method

    def format(self):
        """Return the request line and headers as a multi-line string."""
        lines = ['%s %s' % (self.get_method(), self.get_full_url())]
        lines.extend("%s: %s" % item for item in self.header_items())
        return '\n'.join(lines)


class HTTPResponse(object):
    """Plain container for the result of an HTTP request.

    Attributes: ``url``, ``status``, ``reason``, ``headers`` (a mapping,
    never None) and ``body``.
    """

    def __init__(self, url, status, reason, headers, body):
        self.url = url
        self.status = status
        self.reason = reason
        self.headers = headers if headers is not None else {}
        self.body = body

    def __str__(self):
        parts = ["%s %s <%s>" % (self.status, self.reason, self.url)]
        parts.extend('\n%s: %s' % (k, v) for k, v in self.headers.items())
        if self.body:
            parts.extend(('\n\n', self.body, '\n'))
        return ''.join(parts)

    @property
    def etag(self):
        """The ETag header value without its surrounding double quotes.

        Returns None when there is no ETag header. Raises ``ValueError`` when
        the value is not wrapped in double quotes.
        """
        etag = self.headers.get('ETag')
        if etag is None:
            return None
        if len(etag) > 1 and etag[0] == etag[-1] == '"':
            return etag[1:-1]
        raise ValueError('Cannot parse etag header value: %r' % etag)

    @staticmethod
    def _read_body(fp, headers):
        """Read Content-Length bytes from ``fp``, or everything if unset."""
        length = int(headers.get('content-length') or -1)
        return fp.read(length)

    @classmethod
    def from_HTTPError(cls, response):
        """Build an HTTPResponse from a ``urllib2.HTTPError``.

        The body is read from ``response.fp`` when present and the response
        is closed afterwards; the body is ``''`` when there is no ``fp``.
        """
        if response.fp is None:
            body = ''
        else:
            try:
                body = cls._read_body(response.fp, response.hdrs)
            finally:
                # The body has been consumed; release the underlying file.
                response.close()
        return cls(response.filename, response.code, response.msg, response.hdrs, body)

    @classmethod
    def from_addinfourl(cls, response):
        """Build an HTTPResponse from a ``urllib2.addinfourl``.

        The body is read from ``response.fp`` and the response is closed
        afterwards.
        """
        try:
            body = cls._read_body(response.fp, response.headers)
        finally:
            # The body has been consumed; release the underlying file.
            response.close()
        return cls(response.url, response.code, response.msg, response.headers, body)


class HTTPClient(object):
    """HTTP client bound to a base URL, with optional digest/basic auth."""

    def __init__(self, base_url, username, domain, password=None):
        """Create a client for ``base_url``.

        A trailing ``/`` is appended to ``base_url`` if missing. When both
        ``username`` and ``password`` are given they are registered with the
        password manager, with ``domain`` passed as the realm argument.
        """
        self.base_url = base_url
        if not self.base_url.endswith('/'):
            self.base_url += '/'
        password_manager = urllib2.HTTPPasswordMgr()
        if username is not None and password is not None:
            password_manager.add_password(domain, self.base_url, username, password)
        self.opener = urllib2.build_opener(
            HTTPHandler,
            HTTPSHandler,
            urllib2.HTTPDigestAuthHandler(password_manager),
            urllib2.HTTPBasicAuthHandler(password_manager),
        )

    def request(self, method, path, headers=None, data=None, etag=None, etagnot=None, timeout=None):
        """Make HTTP request. Return HTTPResponse instance.

        ``path`` is appended to the client's base URL (one leading ``/`` is
        stripped). ``etag`` and ``etagnot`` set the ``If-Match`` and
        ``If-None-Match`` headers; ``'*'`` is sent unquoted, any other value
        is wrapped in double quotes. The ``headers`` mapping passed in is
        copied, not modified.

        Will never raise urllib2.HTTPError, but may raise other exceptions, such
        as urllib2.URLError or httplib.HTTPException
        """
        if path.startswith('/'):
            path = path[1:]
        # Work on a copy so the conditional headers added below do not leak
        # into the caller's dictionary.
        headers = dict(headers) if headers is not None else {}
        if etag is not None:
            headers['If-Match'] = ('"%s"' % etag) if etag != '*' else '*'  # XXX use quoteString instead?
        if etagnot is not None:
            headers['If-None-Match'] = ('"%s"' % etagnot) if etagnot != '*' else '*'
        url = self.base_url + path
        req = HTTPRequest(url, method=method, headers=headers, data=data)
        host = urllib.splitport(req.get_host())[0]
        # Register the host for the duration of the request; the connection
        # classes read the address back through HostCache.get().
        HostCache.lookup(host)
        try:
            response = self.opener.open(req, timeout=timeout)
            if isinstance(response, urllib2.HTTPError):
                return HTTPResponse.from_HTTPError(response)
            elif isinstance(response, urllib2.addinfourl):
                return HTTPResponse.from_addinfourl(response)
            else:
                raise RuntimeError('urllib2.open returned %r' % response)
        except urllib2.HTTPError as e:
            # Workaround for bug in urllib2 which doesn't reset the retry count
            # when a negative, but different than 401 or 407, response is
            # received. -Luci
            if e.code not in (401, 407):
                digest_handlers = (urllib2.HTTPDigestAuthHandler, urllib2.ProxyDigestAuthHandler)
                for handler in self.opener.handlers:
                    if isinstance(handler, digest_handlers):
                        handler.reset_retry_count()
            return HTTPResponse.from_HTTPError(e)
        finally:
            HostCache.release(host)