1
2"""Make HTTP requests. Thin wrapper around urllib2"""
3
4
# Public names exported by a star-import of this module.
__all__ = ['HTTPClient', 'HTTPResponse']
6
7
8import httplib
9import socket
10import urllib
11import urllib2
12
13
class Address(str):
    """A string (network address or hostname) that carries a usage counter.

    The text itself is set by ``str.__new__``; the initializer only attaches
    the ``refcount`` attribute, starting at zero.
    """

    def __init__(self, value):
        # ``value`` has already been consumed by str.__new__; nothing is
        # forwarded to the base initializer.
        super(Address, self).__init__()
        self.refcount = 0
17
class HostCache(object):
    """Reference-counted cache mapping hostnames to a resolved address.

    ``lookup`` resolves a host (or reuses the cached entry) and takes a
    reference; ``release`` drops a reference and evicts the entry once nobody
    holds it; ``get`` reads the cached address without touching the count.
    """

    def __init__(self):
        # host -> Address (a str with a ``refcount`` attribute)
        self.hostmap = {}

    def get(self, host):
        """Return the cached address for *host*, or *host* itself if uncached.

        Connections may be opened towards hosts that were never passed to
        ``lookup`` (for example a redirect target or a proxy). Falling back to
        the hostname lets the socket layer resolve it instead of failing with
        ``KeyError``.
        """
        entry = self.hostmap.get(host)
        if entry is None:
            return host
        return str(entry)

    def lookup(self, host):
        """Resolve *host* if needed, take a reference and return the address.

        When resolution fails (``socket.gaierror``) or yields no result, the
        hostname itself is cached so that the connection attempt reports the
        error later.
        """
        entry = self.hostmap.get(host)
        if entry is None:
            entry = self.hostmap.setdefault(host, Address(self._resolve(host)))
        entry.refcount += 1
        return str(entry)

    def release(self, host):
        """Drop one reference to *host*; forget it when the count hits zero.

        Raises ``KeyError`` if *host* is not currently cached.
        """
        entry = self.hostmap[host]
        entry.refcount -= 1
        if entry.refcount == 0:
            del self.hostmap[host]

    @staticmethod
    def _resolve(host):
        """Return the first address getaddrinfo reports, else *host* unchanged."""
        try:
            candidates = socket.getaddrinfo(host, 0, 0, 0, socket.SOL_TCP)
        except socket.gaierror:
            return host
        for family, socktype, proto, cname, sockaddr in candidates:
            return sockaddr[0]
        # Empty result list: keep the name and let connect() deal with it.
        return host
41
# Rebind the class name to a single shared instance: the rest of this module
# calls HostCache.get/lookup/release on this object, and the class itself is
# no longer reachable under this name afterwards.
HostCache = HostCache()
43
44
class HTTPConnection(httplib.HTTPConnection):
    """HTTP connection that dials the address stored in HostCache."""

    def connect(self):
        """Open the socket to the cached address and set up a tunnel if asked."""
        # self.source_address is only present in 2.7 and is not set by urllib2
        endpoint = (HostCache.get(self.host), self.port)
        self.sock = socket.create_connection(endpoint, self.timeout)
        # The hasattr guard covers httplib versions that provide no _tunnel.
        wants_tunnel = hasattr(self, '_tunnel') and self._tunnel_host
        if wants_tunnel:
            self._tunnel()
51
class HTTPSConnection(httplib.HTTPSConnection):
    """HTTPS connection that dials the address stored in HostCache."""

    def connect(self):
        """Open the socket to the cached address, tunnel if asked, then wrap it in SSL."""
        import ssl
        # self.source_address is only present in 2.7 and is not set by urllib2
        endpoint = (HostCache.get(self.host), self.port)
        raw_sock = socket.create_connection(endpoint, self.timeout)
        wants_tunnel = hasattr(self, '_tunnel') and self._tunnel_host
        if wants_tunnel:
            # _tunnel() talks over self.sock, so expose the plain socket first.
            self.sock = raw_sock
            self._tunnel()
        self.sock = ssl.wrap_socket(raw_sock, self.key_file, self.cert_file)
61
class HTTPHandler(urllib2.HTTPHandler):
    """urllib2 handler that opens http:// requests with this module's HTTPConnection."""

    def http_open(self, req):
        """Open *req* using the HostCache-aware connection class."""
        connection_class = HTTPConnection
        return self.do_open(connection_class, req)
65
class HTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 handler that opens https:// requests with this module's HTTPSConnection."""

    def https_open(self, req):
        """Open *req* using the HostCache-aware SSL connection class."""
        connection_class = HTTPSConnection
        return self.do_open(connection_class, req)
69
70
class HTTPRequest(urllib2.Request):
    """Hack urllib2.Request to support PUT and DELETE methods."""

    def __init__(self, url, method="GET", data=None, headers=None, origin_req_host=None, unverifiable=False):
        """Build a request for *url* that reports *method* as its HTTP verb.

        *headers* may be omitted or ``None``, in which case the request starts
        with no extra headers. The remaining arguments are passed to
        ``urllib2.Request`` unchanged.
        """
        # A None sentinel replaces the former shared mutable ``{}`` default.
        if headers is None:
            headers = {}
        urllib2.Request.__init__(self, url, data, headers, origin_req_host, unverifiable)
        self.url = url
        self.method = method

    def get_method(self):
        """Return the HTTP verb given at construction time."""
        return self.method

    def format(self):
        """Return a printable summary: the request line, then one header per line."""
        request_line = '%s %s\n' % (self.get_method(), self.get_full_url())
        header_lines = '\n'.join("%s: %s" % item for item in self.header_items())
        return request_line + header_lines
86
87
class HTTPResponse(object):
    """Plain value object describing the outcome of an HTTP request.

    Attributes: ``url``, ``status`` (numeric code), ``reason`` (status text),
    ``headers`` (mapping, never ``None``) and ``body`` (the payload read from
    the response).
    """

    def __init__(self, url, status, reason, headers, body):
        self.url = url
        self.status = status
        self.reason = reason
        self.headers = headers if headers is not None else {}
        self.body = body

    def __str__(self):
        """Render the status line, the headers and, if non-empty, the body."""
        lines = ["%s %s <%s>" % (self.status, self.reason, self.url)]
        lines.extend('%s: %s' % item for item in self.headers.items())
        result = '\n'.join(lines)
        if self.body:
            result += '\n\n'
            result += self.body
            result += '\n'
        return result

    @property
    def etag(self):
        """The ETag header value without its surrounding double quotes.

        Returns ``None`` when the header is absent. Raises ``ValueError`` when
        the value is not enclosed in double quotes.
        """
        etag = self.headers.get('ETag')
        if etag is None:
            return None
        if len(etag) > 1 and etag[0] == etag[-1] == '"':
            return etag[1:-1]
        else:
            raise ValueError('Cannot parse etag header value: %r' % etag)

    @staticmethod
    def _read_body(fp, headers):
        """Read the payload from *fp*, honouring Content-Length when usable.

        A missing, empty, negative or unparsable Content-Length means "read
        until EOF" rather than an error.
        """
        try:
            length = int(headers.get('content-length') or -1)
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            length = -1
        return fp.read(length)

    @classmethod
    def from_HTTPError(cls, response):
        """Build an instance from a ``urllib2.HTTPError``-like object.

        The body is empty when the error carries no file object.
        """
        if response.fp is not None:
            body = cls._read_body(response.fp, response.hdrs)
        else:
            body = ''
        return cls(response.filename, response.code, response.msg, response.hdrs, body)

    @classmethod
    def from_addinfourl(cls, response):
        """Build an instance from a ``urllib2.addinfourl``-like object."""
        body = cls._read_body(response.fp, response.headers)
        return cls(response.url, response.code, response.msg, response.headers, body)
129
130
class HTTPClient(object):
    """Issue HTTP requests relative to a base URL, with optional authentication.

    The opener is built with this module's HTTP/HTTPS handlers plus digest and
    basic auth handlers sharing one password manager.
    """

    def __init__(self, base_url, username, domain, password=None):
        """Prepare an opener for *base_url*.

        Credentials are registered (with *domain* as the realm) only when both
        *username* and *password* are not ``None``.
        """
        self.base_url = base_url
        if self.base_url[-1:] != '/':
            self.base_url += '/'
        password_manager = urllib2.HTTPPasswordMgr()
        if username is not None and password is not None:
            password_manager.add_password(domain, self.base_url, username, password)
        self.opener = urllib2.build_opener(
            HTTPHandler,
            HTTPSHandler,
            urllib2.HTTPDigestAuthHandler(password_manager),
            urllib2.HTTPBasicAuthHandler(password_manager),
        )

    def request(self, method, path, headers=None, data=None, etag=None, etagnot=None, timeout=None):
        """Make HTTP request. Return HTTPResponse instance.

        *path* is appended to the base URL (a leading slash is ignored).
        *etag* / *etagnot* add ``If-Match`` / ``If-None-Match`` headers; the
        value ``'*'`` is sent unquoted. The caller's *headers* mapping is not
        modified.

        Will never raise urllib2.HTTPError, but may raise other exceptions, such
        as urllib2.URLError or httplib.HTTPException
        """
        if path[:1] == '/':
            path = path[1:]
        # Work on a copy so the conditional headers added below never leak
        # into the dict the caller passed in.
        headers = dict(headers) if headers is not None else {}
        if etag is not None:
            headers['If-Match'] = ('"%s"' % etag) if etag != '*' else '*' # XXX use quoteString instead?
        if etagnot is not None:
            headers['If-None-Match'] = ('"%s"' % etagnot) if etagnot != '*' else '*'
        url = self.base_url + path
        req = HTTPRequest(url, method=method, headers=headers, data=data)
        host, port = urllib.splitport(req.get_host())
        # Take a reference on the host's cached address for the duration of
        # the request; it is released in the finally clause below.
        HostCache.lookup(host)
        try:
            response = self.opener.open(req, timeout=timeout)
            if isinstance(response, urllib2.HTTPError):
                return HTTPResponse.from_HTTPError(response)
            elif isinstance(response, urllib2.addinfourl):
                return HTTPResponse.from_addinfourl(response)
            else:
                raise RuntimeError('urllib2.open returned %r' % response)
        except urllib2.HTTPError as e:
            # Workaround for bug in urllib2 which doesn't reset the retry count
            # when a negative, but different than 401 or 407, response is
            # received. -Luci
            if e.code not in (401, 407):
                digest_handler_types = (urllib2.HTTPDigestAuthHandler, urllib2.ProxyDigestAuthHandler)
                for handler in self.opener.handlers:
                    if isinstance(handler, digest_handler_types):
                        handler.reset_retry_count()
            return HTTPResponse.from_HTTPError(e)
        finally:
            HostCache.release(host)
177
178
179