# -*- coding: utf-8 -*-
"""
hyper/http11/connection
~~~~~~~~~~~~~~~~~~~~~~~

Objects that build hyper's connection-level HTTP/1.1 abstraction.
"""
import base64
import collections
import logging
import os
import socket

# The container ABCs live in ``collections.abc`` since Python 3.3, and the old
# aliases were removed from ``collections`` in Python 3.10. Fall back to the
# old location for interpreters that predate ``collections.abc``.
try:
    from collections.abc import Iterable, Mapping
except ImportError:  # pragma: no cover
    from collections import Iterable, Mapping

from hyperframe.frame import SettingsFrame

from .response import HTTP11Response
from ..tls import wrap_socket, H2C_PROTOCOL
from ..common.bufsocket import BufferedSocket
from ..common.exceptions import TLSUpgrade, HTTPUpgrade
from ..common.headers import HTTPHeaderMap
from ..common.util import to_bytestring, to_host_port_tuple
from ..compat import bytes

# We prefer pycohttpparser to the pure-Python interpretation
try:  # pragma: no cover
    from pycohttpparser.api import Parser
except ImportError:  # pragma: no cover
    from .parser import Parser
31
32
#: Module-level logger, named after this module.
log = logging.getLogger(__name__)

#: Request-body framing selectors. ``HTTP11Connection._add_body_headers``
#: returns one of these and ``HTTP11Connection._send_body`` acts on it.
#: ``BODY_CHUNKED``: the body is sent using chunked transfer encoding.
BODY_CHUNKED = 1
#: ``BODY_FLAT``: the body is sent as-is, framed by a content-length header.
BODY_FLAT = 2
37
38
class HTTP11Connection(object):
    """
    An object representing a single HTTP/1.1 connection to a server.

    :param host: The host to connect to. This may be an IP address or a
        hostname, and optionally may include a port: for example,
        ``'twitter.com'``, ``'twitter.com:443'`` or ``'127.0.0.1'``.
    :param port: (optional) The port to connect to. If not provided and one
        also isn't provided in the ``host`` parameter, defaults to 80.
    :param secure: (optional) Whether the request should use TLS. Defaults to
        ``False`` for most requests, but to ``True`` for any request issued to
        port 443.
    :param ssl_context: (optional) A class with custom certificate settings.
        If not provided then hyper's default ``SSLContext`` is used instead.
    :param proxy_host: (optional) The proxy to connect to.  This can be an IP
        address or a host name and may include a port.
    :param proxy_port: (optional) The proxy port to connect to. If not provided
        and one also isn't provided in the ``proxy_host`` parameter,
        defaults to 8080.
    :param kwargs: (optional) Any further keyword arguments are accepted and
        ignored.
    """
    def __init__(self, host, port=None, secure=None, ssl_context=None,
                 proxy_host=None, proxy_port=None, **kwargs):
        """
        Record the connection parameters. The socket itself is only opened
        by :meth:`connect`.
        """
        if port is None:
            self.host, self.port = to_host_port_tuple(host, default_port=80)
        else:
            self.host, self.port = host, port

        # Record whether we plan to secure the request. In future this should
        # be extended to a security profile, but a bool will do for now.
        # An explicit ``secure`` always wins; otherwise port 443 implies TLS.
        if secure is not None:
            self.secure = secure
        elif self.port == 443:
            self.secure = True
        else:
            self.secure = False

        # only send http upgrade headers for non-secure connection
        self._send_http_upgrade = not self.secure

        self.ssl_context = ssl_context
        self._sock = None

        # Setup proxy details if applicable.
        if proxy_host:
            if proxy_port is None:
                self.proxy_host, self.proxy_port = to_host_port_tuple(
                    proxy_host, default_port=8080
                )
            else:
                self.proxy_host, self.proxy_port = proxy_host, proxy_port
        else:
            self.proxy_host = None
            self.proxy_port = None

        #: The size of the in-memory buffer used to store data from the
        #: network. This is used as a performance optimisation. Increase buffer
        #: size to improve performance: decrease it to conserve memory.
        #: Defaults to 64kB.
        self.network_buffer_size = 65536

        #: The object used to perform HTTP/1.1 parsing. Needs to conform to
        #: the standard hyper parsing interface.
        self.parser = Parser()

    def connect(self):
        """
        Connect to the server specified when the object was created. This is a
        no-op if we're already connected.

        When a proxy is configured the TCP connection is made to the proxy
        instead of the target host.

        :returns: Nothing.
        :raises AssertionError: if both TLS and a proxy are requested, which
            is not supported.
        :raises TLSUpgrade: if TLS negotiation selected a protocol other than
            ``http/1.1``. The exception carries the protocol and the socket.
        """
        if self._sock is not None:
            return

        # Reject the unsupported combination before opening a socket so that
        # nothing is leaked. This is raised explicitly rather than through an
        # ``assert`` statement so the check survives ``python -O`` while the
        # exception type stays the same.
        if self.secure and self.proxy_host:
            raise AssertionError("Proxy with HTTPS not supported.")

        if not self.proxy_host:
            host = self.host
            port = self.port
        else:
            host = self.proxy_host
            port = self.proxy_port

        # The second argument is the connect timeout, in seconds.
        sock = socket.create_connection((host, port), 5)
        proto = None

        if self.secure:
            try:
                sock, proto = wrap_socket(sock, host, self.ssl_context)
            except Exception:
                # Don't leak the TCP socket when TLS setup fails.
                sock.close()
                raise

        log.debug("Selected protocol: %s", proto)
        sock = BufferedSocket(sock, self.network_buffer_size)

        # Anything other than plain HTTP/1.1 has to be handled by the caller,
        # who receives the already-connected socket through the exception.
        if proto not in ('http/1.1', None):
            raise TLSUpgrade(proto, sock)

        self._sock = sock

    def request(self, method, url, body=None, headers=None):
        """
        This will send a request to the server using the HTTP request method
        ``method`` and the selector ``url``. If the ``body`` argument is
        present, it is sent after the headers are finished. Text strings are
        encoded as UTF-8. To use other encodings, pass a bytes object.

        A ``content-length`` or chunked ``transfer-encoding`` header is added
        for the body unless the caller supplied one, and a ``host`` header is
        added if missing. The connection is opened first if necessary.

        :param method: The request method, e.g. ``'GET'``.
        :param url: The URL to contact, e.g. ``'/path/segment'``.
        :param body: (optional) The request body to send. Must be a bytestring,
            a text string, an iterable of bytestring, or a file-like object.
        :param headers: (optional) The headers to send on the request. May be
            a mapping or an iterable accepted by ``HTTPHeaderMap``.
        :returns: Nothing.
        :raises ValueError: if ``headers`` is neither a mapping nor an
            iterable, or if the body yields something that is not a
            bytestring.
        """
        headers = headers or {}

        method = to_bytestring(method)
        url = to_bytestring(url)

        if not isinstance(headers, HTTPHeaderMap):
            if isinstance(headers, Mapping):
                headers = HTTPHeaderMap(headers.items())
            elif isinstance(headers, Iterable):
                headers = HTTPHeaderMap(headers)
            else:
                raise ValueError(
                    'Header argument must be a dictionary or an iterable'
                )

        # Honour the documented contract: text bodies go out as UTF-8.
        # ``type(u'')`` is the text type on both Python 2 and Python 3.
        if isinstance(body, type(u'')):
            body = body.encode('utf-8')

        if self._sock is None:
            self.connect()

        # The upgrade offer is only made on the first request.
        if self._send_http_upgrade:
            self._add_upgrade_headers(headers)
            self._send_http_upgrade = False

        # We may need extra headers.
        if body:
            body_type = self._add_body_headers(headers, body)

        if b'host' not in headers:
            headers[b'host'] = self.host

        # Begin by emitting the header block.
        self._send_headers(method, url, headers)

        # Next, send the request body.
        if body:
            self._send_body(body, body_type)

    def get_response(self):
        """
        Reads the next response from the connection and returns it.

        Data is pulled from the socket until the parser hands back a parsed
        response head; the consumed bytes are then dropped from the socket
        buffer so that the body is what remains.

        :returns: An ``HTTP11Response`` built from the parsed status, reason
            phrase and headers, and bound to this connection's socket.
        :raises HTTPUpgrade: if the server answered ``101`` and agreed to
            upgrade to the h2c protocol. The exception carries the socket.
        """
        headers = HTTPHeaderMap()

        response = None
        while response is None:
            # 'encourage' the socket to receive data.
            self._sock.fill()
            response = self.parser.parse_response(self._sock.buffer)

        for n, v in response.headers:
            headers[n.tobytes()] = v.tobytes()

        self._sock.advance_buffer(response.consumed)

        # Use ``get`` with a default so that a 101 response lacking either
        # header is treated as "no h2c upgrade" instead of raising KeyError.
        if (response.status == 101 and
                b'upgrade' in headers.get(b'connection', []) and
                H2C_PROTOCOL.encode('utf-8') in headers.get(b'upgrade', [])):
            raise HTTPUpgrade(H2C_PROTOCOL, self._sock)

        return HTTP11Response(
            response.status,
            response.msg.tobytes(),
            headers,
            self._sock,
            self
        )

    def _send_headers(self, method, url, headers):
        """
        Handles the logic of sending the header block: the request line, one
        line per raw header, and the terminating blank line.

        :param method: The request method, as a bytestring.
        :param url: The request target, as a bytestring.
        :param headers: The header map to send; it must provide ``iter_raw``.
        """
        self._sock.send(b' '.join([method, url, b'HTTP/1.1\r\n']))

        for name, value in headers.iter_raw():
            name, value = to_bytestring(name), to_bytestring(value)
            header = b''.join([name, b': ', value, b'\r\n'])
            self._sock.send(header)

        self._sock.send(b'\r\n')

    def _add_body_headers(self, headers, body):
        """
        Adds any headers needed for sending the request body. This will always
        defer to the user-supplied header content.

        Bytestring bodies and file objects backed by a real file descriptor
        get a ``content-length`` header. Everything else, including file-like
        objects without a usable descriptor, is sent chunked.

        :param headers: The header map of the request; may be modified.
        :param body: The request body.
        :returns: One of (BODY_CHUNKED, BODY_FLAT), indicating what type of
            request body should be used.
        """
        # Imported here so this method carries its own dependency.
        from io import UnsupportedOperation

        if b'content-length' in headers:
            return BODY_FLAT

        if b'chunked' in headers.get(b'transfer-encoding', []):
            return BODY_CHUNKED

        # For bytestring bodies we upload the content with a fixed length.
        # For file objects, we use the length of the file object.
        length = None
        if isinstance(body, bytes):
            length = str(len(body)).encode('utf-8')
        elif hasattr(body, 'fileno'):
            # In-memory file objects such as ``io.BytesIO`` expose ``fileno``
            # but raise when it is called; their length is unknown to us.
            try:
                descriptor = body.fileno()
            except (AttributeError, UnsupportedOperation):
                descriptor = None

            if descriptor is not None:
                length = str(os.fstat(descriptor).st_size).encode('utf-8')

        if length is not None:
            headers[b'content-length'] = length
            return BODY_FLAT

        headers[b'transfer-encoding'] = b'chunked'
        return BODY_CHUNKED

    def _add_upgrade_headers(self, headers):
        """
        Adds the headers that offer the server an upgrade to h2c: the
        ``connection`` and ``upgrade`` headers plus an ``HTTP2-Settings``
        header carrying a serialized SETTINGS frame body.

        :param headers: The header map of the request; modified in place.
        """
        # Add HTTP Upgrade headers.
        headers[b'connection'] = b'Upgrade, HTTP2-Settings'
        headers[b'upgrade'] = H2C_PROTOCOL

        # Encode SETTINGS frame payload in Base64 and put into the HTTP-2
        # Settings header. The base64 padding is stripped.
        http2_settings = SettingsFrame(0)
        http2_settings.settings[SettingsFrame.INITIAL_WINDOW_SIZE] = 65535
        encoded_settings = base64.urlsafe_b64encode(
            http2_settings.serialize_body()
        )
        headers[b'HTTP2-Settings'] = encoded_settings.rstrip(b'=')

    def _send_body(self, body, body_type):
        """
        Handles the HTTP/1.1 logic for sending HTTP bodies.

        Flat bodies are written as-is, with file-like objects, bytestrings
        and iterables of bytestrings each handled separately. Any other body
        type is delegated to :meth:`_send_chunked`.

        :param body: The request body.
        :param body_type: One of (BODY_CHUNKED, BODY_FLAT).
        :raises ValueError: if a flat body is of an unsupported type or an
            iterable body contains something that is not a bytestring.
        """
        if body_type != BODY_FLAT:
            # Chunked!
            return self._send_chunked(body)

        # Special case for files and other 'readable' objects.
        if hasattr(body, 'read'):
            return self._send_file_like_obj(body)

        # Case for bytestrings.
        if isinstance(body, bytes):
            self._sock.send(body)
            return

        # Iterables that set a specific content length.
        if isinstance(body, Iterable):
            for item in body:
                try:
                    self._sock.send(item)
                except TypeError:
                    raise ValueError(
                        "Elements in iterable body must be bytestrings. "
                        "Illegal element: {}".format(item)
                    )
            return

        raise ValueError(
            'Request body must be a bytestring, a file-like object '
            'returning bytestrings or an iterable of bytestrings. '
            'Got: {}'.format(type(body))
        )

    def _send_chunked(self, body):
        """
        Handles the HTTP/1.1 logic for sending a chunk-encoded body.

        Each non-empty item of ``body`` becomes one chunk, and a zero-length
        chunk terminates the body.

        :param body: An iterable of bytestrings.
        :raises ValueError: if sending an item fails with ``TypeError``.
        """
        # Chunked! For chunked bodies we don't special-case, we just iterate
        # over what we have and send stuff out.
        for chunk in body:
            size = len(chunk)

            # A zero-length chunk is the end-of-body marker in chunked
            # encoding, so emitting one for an empty item would cut the body
            # short. Empty items carry no data: skip them.
            if size == 0:
                continue

            length = '{0:x}'.format(size).encode('ascii')

            # For now write this as four 'send' calls. That's probably
            # inefficient, let's come back to it.
            try:
                self._sock.send(length)
                self._sock.send(b'\r\n')
                self._sock.send(chunk)
                self._sock.send(b'\r\n')
            except TypeError:
                raise ValueError(
                    "Iterable bodies must always iterate in bytestrings"
                )

        self._sock.send(b'0\r\n\r\n')

    def _send_file_like_obj(self, fobj):
        """
        Handles streaming a file-like object to the network, reading it in
        16kB blocks until a read returns nothing.

        :param fobj: An object with a ``read(size)`` method.
        :raises ValueError: if sending a block fails with ``TypeError``.
        """
        while True:
            block = fobj.read(16*1024)
            if not block:
                break

            try:
                self._sock.send(block)
            except TypeError:
                raise ValueError(
                    "File-like bodies must return bytestrings. Got: "
                    "{}".format(type(block))
                )

    def close(self):
        """
        Closes the connection. This closes the socket and then abandons the
        reference to it. After calling this method, any outstanding
        :class:`Response <hyper.http11.response.Response>` objects will throw
        exceptions if attempts are made to read their bodies.

        Calling this on a connection that was never opened, or that has
        already been closed, does nothing.

        In some cases this method will automatically be called.

        .. warning:: This method should absolutely only be called when you are
                     certain the connection object is no longer needed.
        """
        # Drop our reference first so it is gone even if ``close`` raises.
        sock, self._sock = self._sock, None
        if sock is not None:
            sock.close()

    # The following two methods are the implementation of the context manager
    # protocol.
    def __enter__(self):
        """
        Enters the context manager, returning the connection itself.
        """
        return self

    def __exit__(self, type, value, tb):
        """
        Leaves the context manager by closing the connection.
        """
        self.close()
        return False  # Never swallow exceptions.
385