1# -*- test-case-name: twisted.web.test.test_newclient -*- 2# Copyright (c) Twisted Matrix Laboratories. 3# See LICENSE for details. 4 5""" 6An U{HTTP 1.1<http://www.w3.org/Protocols/rfc2616/rfc2616.html>} client. 7 8The way to use the functionality provided by this module is to: 9 10 - Connect a L{HTTP11ClientProtocol} to an HTTP server 11 - Create a L{Request} with the appropriate data 12 - Pass the request to L{HTTP11ClientProtocol.request} 13 - The returned Deferred will fire with a L{Response} object 14 - Create a L{IProtocol} provider which can handle the response body 15 - Connect it to the response with L{Response.deliverBody} 16 - When the protocol's C{connectionLost} method is called, the response is 17 complete. See L{Response.deliverBody} for details. 18 19Various other classes in this module support this usage: 20 21 - HTTPParser is the basic HTTP parser. It can handle the parts of HTTP which 22 are symmetric between requests and responses. 23 24 - HTTPClientParser extends HTTPParser to handle response-specific parts of 25 HTTP. One instance is created for each request to parse the corresponding 26 response. 27""" 28 29import re 30 31from zope.interface import implementer 32 33from twisted.internet.defer import ( 34 CancelledError, 35 Deferred, 36 fail, 37 maybeDeferred, 38 succeed, 39) 40from twisted.internet.error import ConnectionDone 41from twisted.internet.interfaces import IConsumer, IPushProducer 42from twisted.internet.protocol import Protocol 43from twisted.logger import Logger 44from twisted.protocols.basic import LineReceiver 45from twisted.python.compat import networkString 46from twisted.python.components import proxyForInterface 47from twisted.python.failure import Failure 48from twisted.python.reflect import fullyQualifiedName 49from twisted.web.http import ( 50 NO_CONTENT, 51 NOT_MODIFIED, 52 PotentialDataLoss, 53 _ChunkedTransferDecoder, 54 _DataLoss, 55 _IdentityTransferDecoder, 56) 57from twisted.web.http_headers import Headers 58from twisted.web.iweb import UNKNOWN_LENGTH, IClientRequest, IResponse 59 60# States HTTPParser can be in 61STATUS = "STATUS" 62HEADER = "HEADER" 63BODY = "BODY" 64DONE = "DONE" 65_moduleLog = Logger() 66 67 68class BadHeaders(Exception): 69 """ 70 Headers passed to L{Request} were in some way invalid. 71 """ 72 73 74class ExcessWrite(Exception): 75 """ 76 The body L{IBodyProducer} for a request tried to write data after 77 indicating it had finished writing data. 78 """ 79 80 81class ParseError(Exception): 82 """ 83 Some received data could not be parsed. 84 85 @ivar data: The string which could not be parsed. 86 """ 87 88 def __init__(self, reason, data): 89 Exception.__init__(self, reason, data) 90 self.data = data 91 92 93class BadResponseVersion(ParseError): 94 """ 95 The version string in a status line was unparsable. 96 """ 97 98 99class _WrapperException(Exception): 100 """ 101 L{_WrapperException} is the base exception type for exceptions which 102 include one or more other exceptions as the low-level causes. 103 104 @ivar reasons: A L{list} of one or more L{Failure} instances encountered 105 during an HTTP request. See subclass documentation for more details. 106 """ 107 108 def __init__(self, reasons): 109 Exception.__init__(self, reasons) 110 self.reasons = reasons 111 112 113class RequestGenerationFailed(_WrapperException): 114 """ 115 There was an error while creating the bytes which make up a request. 116 117 @ivar reasons: A C{list} of one or more L{Failure} instances giving the 118 reasons the request generation was considered to have failed. 119 """ 120 121 122class RequestTransmissionFailed(_WrapperException): 123 """ 124 There was an error while sending the bytes which make up a request. 125 126 @ivar reasons: A C{list} of one or more L{Failure} instances giving the 127 reasons the request transmission was considered to have failed. 128 """ 129 130 131class ConnectionAborted(Exception): 132 """ 133 The connection was explicitly aborted by application code. 134 """ 135 136 137class WrongBodyLength(Exception): 138 """ 139 An L{IBodyProducer} declared the number of bytes it was going to 140 produce (via its C{length} attribute) and then produced a different number 141 of bytes. 142 """ 143 144 145class ResponseDone(Exception): 146 """ 147 L{ResponseDone} may be passed to L{IProtocol.connectionLost} on the 148 protocol passed to L{Response.deliverBody} and indicates that the entire 149 response has been delivered. 150 """ 151 152 153class ResponseFailed(_WrapperException): 154 """ 155 L{ResponseFailed} indicates that all of the response to a request was not 156 received for some reason. 157 158 @ivar reasons: A C{list} of one or more L{Failure} instances giving the 159 reasons the response was considered to have failed. 160 161 @ivar response: If specified, the L{Response} received from the server (and 162 in particular the status code and the headers). 163 """ 164 165 def __init__(self, reasons, response=None): 166 _WrapperException.__init__(self, reasons) 167 self.response = response 168 169 170class ResponseNeverReceived(ResponseFailed): 171 """ 172 A L{ResponseFailed} that knows no response bytes at all have been received. 173 """ 174 175 176class RequestNotSent(Exception): 177 """ 178 L{RequestNotSent} indicates that an attempt was made to issue a request but 179 for reasons unrelated to the details of the request itself, the request 180 could not be sent. For example, this may indicate that an attempt was made 181 to send a request using a protocol which is no longer connected to a 182 server. 183 """ 184 185 186def _callAppFunction(function): 187 """ 188 Call C{function}. If it raises an exception, log it with a minimal 189 description of the source. 190 191 @return: L{None} 192 """ 193 try: 194 function() 195 except BaseException: 196 _moduleLog.failure( 197 "Unexpected exception from {name}", name=fullyQualifiedName(function) 198 ) 199 200 201class HTTPParser(LineReceiver): 202 """ 203 L{HTTPParser} handles the parsing side of HTTP processing. With a suitable 204 subclass, it can parse either the client side or the server side of the 205 connection. 206 207 @ivar headers: All of the non-connection control message headers yet 208 received. 209 210 @ivar state: State indicator for the response parsing state machine. One 211 of C{STATUS}, C{HEADER}, C{BODY}, C{DONE}. 212 213 @ivar _partialHeader: L{None} or a C{list} of the lines of a multiline 214 header while that header is being received. 215 """ 216 217 # NOTE: According to HTTP spec, we're supposed to eat the 218 # 'Proxy-Authenticate' and 'Proxy-Authorization' headers also, but that 219 # doesn't sound like a good idea to me, because it makes it impossible to 220 # have a non-authenticating transparent proxy in front of an authenticating 221 # proxy. An authenticating proxy can eat them itself. -jknight 222 # 223 # Further, quoting 224 # http://homepages.tesco.net/J.deBoynePollard/FGA/web-proxy-connection-header.html 225 # regarding the 'Proxy-Connection' header: 226 # 227 # The Proxy-Connection: header is a mistake in how some web browsers 228 # use HTTP. Its name is the result of a false analogy. It is not a 229 # standard part of the protocol. There is a different standard 230 # protocol mechanism for doing what it does. And its existence 231 # imposes a requirement upon HTTP servers such that no proxy HTTP 232 # server can be standards-conforming in practice. 233 # 234 # -exarkun 235 236 # Some servers (like http://news.ycombinator.com/) return status lines and 237 # HTTP headers delimited by \n instead of \r\n. 238 delimiter = b"\n" 239 240 CONNECTION_CONTROL_HEADERS = { 241 b"content-length", 242 b"connection", 243 b"keep-alive", 244 b"te", 245 b"trailers", 246 b"transfer-encoding", 247 b"upgrade", 248 b"proxy-connection", 249 } 250 251 def connectionMade(self): 252 self.headers = Headers() 253 self.connHeaders = Headers() 254 self.state = STATUS 255 self._partialHeader = None 256 257 def switchToBodyMode(self, decoder): 258 """ 259 Switch to body parsing mode - interpret any more bytes delivered as 260 part of the message body and deliver them to the given decoder. 261 """ 262 if self.state == BODY: 263 raise RuntimeError("already in body mode") 264 265 self.bodyDecoder = decoder 266 self.state = BODY 267 self.setRawMode() 268 269 def lineReceived(self, line): 270 """ 271 Handle one line from a response. 272 """ 273 # Handle the normal CR LF case. 274 if line[-1:] == b"\r": 275 line = line[:-1] 276 277 if self.state == STATUS: 278 self.statusReceived(line) 279 self.state = HEADER 280 elif self.state == HEADER: 281 if not line or line[0] not in b" \t": 282 if self._partialHeader is not None: 283 header = b"".join(self._partialHeader) 284 name, value = header.split(b":", 1) 285 value = value.strip() 286 self.headerReceived(name, value) 287 if not line: 288 # Empty line means the header section is over. 289 self.allHeadersReceived() 290 else: 291 # Line not beginning with LWS is another header. 292 self._partialHeader = [line] 293 else: 294 # A line beginning with LWS is a continuation of a header 295 # begun on a previous line. 296 self._partialHeader.append(line) 297 298 def rawDataReceived(self, data): 299 """ 300 Pass data from the message body to the body decoder object. 301 """ 302 self.bodyDecoder.dataReceived(data) 303 304 def isConnectionControlHeader(self, name): 305 """ 306 Return C{True} if the given lower-cased name is the name of a 307 connection control header (rather than an entity header). 308 309 According to RFC 2616, section 14.10, the tokens in the Connection 310 header are probably relevant here. However, I am not sure what the 311 practical consequences of either implementing or ignoring that are. 312 So I leave it unimplemented for the time being. 313 """ 314 return name in self.CONNECTION_CONTROL_HEADERS 315 316 def statusReceived(self, status): 317 """ 318 Callback invoked whenever the first line of a new message is received. 319 Override this. 320 321 @param status: The first line of an HTTP request or response message 322 without trailing I{CR LF}. 323 @type status: C{bytes} 324 """ 325 326 def headerReceived(self, name, value): 327 """ 328 Store the given header in C{self.headers}. 329 """ 330 name = name.lower() 331 if self.isConnectionControlHeader(name): 332 headers = self.connHeaders 333 else: 334 headers = self.headers 335 headers.addRawHeader(name, value) 336 337 def allHeadersReceived(self): 338 """ 339 Callback invoked after the last header is passed to C{headerReceived}. 340 Override this to change to the C{BODY} or C{DONE} state. 341 """ 342 self.switchToBodyMode(None) 343 344 345class HTTPClientParser(HTTPParser): 346 """ 347 An HTTP parser which only handles HTTP responses. 348 349 @ivar request: The request with which the expected response is associated. 350 @type request: L{Request} 351 352 @ivar NO_BODY_CODES: A C{set} of response codes which B{MUST NOT} have a 353 body. 354 355 @ivar finisher: A callable to invoke when this response is fully parsed. 356 357 @ivar _responseDeferred: A L{Deferred} which will be called back with the 358 response when all headers in the response have been received. 359 Thereafter, L{None}. 360 361 @ivar _everReceivedData: C{True} if any bytes have been received. 362 """ 363 364 NO_BODY_CODES = {NO_CONTENT, NOT_MODIFIED} 365 366 _transferDecoders = { 367 b"chunked": _ChunkedTransferDecoder, 368 } 369 370 bodyDecoder = None 371 _log = Logger() 372 373 def __init__(self, request, finisher): 374 self.request = request 375 self.finisher = finisher 376 self._responseDeferred = Deferred() 377 self._everReceivedData = False 378 379 def dataReceived(self, data): 380 """ 381 Override so that we know if any response has been received. 382 """ 383 self._everReceivedData = True 384 HTTPParser.dataReceived(self, data) 385 386 def parseVersion(self, strversion): 387 """ 388 Parse version strings of the form Protocol '/' Major '.' Minor. E.g. 389 b'HTTP/1.1'. Returns (protocol, major, minor). Will raise ValueError 390 on bad syntax. 391 """ 392 try: 393 proto, strnumber = strversion.split(b"/") 394 major, minor = strnumber.split(b".") 395 major, minor = int(major), int(minor) 396 except ValueError as e: 397 raise BadResponseVersion(str(e), strversion) 398 if major < 0 or minor < 0: 399 raise BadResponseVersion("version may not be negative", strversion) 400 return (proto, major, minor) 401 402 def statusReceived(self, status): 403 """ 404 Parse the status line into its components and create a response object 405 to keep track of this response's state. 406 """ 407 parts = status.split(b" ", 2) 408 if len(parts) == 2: 409 # Some broken servers omit the required `phrase` portion of 410 # `status-line`. One such server identified as 411 # "cloudflare-nginx". Others fail to identify themselves 412 # entirely. Fill in an empty phrase for such cases. 413 version, codeBytes = parts 414 phrase = b"" 415 elif len(parts) == 3: 416 version, codeBytes, phrase = parts 417 else: 418 raise ParseError("wrong number of parts", status) 419 420 try: 421 statusCode = int(codeBytes) 422 except ValueError: 423 raise ParseError("non-integer status code", status) 424 425 self.response = Response._construct( 426 self.parseVersion(version), 427 statusCode, 428 phrase, 429 self.headers, 430 self.transport, 431 self.request, 432 ) 433 434 def _finished(self, rest): 435 """ 436 Called to indicate that an entire response has been received. No more 437 bytes will be interpreted by this L{HTTPClientParser}. Extra bytes are 438 passed up and the state of this L{HTTPClientParser} is set to I{DONE}. 439 440 @param rest: A C{bytes} giving any extra bytes delivered to this 441 L{HTTPClientParser} which are not part of the response being 442 parsed. 443 """ 444 self.state = DONE 445 self.finisher(rest) 446 447 def isConnectionControlHeader(self, name): 448 """ 449 Content-Length in the response to a HEAD request is an entity header, 450 not a connection control header. 451 """ 452 if self.request.method == b"HEAD" and name == b"content-length": 453 return False 454 return HTTPParser.isConnectionControlHeader(self, name) 455 456 def allHeadersReceived(self): 457 """ 458 Figure out how long the response body is going to be by examining 459 headers and stuff. 460 """ 461 if 100 <= self.response.code < 200: 462 # RFC 7231 Section 6.2 says that if we receive a 1XX status code 463 # and aren't expecting it, we MAY ignore it. That's what we're 464 # going to do. We reset the parser here, but we leave 465 # _everReceivedData in its True state because we have, in fact, 466 # received data. 467 self._log.info( 468 "Ignoring unexpected {code} response", code=self.response.code 469 ) 470 self.connectionMade() 471 del self.response 472 return 473 474 if self.response.code in self.NO_BODY_CODES or self.request.method == b"HEAD": 475 self.response.length = 0 476 # The order of the next two lines might be of interest when adding 477 # support for pipelining. 478 self._finished(self.clearLineBuffer()) 479 self.response._bodyDataFinished() 480 else: 481 transferEncodingHeaders = self.connHeaders.getRawHeaders( 482 b"transfer-encoding" 483 ) 484 if transferEncodingHeaders: 485 486 # This could be a KeyError. However, that would mean we do not 487 # know how to decode the response body, so failing the request 488 # is as good a behavior as any. Perhaps someday we will want 489 # to normalize/document/test this specifically, but failing 490 # seems fine to me for now. 491 transferDecoder = self._transferDecoders[ 492 transferEncodingHeaders[0].lower() 493 ] 494 495 # If anyone ever invents a transfer encoding other than 496 # chunked (yea right), and that transfer encoding can predict 497 # the length of the response body, it might be sensible to 498 # allow the transfer decoder to set the response object's 499 # length attribute. 500 else: 501 contentLengthHeaders = self.connHeaders.getRawHeaders(b"content-length") 502 if contentLengthHeaders is None: 503 contentLength = None 504 elif len(contentLengthHeaders) == 1: 505 contentLength = int(contentLengthHeaders[0]) 506 self.response.length = contentLength 507 else: 508 # "HTTP Message Splitting" or "HTTP Response Smuggling" 509 # potentially happening. Or it's just a buggy server. 510 raise ValueError( 511 "Too many Content-Length headers; " "response is invalid" 512 ) 513 514 if contentLength == 0: 515 self._finished(self.clearLineBuffer()) 516 transferDecoder = None 517 else: 518 transferDecoder = lambda x, y: _IdentityTransferDecoder( 519 contentLength, x, y 520 ) 521 522 if transferDecoder is None: 523 self.response._bodyDataFinished() 524 else: 525 # Make sure as little data as possible from the response body 526 # gets delivered to the response object until the response 527 # object actually indicates it is ready to handle bytes 528 # (probably because an application gave it a way to interpret 529 # them). 530 self.transport.pauseProducing() 531 self.switchToBodyMode( 532 transferDecoder(self.response._bodyDataReceived, self._finished) 533 ) 534 535 # This must be last. If it were first, then application code might 536 # change some state (for example, registering a protocol to receive the 537 # response body). Then the pauseProducing above would be wrong since 538 # the response is ready for bytes and nothing else would ever resume 539 # the transport. 540 self._responseDeferred.callback(self.response) 541 del self._responseDeferred 542 543 def connectionLost(self, reason): 544 if self.bodyDecoder is not None: 545 try: 546 try: 547 self.bodyDecoder.noMoreData() 548 except PotentialDataLoss: 549 self.response._bodyDataFinished(Failure()) 550 except _DataLoss: 551 self.response._bodyDataFinished( 552 Failure(ResponseFailed([reason, Failure()], self.response)) 553 ) 554 else: 555 self.response._bodyDataFinished() 556 except BaseException: 557 # Handle exceptions from both the except suites and the else 558 # suite. Those functions really shouldn't raise exceptions, 559 # but maybe there's some buggy application code somewhere 560 # making things difficult. 561 self._log.failure("") 562 elif self.state != DONE: 563 if self._everReceivedData: 564 exceptionClass = ResponseFailed 565 else: 566 exceptionClass = ResponseNeverReceived 567 self._responseDeferred.errback(Failure(exceptionClass([reason]))) 568 del self._responseDeferred 569 570 571_VALID_METHOD = re.compile( 572 br"\A[%s]+\Z" 573 % ( 574 bytes().join( 575 ( 576 b"!", 577 b"#", 578 b"$", 579 b"%", 580 b"&", 581 b"'", 582 b"*", 583 b"+", 584 b"-", 585 b".", 586 b"^", 587 b"_", 588 b"`", 589 b"|", 590 b"~", 591 b"\x30-\x39", 592 b"\x41-\x5a", 593 b"\x61-\x7A", 594 ), 595 ), 596 ), 597) 598 599 600def _ensureValidMethod(method): 601 """ 602 An HTTP method is an HTTP token, which consists of any visible 603 ASCII character that is not a delimiter (i.e. one of 604 C{"(),/:;<=>?@[\\]{}}.) 605 606 @param method: the method to check 607 @type method: L{bytes} 608 609 @return: the method if it is valid 610 @rtype: L{bytes} 611 612 @raise ValueError: if the method is not valid 613 614 @see: U{https://tools.ietf.org/html/rfc7230#section-3.1.1}, 615 U{https://tools.ietf.org/html/rfc7230#section-3.2.6}, 616 U{https://tools.ietf.org/html/rfc5234#appendix-B.1} 617 """ 618 if _VALID_METHOD.match(method): 619 return method 620 raise ValueError(f"Invalid method {method!r}") 621 622 623_VALID_URI = re.compile(br"\A[\x21-\x7e]+\Z") 624 625 626def _ensureValidURI(uri): 627 """ 628 A valid URI cannot contain control characters (i.e., characters 629 between 0-32, inclusive and 127) or non-ASCII characters (i.e., 630 characters with values between 128-255, inclusive). 631 632 @param uri: the URI to check 633 @type uri: L{bytes} 634 635 @return: the URI if it is valid 636 @rtype: L{bytes} 637 638 @raise ValueError: if the URI is not valid 639 640 @see: U{https://tools.ietf.org/html/rfc3986#section-3.3}, 641 U{https://tools.ietf.org/html/rfc3986#appendix-A}, 642 U{https://tools.ietf.org/html/rfc5234#appendix-B.1} 643 """ 644 if _VALID_URI.match(uri): 645 return uri 646 raise ValueError(f"Invalid URI {uri!r}") 647 648 649@implementer(IClientRequest) 650class Request: 651 """ 652 A L{Request} instance describes an HTTP request to be sent to an HTTP 653 server. 654 655 @ivar method: See L{__init__}. 656 @ivar uri: See L{__init__}. 657 @ivar headers: See L{__init__}. 658 @ivar bodyProducer: See L{__init__}. 659 @ivar persistent: See L{__init__}. 660 661 @ivar _parsedURI: Parsed I{URI} for the request, or L{None}. 662 @type _parsedURI: L{twisted.web.client.URI} or L{None} 663 """ 664 665 _log = Logger() 666 667 def __init__(self, method, uri, headers, bodyProducer, persistent=False): 668 """ 669 @param method: The HTTP method for this request, ex: b'GET', b'HEAD', 670 b'POST', etc. 671 @type method: L{bytes} 672 673 @param uri: The relative URI of the resource to request. For example, 674 C{b'/foo/bar?baz=quux'}. 675 @type uri: L{bytes} 676 677 @param headers: Headers to be sent to the server. It is important to 678 note that this object does not create any implicit headers. So it 679 is up to the HTTP Client to add required headers such as 'Host'. 680 @type headers: L{twisted.web.http_headers.Headers} 681 682 @param bodyProducer: L{None} or an L{IBodyProducer} provider which 683 produces the content body to send to the remote HTTP server. 684 685 @param persistent: Set to C{True} when you use HTTP persistent 686 connection, defaults to C{False}. 687 @type persistent: L{bool} 688 """ 689 self.method = _ensureValidMethod(method) 690 self.uri = _ensureValidURI(uri) 691 self.headers = headers 692 self.bodyProducer = bodyProducer 693 self.persistent = persistent 694 self._parsedURI = None 695 696 @classmethod 697 def _construct( 698 cls, method, uri, headers, bodyProducer, persistent=False, parsedURI=None 699 ): 700 """ 701 Private constructor. 702 703 @param method: See L{__init__}. 704 @param uri: See L{__init__}. 705 @param headers: See L{__init__}. 706 @param bodyProducer: See L{__init__}. 707 @param persistent: See L{__init__}. 708 @param parsedURI: See L{Request._parsedURI}. 709 710 @return: L{Request} instance. 711 """ 712 request = cls(method, uri, headers, bodyProducer, persistent) 713 request._parsedURI = parsedURI 714 return request 715 716 @property 717 def absoluteURI(self): 718 """ 719 The absolute URI of the request as C{bytes}, or L{None} if the 720 absolute URI cannot be determined. 721 """ 722 return getattr(self._parsedURI, "toBytes", lambda: None)() 723 724 def _writeHeaders(self, transport, TEorCL): 725 hosts = self.headers.getRawHeaders(b"host", ()) 726 if len(hosts) != 1: 727 raise BadHeaders("Exactly one Host header required") 728 729 # In the future, having the protocol version be a parameter to this 730 # method would probably be good. It would be nice if this method 731 # weren't limited to issuing HTTP/1.1 requests. 732 requestLines = [] 733 requestLines.append( 734 b" ".join( 735 [ 736 _ensureValidMethod(self.method), 737 _ensureValidURI(self.uri), 738 b"HTTP/1.1\r\n", 739 ] 740 ), 741 ) 742 if not self.persistent: 743 requestLines.append(b"Connection: close\r\n") 744 if TEorCL is not None: 745 requestLines.append(TEorCL) 746 for name, values in self.headers.getAllRawHeaders(): 747 requestLines.extend([name + b": " + v + b"\r\n" for v in values]) 748 requestLines.append(b"\r\n") 749 transport.writeSequence(requestLines) 750 751 def _writeToBodyProducerChunked(self, transport): 752 """ 753 Write this request to the given transport using chunked 754 transfer-encoding to frame the body. 755 756 @param transport: See L{writeTo}. 757 @return: See L{writeTo}. 758 """ 759 self._writeHeaders(transport, b"Transfer-Encoding: chunked\r\n") 760 encoder = ChunkedEncoder(transport) 761 encoder.registerProducer(self.bodyProducer, True) 762 d = self.bodyProducer.startProducing(encoder) 763 764 def cbProduced(ignored): 765 encoder.unregisterProducer() 766 767 def ebProduced(err): 768 encoder._allowNoMoreWrites() 769 # Don't call the encoder's unregisterProducer because it will write 770 # a zero-length chunk. This would indicate to the server that the 771 # request body is complete. There was an error, though, so we 772 # don't want to do that. 773 transport.unregisterProducer() 774 return err 775 776 d.addCallbacks(cbProduced, ebProduced) 777 return d 778 779 def _writeToBodyProducerContentLength(self, transport): 780 """ 781 Write this request to the given transport using content-length to frame 782 the body. 783 784 @param transport: See L{writeTo}. 785 @return: See L{writeTo}. 786 """ 787 self._writeHeaders( 788 transport, 789 networkString("Content-Length: %d\r\n" % (self.bodyProducer.length,)), 790 ) 791 792 # This Deferred is used to signal an error in the data written to the 793 # encoder below. It can only errback and it will only do so before too 794 # many bytes have been written to the encoder and before the producer 795 # Deferred fires. 796 finishedConsuming = Deferred() 797 798 # This makes sure the producer writes the correct number of bytes for 799 # the request body. 800 encoder = LengthEnforcingConsumer( 801 self.bodyProducer, transport, finishedConsuming 802 ) 803 804 transport.registerProducer(self.bodyProducer, True) 805 806 finishedProducing = self.bodyProducer.startProducing(encoder) 807 808 def combine(consuming, producing): 809 # This Deferred is returned and will be fired when the first of 810 # consuming or producing fires. If it's cancelled, forward that 811 # cancellation to the producer. 812 def cancelConsuming(ign): 813 finishedProducing.cancel() 814 815 ultimate = Deferred(cancelConsuming) 816 817 # Keep track of what has happened so far. This initially 818 # contains None, then an integer uniquely identifying what 819 # sequence of events happened. See the callbacks and errbacks 820 # defined below for the meaning of each value. 821 state = [None] 822 823 def ebConsuming(err): 824 if state == [None]: 825 # The consuming Deferred failed first. This means the 826 # overall writeTo Deferred is going to errback now. The 827 # producing Deferred should not fire later (because the 828 # consumer should have called stopProducing on the 829 # producer), but if it does, a callback will be ignored 830 # and an errback will be logged. 831 state[0] = 1 832 ultimate.errback(err) 833 else: 834 # The consuming Deferred errbacked after the producing 835 # Deferred fired. This really shouldn't ever happen. 836 # If it does, I goofed. Log the error anyway, just so 837 # there's a chance someone might notice and complain. 838 self._log.failure( 839 "Buggy state machine in {request}/[{state}]: " 840 "ebConsuming called", 841 failure=err, 842 request=repr(self), 843 state=state[0], 844 ) 845 846 def cbProducing(result): 847 if state == [None]: 848 # The producing Deferred succeeded first. Nothing will 849 # ever happen to the consuming Deferred. Tell the 850 # encoder we're done so it can check what the producer 851 # wrote and make sure it was right. 852 state[0] = 2 853 try: 854 encoder._noMoreWritesExpected() 855 except BaseException: 856 # Fail the overall writeTo Deferred - something the 857 # producer did was wrong. 858 ultimate.errback() 859 else: 860 # Success - succeed the overall writeTo Deferred. 861 ultimate.callback(None) 862 # Otherwise, the consuming Deferred already errbacked. The 863 # producing Deferred wasn't supposed to fire, but it did 864 # anyway. It's buggy, but there's not really anything to be 865 # done about it. Just ignore this result. 866 867 def ebProducing(err): 868 if state == [None]: 869 # The producing Deferred failed first. This means the 870 # overall writeTo Deferred is going to errback now. 871 # Tell the encoder that we're done so it knows to reject 872 # further writes from the producer (which should not 873 # happen, but the producer may be buggy). 874 state[0] = 3 875 encoder._allowNoMoreWrites() 876 ultimate.errback(err) 877 else: 878 # The producing Deferred failed after the consuming 879 # Deferred failed. It shouldn't have, so it's buggy. 880 # Log the exception in case anyone who can fix the code 881 # is watching. 882 self._log.failure("Producer is buggy", failure=err) 883 884 consuming.addErrback(ebConsuming) 885 producing.addCallbacks(cbProducing, ebProducing) 886 887 return ultimate 888 889 d = combine(finishedConsuming, finishedProducing) 890 891 def f(passthrough): 892 # Regardless of what happens with the overall Deferred, once it 893 # fires, the producer registered way up above the definition of 894 # combine should be unregistered. 895 transport.unregisterProducer() 896 return passthrough 897 898 d.addBoth(f) 899 return d 900 901 def _writeToEmptyBodyContentLength(self, transport): 902 """ 903 Write this request to the given transport using content-length to frame 904 the (empty) body. 905 906 @param transport: See L{writeTo}. 907 @return: See L{writeTo}. 908 """ 909 self._writeHeaders(transport, b"Content-Length: 0\r\n") 910 return succeed(None) 911 912 def writeTo(self, transport): 913 """ 914 Format this L{Request} as an HTTP/1.1 request and write it to the given 915 transport. If bodyProducer is not None, it will be associated with an 916 L{IConsumer}. 917 918 @param transport: The transport to which to write. 919 @type transport: L{twisted.internet.interfaces.ITransport} provider 920 921 @return: A L{Deferred} which fires with L{None} when the request has 922 been completely written to the transport or with a L{Failure} if 923 there is any problem generating the request bytes. 924 """ 925 if self.bodyProducer is None: 926 # If the method semantics anticipate a body, include a 927 # Content-Length even if it is 0. 928 # https://tools.ietf.org/html/rfc7230#section-3.3.2 929 if self.method in (b"PUT", b"POST"): 930 self._writeToEmptyBodyContentLength(transport) 931 else: 932 self._writeHeaders(transport, None) 933 elif self.bodyProducer.length is UNKNOWN_LENGTH: 934 return self._writeToBodyProducerChunked(transport) 935 else: 936 return self._writeToBodyProducerContentLength(transport) 937 938 def stopWriting(self): 939 """ 940 Stop writing this request to the transport. This can only be called 941 after C{writeTo} and before the L{Deferred} returned by C{writeTo} 942 fires. It should cancel any asynchronous task started by C{writeTo}. 943 The L{Deferred} returned by C{writeTo} need not be fired if this method 944 is called. 945 """ 946 # If bodyProducer is None, then the Deferred returned by writeTo has 947 # fired already and this method cannot be called. 948 _callAppFunction(self.bodyProducer.stopProducing) 949 950 951class LengthEnforcingConsumer: 952 """ 953 An L{IConsumer} proxy which enforces an exact length requirement on the 954 total data written to it. 955 956 @ivar _length: The number of bytes remaining to be written. 957 958 @ivar _producer: The L{IBodyProducer} which is writing to this 959 consumer. 960 961 @ivar _consumer: The consumer to which at most C{_length} bytes will be 962 forwarded. 963 964 @ivar _finished: A L{Deferred} which will be fired with a L{Failure} if too 965 many bytes are written to this consumer. 966 """ 967 968 def __init__(self, producer, consumer, finished): 969 self._length = producer.length 970 self._producer = producer 971 self._consumer = consumer 972 self._finished = finished 973 974 def _allowNoMoreWrites(self): 975 """ 976 Indicate that no additional writes are allowed. Attempts to write 977 after calling this method will be met with an exception. 978 """ 979 self._finished = None 980 981 def write(self, bytes): 982 """ 983 Write C{bytes} to the underlying consumer unless 984 C{_noMoreWritesExpected} has been called or there are/have been too 985 many bytes. 986 """ 987 if self._finished is None: 988 # No writes are supposed to happen any more. Try to convince the 989 # calling code to stop calling this method by calling its 990 # stopProducing method and then throwing an exception at it. This 991 # exception isn't documented as part of the API because you're 992 # never supposed to expect it: only buggy code will ever receive 993 # it. 994 self._producer.stopProducing() 995 raise ExcessWrite() 996 997 if len(bytes) <= self._length: 998 self._length -= len(bytes) 999 self._consumer.write(bytes) 1000 else: 1001 # No synchronous exception is raised in *this* error path because 1002 # we still have _finished which we can use to report the error to a 1003 # better place than the direct caller of this method (some 1004 # arbitrary application code). 1005 _callAppFunction(self._producer.stopProducing) 1006 self._finished.errback(WrongBodyLength("too many bytes written")) 1007 self._allowNoMoreWrites() 1008 1009 def _noMoreWritesExpected(self): 1010 """ 1011 Called to indicate no more bytes will be written to this consumer. 1012 Check to see that the correct number have been written. 1013 1014 @raise WrongBodyLength: If not enough bytes have been written. 1015 """ 1016 if self._finished is not None: 1017 self._allowNoMoreWrites() 1018 if self._length: 1019 raise WrongBodyLength("too few bytes written") 1020 1021 1022def makeStatefulDispatcher(name, template): 1023 """ 1024 Given a I{dispatch} name and a function, return a function which can be 1025 used as a method and which, when called, will call another method defined 1026 on the instance and return the result. The other method which is called is 1027 determined by the value of the C{_state} attribute of the instance. 1028 1029 @param name: A string which is used to construct the name of the subsidiary 1030 method to invoke. The subsidiary method is named like C{'_%s_%s' % 1031 (name, _state)}. 1032 1033 @param template: A function object which is used to give the returned 1034 function a docstring. 1035 1036 @return: The dispatcher function. 1037 """ 1038 1039 def dispatcher(self, *args, **kwargs): 1040 func = getattr(self, "_" + name + "_" + self._state, None) 1041 if func is None: 1042 raise RuntimeError(f"{self!r} has no {name} method in state {self._state}") 1043 return func(*args, **kwargs) 1044 1045 dispatcher.__doc__ = template.__doc__ 1046 return dispatcher 1047 1048 1049# This proxy class is used only in the private constructor of the Response 1050# class below, in order to prevent users relying on any property of the 1051# concrete request object: they can only use what is provided by 1052# IClientRequest. 1053_ClientRequestProxy = proxyForInterface(IClientRequest) 1054 1055 1056@implementer(IResponse) 1057class Response: 1058 """ 1059 A L{Response} instance describes an HTTP response received from an HTTP 1060 server. 1061 1062 L{Response} should not be subclassed or instantiated. 1063 1064 @ivar _transport: See L{__init__}. 1065 1066 @ivar _bodyProtocol: The L{IProtocol} provider to which the body is 1067 delivered. L{None} before one has been registered with 1068 C{deliverBody}. 1069 1070 @ivar _bodyBuffer: A C{list} of the strings passed to C{bodyDataReceived} 1071 before C{deliverBody} is called. L{None} afterwards. 1072 1073 @ivar _state: Indicates what state this L{Response} instance is in, 1074 particularly with respect to delivering bytes from the response body 1075 to an application-supplied protocol object. This may be one of 1076 C{'INITIAL'}, C{'CONNECTED'}, C{'DEFERRED_CLOSE'}, or C{'FINISHED'}, 1077 with the following meanings: 1078 1079 - INITIAL: This is the state L{Response} objects start in. No 1080 protocol has yet been provided and the underlying transport may 1081 still have bytes to deliver to it. 1082 1083 - DEFERRED_CLOSE: If the underlying transport indicates all bytes 1084 have been delivered but no application-provided protocol is yet 1085 available, the L{Response} moves to this state. Data is 1086 buffered and waiting for a protocol to be delivered to. 1087 1088 - CONNECTED: If a protocol is provided when the state is INITIAL, 1089 the L{Response} moves to this state. Any buffered data is 1090 delivered and any data which arrives from the transport 1091 subsequently is given directly to the protocol. 1092 1093 - FINISHED: If a protocol is provided in the DEFERRED_CLOSE state, 1094 the L{Response} moves to this state after delivering all 1095 buffered data to the protocol. Otherwise, if the L{Response} is 1096 in the CONNECTED state, if the transport indicates there is no 1097 more data, the L{Response} moves to this state. Nothing else 1098 can happen once the L{Response} is in this state. 1099 @type _state: C{str} 1100 """ 1101 1102 length = UNKNOWN_LENGTH 1103 1104 _bodyProtocol = None 1105 _bodyFinished = False 1106 1107 def __init__(self, version, code, phrase, headers, _transport): 1108 """ 1109 @param version: HTTP version components protocol, major, minor. E.g. 1110 C{(b'HTTP', 1, 1)} to mean C{b'HTTP/1.1'}. 1111 1112 @param code: HTTP status code. 1113 @type code: L{int} 1114 1115 @param phrase: HTTP reason phrase, intended to give a short description 1116 of the HTTP status code. 1117 1118 @param headers: HTTP response headers. 1119 @type headers: L{twisted.web.http_headers.Headers} 1120 1121 @param _transport: The transport which is delivering this response. 1122 """ 1123 self.version = version 1124 self.code = code 1125 self.phrase = phrase 1126 self.headers = headers 1127 self._transport = _transport 1128 self._bodyBuffer = [] 1129 self._state = "INITIAL" 1130 self.request = None 1131 self.previousResponse = None 1132 1133 @classmethod 1134 def _construct(cls, version, code, phrase, headers, _transport, request): 1135 """ 1136 Private constructor. 1137 1138 @param version: See L{__init__}. 1139 @param code: See L{__init__}. 1140 @param phrase: See L{__init__}. 1141 @param headers: See L{__init__}. 1142 @param _transport: See L{__init__}. 1143 @param request: See L{IResponse.request}. 1144 1145 @return: L{Response} instance. 1146 """ 1147 response = Response(version, code, phrase, headers, _transport) 1148 response.request = _ClientRequestProxy(request) 1149 return response 1150 1151 def setPreviousResponse(self, previousResponse): 1152 self.previousResponse = previousResponse 1153 1154 def deliverBody(self, protocol): 1155 """ 1156 Dispatch the given L{IProtocol} depending of the current state of the 1157 response. 1158 """ 1159 1160 deliverBody = makeStatefulDispatcher("deliverBody", deliverBody) 1161 1162 def _deliverBody_INITIAL(self, protocol): 1163 """ 1164 Deliver any buffered data to C{protocol} and prepare to deliver any 1165 future data to it. Move to the C{'CONNECTED'} state. 1166 """ 1167 protocol.makeConnection(self._transport) 1168 self._bodyProtocol = protocol 1169 for data in self._bodyBuffer: 1170 self._bodyProtocol.dataReceived(data) 1171 self._bodyBuffer = None 1172 1173 self._state = "CONNECTED" 1174 1175 # Now that there's a protocol to consume the body, resume the 1176 # transport. It was previously paused by HTTPClientParser to avoid 1177 # reading too much data before it could be handled. We need to do this 1178 # after we transition our state as it may recursively lead to more data 1179 # being delivered, or even the body completing. 1180 self._transport.resumeProducing() 1181 1182 def _deliverBody_CONNECTED(self, protocol): 1183 """ 1184 It is invalid to attempt to deliver data to a protocol when it is 1185 already being delivered to another protocol. 1186 """ 1187 raise RuntimeError( 1188 "Response already has protocol %r, cannot deliverBody " 1189 "again" % (self._bodyProtocol,) 1190 ) 1191 1192 def _deliverBody_DEFERRED_CLOSE(self, protocol): 1193 """ 1194 Deliver any buffered data to C{protocol} and then disconnect the 1195 protocol. Move to the C{'FINISHED'} state. 1196 """ 1197 # Unlike _deliverBody_INITIAL, there is no need to resume the 1198 # transport here because all of the response data has been received 1199 # already. Some higher level code may want to resume the transport if 1200 # that code expects further data to be received over it. 1201 1202 protocol.makeConnection(self._transport) 1203 1204 for data in self._bodyBuffer: 1205 protocol.dataReceived(data) 1206 self._bodyBuffer = None 1207 protocol.connectionLost(self._reason) 1208 self._state = "FINISHED" 1209 1210 def _deliverBody_FINISHED(self, protocol): 1211 """ 1212 It is invalid to attempt to deliver data to a protocol after the 1213 response body has been delivered to another protocol. 1214 """ 1215 raise RuntimeError("Response already finished, cannot deliverBody now.") 1216 1217 def _bodyDataReceived(self, data): 1218 """ 1219 Called by HTTPClientParser with chunks of data from the response body. 1220 They will be buffered or delivered to the protocol passed to 1221 deliverBody. 1222 """ 1223 1224 _bodyDataReceived = makeStatefulDispatcher("bodyDataReceived", _bodyDataReceived) 1225 1226 def _bodyDataReceived_INITIAL(self, data): 1227 """ 1228 Buffer any data received for later delivery to a protocol passed to 1229 C{deliverBody}. 1230 1231 Little or no data should be buffered by this method, since the 1232 transport has been paused and will not be resumed until a protocol 1233 is supplied. 1234 """ 1235 self._bodyBuffer.append(data) 1236 1237 def _bodyDataReceived_CONNECTED(self, data): 1238 """ 1239 Deliver any data received to the protocol to which this L{Response} 1240 is connected. 1241 """ 1242 self._bodyProtocol.dataReceived(data) 1243 1244 def _bodyDataReceived_DEFERRED_CLOSE(self, data): 1245 """ 1246 It is invalid for data to be delivered after it has been indicated 1247 that the response body has been completely delivered. 1248 """ 1249 raise RuntimeError("Cannot receive body data after _bodyDataFinished") 1250 1251 def _bodyDataReceived_FINISHED(self, data): 1252 """ 1253 It is invalid for data to be delivered after the response body has 1254 been delivered to a protocol. 1255 """ 1256 raise RuntimeError("Cannot receive body data after " "protocol disconnected") 1257 1258 def _bodyDataFinished(self, reason=None): 1259 """ 1260 Called by HTTPClientParser when no more body data is available. If the 1261 optional reason is supplied, this indicates a problem or potential 1262 problem receiving all of the response body. 1263 """ 1264 1265 _bodyDataFinished = makeStatefulDispatcher("bodyDataFinished", _bodyDataFinished) 1266 1267 def _bodyDataFinished_INITIAL(self, reason=None): 1268 """ 1269 Move to the C{'DEFERRED_CLOSE'} state to wait for a protocol to 1270 which to deliver the response body. 1271 """ 1272 self._state = "DEFERRED_CLOSE" 1273 if reason is None: 1274 reason = Failure(ResponseDone("Response body fully received")) 1275 self._reason = reason 1276 1277 def _bodyDataFinished_CONNECTED(self, reason=None): 1278 """ 1279 Disconnect the protocol and move to the C{'FINISHED'} state. 1280 """ 1281 if reason is None: 1282 reason = Failure(ResponseDone("Response body fully received")) 1283 self._bodyProtocol.connectionLost(reason) 1284 self._bodyProtocol = None 1285 self._state = "FINISHED" 1286 1287 def _bodyDataFinished_DEFERRED_CLOSE(self): 1288 """ 1289 It is invalid to attempt to notify the L{Response} of the end of the 1290 response body data more than once. 1291 """ 1292 raise RuntimeError("Cannot finish body data more than once") 1293 1294 def _bodyDataFinished_FINISHED(self): 1295 """ 1296 It is invalid to attempt to notify the L{Response} of the end of the 1297 response body data more than once. 1298 """ 1299 raise RuntimeError("Cannot finish body data after " "protocol disconnected") 1300 1301 1302@implementer(IConsumer) 1303class ChunkedEncoder: 1304 """ 1305 Helper object which exposes L{IConsumer} on top of L{HTTP11ClientProtocol} 1306 for streaming request bodies to the server. 1307 """ 1308 1309 def __init__(self, transport): 1310 self.transport = transport 1311 1312 def _allowNoMoreWrites(self): 1313 """ 1314 Indicate that no additional writes are allowed. Attempts to write 1315 after calling this method will be met with an exception. 1316 """ 1317 self.transport = None 1318 1319 def registerProducer(self, producer, streaming): 1320 """ 1321 Register the given producer with C{self.transport}. 1322 """ 1323 self.transport.registerProducer(producer, streaming) 1324 1325 def write(self, data): 1326 """ 1327 Write the given request body bytes to the transport using chunked 1328 encoding. 1329 1330 @type data: C{bytes} 1331 """ 1332 if self.transport is None: 1333 raise ExcessWrite() 1334 self.transport.writeSequence( 1335 (networkString("%x\r\n" % len(data)), data, b"\r\n") 1336 ) 1337 1338 def unregisterProducer(self): 1339 """ 1340 Indicate that the request body is complete and finish the request. 1341 """ 1342 self.write(b"") 1343 self.transport.unregisterProducer() 1344 self._allowNoMoreWrites() 1345 1346 1347@implementer(IPushProducer) 1348class TransportProxyProducer: 1349 """ 1350 An L{twisted.internet.interfaces.IPushProducer} implementation which 1351 wraps another such thing and proxies calls to it until it is told to stop. 1352 1353 @ivar _producer: The wrapped L{twisted.internet.interfaces.IPushProducer} 1354 provider or L{None} after this proxy has been stopped. 1355 """ 1356 1357 # LineReceiver uses this undocumented attribute of transports to decide 1358 # when to stop calling lineReceived or rawDataReceived (if it finds it to 1359 # be true, it doesn't bother to deliver any more data). Set disconnecting 1360 # to False here and never change it to true so that all data is always 1361 # delivered to us and so that LineReceiver doesn't fail with an 1362 # AttributeError. 1363 disconnecting = False 1364 1365 def __init__(self, producer): 1366 self._producer = producer 1367 1368 def stopProxying(self): 1369 """ 1370 Stop forwarding calls of L{twisted.internet.interfaces.IPushProducer} 1371 methods to the underlying L{twisted.internet.interfaces.IPushProducer} 1372 provider. 1373 """ 1374 self._producer = None 1375 1376 def stopProducing(self): 1377 """ 1378 Proxy the stoppage to the underlying producer, unless this proxy has 1379 been stopped. 1380 """ 1381 if self._producer is not None: 1382 self._producer.stopProducing() 1383 1384 def resumeProducing(self): 1385 """ 1386 Proxy the resumption to the underlying producer, unless this proxy has 1387 been stopped. 1388 """ 1389 if self._producer is not None: 1390 self._producer.resumeProducing() 1391 1392 def pauseProducing(self): 1393 """ 1394 Proxy the pause to the underlying producer, unless this proxy has been 1395 stopped. 1396 """ 1397 if self._producer is not None: 1398 self._producer.pauseProducing() 1399 1400 def loseConnection(self): 1401 """ 1402 Proxy the request to lose the connection to the underlying producer, 1403 unless this proxy has been stopped. 1404 """ 1405 if self._producer is not None: 1406 self._producer.loseConnection() 1407 1408 1409class HTTP11ClientProtocol(Protocol): 1410 """ 1411 L{HTTP11ClientProtocol} is an implementation of the HTTP 1.1 client 1412 protocol. It supports as few features as possible. 1413 1414 @ivar _parser: After a request is issued, the L{HTTPClientParser} to 1415 which received data making up the response to that request is 1416 delivered. 1417 1418 @ivar _finishedRequest: After a request is issued, the L{Deferred} which 1419 will fire when a L{Response} object corresponding to that request is 1420 available. This allows L{HTTP11ClientProtocol} to fail the request 1421 if there is a connection or parsing problem. 1422 1423 @ivar _currentRequest: After a request is issued, the L{Request} 1424 instance used to make that request. This allows 1425 L{HTTP11ClientProtocol} to stop request generation if necessary (for 1426 example, if the connection is lost). 1427 1428 @ivar _transportProxy: After a request is issued, the 1429 L{TransportProxyProducer} to which C{_parser} is connected. This 1430 allows C{_parser} to pause and resume the transport in a way which 1431 L{HTTP11ClientProtocol} can exert some control over. 1432 1433 @ivar _responseDeferred: After a request is issued, the L{Deferred} from 1434 C{_parser} which will fire with a L{Response} when one has been 1435 received. This is eventually chained with C{_finishedRequest}, but 1436 only in certain cases to avoid double firing that Deferred. 1437 1438 @ivar _state: Indicates what state this L{HTTP11ClientProtocol} instance 1439 is in with respect to transmission of a request and reception of a 1440 response. This may be one of the following strings: 1441 1442 - QUIESCENT: This is the state L{HTTP11ClientProtocol} instances 1443 start in. Nothing is happening: no request is being sent and no 1444 response is being received or expected. 1445 1446 - TRANSMITTING: When a request is made (via L{request}), the 1447 instance moves to this state. L{Request.writeTo} has been used 1448 to start to send a request but it has not yet finished. 1449 1450 - TRANSMITTING_AFTER_RECEIVING_RESPONSE: The server has returned a 1451 complete response but the request has not yet been fully sent 1452 yet. The instance will remain in this state until the request 1453 is fully sent. 1454 1455 - GENERATION_FAILED: There was an error while the request. The 1456 request was not fully sent to the network. 1457 1458 - WAITING: The request was fully sent to the network. The 1459 instance is now waiting for the response to be fully received. 1460 1461 - ABORTING: Application code has requested that the HTTP connection 1462 be aborted. 1463 1464 - CONNECTION_LOST: The connection has been lost. 1465 @type _state: C{str} 1466 1467 @ivar _abortDeferreds: A list of C{Deferred} instances that will fire when 1468 the connection is lost. 1469 """ 1470 1471 _state = "QUIESCENT" 1472 _parser = None 1473 _finishedRequest = None 1474 _currentRequest = None 1475 _transportProxy = None 1476 _responseDeferred = None 1477 _log = Logger() 1478 1479 def __init__(self, quiescentCallback=lambda c: None): 1480 self._quiescentCallback = quiescentCallback 1481 self._abortDeferreds = [] 1482 1483 @property 1484 def state(self): 1485 return self._state 1486 1487 def request(self, request): 1488 """ 1489 Issue C{request} over C{self.transport} and return a L{Deferred} which 1490 will fire with a L{Response} instance or an error. 1491 1492 @param request: The object defining the parameters of the request to 1493 issue. 1494 @type request: L{Request} 1495 1496 @rtype: L{Deferred} 1497 @return: The deferred may errback with L{RequestGenerationFailed} if 1498 the request was not fully written to the transport due to a local 1499 error. It may errback with L{RequestTransmissionFailed} if it was 1500 not fully written to the transport due to a network error. It may 1501 errback with L{ResponseFailed} if the request was sent (not 1502 necessarily received) but some or all of the response was lost. It 1503 may errback with L{RequestNotSent} if it is not possible to send 1504 any more requests using this L{HTTP11ClientProtocol}. 1505 """ 1506 if self._state != "QUIESCENT": 1507 return fail(RequestNotSent()) 1508 1509 self._state = "TRANSMITTING" 1510 _requestDeferred = maybeDeferred(request.writeTo, self.transport) 1511 1512 def cancelRequest(ign): 1513 # Explicitly cancel the request's deferred if it's still trying to 1514 # write when this request is cancelled. 1515 if self._state in ("TRANSMITTING", "TRANSMITTING_AFTER_RECEIVING_RESPONSE"): 1516 _requestDeferred.cancel() 1517 else: 1518 self.transport.abortConnection() 1519 self._disconnectParser(Failure(CancelledError())) 1520 1521 self._finishedRequest = Deferred(cancelRequest) 1522 1523 # Keep track of the Request object in case we need to call stopWriting 1524 # on it. 1525 self._currentRequest = request 1526 1527 self._transportProxy = TransportProxyProducer(self.transport) 1528 self._parser = HTTPClientParser(request, self._finishResponse) 1529 self._parser.makeConnection(self._transportProxy) 1530 self._responseDeferred = self._parser._responseDeferred 1531 1532 def cbRequestWritten(ignored): 1533 if self._state == "TRANSMITTING": 1534 self._state = "WAITING" 1535 self._responseDeferred.chainDeferred(self._finishedRequest) 1536 1537 def ebRequestWriting(err): 1538 if self._state == "TRANSMITTING": 1539 self._state = "GENERATION_FAILED" 1540 self.transport.abortConnection() 1541 self._finishedRequest.errback(Failure(RequestGenerationFailed([err]))) 1542 else: 1543 self._log.failure( 1544 "Error writing request, but not in valid state " 1545 "to finalize request: {state}", 1546 failure=err, 1547 state=self._state, 1548 ) 1549 1550 _requestDeferred.addCallbacks(cbRequestWritten, ebRequestWriting) 1551 1552 return self._finishedRequest 1553 1554 def _finishResponse(self, rest): 1555 """ 1556 Called by an L{HTTPClientParser} to indicate that it has parsed a 1557 complete response. 1558 1559 @param rest: A C{bytes} giving any trailing bytes which were given to 1560 the L{HTTPClientParser} which were not part of the response it 1561 was parsing. 1562 """ 1563 1564 _finishResponse = makeStatefulDispatcher("finishResponse", _finishResponse) 1565 1566 def _finishResponse_WAITING(self, rest): 1567 # Currently the rest parameter is ignored. Don't forget to use it if 1568 # we ever add support for pipelining. And maybe check what trailers 1569 # mean. 1570 if self._state == "WAITING": 1571 self._state = "QUIESCENT" 1572 else: 1573 # The server sent the entire response before we could send the 1574 # whole request. That sucks. Oh well. Fire the request() 1575 # Deferred with the response. But first, make sure that if the 1576 # request does ever finish being written that it won't try to fire 1577 # that Deferred. 1578 self._state = "TRANSMITTING_AFTER_RECEIVING_RESPONSE" 1579 self._responseDeferred.chainDeferred(self._finishedRequest) 1580 1581 # This will happen if we're being called due to connection being lost; 1582 # if so, no need to disconnect parser again, or to call 1583 # _quiescentCallback. 1584 if self._parser is None: 1585 return 1586 1587 reason = ConnectionDone("synthetic!") 1588 connHeaders = self._parser.connHeaders.getRawHeaders(b"connection", ()) 1589 if ( 1590 (b"close" in connHeaders) 1591 or self._state != "QUIESCENT" 1592 or not self._currentRequest.persistent 1593 ): 1594 self._giveUp(Failure(reason)) 1595 else: 1596 # Just in case we had paused the transport, resume it before 1597 # considering it quiescent again. 1598 self.transport.resumeProducing() 1599 1600 # We call the quiescent callback first, to ensure connection gets 1601 # added back to connection pool before we finish the request. 1602 try: 1603 self._quiescentCallback(self) 1604 except BaseException: 1605 # If callback throws exception, just log it and disconnect; 1606 # keeping persistent connections around is an optimisation: 1607 self._log.failure("") 1608 self.transport.loseConnection() 1609 self._disconnectParser(reason) 1610 1611 _finishResponse_TRANSMITTING = _finishResponse_WAITING 1612 1613 def _disconnectParser(self, reason): 1614 """ 1615 If there is still a parser, call its C{connectionLost} method with the 1616 given reason. If there is not, do nothing. 1617 1618 @type reason: L{Failure} 1619 """ 1620 if self._parser is not None: 1621 parser = self._parser 1622 self._parser = None 1623 self._currentRequest = None 1624 self._finishedRequest = None 1625 self._responseDeferred = None 1626 1627 # The parser is no longer allowed to do anything to the real 1628 # transport. Stop proxying from the parser's transport to the real 1629 # transport before telling the parser it's done so that it can't do 1630 # anything. 1631 self._transportProxy.stopProxying() 1632 self._transportProxy = None 1633 parser.connectionLost(reason) 1634 1635 def _giveUp(self, reason): 1636 """ 1637 Lose the underlying connection and disconnect the parser with the given 1638 L{Failure}. 1639 1640 Use this method instead of calling the transport's loseConnection 1641 method directly otherwise random things will break. 1642 """ 1643 self.transport.loseConnection() 1644 self._disconnectParser(reason) 1645 1646 def dataReceived(self, bytes): 1647 """ 1648 Handle some stuff from some place. 1649 """ 1650 try: 1651 self._parser.dataReceived(bytes) 1652 except BaseException: 1653 self._giveUp(Failure()) 1654 1655 def connectionLost(self, reason): 1656 """ 1657 The underlying transport went away. If appropriate, notify the parser 1658 object. 1659 """ 1660 1661 connectionLost = makeStatefulDispatcher("connectionLost", connectionLost) 1662 1663 def _connectionLost_QUIESCENT(self, reason): 1664 """ 1665 Nothing is currently happening. Move to the C{'CONNECTION_LOST'} 1666 state but otherwise do nothing. 1667 """ 1668 self._state = "CONNECTION_LOST" 1669 1670 def _connectionLost_GENERATION_FAILED(self, reason): 1671 """ 1672 The connection was in an inconsistent state. Move to the 1673 C{'CONNECTION_LOST'} state but otherwise do nothing. 1674 """ 1675 self._state = "CONNECTION_LOST" 1676 1677 def _connectionLost_TRANSMITTING(self, reason): 1678 """ 1679 Fail the L{Deferred} for the current request, notify the request 1680 object that it does not need to continue transmitting itself, and 1681 move to the C{'CONNECTION_LOST'} state. 1682 """ 1683 self._state = "CONNECTION_LOST" 1684 self._finishedRequest.errback(Failure(RequestTransmissionFailed([reason]))) 1685 del self._finishedRequest 1686 1687 # Tell the request that it should stop bothering now. 1688 self._currentRequest.stopWriting() 1689 1690 def _connectionLost_TRANSMITTING_AFTER_RECEIVING_RESPONSE(self, reason): 1691 """ 1692 Move to the C{'CONNECTION_LOST'} state. 1693 """ 1694 self._state = "CONNECTION_LOST" 1695 1696 def _connectionLost_WAITING(self, reason): 1697 """ 1698 Disconnect the response parser so that it can propagate the event as 1699 necessary (for example, to call an application protocol's 1700 C{connectionLost} method, or to fail a request L{Deferred}) and move 1701 to the C{'CONNECTION_LOST'} state. 1702 """ 1703 self._disconnectParser(reason) 1704 self._state = "CONNECTION_LOST" 1705 1706 def _connectionLost_ABORTING(self, reason): 1707 """ 1708 Disconnect the response parser with a L{ConnectionAborted} failure, and 1709 move to the C{'CONNECTION_LOST'} state. 1710 """ 1711 self._disconnectParser(Failure(ConnectionAborted())) 1712 self._state = "CONNECTION_LOST" 1713 for d in self._abortDeferreds: 1714 d.callback(None) 1715 self._abortDeferreds = [] 1716 1717 def abort(self): 1718 """ 1719 Close the connection and cause all outstanding L{request} L{Deferred}s 1720 to fire with an error. 1721 """ 1722 if self._state == "CONNECTION_LOST": 1723 return succeed(None) 1724 self.transport.loseConnection() 1725 self._state = "ABORTING" 1726 d = Deferred() 1727 self._abortDeferreds.append(d) 1728 return d 1729