1# Copyright (C) 2005-2010 Canonical Ltd 2# 3# This program is free software; you can redistribute it and/or modify 4# it under the terms of the GNU General Public License as published by 5# the Free Software Foundation; either version 2 of the License, or 6# (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 17"""Base implementation of Transport over http using urllib. 18 19There are separate implementation modules for each http client implementation. 20""" 21 22from __future__ import absolute_import 23 24DEBUG = 0 25 26import base64 27import cgi 28import errno 29import os 30import re 31import socket 32import ssl 33import sys 34import time 35import urllib 36import weakref 37 38try: 39 import http.client as http_client 40except ImportError: 41 import httplib as http_client 42try: 43 import urllib.request as urllib_request 44except ImportError: # python < 3 45 import urllib2 as urllib_request 46try: 47 from urllib.parse import urljoin, splitport, splittype, splithost, urlencode 48except ImportError: 49 from urlparse import urljoin 50 from urllib import splitport, splittype, splithost, urlencode 51 52# TODO: handle_response should be integrated into the http/__init__.py 53from .response import handle_response 54 55# FIXME: Oversimplifying, two kind of exceptions should be 56# raised, once a request is issued: URLError before we have been 57# able to process the response, HTTPError after that. Process the 58# response means we are able to leave the socket clean, so if we 59# are not able to do that, we should close the connection. The 60# actual code more or less do that, tests should be written to 61# ensure that. 62 63from ... import __version__ as breezy_version 64from ... import ( 65 config, 66 debug, 67 errors, 68 lazy_import, 69 osutils, 70 trace, 71 transport, 72 ui, 73 urlutils, 74) 75from ...bzr.smart import medium 76from ...trace import mutter 77from ...transport import ( 78 ConnectedTransport, 79 UnusableRedirect, 80 ) 81 82from . import default_user_agent, ssl 83 84 85checked_kerberos = False 86kerberos = None 87 88 89class addinfourl(urllib_request.addinfourl): 90 '''Replacement addinfourl class compatible with python-2.7's xmlrpclib 91 92 In python-2.7, xmlrpclib expects that the response object that it receives 93 has a getheader method. http_client.HTTPResponse provides this but 94 urllib_request.addinfourl does not. Add the necessary functions here, ported to 95 use the internal data structures of addinfourl. 96 ''' 97 98 def getheader(self, name, default=None): 99 if self.headers is None: 100 raise http_client.ResponseNotReady() 101 return self.headers.getheader(name, default) 102 103 def getheaders(self): 104 if self.headers is None: 105 raise http_client.ResponseNotReady() 106 return list(self.headers.items()) 107 108 109class _ReportingFileSocket(object): 110 111 def __init__(self, filesock, report_activity=None): 112 self.filesock = filesock 113 self._report_activity = report_activity 114 115 def report_activity(self, size, direction): 116 if self._report_activity: 117 self._report_activity(size, direction) 118 119 def read(self, size=1): 120 s = self.filesock.read(size) 121 self.report_activity(len(s), 'read') 122 return s 123 124 def readline(self, size=-1): 125 s = self.filesock.readline(size) 126 self.report_activity(len(s), 'read') 127 return s 128 129 def readinto(self, b): 130 s = self.filesock.readinto(b) 131 self.report_activity(s, 'read') 132 return s 133 134 def __getattr__(self, name): 135 return getattr(self.filesock, name) 136 137 138class _ReportingSocket(object): 139 140 def __init__(self, sock, report_activity=None): 141 self.sock = sock 142 self._report_activity = report_activity 143 144 def report_activity(self, size, direction): 145 if self._report_activity: 146 self._report_activity(size, direction) 147 148 def sendall(self, s, *args): 149 self.sock.sendall(s, *args) 150 self.report_activity(len(s), 'write') 151 152 def recv(self, *args): 153 s = self.sock.recv(*args) 154 self.report_activity(len(s), 'read') 155 return s 156 157 def makefile(self, mode='r', bufsize=-1): 158 # http_client creates a fileobject that doesn't do buffering, which 159 # makes fp.readline() very expensive because it only reads one byte 160 # at a time. So we wrap the socket in an object that forces 161 # sock.makefile to make a buffered file. 162 fsock = self.sock.makefile(mode, 65536) 163 # And wrap that into a reporting kind of fileobject 164 return _ReportingFileSocket(fsock, self._report_activity) 165 166 def __getattr__(self, name): 167 return getattr(self.sock, name) 168 169 170# We define our own Response class to keep our http_client pipe clean 171class Response(http_client.HTTPResponse): 172 """Custom HTTPResponse, to avoid the need to decorate. 173 174 http_client prefers to decorate the returned objects, rather 175 than using a custom object. 176 """ 177 178 # Some responses have bodies in which we have no interest 179 _body_ignored_responses = [301, 302, 303, 307, 308, 403, 404, 501] 180 181 # in finish() below, we may have to discard several MB in the worst 182 # case. To avoid buffering that much, we read and discard by chunks 183 # instead. The underlying file is either a socket or a StringIO, so reading 184 # 8k chunks should be fine. 185 _discarded_buf_size = 8192 186 187 def __init__(self, sock, debuglevel=0, method=None, url=None): 188 self.url = url 189 super(Response, self).__init__( 190 sock, debuglevel=debuglevel, method=method, url=url) 191 192 def begin(self): 193 """Begin to read the response from the server. 194 195 http_client assumes that some responses get no content and do 196 not even attempt to read the body in that case, leaving 197 the body in the socket, blocking the next request. Let's 198 try to workaround that. 199 """ 200 http_client.HTTPResponse.begin(self) 201 if self.status in self._body_ignored_responses: 202 if self.debuglevel >= 2: 203 print("For status: [%s], will ready body, length: %s" % ( 204 self.status, self.length)) 205 if not (self.length is None or self.will_close): 206 # In some cases, we just can't read the body not 207 # even try or we may encounter a 104, 'Connection 208 # reset by peer' error if there is indeed no body 209 # and the server closed the connection just after 210 # having issued the response headers (even if the 211 # headers indicate a Content-Type...) 212 body = self.read(self.length) 213 if self.debuglevel >= 9: 214 # This one can be huge and is generally not interesting 215 print("Consumed body: [%s]" % body) 216 self.close() 217 elif self.status == 200: 218 # Whatever the request is, it went ok, so we surely don't want to 219 # close the connection. Some cases are not correctly detected by 220 # http_client.HTTPConnection.getresponse (called by 221 # http_client.HTTPResponse.begin). The CONNECT response for the https 222 # through proxy case is one. Note: the 'will_close' below refers 223 # to the "true" socket between us and the server, whereas the 224 # 'close()' above refers to the copy of that socket created by 225 # http_client for the response itself. So, in the if above we close the 226 # socket to indicate that we are done with the response whereas 227 # below we keep the socket with the server opened. 228 self.will_close = False 229 230 def finish(self): 231 """Finish reading the body. 232 233 In some cases, the client may have left some bytes to read in the 234 body. That will block the next request to succeed if we use a 235 persistent connection. If we don't use a persistent connection, well, 236 nothing will block the next request since a new connection will be 237 issued anyway. 238 239 :return: the number of bytes left on the socket (may be None) 240 """ 241 pending = None 242 if not self.isclosed(): 243 # Make sure nothing was left to be read on the socket 244 pending = 0 245 data = True 246 while data and self.length: 247 # read() will update self.length 248 data = self.read(min(self.length, self._discarded_buf_size)) 249 pending += len(data) 250 if pending: 251 trace.mutter("%s bytes left on the HTTP socket", pending) 252 self.close() 253 return pending 254 255 256# Not inheriting from 'object' because http_client.HTTPConnection doesn't. 257class AbstractHTTPConnection: 258 """A custom HTTP(S) Connection, which can reset itself on a bad response""" 259 260 response_class = Response 261 262 # When we detect a server responding with the whole file to range requests, 263 # we want to warn. But not below a given thresold. 264 _range_warning_thresold = 1024 * 1024 265 266 def __init__(self, report_activity=None): 267 self._response = None 268 self._report_activity = report_activity 269 self._ranges_received_whole_file = None 270 271 def _mutter_connect(self): 272 netloc = '%s:%s' % (self.host, self.port) 273 if self.proxied_host is not None: 274 netloc += '(proxy for %s)' % self.proxied_host 275 trace.mutter('* About to connect() to %s' % netloc) 276 277 def getresponse(self): 278 """Capture the response to be able to cleanup""" 279 self._response = http_client.HTTPConnection.getresponse(self) 280 return self._response 281 282 def cleanup_pipe(self): 283 """Read the remaining bytes of the last response if any.""" 284 if self._response is not None: 285 try: 286 pending = self._response.finish() 287 # Warn the user (once) 288 if (self._ranges_received_whole_file is None 289 and self._response.status == 200 290 and pending 291 and pending > self._range_warning_thresold): 292 self._ranges_received_whole_file = True 293 trace.warning( 294 'Got a 200 response when asking for multiple ranges,' 295 ' does your server at %s:%s support range requests?', 296 self.host, self.port) 297 except socket.error as e: 298 # It's conceivable that the socket is in a bad state here 299 # (including some test cases) and in this case, it doesn't need 300 # cleaning anymore, so no need to fail, we just get rid of the 301 # socket and let callers reconnect 302 if (len(e.args) == 0 303 or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)): 304 raise 305 self.close() 306 self._response = None 307 # Preserve our preciousss 308 sock = self.sock 309 self.sock = None 310 # Let http_client.HTTPConnection do its housekeeping 311 self.close() 312 # Restore our preciousss 313 self.sock = sock 314 315 def _wrap_socket_for_reporting(self, sock): 316 """Wrap the socket before anybody use it.""" 317 self.sock = _ReportingSocket(sock, self._report_activity) 318 319 320class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection): 321 322 # XXX: Needs refactoring at the caller level. 323 def __init__(self, host, port=None, proxied_host=None, 324 report_activity=None, ca_certs=None): 325 AbstractHTTPConnection.__init__(self, report_activity=report_activity) 326 http_client.HTTPConnection.__init__(self, host, port) 327 self.proxied_host = proxied_host 328 # ca_certs is ignored, it's only relevant for https 329 330 def connect(self): 331 if 'http' in debug.debug_flags: 332 self._mutter_connect() 333 http_client.HTTPConnection.connect(self) 334 self._wrap_socket_for_reporting(self.sock) 335 336 337class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection): 338 339 def __init__(self, host, port=None, key_file=None, cert_file=None, 340 proxied_host=None, 341 report_activity=None, ca_certs=None): 342 AbstractHTTPConnection.__init__(self, report_activity=report_activity) 343 http_client.HTTPSConnection.__init__( 344 self, host, port, key_file, cert_file) 345 self.proxied_host = proxied_host 346 self.ca_certs = ca_certs 347 348 def connect(self): 349 if 'http' in debug.debug_flags: 350 self._mutter_connect() 351 http_client.HTTPConnection.connect(self) 352 self._wrap_socket_for_reporting(self.sock) 353 if self.proxied_host is None: 354 self.connect_to_origin() 355 356 def connect_to_origin(self): 357 # FIXME JRV 2011-12-18: Use location config here? 358 config_stack = config.GlobalStack() 359 cert_reqs = config_stack.get('ssl.cert_reqs') 360 if self.proxied_host is not None: 361 host = self.proxied_host.split(":", 1)[0] 362 else: 363 host = self.host 364 if cert_reqs == ssl.CERT_NONE: 365 ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host) 366 ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert') 367 ca_certs = None 368 else: 369 if self.ca_certs is None: 370 ca_certs = config_stack.get('ssl.ca_certs') 371 else: 372 ca_certs = self.ca_certs 373 if ca_certs is None: 374 trace.warning( 375 "No valid trusted SSL CA certificates file set. See " 376 "'brz help ssl.ca_certs' for more information on setting " 377 "trusted CAs.") 378 try: 379 ssl_context = ssl.create_default_context( 380 purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs) 381 ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE 382 if self.cert_file: 383 ssl_context.load_cert_chain( 384 keyfile=self.key_file, certfile=self.cert_file) 385 ssl_context.verify_mode = cert_reqs 386 ssl_sock = ssl_context.wrap_socket( 387 self.sock, server_hostname=self.host) 388 except ssl.SSLError: 389 trace.note( 390 "\n" 391 "See `brz help ssl.ca_certs` for how to specify trusted CA" 392 "certificates.\n" 393 "Pass -Ossl.cert_reqs=none to disable certificate " 394 "verification entirely.\n") 395 raise 396 # Wrap the ssl socket before anybody use it 397 self._wrap_socket_for_reporting(ssl_sock) 398 399 400class Request(urllib_request.Request): 401 """A custom Request object. 402 403 urllib_request determines the request method heuristically (based on 404 the presence or absence of data). We set the method 405 statically. 406 407 The Request object tracks: 408 - the connection the request will be made on. 409 - the authentication parameters needed to preventively set 410 the authentication header once a first authentication have 411 been made. 412 """ 413 414 def __init__(self, method, url, data=None, headers={}, 415 origin_req_host=None, unverifiable=False, 416 connection=None, parent=None): 417 urllib_request.Request.__init__( 418 self, url, data, headers, 419 origin_req_host, unverifiable) 420 self.method = method 421 self.connection = connection 422 # To handle redirections 423 self.parent = parent 424 self.redirected_to = None 425 # Unless told otherwise, redirections are not followed 426 self.follow_redirections = False 427 # auth and proxy_auth are dicts containing, at least 428 # (scheme, host, port, realm, user, password, protocol, path). 429 # The dict entries are mostly handled by the AuthHandler. 430 # Some authentication schemes may add more entries. 431 self.auth = {} 432 self.proxy_auth = {} 433 self.proxied_host = None 434 435 def get_method(self): 436 return self.method 437 438 def set_proxy(self, proxy, type): 439 """Set the proxy and remember the proxied host.""" 440 host, port = splitport(self.host) 441 if port is None: 442 # We need to set the default port ourselves way before it gets set 443 # in the HTTP[S]Connection object at build time. 444 if self.type == 'https': 445 conn_class = HTTPSConnection 446 else: 447 conn_class = HTTPConnection 448 port = conn_class.default_port 449 self.proxied_host = '%s:%s' % (host, port) 450 urllib_request.Request.set_proxy(self, proxy, type) 451 # When urllib_request makes a https request with our wrapper code and a proxy, 452 # it sets Host to the https proxy, not the host we want to talk to. 453 # I'm fairly sure this is our fault, but what is the cause is an open 454 # question. -- Robert Collins May 8 2010. 455 self.add_unredirected_header('Host', self.proxied_host) 456 457 458class _ConnectRequest(Request): 459 460 def __init__(self, request): 461 """Constructor 462 463 :param request: the first request sent to the proxied host, already 464 processed by the opener (i.e. proxied_host is already set). 465 """ 466 # We give a fake url and redefine selector or urllib_request will be 467 # confused 468 Request.__init__(self, 'CONNECT', request.get_full_url(), 469 connection=request.connection) 470 if request.proxied_host is None: 471 raise AssertionError() 472 self.proxied_host = request.proxied_host 473 474 @property 475 def selector(self): 476 return self.proxied_host 477 478 def get_selector(self): 479 return self.selector 480 481 def set_proxy(self, proxy, type): 482 """Set the proxy without remembering the proxied host. 483 484 We already know the proxied host by definition, the CONNECT request 485 occurs only when the connection goes through a proxy. The usual 486 processing (masquerade the request so that the connection is done to 487 the proxy while the request is targeted at another host) does not apply 488 here. In fact, the connection is already established with proxy and we 489 just want to enable the SSL tunneling. 490 """ 491 urllib_request.Request.set_proxy(self, proxy, type) 492 493 494class ConnectionHandler(urllib_request.BaseHandler): 495 """Provides connection-sharing by pre-processing requests. 496 497 urllib_request provides no way to access the HTTPConnection object 498 internally used. But we need it in order to achieve 499 connection sharing. So, we add it to the request just before 500 it is processed, and then we override the do_open method for 501 http[s] requests in AbstractHTTPHandler. 502 """ 503 504 handler_order = 1000 # after all pre-processings 505 506 def __init__(self, report_activity=None, ca_certs=None): 507 self._report_activity = report_activity 508 self.ca_certs = ca_certs 509 510 def create_connection(self, request, http_connection_class): 511 host = request.host 512 if not host: 513 # Just a bit of paranoia here, this should have been 514 # handled in the higher levels 515 raise urlutils.InvalidURL(request.get_full_url(), 'no host given.') 516 517 # We create a connection (but it will not connect until the first 518 # request is made) 519 try: 520 connection = http_connection_class( 521 host, proxied_host=request.proxied_host, 522 report_activity=self._report_activity, 523 ca_certs=self.ca_certs) 524 except http_client.InvalidURL as exception: 525 # There is only one occurrence of InvalidURL in http_client 526 raise urlutils.InvalidURL(request.get_full_url(), 527 extra='nonnumeric port') 528 529 return connection 530 531 def capture_connection(self, request, http_connection_class): 532 """Capture or inject the request connection. 533 534 Two cases: 535 - the request have no connection: create a new one, 536 537 - the request have a connection: this one have been used 538 already, let's capture it, so that we can give it to 539 another transport to be reused. We don't do that 540 ourselves: the Transport object get the connection from 541 a first request and then propagate it, from request to 542 request or to cloned transports. 543 """ 544 connection = request.connection 545 if connection is None: 546 # Create a new one 547 connection = self.create_connection(request, http_connection_class) 548 request.connection = connection 549 550 # All connections will pass here, propagate debug level 551 connection.set_debuglevel(DEBUG) 552 return request 553 554 def http_request(self, request): 555 return self.capture_connection(request, HTTPConnection) 556 557 def https_request(self, request): 558 return self.capture_connection(request, HTTPSConnection) 559 560 561class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler): 562 """A custom handler for HTTP(S) requests. 563 564 We overrive urllib_request.AbstractHTTPHandler to get a better 565 control of the connection, the ability to implement new 566 request types and return a response able to cope with 567 persistent connections. 568 """ 569 570 # We change our order to be before urllib_request HTTP[S]Handlers 571 # and be chosen instead of them (the first http_open called 572 # wins). 573 handler_order = 400 574 575 _default_headers = {'Pragma': 'no-cache', 576 'Cache-control': 'max-age=0', 577 'Connection': 'Keep-Alive', 578 'User-agent': default_user_agent(), 579 'Accept': '*/*', 580 } 581 582 def __init__(self): 583 urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG) 584 585 def http_request(self, request): 586 """Common headers setting""" 587 588 for name, value in self._default_headers.items(): 589 if name not in request.headers: 590 request.headers[name] = value 591 # FIXME: We may have to add the Content-Length header if 592 # we have data to send. 593 return request 594 595 def retry_or_raise(self, http_class, request, first_try): 596 """Retry the request (once) or raise the exception. 597 598 urllib_request raises exception of application level kind, we 599 just have to translate them. 600 601 http_client can raise exceptions of transport level (badly 602 formatted dialog, loss of connexion or socket level 603 problems). In that case we should issue the request again 604 (http_client will close and reopen a new connection if 605 needed). 606 """ 607 # When an exception occurs, we give back the original 608 # Traceback or the bugs are hard to diagnose. 609 exc_type, exc_val, exc_tb = sys.exc_info() 610 if exc_type == socket.gaierror: 611 # No need to retry, that will not help 612 origin_req_host = request.origin_req_host 613 raise errors.ConnectionError("Couldn't resolve host '%s'" 614 % origin_req_host, 615 orig_error=exc_val) 616 elif isinstance(exc_val, http_client.ImproperConnectionState): 617 # The http_client pipeline is in incorrect state, it's a bug in our 618 # implementation. 619 raise exc_val.with_traceback(exc_tb) 620 else: 621 if first_try: 622 if self._debuglevel >= 2: 623 print('Received exception: [%r]' % exc_val) 624 print(' On connection: [%r]' % request.connection) 625 method = request.get_method() 626 url = request.get_full_url() 627 print(' Will retry, %s %r' % (method, url)) 628 request.connection.close() 629 response = self.do_open(http_class, request, False) 630 else: 631 if self._debuglevel >= 2: 632 print('Received second exception: [%r]' % exc_val) 633 print(' On connection: [%r]' % request.connection) 634 if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol): 635 # http_client.BadStatusLine and 636 # http_client.UnknownProtocol indicates that a 637 # bogus server was encountered or a bad 638 # connection (i.e. transient errors) is 639 # experimented, we have already retried once 640 # for that request so we raise the exception. 641 my_exception = errors.InvalidHttpResponse( 642 request.get_full_url(), 643 'Bad status line received', 644 orig_error=exc_val) 645 elif (isinstance(exc_val, socket.error) and len(exc_val.args) 646 and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)): 647 # 10053 == WSAECONNABORTED 648 # 10054 == WSAECONNRESET 649 raise errors.ConnectionReset( 650 "Connection lost while sending request.") 651 else: 652 # All other exception are considered connection related. 653 654 # socket errors generally occurs for reasons 655 # far outside our scope, so closing the 656 # connection and retrying is the best we can 657 # do. 658 selector = request.selector 659 my_exception = errors.ConnectionError( 660 msg='while sending %s %s:' % (request.get_method(), 661 selector), 662 orig_error=exc_val) 663 664 if self._debuglevel >= 2: 665 print('On connection: [%r]' % request.connection) 666 method = request.get_method() 667 url = request.get_full_url() 668 print(' Failed again, %s %r' % (method, url)) 669 print(' Will raise: [%r]' % my_exception) 670 raise my_exception.with_traceback(exc_tb) 671 return response 672 673 def do_open(self, http_class, request, first_try=True): 674 """See urllib_request.AbstractHTTPHandler.do_open for the general idea. 675 676 The request will be retried once if it fails. 677 """ 678 connection = request.connection 679 if connection is None: 680 raise AssertionError( 681 'Cannot process a request without a connection') 682 683 # Get all the headers 684 headers = {} 685 headers.update(request.header_items()) 686 headers.update(request.unredirected_hdrs) 687 # Some servers or proxies will choke on headers not properly 688 # cased. http_client/urllib/urllib_request all use capitalize to get canonical 689 # header names, but only python2.5 urllib_request use title() to fix them just 690 # before sending the request. And not all versions of python 2.5 do 691 # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it 692 # ourself below. 693 headers = {name.title(): val for name, val in headers.items()} 694 695 try: 696 method = request.get_method() 697 url = request.selector 698 if sys.version_info[:2] >= (3, 6): 699 connection._send_request(method, url, 700 # FIXME: implements 100-continue 701 # None, # We don't send the body yet 702 request.data, 703 headers, encode_chunked=False) 704 else: 705 connection._send_request(method, url, 706 # FIXME: implements 100-continue 707 # None, # We don't send the body yet 708 request.data, 709 headers) 710 if 'http' in debug.debug_flags: 711 trace.mutter('> %s %s' % (method, url)) 712 hdrs = [] 713 for k, v in headers.items(): 714 # People are often told to paste -Dhttp output to help 715 # debug. Don't compromise credentials. 716 if k in ('Authorization', 'Proxy-Authorization'): 717 v = '<masked>' 718 hdrs.append('%s: %s' % (k, v)) 719 trace.mutter('> ' + '\n> '.join(hdrs) + '\n') 720 if self._debuglevel >= 1: 721 print('Request sent: [%r] from (%s)' 722 % (request, request.connection.sock.getsockname())) 723 response = connection.getresponse() 724 convert_to_addinfourl = True 725 except (ssl.SSLError, ssl.CertificateError): 726 # Something is wrong with either the certificate or the hostname, 727 # re-trying won't help 728 raise 729 except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol, 730 socket.error, http_client.HTTPException): 731 response = self.retry_or_raise(http_class, request, first_try) 732 convert_to_addinfourl = False 733 734 response.msg = response.reason 735 return response 736 737# FIXME: HTTPConnection does not fully support 100-continue (the 738# server responses are just ignored) 739 740# if code == 100: 741# mutter('Will send the body') 742# # We can send the body now 743# body = request.data 744# if body is None: 745# raise URLError("No data given") 746# connection.send(body) 747# response = connection.getresponse() 748 749 if self._debuglevel >= 2: 750 print('Receives response: %r' % response) 751 print(' For: %r(%r)' % (request.get_method(), 752 request.get_full_url())) 753 754 if convert_to_addinfourl: 755 # Shamelessly copied from urllib_request 756 req = request 757 r = response 758 r.recv = r.read 759 fp = socket._fileobject(r, bufsize=65536) 760 resp = addinfourl(fp, r.msg, req.get_full_url()) 761 resp.code = r.status 762 resp.msg = r.reason 763 resp.version = r.version 764 if self._debuglevel >= 2: 765 print('Create addinfourl: %r' % resp) 766 print(' For: %r(%r)' % (request.get_method(), 767 request.get_full_url())) 768 if 'http' in debug.debug_flags: 769 version = 'HTTP/%d.%d' 770 try: 771 version = version % (resp.version / 10, 772 resp.version % 10) 773 except: 774 version = 'HTTP/%r' % resp.version 775 trace.mutter('< %s %s %s' % (version, resp.code, 776 resp.msg)) 777 # Use the raw header lines instead of treating resp.info() as a 778 # dict since we may miss duplicated headers otherwise. 779 hdrs = [h.rstrip('\r\n') for h in resp.info().headers] 780 trace.mutter('< ' + '\n< '.join(hdrs) + '\n') 781 else: 782 resp = response 783 return resp 784 785 786class HTTPHandler(AbstractHTTPHandler): 787 """A custom handler that just thunks into HTTPConnection""" 788 789 def http_open(self, request): 790 return self.do_open(HTTPConnection, request) 791 792 793class HTTPSHandler(AbstractHTTPHandler): 794 """A custom handler that just thunks into HTTPSConnection""" 795 796 https_request = AbstractHTTPHandler.http_request 797 798 def https_open(self, request): 799 connection = request.connection 800 if connection.sock is None and \ 801 connection.proxied_host is not None and \ 802 request.get_method() != 'CONNECT': # Don't loop 803 # FIXME: We need a gazillion connection tests here, but we still 804 # miss a https server :-( : 805 # - with and without proxy 806 # - with and without certificate 807 # - with self-signed certificate 808 # - with and without authentication 809 # - with good and bad credentials (especially the proxy auth around 810 # CONNECT) 811 # - with basic and digest schemes 812 # - reconnection on errors 813 # - connection persistence behaviour (including reconnection) 814 815 # We are about to connect for the first time via a proxy, we must 816 # issue a CONNECT request first to establish the encrypted link 817 connect = _ConnectRequest(request) 818 response = self.parent.open(connect) 819 if response.code != 200: 820 raise errors.ConnectionError("Can't connect to %s via proxy %s" % ( 821 connect.proxied_host, self.host)) 822 # Housekeeping 823 connection.cleanup_pipe() 824 # Establish the connection encryption 825 connection.connect_to_origin() 826 # Propagate the connection to the original request 827 request.connection = connection 828 return self.do_open(HTTPSConnection, request) 829 830 831class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler): 832 """Handles redirect requests. 833 834 We have to implement our own scheme because we use a specific 835 Request object and because we want to implement a specific 836 policy. 837 """ 838 _debuglevel = DEBUG 839 # RFC2616 says that only read requests should be redirected 840 # without interacting with the user. But Breezy uses some 841 # shortcuts to optimize against roundtrips which can leads to 842 # write requests being issued before read requests of 843 # containing dirs can be redirected. So we redirect write 844 # requests in the same way which seems to respect the spirit 845 # of the RFC if not its letter. 846 847 def redirect_request(self, req, fp, code, msg, headers, newurl): 848 """See urllib_request.HTTPRedirectHandler.redirect_request""" 849 # We would have preferred to update the request instead 850 # of creating a new one, but the urllib_request.Request object 851 # has a too complicated creation process to provide a 852 # simple enough equivalent update process. Instead, when 853 # redirecting, we only update the following request in 854 # the redirect chain with a reference to the parent 855 # request . 856 857 # Some codes make no sense in our context and are treated 858 # as errors: 859 860 # 300: Multiple choices for different representations of 861 # the URI. Using that mechanisn with Breezy will violate the 862 # protocol neutrality of Transport. 863 864 # 304: Not modified (SHOULD only occurs with conditional 865 # GETs which are not used by our implementation) 866 867 # 305: Use proxy. I can't imagine this one occurring in 868 # our context-- vila/20060909 869 870 # 306: Unused (if the RFC says so...) 871 872 # If the code is 302 and the request is HEAD, some may 873 # think that it is a sufficent hint that the file exists 874 # and that we MAY avoid following the redirections. But 875 # if we want to be sure, we MUST follow them. 876 877 origin_req_host = req.origin_req_host 878 879 if code in (301, 302, 303, 307, 308): 880 return Request(req.get_method(), newurl, 881 headers=req.headers, 882 origin_req_host=origin_req_host, 883 unverifiable=True, 884 # TODO: It will be nice to be able to 885 # detect virtual hosts sharing the same 886 # IP address, that will allow us to 887 # share the same connection... 888 connection=None, 889 parent=req, 890 ) 891 else: 892 raise urllib_request.HTTPError( 893 req.get_full_url(), code, msg, headers, fp) 894 895 def http_error_302(self, req, fp, code, msg, headers): 896 """Requests the redirected to URI. 897 898 Copied from urllib_request to be able to clean the pipe of the associated 899 connection, *before* issuing the redirected request but *after* having 900 eventually raised an error. 901 """ 902 # Some servers (incorrectly) return multiple Location headers 903 # (so probably same goes for URI). Use first header. 904 905 # TODO: Once we get rid of addinfourl objects, the 906 # following will need to be updated to use correct case 907 # for headers. 908 if 'location' in headers: 909 newurl = headers.get('location') 910 elif 'uri' in headers: 911 newurl = headers.get('uri') 912 else: 913 return 914 915 newurl = urljoin(req.get_full_url(), newurl) 916 917 if self._debuglevel >= 1: 918 print('Redirected to: %s (followed: %r)' % (newurl, 919 req.follow_redirections)) 920 if req.follow_redirections is False: 921 req.redirected_to = newurl 922 return fp 923 924 # This call succeeds or raise an error. urllib_request returns 925 # if redirect_request returns None, but our 926 # redirect_request never returns None. 927 redirected_req = self.redirect_request(req, fp, code, msg, headers, 928 newurl) 929 930 # loop detection 931 # .redirect_dict has a key url if url was previously visited. 932 if hasattr(req, 'redirect_dict'): 933 visited = redirected_req.redirect_dict = req.redirect_dict 934 if (visited.get(newurl, 0) >= self.max_repeats or 935 len(visited) >= self.max_redirections): 936 raise urllib_request.HTTPError(req.get_full_url(), code, 937 self.inf_msg + msg, headers, fp) 938 else: 939 visited = redirected_req.redirect_dict = req.redirect_dict = {} 940 visited[newurl] = visited.get(newurl, 0) + 1 941 942 # We can close the fp now that we are sure that we won't 943 # use it with HTTPError. 944 fp.close() 945 # We have all we need already in the response 946 req.connection.cleanup_pipe() 947 948 return self.parent.open(redirected_req) 949 950 http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302 951 952 953class ProxyHandler(urllib_request.ProxyHandler): 954 """Handles proxy setting. 955 956 Copied and modified from urllib_request to be able to modify the request during 957 the request pre-processing instead of modifying it at _open time. As we 958 capture (or create) the connection object during request processing, _open 959 time was too late. 960 961 The main task is to modify the request so that the connection is done to 962 the proxy while the request still refers to the destination host. 963 964 Note: the proxy handling *may* modify the protocol used; the request may be 965 against an https server proxied through an http proxy. So, https_request 966 will be called, but later it's really http_open that will be called. This 967 explains why we don't have to call self.parent.open as the urllib_request did. 968 """ 969 970 # Proxies must be in front 971 handler_order = 100 972 _debuglevel = DEBUG 973 974 def __init__(self, proxies=None): 975 urllib_request.ProxyHandler.__init__(self, proxies) 976 # First, let's get rid of urllib_request implementation 977 for type, proxy in self.proxies.items(): 978 if self._debuglevel >= 3: 979 print('Will unbind %s_open for %r' % (type, proxy)) 980 delattr(self, '%s_open' % type) 981 982 def bind_scheme_request(proxy, scheme): 983 if proxy is None: 984 return 985 scheme_request = scheme + '_request' 986 if self._debuglevel >= 3: 987 print('Will bind %s for %r' % (scheme_request, proxy)) 988 setattr(self, scheme_request, 989 lambda request: self.set_proxy(request, scheme)) 990 # We are interested only by the http[s] proxies 991 http_proxy = self.get_proxy_env_var('http') 992 bind_scheme_request(http_proxy, 'http') 993 https_proxy = self.get_proxy_env_var('https') 994 bind_scheme_request(https_proxy, 'https') 995 996 def get_proxy_env_var(self, name, default_to='all'): 997 """Get a proxy env var. 998 999 Note that we indirectly rely on 1000 urllib.getproxies_environment taking into account the 1001 uppercased values for proxy variables. 1002 """ 1003 try: 1004 return self.proxies[name.lower()] 1005 except KeyError: 1006 if default_to is not None: 1007 # Try to get the alternate environment variable 1008 try: 1009 return self.proxies[default_to] 1010 except KeyError: 1011 pass 1012 return None 1013 1014 def proxy_bypass(self, host): 1015 """Check if host should be proxied or not. 1016 1017 :returns: True to skip the proxy, False otherwise. 1018 """ 1019 no_proxy = self.get_proxy_env_var('no', default_to=None) 1020 bypass = self.evaluate_proxy_bypass(host, no_proxy) 1021 if bypass is None: 1022 # Nevertheless, there are platform-specific ways to 1023 # ignore proxies... 1024 return urllib_request.proxy_bypass(host) 1025 else: 1026 return bypass 1027 1028 def evaluate_proxy_bypass(self, host, no_proxy): 1029 """Check the host against a comma-separated no_proxy list as a string. 1030 1031 :param host: ``host:port`` being requested 1032 1033 :param no_proxy: comma-separated list of hosts to access directly. 1034 1035 :returns: True to skip the proxy, False not to, or None to 1036 leave it to urllib. 1037 """ 1038 if no_proxy is None: 1039 # All hosts are proxied 1040 return False 1041 hhost, hport = splitport(host) 1042 # Does host match any of the domains mentioned in 1043 # no_proxy ? The rules about what is authorized in no_proxy 1044 # are fuzzy (to say the least). We try to allow most 1045 # commonly seen values. 1046 for domain in no_proxy.split(','): 1047 domain = domain.strip() 1048 if domain == '': 1049 continue 1050 dhost, dport = splitport(domain) 1051 if hport == dport or dport is None: 1052 # Protect glob chars 1053 dhost = dhost.replace(".", r"\.") 1054 dhost = dhost.replace("*", r".*") 1055 dhost = dhost.replace("?", r".") 1056 if re.match(dhost, hhost, re.IGNORECASE): 1057 return True 1058 # Nothing explicitly avoid the host 1059 return None 1060 1061 def set_proxy(self, request, type): 1062 host = request.host 1063 if self.proxy_bypass(host): 1064 return request 1065 1066 proxy = self.get_proxy_env_var(type) 1067 if self._debuglevel >= 3: 1068 print('set_proxy %s_request for %r' % (type, proxy)) 1069 # FIXME: python 2.5 urlparse provides a better _parse_proxy which can 1070 # grok user:password@host:port as well as 1071 # http://user:password@host:port 1072 1073 parsed_url = transport.ConnectedTransport._split_url(proxy) 1074 if not parsed_url.host: 1075 raise urlutils.InvalidURL(proxy, 'No host component') 1076 1077 if request.proxy_auth == {}: 1078 # No proxy auth parameter are available, we are handling the first 1079 # proxied request, intialize. scheme (the authentication scheme) 1080 # and realm will be set by the AuthHandler 1081 request.proxy_auth = { 1082 'host': parsed_url.host, 1083 'port': parsed_url.port, 1084 'user': parsed_url.user, 1085 'password': parsed_url.password, 1086 'protocol': parsed_url.scheme, 1087 # We ignore path since we connect to a proxy 1088 'path': None} 1089 if parsed_url.port is None: 1090 phost = parsed_url.host 1091 else: 1092 phost = parsed_url.host + ':%d' % parsed_url.port 1093 request.set_proxy(phost, type) 1094 if self._debuglevel >= 3: 1095 print('set_proxy: proxy set to %s://%s' % (type, phost)) 1096 return request 1097 1098 1099class AbstractAuthHandler(urllib_request.BaseHandler): 1100 """A custom abstract authentication handler for all http authentications. 1101 1102 Provides the meat to handle authentication errors and 1103 preventively set authentication headers after the first 1104 successful authentication. 1105 1106 This can be used for http and proxy, as well as for basic, negotiate and 1107 digest authentications. 1108 1109 This provides an unified interface for all authentication handlers 1110 (urllib_request provides far too many with different policies). 1111 1112 The interaction between this handler and the urllib_request 1113 framework is not obvious, it works as follow: 1114 1115 opener.open(request) is called: 1116 1117 - that may trigger http_request which will add an authentication header 1118 (self.build_header) if enough info is available. 1119 1120 - the request is sent to the server, 1121 1122 - if an authentication error is received self.auth_required is called, 1123 we acquire the authentication info in the error headers and call 1124 self.auth_match to check that we are able to try the 1125 authentication and complete the authentication parameters, 1126 1127 - we call parent.open(request), that may trigger http_request 1128 and will add a header (self.build_header), but here we have 1129 all the required info (keep in mind that the request and 1130 authentication used in the recursive calls are really (and must be) 1131 the *same* objects). 1132 1133 - if the call returns a response, the authentication have been 1134 successful and the request authentication parameters have been updated. 1135 """ 1136 1137 scheme = None 1138 """The scheme as it appears in the server header (lower cased)""" 1139 1140 _max_retry = 3 1141 """We don't want to retry authenticating endlessly""" 1142 1143 requires_username = True 1144 """Whether the auth mechanism requires a username.""" 1145 1146 # The following attributes should be defined by daughter 1147 # classes: 1148 # - auth_required_header: the header received from the server 1149 # - auth_header: the header sent in the request 1150 1151 def __init__(self): 1152 # We want to know when we enter into an try/fail cycle of 1153 # authentications so we initialize to None to indicate that we aren't 1154 # in such a cycle by default. 1155 self._retry_count = None 1156 1157 def _parse_auth_header(self, server_header): 1158 """Parse the authentication header. 1159 1160 :param server_header: The value of the header sent by the server 1161 describing the authenticaion request. 1162 1163 :return: A tuple (scheme, remainder) scheme being the first word in the 1164 given header (lower cased), remainder may be None. 1165 """ 1166 try: 1167 scheme, remainder = server_header.split(None, 1) 1168 except ValueError: 1169 scheme = server_header 1170 remainder = None 1171 return (scheme.lower(), remainder) 1172 1173 def update_auth(self, auth, key, value): 1174 """Update a value in auth marking the auth as modified if needed""" 1175 old_value = auth.get(key, None) 1176 if old_value != value: 1177 auth[key] = value 1178 auth['modified'] = True 1179 1180 def auth_required(self, request, headers): 1181 """Retry the request if the auth scheme is ours. 1182 1183 :param request: The request needing authentication. 1184 :param headers: The headers for the authentication error response. 1185 :return: None or the response for the authenticated request. 1186 """ 1187 # Don't try to authenticate endlessly 1188 if self._retry_count is None: 1189 # The retry being recusrsive calls, None identify the first retry 1190 self._retry_count = 1 1191 else: 1192 self._retry_count += 1 1193 if self._retry_count > self._max_retry: 1194 # Let's be ready for next round 1195 self._retry_count = None 1196 return None 1197 server_headers = headers.get_all(self.auth_required_header) 1198 if not server_headers: 1199 # The http error MUST have the associated 1200 # header. This must never happen in production code. 1201 trace.mutter('%s not found', self.auth_required_header) 1202 return None 1203 1204 auth = self.get_auth(request) 1205 auth['modified'] = False 1206 # Put some common info in auth if the caller didn't 1207 if auth.get('path', None) is None: 1208 parsed_url = urlutils.URL.from_string(request.get_full_url()) 1209 self.update_auth(auth, 'protocol', parsed_url.scheme) 1210 self.update_auth(auth, 'host', parsed_url.host) 1211 self.update_auth(auth, 'port', parsed_url.port) 1212 self.update_auth(auth, 'path', parsed_url.path) 1213 # FIXME: the auth handler should be selected at a single place instead 1214 # of letting all handlers try to match all headers, but the current 1215 # design doesn't allow a simple implementation. 1216 for server_header in server_headers: 1217 # Several schemes can be proposed by the server, try to match each 1218 # one in turn 1219 matching_handler = self.auth_match(server_header, auth) 1220 if matching_handler: 1221 # auth_match may have modified auth (by adding the 1222 # password or changing the realm, for example) 1223 if (request.get_header(self.auth_header, None) is not None 1224 and not auth['modified']): 1225 # We already tried that, give up 1226 return None 1227 1228 # Only the most secure scheme proposed by the server should be 1229 # used, since the handlers use 'handler_order' to describe that 1230 # property, the first handler tried takes precedence, the 1231 # others should not attempt to authenticate if the best one 1232 # failed. 1233 best_scheme = auth.get('best_scheme', None) 1234 if best_scheme is None: 1235 # At that point, if current handler should doesn't succeed 1236 # the credentials are wrong (or incomplete), but we know 1237 # that the associated scheme should be used. 1238 best_scheme = auth['best_scheme'] = self.scheme 1239 if best_scheme != self.scheme: 1240 continue 1241 1242 if self.requires_username and auth.get('user', None) is None: 1243 # Without a known user, we can't authenticate 1244 return None 1245 1246 # Housekeeping 1247 request.connection.cleanup_pipe() 1248 # Retry the request with an authentication header added 1249 response = self.parent.open(request) 1250 if response: 1251 self.auth_successful(request, response) 1252 return response 1253 # We are not qualified to handle the authentication. 1254 # Note: the authentication error handling will try all 1255 # available handlers. If one of them authenticates 1256 # successfully, a response will be returned. If none of 1257 # them succeeds, None will be returned and the error 1258 # handler will raise the 401 'Unauthorized' or the 407 1259 # 'Proxy Authentication Required' error. 1260 return None 1261 1262 def add_auth_header(self, request, header): 1263 """Add the authentication header to the request""" 1264 request.add_unredirected_header(self.auth_header, header) 1265 1266 def auth_match(self, header, auth): 1267 """Check that we are able to handle that authentication scheme. 1268 1269 The request authentication parameters may need to be 1270 updated with info from the server. Some of these 1271 parameters, when combined, are considered to be the 1272 authentication key, if one of them change the 1273 authentication result may change. 'user' and 'password' 1274 are exampls, but some auth schemes may have others 1275 (digest's nonce is an example, digest's nonce_count is a 1276 *counter-example*). Such parameters must be updated by 1277 using the update_auth() method. 1278 1279 :param header: The authentication header sent by the server. 1280 :param auth: The auth parameters already known. They may be 1281 updated. 1282 :returns: True if we can try to handle the authentication. 1283 """ 1284 raise NotImplementedError(self.auth_match) 1285 1286 def build_auth_header(self, auth, request): 1287 """Build the value of the header used to authenticate. 1288 1289 :param auth: The auth parameters needed to build the header. 1290 :param request: The request needing authentication. 1291 1292 :return: None or header. 1293 """ 1294 raise NotImplementedError(self.build_auth_header) 1295 1296 def auth_successful(self, request, response): 1297 """The authentification was successful for the request. 1298 1299 Additional infos may be available in the response. 1300 1301 :param request: The succesfully authenticated request. 1302 :param response: The server response (may contain auth info). 1303 """ 1304 # It may happen that we need to reconnect later, let's be ready 1305 self._retry_count = None 1306 1307 def get_user_password(self, auth): 1308 """Ask user for a password if none is already available. 1309 1310 :param auth: authentication info gathered so far (from the initial url 1311 and then during dialog with the server). 1312 """ 1313 auth_conf = config.AuthenticationConfig() 1314 user = auth.get('user', None) 1315 password = auth.get('password', None) 1316 realm = auth['realm'] 1317 port = auth.get('port', None) 1318 1319 if user is None: 1320 user = auth_conf.get_user(auth['protocol'], auth['host'], 1321 port=port, path=auth['path'], 1322 realm=realm, ask=True, 1323 prompt=self.build_username_prompt(auth)) 1324 if user is not None and password is None: 1325 password = auth_conf.get_password( 1326 auth['protocol'], auth['host'], user, 1327 port=port, 1328 path=auth['path'], realm=realm, 1329 prompt=self.build_password_prompt(auth)) 1330 1331 return user, password 1332 1333 def _build_password_prompt(self, auth): 1334 """Build a prompt taking the protocol used into account. 1335 1336 The AuthHandler is used by http and https, we want that information in 1337 the prompt, so we build the prompt from the authentication dict which 1338 contains all the needed parts. 1339 1340 Also, http and proxy AuthHandlers present different prompts to the 1341 user. The daughter classes should implements a public 1342 build_password_prompt using this method. 1343 """ 1344 prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s' 1345 realm = auth['realm'] 1346 if realm is not None: 1347 prompt += u", Realm: '%s'" % realm 1348 prompt += u' password' 1349 return prompt 1350 1351 def _build_username_prompt(self, auth): 1352 """Build a prompt taking the protocol used into account. 1353 1354 The AuthHandler is used by http and https, we want that information in 1355 the prompt, so we build the prompt from the authentication dict which 1356 contains all the needed parts. 1357 1358 Also, http and proxy AuthHandlers present different prompts to the 1359 user. The daughter classes should implements a public 1360 build_username_prompt using this method. 1361 """ 1362 prompt = u'%s' % auth['protocol'].upper() + u' %(host)s' 1363 realm = auth['realm'] 1364 if realm is not None: 1365 prompt += u", Realm: '%s'" % realm 1366 prompt += u' username' 1367 return prompt 1368 1369 def http_request(self, request): 1370 """Insert an authentication header if information is available""" 1371 auth = self.get_auth(request) 1372 if self.auth_params_reusable(auth): 1373 self.add_auth_header( 1374 request, self.build_auth_header(auth, request)) 1375 return request 1376 1377 https_request = http_request # FIXME: Need test 1378 1379 1380class NegotiateAuthHandler(AbstractAuthHandler): 1381 """A authentication handler that handles WWW-Authenticate: Negotiate. 1382 1383 At the moment this handler supports just Kerberos. In the future, 1384 NTLM support may also be added. 1385 """ 1386 1387 scheme = 'negotiate' 1388 handler_order = 480 1389 requires_username = False 1390 1391 def auth_match(self, header, auth): 1392 scheme, raw_auth = self._parse_auth_header(header) 1393 if scheme != self.scheme: 1394 return False 1395 self.update_auth(auth, 'scheme', scheme) 1396 resp = self._auth_match_kerberos(auth) 1397 if resp is None: 1398 return False 1399 # Optionally should try to authenticate using NTLM here 1400 self.update_auth(auth, 'negotiate_response', resp) 1401 return True 1402 1403 def _auth_match_kerberos(self, auth): 1404 """Try to create a GSSAPI response for authenticating against a host.""" 1405 global kerberos, checked_kerberos 1406 if kerberos is None and not checked_kerberos: 1407 try: 1408 import kerberos 1409 except ImportError: 1410 kerberos = None 1411 checked_kerberos = True 1412 if kerberos is None: 1413 return None 1414 ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth) 1415 if ret < 1: 1416 trace.warning('Unable to create GSSAPI context for %s: %d', 1417 auth['host'], ret) 1418 return None 1419 ret = kerberos.authGSSClientStep(vc, "") 1420 if ret < 0: 1421 trace.mutter('authGSSClientStep failed: %d', ret) 1422 return None 1423 return kerberos.authGSSClientResponse(vc) 1424 1425 def build_auth_header(self, auth, request): 1426 return "Negotiate %s" % auth['negotiate_response'] 1427 1428 def auth_params_reusable(self, auth): 1429 # If the auth scheme is known, it means a previous 1430 # authentication was successful, all information is 1431 # available, no further checks are needed. 1432 return (auth.get('scheme', None) == 'negotiate' and 1433 auth.get('negotiate_response', None) is not None) 1434 1435 1436class BasicAuthHandler(AbstractAuthHandler): 1437 """A custom basic authentication handler.""" 1438 1439 scheme = 'basic' 1440 handler_order = 500 1441 auth_regexp = re.compile('realm="([^"]*)"', re.I) 1442 1443 def build_auth_header(self, auth, request): 1444 raw = '%s:%s' % (auth['user'], auth['password']) 1445 auth_header = 'Basic ' + \ 1446 base64.b64encode(raw.encode('utf-8')).decode('ascii') 1447 return auth_header 1448 1449 def extract_realm(self, header_value): 1450 match = self.auth_regexp.search(header_value) 1451 realm = None 1452 if match: 1453 realm = match.group(1) 1454 return match, realm 1455 1456 def auth_match(self, header, auth): 1457 scheme, raw_auth = self._parse_auth_header(header) 1458 if scheme != self.scheme: 1459 return False 1460 1461 match, realm = self.extract_realm(raw_auth) 1462 if match: 1463 # Put useful info into auth 1464 self.update_auth(auth, 'scheme', scheme) 1465 self.update_auth(auth, 'realm', realm) 1466 if (auth.get('user', None) is None 1467 or auth.get('password', None) is None): 1468 user, password = self.get_user_password(auth) 1469 self.update_auth(auth, 'user', user) 1470 self.update_auth(auth, 'password', password) 1471 return match is not None 1472 1473 def auth_params_reusable(self, auth): 1474 # If the auth scheme is known, it means a previous 1475 # authentication was successful, all information is 1476 # available, no further checks are needed. 1477 return auth.get('scheme', None) == 'basic' 1478 1479 1480def get_digest_algorithm_impls(algorithm): 1481 H = None 1482 KD = None 1483 if algorithm == 'MD5': 1484 def H(x): return osutils.md5(x).hexdigest() 1485 elif algorithm == 'SHA': 1486 H = osutils.sha_string 1487 if H is not None: 1488 def KD(secret, data): return H( 1489 ("%s:%s" % (secret, data)).encode('utf-8')) 1490 return H, KD 1491 1492 1493def get_new_cnonce(nonce, nonce_count): 1494 raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(), 1495 osutils.rand_chars(8)) 1496 return osutils.sha_string(raw.encode('utf-8'))[:16] 1497 1498 1499class DigestAuthHandler(AbstractAuthHandler): 1500 """A custom digest authentication handler.""" 1501 1502 scheme = 'digest' 1503 # Before basic as digest is a bit more secure and should be preferred 1504 handler_order = 490 1505 1506 def auth_params_reusable(self, auth): 1507 # If the auth scheme is known, it means a previous 1508 # authentication was successful, all information is 1509 # available, no further checks are needed. 1510 return auth.get('scheme', None) == 'digest' 1511 1512 def auth_match(self, header, auth): 1513 scheme, raw_auth = self._parse_auth_header(header) 1514 if scheme != self.scheme: 1515 return False 1516 1517 # Put the requested authentication info into a dict 1518 req_auth = urllib_request.parse_keqv_list( 1519 urllib_request.parse_http_list(raw_auth)) 1520 1521 # Check that we can handle that authentication 1522 qop = req_auth.get('qop', None) 1523 if qop != 'auth': # No auth-int so far 1524 return False 1525 1526 H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5')) 1527 if H is None: 1528 return False 1529 1530 realm = req_auth.get('realm', None) 1531 # Put useful info into auth 1532 self.update_auth(auth, 'scheme', scheme) 1533 self.update_auth(auth, 'realm', realm) 1534 if auth.get('user', None) is None or auth.get('password', None) is None: 1535 user, password = self.get_user_password(auth) 1536 self.update_auth(auth, 'user', user) 1537 self.update_auth(auth, 'password', password) 1538 1539 try: 1540 if req_auth.get('algorithm', None) is not None: 1541 self.update_auth(auth, 'algorithm', req_auth.get('algorithm')) 1542 nonce = req_auth['nonce'] 1543 if auth.get('nonce', None) != nonce: 1544 # A new nonce, never used 1545 self.update_auth(auth, 'nonce_count', 0) 1546 self.update_auth(auth, 'nonce', nonce) 1547 self.update_auth(auth, 'qop', qop) 1548 auth['opaque'] = req_auth.get('opaque', None) 1549 except KeyError: 1550 # Some required field is not there 1551 return False 1552 1553 return True 1554 1555 def build_auth_header(self, auth, request): 1556 selector = request.selector 1557 url_scheme, url_selector = splittype(selector) 1558 sel_host, uri = splithost(url_selector) 1559 1560 A1 = ('%s:%s:%s' % 1561 (auth['user'], auth['realm'], auth['password'])).encode('utf-8') 1562 A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8') 1563 1564 nonce = auth['nonce'] 1565 qop = auth['qop'] 1566 1567 nonce_count = auth['nonce_count'] + 1 1568 ncvalue = '%08x' % nonce_count 1569 cnonce = get_new_cnonce(nonce, nonce_count) 1570 1571 H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5')) 1572 nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2)) 1573 request_digest = KD(H(A1), nonce_data) 1574 1575 header = 'Digest ' 1576 header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'], 1577 auth['realm'], 1578 nonce) 1579 header += ', uri="%s"' % uri 1580 header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue) 1581 header += ', qop="%s"' % qop 1582 header += ', response="%s"' % request_digest 1583 # Append the optional fields 1584 opaque = auth.get('opaque', None) 1585 if opaque: 1586 header += ', opaque="%s"' % opaque 1587 if auth.get('algorithm', None): 1588 header += ', algorithm="%s"' % auth.get('algorithm') 1589 1590 # We have used the nonce once more, update the count 1591 auth['nonce_count'] = nonce_count 1592 1593 return header 1594 1595 1596class HTTPAuthHandler(AbstractAuthHandler): 1597 """Custom http authentication handler. 1598 1599 Send the authentication preventively to avoid the roundtrip 1600 associated with the 401 error and keep the revelant info in 1601 the auth request attribute. 1602 """ 1603 1604 auth_required_header = 'www-authenticate' 1605 auth_header = 'Authorization' 1606 1607 def get_auth(self, request): 1608 """Get the auth params from the request""" 1609 return request.auth 1610 1611 def set_auth(self, request, auth): 1612 """Set the auth params for the request""" 1613 request.auth = auth 1614 1615 def build_password_prompt(self, auth): 1616 return self._build_password_prompt(auth) 1617 1618 def build_username_prompt(self, auth): 1619 return self._build_username_prompt(auth) 1620 1621 def http_error_401(self, req, fp, code, msg, headers): 1622 return self.auth_required(req, headers) 1623 1624 1625class ProxyAuthHandler(AbstractAuthHandler): 1626 """Custom proxy authentication handler. 1627 1628 Send the authentication preventively to avoid the roundtrip 1629 associated with the 407 error and keep the revelant info in 1630 the proxy_auth request attribute.. 1631 """ 1632 1633 auth_required_header = 'proxy-authenticate' 1634 # FIXME: the correct capitalization is Proxy-Authorization, 1635 # but python-2.4 urllib_request.Request insist on using capitalize() 1636 # instead of title(). 1637 auth_header = 'Proxy-authorization' 1638 1639 def get_auth(self, request): 1640 """Get the auth params from the request""" 1641 return request.proxy_auth 1642 1643 def set_auth(self, request, auth): 1644 """Set the auth params for the request""" 1645 request.proxy_auth = auth 1646 1647 def build_password_prompt(self, auth): 1648 prompt = self._build_password_prompt(auth) 1649 prompt = u'Proxy ' + prompt 1650 return prompt 1651 1652 def build_username_prompt(self, auth): 1653 prompt = self._build_username_prompt(auth) 1654 prompt = u'Proxy ' + prompt 1655 return prompt 1656 1657 def http_error_407(self, req, fp, code, msg, headers): 1658 return self.auth_required(req, headers) 1659 1660 1661class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler): 1662 """Custom http basic authentication handler""" 1663 1664 1665class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler): 1666 """Custom proxy basic authentication handler""" 1667 1668 1669class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler): 1670 """Custom http basic authentication handler""" 1671 1672 1673class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler): 1674 """Custom proxy basic authentication handler""" 1675 1676 1677class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler): 1678 """Custom http negotiate authentication handler""" 1679 1680 1681class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler): 1682 """Custom proxy negotiate authentication handler""" 1683 1684 1685class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor): 1686 """Process HTTP error responses. 1687 1688 We don't really process the errors, quite the contrary 1689 instead, we leave our Transport handle them. 1690 """ 1691 1692 accepted_errors = [200, # Ok 1693 201, 1694 202, 1695 204, 1696 206, # Partial content 1697 400, 1698 403, 1699 404, # Not found 1700 405, # Method not allowed 1701 406, # Not Acceptable 1702 409, # Conflict 1703 416, # Range not satisfiable 1704 422, # Unprocessible entity 1705 501, # Not implemented 1706 ] 1707 """The error codes the caller will handle. 1708 1709 This can be specialized in the request on a case-by case basis, but the 1710 common cases are covered here. 1711 """ 1712 1713 def http_response(self, request, response): 1714 code, msg, hdrs = response.code, response.msg, response.info() 1715 1716 if code not in self.accepted_errors: 1717 response = self.parent.error('http', request, response, 1718 code, msg, hdrs) 1719 return response 1720 1721 https_response = http_response 1722 1723 1724class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler): 1725 """Translate common errors into Breezy Exceptions""" 1726 1727 def http_error_default(self, req, fp, code, msg, hdrs): 1728 if code == 403: 1729 raise errors.TransportError( 1730 'Server refuses to fulfill the request (403 Forbidden)' 1731 ' for %s' % req.get_full_url()) 1732 else: 1733 raise errors.UnexpectedHttpStatus( 1734 req.get_full_url(), code, 1735 'Unable to handle http code: %s' % msg) 1736 1737 1738class Opener(object): 1739 """A wrapper around urllib_request.build_opener 1740 1741 Daughter classes can override to build their own specific opener 1742 """ 1743 # TODO: Provides hooks for daughter classes. 1744 1745 def __init__(self, 1746 connection=ConnectionHandler, 1747 redirect=HTTPRedirectHandler, 1748 error=HTTPErrorProcessor, 1749 report_activity=None, 1750 ca_certs=None): 1751 self._opener = urllib_request.build_opener( 1752 connection(report_activity=report_activity, ca_certs=ca_certs), 1753 redirect, error, 1754 ProxyHandler(), 1755 HTTPBasicAuthHandler(), 1756 HTTPDigestAuthHandler(), 1757 HTTPNegotiateAuthHandler(), 1758 ProxyBasicAuthHandler(), 1759 ProxyDigestAuthHandler(), 1760 ProxyNegotiateAuthHandler(), 1761 HTTPHandler, 1762 HTTPSHandler, 1763 HTTPDefaultErrorHandler, 1764 ) 1765 1766 self.open = self._opener.open 1767 if DEBUG >= 9: 1768 # When dealing with handler order, it's easy to mess 1769 # things up, the following will help understand which 1770 # handler is used, when and for what. 1771 import pprint 1772 pprint.pprint(self._opener.__dict__) 1773 1774 1775class HttpTransport(ConnectedTransport): 1776 """HTTP Client implementations. 1777 1778 The protocol can be given as e.g. http+urllib://host/ to use a particular 1779 implementation. 1780 """ 1781 1782 # _unqualified_scheme: "http" or "https" 1783 # _scheme: may have "+pycurl", etc 1784 1785 # In order to debug we have to issue our traces in sync with 1786 # httplib, which use print :( 1787 _debuglevel = 0 1788 1789 def __init__(self, base, _from_transport=None, ca_certs=None): 1790 """Set the base path where files will be stored.""" 1791 proto_match = re.match(r'^(https?)(\+\w+)?://', base) 1792 if not proto_match: 1793 raise AssertionError("not a http url: %r" % base) 1794 self._unqualified_scheme = proto_match.group(1) 1795 super(HttpTransport, self).__init__( 1796 base, _from_transport=_from_transport) 1797 self._medium = None 1798 # range hint is handled dynamically throughout the life 1799 # of the transport object. We start by trying multi-range 1800 # requests and if the server returns bogus results, we 1801 # retry with single range requests and, finally, we 1802 # forget about range if the server really can't 1803 # understand. Once acquired, this piece of info is 1804 # propagated to clones. 1805 if _from_transport is not None: 1806 self._range_hint = _from_transport._range_hint 1807 self._opener = _from_transport._opener 1808 else: 1809 self._range_hint = 'multi' 1810 self._opener = Opener( 1811 report_activity=self._report_activity, ca_certs=ca_certs) 1812 1813 def request(self, method, url, fields=None, headers=None, **urlopen_kw): 1814 body = urlopen_kw.pop('body', None) 1815 if fields is not None: 1816 data = urlencode(fields).encode() 1817 if body is not None: 1818 raise ValueError( 1819 'body and fields are mutually exclusive') 1820 else: 1821 data = body 1822 if headers is None: 1823 headers = {} 1824 request = Request(method, url, data, headers) 1825 request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0) 1826 if urlopen_kw: 1827 raise NotImplementedError( 1828 'unknown arguments: %r' % urlopen_kw.keys()) 1829 connection = self._get_connection() 1830 if connection is not None: 1831 # Give back shared info 1832 request.connection = connection 1833 (auth, proxy_auth) = self._get_credentials() 1834 # Clean the httplib.HTTPConnection pipeline in case the previous 1835 # request couldn't do it 1836 connection.cleanup_pipe() 1837 else: 1838 # First request, initialize credentials. 1839 # scheme and realm will be set by the _urllib2_wrappers.AuthHandler 1840 auth = self._create_auth() 1841 # Proxy initialization will be done by the first proxied request 1842 proxy_auth = dict() 1843 # Ensure authentication info is provided 1844 request.auth = auth 1845 request.proxy_auth = proxy_auth 1846 1847 if self._debuglevel > 0: 1848 print('perform: %s base: %s, url: %s' % (request.method, self.base, 1849 request.get_full_url())) 1850 response = self._opener.open(request) 1851 if self._get_connection() is not request.connection: 1852 # First connection or reconnection 1853 self._set_connection(request.connection, 1854 (request.auth, request.proxy_auth)) 1855 else: 1856 # http may change the credentials while keeping the 1857 # connection opened 1858 self._update_credentials((request.auth, request.proxy_auth)) 1859 1860 code = response.code 1861 if (request.follow_redirections is False 1862 and code in (301, 302, 303, 307, 308)): 1863 raise errors.RedirectRequested(request.get_full_url(), 1864 request.redirected_to, 1865 is_permanent=(code in (301, 308))) 1866 1867 if request.redirected_to is not None: 1868 trace.mutter('redirected from: %s to: %s' % (request.get_full_url(), 1869 request.redirected_to)) 1870 1871 class Urllib3LikeResponse(object): 1872 1873 def __init__(self, actual): 1874 self._actual = actual 1875 self._data = None 1876 1877 def getheader(self, name, default=None): 1878 if self._actual.headers is None: 1879 raise http_client.ResponseNotReady() 1880 return self._actual.headers.get(name, default) 1881 1882 def getheaders(self): 1883 if self._actual.headers is None: 1884 raise http_client.ResponseNotReady() 1885 return list(self._actual.headers.items()) 1886 1887 @property 1888 def status(self): 1889 return self._actual.code 1890 1891 @property 1892 def reason(self): 1893 return self._actual.reason 1894 1895 @property 1896 def data(self): 1897 if self._data is None: 1898 self._data = self._actual.read() 1899 return self._data 1900 1901 @property 1902 def text(self): 1903 if self.status == 204: 1904 return None 1905 charset = cgi.parse_header( 1906 self._actual.headers['Content-Type'])[1].get('charset') 1907 if charset: 1908 return self.data.decode(charset) 1909 else: 1910 return self.data.decode() 1911 1912 def read(self, amt=None): 1913 return self._actual.read(amt) 1914 1915 def readlines(self): 1916 return self._actual.readlines() 1917 1918 def readline(self, size=-1): 1919 return self._actual.readline(size) 1920 1921 return Urllib3LikeResponse(response) 1922 1923 def disconnect(self): 1924 connection = self._get_connection() 1925 if connection is not None: 1926 connection.close() 1927 1928 def has(self, relpath): 1929 """Does the target location exist? 1930 """ 1931 response = self._head(relpath) 1932 1933 code = response.status 1934 if code == 200: # "ok", 1935 return True 1936 else: 1937 return False 1938 1939 def get(self, relpath): 1940 """Get the file at the given relative path. 1941 1942 :param relpath: The relative path to the file 1943 """ 1944 code, response_file = self._get(relpath, None) 1945 return response_file 1946 1947 def _get(self, relpath, offsets, tail_amount=0): 1948 """Get a file, or part of a file. 1949 1950 :param relpath: Path relative to transport base URL 1951 :param offsets: None to get the whole file; 1952 or a list of _CoalescedOffset to fetch parts of a file. 1953 :param tail_amount: The amount to get from the end of the file. 1954 1955 :returns: (http_code, result_file) 1956 """ 1957 abspath = self._remote_path(relpath) 1958 headers = {} 1959 if offsets or tail_amount: 1960 range_header = self._attempted_range_header(offsets, tail_amount) 1961 if range_header is not None: 1962 bytes = 'bytes=' + range_header 1963 headers = {'Range': bytes} 1964 else: 1965 range_header = None 1966 1967 response = self.request('GET', abspath, headers=headers) 1968 1969 if response.status == 404: # not found 1970 raise errors.NoSuchFile(abspath) 1971 elif response.status == 416: 1972 # We don't know which, but one of the ranges we specified was 1973 # wrong. 1974 raise errors.InvalidHttpRange(abspath, range_header, 1975 'Server return code %d' % response.status) 1976 elif response.status == 400: 1977 if range_header: 1978 # We don't know which, but one of the ranges we specified was 1979 # wrong. 1980 raise errors.InvalidHttpRange( 1981 abspath, range_header, 1982 'Server return code %d' % response.status) 1983 else: 1984 raise errors.BadHttpRequest(abspath, response.reason) 1985 elif response.status not in (200, 206): 1986 raise errors.UnexpectedHttpStatus(abspath, response.status) 1987 1988 data = handle_response( 1989 abspath, response.status, response.getheader, response) 1990 return response.status, data 1991 1992 def _remote_path(self, relpath): 1993 """See ConnectedTransport._remote_path. 1994 1995 user and passwords are not embedded in the path provided to the server. 1996 """ 1997 url = self._parsed_url.clone(relpath) 1998 url.user = url.quoted_user = None 1999 url.password = url.quoted_password = None 2000 url.scheme = self._unqualified_scheme 2001 return str(url) 2002 2003 def _create_auth(self): 2004 """Returns a dict containing the credentials provided at build time.""" 2005 auth = dict(host=self._parsed_url.host, port=self._parsed_url.port, 2006 user=self._parsed_url.user, password=self._parsed_url.password, 2007 protocol=self._unqualified_scheme, 2008 path=self._parsed_url.path) 2009 return auth 2010 2011 def get_smart_medium(self): 2012 """See Transport.get_smart_medium.""" 2013 if self._medium is None: 2014 # Since medium holds some state (smart server probing at least), we 2015 # need to keep it around. Note that this is needed because medium 2016 # has the same 'base' attribute as the transport so it can't be 2017 # shared between transports having different bases. 2018 self._medium = SmartClientHTTPMedium(self) 2019 return self._medium 2020 2021 def _degrade_range_hint(self, relpath, ranges): 2022 if self._range_hint == 'multi': 2023 self._range_hint = 'single' 2024 mutter('Retry "%s" with single range request' % relpath) 2025 elif self._range_hint == 'single': 2026 self._range_hint = None 2027 mutter('Retry "%s" without ranges' % relpath) 2028 else: 2029 # We tried all the tricks, but nothing worked, caller must reraise. 2030 return False 2031 return True 2032 2033 # _coalesce_offsets is a helper for readv, it try to combine ranges without 2034 # degrading readv performances. _bytes_to_read_before_seek is the value 2035 # used for the limit parameter and has been tuned for other transports. For 2036 # HTTP, the name is inappropriate but the parameter is still useful and 2037 # helps reduce the number of chunks in the response. The overhead for a 2038 # chunk (headers, length, footer around the data itself is variable but 2039 # around 50 bytes. We use 128 to reduce the range specifiers that appear in 2040 # the header, some servers (notably Apache) enforce a maximum length for a 2041 # header and issue a '400: Bad request' error when too much ranges are 2042 # specified. 2043 _bytes_to_read_before_seek = 128 2044 # No limit on the offset number that get combined into one, we are trying 2045 # to avoid downloading the whole file. 2046 _max_readv_combine = 0 2047 # By default Apache has a limit of ~400 ranges before replying with a 400 2048 # Bad Request. So we go underneath that amount to be safe. 2049 _max_get_ranges = 200 2050 # We impose no limit on the range size. But see _pycurl.py for a different 2051 # use. 2052 _get_max_size = 0 2053 2054 def _readv(self, relpath, offsets): 2055 """Get parts of the file at the given relative path. 2056 2057 :param offsets: A list of (offset, size) tuples. 2058 :param return: A list or generator of (offset, data) tuples 2059 """ 2060 # offsets may be a generator, we will iterate it several times, so 2061 # build a list 2062 offsets = list(offsets) 2063 2064 try_again = True 2065 retried_offset = None 2066 while try_again: 2067 try_again = False 2068 2069 # Coalesce the offsets to minimize the GET requests issued 2070 sorted_offsets = sorted(offsets) 2071 coalesced = self._coalesce_offsets( 2072 sorted_offsets, limit=self._max_readv_combine, 2073 fudge_factor=self._bytes_to_read_before_seek, 2074 max_size=self._get_max_size) 2075 2076 # Turn it into a list, we will iterate it several times 2077 coalesced = list(coalesced) 2078 if 'http' in debug.debug_flags: 2079 mutter('http readv of %s offsets => %s collapsed %s', 2080 relpath, len(offsets), len(coalesced)) 2081 2082 # Cache the data read, but only until it's been used 2083 data_map = {} 2084 # We will iterate on the data received from the GET requests and 2085 # serve the corresponding offsets respecting the initial order. We 2086 # need an offset iterator for that. 2087 iter_offsets = iter(offsets) 2088 try: 2089 cur_offset_and_size = next(iter_offsets) 2090 except StopIteration: 2091 return 2092 2093 try: 2094 for cur_coal, rfile in self._coalesce_readv(relpath, coalesced): 2095 # Split the received chunk 2096 for offset, size in cur_coal.ranges: 2097 start = cur_coal.start + offset 2098 rfile.seek(start, os.SEEK_SET) 2099 data = rfile.read(size) 2100 data_len = len(data) 2101 if data_len != size: 2102 raise errors.ShortReadvError(relpath, start, size, 2103 actual=data_len) 2104 if (start, size) == cur_offset_and_size: 2105 # The offset requested are sorted as the coalesced 2106 # ones, no need to cache. Win ! 2107 yield cur_offset_and_size[0], data 2108 try: 2109 cur_offset_and_size = next(iter_offsets) 2110 except StopIteration: 2111 return 2112 else: 2113 # Different sorting. We need to cache. 2114 data_map[(start, size)] = data 2115 2116 # Yield everything we can 2117 while cur_offset_and_size in data_map: 2118 # Clean the cached data since we use it 2119 # XXX: will break if offsets contains duplicates -- 2120 # vila20071129 2121 this_data = data_map.pop(cur_offset_and_size) 2122 yield cur_offset_and_size[0], this_data 2123 try: 2124 cur_offset_and_size = next(iter_offsets) 2125 except StopIteration: 2126 return 2127 2128 except (errors.ShortReadvError, errors.InvalidRange, 2129 errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e: 2130 mutter('Exception %r: %s during http._readv', e, e) 2131 if (not isinstance(e, errors.ShortReadvError) 2132 or retried_offset == cur_offset_and_size): 2133 # We don't degrade the range hint for ShortReadvError since 2134 # they do not indicate a problem with the server ability to 2135 # handle ranges. Except when we fail to get back a required 2136 # offset twice in a row. In that case, falling back to 2137 # single range or whole file should help. 2138 if not self._degrade_range_hint(relpath, coalesced): 2139 raise 2140 # Some offsets may have been already processed, so we retry 2141 # only the unsuccessful ones. 2142 offsets = [cur_offset_and_size] + [o for o in iter_offsets] 2143 retried_offset = cur_offset_and_size 2144 try_again = True 2145 2146 def _coalesce_readv(self, relpath, coalesced): 2147 """Issue several GET requests to satisfy the coalesced offsets""" 2148 2149 def get_and_yield(relpath, coalesced): 2150 if coalesced: 2151 # Note that the _get below may raise 2152 # errors.InvalidHttpRange. It's the caller's responsibility to 2153 # decide how to retry since it may provide different coalesced 2154 # offsets. 2155 code, rfile = self._get(relpath, coalesced) 2156 for coal in coalesced: 2157 yield coal, rfile 2158 2159 if self._range_hint is None: 2160 # Download whole file 2161 for c, rfile in get_and_yield(relpath, coalesced): 2162 yield c, rfile 2163 else: 2164 total = len(coalesced) 2165 if self._range_hint == 'multi': 2166 max_ranges = self._max_get_ranges 2167 elif self._range_hint == 'single': 2168 max_ranges = total 2169 else: 2170 raise AssertionError("Unknown _range_hint %r" 2171 % (self._range_hint,)) 2172 # TODO: Some web servers may ignore the range requests and return 2173 # the whole file, we may want to detect that and avoid further 2174 # requests. 2175 # Hint: test_readv_multiple_get_requests will fail once we do that 2176 cumul = 0 2177 ranges = [] 2178 for coal in coalesced: 2179 if ((self._get_max_size > 0 2180 and cumul + coal.length > self._get_max_size) or 2181 len(ranges) >= max_ranges): 2182 # Get that much and yield 2183 for c, rfile in get_and_yield(relpath, ranges): 2184 yield c, rfile 2185 # Restart with the current offset 2186 ranges = [coal] 2187 cumul = coal.length 2188 else: 2189 ranges.append(coal) 2190 cumul += coal.length 2191 # Get the rest and yield 2192 for c, rfile in get_and_yield(relpath, ranges): 2193 yield c, rfile 2194 2195 def recommended_page_size(self): 2196 """See Transport.recommended_page_size(). 2197 2198 For HTTP we suggest a large page size to reduce the overhead 2199 introduced by latency. 2200 """ 2201 return 64 * 1024 2202 2203 def _post(self, body_bytes): 2204 """POST body_bytes to .bzr/smart on this transport. 2205 2206 :returns: (response code, response body file-like object). 2207 """ 2208 # TODO: Requiring all the body_bytes to be available at the beginning of 2209 # the POST may require large client buffers. It would be nice to have 2210 # an interface that allows streaming via POST when possible (and 2211 # degrades to a local buffer when not). 2212 abspath = self._remote_path('.bzr/smart') 2213 response = self.request( 2214 'POST', abspath, body=body_bytes, 2215 headers={'Content-Type': 'application/octet-stream'}) 2216 if response.status not in (200, 403): 2217 raise errors.UnexpectedHttpStatus(abspath, response.status) 2218 code = response.status 2219 data = handle_response( 2220 abspath, code, response.getheader, response) 2221 return code, data 2222 2223 def _head(self, relpath): 2224 """Request the HEAD of a file. 2225 2226 Performs the request and leaves callers handle the results. 2227 """ 2228 abspath = self._remote_path(relpath) 2229 response = self.request('HEAD', abspath) 2230 if response.status not in (200, 404): 2231 raise errors.UnexpectedHttpStatus(abspath, response.status) 2232 2233 return response 2234 2235 raise NotImplementedError(self._post) 2236 2237 def put_file(self, relpath, f, mode=None): 2238 """Copy the file-like object into the location. 2239 2240 :param relpath: Location to put the contents, relative to base. 2241 :param f: File-like object. 2242 """ 2243 raise errors.TransportNotPossible('http PUT not supported') 2244 2245 def mkdir(self, relpath, mode=None): 2246 """Create a directory at the given path.""" 2247 raise errors.TransportNotPossible('http does not support mkdir()') 2248 2249 def rmdir(self, relpath): 2250 """See Transport.rmdir.""" 2251 raise errors.TransportNotPossible('http does not support rmdir()') 2252 2253 def append_file(self, relpath, f, mode=None): 2254 """Append the text in the file-like object into the final 2255 location. 2256 """ 2257 raise errors.TransportNotPossible('http does not support append()') 2258 2259 def copy(self, rel_from, rel_to): 2260 """Copy the item at rel_from to the location at rel_to""" 2261 raise errors.TransportNotPossible('http does not support copy()') 2262 2263 def copy_to(self, relpaths, other, mode=None, pb=None): 2264 """Copy a set of entries from self into another Transport. 2265 2266 :param relpaths: A list/generator of entries to be copied. 2267 2268 TODO: if other is LocalTransport, is it possible to 2269 do better than put(get())? 2270 """ 2271 # At this point HttpTransport might be able to check and see if 2272 # the remote location is the same, and rather than download, and 2273 # then upload, it could just issue a remote copy_this command. 2274 if isinstance(other, HttpTransport): 2275 raise errors.TransportNotPossible( 2276 'http cannot be the target of copy_to()') 2277 else: 2278 return super(HttpTransport, self).\ 2279 copy_to(relpaths, other, mode=mode, pb=pb) 2280 2281 def move(self, rel_from, rel_to): 2282 """Move the item at rel_from to the location at rel_to""" 2283 raise errors.TransportNotPossible('http does not support move()') 2284 2285 def delete(self, relpath): 2286 """Delete the item at relpath""" 2287 raise errors.TransportNotPossible('http does not support delete()') 2288 2289 def external_url(self): 2290 """See breezy.transport.Transport.external_url.""" 2291 # HTTP URL's are externally usable as long as they don't mention their 2292 # implementation qualifier 2293 url = self._parsed_url.clone() 2294 url.scheme = self._unqualified_scheme 2295 return str(url) 2296 2297 def is_readonly(self): 2298 """See Transport.is_readonly.""" 2299 return True 2300 2301 def listable(self): 2302 """See Transport.listable.""" 2303 return False 2304 2305 def stat(self, relpath): 2306 """Return the stat information for a file. 2307 """ 2308 raise errors.TransportNotPossible('http does not support stat()') 2309 2310 def lock_read(self, relpath): 2311 """Lock the given file for shared (read) access. 2312 :return: A lock object, which should be passed to Transport.unlock() 2313 """ 2314 # The old RemoteBranch ignore lock for reading, so we will 2315 # continue that tradition and return a bogus lock object. 2316 class BogusLock(object): 2317 def __init__(self, path): 2318 self.path = path 2319 2320 def unlock(self): 2321 pass 2322 return BogusLock(relpath) 2323 2324 def lock_write(self, relpath): 2325 """Lock the given file for exclusive (write) access. 2326 WARNING: many transports do not support this, so trying avoid using it 2327 2328 :return: A lock object, which should be passed to Transport.unlock() 2329 """ 2330 raise errors.TransportNotPossible('http does not support lock_write()') 2331 2332 def _attempted_range_header(self, offsets, tail_amount): 2333 """Prepare a HTTP Range header at a level the server should accept. 2334 2335 :return: the range header representing offsets/tail_amount or None if 2336 no header can be built. 2337 """ 2338 2339 if self._range_hint == 'multi': 2340 # Generate the header describing all offsets 2341 return self._range_header(offsets, tail_amount) 2342 elif self._range_hint == 'single': 2343 # Combine all the requested ranges into a single 2344 # encompassing one 2345 if len(offsets) > 0: 2346 if tail_amount not in (0, None): 2347 # Nothing we can do here to combine ranges with tail_amount 2348 # in a single range, just returns None. The whole file 2349 # should be downloaded. 2350 return None 2351 else: 2352 start = offsets[0].start 2353 last = offsets[-1] 2354 end = last.start + last.length - 1 2355 whole = self._coalesce_offsets([(start, end - start + 1)], 2356 limit=0, fudge_factor=0) 2357 return self._range_header(list(whole), 0) 2358 else: 2359 # Only tail_amount, requested, leave range_header 2360 # do its work 2361 return self._range_header(offsets, tail_amount) 2362 else: 2363 return None 2364 2365 @staticmethod 2366 def _range_header(ranges, tail_amount): 2367 """Turn a list of bytes ranges into a HTTP Range header value. 2368 2369 :param ranges: A list of _CoalescedOffset 2370 :param tail_amount: The amount to get from the end of the file. 2371 2372 :return: HTTP range header string. 2373 2374 At least a non-empty ranges *or* a tail_amount must be 2375 provided. 2376 """ 2377 strings = [] 2378 for offset in ranges: 2379 strings.append('%d-%d' % (offset.start, 2380 offset.start + offset.length - 1)) 2381 2382 if tail_amount: 2383 strings.append('-%d' % tail_amount) 2384 2385 return ','.join(strings) 2386 2387 def _redirected_to(self, source, target): 2388 """Returns a transport suitable to re-issue a redirected request. 2389 2390 :param source: The source url as returned by the server. 2391 :param target: The target url as returned by the server. 2392 2393 The redirection can be handled only if the relpath involved is not 2394 renamed by the redirection. 2395 2396 :returns: A transport 2397 :raise UnusableRedirect: when the URL can not be reinterpreted 2398 """ 2399 parsed_source = self._split_url(source) 2400 parsed_target = self._split_url(target) 2401 pl = len(self._parsed_url.path) 2402 # determine the excess tail - the relative path that was in 2403 # the original request but not part of this transports' URL. 2404 excess_tail = parsed_source.path[pl:].strip("/") 2405 if not parsed_target.path.endswith(excess_tail): 2406 # The final part of the url has been renamed, we can't handle the 2407 # redirection. 2408 raise UnusableRedirect( 2409 source, target, "final part of the url was renamed") 2410 2411 target_path = parsed_target.path 2412 if excess_tail: 2413 # Drop the tail that was in the redirect but not part of 2414 # the path of this transport. 2415 target_path = target_path[:-len(excess_tail)] 2416 2417 if parsed_target.scheme in ('http', 'https'): 2418 # Same protocol family (i.e. http[s]), we will preserve the same 2419 # http client implementation when a redirection occurs from one to 2420 # the other (otherwise users may be surprised that bzr switches 2421 # from one implementation to the other, and devs may suffer 2422 # debugging it). 2423 if (parsed_target.scheme == self._unqualified_scheme 2424 and parsed_target.host == self._parsed_url.host 2425 and parsed_target.port == self._parsed_url.port 2426 and (parsed_target.user is None or 2427 parsed_target.user == self._parsed_url.user)): 2428 # If a user is specified, it should match, we don't care about 2429 # passwords, wrong passwords will be rejected anyway. 2430 return self.clone(target_path) 2431 else: 2432 # Rebuild the url preserving the scheme qualification and the 2433 # credentials (if they don't apply, the redirected to server 2434 # will tell us, but if they do apply, we avoid prompting the 2435 # user) 2436 redir_scheme = parsed_target.scheme 2437 new_url = self._unsplit_url(redir_scheme, 2438 self._parsed_url.user, 2439 self._parsed_url.password, 2440 parsed_target.host, parsed_target.port, 2441 target_path) 2442 return transport.get_transport_from_url(new_url) 2443 else: 2444 # Redirected to a different protocol 2445 new_url = self._unsplit_url(parsed_target.scheme, 2446 parsed_target.user, 2447 parsed_target.password, 2448 parsed_target.host, parsed_target.port, 2449 target_path) 2450 return transport.get_transport_from_url(new_url) 2451 2452 def _options(self, relpath): 2453 abspath = self._remote_path(relpath) 2454 resp = self.request('OPTIONS', abspath) 2455 if resp.status == 404: 2456 raise errors.NoSuchFile(abspath) 2457 if resp.status in (403, 405): 2458 raise errors.InvalidHttpResponse( 2459 abspath, 2460 "OPTIONS not supported or forbidden for remote URL") 2461 return resp.getheaders() 2462 2463 2464# TODO: May be better located in smart/medium.py with the other 2465# SmartMedium classes 2466class SmartClientHTTPMedium(medium.SmartClientMedium): 2467 2468 def __init__(self, http_transport): 2469 super(SmartClientHTTPMedium, self).__init__(http_transport.base) 2470 # We don't want to create a circular reference between the http 2471 # transport and its associated medium. Since the transport will live 2472 # longer than the medium, the medium keep only a weak reference to its 2473 # transport. 2474 self._http_transport_ref = weakref.ref(http_transport) 2475 2476 def get_request(self): 2477 return SmartClientHTTPMediumRequest(self) 2478 2479 def should_probe(self): 2480 return True 2481 2482 def remote_path_from_transport(self, transport): 2483 # Strip the optional 'bzr+' prefix from transport so it will have the 2484 # same scheme as self. 2485 transport_base = transport.base 2486 if transport_base.startswith('bzr+'): 2487 transport_base = transport_base[4:] 2488 rel_url = urlutils.relative_url(self.base, transport_base) 2489 return urlutils.unquote(rel_url) 2490 2491 def send_http_smart_request(self, bytes): 2492 try: 2493 # Get back the http_transport hold by the weak reference 2494 t = self._http_transport_ref() 2495 code, body_filelike = t._post(bytes) 2496 if code != 200: 2497 raise errors.UnexpectedHttpStatus( 2498 t._remote_path('.bzr/smart'), code) 2499 except (errors.InvalidHttpResponse, errors.ConnectionReset) as e: 2500 raise errors.SmartProtocolError(str(e)) 2501 return body_filelike 2502 2503 def _report_activity(self, bytes, direction): 2504 """See SmartMedium._report_activity. 2505 2506 Does nothing; the underlying plain HTTP transport will report the 2507 activity that this medium would report. 2508 """ 2509 pass 2510 2511 def disconnect(self): 2512 """See SmartClientMedium.disconnect().""" 2513 t = self._http_transport_ref() 2514 t.disconnect() 2515 2516 2517# TODO: May be better located in smart/medium.py with the other 2518# SmartMediumRequest classes 2519class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest): 2520 """A SmartClientMediumRequest that works with an HTTP medium.""" 2521 2522 def __init__(self, client_medium): 2523 medium.SmartClientMediumRequest.__init__(self, client_medium) 2524 self._buffer = b'' 2525 2526 def _accept_bytes(self, bytes): 2527 self._buffer += bytes 2528 2529 def _finished_writing(self): 2530 data = self._medium.send_http_smart_request(self._buffer) 2531 self._response_body = data 2532 2533 def _read_bytes(self, count): 2534 """See SmartClientMediumRequest._read_bytes.""" 2535 return self._response_body.read(count) 2536 2537 def _read_line(self): 2538 line, excess = medium._get_line(self._response_body.read) 2539 if excess != b'': 2540 raise AssertionError( 2541 '_get_line returned excess bytes, but this mediumrequest ' 2542 'cannot handle excess. (%r)' % (excess,)) 2543 return line 2544 2545 def _finished_reading(self): 2546 """See SmartClientMediumRequest._finished_reading.""" 2547 pass 2548 2549 2550def unhtml_roughly(maybe_html, length_limit=1000): 2551 """Very approximate html->text translation, for presenting error bodies. 2552 2553 :param length_limit: Truncate the result to this many characters. 2554 2555 >>> unhtml_roughly("<b>bad</b> things happened\\n") 2556 ' bad things happened ' 2557 """ 2558 return re.subn(r"(<[^>]*>|\n| )", " ", maybe_html)[0][:length_limit] 2559 2560 2561def get_test_permutations(): 2562 """Return the permutations to be used in testing.""" 2563 from breezy.tests import ( 2564 features, 2565 http_server, 2566 ) 2567 permutations = [(HttpTransport, http_server.HttpServer), ] 2568 if features.HTTPSServerFeature.available(): 2569 from breezy.tests import ( 2570 https_server, 2571 ssl_certs, 2572 ) 2573 2574 class HTTPS_transport(HttpTransport): 2575 2576 def __init__(self, base, _from_transport=None): 2577 super(HTTPS_transport, self).__init__( 2578 base, _from_transport=_from_transport, 2579 ca_certs=ssl_certs.build_path('ca.crt')) 2580 2581 permutations.append((HTTPS_transport, 2582 https_server.HTTPSServer)) 2583 return permutations 2584