1from __future__ import absolute_import 2import time 3import logging 4from collections import namedtuple 5from itertools import takewhile 6import email 7import re 8 9from ..exceptions import ( 10 ConnectTimeoutError, 11 MaxRetryError, 12 ProtocolError, 13 ReadTimeoutError, 14 ResponseError, 15 InvalidHeader, 16 ProxyError, 17) 18from ..packages import six 19 20 21log = logging.getLogger(__name__) 22 23 24# Data structure for representing the metadata of requests that result in a retry. 25RequestHistory = namedtuple( 26 "RequestHistory", ["method", "url", "error", "status", "redirect_location"] 27) 28 29 30class Retry(object): 31 """ Retry configuration. 32 33 Each retry attempt will create a new Retry object with updated values, so 34 they can be safely reused. 35 36 Retries can be defined as a default for a pool:: 37 38 retries = Retry(connect=5, read=2, redirect=5) 39 http = PoolManager(retries=retries) 40 response = http.request('GET', 'http://example.com/') 41 42 Or per-request (which overrides the default for the pool):: 43 44 response = http.request('GET', 'http://example.com/', retries=Retry(10)) 45 46 Retries can be disabled by passing ``False``:: 47 48 response = http.request('GET', 'http://example.com/', retries=False) 49 50 Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless 51 retries are disabled, in which case the causing exception will be raised. 52 53 :param int total: 54 Total number of retries to allow. Takes precedence over other counts. 55 56 Set to ``None`` to remove this constraint and fall back on other 57 counts. It's a good idea to set this to some sensibly-high value to 58 account for unexpected edge cases and avoid infinite retry loops. 59 60 Set to ``0`` to fail on the first retry. 61 62 Set to ``False`` to disable and imply ``raise_on_redirect=False``. 63 64 :param int connect: 65 How many connection-related errors to retry on. 66 67 These are errors raised before the request is sent to the remote server, 68 which we assume has not triggered the server to process the request. 69 70 Set to ``0`` to fail on the first retry of this type. 71 72 :param int read: 73 How many times to retry on read errors. 74 75 These errors are raised after the request was sent to the server, so the 76 request may have side-effects. 77 78 Set to ``0`` to fail on the first retry of this type. 79 80 :param int redirect: 81 How many redirects to perform. Limit this to avoid infinite redirect 82 loops. 83 84 A redirect is a HTTP response with a status code 301, 302, 303, 307 or 85 308. 86 87 Set to ``0`` to fail on the first retry of this type. 88 89 Set to ``False`` to disable and imply ``raise_on_redirect=False``. 90 91 :param int status: 92 How many times to retry on bad status codes. 93 94 These are retries made on responses, where status code matches 95 ``status_forcelist``. 96 97 Set to ``0`` to fail on the first retry of this type. 98 99 :param iterable method_whitelist: 100 Set of uppercased HTTP method verbs that we should retry on. 101 102 By default, we only retry on methods which are considered to be 103 idempotent (multiple requests with the same parameters end with the 104 same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. 105 106 Set to a ``False`` value to retry on any verb. 107 108 :param iterable status_forcelist: 109 A set of integer HTTP status codes that we should force a retry on. 110 A retry is initiated if the request method is in ``method_whitelist`` 111 and the response status code is in ``status_forcelist``. 112 113 By default, this is disabled with ``None``. 114 115 :param float backoff_factor: 116 A backoff factor to apply between attempts after the second try 117 (most errors are resolved immediately by a second try without a 118 delay). urllib3 will sleep for:: 119 120 {backoff factor} * (2 ** ({number of total retries} - 1)) 121 122 seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep 123 for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer 124 than :attr:`Retry.BACKOFF_MAX`. 125 126 By default, backoff is disabled (set to 0). 127 128 :param bool raise_on_redirect: Whether, if the number of redirects is 129 exhausted, to raise a MaxRetryError, or to return a response with a 130 response code in the 3xx range. 131 132 :param bool raise_on_status: Similar meaning to ``raise_on_redirect``: 133 whether we should raise an exception, or return a response, 134 if status falls in ``status_forcelist`` range and retries have 135 been exhausted. 136 137 :param tuple history: The history of the request encountered during 138 each call to :meth:`~Retry.increment`. The list is in the order 139 the requests occurred. Each list item is of class :class:`RequestHistory`. 140 141 :param bool respect_retry_after_header: 142 Whether to respect Retry-After header on status codes defined as 143 :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not. 144 145 :param iterable remove_headers_on_redirect: 146 Sequence of headers to remove from the request when a response 147 indicating a redirect is returned before firing off the redirected 148 request. 149 """ 150 151 DEFAULT_METHOD_WHITELIST = frozenset( 152 ["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"] 153 ) 154 155 RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503]) 156 157 DEFAULT_REDIRECT_HEADERS_BLACKLIST = frozenset(["Authorization"]) 158 159 #: Maximum backoff time. 160 BACKOFF_MAX = 120 161 162 def __init__( 163 self, 164 total=10, 165 connect=None, 166 read=None, 167 redirect=None, 168 status=None, 169 method_whitelist=DEFAULT_METHOD_WHITELIST, 170 status_forcelist=None, 171 backoff_factor=0, 172 raise_on_redirect=True, 173 raise_on_status=True, 174 history=None, 175 respect_retry_after_header=True, 176 remove_headers_on_redirect=DEFAULT_REDIRECT_HEADERS_BLACKLIST, 177 ): 178 179 self.total = total 180 self.connect = connect 181 self.read = read 182 self.status = status 183 184 if redirect is False or total is False: 185 redirect = 0 186 raise_on_redirect = False 187 188 self.redirect = redirect 189 self.status_forcelist = status_forcelist or set() 190 self.method_whitelist = method_whitelist 191 self.backoff_factor = backoff_factor 192 self.raise_on_redirect = raise_on_redirect 193 self.raise_on_status = raise_on_status 194 self.history = history or tuple() 195 self.respect_retry_after_header = respect_retry_after_header 196 self.remove_headers_on_redirect = frozenset( 197 [h.lower() for h in remove_headers_on_redirect] 198 ) 199 200 def new(self, **kw): 201 params = dict( 202 total=self.total, 203 connect=self.connect, 204 read=self.read, 205 redirect=self.redirect, 206 status=self.status, 207 method_whitelist=self.method_whitelist, 208 status_forcelist=self.status_forcelist, 209 backoff_factor=self.backoff_factor, 210 raise_on_redirect=self.raise_on_redirect, 211 raise_on_status=self.raise_on_status, 212 history=self.history, 213 remove_headers_on_redirect=self.remove_headers_on_redirect, 214 respect_retry_after_header=self.respect_retry_after_header, 215 ) 216 params.update(kw) 217 return type(self)(**params) 218 219 @classmethod 220 def from_int(cls, retries, redirect=True, default=None): 221 """ Backwards-compatibility for the old retries format.""" 222 if retries is None: 223 retries = default if default is not None else cls.DEFAULT 224 225 if isinstance(retries, Retry): 226 return retries 227 228 redirect = bool(redirect) and None 229 new_retries = cls(retries, redirect=redirect) 230 log.debug("Converted retries value: %r -> %r", retries, new_retries) 231 return new_retries 232 233 def get_backoff_time(self): 234 """ Formula for computing the current backoff 235 236 :rtype: float 237 """ 238 # We want to consider only the last consecutive errors sequence (Ignore redirects). 239 consecutive_errors_len = len( 240 list( 241 takewhile(lambda x: x.redirect_location is None, reversed(self.history)) 242 ) 243 ) 244 if consecutive_errors_len <= 1: 245 return 0 246 247 backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1)) 248 return min(self.BACKOFF_MAX, backoff_value) 249 250 def parse_retry_after(self, retry_after): 251 # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4 252 if re.match(r"^\s*[0-9]+\s*$", retry_after): 253 seconds = int(retry_after) 254 else: 255 retry_date_tuple = email.utils.parsedate(retry_after) 256 if retry_date_tuple is None: 257 raise InvalidHeader("Invalid Retry-After header: %s" % retry_after) 258 retry_date = time.mktime(retry_date_tuple) 259 seconds = retry_date - time.time() 260 261 if seconds < 0: 262 seconds = 0 263 264 return seconds 265 266 def get_retry_after(self, response): 267 """ Get the value of Retry-After in seconds. """ 268 269 retry_after = response.getheader("Retry-After") 270 271 if retry_after is None: 272 return None 273 274 return self.parse_retry_after(retry_after) 275 276 def sleep_for_retry(self, response=None): 277 retry_after = self.get_retry_after(response) 278 if retry_after: 279 time.sleep(retry_after) 280 return True 281 282 return False 283 284 def _sleep_backoff(self): 285 backoff = self.get_backoff_time() 286 if backoff <= 0: 287 return 288 time.sleep(backoff) 289 290 def sleep(self, response=None): 291 """ Sleep between retry attempts. 292 293 This method will respect a server's ``Retry-After`` response header 294 and sleep the duration of the time requested. If that is not present, it 295 will use an exponential backoff. By default, the backoff factor is 0 and 296 this method will return immediately. 297 """ 298 299 if self.respect_retry_after_header and response: 300 slept = self.sleep_for_retry(response) 301 if slept: 302 return 303 304 self._sleep_backoff() 305 306 def _is_connection_error(self, err): 307 """ Errors when we're fairly sure that the server did not receive the 308 request, so it should be safe to retry. 309 """ 310 if isinstance(err, ProxyError): 311 err = err.original_error 312 return isinstance(err, ConnectTimeoutError) 313 314 def _is_read_error(self, err): 315 """ Errors that occur after the request has been started, so we should 316 assume that the server began processing it. 317 """ 318 return isinstance(err, (ReadTimeoutError, ProtocolError)) 319 320 def _is_method_retryable(self, method): 321 """ Checks if a given HTTP method should be retried upon, depending if 322 it is included on the method whitelist. 323 """ 324 if self.method_whitelist and method.upper() not in self.method_whitelist: 325 return False 326 327 return True 328 329 def is_retry(self, method, status_code, has_retry_after=False): 330 """ Is this method/status code retryable? (Based on whitelists and control 331 variables such as the number of total retries to allow, whether to 332 respect the Retry-After header, whether this header is present, and 333 whether the returned status code is on the list of status codes to 334 be retried upon on the presence of the aforementioned header) 335 """ 336 if not self._is_method_retryable(method): 337 return False 338 339 if self.status_forcelist and status_code in self.status_forcelist: 340 return True 341 342 return ( 343 self.total 344 and self.respect_retry_after_header 345 and has_retry_after 346 and (status_code in self.RETRY_AFTER_STATUS_CODES) 347 ) 348 349 def is_exhausted(self): 350 """ Are we out of retries? """ 351 retry_counts = (self.total, self.connect, self.read, self.redirect, self.status) 352 retry_counts = list(filter(None, retry_counts)) 353 if not retry_counts: 354 return False 355 356 return min(retry_counts) < 0 357 358 def increment( 359 self, 360 method=None, 361 url=None, 362 response=None, 363 error=None, 364 _pool=None, 365 _stacktrace=None, 366 ): 367 """ Return a new Retry object with incremented retry counters. 368 369 :param response: A response object, or None, if the server did not 370 return a response. 371 :type response: :class:`~urllib3.response.HTTPResponse` 372 :param Exception error: An error encountered during the request, or 373 None if the response was received successfully. 374 375 :return: A new ``Retry`` object. 376 """ 377 if self.total is False and error: 378 # Disabled, indicate to re-raise the error. 379 raise six.reraise(type(error), error, _stacktrace) 380 381 total = self.total 382 if total is not None: 383 total -= 1 384 385 connect = self.connect 386 read = self.read 387 redirect = self.redirect 388 status_count = self.status 389 cause = "unknown" 390 status = None 391 redirect_location = None 392 393 if error and self._is_connection_error(error): 394 # Connect retry? 395 if connect is False: 396 raise six.reraise(type(error), error, _stacktrace) 397 elif connect is not None: 398 connect -= 1 399 400 elif error and self._is_read_error(error): 401 # Read retry? 402 if read is False or not self._is_method_retryable(method): 403 raise six.reraise(type(error), error, _stacktrace) 404 elif read is not None: 405 read -= 1 406 407 elif response and response.get_redirect_location(): 408 # Redirect retry? 409 if redirect is not None: 410 redirect -= 1 411 cause = "too many redirects" 412 redirect_location = response.get_redirect_location() 413 status = response.status 414 415 else: 416 # Incrementing because of a server error like a 500 in 417 # status_forcelist and a the given method is in the whitelist 418 cause = ResponseError.GENERIC_ERROR 419 if response and response.status: 420 if status_count is not None: 421 status_count -= 1 422 cause = ResponseError.SPECIFIC_ERROR.format(status_code=response.status) 423 status = response.status 424 425 history = self.history + ( 426 RequestHistory(method, url, error, status, redirect_location), 427 ) 428 429 new_retry = self.new( 430 total=total, 431 connect=connect, 432 read=read, 433 redirect=redirect, 434 status=status_count, 435 history=history, 436 ) 437 438 if new_retry.is_exhausted(): 439 raise MaxRetryError(_pool, url, error or ResponseError(cause)) 440 441 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) 442 443 return new_retry 444 445 def __repr__(self): 446 return ( 447 "{cls.__name__}(total={self.total}, connect={self.connect}, " 448 "read={self.read}, redirect={self.redirect}, status={self.status})" 449 ).format(cls=type(self), self=self) 450 451 452# For backwards compatibility (equivalent to pre-v1.9): 453Retry.DEFAULT = Retry(3) 454