# -*- coding: utf-8 -*-
"""
oauthlib.common
~~~~~~~~~~~~~~~

This module provides data structures and utilities common
to all implementations of OAuth.
"""
from __future__ import absolute_import, unicode_literals

import collections
import datetime
import logging
import re
import sys
import time

# Prefer the CSPRNG-backed helpers (Python 3.6+); fall back to ``random``
# equivalents on older interpreters. Note random.SystemRandom is still
# OS-entropy backed, so token generation remains non-guessable either way.
try:
    from secrets import randbits
    from secrets import SystemRandom
except ImportError:
    from random import getrandbits as randbits
    from random import SystemRandom
# Python 2 / Python 3 urllib shims.
try:
    from urllib import quote as _quote
    from urllib import unquote as _unquote
    from urllib import urlencode as _urlencode
except ImportError:
    from urllib.parse import quote as _quote
    from urllib.parse import unquote as _unquote
    from urllib.parse import urlencode as _urlencode
try:
    import urlparse
except ImportError:
    import urllib.parse as urlparse

UNICODE_ASCII_CHARACTER_SET = ('abcdefghijklmnopqrstuvwxyz'
                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                               '0123456789')

CLIENT_ID_CHARACTER_SET = (r' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN'
                           'OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}')

# Matches "<anything>password<anything>=" / "<anything>token<anything>=" key
# prefixes so the value that follows can be masked in logs/reprs.
SANITIZE_PATTERN = re.compile(r'([^&;]*(?:password|token)[^=]*=)[^&;]+',
                              re.IGNORECASE)
# A '%' must be followed by exactly two hex digits.
# NOTE(review): a lone '%' or single trailing hex digit at end-of-string
# (e.g. 'a=%A') is not caught by this pattern — preserved as-is to keep
# existing validation behavior.
INVALID_HEX_PATTERN = re.compile(r'%[^0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]')

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789'
               '_.-')

log = logging.getLogger('oauthlib')

PY3 = sys.version_info[0] == 3

if PY3:
    unicode_type = str
else:
    unicode_type = unicode


# 'safe' must be bytes (Python 2.6 requires bytes, other versions allow either)
def quote(s, safe=b'/'):
    """Percent-encode ``s``, always returning unicode text.

    :param s: Text or bytes to encode.
    :param safe: Characters exempt from encoding (bytes for 2.6 compat).
    """
    s = s.encode('utf-8') if isinstance(s, unicode_type) else s
    s = _quote(s, safe)
    # PY3 always returns unicode. PY2 may return either, depending on whether
    # it had to modify the string.
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return s


def unquote(s):
    """Percent-decode ``s``, always returning unicode text."""
    s = _unquote(s)
    # PY3 always returns unicode. PY2 seems to always return what you give it,
    # which differs from quote's behavior. Just to be safe, make sure it is
    # unicode before we return.
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return s


def urlencode(params):
    """Serialize a sequence of two-element tuples to a unicode query string."""
    utf8_params = encode_params_utf8(params)
    urlencoded = _urlencode(utf8_params)
    if isinstance(urlencoded, unicode_type):  # PY3 returns unicode
        return urlencoded
    else:
        return urlencoded.decode("utf-8")


def encode_params_utf8(params):
    """Ensures that all parameters in a list of 2-element tuples are encoded to
    bytestrings using UTF-8
    """
    encoded = []
    for k, v in params:
        encoded.append((
            k.encode('utf-8') if isinstance(k, unicode_type) else k,
            v.encode('utf-8') if isinstance(v, unicode_type) else v))
    return encoded


def decode_params_utf8(params):
    """Ensures that all parameters in a list of 2-element tuples are decoded to
    unicode using UTF-8.
    """
    decoded = []
    for k, v in params:
        decoded.append((
            k.decode('utf-8') if isinstance(k, bytes) else k,
            v.decode('utf-8') if isinstance(v, bytes) else v))
    return decoded


# Characters permitted in an x-www-form-urlencoded query string.
urlencoded = set(always_safe) | set('=&;:%+~,*@!()/?\'$')


def urldecode(query):
    """Decode a query string in x-www-form-urlencoded format into a sequence
    of two-element tuples.

    Unlike urlparse.parse_qsl(..., strict_parsing=True) urldecode will enforce
    correct formatting of the query string by validation. If validation fails
    a ValueError will be raised. urllib.parse_qsl will only raise errors if
    any of name-value pairs omits the equals sign.
    """
    # Check if query contains invalid characters
    if query and not set(query) <= urlencoded:
        error = ("Error trying to decode a non urlencoded string. "
                 "Found invalid characters: %s "
                 "in the string: '%s'. "
                 "Please ensure the request/response body is "
                 "x-www-form-urlencoded.")
        raise ValueError(error % (set(query) - urlencoded, query))

    # Check for correctly hex encoded values using a regular expression
    # All encoded values begin with % followed by two hex characters
    # correct = %00, %A0, %0A, %FF
    # invalid = %G0, %5H, %PO
    if INVALID_HEX_PATTERN.search(query):
        raise ValueError('Invalid hex encoding in query string.')

    # We encode to utf-8 prior to parsing because parse_qsl behaves
    # differently on unicode input in python 2 and 3.
    # Python 2.7
    # >>> urlparse.parse_qsl(u'%E5%95%A6%E5%95%A6')
    # u'\xe5\x95\xa6\xe5\x95\xa6'
    # Python 2.7, non unicode input gives the same
    # >>> urlparse.parse_qsl('%E5%95%A6%E5%95%A6')
    # '\xe5\x95\xa6\xe5\x95\xa6'
    # but now we can decode it to unicode
    # >>> urlparse.parse_qsl('%E5%95%A6%E5%95%A6').decode('utf-8')
    # u'\u5566\u5566'
    # Python 3.3 however
    # >>> urllib.parse.parse_qsl(u'%E5%95%A6%E5%95%A6')
    # u'\u5566\u5566'
    query = query.encode(
        'utf-8') if not PY3 and isinstance(query, unicode_type) else query
    # We want to allow queries such as "c2" whereas urlparse.parse_qsl
    # with the strict_parsing flag will not.
    params = urlparse.parse_qsl(query, keep_blank_values=True)

    # unicode all the things
    return decode_params_utf8(params)


def extract_params(raw):
    """Extract parameters and return them as a list of 2-tuples.

    Will successfully extract parameters from urlencoded query strings,
    dicts, or lists of 2-tuples. Empty strings/dicts/lists will return an
    empty list of parameters. Any other input will result in a return
    value of None.
    """
    if isinstance(raw, bytes) or isinstance(raw, unicode_type):
        try:
            params = urldecode(raw)
        except ValueError:
            params = None
    elif hasattr(raw, '__iter__'):
        # A dict, or anything coercible to a list of 2-tuples.
        try:
            dict(raw)
        except ValueError:
            params = None
        except TypeError:
            params = None
        else:
            params = list(raw.items() if isinstance(raw, dict) else raw)
            params = decode_params_utf8(params)
    else:
        params = None

    return params


def generate_nonce():
    """Generate pseudorandom nonce that is unlikely to repeat.

    Per `section 3.3`_ of the OAuth 1 RFC 5849 spec.
    Per `section 3.2.1`_ of the MAC Access Authentication spec.

    A random 64-bit number is appended to the epoch timestamp for both
    randomness and to decrease the likelihood of collisions.

    .. _`section 3.2.1`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1
    .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3
    """
    return unicode_type(unicode_type(randbits(64)) + generate_timestamp())


def generate_timestamp():
    """Get seconds since epoch (UTC).

    Per `section 3.3`_ of the OAuth 1 RFC 5849 spec.
    Per `section 3.2.1`_ of the MAC Access Authentication spec.

    .. _`section 3.2.1`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1
    .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3
    """
    return unicode_type(int(time.time()))


def generate_token(length=30, chars=UNICODE_ASCII_CHARACTER_SET):
    """Generates a non-guessable OAuth token

    OAuth (1 and 2) does not specify the format of tokens except that they
    should be strings of random characters. Tokens should not be guessable
    and entropy when generating the random characters is important. Which is
    why SystemRandom is used instead of the default random.choice method.
    """
    rand = SystemRandom()
    return ''.join(rand.choice(chars) for x in range(length))


def generate_signed_token(private_pem, request):
    """Build an RS256-signed JWT from the request's scope/expiry/claims."""
    import jwt  # deferred: pyjwt is an optional dependency

    now = datetime.datetime.utcnow()

    claims = {
        'scope': request.scope,
        'exp': now + datetime.timedelta(seconds=request.expires_in)
    }

    claims.update(request.claims)

    token = jwt.encode(claims, private_pem, 'RS256')
    token = to_unicode(token, "UTF-8")

    return token


def verify_signed_token(public_pem, token):
    """Decode and verify an RS256-signed JWT; raises on invalid signature."""
    import jwt  # deferred: pyjwt is an optional dependency

    return jwt.decode(token, public_pem, algorithms=['RS256'])


def generate_client_id(length=30, chars=CLIENT_ID_CHARACTER_SET):
    """Generates an OAuth client_id

    OAuth 2 specify the format of client_id in
    https://tools.ietf.org/html/rfc6749#appendix-A.
    """
    return generate_token(length, chars)


def add_params_to_qs(query, params):
    """Extend a query with a list of two-tuples."""
    if isinstance(params, dict):
        params = params.items()
    queryparams = urlparse.parse_qsl(query, keep_blank_values=True)
    queryparams.extend(params)
    return urlencode(queryparams)


def add_params_to_uri(uri, params, fragment=False):
    """Add a list of two-tuples to the uri query components."""
    sch, net, path, par, query, fra = urlparse.urlparse(uri)
    if fragment:
        fra = add_params_to_qs(fra, params)
    else:
        query = add_params_to_qs(query, params)
    return urlparse.urlunparse((sch, net, path, par, query, fra))


def safe_string_equals(a, b):
    """ Near-constant time string comparison.

    Used in order to avoid timing attacks on sensitive information such
    as secret keys during request verification (`rootLabs`_).

    .. _`rootLabs`: http://rdist.root.org/2010/01/07/timing-independent-array-comparison/

    """
    if len(a) != len(b):
        return False

    result = 0
    for x, y in zip(a, b):
        result |= ord(x) ^ ord(y)
    return result == 0


def to_unicode(data, encoding='UTF-8'):
    """Convert a number of different types of objects to unicode."""
    if isinstance(data, unicode_type):
        return data

    if isinstance(data, bytes):
        return unicode_type(data, encoding=encoding)

    if hasattr(data, '__iter__'):
        try:
            dict(data)
        except TypeError:
            pass
        except ValueError:
            # Assume it's a one dimensional data structure
            return (to_unicode(i, encoding) for i in data)
        else:
            # We support 2.6 which lacks dict comprehensions
            if hasattr(data, 'items'):
                data = data.items()
            return dict(((to_unicode(k, encoding), to_unicode(v, encoding))
                         for k, v in data))

    return data


class CaseInsensitiveDict(dict):
    """Basic case insensitive dict with strings only keys."""

    # Maps lowercased key -> original-cased key actually stored in the dict.
    proxy = {}

    def __init__(self, data):
        self.proxy = dict((k.lower(), k) for k in data)
        for k in data:
            self[k] = data[k]

    def __contains__(self, k):
        return k.lower() in self.proxy

    def __delitem__(self, k):
        key = self.proxy[k.lower()]
        super(CaseInsensitiveDict, self).__delitem__(key)
        del self.proxy[k.lower()]

    def __getitem__(self, k):
        key = self.proxy[k.lower()]
        return super(CaseInsensitiveDict, self).__getitem__(key)

    def get(self, k, default=None):
        return self[k] if k in self else default

    def __setitem__(self, k, v):
        super(CaseInsensitiveDict, self).__setitem__(k, v)
        self.proxy[k.lower()] = k

    def update(self, *args, **kwargs):
        super(CaseInsensitiveDict, self).update(*args, **kwargs)
        for k in dict(*args, **kwargs):
            self.proxy[k.lower()] = k


class Request(object):
    """A malleable representation of a signable HTTP request.

    Body argument may contain any data, but parameters will only be decoded if
    they are one of:

    * urlencoded query string
    * dict
    * list of 2-tuples

    Anything else will be treated as raw body data to be passed through
    unmolested.
    """

    def __init__(self, uri, http_method='GET', body=None, headers=None,
                 encoding='utf-8'):
        # Convert to unicode using encoding if given, else assume unicode
        encode = lambda x: to_unicode(x, encoding) if encoding else x

        self.uri = encode(uri)
        self.http_method = encode(http_method)
        self.headers = CaseInsensitiveDict(encode(headers or {}))
        self.body = encode(body)
        self.decoded_body = extract_params(self.body)
        self.oauth_params = []
        self.validator_log = {}

        # Known OAuth/OIDC parameters, exposed as attributes via __getattr__;
        # values from the query string and decoded body override these Nones.
        self._params = {
            "access_token": None,
            "client": None,
            "client_id": None,
            "client_secret": None,
            "code": None,
            "code_challenge": None,
            "code_challenge_method": None,
            "code_verifier": None,
            "extra_credentials": None,
            "grant_type": None,
            "redirect_uri": None,
            "refresh_token": None,
            "request_token": None,
            "response_type": None,
            "scope": None,
            "scopes": None,
            "state": None,
            "token": None,
            "user": None,
            "token_type_hint": None,

            # OpenID Connect
            "response_mode": None,
            "nonce": None,
            "display": None,
            "prompt": None,
            "claims": None,
            "max_age": None,
            "ui_locales": None,
            "id_token_hint": None,
            "login_hint": None,
            "acr_values": None
        }
        self._params.update(dict(urldecode(self.uri_query)))
        self._params.update(dict(self.decoded_body or []))

    def __getattr__(self, name):
        if name in self._params:
            return self._params[name]
        else:
            raise AttributeError(name)

    def __repr__(self):
        body = self.body
        headers = self.headers.copy()
        if body:
            # BUGFIX: the replacement must be a raw string so '\1' is a
            # backreference to the captured "key=" prefix, not chr(1).
            body = SANITIZE_PATTERN.sub(r'\1<SANITIZED>', str(body))
        if 'Authorization' in headers:
            headers['Authorization'] = '<SANITIZED>'
        return ('<oauthlib.Request url="%s", http_method="%s", headers="%s", '
                'body="%s">') % (self.uri, self.http_method, headers, body)

    @property
    def uri_query(self):
        return urlparse.urlparse(self.uri).query

    @property
    def uri_query_params(self):
        if not self.uri_query:
            return []
        return urlparse.parse_qsl(self.uri_query, keep_blank_values=True,
                                  strict_parsing=True)

    @property
    def duplicate_params(self):
        seen_keys = collections.defaultdict(int)
        all_keys = (p[0]
                    for p in (self.decoded_body or []) + self.uri_query_params)
        for k in all_keys:
            seen_keys[k] += 1
        return [k for k, c in seen_keys.items() if c > 1]