1# -*- coding: utf-8 -*-
2"""oauthlib.common ~~~~~~~~~~~~~~
3
4This module provides data structures and utilities common
5to all implementations of OAuth.
6"""
7from __future__ import absolute_import, unicode_literals
8
9import collections
10import datetime
11import logging
12import re
13import sys
14import time
15
16try:
17  from secrets import randbits
18  from secrets import SystemRandom
19except ImportError:
20  from random import getrandbits as randbits
21  from random import SystemRandom
22try:
23  from urllib import quote as _quote
24  from urllib import unquote as _unquote
25  from urllib import urlencode as _urlencode
26except ImportError:
27  from urllib.parse import quote as _quote
28  from urllib.parse import unquote as _unquote
29  from urllib.parse import urlencode as _urlencode
30try:
31  import urlparse
32except ImportError:
33  import urllib.parse as urlparse
34
# Default alphabet for generate_token(): ASCII letters and digits only.
UNICODE_ASCII_CHARACTER_SET = ('abcdefghijklmnopqrstuvwxyz'
                               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                               '0123456789')

# Default alphabet for generate_client_id().
# NOTE(review): the first literal is a raw string, so \' contributes a literal
# backslash in addition to the quote; together with the '\\' in the second
# literal the backslash occurs twice in this alphabet. Confirm whether that
# duplicate is intended.
CLIENT_ID_CHARACTER_SET = (r' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN'
                           'OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}')

# Matches a "name=value" pair whose name contains "password" or "token"
# (case-insensitive). Group 1 captures the name up to and including the '=',
# so a substitution can keep the name and replace only the value.
SANITIZE_PATTERN = re.compile(r'([^&;]*(?:password|token)[^=]*=)[^&;]+',
                              re.IGNORECASE)
# Matches a '%' followed by a non-hex character, or a '%' plus one hex digit
# followed by a non-hex character.
# NOTE(review): a '%' at the very end of the string (or followed by a single
# hex digit at the end) is not matched by this pattern.
INVALID_HEX_PATTERN = re.compile(r'%[^0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]')

# Letters, digits and '_', '.', '-'; one half of the `urlencoded` character
# set that urldecode() validates against.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789'
               '_.-')

# Package-wide logger.
log = logging.getLogger('oauthlib')

# True when running under Python 3.
PY3 = sys.version_info[0] == 3

# The text (unicode) string type of the running interpreter.
if PY3:
  unicode_type = str
else:
  unicode_type = unicode
59
60
# `safe` is given as bytes because Python 2.6 insists on it; every other
# supported version accepts either bytes or text.
def quote(s, safe=b'/'):
  """Percent-encode `s` and always hand back a unicode string.

  Text input is encoded to UTF-8 before quoting. Characters listed in `safe`
  are left untouched.
  """
  if isinstance(s, unicode_type):
    s = s.encode('utf-8')
  quoted = _quote(s, safe)
  # Python 3 always yields text; Python 2 may yield either type depending on
  # whether anything had to be escaped, so normalise here.
  return quoted.decode('utf-8') if isinstance(quoted, bytes) else quoted
70
71
def unquote(s):
  """Decode percent-escapes in `s` and always hand back a unicode string."""
  unquoted = _unquote(s)
  # Python 3 already returns text. Python 2 returns the type it was given,
  # so a bytestring result still has to be decoded.
  if not isinstance(unquoted, bytes):
    return unquoted
  return unquoted.decode('utf-8')
80
81
def urlencode(params):
  """Urlencode a list of 2-tuples and return the result as unicode.

  Keys and values are UTF-8 encoded before being handed to the standard
  library encoder.
  """
  encoded = _urlencode(encode_params_utf8(params))
  # Python 3 returns text already; Python 2 returns a bytestring.
  if not isinstance(encoded, unicode_type):
    encoded = encoded.decode('utf-8')
  return encoded
89
90
def encode_params_utf8(params):
  """Return `params` with every unicode key and value encoded as UTF-8.

  `params` is an iterable of 2-element tuples. Items that are not unicode
  strings are passed through unchanged. A new list is returned.
  """

  def _as_bytes(item):
    return item.encode('utf-8') if isinstance(item, unicode_type) else item

  return [(_as_bytes(key), _as_bytes(value)) for key, value in params]
101
102
def decode_params_utf8(params):
  """Return `params` with every bytestring key and value decoded from UTF-8.

  `params` is an iterable of 2-element tuples. Items that are not bytestrings
  are passed through unchanged. A new list is returned.
  """

  def _as_text(item):
    return item.decode('utf-8') if isinstance(item, bytes) else item

  return [(_as_text(key), _as_text(value)) for key, value in params]
113
114
# Characters urldecode() accepts in a query string: the `always_safe` set plus
# the delimiter, escape and sub-delimiter characters listed here.
urlencoded = set(always_safe).union('=&;:%+~,*@!()/?\'$')
116
117
def urldecode(query):
  """Decode an x-www-form-urlencoded query string into a list of 2-tuples.

  In contrast to urlparse.parse_qsl(..., strict_parsing=True), which only
  complains when a name-value pair lacks its equals sign, this function
  validates the string first: it may contain only characters from
  `urlencoded`, and must not match INVALID_HEX_PATTERN (a '%' that is not
  followed by two hex digits, e.g. %G0 or %5H).

  Raises:
    ValueError: if either validation step fails.
  """
  # Step 1: reject characters that cannot occur in an urlencoded string.
  if query:
    invalid_characters = set(query) - urlencoded
    if invalid_characters:
      error = ('Error trying to decode a non urlencoded string. '
               'Found invalid characters: %s '
               "in the string: '%s'. "
               'Please ensure the request/response body is '
               'x-www-form-urlencoded.')
      raise ValueError(error % (invalid_characters, query))

  # Step 2: reject malformed percent escapes.
  if INVALID_HEX_PATTERN.search(query):
    raise ValueError('Invalid hex encoding in query string.')

  # parse_qsl treats unicode input differently on Python 2 and 3. On
  # Python 2 the percent-decoded bytes of a unicode query come back as
  # separate code points (u'\xe5\x95\xa6') rather than the intended text
  # (u'\u5566'), whereas Python 3 decodes them as UTF-8 itself. Feeding
  # Python 2 a UTF-8 bytestring and decoding the parsed result afterwards
  # yields the same answer on both.
  if not PY3 and isinstance(query, unicode_type):
    query = query.encode('utf-8')

  # strict_parsing is deliberately left off so that value-less queries such
  # as "c2" are accepted.
  parsed = urlparse.parse_qsl(query, keep_blank_values=True)

  # Make sure everything handed back is unicode.
  return decode_params_utf8(parsed)
166
167
def extract_params(raw):
  """Extract parameters from `raw` and return them as a list of 2-tuples.

  Accepted inputs are urlencoded query strings, dicts and lists of 2-tuples;
  the empty string/dict/list gives an empty list. Anything that cannot be
  interpreted that way gives None.
  """
  # Strings are treated as urlencoded query strings.
  if isinstance(raw, (bytes, unicode_type)):
    try:
      return urldecode(raw)
    except ValueError:
      return None

  if not hasattr(raw, '__iter__'):
    return None

  # Probe whether the iterable has key/value shape; the resulting dict itself
  # is discarded.
  try:
    dict(raw)
  except (ValueError, TypeError):
    return None

  pairs = raw.items() if isinstance(raw, dict) else raw
  return decode_params_utf8(list(pairs))
195
196
def generate_nonce():
  """Generate a pseudorandom nonce that is unlikely to repeat.

  Per `section 3.3`_ of the OAuth 1 RFC 5849 spec.
  Per `section 3.2.1`_ of the MAC Access Authentication spec.

  The nonce is the decimal form of a random 64-bit number immediately
  followed by the current epoch timestamp, combining randomness with a
  time component to make collisions less likely.

  .. _`section 3.2.1`:
  https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1
  .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3
  """
  random_part = unicode_type(randbits(64))
  return unicode_type(random_part + generate_timestamp())
211
212
def generate_timestamp():
  """Return the whole seconds since the epoch (UTC) as a unicode string.

  Per `section 3.3`_ of the OAuth 1 RFC 5849 spec.
  Per `section 3.2.1`_ of the MAC Access Authentication spec.

  .. _`section 3.2.1`:
  https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1
  .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3
  """
  seconds_since_epoch = int(time.time())
  return unicode_type(seconds_since_epoch)
224
225
def generate_token(length=30, chars=UNICODE_ASCII_CHARACTER_SET):
  """Generate a non-guessable OAuth token of `length` characters.

  Neither OAuth 1 nor OAuth 2 prescribes a token format beyond "a string of
  random characters", but tokens must not be guessable. Each character is
  therefore drawn from `chars` with SystemRandom instead of the default
  random.choice.
  """
  rand = SystemRandom()
  picked = [rand.choice(chars) for _ in range(length)]
  return ''.join(picked)
236
237
def generate_signed_token(private_pem, request):
  """Create an RS256-signed JWT for `request`.

  The token carries `request.scope` under 'scope' and an 'exp' claim set to
  the current UTC time plus `request.expires_in` seconds. Any entries in
  `request.claims` are merged on top and may override those two.

  Args:
    private_pem: key handed to jwt.encode for RS256 signing.
    request: object exposing `scope`, `expires_in` and `claims`.

  Returns:
    The encoded token as a unicode string.
  """
  import jwt

  now = datetime.datetime.utcnow()

  claims = {
      'scope': request.scope,
      'exp': now + datetime.timedelta(seconds=request.expires_in)
  }

  # `claims` defaults to None on a Request that never had extra claims set;
  # dict.update(None) would raise TypeError, so treat that as "no extras".
  claims.update(request.claims or {})

  token = jwt.encode(claims, private_pem, 'RS256')
  # Normalise to text in case the jwt library returned a bytestring.
  return to_unicode(token, 'UTF-8')
254
255
def verify_signed_token(public_pem, token):
  """Verify `token` against `public_pem` and return its decoded claims.

  Only the RS256 algorithm is accepted. The return value, and any exception
  raised for an invalid token, come straight from jwt.decode.
  """
  import jwt

  decoded_claims = jwt.decode(token, public_pem, algorithms=['RS256'])
  return decoded_claims
260
261
def generate_client_id(length=30, chars=CLIENT_ID_CHARACTER_SET):
  """Generate an OAuth client_id of `length` characters drawn from `chars`.

  OAuth 2 specifies the format of client_id in
  https://tools.ietf.org/html/rfc6749#appendix-A.
  """
  return generate_token(length=length, chars=chars)
269
270
def add_params_to_qs(query, params):
  """Append `params` (a dict or list of two-tuples) to a query string.

  The existing pairs in `query` are kept, blank values included, and the
  combined list is urlencoded again.
  """
  extra = params.items() if isinstance(params, dict) else params
  combined = urlparse.parse_qsl(query, keep_blank_values=True)
  combined.extend(extra)
  return urlencode(combined)
278
279
def add_params_to_uri(uri, params, fragment=False):
  """Add a list of two-tuples to the query (or fragment) component of `uri`.

  With `fragment` true the parameters are merged into the URI fragment,
  otherwise into the query string. The other URI components are unchanged.
  """
  scheme, netloc, path, path_params, query, frag = urlparse.urlparse(uri)
  if not fragment:
    query = add_params_to_qs(query, params)
  else:
    frag = add_params_to_qs(frag, params)
  return urlparse.urlunparse((scheme, netloc, path, path_params, query, frag))
288
289
def safe_string_equals(a, b):
  """Near-constant time string comparison.

  Used in order to avoid timing attacks on sensitive information such
  as secret keys during request verification (`rootLabs`_).

  Both text strings and byte strings are supported. Inputs of different
  length return False immediately, so only the comparison of equal-length
  inputs is independent of where they differ.

  Args:
    a: first string (text or bytes).
    b: second string (text or bytes).

  Returns:
    True if both have the same length and the same content, else False.

  .. _`rootLabs`:
  http://rdist.root.org/2010/01/07/timing-independent-array-comparison/
  """
  if len(a) != len(b):
    return False

  result = 0
  for x, y in zip(a, b):
    # Iterating bytes on Python 3 yields ints, on which ord() would raise
    # TypeError; characters still need converting to their ordinal.
    x = x if isinstance(x, int) else ord(x)
    y = y if isinstance(y, int) else ord(y)
    # Accumulate differences instead of returning at the first mismatch.
    result |= x ^ y
  return result == 0
307
308
def to_unicode(data, encoding='UTF-8'):
  """Convert a number of different types of objects to unicode.

  * unicode strings are returned as they are;
  * bytestrings are decoded with `encoding`;
  * anything dict() can consume (a mapping or an iterable of pairs) becomes
    a dict with keys and values converted recursively;
  * other iterables that make dict() raise ValueError become a generator of
    converted items;
  * everything else is returned unchanged.
  """
  if isinstance(data, unicode_type):
    return data

  if isinstance(data, bytes):
    return unicode_type(data, encoding=encoding)

  if not hasattr(data, '__iter__'):
    return data

  # Use dict() purely as a probe for the shape of the iterable.
  try:
    dict(data)
  except TypeError:
    return data
  except ValueError:
    # Assume it's a one dimensional data structure
    return (to_unicode(item, encoding) for item in data)

  pairs = data.items() if hasattr(data, 'items') else data
  # Python 2.6 is supported, so no dict comprehension here.
  return dict((to_unicode(key, encoding), to_unicode(value, encoding))
              for key, value in pairs)
333
334
class CaseInsensitiveDict(dict):
  """Basic case insensitive dict with strings only keys.

  Values live in the underlying dict under the spelling that was used most
  recently to set them. `proxy` maps each lower-cased key to that spelling so
  lookups, membership tests and deletions work regardless of case.
  """

  # Class-level fallback only; __init__ gives every instance its own mapping.
  proxy = {}

  def __init__(self, data):
    """Populate the dict from the mapping `data`."""
    self.proxy = {}
    for k in data:
      self[k] = data[k]

  def __contains__(self, k):
    return k.lower() in self.proxy

  def __delitem__(self, k):
    key = self.proxy[k.lower()]
    super(CaseInsensitiveDict, self).__delitem__(key)
    del self.proxy[k.lower()]

  def __getitem__(self, k):
    key = self.proxy[k.lower()]
    return super(CaseInsensitiveDict, self).__getitem__(key)

  def get(self, k, default=None):
    return self[k] if k in self else default

  def __setitem__(self, k, v):
    lowered = k.lower()
    previous = self.proxy.get(lowered)
    if previous is not None and previous != k:
      # The key already exists under another spelling. Drop that entry so
      # the underlying dict never holds two items for one logical key.
      super(CaseInsensitiveDict, self).pop(previous, None)
    super(CaseInsensitiveDict, self).__setitem__(k, v)
    self.proxy[lowered] = k

  def update(self, *args, **kwargs):
    """Update from a mapping / iterable of pairs and/or keyword arguments."""
    # Build the items once (a one-shot iterator can only be consumed a
    # single time) and route every item through __setitem__ so `proxy`
    # always stays in step with the stored keys.
    for k, v in dict(*args, **kwargs).items():
      self[k] = v
368
369
class Request(object):
  """A malleable representation of a signable HTTP request.

  Body argument may contain any data, but parameters will only be decoded if
  they are one of:

  * urlencoded query string
  * dict
  * list of 2-tuples

  Anything else will be treated as raw body data to be passed through
  unmolested.

  In addition to the attributes assigned in ``__init__``, every key of
  ``_params`` (the known OAuth 2 / OpenID Connect parameter names) can be
  read as an attribute. They default to None and are filled from the URI
  query string first and the decoded body second.
  """

  def __init__(self,
               uri,
               http_method='GET',
               body=None,
               headers=None,
               encoding='utf-8'):
    # Convert to unicode using encoding if given, else assume unicode
    def encode(x):
      return to_unicode(x, encoding) if encoding else x

    self.uri = encode(uri)
    self.http_method = encode(http_method)
    self.headers = CaseInsensitiveDict(encode(headers or {}))
    self.body = encode(body)
    # None when the body is not a recognised parameter container.
    self.decoded_body = extract_params(self.body)
    self.oauth_params = []
    self.validator_log = {}

    self._params = {
        'access_token': None,
        'client': None,
        'client_id': None,
        'client_secret': None,
        'code': None,
        'code_challenge': None,
        'code_challenge_method': None,
        'code_verifier': None,
        'extra_credentials': None,
        'grant_type': None,
        'redirect_uri': None,
        'refresh_token': None,
        'request_token': None,
        'response_type': None,
        'scope': None,
        'scopes': None,
        'state': None,
        'token': None,
        'user': None,
        'token_type_hint': None,

        # OpenID Connect
        'response_mode': None,
        'nonce': None,
        'display': None,
        'prompt': None,
        'claims': None,
        'max_age': None,
        'ui_locales': None,
        'id_token_hint': None,
        'login_hint': None,
        'acr_values': None
    }
    # Body parameters are applied last and so win over query parameters.
    self._params.update(dict(urldecode(self.uri_query)))
    self._params.update(dict(self.decoded_body or []))

  def __getattr__(self, name):
    """Expose the entries of `_params` as read-only attributes."""
    # __getattr__ only runs after normal lookup failed. If `_params` itself
    # is missing (an instance created without __init__), looking it up below
    # would re-enter this method without end, so fail cleanly instead.
    if name == '_params':
      raise AttributeError(name)
    if name in self._params:
      return self._params[name]
    raise AttributeError(name)

  def __repr__(self):
    """Return a debug representation with credentials masked."""
    body = self.body
    headers = self.headers.copy()
    if body:
      # Raw string on purpose: r'\1' is the backreference that keeps the
      # captured "name=" prefix, so only the secret value is replaced.
      body = SANITIZE_PATTERN.sub(r'\1<SANITIZED>', str(body))
    # The copy is a plain, case-sensitive dict, so compare the names
    # case-insensitively to mask the header whatever its spelling.
    for name in list(headers):
      if name.lower() == 'authorization':
        headers[name] = '<SANITIZED>'
    return ('<oauthlib.Request url="%s", http_method="%s", headers="%s", '
            'body="%s">') % (
        self.uri, self.http_method, headers, body)

  @property
  def uri_query(self):
    """The raw query string component of the request URI."""
    return urlparse.urlparse(self.uri).query

  @property
  def uri_query_params(self):
    """The URI query parsed into a list of 2-tuples ([] if there is none).

    Parsing is strict, so a malformed query raises ValueError.
    """
    if not self.uri_query:
      return []
    return urlparse.parse_qsl(
        self.uri_query, keep_blank_values=True, strict_parsing=True)

  @property
  def duplicate_params(self):
    """Names that occur more than once across the body and the URI query."""
    seen_keys = collections.defaultdict(int)
    all_keys = (p[0] for p in (self.decoded_body or []) + self.uri_query_params)
    for k in all_keys:
      seen_keys[k] += 1
    return [k for k, c in seen_keys.items() if c > 1]
473