1"""passlib.utils -- helpers for writing password hashes"""
2#=============================================================================
3# imports
4#=============================================================================
5from passlib.utils.compat import JYTHON
6# core
7from binascii import b2a_base64, a2b_base64, Error as _BinAsciiError
8from base64 import b64encode, b64decode
9try:
10    from collections.abc import Sequence
11    from collections.abc import Iterable
12except ImportError:
13    # py2 compat
14    from collections import Sequence
15    from collections import Iterable
16from codecs import lookup as _lookup_codec
17from functools import update_wrapper
18import itertools
19import inspect
20import logging; log = logging.getLogger(__name__)
21import math
22import os
23import sys
24import random
25import re
26if JYTHON: # pragma: no cover -- runtime detection
27    # Jython 2.5.2 lacks stringprep module -
28    # see http://bugs.jython.org/issue1758320
29    try:
30        import stringprep
31    except ImportError:
32        stringprep = None
33        _stringprep_missing_reason = "not present under Jython"
34else:
35    import stringprep
36import time
37if stringprep:
38    import unicodedata
39try:
40    import threading
41except ImportError:
42    # module optional before py37
43    threading = None
44import timeit
45import types
46from warnings import warn
47# site
48# pkg
49from passlib.utils.binary import (
50    # [remove these aliases in 2.0]
51    BASE64_CHARS, AB64_CHARS, HASH64_CHARS, BCRYPT_CHARS,
52    Base64Engine, LazyBase64Engine, h64, h64big, bcrypt64,
53    ab64_encode, ab64_decode, b64s_encode, b64s_decode
54)
55from passlib.utils.decor import (
56    # [remove these aliases in 2.0]
57    deprecated_function,
58    deprecated_method,
59    memoized_property,
60    classproperty,
61    hybrid_method,
62)
63from passlib.exc import ExpectedStringError, ExpectedTypeError
64from passlib.utils.compat import (add_doc, join_bytes, join_byte_values,
65                                  join_byte_elems, irange, imap, PY3, u,
66                                  join_unicode, unicode, byte_elem_value, nextgetter,
67                                  unicode_or_str, unicode_or_bytes_types,
68                                  get_method_function, suppress_cause, PYPY)
69# local
70__all__ = [
71    # constants
72    'JYTHON',
73    'sys_bits',
74    'unix_crypt_schemes',
75    'rounds_cost_values',
76
77    # unicode helpers
78    'consteq',
79    'saslprep',
80
81    # bytes helpers
82    "xor_bytes",
83    "render_bytes",
84
85    # encoding helpers
86    'is_same_codec',
87    'is_ascii_safe',
88    'to_bytes',
89    'to_unicode',
90    'to_native_str',
91
92    # host OS
93    'has_crypt',
94    'test_crypt',
95    'safe_crypt',
96    'tick',
97
98    # randomness
99    'rng',
100    'getrandbytes',
101    'getrandstr',
102    'generate_password',
103
104    # object type / interface tests
105    'is_crypt_handler',
106    'is_crypt_context',
107    'has_rounds_info',
108    'has_salt_info',
109]
110
111#=============================================================================
112# constants
113#=============================================================================
114
# bitsize of system architecture (32 or 64).
# log2 of the largest native integer is just under 31 or 63; adding 1.5 and
# truncating rounds that up to the full word size.
sys_bits = int(math.log(sys.maxsize if PY3 else sys.maxint, 2) + 1.5)

# list of hashes algs supported by crypt() on at least one OS.
# XXX: move to .registry for passlib 2.0?
unix_crypt_schemes = [
    "sha512_crypt", "sha256_crypt",
    "sha1_crypt", "bcrypt",
    "md5_crypt",
    # "bsd_nthash",
    "bsdi_crypt", "des_crypt",
    ]

# list of rounds_cost constants
rounds_cost_values = [ "linear", "log2" ]

# legacy import, will be removed in 1.8
from passlib.exc import MissingBackendError

# internal helpers: empty bytes, empty unicode, and a single unicode space
_BEMPTY = b''
_UEMPTY = u("")
_USPACE = u(" ")

# maximum password size which passlib will allow; see exc.PasswordSizeError.
# taken from the PASSLIB_MAX_PASSWORD_SIZE environment variable when that is
# set to a non-empty value, otherwise defaults to 4096.
MAX_PASSWORD_SIZE = int(os.environ.get("PASSLIB_MAX_PASSWORD_SIZE") or 4096)
141
142#=============================================================================
143# type helpers
144#=============================================================================
145
class SequenceMixin(object):
    """
    mixin which makes a result object behave like a fixed-length sequence.

    a subclass only has to implement :meth:`_as_tuple()`; repr, indexing,
    iteration, length and (in)equality are all delegated to the tuple
    which that method returns.
    """
    def _as_tuple(self):
        """return the object's contents as a tuple (subclass hook)."""
        raise NotImplementedError("implement in subclass")

    def __repr__(self):
        contents = self._as_tuple()
        return repr(contents)

    def __getitem__(self, idx):
        contents = self._as_tuple()
        return contents[idx]

    def __iter__(self):
        contents = self._as_tuple()
        return iter(contents)

    def __len__(self):
        contents = self._as_tuple()
        return len(contents)

    def __eq__(self, other):
        contents = self._as_tuple()
        return contents == other

    def __ne__(self, other):
        # defined explicitly in terms of __eq__ (py2 does not derive it)
        is_equal = self.__eq__(other)
        return not is_equal
171
if PY3:
    # getargspec() is deprecated, use this under py3.
    # even though it's a lot more awkward to get basic info :|

    _VAR_KEYWORD = inspect.Parameter.VAR_KEYWORD
    _VAR_ANY_SET = set([_VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL])

    # parameter kinds which a caller is actually able to pass by name.
    # POSITIONAL_ONLY is deliberately excluded: ``def f(a, /)`` has a
    # parameter named "a", but ``f(a=1)`` is a TypeError.
    _NAMED_KINDS = frozenset([
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    ])

    def accepts_keyword(func, key):
        """test if function accepts specified keyword

        :arg func: function or method to inspect.
        :arg key: name of the keyword argument.
        :returns:
            ``True`` if *key* names a parameter that can be passed by keyword,
            or if the function has a ``**kwds`` catch-all; ``False`` otherwise.
        """
        params = inspect.signature(get_method_function(func)).parameters
        arg = params.get(key)
        if arg is not None and arg.kind in _NAMED_KINDS:
            return True
        # not an explicitly named parameter (or only positional / *args),
        # so the keyword is only accepted if a **kwds parameter absorbs it.
        return any(param.kind == _VAR_KEYWORD for param in params.values())

else:

    def accepts_keyword(func, key):
        """test if function accepts specified keyword

        :arg func: function or method to inspect.
        :arg key: name of the keyword argument.
        :returns:
            ``True`` if *key* is a named argument, or the function
            takes ``**kwds``; ``False`` otherwise.
        """
        spec = inspect.getargspec(get_method_function(func))
        return key in spec.args or spec.keywords is not None
196
def update_mixin_classes(target, add=None, remove=None, append=False,
                         before=None, after=None, dryrun=False):
    """
    add and/or remove mixin classes from a class's list of bases.

    :param target:
        class whose ``__bases__`` will be rewritten.

    :param add:
        class (or iterable of classes) to install as bases of target.
        a mixin is skipped if any current base already is, or inherits
        from, that mixin.

    :param remove:
        class (or iterable of classes) to drop from target's bases.
        classes also listed in *add* are left in place.

    :param append:
        mixins are normally inserted at the front of the base list;
        set this to True to place them at the end instead
        (but never after one of the mixin's own base classes).

    :param after:
        optional class / tuple of classes; when not appending, mixins are
        inserted after the last base which is a subclass of these.

    :param before:
        optional class / tuple of classes; when appending, mixins are
        inserted before the first base which is a subclass of these.

    :param dryrun:
        if True, run all calculations (and raise any errors),
        but leave the target class untouched.
    """
    # accept a bare class in place of a list
    if isinstance(add, type):
        add = [add]
    if remove and isinstance(remove, type):
        remove = [remove]

    new_bases = list(target.__bases__)

    # removal pass -- anything that's about to be (re)added is kept as-is
    for cls in remove or ():
        if add and cls in add:
            continue
        if cls in new_bases:
            new_bases.remove(cls)

    # insertion pass
    for cls in add or ():
        # already provided by an existing base (directly or via inheritance)
        if any(issubclass(existing, cls) for existing in new_bases):
            continue

        if append:
            # default to the very end, but stop early at the first base
            # that is one of the mixin's own ancestors, or a <before> class.
            pos = len(new_bases)
            for i, existing in enumerate(new_bases):
                if issubclass(cls, existing):
                    pos = i
                    break
                if before and issubclass(existing, before):
                    pos = i
                    break
        elif after:
            # default to the front, but scan backwards for the last
            # <after> class, and insert immediately behind it.
            pos = 0
            for offset, existing in enumerate(reversed(new_bases)):
                if issubclass(existing, after):
                    pos = len(new_bases) - offset
                    assert new_bases[pos-1] == existing
                    break
        else:
            pos = 0

        new_bases.insert(pos, cls)

    # commit the result
    if not dryrun:
        target.__bases__ = tuple(new_bases)
280
281#=============================================================================
282# collection helpers
283#=============================================================================
def batch(source, size):
    """
    split iterable into chunks of at most <size> elements.

    sequences are sliced, so each chunk has the same type as a slice of
    <source>. any other iterable yields lazy iterators which share the
    underlying source iterator.

    this is a generator, so argument errors are raised when iteration starts:
    :exc:`ValueError` if <size> is less than 1,
    :exc:`TypeError` if <source> is not iterable.
    """
    if size < 1:
        raise ValueError("size must be positive integer")

    if isinstance(source, Sequence):
        # sliceable: walk the start offsets directly
        for start in range(0, len(source), size):
            yield source[start:start + size]
        return

    if not isinstance(source, Iterable):
        raise TypeError("source must be iterable")

    # generic iterable: pull the first element of each chunk eagerly (so an
    # exhausted source ends the loop), then lazily chain up to size-1 more.
    stream = iter(source)
    for head in stream:
        yield itertools.chain((head,), itertools.islice(stream, size - 1))
308
309#=============================================================================
310# unicode helpers
311#=============================================================================
312
313# XXX: should this be moved to passlib.crypto, or compat backports?
314
def consteq(left, right):
    """Check two strings/bytes for equality.

    This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.
    *left* and *right* must both be of the same type:
    either both unicode, or both bytes.

    Note: If *left* and *right* are of different lengths, or if an error
    occurs, a timing attack could theoretically reveal information about the
    types and lengths of the inputs--but not their values.

    :arg left: first string (unicode or bytes).
    :arg right: second string (same type as *left*).
    :raises TypeError: if the inputs are not both unicode or both bytes.
    :returns: ``True`` if the two strings are equal, ``False`` otherwise.
    """
    # NOTE:
    # resources & discussions considered in the design of this function:
    #   hmac timing attack --
    #       http://rdist.root.org/2009/05/28/timing-attack-in-google-keyczar-library/
    #   python developer discussion surrounding similar function --
    #       http://bugs.python.org/issue15061
    #       http://bugs.python.org/issue14955

    # validate types
    # (is_py3_bytes records whether iterating the inputs yields ints,
    # rather than 1-char strings which need ord())
    if isinstance(left, unicode):
        if not isinstance(right, unicode):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = False
    elif isinstance(left, bytes):
        if not isinstance(right, bytes):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = PY3
    else:
        raise TypeError("inputs must be both unicode or both bytes")

    # do size comparison.
    # NOTE: the double-if construction below is done deliberately, to ensure
    # the same number of operations (including branches) is performed regardless
    # of whether left & right are the same size.
    same_size = (len(left) == len(right))
    if same_size:
        # if sizes are the same, setup loop to perform actual check of contents.
        tmp = left
        result = 0
    if not same_size:
        # if sizes aren't the same, set 'result' so equality will fail regardless
        # of contents. then, to ensure we do exactly 'len(right)' iterations
        # of the loop, just compare 'right' against itself.
        tmp = right
        result = 1

    # run constant-time string comparison:
    # OR together the XOR of every element pair, so any mismatch leaves a
    # non-zero bit in 'result' without the loop ever exiting early.
    # TODO: use izip instead (but first verify it's faster than zip for this case)
    if is_py3_bytes:
        for l,r in zip(tmp, right):
            result |= l ^ r
    else:
        for l,r in zip(tmp, right):
            result |= ord(l) ^ ord(r)
    return result == 0
372
# keep a copy of the pure-python implementation around under a separate name,
# since the stdlib's version throws an error on non-ascii chars in unicode strings.
# our version handles them, but suffers from some underlying VM issues.  still,
# something is better than nothing for plaintext hashes, which need this.
# everything else should use consteq(), since the stdlib one is going to be
# as good / better in the general case.
str_consteq = consteq

try:
    # for py3.3 and up, use the stdlib version
    # (this rebinds the module-level name 'consteq'; 'str_consteq' above
    # still refers to the pure-python implementation)
    from hmac import compare_digest as consteq
except ImportError:
    pass

    # TODO: could check for cryptography package's version,
    #       but only operates on bytes, so would need a wrapper,
    #       or separate consteq() into a unicode & a bytes variant.
    # from cryptography.hazmat.primitives.constant_time import bytes_eq as consteq
389
def splitcomma(source, sep=","):
    """split comma-separated string into list of elements,
    stripping whitespace.

    :arg source: string to split.
    :param sep: separator string (defaults to ``","``); may be longer
        than one character.
    :returns:
        list of elements with surrounding whitespace removed.
        a single trailing separator is ignored, and an empty
        (or separator-only) source yields an empty list.
    """
    source = source.strip()
    # drop one trailing separator -- the *whole* separator, not just its
    # last character, so multi-character separators work too.
    # (an empty sep is left for str.split() to reject, as before.)
    if sep and source.endswith(sep):
        source = source[:-len(sep)]
    if not source:
        return []
    return [elem.strip() for elem in source.split(sep)]
400
def saslprep(source, param="value"):
    """Normalizes unicode strings using SASLPrep stringprep profile.

    The SASLPrep profile is defined in :rfc:`4013`.
    It provides a uniform scheme for normalizing unicode usernames
    and passwords before performing byte-value sensitive operations
    such as hashing. Among other things, it normalizes diacritic
    representations, removes non-printing characters, and forbids
    invalid characters such as ``\\n``. Properly internationalized
    applications should run user passwords through this function
    before hashing.

    :arg source:
        unicode string to normalize & validate

    :param param:
        Optional noun identifying source parameter in error messages
        (Defaults to the string ``"value"``). This is mainly useful to make the caller's error
        messages make more sense contextually.

    :raises ValueError:
        if any characters forbidden by the SASLPrep profile are encountered.

    :raises TypeError:
        if input is not :class:`!unicode`

    :returns:
        normalized unicode string

    .. note::

        This function is not available under Jython,
        as the Jython stdlib is missing the :mod:`!stringprep` module
        (`Jython issue 1758320 <http://bugs.jython.org/issue1758320>`_).

    .. versionadded:: 1.6
    """
    # saslprep - http://tools.ietf.org/html/rfc4013
    # stringprep - http://tools.ietf.org/html/rfc3454
    #              http://docs.python.org/library/stringprep.html

    # validate type
    # XXX: support bytes (e.g. run through want_unicode)?
    #      might be easier to just integrate this into cryptcontext.
    if not isinstance(source, unicode):
        raise TypeError("input must be unicode string, not %s" %
                        (type(source),))

    # mapping stage
    #   - map non-ascii spaces to U+0020 (stringprep C.1.2)
    #   - strip 'commonly mapped to nothing' chars (stringprep B.1)
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = u"".join(
        u" " if in_table_c12(c) else c
        for c in source
        if not in_table_b1(c)
        )

    # normalize to KC form
    data = unicodedata.normalize('NFKC', data)
    if not data:
        return u""

    # check for invalid bi-directional strings.
    # stringprep requires the following:
    #   - chars in C.8 must be prohibited.
    #   - if any R/AL chars in string:
    #       - no L chars allowed in string
    #       - first and last must be R/AL chars
    # this checks if start/end are R/AL chars. if so, prohibited loop
    # will forbid all L chars. if not, prohibited loop will forbid all
    # R/AL chars instead. in both cases, prohibited loop takes care of C.8.
    is_ral_char = stringprep.in_table_d1
    if is_ral_char(data[0]):
        if not is_ral_char(data[-1]):
            raise ValueError("malformed bidi sequence in " + param)
        # forbid L chars within R/AL sequence.
        is_forbidden_bidi_char = stringprep.in_table_d2
    else:
        # forbid R/AL chars if start not setup correctly; L chars allowed.
        is_forbidden_bidi_char = is_ral_char

    # prohibited output - stringprep tables A.1, C.2 - C.9.
    # each entry is (membership test, description); they are tried in this
    # order, and the error text is assembled in exactly one place below so
    # the wording / spacing stays uniform for every table.
    prohibited_tables = (
        (stringprep.in_table_a1, "unassigned code points"),
        (stringprep.in_table_c21_c22, "control characters"),
        (stringprep.in_table_c3, "private use characters"),
        (stringprep.in_table_c4, "non-char code points"),
        (stringprep.in_table_c5, "surrogate codes"),
        (stringprep.in_table_c6, "non-plaintext chars"),
        # XXX: should these have been caught by normalize?
        # if so, should change this to an assert
        (stringprep.in_table_c7, "non-canonical chars"),
        (stringprep.in_table_c8, "display-modifying / deprecated chars"),
        (stringprep.in_table_c9, "tagged characters"),
    )

    for c in data:
        # check for chars mapping stage should have removed
        assert not in_table_b1(c), "failed to strip B.1 in mapping stage"
        assert not in_table_c12(c), "failed to replace C.1.2 in mapping stage"

        # check for forbidden chars
        for in_table, description in prohibited_tables:
            if in_table(c):
                raise ValueError(description + " forbidden in " + param)

        # do bidi constraint check chosen by bidi init, above
        if is_forbidden_bidi_char(c):
            raise ValueError("forbidden bidi character in " + param)

    return data
527
# when the stringprep module could not be imported, shadow the real
# saslprep() with a stub that explains why it is unavailable.
if stringprep is None: # pragma: no cover -- runtime detection
    def saslprep(source, param="value"):
        """stub for saslprep(); always raises :exc:`NotImplementedError`"""
        message = ("saslprep() support requires the 'stringprep' "
                   "module, which is " + _stringprep_missing_reason)
        raise NotImplementedError(message)
534
535#=============================================================================
536# bytes helpers
537#=============================================================================
def render_bytes(source, *args):
    """Perform ``%`` formatting using bytes in a uniform manner across Python 2/3.

    This function is motivated by the fact that
    :class:`bytes` instances do not support ``%`` or ``{}`` formatting under Python 3.
    As a replacement, it converts everything to unicode
    (decoding bytes instances as ``latin-1``), performs the required
    formatting, then encodes the result back to ``latin-1``.

    Calling ``render_bytes(source, *args)`` should function roughly the same as
    ``source % args`` under Python 2.

    :arg source: format template (bytes or unicode).
    :arg args: values to substitute; bytes values are decoded first.
    :returns: formatted result, as bytes.

    .. todo::
        python >= 3.5 added back limited support for bytes %,
        can revisit when 3.3/3.4 is dropped.
    """
    def as_text(value):
        # latin-1 maps each byte to the code point of the same value,
        # so this round-trips arbitrary byte strings
        if isinstance(value, bytes):
            return value.decode("latin-1")
        return value

    template = as_text(source)
    values = tuple(as_text(arg) for arg in args)
    rendered = template % values
    return rendered.encode("latin-1")
559
if PY3:
    # int.from_bytes() / int.to_bytes() are new in py32
    def bytes_to_int(value):
        return int.from_bytes(value, byteorder='big')
    def int_to_bytes(value, count):
        return value.to_bytes(count, byteorder='big')
else:
    # py2 fallback: round-trip through a hex representation
    # XXX: can any of these be sped up?
    from binascii import hexlify, unhexlify
    def bytes_to_int(value):
        hex_digits = hexlify(value)
        return int(hex_digits, 16)
    def int_to_bytes(value, count):
        # zero-pad to two hex digits per requested output byte
        hex_digits = '%0*x' % (count << 1, value)
        return unhexlify(hex_digits)

add_doc(bytes_to_int, "decode byte string as single big-endian integer")
add_doc(int_to_bytes, "encode integer as single big-endian byte string")
576
def xor_bytes(left, right):
    """Perform bitwise-xor of two byte strings (must be same size)

    :returns: byte string with the same length as *left*.
    """
    # treat both inputs as big-endian integers, xor those,
    # then render the result back out at the original width.
    left_value = bytes_to_int(left)
    right_value = bytes_to_int(right)
    return int_to_bytes(left_value ^ right_value, len(left))
580
def repeat_string(source, size):
    """
    repeat or truncate <source> string, so it has length <size>

    :arg source: non-empty string (bytes or unicode) to repeat.
    :arg size: desired length of result.
    :returns: string of the same type as <source>.
    """
    # number of whole copies needed to cover <size> (ceiling division)
    copies = -(-size // len(source))
    repeated = source * copies
    return repeated[:size]
587
588
def utf8_repeat_string(source, size):
    """
    variant of repeat_string() which truncates to nearest UTF8 boundary.

    the repeated string is handed to :func:`utf8_truncate`, rather than
    being sliced at exactly <size> bytes.
    """
    # number of whole copies needed to cover <size> (ceiling division)
    copies = -(-size // len(source))
    repeated = source * copies
    return utf8_truncate(repeated, size)
595
596
597_BNULL = b"\x00"
598_UNULL = u("\x00")
599
def right_pad_string(source, size, pad=None):
    """right-pad or truncate <source> string, so it has length <size>

    :arg source: string (bytes or unicode) to adjust.
    :arg size: desired length of result.
    :param pad:
        padding string; defaults to a null character
        of the same type as <source>.
    """
    missing = size - len(source)
    if missing <= 0:
        # already long enough -- truncate instead
        return source[:size]
    if pad is None:
        if isinstance(source, unicode):
            pad = _UNULL
        else:
            pad = _BNULL
    return source + pad * missing
609
610
def utf8_truncate(source, index):
    """
    helper to truncate UTF8 byte string to nearest character boundary ON OR AFTER <index>.
    returned prefix will always have length of at least <index>, and will stop on the
    first byte that's not a UTF8 continuation byte (128 - 191 inclusive).
    since utf8 should never take more than 4 bytes to encode known unicode values,
    the search gives up once ``index+3`` is reached.

    a negative <index> is counted from the end of <source> (clamped to 0).

    :param bytes source:
    :param int index:
    :rtype: bytes
    :raises TypeError: if <source> is not bytes (via ExpectedTypeError).
    """
    # background:
    #
    # * the top two bits (0xC0) of a UTF8 byte are one of:
    #   00 / 01 -- ascii char
    #   10      -- continuation of multibyte char
    #   11      -- start of multibyte char
    #   so it is safe to cut in front of any byte whose top bits aren't "10".
    #
    # * a multibyte char has at most 3 continuation bytes, so at most 3 bytes
    #   need to be examined. stopping there (or at end of string) keeps the
    #   behavior predictable even for malformed / non-UTF8 input.

    if not isinstance(source, bytes):
        raise ExpectedTypeError(source, bytes, "source")

    # normalize index
    total = len(source)
    if index < 0:
        index = max(0, index + total)
    if index >= total:
        return source

    # advance the cut point past any continuation bytes, within the window
    limit = min(index + 3, total)
    cut = index
    while cut < limit and byte_elem_value(source[cut]) & 0xC0 == 0x80:
        cut += 1

    result = source[:cut]

    def sanity_check():
        # only meaningful if source is valid utf8;
        # otherwise a byte-level prefix is all we can promise.
        try:
            text = source.decode("utf-8")
        except UnicodeDecodeError:
            return True

        # result must have been cut on a character boundary
        assert text.startswith(result.decode("utf-8"))
        return True

    assert sanity_check()

    return result
677
678#=============================================================================
679# encoding helpers
680#=============================================================================
# sample of 7-bit ascii values (control chars, whitespace, letters,
# punctuation), as bytes and as the equivalent unicode string.
_ASCII_TEST_BYTES = b"\x00\n aA:#!\x7f"
_ASCII_TEST_UNICODE = _ASCII_TEST_BYTES.decode("ascii")

def is_ascii_codec(codec):
    """Test if codec is compatible with 7-bit ascii (e.g. latin-1, utf-8; but not utf-16)

    :arg codec: name of codec to test.
    :returns:
        ``True`` if encoding the ascii sample with *codec*
        reproduces the original ascii bytes.
    """
    encoded_sample = _ASCII_TEST_UNICODE.encode(codec)
    return encoded_sample == _ASCII_TEST_BYTES
687
def is_same_codec(left, right):
    """Check if two codec names are aliases for same codec

    :arg left: first codec name (may be ``None`` / empty).
    :arg right: second codec name (may be ``None`` / empty).
    :returns:
        ``True`` if the names are equal, or both resolve to the same
        registered codec; ``False`` if only one of them is set.
    """
    if left == right:
        return True
    if left and right:
        # compare the canonical names reported by the codec registry
        return _lookup_codec(left).name == _lookup_codec(right).name
    return False
695
696_B80 = b'\x80'[0]
697_U80 = u('\x80')
def is_ascii_safe(source):
    """Check if string (bytes or unicode) contains only 7-bit ascii"""
    # pick the 0x80 threshold whose type matches the elements of <source>
    threshold = _B80 if isinstance(source, bytes) else _U80
    for elem in source:
        if not elem < threshold:
            return False
    return True
702
def to_bytes(source, encoding="utf-8", param="value", source_encoding=None):
    """Helper to normalize input to bytes.

    :arg source:
        Source bytes/unicode to process.

    :arg encoding:
        Target encoding (defaults to ``"utf-8"``).

    :param param:
        Optional name of variable/noun to reference when raising errors

    :param source_encoding:
        If this is specified, and the source is bytes,
        the source will be transcoded from *source_encoding* to *encoding*
        (via unicode).

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * unicode input is encoded using *encoding*.
        * bytes input is returned unchanged when *source_encoding* is not
          specified (or names the same codec as *encoding*).
        * otherwise, bytes input is transcoded from *source_encoding*
          to *encoding*.
    """
    assert encoding
    if isinstance(source, unicode):
        return source.encode(encoding)
    if not isinstance(source, bytes):
        raise ExpectedStringError(source, param)

    # bytes: only transcode when a *different* source encoding was named
    if source_encoding and not is_same_codec(source_encoding, encoding):
        text = source.decode(source_encoding)
        return text.encode(encoding)
    return source
739
def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding to use when decoding bytes instances.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * unicode input is returned unchanged.
        * bytes input is decoded using *encoding*.
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    if isinstance(source, bytes):
        return source.decode(encoding)
    # neither of the supported string types
    raise ExpectedStringError(source, param)
765
if PY3:
    # native str is unicode: pass unicode through, decode bytes
    def to_native_str(source, encoding="utf-8", param="value"):
        if isinstance(source, unicode):
            return source
        if isinstance(source, bytes):
            return source.decode(encoding)
        raise ExpectedStringError(source, param)
else:
    # native str is bytes: pass bytes through, encode unicode
    def to_native_str(source, encoding="utf-8", param="value"):
        if isinstance(source, unicode):
            return source.encode(encoding)
        if isinstance(source, bytes):
            return source
        raise ExpectedStringError(source, param)

add_doc(to_native_str,
    """Take in unicode or bytes, return native string.

    Python 2: encodes unicode using specified encoding, leaves bytes alone.
    Python 3: leaves unicode alone, decodes bytes using specified encoding.

    :raises TypeError: if source is not unicode or bytes.

    :arg source:
        source unicode or bytes string.

    :arg encoding:
        encoding to use when encoding unicode or decoding bytes.
        this defaults to ``"utf-8"``.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :returns: :class:`str` instance
    """)
803
@deprecated_function(deprecated="1.6", removed="1.7")
def to_hash_str(source, encoding="ascii"): # pragma: no cover -- deprecated & unused
    """deprecated, use to_native_str() instead"""
    # same conversion as to_native_str(), with errors worded in terms of a "hash"
    return to_native_str(source, encoding=encoding, param="hash")
808
# recognized (lower-case) spellings for each possible as_bool() result
_true_set = set("true t yes y on 1 enable enabled".split())
_false_set = set("false f no n off 0 disable disabled".split())
_none_set = set(["", "none"])

def as_bool(value, none=None, param="boolean"):
    """
    helper to convert value to boolean.
    recognizes strings such as "true", "false"

    :arg value:
        value to convert. strings (unicode or bytes) are matched
        case-insensitively, ignoring surrounding whitespace,
        against the recognized keywords.

    :param none:
        result to return for ``None``, ``""``, and ``"none"``
        (must be ``True``, ``False``, or ``None``).

    :param param:
        optional noun to reference when raising errors.

    :raises ValueError:
        if value is a string which isn't a recognized keyword.

    :returns:
        ``True``, ``False``, or the value of *none*.
        non-string values fall back to ``bool(value)``.
    """
    assert none in [True, False, None]
    if isinstance(value, (unicode, bytes)):
        clean = value.lower().strip()
        if isinstance(clean, bytes) and not isinstance(clean, str):
            # py3 bytes never compare equal to the str keywords below,
            # so decode first (latin-1 can't fail; non-ascii input just
            # ends up unrecognized). under py2, bytes *is* str: no-op.
            clean = clean.decode("latin-1")
        if clean in _true_set:
            return True
        if clean in _false_set:
            return False
        if clean in _none_set:
            return none
        raise ValueError("unrecognized %s value: %r" % (param, value))
    elif isinstance(value, bool):
        return value
    elif value is None:
        return none
    else:
        return bool(value)
834
835#=============================================================================
836# host OS helpers
837#=============================================================================
838
def is_safe_crypt_input(value):
    """
    UT helper --
    test if value is safe to pass to crypt.crypt();
    under PY3, can't pass non-UTF8 bytes to crypt.crypt.

    :returns:
        ``False`` only for bytes which aren't valid UTF8 when
        ``crypt_accepts_bytes`` is false; ``True`` in every other case.
    """
    if not crypt_accepts_bytes and isinstance(value, bytes):
        # the bytes have to be decodable as utf-8
        try:
            value.decode("utf-8")
        except UnicodeDecodeError:
            return False
    return True
852
try:
    from crypt import crypt as _crypt
except ImportError: # pragma: no cover
    # stdlib crypt module is unavailable -- publish "no support" values for
    # every flag, plus a stub safe_crypt() which never produces a hash.
    _crypt = None
    has_crypt = False
    crypt_accepts_bytes = False
    crypt_needs_lock = False
    _safe_crypt_lock = None
    def safe_crypt(secret, hash):
        return None
else:
    has_crypt = True
    _NULL = '\x00'

    # XXX: replace this with lazy-evaluated bug detection?
    # NOTE: sys.pypy_version_info has 5 elements (major, minor, micro,
    #       releaselevel, serial), so it's trimmed to 3 before comparing.
    #       otherwise e.g. (7, 3, 3, 'final', 0) sorts *after* (7, 3, 3),
    #       and the inclusive upper bound would never match 7.3.3 itself.
    if threading and PYPY and (7, 2, 0) <= sys.pypy_version_info[:3] <= (7, 3, 3):
        #: internal lock used to wrap crypt() calls.
        #: WARNING: if non-passlib code invokes crypt(), this lock won't be enough!
        _safe_crypt_lock = threading.Lock()

        #: detect if crypt.crypt() needs a thread lock around calls.
        crypt_needs_lock = True

    else:
        # no locking required -- use a do-nothing context manager, so that
        # safe_crypt() can use "with _safe_crypt_lock" unconditionally.
        from passlib.utils.compat import nullcontext
        _safe_crypt_lock = nullcontext()
        crypt_needs_lock = False

    # some crypt() variants will return various constant strings when
    # an invalid/unrecognized config string is passed in; instead of
    # returning NULL / None. examples include ":", ":0", "*0", etc.
    # safe_crypt() returns None for any string starting with one of the
    # chars in this string...
    _invalid_prefixes = u("*:!")

    if PY3:

        # * pypy3 (as of v7.3.1) has a crypt which accepts bytes, or ASCII-only unicode.
        # * whereas CPython3 (as of v3.9) has a crypt which doesn't take bytes,
        #   but accepts ANY unicode (which it always encodes to UTF8).
        crypt_accepts_bytes = True
        try:
            _crypt(b"\xEE", "xx")
        except TypeError:
            # CPython will throw TypeError
            crypt_accepts_bytes = False
        except Exception:  # no pragma
            # don't care about other errors this might throw,
            # just want to see if we get past initial type-coercion step.
            # (only catching Exception, so that KeyboardInterrupt / SystemExit
            # raised during import aren't silently discarded.)
            pass

        def safe_crypt(secret, hash):
            if crypt_accepts_bytes:
                # PyPy3 -- all bytes accepted, but unicode encoded to ASCII,
                # so handling that ourselves.
                if isinstance(secret, unicode):
                    secret = secret.encode("utf-8")
                if _BNULL in secret:
                    raise ValueError("null character in secret")
                if isinstance(hash, unicode):
                    hash = hash.encode("ascii")
            else:
                # CPython3's crypt() doesn't take bytes, only unicode; unicode which is then
                # encoded using utf-8 before passing to the C-level crypt().
                # so we have to decode the secret.
                if isinstance(secret, bytes):
                    orig = secret
                    try:
                        secret = secret.decode("utf-8")
                    except UnicodeDecodeError:
                        # bytes which aren't valid utf-8 can't be passed through
                        # crypt() at all, so report "couldn't be hashed".
                        return None
                    # sanity check it encodes back to original byte string,
                    # otherwise when crypt() does its encoding, it'll hash the wrong bytes!
                    assert secret.encode("utf-8") == orig, \
                                "utf-8 spec says this can't happen!"
                if _NULL in secret:
                    raise ValueError("null character in secret")
                if isinstance(hash, bytes):
                    hash = hash.decode("ascii")
            try:
                with _safe_crypt_lock:
                    result = _crypt(secret, hash)
            except OSError:
                # new in py39 -- per https://bugs.python.org/issue39289,
                # crypt() now throws OSError for various things, mainly unknown hash formats
                # translating that to None for now (may revise safe_crypt behavior in future)
                return None
            # NOTE: per issue 113, crypt() may return bytes in some odd cases.
            #       assuming it should still return an ASCII hash though,
            #       or there's a bigger issue at hand.
            if isinstance(result, bytes):
                result = result.decode("ascii")
            # empty result, or one of the known "error marker" prefixes,
            # both mean that crypt() didn't actually hash anything.
            if not result or result[0] in _invalid_prefixes:
                return None
            return result
    else:

        #: see feature-detection in PY3 fork above
        crypt_accepts_bytes = True

        # Python 2 crypt handler
        def safe_crypt(secret, hash):
            if isinstance(secret, unicode):
                secret = secret.encode("utf-8")
            if _NULL in secret:
                raise ValueError("null character in secret")
            if isinstance(hash, unicode):
                hash = hash.encode("ascii")
            with _safe_crypt_lock:
                result = _crypt(secret, hash)
            if not result:
                return None
            # always hand back unicode, to match the PY3 variant above
            result = result.decode("ascii")
            if result[0] in _invalid_prefixes:
                return None
            return result
969
# NOTE: safe_crypt() is defined in one of several branches above (a stub when
#       crypt is missing, plus separate PY3 / PY2 variants), so the shared
#       documentation is passed to add_doc() once here instead of being
#       repeated in each definition.
#       (add_doc is imported from passlib.utils.compat; presumably it
#       assigns the text to ``__doc__`` -- confirm there.)
add_doc(safe_crypt, """Wrapper around stdlib's crypt.

    This is a wrapper around stdlib's :func:`!crypt.crypt`, which attempts
    to provide uniform behavior across Python 2 and 3.

    :arg secret:
        password, as bytes or unicode (unicode will be encoded as ``utf-8``).

    :arg hash:
        hash or config string, as ascii bytes or unicode.

    :returns:
        resulting hash as ascii unicode; or ``None`` if the password
        couldn't be hashed due to one of the issues:

        * :func:`crypt()` not available on platform.

        * Under Python 3, if *secret* is specified as bytes,
          it must be use ``utf-8`` or it can't be passed
          to :func:`crypt()`.

        * Some OSes will return ``None`` if they don't recognize
          the algorithm being used (though most will simply fall
          back to des-crypt).

        * Some OSes will return an error string if the input config
          is recognized but malformed; current code converts these to ``None``
          as well.
    """)
999
def test_crypt(secret, hash):
    """probe whether :func:`crypt.crypt` can reproduce a known hash.

    :arg secret: password to test
    :arg hash: known-good hash of *secret*, used both as the config string and as the expected result
    :returns: True if :func:`safe_crypt` returns exactly *hash*, else False
    """
    # NOTE: safe_crypt() hands back unicode. under py3 a bytes 'hash' could
    #       therefore never compare equal to it, while under py2 unicode and
    #       str(bytes) compare fine -- hence the "unicode_or_str" restriction.
    assert isinstance(hash, unicode_or_str), \
        "hash must be unicode_or_str, got %s" % type(hash)
    assert hash, "hash must be non-empty"
    result = safe_crypt(secret, hash)
    return result == hash
1014
#: clock function used for timing; whichever one :mod:`timeit` picks as its default.
#: ``tick`` is a legacy alias for ``timer``, and will be removed in passlib 2.0
tick = timer = timeit.default_timer
1018
def parse_version(source):
    """helper to parse version string

    locates the first run of dot-separated integers within *source*
    (at least two components are required), and returns it as a tuple of ints.
    returns ``None`` if no such run is present.
    """
    match = re.search(r"(\d+(?:\.\d+)+)", source)
    if match is None:
        return None
    return tuple(map(int, match.group(1).split(".")))
1025
1026#=============================================================================
1027# randomness
1028#=============================================================================
1029
1030#------------------------------------------------------------------------
1031# setup rng for generating salts
1032#------------------------------------------------------------------------
1033
1034# NOTE:
1035# generating salts (e.g. h64_gensalt, below) doesn't require cryptographically
1036# strong randomness. it just requires enough range of possible outputs
1037# that making a rainbow table is too costly. so it should be ok to
1038# fall back on python's builtin mersenne twister prng, as long as it's seeded each time
1039# this module is imported, using a couple of minor entropy sources.
1040
# probe for a working os.urandom(); it signals lack of
# support by raising NotImplementedError.
try:
    os.urandom(1)
except NotImplementedError: # pragma: no cover
    has_urandom = False
else:
    has_urandom = True
1046
def genseed(value=None):
    """generate prng seed value from system resources

    :param value:
        optional extra value to mix into the seed.
        if it looks like an RNG (has ``getstate`` and ``getrandbits``),
        its state (or failing that, a block of its output) is used instead
        of the object itself.

    :returns:
        integer derived from the SHA512 digest of the gathered values.
    """
    from hashlib import sha512

    if hasattr(value, "getstate") and hasattr(value, "getrandbits"):
        # caller handed us an RNG to use as the seed value
        try:
            value = value.getstate()
        except NotImplementedError:
            # e.g. SystemRandom instances refuse to expose their state,
            # so pull 4k worth of output from them instead
            value = value.getrandbits(1 << 15)

    # current process id
    # NOTE: not available in some environments, e.g. GAE
    pid = os.getpid() if hasattr(os, "getpid") else None

    # id of a freshly created object
    # (at least 1 byte of which should be hard to predict)
    fresh_id = id(object())

    # the current time, to whatever precision os uses
    wall_clock = time.time()
    timer_value = tick()

    # if urandom available, might as well mix some bytes in
    urandom_part = os.urandom(32).decode("latin-1") if has_urandom else 0

    # mix everything (including caller's seed value, if any) into a single string...
    text = u("%s %s %s %.15f %.15f %s") % (
        value, pid, fresh_id, wall_clock, timer_value, urandom_part,
    )

    # ...then hash it all up, and return the digest as int/long
    digest = sha512(text.encode("utf-8")).hexdigest()
    return int(digest, 16)
1079
#: module-wide rng instance, used for generating salts
if not has_urandom: # pragma: no cover -- runtime detection
    # no os.urandom() -- fall back to the stdlib prng, seeded via genseed()
    # NOTE: to reseed use ``rng.seed(genseed(rng))``
    # XXX: could reseed on every call
    rng = random.Random(genseed())
else:
    rng = random.SystemRandom()
1086
1087#------------------------------------------------------------------------
1088# some rng helpers
1089#------------------------------------------------------------------------
def getrandbytes(rng, count):
    """return byte-string containing *count* number of randomly generated bytes, using specified rng

    :arg rng:
        random number generator to draw from; only its ``getrandbits()``
        method is used.

    :arg count:
        number of bytes to generate.
        a false value (e.g. ``0``) results in the empty ``_BEMPTY`` value.

    :returns:
        result of passing *count* byte values (ints in range 0..255)
        through ``join_byte_values()``.
    """
    # NOTE: would be nice if this was present in stdlib Random class

    ###just in case rng provides this...
    ##meth = getattr(rng, "getrandbytes", None)
    ##if meth:
    ##    return meth(count)

    if not count:
        return _BEMPTY
    def helper():
        # XXX: break into chunks for large number of bits?
        # draw all ``count * 8`` bits at once, then peel off one byte at a time.
        value = rng.getrandbits(count<<3)
        i = 0
        while i < count:
            yield value & 0xff
            # discard the full byte that was just emitted.
            # (shifting by fewer than 8 bits would make consecutive output
            # bytes overlap, and leave most of the drawn bits unused.)
            value >>= 8
            i += 1
    return join_byte_values(helper())
1110
def getrandstr(rng, charset, count):
    """return string containing *count* number of chars/bytes, whose elements are drawn from specified charset, using specified rng

    :arg rng: random number generator; only ``randrange()`` is used.
    :arg charset: non-empty unicode or bytes string to pick elements from.
    :arg count: number of elements to generate (must be >= 0).
    :raises ValueError: if *count* is negative, or *charset* is empty.
    :returns: string of the same type as *charset*.
    """
    # NOTE: tests determined this is 4x faster than rng.sample(),
    # which is why that's not being used here.

    # validate count & alphabet
    if count < 0:
        raise ValueError("count must be >= 0")
    alphabet_size = len(charset)
    if alphabet_size == 0:
        raise ValueError("alphabet must not be empty")
    if alphabet_size == 1:
        # only one possible outcome, no randomness required
        return charset * count

    # pick a single random number covering every possible output,
    # then read it off as a sequence of base-<alphabet_size> digits.
    def pick_elems():
        # XXX: break into chunks for large number of letters?
        remaining = rng.randrange(0, alphabet_size**count)
        for _ in range(count):
            remaining, index = divmod(remaining, alphabet_size)
            yield charset[index]

    # result type follows the type of the charset
    joiner = join_unicode if isinstance(charset, unicode) else join_byte_elems
    return joiner(pick_elems())
1139
#: default alphabet for generate_password() -- 52 alphanumeric characters
_52charset = '2346789ABCDEFGHJKMNPQRTUVWXYZabcdefghjkmnpqrstuvwxyz'

@deprecated_function(deprecated="1.7", removed="2.0",
                     replacement="passlib.pwd.genword() / passlib.pwd.genphrase()")
def generate_password(size=10, charset=_52charset):
    """generate random password using given length & charset

    :param size:
        number of characters the password should contain.

    :param charset:
        optional string listing the characters to draw from.

        by default, this is the set of normal alphanumeric characters
        minus ``1IiLl0OoS5``, which are left out because they
        are too easily mistaken for one another.

    :returns: :class:`!str` containing randomly generated password.

    .. note::

        With the default character set, and an OS offering :class:`!SystemRandom` support,
        generated passwords should carry 5.7 bits of entropy per character.
    """
    password = getrandstr(rng, charset, size)
    return password
1165
1166#=============================================================================
1167# object type / interface tests
1168#=============================================================================
#: attributes an object must expose for is_crypt_handler() to accept it
_handler_attrs = (
    "name",
    "setting_kwds", "context_kwds",
    "verify", "hash", "identify",
)

def is_crypt_handler(obj):
    """check if object follows the :ref:`password-hash-api`

    this is a duck-typing test: returns True only if *obj* has
    every attribute listed in ``_handler_attrs``.
    """
    # XXX: change to use isinstance(obj, PasswordHash) under py26+?
    for attr in _handler_attrs:
        if not hasattr(obj, attr):
            return False
    return True
1179
#: attributes an object must expose for is_crypt_context() to accept it
_context_attrs = (
    "needs_update",
    "genconfig", "genhash",
    "verify", "encrypt", "identify",
)

def is_crypt_context(obj):
    """check if object appears to be a :class:`~passlib.context.CryptContext` instance

    this is a duck-typing test: returns True only if *obj* has
    every attribute listed in ``_context_attrs``.
    """
    # XXX: change to use isinstance(obj, CryptContext)?
    for attr in _context_attrs:
        if not hasattr(obj, attr):
            return False
    return True
1190
1191##def has_many_backends(handler):
##    "check if handler provides multiple backends"
1193##    # NOTE: should also provide get_backend(), .has_backend(), and .backends attr
1194##    return hasattr(handler, "set_backend")
1195
def has_rounds_info(handler):
    """check if handler provides the optional :ref:`rounds information <rounds-attributes>` attributes

    True only if ``'rounds'`` is listed in ``handler.setting_kwds``,
    and the handler has a ``min_rounds`` attribute which isn't ``None``.
    """
    if 'rounds' not in handler.setting_kwds:
        return False
    return getattr(handler, "min_rounds", None) is not None
1200
def has_salt_info(handler):
    """check if handler provides the optional :ref:`salt information <salt-attributes>` attributes

    True only if ``'salt'`` is listed in ``handler.setting_kwds``,
    and the handler has a ``min_salt_size`` attribute which isn't ``None``.
    """
    if 'salt' not in handler.setting_kwds:
        return False
    return getattr(handler, "min_salt_size", None) is not None
1205
1206##def has_raw_salt(handler):
1207##    "check if handler takes in encoded salt as unicode (False), or decoded salt as bytes (True)"
1208##    sc = getattr(handler, "salt_chars", None)
1209##    if sc is None:
1210##        return None
1211##    elif isinstance(sc, unicode):
1212##        return False
1213##    elif isinstance(sc, bytes):
1214##        return True
1215##    else:
1216##        raise TypeError("handler.salt_chars must be None/unicode/bytes")
1217
1218#=============================================================================
1219# eof
1220#=============================================================================
1221