1"""passlib.crypto.digest -- crytographic helpers used by the password hashes in passlib
2
3.. versionadded:: 1.7
4"""
5#=============================================================================
6# imports
7#=============================================================================
8from __future__ import division
9# core
10import hashlib
11import logging; log = logging.getLogger(__name__)
12try:
13    # new in py3.4
14    from hashlib import pbkdf2_hmac as _stdlib_pbkdf2_hmac
15    if _stdlib_pbkdf2_hmac.__module__ == "hashlib":
16        # builtin pure-python backends are slightly faster than stdlib's pure python fallback,
17        # so only using stdlib's version if it's backed by openssl's pbkdf2_hmac()
18        log.debug("ignoring pure-python hashlib.pbkdf2_hmac()")
19        _stdlib_pbkdf2_hmac = None
20except ImportError:
21    _stdlib_pbkdf2_hmac = None
22import re
23import os
24from struct import Struct
25from warnings import warn
26# site
27try:
28    # https://pypi.python.org/pypi/fastpbkdf2/
29    from fastpbkdf2 import pbkdf2_hmac as _fast_pbkdf2_hmac
30except ImportError:
31    _fast_pbkdf2_hmac = None
32# pkg
33from passlib import exc
34from passlib.utils import join_bytes, to_native_str, join_byte_values, to_bytes, \
35                          SequenceMixin, as_bool
36from passlib.utils.compat import irange, int_types, unicode_or_bytes_types, PY3, error_from
37from passlib.utils.decor import memoized_property
38# local
39__all__ = [
40    # hash utils
41    "lookup_hash",
42    "HashInfo",
43    "norm_hash_name",
44
45    # hmac utils
46    "compile_hmac",
47
48    # kdfs
49    "pbkdf1",
50    "pbkdf2_hmac",
51]
52
53#=============================================================================
54# generic constants
55#=============================================================================
56
#: largest value that fits in an unsigned 32-bit integer (2**32 - 1)
MAX_UINT32 = 0xFFFFFFFF

#: largest value that fits in an unsigned 64-bit integer (2**64 - 1)
MAX_UINT64 = 0xFFFFFFFFFFFFFFFF
62
63#=============================================================================
64# hash utils
65#=============================================================================
66
#: list of known hash names, searched by the _get_hash_aliases() helper of lookup_hash().
#: each row is a tuple; a name matches a row if it equals ANY element of that row,
#: and the whole row is then returned as the hash's name record.
_known_hash_names = [
    # format: (hashlib/ssl name, iana name or standin, other known aliases ...)

    #----------------------------------------------------
    # hashes with official IANA-assigned names
    # (as of 2012-03 - http://www.iana.org/assignments/hash-function-text-names)
    #----------------------------------------------------
    ("md2", "md2"),  # NOTE: openssl dropped md2 support in v1.0.0
    ("md5", "md5"),
    ("sha1", "sha-1"),
    ("sha224", "sha-224", "sha2-224"),
    ("sha256", "sha-256", "sha2-256"),
    ("sha384", "sha-384", "sha2-384"),
    ("sha512", "sha-512", "sha2-512"),

    # TODO: add sha3 to this table.

    #----------------------------------------------------
    # hashlib/ssl-supported hashes without official IANA names,
    # (hopefully-) compatible stand-ins have been chosen.
    #----------------------------------------------------

    ("blake2b", "blake-2b"),
    ("blake2s", "blake-2s"),
    ("md4", "md4"),
    # NOTE: there was an older "ripemd" and "ripemd-128",
    #       but python 2.7+ resolves "ripemd" -> "ripemd160",
    #       so treating "ripemd" as alias here.
    ("ripemd160", "ripemd-160", "ripemd"),
]
98
99
#: dict mapping hashlib names to hardcoded digest info;
#: so this is available even when hashes aren't present.
#: keys are ``HashInfo.name`` values (the table is regenerated by hand via
#: ``_gen_fallback_info()``); HashInfo consults it to fill in digest_size / block_size
#: when it has to install a stub constructor.
#: NOTE(review): the 'shake128' / 'shake256' keys don't use the 'shake_128' / 'shake_256'
#:               spelling found in ``_fips_algorithms`` -- confirm which one is intended.
_fallback_info = {
    # name: (digest_size, block_size)
    'blake2b': (64, 128),
    'blake2s': (32, 64),
    'md4': (16, 64),
    'md5': (16, 64),
    'sha1': (20, 64),
    'sha224': (28, 64),
    'sha256': (32, 64),
    'sha384': (48, 128),
    'sha3_224': (28, 144),
    'sha3_256': (32, 136),
    'sha3_384': (48, 104),
    'sha3_512': (64, 72),
    'sha512': (64, 128),
    'shake128': (16, 168),
    'shake256': (32, 136),
}
120
121
def _gen_fallback_info():
    """
    maintenance helper which rebuilds the contents of the ``_fallback_info`` dict.

    meant to be run by hand when the hardcoded table needs refreshing;
    nothing calls it at runtime.

    :returns:
        dict mapping ``HashInfo.name`` -> ``(digest_size, block_size)``,
        covering everything in ``hashlib.algorithms_available`` plus ``"md4"``.
    """
    candidates = set(hashlib.algorithms_available)
    candidates.add("md4")

    table = {}
    for alg in sorted(candidates):
        record = lookup_hash(alg)
        table[record.name] = (record.digest_size, record.block_size)
    return table
133
134
#: cache of hash info instances used by lookup_hash();
#: keyed by every digest name / alias that has been resolved, and by hash constructor.
_hash_info_cache = {}
137
def _get_hash_aliases(name):
    """
    internal helper used by :func:`lookup_hash` --
    normalize arbitrary hash name to hashlib format.

    The name is lower-cased, stripped, and has ``_``, space and ``/`` replaced by ``-``;
    a leading ``scram-`` (and then a trailing ``-plus``) is removed.
    If the result isn't listed in ``_known_hash_names``, a best-guess record is
    built from the name itself, and a message is written to this module's logger.

    :arg name:
        unnormalized name

    :returns:
        tuple with 2+ elements: ``(hashlib_name, iana_name|None, ... 0+ aliases)``.
    """
    def find_row(candidate):
        # first table row listing <candidate> under any of its names, else None
        return next((row for row in _known_hash_names if candidate in row), None)

    # normalize input
    original = name
    if not isinstance(name, str):
        name = to_native_str(name, 'utf-8', 'hash name')
    cleaned = re.sub("[_ /]", "-", name.strip().lower())
    if cleaned.startswith("scram-"):  # helper for SCRAM protocol (see passlib.handlers.scram)
        cleaned = cleaned[len("scram-"):]
        if cleaned.endswith("-plus"):
            cleaned = cleaned[:-len("-plus")]

    # standard names and known aliases take priority
    row = find_row(cleaned)
    if row:
        return row

    # see if it at least resembles the "SHA2-256" layout: letters, optional
    # single-digit revision, optional 3-4 digit size.
    match = re.match(r"(?i)^(?P<name>[a-z]+)-?(?P<rev>\d)?-?(?P<size>\d{3,4})?$", cleaned)
    if match is None:
        # just can't make sense of it. return something
        iana_name = cleaned
        hashlib_name = cleaned.replace("-", "_")
        log.warning("normalizing unrecognized hash name and format %r => %r / %r",
                    original, hashlib_name, iana_name)
        return hashlib_name, iana_name

    # rebuild both spellings from the parsed pieces:
    #   iana    -> <name><rev>-<size>
    #   hashlib -> <name><rev>_<size>   (underscore only when a revision is present)
    base, rev, size = match.group("name", "rev", "size")
    family = base + rev if rev else base
    if size:
        iana_name = family + "-" + size
        hashlib_name = family + ("_" if rev else "") + size
    else:
        iana_name = hashlib_name = family

    # the canonical iana spelling may be in the table even though the input wasn't
    row = find_row(iana_name)
    if row:
        return row

    # not found in table, but roughly recognize format. use names we built up as fallback.
    log.info("normalizing unrecognized hash name %r => %r / %r",
             original, hashlib_name, iana_name)
    return hashlib_name, iana_name
200
201
def _get_hash_const(name):
    """
    internal helper used by :func:`lookup_hash` --
    locate the constructor for a named hash.

    Sources are tried in order: a ``hashlib.<name>`` attribute, then
    ``hashlib.new(name)`` (wrapped in a small function), then passlib's
    builtin md4 implementation (for ``"md4"`` only).

    :arg name:
        name (normalized to hashlib format, e.g. ``"sha256"``)

    :returns:
        hash constructor, e.g. ``hashlib.sha256()``;
        or None if hash can't be located.
    """
    # check hashlib.<attr> for an efficient constructor
    # (private names, and a couple of non-digest attributes, are never looked up)
    missing = object()
    skip_attr = name.startswith("_") or name in ("new", "algorithms")
    found = missing if skip_attr else getattr(hashlib, name, missing)
    if found is not missing:
        return found

    # check hashlib.new() in case SSL supports the digest
    # new() should throw ValueError if alg is unknown
    new_ssl_hash = hashlib.new
    try:
        new_ssl_hash(name, b"")
    except ValueError:
        ssl_supported = False
    else:
        ssl_supported = True

    if ssl_supported:
        # create wrapper function
        # XXX: is there a faster way to wrap this?
        def const(msg=b""):
            return new_ssl_hash(name, msg)
        const.__name__ = name
        const.__module__ = "hashlib"
        const.__doc__ = ("wrapper for hashlib.new(%r),\n"
                         "generated by passlib.crypto.digest.lookup_hash()") % name
        return const

    # use builtin md4 as fallback when not supported by hashlib
    if name == "md4":
        from passlib.crypto._md4 import md4
        return md4

    # XXX: any other modules / registries we should check?
    # TODO: add pysha3 support.
    return None
248
249
def lookup_hash(digest,  # *,
                return_unknown=False, required=True):
    """
    Returns a :class:`HashInfo` record containing information about a given hash function.
    Can be used to look up a hash constructor by name, normalize hash name representation, etc.

    :arg digest:
        This can be any of:

        * A string containing a :mod:`!hashlib` digest name (e.g. ``"sha256"``),
        * A string containing an IANA-assigned hash name,
        * A digest constructor function (e.g. ``hashlib.sha256``).
        * An existing :class:`HashInfo` instance (returned unchanged).

        Case is ignored, underscores are converted to hyphens,
        and various other cleanups are made.

    :param required:
        By default (True), this function will throw an :exc:`~passlib.exc.UnknownHashError` if no hash constructor
        can be found, or if the hash is not actually available.
        This also applies when a stub record for the hash was cached
        by an earlier ``required=False`` call.

        If this flag is False, it will instead return a dummy :class:`!HashInfo` record
        which will defer throwing the error until its constructor function is called.
        This is mainly used by :func:`norm_hash_name`.

    :param return_unknown:

        .. deprecated:: 1.7.3

            deprecated, and will be removed in passlib 2.0.
            this acts like inverse of **required**.

    :raises passlib.exc.UnknownHashError:
        if **required** is true and the hash is unknown or unavailable.

    :raises passlib.exc.ExpectedTypeError:
        if **digest** is not a string, :class:`HashInfo`, or callable.

    :returns HashInfo:
        :class:`HashInfo` instance containing information about specified digest.

        Multiple calls resolving to the same hash should always
        return the same :class:`!HashInfo` instance.
    """
    # legacy alias
    # (resolved before the cache check, so that the cache-hit path honors it as well)
    if return_unknown:
        required = False

    # check for cached entry
    cache = _hash_info_cache
    try:
        info = cache[digest]
    except (KeyError, TypeError):
        # NOTE: TypeError is to catch 'TypeError: unhashable type' (e.g. HashInfo)
        pass
    else:
        # the cached record may be a stub stored by an earlier ``required=False`` lookup;
        # callers who asked for a usable hash must get the same error
        # that an uncached lookup would have raised.
        if required and info.error_text is not None:
            raise exc.UnknownHashError(info.error_text, info.name)
        return info

    # resolve ``digest`` to ``const`` & ``name_record``
    cache_by_name = True
    if isinstance(digest, unicode_or_bytes_types):
        # normalize name
        name_list = _get_hash_aliases(digest)
        name = name_list[0]
        assert name

        # if name wasn't normalized to hashlib format,
        # get info for normalized name and reuse it.
        if name != digest:
            info = lookup_hash(name, required=required)
            cache[digest] = info
            return info

        # else look up constructor
        # NOTE: may return None, which is handled by HashInfo constructor
        const = _get_hash_const(name)

        # if mock fips mode is enabled, replace with dummy constructor
        # (to replicate how it would behave on a real fips system).
        if const and mock_fips_mode and name not in _fips_algorithms:
            def const(source=b""):
                raise ValueError("%r disabled for fips by passlib set_mock_fips_mode()" % name)

    elif isinstance(digest, HashInfo):
        # handle border case where HashInfo is passed in.
        return digest

    elif callable(digest):
        # try to lookup digest based on its self-reported name
        # (which we trust to be the canonical "hashlib" name)
        const = digest
        name_list = _get_hash_aliases(const().name)
        name = name_list[0]
        other_const = _get_hash_const(name)
        if other_const is None:
            # this is probably a third-party digest we don't know about,
            # so just pass it on through, and register reverse lookup for its name.
            pass

        elif other_const is const:
            # if we got back same constructor, this is just a known stdlib constructor,
            # which was passed in before we had cached it by name. proceed normally.
            pass

        else:
            # if we got back different object, then ``const`` is something else
            # (such as a mock object), in which case we want to skip caching it by name,
            # as that would conflict with real hash.
            cache_by_name = False

    else:
        raise exc.ExpectedTypeError(digest, "digest name or constructor", "digest")

    # create new instance
    # (raises UnknownHashError here if hash is unusable and ``required`` is set)
    info = HashInfo(const=const, names=name_list, required=required)

    # populate cache
    if const is not None:
        cache[const] = info
    if cache_by_name:
        for alias in name_list:
            if alias:  # (skips iana name if it's empty)
                assert cache.get(alias) in [None, info], "%r already in cache" % alias
                cache[alias] = info
    return info
366
#: UT helper for clearing internal cache
#: (bound ``clear`` method of the ``_hash_info_cache`` dict, attached as a function attribute)
lookup_hash.clear_cache = _hash_info_cache.clear
369
370
def norm_hash_name(name, format="hashlib"):
    """Normalize hash function name (convenience wrapper for :func:`lookup_hash`).

    The lookup is done with ``required=False``, so unavailable hashes don't raise;
    if the hash is completely unknown, a :exc:`~passlib.exc.PasslibRuntimeWarning` is issued.

    :arg name:
        Original hash function name.

        This name can be a Python :mod:`~hashlib` digest name,
        a SCRAM mechanism name, IANA assigned hash name, etc.
        Case is ignored, and underscores are converted to hyphens.

    :param format:
        Naming convention to normalize to.
        Possible values are:

        * ``"hashlib"`` (the default) - normalizes name to be compatible
          with Python's :mod:`!hashlib`.

        * ``"iana"`` - normalizes name to IANA-assigned hash function name.
          For hashes which IANA hasn't assigned a name for, this issues a warning,
          and then uses a heuristic to return a "best guess" name.

    :raises ValueError:
        if **format** is not one of the values above
        (checked only after the lookup has been performed).

    :returns:
        Hash name, returned as native :class:`!str`.
    """
    record = lookup_hash(name, required=False)
    if record.unknown:
        warn("norm_hash_name(): " + record.error_text, exc.PasslibRuntimeWarning)

    if format == "hashlib":
        return record.name
    if format == "iana":
        return record.iana_name
    raise ValueError("unknown format: %r" % (format,))
404
405
class HashInfo(SequenceMixin):
    """
    Record containing information about a given hash algorithm, as returned by :func:`lookup_hash`.

    This class exposes the following attributes:

    .. autoattribute:: const
    .. autoattribute:: digest_size
    .. autoattribute:: block_size
    .. autoattribute:: name
    .. autoattribute:: iana_name
    .. autoattribute:: aliases
    .. autoattribute:: supported

    This object can also be treated as a 3-element sequence
    containing ``(const, digest_size, block_size)``.
    """
    #=========================================================================
    # instance attrs
    #=========================================================================

    #: Canonical / hashlib-compatible name (e.g. ``"sha256"``).
    name = None

    #: IANA assigned name (e.g. ``"sha-256"``), may be ``None`` if unknown.
    iana_name = None

    #: Tuple of other known aliases (may be empty)
    aliases = ()

    #: Hash constructor function (e.g. :func:`hashlib.sha256`)
    const = None

    #: Hash's digest size
    digest_size = None

    #: Hash's block size
    block_size = None

    #: set when hash isn't available, will be filled in with string containing error text
    #: that const() will raise.
    error_text = None

    #: set when error_text is due to hash algorithm being completely unknown
    #: (not just unavailable on current system)
    unknown = False

    #=========================================================================
    # init
    #=========================================================================

    def __init__(self,  # *,
                 const, names, required=True):
        """
        initialize new instance.

        :arg const:
            hash constructor, or ``None`` if no constructor could be located.
        :arg names:
            list of 2+ names. should be list of ``(name, iana_name, ... 0+ aliases)``.
            names must be lower-case. only iana name may be None.
        :param required:
            if True (the default), raise :exc:`~passlib.exc.UnknownHashError` right away
            when the hash turns out to be unusable; if False, install a stub
            constructor which raises that error when it's called.
        """
        # init names
        name = self.name = names[0]
        self.iana_name = names[1]
        self.aliases = names[2:]

        def use_stub_const(msg):
            """
            helper that installs stub constructor which throws specified error <msg>.
            """
            if required:
                # if caller only wants supported digests returned,
                # just throw error immediately...
                raise exc.UnknownHashError(msg, name)

            def const(source=b""):
                raise exc.UnknownHashError(msg, name)
            self.error_text = msg
            self.const = const
            # fill in the sizes from the hardcoded table when we have them;
            # otherwise digest_size / block_size stay None.
            try:
                self.digest_size, self.block_size = _fallback_info[name]
            except KeyError:
                pass

        # handle "constructor not available" case
        if const is None:
            # a record taken from the table means the hash is known, just not available here
            if names in _known_hash_names:
                msg = "unsupported hash: %r" % name
            else:
                msg = "unknown hash: %r" % name
                self.unknown = True
            use_stub_const(msg)
            return

        # create hash instance to inspect
        try:
            hash = const()
        except ValueError as err:
            # per issue 116, FIPS compliant systems will have a constructor;
            # but it will throw a ValueError with this message.  As of 1.7.3,
            # this is reported via the stub constructor / UnknownHashError instead.
            # "ValueError: error:060800A3:digital envelope routines:EVP_DigestInit_ex:disabled for fips"
            if "disabled for fips" in str(err).lower():
                msg = "%r hash disabled for fips" % name
            else:
                msg = "internal error in %r constructor\n(%s: %s)" % (name, type(err).__name__, err)
            use_stub_const(msg)
            return

        # store stats about hash
        self.const = const
        self.digest_size = hash.digest_size
        self.block_size = hash.block_size

        # do sanity check on digest size
        if len(hash.digest()) != hash.digest_size:
            raise RuntimeError("%r constructor failed sanity check" % self.name)

        # do sanity check on name.
        if hash.name != self.name:
            warn("inconsistent digest name: %r resolved to %r, which reports name as %r" %
                 (self.name, const, hash.name), exc.PasslibRuntimeWarning)

    #=========================================================================
    # methods
    #=========================================================================
    def __repr__(self):
        return "<lookup_hash(%r): digest_size=%r block_size=%r>" % \
               (self.name, self.digest_size, self.block_size)

    def _as_tuple(self):
        # values exposed when this record is treated as a sequence
        return self.const, self.digest_size, self.block_size

    @memoized_property
    def supported(self):
        """
        whether hash is available for use
        (if False, constructor will throw UnknownHashError if called)
        """
        return self.error_text is None

    @memoized_property
    def supported_by_fastpbkdf2(self):
        """
        helper to detect if hash is supported by fastpbkdf2()

        ``None`` if fastpbkdf2 isn't installed; otherwise True / False
        depending on whether a one-round trial call accepted this hash name.
        """
        if not _fast_pbkdf2_hmac:
            return None
        try:
            _fast_pbkdf2_hmac(self.name, b"p", b"s", 1)
            return True
        except ValueError:
            # "unsupported hash type"
            return False

    @memoized_property
    def supported_by_hashlib_pbkdf2(self):
        """
        helper to detect if hash is supported by hashlib.pbkdf2_hmac()

        ``None`` if the stdlib backend isn't in use; otherwise True / False
        depending on whether a one-round trial call accepted this hash name.
        """
        if not _stdlib_pbkdf2_hmac:
            return None
        try:
            _stdlib_pbkdf2_hmac(self.name, b"p", b"s", 1)
            return True
        except ValueError:
            # "unsupported hash type"
            return False

    #=========================================================================
    # eoc
    #=========================================================================
575
576
577#---------------------------------------------------------------------
578# mock fips mode monkeypatch
579#---------------------------------------------------------------------
580
#: True while mock fips mode is switched on (see _set_mock_fips_mode()).
mock_fips_mode = False


#: algorithms allowed under FIPS mode (subset of hashlib.algorithms_available);
#: per https://csrc.nist.gov/Projects/Hash-Functions FIPS 202 list.
_fips_algorithms = {
    # FIPS 180-4  and FIPS 202
    'sha1',
    'sha224',
    'sha256',
    'sha384',
    'sha512',
    # 'sha512/224',
    # 'sha512/256',

    # FIPS 202 only
    'sha3_224',
    'sha3_256',
    'sha3_384',
    'sha3_512',
    'shake_128',
    'shake_256',
}
605
606
def _set_mock_fips_mode(enable=True):
    """
    UT helper which monkeypatches lookup_hash() internals to replicate FIPS mode.

    :param enable:
        new value for the module-level ``mock_fips_mode`` flag (defaults to True).

    lookup_hash()'s cache is cleared afterwards, so records created
    under the previous setting aren't reused.
    """
    global mock_fips_mode
    mock_fips_mode = enable
    lookup_hash.clear_cache()
614
615
# helper for UTs:
# turn on mock fips mode at import time when the PASSLIB_MOCK_FIPS_MODE
# environment variable holds a value which as_bool() interprets as true.
if as_bool(os.environ.get("PASSLIB_MOCK_FIPS_MODE")):
    _set_mock_fips_mode()
619
620#=============================================================================
621# hmac utils
622#=============================================================================
623
#: translation tables used by compile_hmac():
#: 256-entry tables mapping each byte value ``x`` to ``x ^ 0x5C`` / ``x ^ 0x36``;
#: applied via ``key.translate()`` to derive the outer (5C) and inner (36) hmac keys.
_TRANS_5C = join_byte_values((x ^ 0x5C) for x in irange(256))
_TRANS_36 = join_byte_values((x ^ 0x36) for x in irange(256))
627
def compile_hmac(digest, key, multipart=False):
    """
    This function returns an efficient HMAC function, hardcoded with a specific digest & key.
    It can be used via ``hmac = compile_hmac(digest, key)``.

    :arg digest:
        digest name or constructor.

    :arg key:
        secret key as :class:`!bytes` or :class:`!unicode` (unicode will be encoded using utf-8).

    :param multipart:
        request a multipart constructor instead (see return description).

    :returns:
        By default, the returned function has the signature ``hmac(msg) -> digest output``.

        However, if ``multipart=True``, the returned function has the signature
        ``hmac() -> update, finalize``, where ``update(msg)`` may be called multiple times,
        and ``finalize() -> digest_output`` may be repeatedly called at any point to
        calculate the HMAC digest so far.

        The returned object will also have a ``digest_info`` attribute, containing
        the :class:`HashInfo` instance for the specified digest.

    This function exists, and has the weird signature it does, in order to squeeze out
    as much efficiency as possible, by omitting much of the setup cost
    and features of the stdlib :mod:`hmac` module.
    """
    # all the following was adapted from stdlib's hmac module

    # resolve digest (cached)
    info = lookup_hash(digest)
    const, digest_size, block_size = info
    assert block_size >= 16, "block size too small"

    # prepare key: encode if needed, hash it down if it's longer than one block,
    # then null-pad it up to exactly one block.
    if not isinstance(key, bytes):
        key = to_bytes(key, param="key")
    key_size = len(key)
    if key_size > block_size:
        key = const(key).digest()
        key_size = digest_size
    shortfall = block_size - key_size
    if shortfall > 0:
        key += b'\x00' * shortfall

    # create pre-initialized hash objects (inner first, then outer);
    # each hmac call starts from a cheap .copy() of these instead of re-hashing the key.
    start_inner = const(key.translate(_TRANS_36)).copy
    start_outer = const(key.translate(_TRANS_5C)).copy

    if not multipart:

        # single-shot function
        def hmac(msg):
            """generated by compile_hmac()"""
            inner_hash = start_inner()
            inner_hash.update(msg)
            outer_hash = start_outer()
            outer_hash.update(inner_hash.digest())
            return outer_hash.digest()

    else:
        # create multi-part function
        # NOTE: this is slightly slower than the single-shot version,
        #       and should only be used if needed.
        def hmac():
            """generated by compile_hmac(multipart=True)"""
            inner_hash = start_inner()

            def finalize():
                outer_hash = start_outer()
                outer_hash.update(inner_hash.digest())
                return outer_hash.digest()

            return inner_hash.update, finalize

    # add info attr
    hmac.digest_info = info
    return hmac
704
705#=============================================================================
706# pbkdf1
707#=============================================================================
def pbkdf1(digest, secret, salt, rounds, keylen=None):
    """pkcs#5 password-based key derivation v1.5

    :arg digest:
        digest name or constructor.

    :arg secret:
        secret to use when generating the key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :arg salt:
        salt string to use when generating key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :param rounds:
        number of rounds to use to generate key (integer, at least 1).

    :arg keylen:
        number of bytes to generate (if omitted / ``None``, uses digest's native size).
        must be between 0 and the digest's size.

    :raises ValueError:
        if **rounds** is less than 1, or **keylen** is negative or larger than the digest size.

    :returns:
        raw :class:`bytes` of generated key

    .. note::

        This algorithm has been deprecated, new code should use PBKDF2.
        Among other limitations, ``keylen`` cannot be larger
        than the digest size of the specified hash.
    """
    # resolve digest (block size isn't needed here)
    const, digest_size, _block_size = lookup_hash(digest)

    # validate secret & salt
    secret = to_bytes(secret, param="secret")
    salt = to_bytes(salt, param="salt")

    # validate rounds
    if not isinstance(rounds, int_types):
        raise exc.ExpectedTypeError(rounds, "int", "rounds")
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    # validate keylen
    if keylen is None:
        keylen = digest_size
    else:
        if not isinstance(keylen, int_types):
            raise exc.ExpectedTypeError(keylen, "int or None", "keylen")
        if keylen < 0:
            raise ValueError("keylen must be at least 0")
        if keylen > digest_size:
            raise ValueError("keylength too large for digest: %r > %r" %
                             (keylen, digest_size))

    # main pbkdf1 loop: hash (secret + salt), then re-hash the result,
    # for <rounds> hashes in total; key is a prefix of the final digest.
    state = secret + salt
    for _ in irange(rounds):
        state = const(state).digest()
    return state[:keylen]
766
767#=============================================================================
768# pbkdf2
769#=============================================================================
770
#: packs an integer as 4 bytes, big-endian & unsigned;
#: pbkdf2_hmac() uses it to append the block index to the salt.
_pack_uint32 = Struct(">L").pack
772
def pbkdf2_hmac(digest, secret, salt, rounds, keylen=None):
    """pkcs#5 password-based key derivation v2.0 using HMAC + arbitrary digest.

    :arg digest:
        digest name or constructor.

    :arg secret:
        passphrase to use to generate key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :arg salt:
        salt string to use when generating key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :param rounds:
        number of rounds to use to generate key (integer, at least 1).

    :arg keylen:
        number of bytes to generate (at least 1).
        if omitted / ``None``, will use digest's native output size.

    :raises ValueError:
        if **rounds** or **keylen** is less than 1.

    :raises OverflowError:
        if **keylen** would require more than ``MAX_UINT32`` digest-sized blocks.

    :returns:
        raw bytes of generated key

    .. versionchanged:: 1.7

        This function will use the first available of the following backends:

        * `fastpbkdf2 <https://pypi.python.org/pypi/fastpbkdf2>`_
        * :func:`hashlib.pbkdf2_hmac` (only available in py2 >= 2.7.8, and py3 >= 3.4)
        * builtin pure-python backend

        See :data:`passlib.crypto.digest.PBKDF2_BACKENDS` to determine
        which backend(s) are in use.
    """
    # validate secret & salt
    secret = to_bytes(secret, param="secret")
    salt = to_bytes(salt, param="salt")

    # resolve digest
    info = lookup_hash(digest)
    digest_size = info.digest_size

    # validate rounds
    if not isinstance(rounds, int_types):
        raise exc.ExpectedTypeError(rounds, "int", "rounds")
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    # validate keylen
    if keylen is None:
        keylen = digest_size
    else:
        if not isinstance(keylen, int_types):
            raise exc.ExpectedTypeError(keylen, "int or None", "keylen")
        if keylen < 1:
            # XXX: could allow keylen=0, but want to be compat w/ stdlib
            raise ValueError("keylen must be at least 1")

    # find smallest block count s.t. keylen <= block_count * digest_size;
    # make sure block count won't overflow (per pbkdf2 spec)
    # this corresponds to throwing error if keylen > digest_size * MAX_UINT32
    # NOTE: stdlib will throw error at lower bound (keylen > MAX_SINT32)
    # NOTE: have do this before other backends checked, since fastpbkdf2 raises wrong error
    #       (InvocationError, not OverflowError)
    whole_blocks, leftover = divmod(keylen, digest_size)
    block_count = whole_blocks + 1 if leftover else whole_blocks
    if block_count > MAX_UINT32:
        raise OverflowError("keylen too long for digest")

    #
    # check for various high-speed backends
    #

    # ~3x faster than pure-python backend
    # NOTE: have to do this after above guards since fastpbkdf2 lacks bounds checks.
    if info.supported_by_fastpbkdf2:
        return _fast_pbkdf2_hmac(info.name, secret, salt, rounds, keylen)

    # ~1.4x faster than pure-python backend
    # NOTE: have to do this after fastpbkdf2 since hashlib-ssl is slower,
    #       will support larger number of hashes.
    if info.supported_by_hashlib_pbkdf2:
        return _stdlib_pbkdf2_hmac(info.name, secret, salt, rounds, keylen)

    #
    # otherwise use our own implementation
    #

    # generated keyed hmac
    keyed_hmac = compile_hmac(digest, secret)

    # get helper to calculate pbkdf2 inner loop efficiently
    calc_block = _get_pbkdf2_looper(digest_size)

    def derive_block(index):
        # block <index>: seeded with hmac(salt + 4-byte block index),
        # then run through the inner loop for <rounds> rounds.
        return calc_block(keyed_hmac, keyed_hmac(salt + _pack_uint32(index)), rounds)

    # assemble blocks 1..block_count, and trim to requested size
    derived = join_bytes(derive_block(i) for i in irange(1, block_count + 1))
    return derived[:keylen]
871
872#-------------------------------------------------------------------------------------
873# pick best choice for pure-python helper
874# TODO: consider some alternatives, such as C-accelerated xor_bytes helper if available
875#-------------------------------------------------------------------------------------
# NOTE: this env var only exists to support the admin/benchmark_pbkdf2 script.
#       an unset or empty value means "any" (no particular backend is being forced).
_force_backend = os.environ.get("PASSLIB_PBKDF2_BACKEND")
if not _force_backend:
    _force_backend = "any"
878
879if PY3 and _force_backend in ["any", "from-bytes"]:
880    from functools import partial
881
882    def _get_pbkdf2_looper(digest_size):
883        return partial(_pbkdf2_looper, digest_size)
884
885    def _pbkdf2_looper(digest_size, keyed_hmac, digest, rounds):
886        """
887        py3-only implementation of pbkdf2 inner loop;
888        uses 'int.from_bytes' + integer XOR
889        """
890        from_bytes = int.from_bytes
891        BIG = "big"  # endianess doesn't matter, just has to be consistent
892        accum = from_bytes(digest, BIG)
893        for _ in irange(rounds - 1):
894            digest = keyed_hmac(digest)
895            accum ^= from_bytes(digest, BIG)
896        return accum.to_bytes(digest_size, BIG)
897
898    _builtin_backend = "from-bytes"
899
900elif _force_backend in ["any", "unpack", "from-bytes"]:
901    from struct import Struct
902    from passlib.utils import sys_bits
903
904    _have_64_bit = (sys_bits >= 64)
905
906    #: cache used by _get_pbkdf2_looper
907    _looper_cache = {}
908
909    def _get_pbkdf2_looper(digest_size):
910        """
911        We want a helper function which performs equivalent of the following::
912
913          def helper(keyed_hmac, digest, rounds):
914              accum = digest
915              for _ in irange(rounds - 1):
916                  digest = keyed_hmac(digest)
917                  accum ^= digest
918              return accum
919
920        However, no efficient way to implement "bytes ^ bytes" in python.
921        Instead, using approach where we dynamically compile a helper function based
922        on digest size.  Instead of a single `accum` var, this helper breaks the digest
923        into a series of integers.
924
925        It stores these in a series of`accum_<i>` vars, and performs `accum ^= digest`
926        by unpacking digest and perform xor for each "accum_<i> ^= digest_<i>".
927        this keeps everything in locals, avoiding excessive list creation, encoding or decoding,
928        etc.
929
930        :param digest_size:
931            digest size to compile for, in bytes. (must be multiple of 4).
932
933        :return:
934            helper function with call signature outlined above.
935        """
936        #
937        # cache helpers
938        #
939        try:
940            return _looper_cache[digest_size]
941        except KeyError:
942            pass
943
944        #
945        # figure out most efficient struct format to unpack digest into list of native ints
946        #
947        if _have_64_bit and not digest_size & 0x7:
948            # digest size multiple of 8, on a 64 bit system -- use array of UINT64
949            count = (digest_size >> 3)
950            fmt = "=%dQ" % count
951        elif not digest_size & 0x3:
952            if _have_64_bit:
953                # digest size multiple of 4, on a 64 bit system -- use array of UINT64 + 1 UINT32
954                count = (digest_size >> 3)
955                fmt = "=%dQI" % count
956                count += 1
957            else:
958                # digest size multiple of 4, on a 32 bit system -- use array of UINT32
959                count = (digest_size >> 2)
960                fmt = "=%dI" % count
961        else:
962            # stopping here, cause no known hashes have digest size that isn't multiple of 4 bytes.
963            # if needed, could go crazy w/ "H" & "B"
964            raise NotImplementedError("unsupported digest size: %d" % digest_size)
965        struct = Struct(fmt)
966
967        #
968        # build helper source
969        #
970        tdict = dict(
971            digest_size=digest_size,
972            accum_vars=", ".join("acc_%d" % i for i in irange(count)),
973            digest_vars=", ".join("dig_%d" % i for i in irange(count)),
974        )
975
976        # head of function
977        source = (
978                        "def helper(keyed_hmac, digest, rounds):\n"
979                        "    '''pbkdf2 loop helper for digest_size={digest_size}'''\n"
980                        "    unpack_digest = struct.unpack\n"
981                        "    {accum_vars} = unpack_digest(digest)\n"
982                        "    for _ in irange(1, rounds):\n"
983                        "        digest = keyed_hmac(digest)\n"
984                        "        {digest_vars} = unpack_digest(digest)\n"
985        ).format(**tdict)
986
987        # xor digest
988        for i in irange(count):
989            source +=   "        acc_%d ^= dig_%d\n" % (i, i)
990
991        # return result
992        source +=       "    return struct.pack({accum_vars})\n".format(**tdict)
993
994        #
995        # compile helper
996        #
997        code = compile(source, "<generated by passlib.crypto.digest._get_pbkdf2_looper()>", "exec")
998        gdict = dict(irange=irange, struct=struct)
999        ldict = dict()
1000        eval(code, gdict, ldict)
1001        helper = ldict['helper']
1002        if __debug__:
1003            helper.__source__ = source
1004
1005        #
1006        # store in cache
1007        #
1008        _looper_cache[digest_size] = helper
1009        return helper
1010
1011    _builtin_backend = "unpack"
1012
1013else:
1014    assert _force_backend in ["any", "hexlify"]
1015
1016    # XXX: older & slower approach that used int(hexlify()),
1017    #      keeping it around for a little while just for benchmarking.
1018
1019    from binascii import hexlify as _hexlify
1020    from passlib.utils import int_to_bytes
1021
1022    def _get_pbkdf2_looper(digest_size):
1023        return _pbkdf2_looper
1024
1025    def _pbkdf2_looper(keyed_hmac, digest, rounds):
1026        hexlify = _hexlify
1027        accum = int(hexlify(digest), 16)
1028        for _ in irange(rounds - 1):
1029            digest = keyed_hmac(digest)
1030            accum ^= int(hexlify(digest), 16)
1031        return int_to_bytes(accum, len(digest))
1032
1033    _builtin_backend = "hexlify"
1034
# helper for benchmark script -- when the builtin backend was explicitly requested
# by name, drop the fastpbkdf2 & hashlib references so that neither is available.
if _builtin_backend == _force_backend:
    _fast_pbkdf2_hmac = None
    _stdlib_pbkdf2_hmac = None
1038
# expose info about what backends are active:
# optional accelerated backends first (when present), builtin backend always last.
PBKDF2_BACKENDS = []
if _fast_pbkdf2_hmac:
    PBKDF2_BACKENDS.append("fastpbkdf2")
if _stdlib_pbkdf2_hmac:
    PBKDF2_BACKENDS.append("hashlib-ssl")
PBKDF2_BACKENDS.append("builtin-" + _builtin_backend)
1045
# *very* rough estimate of relative speed (compared to sha256 using 'unpack' backend on 64bit arch)
# NOTE: the builtin backends have *some* difference in performance between them,
#       but not enough to matter -- they all share the baseline factor of 1.
PBKDF2_SPEED_FACTOR = (
    3 if "fastpbkdf2" in PBKDF2_BACKENDS
    else 1.4 if "hashlib-ssl" in PBKDF2_BACKENDS
    else 1
)
1054
1055#=============================================================================
1056# eof
1057#=============================================================================
1058