"""passlib.crypto.digest -- cryptographic helpers used by the password hashes in passlib

.. versionadded:: 1.7
"""
#=============================================================================
# imports
#=============================================================================
from __future__ import division
# core
import hashlib
import logging
log = logging.getLogger(__name__)
try:
    # new in py3.4
    from hashlib import pbkdf2_hmac as _stdlib_pbkdf2_hmac
    if _stdlib_pbkdf2_hmac.__module__ == "hashlib":
        # builtin pure-python backends are slightly faster than stdlib's pure python fallback,
        # so only using stdlib's version if it's backed by openssl's pbkdf2_hmac()
        log.debug("ignoring pure-python hashlib.pbkdf2_hmac()")
        _stdlib_pbkdf2_hmac = None
except ImportError:
    _stdlib_pbkdf2_hmac = None
import re
import os
from struct import Struct
from warnings import warn
# site
try:
    # https://pypi.python.org/pypi/fastpbkdf2/
    from fastpbkdf2 import pbkdf2_hmac as _fast_pbkdf2_hmac
except ImportError:
    _fast_pbkdf2_hmac = None
# pkg
from passlib import exc
from passlib.utils import join_bytes, to_native_str, join_byte_values, to_bytes, \
                          SequenceMixin, as_bool
from passlib.utils.compat import irange, int_types, unicode_or_bytes_types, PY3, error_from
from passlib.utils.decor import memoized_property
# local
__all__ = [
    # hash utils
    "lookup_hash",
    "HashInfo",
    "norm_hash_name",

    # hmac utils
    "compile_hmac",

    # kdfs
    "pbkdf1",
    "pbkdf2_hmac",
]

#=============================================================================
# generic constants
#=============================================================================

#: max 32-bit value
MAX_UINT32 = (1 << 32) - 1

#: max 64-bit value
MAX_UINT64 = (1 << 64) - 1

#=============================================================================
# hash utils
#=============================================================================

#: list of known hash names, used by lookup_hash()'s _get_hash_aliases() helper
_known_hash_names = [
    # format: (hashlib/ssl name, iana name or standin, other known aliases ...)

    #----------------------------------------------------
    # hashes with official IANA-assigned names
    # (as of 2012-03 - http://www.iana.org/assignments/hash-function-text-names)
    #----------------------------------------------------
    ("md2", "md2"),  # NOTE: openssl dropped md2 support in v1.0.0
    ("md5", "md5"),
    ("sha1", "sha-1"),
    ("sha224", "sha-224", "sha2-224"),
    ("sha256", "sha-256", "sha2-256"),
    ("sha384", "sha-384", "sha2-384"),
    ("sha512", "sha-512", "sha2-512"),

    # TODO: add sha3 to this table.

    #----------------------------------------------------
    # hashlib/ssl-supported hashes without official IANA names,
    # (hopefully-) compatible stand-ins have been chosen.
    #----------------------------------------------------

    ("blake2b", "blake-2b"),
    ("blake2s", "blake-2s"),
    ("md4", "md4"),
    # NOTE: there was an older "ripemd" and "ripemd-128",
    #       but python 2.7+ resolves "ripemd" -> "ripemd160",
    #       so treating "ripemd" as alias here.
    ("ripemd160", "ripemd-160", "ripemd"),
]


#: dict mapping hashlib names to hardcoded digest info;
#: so this is available even when hashes aren't present.
_fallback_info = {
    # name: (digest_size, block_size)
    'blake2b': (64, 128),
    'blake2s': (32, 64),
    'md4': (16, 64),
    'md5': (16, 64),
    'sha1': (20, 64),
    'sha224': (28, 64),
    'sha256': (32, 64),
    'sha384': (48, 128),
    'sha3_224': (28, 144),
    'sha3_256': (32, 136),
    'sha3_384': (48, 104),
    'sha3_512': (64, 72),
    'sha512': (64, 128),
    'shake128': (16, 168),
    'shake256': (32, 136),
}


def _gen_fallback_info():
    """
    internal helper used to generate ``_fallback_info`` dict.
    currently only run manually to update the above list;
    not invoked at runtime.
    """
    out = {}
    for alg in sorted(hashlib.algorithms_available | set(["md4"])):
        info = lookup_hash(alg)
        out[info.name] = (info.digest_size, info.block_size)
    return out


#: cache of hash info instances used by lookup_hash()
_hash_info_cache = {}

def _get_hash_aliases(name):
    """
    internal helper used by :func:`lookup_hash` --
    normalize arbitrary hash name to hashlib format.
    if name not recognized, returns dummy record and logs a message.

    :arg name:
        unnormalized name

    :returns:
        tuple with 2+ elements: ``(hashlib_name, iana_name|None, ... 0+ aliases)``.
    """

    # normalize input
    orig = name
    if not isinstance(name, str):
        name = to_native_str(name, 'utf-8', 'hash name')
    name = re.sub("[_ /]", "-", name.strip().lower())
    if name.startswith("scram-"):  # helper for SCRAM protocol (see passlib.handlers.scram)
        name = name[6:]
        if name.endswith("-plus"):
            name = name[:-5]

    # look through standard names and known aliases
    def check_table(name):
        for row in _known_hash_names:
            if name in row:
                return row
    result = check_table(name)
    if result:
        return result

    # try to clean name up some more
    m = re.match(r"(?i)^(?P<name>[a-z]+)-?(?P<rev>\d)?-?(?P<size>\d{3,4})?$", name)
    if m:
        # roughly follows "SHA2-256" style format, normalize representation,
        # and checked table.
        iana_name, rev, size = m.group("name", "rev", "size")
        if rev:
            iana_name += rev
        hashlib_name = iana_name
        if size:
            iana_name += "-" + size
            if rev:
                hashlib_name += "_"
            hashlib_name += size
        result = check_table(iana_name)
        if result:
            return result

        # not found in table, but roughly recognize format. use names we built up as fallback.
        log.info("normalizing unrecognized hash name %r => %r / %r",
                 orig, hashlib_name, iana_name)

    else:
        # just can't make sense of it. return something
        iana_name = name
        hashlib_name = name.replace("-", "_")
        log.warning("normalizing unrecognized hash name and format %r => %r / %r",
                    orig, hashlib_name, iana_name)

    return hashlib_name, iana_name


#: public hashlib attributes which are NOT digest constructors;
#: _get_hash_const() must never hand these back as a hash constructor.
_NON_DIGEST_HASHLIB_ATTRS = (
    "new",
    "algorithms",
    "algorithms_available",
    "algorithms_guaranteed",
    "pbkdf2_hmac",
    "scrypt",
    "file_digest",
)


def _get_hash_const(name):
    """
    internal helper used by :func:`lookup_hash` --
    lookup hash constructor by name

    :arg name:
        name (normalized to hashlib format, e.g. ``"sha256"``)

    :returns:
        hash constructor, e.g. ``hashlib.sha256()``;
        or None if hash can't be located.
    """
    # check hashlib.<attr> for an efficient constructor.
    # NOTE: hashlib also exposes helpers & constants (pbkdf2_hmac, scrypt,
    #       algorithms_available, ...) which aren't digest constructors.
    #       those are skipped here, so such names get reported as unknown hashes
    #       instead of failing with a TypeError when HashInfo tries to call them.
    if not name.startswith("_") and name not in _NON_DIGEST_HASHLIB_ATTRS:
        attr = getattr(hashlib, name, None)
        if callable(attr):
            return attr

    # check hashlib.new() in case SSL supports the digest
    new_ssl_hash = hashlib.new
    try:
        # new() should throw ValueError if alg is unknown
        new_ssl_hash(name, b"")
    except ValueError:
        pass
    else:
        # create wrapper function
        # XXX: is there a faster way to wrap this?
        def const(msg=b""):
            return new_ssl_hash(name, msg)
        const.__name__ = name
        const.__module__ = "hashlib"
        const.__doc__ = ("wrapper for hashlib.new(%r),\n"
                         "generated by passlib.crypto.digest.lookup_hash()") % name
        return const

    # use builtin md4 as fallback when not supported by hashlib
    if name == "md4":
        from passlib.crypto._md4 import md4
        return md4

    # XXX: any other modules / registries we should check?
    # TODO: add pysha3 support.

    return None


def lookup_hash(digest,  # *,
                return_unknown=False, required=True):
    """
    Returns a :class:`HashInfo` record containing information about a given hash function.
    Can be used to look up a hash constructor by name, normalize hash name representation, etc.

    :arg digest:
        This can be any of:

        * A string containing a :mod:`!hashlib` digest name (e.g. ``"sha256"``),
        * A string containing an IANA-assigned hash name,
        * A digest constructor function (e.g. ``hashlib.sha256``).

        Case is ignored, underscores are converted to hyphens,
        and various other cleanups are made.

    :param required:
        By default (True), this function will throw an :exc:`~passlib.exc.UnknownHashError` if no hash constructor
        can be found, or if the hash is not actually available.

        If this flag is False, it will instead return a dummy :class:`!HashInfo` record
        which will defer throwing the error until its constructor function is called.
        This is mainly used by :func:`norm_hash_name`.

    :param return_unknown:

        .. deprecated:: 1.7.3

            deprecated, and will be removed in passlib 2.0.
            this acts like inverse of **required**.

    :returns HashInfo:
        :class:`HashInfo` instance containing information about specified digest.

        Multiple calls resolving to the same hash should always
        return the same :class:`!HashInfo` instance.
    """
    # legacy alias
    if return_unknown:
        required = False

    # check for cached entry
    cache = _hash_info_cache
    try:
        info = cache[digest]
    except (KeyError, TypeError):
        # NOTE: TypeError is to catch 'TypeError: unhashable type' (e.g. HashInfo)
        pass
    else:
        # the cache may hold a stub record left behind by an earlier ``required=False``
        # lookup; honor ``required`` here too, so the result doesn't depend on call order.
        if required and info.error_text is not None:
            raise exc.UnknownHashError(info.error_text, info.name)
        return info

    # resolve ``digest`` to ``const`` & ``name_record``
    cache_by_name = True
    if isinstance(digest, unicode_or_bytes_types):
        # normalize name
        name_list = _get_hash_aliases(digest)
        name = name_list[0]
        assert name

        # if name wasn't normalized to hashlib format,
        # get info for normalized name and reuse it.
        if name != digest:
            info = lookup_hash(name, required=required)
            cache[digest] = info
            return info

        # else look up constructor
        # NOTE: may return None, which is handled by HashInfo constructor
        const = _get_hash_const(name)

        # if mock fips mode is enabled, replace with dummy constructor
        # (to replicate how it would behave on a real fips system).
        if const and mock_fips_mode and name not in _fips_algorithms:
            def const(source=b""):
                raise ValueError("%r disabled for fips by passlib set_mock_fips_mode()" % name)

    elif isinstance(digest, HashInfo):
        # handle border case where HashInfo is passed in.
        return digest

    elif callable(digest):
        # try to lookup digest based on it's self-reported name
        # (which we trust to be the canonical "hashlib" name)
        const = digest
        name_list = _get_hash_aliases(const().name)
        name = name_list[0]
        other_const = _get_hash_const(name)
        if other_const is None:
            # this is probably a third-party digest we don't know about,
            # so just pass it on through, and register reverse lookup for it's name.
            pass

        elif other_const is const:
            # if we got back same constructor, this is just a known stdlib constructor,
            # which was passed in before we had cached it by name. proceed normally.
            pass

        else:
            # if we got back different object, then ``const`` is something else
            # (such as a mock object), in which case we want to skip caching it by name,
            # as that would conflict with real hash.
            cache_by_name = False

    else:
        raise exc.ExpectedTypeError(digest, "digest name or constructor", "digest")

    # create new instance
    info = HashInfo(const=const, names=name_list, required=required)

    # populate cache
    if const is not None:
        cache[const] = info
    if cache_by_name:
        for name in name_list:
            if name:  # (skips iana name if it's empty)
                assert cache.get(name) in [None, info], "%r already in cache" % name
                cache[name] = info
    return info

#: UT helper for clearing internal cache
lookup_hash.clear_cache = _hash_info_cache.clear


def norm_hash_name(name, format="hashlib"):
    """Normalize hash function name (convenience wrapper for :func:`lookup_hash`).

    :arg name:
        Original hash function name.

        This name can be a Python :mod:`~hashlib` digest name,
        a SCRAM mechanism name, IANA assigned hash name, etc.
        Case is ignored, and underscores are converted to hyphens.

    :param format:
        Naming convention to normalize to.
        Possible values are:

        * ``"hashlib"`` (the default) - normalizes name to be compatible
          with Python's :mod:`!hashlib`.

        * ``"iana"`` - normalizes name to IANA-assigned hash function name.
          For hashes which IANA hasn't assigned a name for, this issues a warning,
          and then uses a heuristic to return a "best guess" name.

    :returns:
        Hash name, returned as native :class:`!str`.

    :raises ValueError:
        if ``format`` is not one of the values listed above.
    """
    info = lookup_hash(name, required=False)
    if info.unknown:
        warn("norm_hash_name(): " + info.error_text, exc.PasslibRuntimeWarning)
    if format == "hashlib":
        return info.name
    elif format == "iana":
        return info.iana_name
    else:
        raise ValueError("unknown format: %r" % (format,))


class HashInfo(SequenceMixin):
    """
    Record containing information about a given hash algorithm, as returned :func:`lookup_hash`.

    This class exposes the following attributes:

    .. autoattribute:: const
    .. autoattribute:: digest_size
    .. autoattribute:: block_size
    .. autoattribute:: name
    .. autoattribute:: iana_name
    .. autoattribute:: aliases
    .. autoattribute:: supported

    This object can also be treated as a 3-element sequence
    containing ``(const, digest_size, block_size)``.
    """
    #=========================================================================
    # instance attrs
    #=========================================================================

    #: Canonical / hashlib-compatible name (e.g. ``"sha256"``).
    name = None

    #: IANA assigned name (e.g. ``"sha-256"``), may be ``None`` if unknown.
    iana_name = None

    #: Tuple of other known aliases (may be empty)
    aliases = ()

    #: Hash constructor function (e.g. :func:`hashlib.sha256`)
    const = None

    #: Hash's digest size
    digest_size = None

    #: Hash's block size
    block_size = None

    #: set when hash isn't available, will be filled in with string containing error text
    #: that const() will raise.
    error_text = None

    #: set when error_text is due to hash algorithm being completely unknown
    #: (not just unavailable on current system)
    unknown = False

    #=========================================================================
    # init
    #=========================================================================

    def __init__(self,  # *,
                 const, names, required=True):
        """
        initialize new instance.
        :arg const:
            hash constructor
        :arg names:
            list of 2+ names. should be list of ``(name, iana_name, ... 0+ aliases)``.
            names must be lower-case. only iana name may be None.
        :param required:
            if True (the default), an :exc:`~passlib.exc.UnknownHashError` is raised
            immediately when the hash turns out to be unusable;
            if False, a stub constructor which raises that error is installed instead.
        """
        # init names
        name = self.name = names[0]
        self.iana_name = names[1]
        self.aliases = names[2:]

        def use_stub_const(msg):
            """
            helper that installs stub constructor which throws specified error <msg>.
            """
            def const(source=b""):
                raise exc.UnknownHashError(msg, name)
            if required:
                # if caller only wants supported digests returned,
                # just throw error immediately...
                const()
                # (stub always raises; a bare ``assert "text"`` would be a no-op)
                raise AssertionError("shouldn't get here")
            self.error_text = msg
            self.const = const
            try:
                self.digest_size, self.block_size = _fallback_info[name]
            except KeyError:
                pass

        # handle "constructor not available" case
        if const is None:
            if names in _known_hash_names:
                msg = "unsupported hash: %r" % name
            else:
                msg = "unknown hash: %r" % name
                self.unknown = True
            use_stub_const(msg)
            # TODO: load in preset digest size info for known hashes.
            return

        # create hash instance to inspect
        try:
            hash = const()
        except ValueError as err:
            # per issue 116, FIPS compliant systems will have a constructor;
            # but it will throw a ValueError with this message.  As of 1.7.3,
            # translating this into DisabledHashError.
            # "ValueError: error:060800A3:digital envelope routines:EVP_DigestInit_ex:disabled for fips"
            if "disabled for fips" in str(err).lower():
                msg = "%r hash disabled for fips" % name
            else:
                msg = "internal error in %r constructor\n(%s: %s)" % (name, type(err).__name__, err)
            use_stub_const(msg)
            return

        # store stats about hash
        self.const = const
        self.digest_size = hash.digest_size
        self.block_size = hash.block_size

        # do sanity check on digest size
        if len(hash.digest()) != hash.digest_size:
            raise RuntimeError("%r constructor failed sanity check" % self.name)

        # do sanity check on name.
        if hash.name != self.name:
            warn("inconsistent digest name: %r resolved to %r, which reports name as %r" %
                 (self.name, const, hash.name), exc.PasslibRuntimeWarning)

    #=========================================================================
    # methods
    #=========================================================================
    def __repr__(self):
        return "<lookup_hash(%r): digest_size=%r block_size=%r>" % \
               (self.name, self.digest_size, self.block_size)

    def _as_tuple(self):
        return self.const, self.digest_size, self.block_size

    @memoized_property
    def supported(self):
        """
        whether hash is available for use
        (if False, constructor will throw UnknownHashError if called)
        """
        return self.error_text is None

    @memoized_property
    def supported_by_fastpbkdf2(self):
        """helper to detect if hash is supported by fastpbkdf2()"""
        if not _fast_pbkdf2_hmac:
            return None
        try:
            _fast_pbkdf2_hmac(self.name, b"p", b"s", 1)
            return True
        except ValueError:
            # "unsupported hash type"
            return False

    @memoized_property
    def supported_by_hashlib_pbkdf2(self):
        """helper to detect if hash is supported by hashlib.pbkdf2_hmac()"""
        if not _stdlib_pbkdf2_hmac:
            return None
        try:
            _stdlib_pbkdf2_hmac(self.name, b"p", b"s", 1)
            return True
        except ValueError:
            # "unsupported hash type"
            return False

    #=========================================================================
    # eoc
    #=========================================================================


#---------------------------------------------------------------------
# mock fips mode monkeypatch
#---------------------------------------------------------------------

#: flag for detecting if mock fips mode is enabled.
mock_fips_mode = False


#: algorithms allowed under FIPS mode (subset of hashlib.algorithms_available);
#: per https://csrc.nist.gov/Projects/Hash-Functions FIPS 202 list.
_fips_algorithms = set([
    # FIPS 180-4  and FIPS 202
    'sha1',
    'sha224',
    'sha256',
    'sha384',
    'sha512',
    # 'sha512/224',
    # 'sha512/256',

    # FIPS 202 only
    'sha3_224',
    'sha3_256',
    'sha3_384',
    'sha3_512',
    'shake_128',
    'shake_256',
])


def _set_mock_fips_mode(enable=True):
    """
    UT helper which monkeypatches lookup_hash() internals to replicate FIPS mode.

    :param enable:
        new value for the module-level ``mock_fips_mode`` flag.
        the lookup_hash() cache is cleared afterwards.
    """
    global mock_fips_mode
    mock_fips_mode = enable
    lookup_hash.clear_cache()


# helper for UTs
if as_bool(os.environ.get("PASSLIB_MOCK_FIPS_MODE")):
    _set_mock_fips_mode()

#=============================================================================
# hmac utils
#=============================================================================

#: translation tables used by compile_hmac()
_TRANS_5C = join_byte_values((x ^ 0x5C) for x in irange(256))
_TRANS_36 = join_byte_values((x ^ 0x36) for x in irange(256))

def compile_hmac(digest, key, multipart=False):
    """
    This function returns an efficient HMAC function, hardcoded with a specific digest & key.
    It can be used via ``hmac = compile_hmac(digest, key)``.

    :arg digest:
        digest name or constructor.

    :arg key:
        secret key as :class:`!bytes` or :class:`!unicode` (unicode will be encoded using utf-8).

    :param multipart:
        request a multipart constructor instead (see return description).

    :returns:
        By default, the returned function has the signature ``hmac(msg) -> digest output``.

        However, if ``multipart=True``, the returned function has the signature
        ``hmac() -> update, finalize``, where ``update(msg)`` may be called multiple times,
        and ``finalize() -> digest_output`` may be repeatedly called at any point to
        calculate the HMAC digest so far.

        The returned object will also have a ``digest_info`` attribute, containing
        the :class:`HashInfo` record which :func:`lookup_hash` returned for the specified digest.

    This function exists, and has the weird signature it does, in order to squeeze out
    as much efficiency as possible, by omitting much of the setup cost
    and features of the stdlib :mod:`hmac` module.
    """
    # all the following was adapted from stdlib's hmac module

    # resolve digest (cached)
    digest_info = lookup_hash(digest)
    const, digest_size, block_size = digest_info
    assert block_size >= 16, "block size too small"

    # prepare key
    if not isinstance(key, bytes):
        key = to_bytes(key, param="key")
    klen = len(key)
    if klen > block_size:
        # keys longer than the block size are replaced by their digest
        key = const(key).digest()
        klen = digest_size
    if klen < block_size:
        # zero-pad key out to the block size
        key += b'\x00' * (block_size - klen)

    # create pre-initialized hash constructors
    _inner_copy = const(key.translate(_TRANS_36)).copy
    _outer_copy = const(key.translate(_TRANS_5C)).copy

    if multipart:
        # create multi-part function
        # NOTE: this is slightly slower than the single-shot version,
        #       and should only be used if needed.
        def hmac():
            """generated by compile_hmac(multipart=True)"""
            inner = _inner_copy()
            def finalize():
                outer = _outer_copy()
                outer.update(inner.digest())
                return outer.digest()
            return inner.update, finalize
    else:

        # single-shot function
        def hmac(msg):
            """generated by compile_hmac()"""
            inner = _inner_copy()
            inner.update(msg)
            outer = _outer_copy()
            outer.update(inner.digest())
            return outer.digest()

    # add info attr
    hmac.digest_info = digest_info
    return hmac

#=============================================================================
# pbkdf1
#=============================================================================
def pbkdf1(digest, secret, salt, rounds, keylen=None):
    """pkcs#5 password-based key derivation v1.5

    :arg digest:
        digest name or constructor.

    :arg secret:
        secret to use when generating the key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :arg salt:
        salt string to use when generating key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :param rounds:
        number of rounds to use to generate key.

    :arg keylen:
        number of bytes to generate (if omitted / ``None``, uses digest's native size)

    :returns:
        raw :class:`bytes` of generated key

    .. note::

        This algorithm has been deprecated, new code should use PBKDF2.
        Among other limitations, ``keylen`` cannot be larger
        than the digest size of the specified hash.
    """
    # resolve digest
    const, digest_size, block_size = lookup_hash(digest)

    # validate secret & salt
    secret = to_bytes(secret, param="secret")
    salt = to_bytes(salt, param="salt")

    # validate rounds
    if not isinstance(rounds, int_types):
        raise exc.ExpectedTypeError(rounds, "int", "rounds")
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    # validate keylen
    if keylen is None:
        keylen = digest_size
    elif not isinstance(keylen, int_types):
        raise exc.ExpectedTypeError(keylen, "int or None", "keylen")
    elif keylen < 0:
        raise ValueError("keylen must be at least 0")
    elif keylen > digest_size:
        raise ValueError("keylength too large for digest: %r > %r" %
                         (keylen, digest_size))

    # main pbkdf1 loop
    block = secret + salt
    for _ in irange(rounds):
        block = const(block).digest()
    return block[:keylen]

#=============================================================================
# pbkdf2
#=============================================================================

_pack_uint32 = Struct(">L").pack

def pbkdf2_hmac(digest, secret, salt, rounds, keylen=None):
    """pkcs#5 password-based key derivation v2.0 using HMAC + arbitrary digest.

    :arg digest:
        digest name or constructor.

    :arg secret:
        passphrase to use to generate key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :arg salt:
        salt string to use when generating key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :param rounds:
        number of rounds to use to generate key.

    :arg keylen:
        number of bytes to generate.
        if omitted / ``None``, will use digest's native output size.

    :returns:
        raw bytes of generated key

    .. versionchanged:: 1.7

        This function will use the first available of the following backends:

        * `fastpbkdf2 <https://pypi.python.org/pypi/fastpbkdf2>`_
        * :func:`hashlib.pbkdf2_hmac` (only available in py2 >= 2.7.8, and py3 >= 3.4)
        * builtin pure-python backend

        See :data:`passlib.crypto.digest.PBKDF2_BACKENDS` to determine
        which backend(s) are in use.
    """
    # validate secret & salt
    secret = to_bytes(secret, param="secret")
    salt = to_bytes(salt, param="salt")

    # resolve digest
    digest_info = lookup_hash(digest)
    digest_size = digest_info.digest_size

    # validate rounds
    if not isinstance(rounds, int_types):
        raise exc.ExpectedTypeError(rounds, "int", "rounds")
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    # validate keylen
    if keylen is None:
        keylen = digest_size
    elif not isinstance(keylen, int_types):
        raise exc.ExpectedTypeError(keylen, "int or None", "keylen")
    elif keylen < 1:
        # XXX: could allow keylen=0, but want to be compat w/ stdlib
        raise ValueError("keylen must be at least 1")

    # find smallest block count s.t. keylen <= block_count * digest_size;
    # make sure block count won't overflow (per pbkdf2 spec)
    # this corresponds to throwing error if keylen > digest_size * MAX_UINT32
    # NOTE: stdlib will throw error at lower bound (keylen > MAX_SINT32)
    # NOTE: have do this before other backends checked, since fastpbkdf2 raises wrong error
    #       (InvocationError, not OverflowError)
    block_count = (keylen + digest_size - 1) // digest_size
    if block_count > MAX_UINT32:
        raise OverflowError("keylen too long for digest")

    #
    # check for various high-speed backends
    #

    # ~3x faster than pure-python backend
    # NOTE: have to do this after above guards since fastpbkdf2 lacks bounds checks.
    if digest_info.supported_by_fastpbkdf2:
        return _fast_pbkdf2_hmac(digest_info.name, secret, salt, rounds, keylen)

    # ~1.4x faster than pure-python backend
    # NOTE: have to do this after fastpbkdf2 since hashlib-ssl is slower,
    #       will support larger number of hashes.
    if digest_info.supported_by_hashlib_pbkdf2:
        return _stdlib_pbkdf2_hmac(digest_info.name, secret, salt, rounds, keylen)

    #
    # otherwise use our own implementation
    #

    # generated keyed hmac
    keyed_hmac = compile_hmac(digest, secret)

    # get helper to calculate pbkdf2 inner loop efficiently
    calc_block = _get_pbkdf2_looper(digest_size)

    # assemble & return result
    return join_bytes(
        calc_block(keyed_hmac, keyed_hmac(salt + _pack_uint32(i)), rounds)
        for i in irange(1, block_count + 1)
    )[:keylen]

#-------------------------------------------------------------------------------------
# pick best choice for pure-python helper
# TODO: consider some alternatives, such as C-accelerated xor_bytes helper if available
#-------------------------------------------------------------------------------------
# NOTE: this env var is only present to support the admin/benchmark_pbkdf2 script
_force_backend = os.environ.get("PASSLIB_PBKDF2_BACKEND") or "any"

if PY3 and _force_backend in ["any", "from-bytes"]:
    from functools import partial

    def _get_pbkdf2_looper(digest_size):
        return partial(_pbkdf2_looper, digest_size)

    def _pbkdf2_looper(digest_size, keyed_hmac, digest, rounds):
        """
        py3-only implementation of pbkdf2 inner loop;
        uses 'int.from_bytes' + integer XOR
        """
        from_bytes = int.from_bytes
        BIG = "big"  # endianess doesn't matter, just has to be consistent
        accum = from_bytes(digest, BIG)
        for _ in irange(rounds - 1):
            digest = keyed_hmac(digest)
            accum ^= from_bytes(digest, BIG)
        return accum.to_bytes(digest_size, BIG)

    _builtin_backend = "from-bytes"

elif _force_backend in ["any", "unpack", "from-bytes"]:
    from struct import Struct
    from passlib.utils import sys_bits

    _have_64_bit = (sys_bits >= 64)

    #: cache used by _get_pbkdf2_looper
    _looper_cache = {}

    def _get_pbkdf2_looper(digest_size):
        """
        We want a helper function which performs equivalent of the following::

          def helper(keyed_hmac, digest, rounds):
              accum = digest
              for _ in irange(rounds - 1):
                  digest = keyed_hmac(digest)
                  accum ^= digest
              return accum

        However, no efficient way to implement "bytes ^ bytes" in python.
        Instead, using approach where we dynamically compile a helper function based
        on digest size.  Instead of a single `accum` var, this helper breaks the digest
        into a series of integers.

        It stores these in a series of `accum_<i>` vars, and performs `accum ^= digest`
        by unpacking digest and perform xor for each "accum_<i> ^= digest_<i>".
        this keeps everything in locals, avoiding excessive list creation, encoding or decoding,
        etc.

        :param digest_size:
            digest size to compile for, in bytes. (must be multiple of 4).

        :return:
            helper function with call signature outlined above.
        """
        #
        # cache helpers
        #
        try:
            return _looper_cache[digest_size]
        except KeyError:
            pass

        #
        # figure out most efficient struct format to unpack digest into list of native ints
        #
        if _have_64_bit and not digest_size & 0x7:
            # digest size multiple of 8, on a 64 bit system -- use array of UINT64
            count = (digest_size >> 3)
            fmt = "=%dQ" % count
        elif not digest_size & 0x3:
            if _have_64_bit:
                # digest size multiple of 4, on a 64 bit system -- use array of UINT64 + 1 UINT32
                count = (digest_size >> 3)
                fmt = "=%dQI" % count
                count += 1
            else:
                # digest size multiple of 4, on a 32 bit system -- use array of UINT32
                count = (digest_size >> 2)
                fmt = "=%dI" % count
        else:
            # stopping here, cause no known hashes have digest size that isn't multiple of 4 bytes.
            # if needed, could go crazy w/ "H" & "B"
            raise NotImplementedError("unsupported digest size: %d" % digest_size)
        struct = Struct(fmt)

        #
        # build helper source
        #
        tdict = dict(
            digest_size=digest_size,
            accum_vars=", ".join("acc_%d" % i for i in irange(count)),
            digest_vars=", ".join("dig_%d" % i for i in irange(count)),
        )

        # head of function
        # NOTE: the leading whitespace inside these literals is the indentation
        #       of the generated function, so it is significant.
        source = (
            "def helper(keyed_hmac, digest, rounds):\n"
            "    '''pbkdf2 loop helper for digest_size={digest_size}'''\n"
            "    unpack_digest = struct.unpack\n"
            "    {accum_vars} = unpack_digest(digest)\n"
            "    for _ in irange(1, rounds):\n"
            "        digest = keyed_hmac(digest)\n"
            "        {digest_vars} = unpack_digest(digest)\n"
        ).format(**tdict)

        # xor digest
        for i in irange(count):
            source += "        acc_%d ^= dig_%d\n" % (i, i)

        # return result
        source += "    return struct.pack({accum_vars})\n".format(**tdict)

        #
        # compile helper
        #
        code = compile(source, "<generated by passlib.crypto.digest._get_pbkdf2_looper()>", "exec")
        gdict = dict(irange=irange, struct=struct)
        ldict = dict()
        # NOTE: source is assembled purely from the integer digest size, never from external text.
        eval(code, gdict, ldict)
        helper = ldict['helper']
        if __debug__:
            helper.__source__ = source

        #
        # store in cache
        #
        _looper_cache[digest_size] = helper
        return helper

    _builtin_backend = "unpack"

else:
    assert _force_backend in ["any", "hexlify"]

    # XXX: older & slower approach that used int(hexlify()),
    #      keeping it around for a little while just for benchmarking.

    from binascii import hexlify as _hexlify
    from passlib.utils import int_to_bytes

    def _get_pbkdf2_looper(digest_size):
        return _pbkdf2_looper

    def _pbkdf2_looper(keyed_hmac, digest, rounds):
        hexlify = _hexlify
        accum = int(hexlify(digest), 16)
        for _ in irange(rounds - 1):
            digest = keyed_hmac(digest)
            accum ^= int(hexlify(digest), 16)
        return int_to_bytes(accum, len(digest))

    _builtin_backend = "hexlify"

# helper for benchmark script -- disable hashlib, fastpbkdf2 support if builtin requested
if _force_backend == _builtin_backend:
    _fast_pbkdf2_hmac = _stdlib_pbkdf2_hmac = None

# expose info about what backends are active
PBKDF2_BACKENDS = [b for b in [
    "fastpbkdf2" if _fast_pbkdf2_hmac else None,
    "hashlib-ssl" if _stdlib_pbkdf2_hmac else None,
    "builtin-" + _builtin_backend
] if b]

# *very* rough estimate of relative speed (compared to sha256 using 'unpack' backend on 64bit arch)
if "fastpbkdf2" in PBKDF2_BACKENDS:
    PBKDF2_SPEED_FACTOR = 3
elif "hashlib-ssl" in PBKDF2_BACKENDS:
    PBKDF2_SPEED_FACTOR = 1.4
else:
    # remaining backends have *some* difference in performance, but not enough to matter
    PBKDF2_SPEED_FACTOR = 1

#=============================================================================
# eof
#=============================================================================