"""passlib.utils -- helpers for writing password hashes"""
#=============================================================================
# imports
#=============================================================================
from passlib.utils.compat import JYTHON
# core
from binascii import b2a_base64, a2b_base64, Error as _BinAsciiError
from base64 import b64encode, b64decode
try:
    from collections.abc import Sequence
    from collections.abc import Iterable
except ImportError:
    # py2 compat
    from collections import Sequence
    from collections import Iterable
from codecs import lookup as _lookup_codec
from functools import update_wrapper
import itertools
import inspect
import logging; log = logging.getLogger(__name__)
import math
import os
import sys
import random
import re
if JYTHON: # pragma: no cover -- runtime detection
    # Jython 2.5.2 lacks stringprep module -
    # see http://bugs.jython.org/issue1758320
    try:
        import stringprep
    except ImportError:
        stringprep = None
        _stringprep_missing_reason = "not present under Jython"
else:
    import stringprep
import time
if stringprep:
    import unicodedata
try:
    import threading
except ImportError:
    # module optional before py37
    threading = None
import timeit
import types
from warnings import warn
# site
# pkg
from passlib.utils.binary import (
    # [remove these aliases in 2.0]
    BASE64_CHARS, AB64_CHARS, HASH64_CHARS, BCRYPT_CHARS,
    Base64Engine, LazyBase64Engine, h64, h64big, bcrypt64,
    ab64_encode, ab64_decode, b64s_encode, b64s_decode
)
from passlib.utils.decor import (
    # [remove these aliases in 2.0]
    deprecated_function,
    deprecated_method,
    memoized_property,
    classproperty,
    hybrid_method,
)
from passlib.exc import ExpectedStringError, ExpectedTypeError
from passlib.utils.compat import (add_doc, join_bytes, join_byte_values,
                                  join_byte_elems, irange, imap, PY3, u,
                                  join_unicode, unicode, byte_elem_value, nextgetter,
                                  unicode_or_str, unicode_or_bytes_types,
                                  get_method_function, suppress_cause, PYPY)
# local
__all__ = [
    # constants
    'JYTHON',
    'sys_bits',
    'unix_crypt_schemes',
    'rounds_cost_values',

    # unicode helpers
    'consteq',
    'saslprep',

    # bytes helpers
    "xor_bytes",
    "render_bytes",

    # encoding helpers
    'is_same_codec',
    'is_ascii_safe',
    'to_bytes',
    'to_unicode',
    'to_native_str',

    # host OS
    'has_crypt',
    'test_crypt',
    'safe_crypt',
    'tick',

    # randomness
    'rng',
    'getrandbytes',
    'getrandstr',
    'generate_password',

    # object type / interface tests
    'is_crypt_handler',
    'is_crypt_context',
    'has_rounds_info',
    'has_salt_info',
]

#=============================================================================
# constants
#=============================================================================

# bitsize of system architecture (32 or 64)
# (log2 of the largest native int, rounded up to the nearest whole bit count)
sys_bits = int(math.log(sys.maxsize if PY3 else sys.maxint, 2) + 1.5)

# list of hashes algs supported by crypt() on at least one OS.
# XXX: move to .registry for passlib 2.0?
unix_crypt_schemes = [
    "sha512_crypt", "sha256_crypt",
    "sha1_crypt", "bcrypt",
    "md5_crypt",
    # "bsd_nthash",
    "bsdi_crypt", "des_crypt",
    ]

# list of rounds_cost constants
rounds_cost_values = [ "linear", "log2" ]

# legacy import, will be removed in 1.8
from passlib.exc import MissingBackendError

# internal helpers
_BEMPTY = b''
_UEMPTY = u("")
_USPACE = u(" ")

# maximum password size which passlib will allow; see exc.PasswordSizeError
# (overridable through the PASSLIB_MAX_PASSWORD_SIZE environment variable;
# an unset or empty variable falls back to 4096)
MAX_PASSWORD_SIZE = int(os.environ.get("PASSLIB_MAX_PASSWORD_SIZE") or 4096)

#=============================================================================
# type helpers
#=============================================================================

class SequenceMixin(object):
    """
    helper which lets result object act like a fixed-length sequence.
    subclass just needs to provide :meth:`_as_tuple()`.

    every sequence operation below (repr, indexing, iteration, length,
    equality) is delegated to the tuple returned by :meth:`_as_tuple()`.
    """
    def _as_tuple(self):
        raise NotImplementedError("implement in subclass")

    def __repr__(self):
        return repr(self._as_tuple())

    def __getitem__(self, idx):
        return self._as_tuple()[idx]

    def __iter__(self):
        return iter(self._as_tuple())

    def __len__(self):
        return len(self._as_tuple())

    def __eq__(self, other):
        return self._as_tuple() == other

    def __ne__(self, other):
        return not self.__eq__(other)

if PY3:
    # getargspec() is deprecated, use this under py3.
    # even though it's a lot more awkward to get basic info :|

    _VAR_KEYWORD = inspect.Parameter.VAR_KEYWORD
    _VAR_ANY_SET = set([_VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL])

    def accepts_keyword(func, key):
        """test if function accepts specified keyword

        :arg func: function or method to inspect
        :arg key: name of keyword argument to look for
        :returns:
            ``True`` if *func* has a named parameter *key*,
            or if it takes ``**kwds``; ``False`` otherwise.
        """
        params = inspect.signature(get_method_function(func)).parameters
        if not params:
            return False
        arg = params.get(key)
        if arg and arg.kind not in _VAR_ANY_SET:
            return True
        # XXX: annoying what we have to do to determine if VAR_KWDS in use.
        #      (a ``**kwds`` parameter, if present, is always the last one)
        return params[list(params)[-1]].kind == _VAR_KEYWORD

else:

    def accepts_keyword(func, key):
        """test if function accepts specified keyword

        :arg func: function or method to inspect
        :arg key: name of keyword argument to look for
        :returns:
            ``True`` if *func* has a named parameter *key*,
            or if it takes ``**kwds``; ``False`` otherwise.
        """
        spec = inspect.getargspec(get_method_function(func))
        return key in spec.args or spec.keywords is not None

def update_mixin_classes(target, add=None, remove=None, append=False,
                         before=None, after=None, dryrun=False):
    """
    helper to update mixin classes installed in target class.

    :param target:
        target class whose bases will be modified.

    :param add:
        class / classes to install into target's base class list.

    :param remove:
        class / classes to remove from target's base class list.
        (classes that are also listed in *add* are left in place).

    :param append:
        by default, prepends mixins to front of list.
        if True, appends to end of list instead.

    :param after:
        optionally make sure all mixins are inserted after
        this class / classes.
        (only consulted when *append* is false).

    :param before:
        optionally make sure all mixins are inserted before
        this class / classes.
        (only consulted when *append* is true).

    :param dryrun:
        optionally perform all calculations / raise errors,
        but don't actually modify the class.
    """
    if isinstance(add, type):
        add = [add]

    bases = list(target.__bases__)

    # strip out requested mixins
    if remove:
        if isinstance(remove, type):
            remove = [remove]
        for mixin in remove:
            if add and mixin in add:
                continue
            if mixin in bases:
                bases.remove(mixin)

    # add requested mixins
    if add:
        for mixin in add:
            # if mixin already present (explicitly or not), leave alone
            if any(issubclass(base, mixin) for base in bases):
                continue

            # determine insertion point
            if append:
                for idx, base in enumerate(bases):
                    if issubclass(mixin, base):
                        # don't insert mixin after one of its own bases
                        break
                    if before and issubclass(base, before):
                        # don't insert mixin after any <before> classes.
                        break
                else:
                    # append to end
                    idx = len(bases)
            elif after:
                for end_idx, base in enumerate(reversed(bases)):
                    if issubclass(base, after):
                        # don't insert mixin before any <after> classes.
                        idx = len(bases) - end_idx
                        assert bases[idx-1] == base
                        break
                else:
                    idx = 0
            else:
                # insert at start
                idx = 0

            # insert mixin
            bases.insert(idx, mixin)

    # modify class
    if not dryrun:
        target.__bases__ = tuple(bases)

#=============================================================================
# collection helpers
#=============================================================================
def batch(source, size):
    """
    split iterable into chunks of <size> elements.

    :arg source:
        sequence or iterable to split.

    :arg size:
        maximum number of elements per chunk (must be >= 1).

    :returns:
        generator yielding the chunks in order; the final chunk may be shorter.

        * for sequences, each chunk is a slice of *source*.
        * for other iterables, each chunk is a lazy iterator drawing from the
          same underlying iterator; a chunk should be consumed before the
          next one is requested, or the leftover elements will spill into
          the following chunk.

    :raises ValueError: if *size* is less than 1.
    :raises TypeError: if *source* is not iterable.

    .. note::
        as this is a generator, the errors above are only raised
        once iteration starts.
    """
    if size < 1:
        raise ValueError("size must be positive integer")
    if isinstance(source, Sequence):
        end = len(source)
        i = 0
        while i < end:
            n = i + size
            yield source[i:n]
            i = n
    elif isinstance(source, Iterable):
        itr = iter(source)
        while True:
            chunk_itr = itertools.islice(itr, size)
            # pull first element eagerly, so exhaustion of the source
            # can be detected without yielding an empty chunk.
            try:
                first = next(chunk_itr)
            except StopIteration:
                break
            yield itertools.chain((first,), chunk_itr)
    else:
        raise TypeError("source must be iterable")

#=============================================================================
# unicode helpers
#=============================================================================

# XXX: should this be moved to passlib.crypto, or compat backports?

def consteq(left, right):
    """Check two strings/bytes for equality.

    This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.
    *left* and *right* must both be of the same type:
    either both unicode, or both bytes.

    Note: If *left* and *right* are of different lengths, or if an error occurs,
    a timing attack could theoretically reveal information about the
    types and lengths of the inputs--but not their values.

    :raises TypeError:
        if the inputs are not both unicode or both bytes.

    :returns:
        ``True`` if the inputs have identical contents, else ``False``.
    """
    # NOTE:
    # resources & discussions considered in the design of this function:
    #   hmac timing attack --
    #       http://rdist.root.org/2009/05/28/timing-attack-in-google-keyczar-library/
    #   python developer discussion surrounding similar function --
    #       http://bugs.python.org/issue15061
    #       http://bugs.python.org/issue14955

    # validate types
    if isinstance(left, unicode):
        if not isinstance(right, unicode):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = False
    elif isinstance(left, bytes):
        if not isinstance(right, bytes):
            raise TypeError("inputs must be both unicode or both bytes")
        # under py3, iterating over bytes yields ints (no ord() needed)
        is_py3_bytes = PY3
    else:
        raise TypeError("inputs must be both unicode or both bytes")

    # do size comparison.
    # NOTE: the double-if construction below is done deliberately, to ensure
    # the same number of operations (including branches) is performed regardless
    # of whether left & right are the same size.
    same_size = (len(left) == len(right))
    if same_size:
        # if sizes are the same, setup loop to perform actual check of contents.
        tmp = left
        result = 0
    if not same_size:
        # if sizes aren't the same, set 'result' so equality will fail regardless
        # of contents. then, to ensure we do exactly 'len(right)' iterations
        # of the loop, just compare 'right' against itself.
        tmp = right
        result = 1

    # run constant-time string comparison
    # TODO: use izip instead (but first verify it's faster than zip for this case)
    if is_py3_bytes:
        for l,r in zip(tmp, right):
            result |= l ^ r
    else:
        for l,r in zip(tmp, right):
            result |= ord(l) ^ ord(r)
    return result == 0

# keep copy of this around since stdlib's version throws error on non-ascii chars in unicode strings.
# our version does accept them, but suffers from some underlying VM issues. but something is better than
# nothing for plaintext hashes, which need this.
# everything else should use consteq(),
# since the stdlib one is going to be as good / better in the general case.
str_consteq = consteq

try:
    # for py3.3 and up, use the stdlib version
    from hmac import compare_digest as consteq
except ImportError:
    pass

    # TODO: could check for cryptography package's version,
    #       but only operates on bytes, so would need a wrapper,
    #       or separate consteq() into a unicode & a bytes variant.
    # from cryptography.hazmat.primitives.constant_time import bytes_eq as consteq

def splitcomma(source, sep=","):
    """split comma-separated string into list of elements,
    stripping whitespace.

    :arg source: string to split
    :arg sep: separator to split on (defaults to ``","``)
    :returns:
        list of whitespace-stripped elements; a single trailing
        separator is ignored, and an empty string results in ``[]``.
    """
    source = source.strip()
    if sep and source.endswith(sep):
        # strip the whole separator, which may be longer than one character
        source = source[:-len(sep)]
    if not source:
        return []
    return [ elem.strip() for elem in source.split(sep) ]

def saslprep(source, param="value"):
    """Normalizes unicode strings using SASLPrep stringprep profile.

    The SASLPrep profile is defined in :rfc:`4013`.
    It provides a uniform scheme for normalizing unicode usernames
    and passwords before performing byte-value sensitive operations
    such as hashing. Among other things, it normalizes diacritic
    representations, removes non-printing characters, and forbids
    invalid characters such as ``\\n``. Properly internationalized
    applications should run user passwords through this function
    before hashing.

    :arg source:
        unicode string to normalize & validate

    :param param:
        Optional noun identifying source parameter in error messages
        (Defaults to the string ``"value"``). This is mainly useful to make the caller's error
        messages make more sense contextually.

    :raises ValueError:
        if any characters forbidden by the SASLPrep profile are encountered.

    :raises TypeError:
        if input is not :class:`!unicode`

    :returns:
        normalized unicode string

    .. note::

        This function is not available under Jython,
        as the Jython stdlib is missing the :mod:`!stringprep` module
        (`Jython issue 1758320 <http://bugs.jython.org/issue1758320>`_).

    .. versionadded:: 1.6
    """
    # saslprep - http://tools.ietf.org/html/rfc4013
    # stringprep - http://tools.ietf.org/html/rfc3454
    #              http://docs.python.org/library/stringprep.html

    # validate type
    # XXX: support bytes (e.g. run through want_unicode)?
    #      might be easier to just integrate this into cryptcontext.
    if not isinstance(source, unicode):
        raise TypeError("input must be unicode string, not %s" %
                        (type(source),))

    # mapping stage
    #   - map non-ascii spaces to U+0020 (stringprep C.1.2)
    #   - strip 'commonly mapped to nothing' chars (stringprep B.1)
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = join_unicode(
        _USPACE if in_table_c12(c) else c
        for c in source
        if not in_table_b1(c)
        )

    # normalize to KC form
    data = unicodedata.normalize('NFKC', data)
    if not data:
        return _UEMPTY

    # check for invalid bi-directional strings.
    # stringprep requires the following:
    #   - chars in C.8 must be prohibited.
    #   - if any R/AL chars in string:
    #       - no L chars allowed in string
    #       - first and last must be R/AL chars
    # this checks if start/end are R/AL chars. if so, prohibited loop
    # will forbid all L chars. if not, prohibited loop will forbid all
    # R/AL chars instead. in both cases, prohibited loop takes care of C.8.
    is_ral_char = stringprep.in_table_d1
    if is_ral_char(data[0]):
        if not is_ral_char(data[-1]):
            raise ValueError("malformed bidi sequence in " + param)
        # forbid L chars within R/AL sequence.
        is_forbidden_bidi_char = stringprep.in_table_d2
    else:
        # forbid R/AL chars if start not setup correctly; L chars allowed.
        is_forbidden_bidi_char = is_ral_char

    # check for prohibited output - stringprep tables A.1, B.1, C.1.2, C.2 - C.9
    in_table_a1 = stringprep.in_table_a1
    in_table_c21_c22 = stringprep.in_table_c21_c22
    in_table_c3 = stringprep.in_table_c3
    in_table_c4 = stringprep.in_table_c4
    in_table_c5 = stringprep.in_table_c5
    in_table_c6 = stringprep.in_table_c6
    in_table_c7 = stringprep.in_table_c7
    in_table_c8 = stringprep.in_table_c8
    in_table_c9 = stringprep.in_table_c9
    for c in data:
        # check for chars mapping stage should have removed
        assert not in_table_b1(c), "failed to strip B.1 in mapping stage"
        assert not in_table_c12(c), "failed to replace C.1.2 in mapping stage"

        # check for forbidden chars
        if in_table_a1(c):
            raise ValueError("unassigned code points forbidden in " + param)
        if in_table_c21_c22(c):
            raise ValueError("control characters forbidden in " + param)
        if in_table_c3(c):
            raise ValueError("private use characters forbidden in " + param)
        if in_table_c4(c):
            raise ValueError("non-char code points forbidden in " + param)
        if in_table_c5(c):
            raise ValueError("surrogate codes forbidden in " + param)
        if in_table_c6(c):
            raise ValueError("non-plaintext chars forbidden in " + param)
        if in_table_c7(c):
            # XXX: should these have been caught by normalize?
            #      if so, should change this to an assert
            raise ValueError("non-canonical chars forbidden in " + param)
        if in_table_c8(c):
            # NOTE: trailing space after "in" is required, as with the
            #       other messages, so *param* isn't glued to the sentence.
            raise ValueError("display-modifying / deprecated chars "
                             "forbidden in " + param)
        if in_table_c9(c):
            raise ValueError("tagged characters forbidden in " + param)

        # do bidi constraint check chosen by bidi init, above
        if is_forbidden_bidi_char(c):
            raise ValueError("forbidden bidi character in " + param)

    return data

# replace saslprep() with stub when stringprep is missing
if stringprep is None: # pragma: no cover -- runtime detection
    def saslprep(source, param="value"):
        """stub for saslprep()"""
        raise NotImplementedError("saslprep() support requires the 'stringprep' "
                                  "module, which is " + _stringprep_missing_reason)

#=============================================================================
# bytes helpers
#=============================================================================
def render_bytes(source, *args):
    """Perform ``%`` formatting using bytes in a uniform manner across Python 2/3.

    This function is motivated by the fact that
    :class:`bytes` instances do not support ``%`` or ``{}`` formatting under Python 3.
    This function is an attempt to provide a replacement:
    it converts everything to unicode (decoding bytes instances as ``latin-1``),
    performs the required formatting, then encodes the result to ``latin-1``.

    Calling ``render_bytes(source, *args)`` should function roughly the same as
    ``source % args`` under Python 2.

    :arg source: format string, as bytes or unicode
    :param args: values to substitute; bytes values are decoded as ``latin-1``
    :returns: formatted result, as bytes

    .. todo::
        python >= 3.5 added back limited support for bytes %,
        can revisit when 3.3/3.4 is dropped.
    """
    if isinstance(source, bytes):
        source = source.decode("latin-1")
    result = source % tuple(arg.decode("latin-1") if isinstance(arg, bytes)
                            else arg for arg in args)
    return result.encode("latin-1")

if PY3:
    # new in py32
    def bytes_to_int(value):
        return int.from_bytes(value, 'big')
    def int_to_bytes(value, count):
        return value.to_bytes(count, 'big')
else:
    # XXX: can any of these be sped up?
    from binascii import hexlify, unhexlify
    def bytes_to_int(value):
        return int(hexlify(value),16)
    def int_to_bytes(value, count):
        # builds a zero-padded hex format of ``2*count`` digits, then unhexlifies it
        return unhexlify(('%%0%dx' % (count<<1)) % value)

add_doc(bytes_to_int, "decode byte string as single big-endian integer")
add_doc(int_to_bytes, "encode integer as single big-endian byte string")

def xor_bytes(left, right):
    """Perform bitwise-xor of two byte strings (must be same size)

    the result always has the length of *left*.
    """
    return int_to_bytes(bytes_to_int(left) ^ bytes_to_int(right), len(left))

def repeat_string(source, size):
    """
    repeat or truncate <source> string, so it has length <size>

    <source> must be non-empty.
    """
    # smallest number of copies which covers <size> characters
    mult = 1 + (size - 1) // len(source)
    return (source * mult)[:size]


def utf8_repeat_string(source, size):
    """
    variant of repeat_string() which truncates to nearest UTF8 boundary.
    """
    mult = 1 + (size - 1) // len(source)
    return utf8_truncate(source * mult, size)


_BNULL = b"\x00"
_UNULL = u("\x00")

def right_pad_string(source, size, pad=None):
    """right-pad or truncate <source> string, so it has length <size>

    if <pad> is omitted, a null character of the same type
    as <source> (unicode or bytes) is used.
    """
    cur = len(source)
    if size > cur:
        if pad is None:
            pad = _UNULL if isinstance(source, unicode) else _BNULL
        return source+pad*(size-cur)
    else:
        return source[:size]


def utf8_truncate(source, index):
    """
    helper to truncate UTF8 byte string to nearest character boundary ON OR AFTER <index>.
    returned prefix will always have length of at least <index>, and will stop on the
    first byte that's not a UTF8 continuation byte (128 - 191 inclusive).
    since utf8 should never take more than 4 bytes to encode known unicode values,
    we can stop after ``index+3`` is reached.

    :param bytes source:
    :param int index:
    :rtype: bytes
    :raises TypeError: if *source* is not bytes.
    """
    # general approach:
    #
    # * UTF8 bytes will have high two bits (0xC0) as one of:
    #   00 -- ascii char
    #   01 -- ascii char
    #   10 -- continuation of multibyte char
    #   11 -- start of multibyte char.
    #   thus we can cut on anything where high bits aren't "10" (0x80; continuation byte)
    #
    # * UTF8 characters SHOULD always be 1 to 4 bytes, though they may be unbounded.
    #   so we just keep going until first non-continuation byte is encountered, or end of str.
    #   this should work predictably even for malformed/non UTF8 inputs.

    if not isinstance(source, bytes):
        raise ExpectedTypeError(source, bytes, "source")

    # validate index
    end = len(source)
    if index < 0:
        index = max(0, index + end)
    if index >= end:
        return source

    # can stop search after 4 bytes, won't ever have longer utf8 sequence.
    end = min(index + 3, end)

    # loop until we find non-continuation byte
    while index < end:
        if byte_elem_value(source[index]) & 0xC0 != 0x80:
            # found single-char byte, or start-char byte.
            break
        # else: found continuation byte.
        index += 1
    else:
        assert index == end

    # truncate at final index
    result = source[:index]

    def sanity_check():
        # try to decode source
        try:
            text = source.decode("utf-8")
        except UnicodeDecodeError:
            # if source isn't valid utf8, byte level match is enough
            return True

        # validate that result was cut on character boundary
        assert text.startswith(result.decode("utf-8"))
        return True

    # NOTE: wrapped in assert so the check is skipped entirely under ``python -O``
    assert sanity_check()

    return result

#=============================================================================
# encoding helpers
#=============================================================================
_ASCII_TEST_BYTES = b"\x00\n aA:#!\x7f"
_ASCII_TEST_UNICODE = _ASCII_TEST_BYTES.decode("ascii")

def is_ascii_codec(codec):
    """Test if codec is compatible with 7-bit ascii (e.g. latin-1, utf-8; but not utf-16)"""
    return _ASCII_TEST_UNICODE.encode(codec) == _ASCII_TEST_BYTES

def is_same_codec(left, right):
    """Check if two codec names are aliases for same codec"""
    if left == right:
        return True
    if not (left and right):
        return False
    return _lookup_codec(left).name == _lookup_codec(right).name

_B80 = b'\x80'[0]
_U80 = u('\x80')
def is_ascii_safe(source):
    """Check if string (bytes or unicode) contains only 7-bit ascii"""
    r = _B80 if isinstance(source, bytes) else _U80
    return all(c < r for c in source)

def to_bytes(source, encoding="utf-8", param="value", source_encoding=None):
    """Helper to normalize input to bytes.

    :arg source:
        Source bytes/unicode to process.

    :arg encoding:
        Target encoding (defaults to ``"utf-8"``).

    :param param:
        Optional name of variable/noun to reference when raising errors

    :param source_encoding:
        If this is specified, and the source is bytes,
        the source will be transcoded from *source_encoding* to *encoding*
        (via unicode).

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * unicode strings will be encoded using *encoding*, and returned.
        * if *source_encoding* is not specified, byte strings will be
          returned unchanged.
        * if *source_encoding* is specified, byte strings will be transcoded
          to *encoding*.
    """
    assert encoding
    if isinstance(source, bytes):
        if source_encoding and not is_same_codec(source_encoding, encoding):
            return source.decode(source_encoding).encode(encoding)
        else:
            return source
    elif isinstance(source, unicode):
        return source.encode(encoding)
    else:
        raise ExpectedStringError(source, param)

def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding to use when decoding bytes instances.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * returns unicode strings unchanged.
        * returns bytes strings decoded using *encoding*
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    elif isinstance(source, bytes):
        return source.decode(encoding)
    else:
        raise ExpectedStringError(source, param)

if PY3:
    def to_native_str(source, encoding="utf-8", param="value"):
        if isinstance(source, bytes):
            return source.decode(encoding)
        elif isinstance(source, unicode):
            return source
        else:
            raise ExpectedStringError(source, param)
else:
    def to_native_str(source, encoding="utf-8", param="value"):
        if isinstance(source, bytes):
            return source
        elif isinstance(source, unicode):
            return source.encode(encoding)
        else:
            raise ExpectedStringError(source, param)

add_doc(to_native_str,
    """Take in unicode or bytes, return native string.

    Python 2: encodes unicode using specified encoding, leaves bytes alone.
    Python 3: leaves unicode alone, decodes bytes using specified encoding.

    :raises TypeError: if source is not unicode or bytes.

    :arg source:
        source unicode or bytes string.

    :arg encoding:
        encoding to use when encoding unicode or decoding bytes.
        this defaults to ``"utf-8"``.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :returns: :class:`str` instance
    """)

@deprecated_function(deprecated="1.6", removed="1.7")
def to_hash_str(source, encoding="ascii"): # pragma: no cover -- deprecated & unused
    """deprecated, use to_native_str() instead"""
    return to_native_str(source, encoding, param="hash")

_true_set = set("true t yes y on 1 enable enabled".split())
_false_set = set("false f no n off 0 disable disabled".split())
_none_set = set(["", "none"])

def as_bool(value, none=None, param="boolean"):
    """
    helper to convert value to boolean.
    recognizes strings such as "true", "false"

    :arg value:
        value to convert. strings (unicode or bytes) are matched
        case-insensitively against the known true / false / none keywords;
        other values are passed through :func:`bool`.

    :param none:
        value to return when *value* is ``None``, or is one of the
        "none" keywords (``""`` or ``"none"``).

    :param param:
        optional noun to reference when raising errors.

    :raises ValueError: if *value* is a string that isn't a recognized keyword.
    """
    assert none in [True, False, None]
    if isinstance(value, unicode_or_bytes_types):
        clean = value.lower().strip()
        if PY3 and isinstance(clean, bytes):
            # the keyword sets hold native strings; under py3 a bytes value
            # would never compare equal to them, so match on its text form.
            # (latin-1 never fails to decode; non-ascii just won't match).
            clean = clean.decode("latin-1")
        if clean in _true_set:
            return True
        if clean in _false_set:
            return False
        if clean in _none_set:
            return none
        raise ValueError("unrecognized %s value: %r" % (param, value))
    elif isinstance(value, bool):
        return value
    elif value is None:
        return none
    else:
        return bool(value)

#=============================================================================
# host OS helpers
#=============================================================================

def is_safe_crypt_input(value):
    """
    UT helper --
    test if value is safe to pass to crypt.crypt();
    under PY3, can't pass non-UTF8 bytes to crypt.crypt.
    """
    if crypt_accepts_bytes or not isinstance(value, bytes):
        return True
    try:
        value.decode("utf-8")
        return True
    except UnicodeDecodeError:
        return False

try:
    from crypt import crypt as _crypt
except ImportError: # pragma: no cover
    _crypt = None
    has_crypt = False
    crypt_accepts_bytes = False
    crypt_needs_lock = False
    _safe_crypt_lock = None
    def safe_crypt(secret, hash):
        return None
else:
    has_crypt = True
    _NULL = '\x00'

    # XXX: replace this with lazy-evaluated bug detection?
    if threading and PYPY and (7, 2, 0) <= sys.pypy_version_info <= (7, 3, 3):
        #: internal lock used to wrap crypt() calls.
        #: WARNING: if non-passlib code invokes crypt(), this lock won't be enough!
        _safe_crypt_lock = threading.Lock()

        #: detect if crypt.crypt() needs a thread lock around calls.
        crypt_needs_lock = True

    else:
        from passlib.utils.compat import nullcontext
        _safe_crypt_lock = nullcontext()
        crypt_needs_lock = False

    # some crypt() variants will return various constant strings when
    # an invalid/unrecognized config string is passed in; instead of
    # returning NULL / None. examples include ":", ":0", "*0", etc.
    # safe_crypt() returns None for any string starting with one of the
    # chars in this string...
    _invalid_prefixes = u("*:!")

    if PY3:

        # * pypy3 (as of v7.3.1) has a crypt which accepts bytes, or ASCII-only unicode.
        # * whereas CPython3 (as of v3.9) has a crypt which doesn't take bytes,
        #   but accepts ANY unicode (which it always encodes to UTF8).
        crypt_accepts_bytes = True
        try:
            _crypt(b"\xEE", "xx")
        except TypeError:
            # CPython will throw TypeError
            crypt_accepts_bytes = False
        except Exception: # no pragma
            # don't care about other errors this might throw,
            # just want to see if we get past initial type-coercion step.
            # (KeyboardInterrupt / SystemExit are deliberately not swallowed)
            pass

        def safe_crypt(secret, hash):
            if crypt_accepts_bytes:
                # PyPy3 -- all bytes accepted, but unicode encoded to ASCII,
                # so handling that ourselves.
                if isinstance(secret, unicode):
                    secret = secret.encode("utf-8")
                if _BNULL in secret:
                    raise ValueError("null character in secret")
                if isinstance(hash, unicode):
                    hash = hash.encode("ascii")
            else:
                # CPython3's crypt() doesn't take bytes, only unicode; unicode which is then
                # encoding using utf-8 before passing to the C-level crypt().
                # so we have to decode the secret.
                if isinstance(secret, bytes):
                    orig = secret
                    try:
                        secret = secret.decode("utf-8")
                    except UnicodeDecodeError:
                        return None
                    # sanity check it encodes back to original byte string,
                    # otherwise when crypt() does its encoding, it'll hash the wrong bytes!
                    assert secret.encode("utf-8") == orig, \
                        "utf-8 spec says this can't happen!"
                if _NULL in secret:
                    raise ValueError("null character in secret")
                if isinstance(hash, bytes):
                    hash = hash.decode("ascii")
            try:
                with _safe_crypt_lock:
                    result = _crypt(secret, hash)
            except OSError:
                # new in py39 -- per https://bugs.python.org/issue39289,
                # crypt() now throws OSError for various things, mainly unknown hash formats
                # translating that to None for now (may revise safe_crypt behavior in future)
                return None
            # NOTE: per issue 113, crypt() may return bytes in some odd cases.
            #       assuming it should still return an ASCII hash though,
            #       or there's a bigger issue at hand.
            if isinstance(result, bytes):
                result = result.decode("ascii")
            if not result or result[0] in _invalid_prefixes:
                return None
            return result
    else:

        #: see feature-detection in PY3 fork above
        crypt_accepts_bytes = True

        # Python 2 crypt handler
        def safe_crypt(secret, hash):
            if isinstance(secret, unicode):
                secret = secret.encode("utf-8")
            if _NULL in secret:
                raise ValueError("null character in secret")
            if isinstance(hash, unicode):
                hash = hash.encode("ascii")
            with _safe_crypt_lock:
                result = _crypt(secret, hash)
            if not result:
                return None
            result = result.decode("ascii")
            if result[0] in _invalid_prefixes:
                return None
            return result

add_doc(safe_crypt, """Wrapper around stdlib's crypt.

    This is a wrapper around stdlib's :func:`!crypt.crypt`, which attempts
    to provide uniform behavior across Python 2 and 3.

    :arg secret:
        password, as bytes or unicode (unicode will be encoded as ``utf-8``).

    :arg hash:
        hash or config string, as ascii bytes or unicode.

    :raises ValueError:
        if *secret* contains a null character
        (only checked when :func:`crypt()` is available).

    :returns:
        resulting hash as ascii unicode; or ``None`` if the password
        couldn't be hashed due to one of the issues:

        * :func:`crypt()` not available on platform.

        * Under Python 3, if *secret* is specified as bytes,
          it must use ``utf-8`` or it can't be passed
          to :func:`crypt()`.

        * Some OSes will return ``None`` if they don't recognize
          the algorithm being used (though most will simply fall
          back to des-crypt).

        * Some OSes will return an error string if the input config
          is recognized but malformed; current code converts these to ``None``
          as well.
    """)

def test_crypt(secret, hash):
    """check if :func:`crypt.crypt` supports specific hash
    :arg secret: password to test
    :arg hash: known hash of password to use as reference
    :returns: True or False
    """
    # safe_crypt() always returns unicode, which means that for py3,
    # 'hash' can't be bytes, or "== hash" will never be True.
    # under py2 unicode & str(bytes) will compare fine;
    # so just enforcing "unicode_or_str" limitation
    assert isinstance(hash, unicode_or_str), \
        "hash must be unicode_or_str, got %s" % type(hash)
    assert hash, "hash must be non-empty"
    return safe_crypt(secret, hash) == hash

timer = timeit.default_timer
# legacy alias, will be removed in passlib 2.0
tick = timer

def parse_version(source):
    """helper to parse version string

    :arg source: string containing a dotted version number (e.g. ``"v1.7.4-dev"``)
    :returns:
        tuple of ints for the first dotted run of digits found
        (at least two components), or ``None`` if there is none.
    """
    m = re.search(r"(\d+(?:\.\d+)+)", source)
    if m:
        return tuple(int(elem) for elem in m.group(1).split("."))
    return None

#=============================================================================
# randomness
#=============================================================================

#------------------------------------------------------------------------
# setup rng for generating salts
#------------------------------------------------------------------------

# NOTE:
# generating salts (e.g. h64_gensalt, below) doesn't require cryptographically
# strong randomness. it just requires enough range of possible outputs
# that making a rainbow table is too costly. so it should be ok to
# fall back on python's builtin mersenne twister prng, as long as it's seeded each time
# this module is imported, using a couple of minor entropy sources.
try:
    os.urandom(1)
    has_urandom = True
except NotImplementedError: # pragma: no cover
    has_urandom = False

def genseed(value=None):
    """generate prng seed value from system resources

    :arg value:
        optional extra value to mix into the seed.
        if it looks like an RNG (has ``getstate`` and ``getrandbits``),
        its state (or 4k of its output) is mixed in instead of the object itself.

    :returns:
        integer derived from the SHA512 digest of the gathered values.
    """
    from hashlib import sha512
    if hasattr(value, "getstate") and hasattr(value, "getrandbits"):
        # caller passed in RNG as seed value
        try:
            value = value.getstate()
        except NotImplementedError:
            # this method throws error for e.g. SystemRandom instances,
            # so fall back to extracting 4k of state
            value = value.getrandbits(1 << 15)
    text = u("%s %s %s %.15f %.15f %s") % (
        # if caller specified a seed value, mix it in
        value,

        # add current process id
        # NOTE: not available in some environments, e.g. GAE
        os.getpid() if hasattr(os, "getpid") else None,

        # id of a freshly created object.
        # (at least 1 byte of which should be hard to predict)
        id(object()),

        # the current time, to whatever precision os uses
        time.time(),
        tick(),

        # if urandom available, might as well mix some bytes in.
        os.urandom(32).decode("latin-1") if has_urandom else 0,
        )
    # hash it all up and return it as int/long
    return int(sha512(text.encode("utf-8")).hexdigest(), 16)

if has_urandom:
    rng = random.SystemRandom()
else: # pragma: no cover -- runtime detection
    # NOTE: to reseed use ``rng.seed(genseed(rng))``
    # XXX: could reseed on every call
    rng = random.Random(genseed())

#------------------------------------------------------------------------
# some rng helpers
#------------------------------------------------------------------------
def getrandbytes(rng, count):
    """return byte-string containing *count* number of randomly generated bytes, using specified rng

    :arg rng:
        random number generator; must provide ``getrandbits()``.

    :arg count:
        number of bytes to generate. a false value (e.g. ``0``)
        returns an empty byte string without touching the rng.

    :returns:
        byte string of length *count*, built from a single
        ``rng.getrandbits(count * 8)`` call, least-significant byte first.
    """
    # NOTE: would be nice if this was present in stdlib Random class

    ###just in case rng provides this...
    ##meth = getattr(rng, "getrandbytes", None)
    ##if meth:
    ##    return meth(count)

    if not count:
        return _BEMPTY
    def helper():
        # XXX: break into chunks for large number of bits?
        value = rng.getrandbits(count<<3)
        i = 0
        while i < count:
            yield value & 0xff
            # consume a whole byte (8 bits) per step. shifting by less than 8
            # makes adjacent output bytes share bits, and leaves most of the
            # requested random bits unused.
            value >>= 8
            i += 1
    return join_byte_values(helper())

def getrandstr(rng, charset, count):
    """return string containing *count* number of chars/bytes, whose elements are drawn from specified charset, using specified rng

    :arg rng:
        random number generator; must provide ``randrange()``.

    :arg charset:
        unicode or bytes sequence of elements to draw from; must not be empty.

    :arg count:
        number of elements to generate; must be >= 0.

    :returns:
        unicode string if *charset* is unicode, otherwise a byte string.

    :raises ValueError:
        if *count* is negative, or *charset* is empty.
    """
    # NOTE: tests determined this is 4x faster than rng.sample(),
    # which is why that's not being used here.

    # check alphabet & count
    if count < 0:
        raise ValueError("count must be >= 0")
    letters = len(charset)
    if letters == 0:
        raise ValueError("alphabet must not be empty")
    if letters == 1:
        # only one possible output, no randomness needed
        return charset * count

    # get random value, and write out to buffer
    def helper():
        # XXX: break into chunks for large number of letters?
        # pick one integer in [0, letters**count), then emit its
        # base-``letters`` digits (least significant first) as charset elements.
        value = rng.randrange(0, letters**count)
        i = 0
        while i < count:
            yield charset[value % letters]
            value //= letters
            i += 1

    if isinstance(charset, unicode):
        return join_unicode(helper())
    else:
        return join_byte_elems(helper())

_52charset = '2346789ABCDEFGHJKMNPQRTUVWXYZabcdefghjkmnpqrstuvwxyz'

@deprecated_function(deprecated="1.7", removed="2.0",
                     replacement="passlib.pwd.genword() / passlib.pwd.genphrase()")
def generate_password(size=10, charset=_52charset):
    """generate random password using given length & charset

    :param size:
        size of password.

    :param charset:
        optional string specified set of characters to draw from.

        the default charset contains all normal alphanumeric characters,
        except for the characters ``1IiLl0OoS5``, which were omitted
        due to their visual similarity.

    :returns: :class:`!str` containing randomly generated password.

    .. note::

        Using the default character set, on an OS with :class:`!SystemRandom` support,
        this function should generate passwords with 5.7 bits of entropy per character.
    """
    return getrandstr(rng, charset, size)

#=============================================================================
# object type / interface tests
#=============================================================================

# attributes an object must expose for is_crypt_handler() to accept it
_handler_attrs = (
        "name",
        "setting_kwds", "context_kwds",
        "verify", "hash", "identify",
        )

def is_crypt_handler(obj):
    """check if object follows the :ref:`password-hash-api`"""
    # XXX: change to use isinstance(obj, PasswordHash) under py26+?
    # duck-typed check: True only if every name in _handler_attrs is present
    return all(hasattr(obj, name) for name in _handler_attrs)

# attributes an object must expose for is_crypt_context() to accept it
_context_attrs = (
        "needs_update",
        "genconfig", "genhash",
        "verify", "encrypt", "identify",
        )

def is_crypt_context(obj):
    """check if object appears to be a :class:`~passlib.context.CryptContext` instance"""
    # XXX: change to use isinstance(obj, CryptContext)?
    # duck-typed check: True only if every name in _context_attrs is present
    return all(hasattr(obj, name) for name in _context_attrs)

##def has_many_backends(handler):
##    "check if handler provides multiple backends"
##    # NOTE: should also provide get_backend(), .has_backend(), and .backends attr
##    return hasattr(handler, "set_backend")

def has_rounds_info(handler):
    """check if handler provides the optional :ref:`rounds information <rounds-attributes>` attributes"""
    # requires both a 'rounds' setting keyword and a non-None ``min_rounds``
    return ('rounds' in handler.setting_kwds and
            getattr(handler, "min_rounds", None) is not None)

def has_salt_info(handler):
    """check if handler provides the optional :ref:`salt information <salt-attributes>` attributes"""
    # requires both a 'salt' setting keyword and a non-None ``min_salt_size``
    return ('salt' in handler.setting_kwds and
            getattr(handler, "min_salt_size", None) is not None)

##def has_raw_salt(handler):
##    "check if handler takes in encoded salt as unicode (False), or decoded salt as bytes (True)"
##    sc = getattr(handler, "salt_chars", None)
##    if sc is None:
##        return None
##    elif isinstance(sc, unicode):
##        return False
##    elif isinstance(sc, bytes):
##        return True
##    else:
##        raise TypeError("handler.salt_chars must be None/unicode/bytes")

#=============================================================================
# eof
#=============================================================================