#! /usr/bin/python3 # Compute test cases for ka-* tests. # # Written by Zack Weinberg in 2019. # To the extent possible under law, Zack Weinberg has waived all # copyright and related or neighboring rights to this work. # # See https://creativecommons.org/publicdomain/zero/1.0/ for further # details. # This program generates ka-table.inc, which defines the set # of tests performed by the ka-*.c tests. It is not run automatically # during the build for two reasons: it's very slow, and it requires Python # 3.6 or greater with Passlib # available. # # If you modify this program, make sure to update ka-table.inc, # by running 'make regen-ka-table' (libcrypt.so must already have been # built), and then check in the updates to that file in the same # commit as your changes to this program. You will need to install # Passlib itself, but not any other libraries. # # This program intentionally uses Passlib's slow pure-Python back # ends, rather than accelerated C modules that tend to be, at their # core, the same code libxcrypt uses itself, so that we really are # testing libxcrypt against known answers generated with a different # implementation. import array import ctypes import multiprocessing import os import re import sys # force passlib to allow use of its built-in bcrypt implementation os.environ["PASSLIB_BUILTIN_BCRYPT"] = "enabled" import passlib.hash # In order to tickle various bugs and limitations in older hashing # methods precisely, we need to test several passphrases whose byte # sequences are not valid Unicode text in any encoding. We therefore # use exclusively byte strings in this array. PHRASES = [ # All ASCII printable, various lengths. Most of these were taken # from older known-answer tests for specific hashing methods. b'', b' ', b'a', b'ab', b'abc', b'U*U', b'U*U*', b'U*U*U', b'.....', b'dragon', b'dRaGoN', b'DrAgOn', b'PAROLX', b'U*U***U', b'abcdefg', b'01234567', b'726 even', b'zyxwvuts', b'ab1234567', b'alexander', b'beautiful', b'challenge', b'chocolate', b'cr1234567', b'katherine', b'stephanie', b'sunflower', b'basketball', b'porsche911', b'|_337T`/p3', b'thunderbird', b'Hello world!', b'pleaseletmein', b'a short string', b'zxyDPWgydbQjgq', b'photojournalism', b'ecclesiastically', b'congregationalism', b'dihydrosphingosine', b'semianthropological', b'palaeogeographically', b'electromyographically', b'noninterchangeableness', b'abcdefghijklmnopqrstuvwxyz', b'electroencephalographically', b'antidisestablishmentarianism', b'cyclotrimethylenetrinitramine', b'dichlorodiphenyltrichloroethane', b'multiple words seperated by spaces', b'supercalifragilisticexpialidocious', b'we have a short salt string but not a short password', b'multiple word$ $eperated by $pace$ and $pecial character$', b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', (b'1234567890123456789012345678901234567890' b'1234567890123456789012345678901234567890'), (b'a very much longer text to encrypt. This one even stretches over more' b'than one line.'), # ASCII printables with their high bits flipped - DES-based hashes collide. # All of these have an exact counterpart above. b'\xd0\xc1\xd2\xcf\xcc\xd8', # 'PAROLX' b'\xd5\xaa\xd5\xaa\xaa\xaa\xd5\xaa', # 'U*U***U*' b'\xe1\xec\xe5\xf8\xe1\xee\xe4\xe5\xf2', # 'alexander' b'\xf3\xf4\xe5\xf0\xe8\xe1\xee\xe9\xe5', # 'stephanie' # '*U*U*U*U*U*U*U*U*' b'\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa', # A few UTF-8 strings and what they will collide with for # DES-based hashes. b'\xC3\xA9tude', b'C)tude', # UTF-8(NFC(étude)) b'Chl\xC3\xB6e', b'ChlC6e', # UTF-8(NFC(Chlöe)) # Eight letters, but 10 bytes: UTF-8(NFC(Ångström)) b'\xC3\x85ngstr\xC3\xB6m', b'C\x05ngstrC6m', b'C\x05ngstrC' # descrypt truncates everything to 8 characters. b'U*U***U*', b'U*U***U*ignored', b'U*U*U*U*', b'U*U*U*U*ignored', b'*U*U*U*U', b'*U*U*U*U*', b'*U*U*U*U*U*U*U*U', b'*U*U*U*U*U*U*U*U*', # Patterns designed to tickle the bcrypt $2x$ sign-extension bug. b'\xa3', b'\xa3a', b'\xd1\x91', b'\xa3ab', b'\xff\xff\xa3', b'1\xa3345', b'\xff\xa3345', b'\xff\xa334\xff\xff\xff\xa3345', (b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'), (b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'), # bcrypt truncates to 72 characters (b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' b'0123456789'), (b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' b'0123456789chars after 72 are ignored'), (b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'), (b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' b'chars after 72 are ignored as usual'), # bigcrypt truncates to 128 characters # (first sentence of _Twenty Thousand Leagues Under The Sea_) (b'THE YEAR 1866 was marked by a bizarre development, an unexplained and ' b'downright inexplicable phenomenon that surely no one has forgotten.'), # first 8 characters of above (des) b'THE YEAR', # first 72 characters (bcrypt) b'THE YEAR 1866 was marked by a bizarre development, an unexplained and do', # first 128 characters (bigcrypt) (b'THE YEAR 1866 was marked by a bizarre development, an unexplained and ' b'downright inexplicable phenomenon that surely no one has f') ] # passlib does not support all of the hashing methods we do, no longer # supports generation of bare setting strings, and in some cases does # not support all of the variants we need to generate. Therefore, # there is a shim for each hashing method (variant). Shims must be # named 'h_METHOD' where METHOD is the name for the method used by the # INCLUDE_method macros. # # Each shim function takes at least the arguments phrase, rounds, and # salt, in that order. Additional optional arguments are allowed. It # should do 'yield (phrase, setting, expected)' at least once, where # phrase is the phrase argument, setting is a setting string generated # from rounds and salt, and expected is the hashed passphrase expected # to be generated from that combination of phrase and setting. # # When implementing new shims, use of passlib's pure-Python "backends" # is strongly preferred where possible, because the speed of this # program does not matter, and the C backends tend to be based on some # incarnation of the same code that libxcrypt uses itself, so it # wouldn't be a proper interop test. # straightforward passlib wrappers, sorted by age of algorithm DES_CRYPT = passlib.hash.des_crypt DES_CRYPT.set_backend("builtin") def h_descrypt(phrase, rounds, salt): expected = DES_CRYPT.using( salt=salt, truncate_error=False ).hash(phrase) setting = expected[:2] yield (phrase, setting, expected) BIGCRYPT = passlib.hash.bigcrypt # BIGCRYPT.set_backend("builtin") # currently p.h.bigcrypt always uses builtin def h_bigcrypt(phrase, rounds, salt): # p.h.bigcrypt doesn't truncate to 128 chars. # The bigcrypt implementation in libxcrypt was reverse engineered # from a closed-source original and it's possible that they could # have gotten it wrong, but let's stick to what we have. expected = BIGCRYPT.using( salt=salt ).hash(phrase[:128]) # bigcrypt has no prefix, so our crypt() looks at the length of # the setting string to decide whether it should use bigcrypt or # descrypt. For bigcrypt to be used, the setting must be too long # to be a traditional DES hashed password. setting = expected[:2] + ".............." yield (phrase, setting, expected) BSDI_CRYPT = passlib.hash.bsdi_crypt BSDI_CRYPT.set_backend("builtin") def h_bsdicrypt(phrase, rounds, salt): expected = BSDI_CRYPT.using( salt=salt, rounds=rounds ).hash(phrase) setting = expected[:9] yield (phrase, setting, expected) MD5_CRYPT = passlib.hash.md5_crypt MD5_CRYPT.set_backend("builtin") def h_md5crypt(phrase, rounds, salt): expected = MD5_CRYPT.using( salt=salt ).hash(phrase) setting = expected[:expected.rfind('$')] yield (phrase, setting, expected) BSD_NTHASH = passlib.hash.bsd_nthash #BSD_NTHASH.set_backend("builtin") # has only the built-in backend def h_nt(phrase, rounds, salt): # passlib.hash.bsd_nthash attempts to decode byte strings as UTF-8, which # is Not What We Want. Python's iso_8859_1 is an identity map from 00..FF # to U+0000..U+00FF, which is correct for this application. expected = BSD_NTHASH.hash(phrase.decode("iso_8859_1")) # NTHash doesn't have a salt. # Older versions of libxcrypt generated a fake salt which # we should ensure is ignored. yield (phrase, "$3$", expected) yield (phrase, "$3$__not_used__0123456789abcdef", expected) SHA1_CRYPT = passlib.hash.sha1_crypt SHA1_CRYPT.set_backend("builtin") def h_sha1crypt(phrase, rounds, salt): expected = SHA1_CRYPT.using( salt=salt, rounds=rounds ).hash(phrase) setting = expected[:expected.rfind('$')] yield (phrase, setting, expected) SHA256_CRYPT = passlib.hash.sha256_crypt SHA256_CRYPT.set_backend("builtin") def h_sha256crypt(phrase, rounds, salt): expected = SHA256_CRYPT.using( salt=salt, rounds=rounds ).hash(phrase) setting = expected[:expected.rfind('$')] yield (phrase, setting, expected) SHA512_CRYPT = passlib.hash.sha512_crypt SHA512_CRYPT.set_backend("builtin") def h_sha512crypt(phrase, rounds, salt): expected = SHA512_CRYPT.using( salt=salt, rounds=rounds ).hash(phrase) setting = expected[:expected.rfind('$')] yield (phrase, setting, expected) # these need to do more work by hand # We need to test setting strings both with and without the suffix # that triggers the off-by-one error in the original Sun hash parser # (which must be preserved by all interoperable implementations). # passlib.hash.sun_md5_crypt.using(..., bare_salt=True) does not # actually work. from passlib.handlers.sun_md5_crypt import raw_sun_md5_crypt def h_sunmd5(phrase, rounds, salt): # sunmd5 is extremely slow in this test, compared to all the other # hashes, because we have to do extra tests of bug-compatibility, # because its round count cannot be reduced below 4096, and # because on approximately half of those rounds it feeds an # additional 1.5k of text to MD5_Update. The only optimization # that wouldn't break compatibility would be to plug in a faster # MD5 core, but that's not worth the engineering effort since it # would only benefit obsolete hashes. Instead, skip most of the # test phrases for this hash. This cuts the wall-clock time for # ka-sunmd5 (on a current-generation x86-64) from fifty to nine # seconds, which we can live with. sunmd5 feeds the phrase # verbatim to MD5_Update, only once, with no length limit, so we # don't need a lot of careful testing of different phrases. We do # still include at least a few of the non-ASCII test phrases, and # one very long phrase. if 6 <= len(phrase) <= 128: return if rounds == 0: bare_setting = "$md5$" + salt else: bare_setting = "$md5,rounds={}${}".format(rounds, salt) suff_setting = bare_setting + "$" bare_cksum = raw_sun_md5_crypt(phrase, rounds, bare_setting.encode("ascii")) suff_cksum = raw_sun_md5_crypt(phrase, rounds, suff_setting.encode("ascii")) bare_cksum = bare_cksum.decode("ascii") suff_cksum = suff_cksum.decode("ascii") yield (phrase, bare_setting, bare_setting + "$" + bare_cksum) yield (phrase, bare_setting + "$x", bare_setting + "$" + bare_cksum) yield (phrase, suff_setting, suff_setting + "$" + suff_cksum) yield (phrase, suff_setting + "$", suff_setting + "$" + suff_cksum) # testing bcrypt $2b$ and $2y$ is easy, but ... BCRYPT = passlib.hash.bcrypt BCRYPT.set_backend("builtin") def h_bcrypt(phrase, rounds, salt): expected = BCRYPT.using( salt=salt, rounds=rounds, ident="2b" ).hash(phrase) setting = expected[:-31] yield (phrase, setting, expected) def h_bcrypt_y(phrase, rounds, salt): expected = BCRYPT.using( salt=salt, rounds=rounds, ident="2y" ).hash(phrase) setting = expected[:-31] yield (phrase, setting, expected) # ...passlib doesn't implement the quirks of crypt_blowfish's $2a$ or # $2x$, but we really must test them. It is theoretically possible to # implement the $2x$ quirk by a transformation on the input # passphrase, but it would be hard to get right, and the $2a$ quirk # cannot be implemented this way. The path of least resistance is to # compute the $2b$ hash and then look up its output in a table of # substitutions for each quirk. The collision resistance of the $2b$ # hash should protect us from false hits in these tables. (Remember # that the $2x$ quirk is a _bug_, preserved for backward compatibility's # sake, that causes output collisions; duplicate entries on the # right-hand side of the $2x$ substitution table are expected.) bcrypt_a_substitutions = { 'HdhhdUXVgLADnbTYf12kvsasO1gS51C': '5jlqAXzFdq.3//pJFBa432Pepsclbdu', 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa': '7N5c8AaH.dbqz7.2o.V2mRkUDV0TZnO', '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW': 'D8jTC5oJeIumVOhMVpz79BzoGVhCjrW', '9f4sA9SRA0scUKcRyC5kce8dao2.GKe': 'E8Lpo1/qkGPTBDBxEsJjeEzh9nkZ9uW', 'MOaOTHB4gEm.rriBjXNwBNh.Oc4mKGG': 'ZQhQBRpiYJCaQFgyHlB.t/F01cqLCIu', 'Qjdj3GXX7D0sFE9jji6wxSTWIhqI3US': 'euRNRfAA6e0fjpTfQPPAMU1PCOf9IHq', 'PIeeyENZVZmrKLAq5lwBUU9fMRVfV2m': 'h87NWu/js59XXaIj1hDyHxnjw7MJ5K6', 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142': 'vrukkCtLqHBLoBDsz6QoBtSwzI9Qxiq', } def h_bcrypt_a(phrase, rounds, salt): base = BCRYPT.using( salt=salt, rounds=rounds, ident="2b" ).hash(phrase) base_setting = base[4:-31] base_output = base[-31:] output = bcrypt_a_substitutions.get(base_output, base_output) setting = "$2a$" + base_setting expected = setting + output yield (phrase, setting, expected) bcrypt_x_substitutions = { 'xGPMyJSPyyeICKolPQ2gecm8rOgHwz.': '.sDifhVkUxvjPx6U4yeM2tC411Wuc.W', 'SRMKxjeMqVSDMhSLhOnvtEZ/p5KhUbq': '1Z0zKnHbUU3q/kk//Pknlv19a4/T8.K', 'PQInhDOdCKnXeUE.n/L.kQmTKM9ldK2': '25hhqa/GOJGmXui3avNI5MN8lOI2bCW', '7UwHe/ywPmdp.nr.ZLQSxd8hqn7qURW': '2E0h7UFL/4fALemA5ApWrCWllQXSPTu', 'FZEYKXyyMgG13MK0uV8dwNotWf4Wm6e': '2M7Vc.sF98e8DDmnxFjRfAmrudbv6y.', 'ZHZAnvydiPNiYH2VjRNhEAD6BEiyaWS': '3kVpkKaj1q2TAXm.ptIi98Nj3zVV8A2', 'I7vjUzDOKf8XqcK8VSCm9b0bwoSm1Qm': '6He0iAS8JsdM.iB4OQ4fbsKMXPagLhy', 'tLCLEXAt3RUjOgs.yvfWSni4j1JX/JS': '7bLwFi3rlVcl.xfhc7LxjqwOExfxki2', 'zQPVIDk6wF8XmESji30KDHFabTlu0WK': '8gGm3RkYFflDX50UQs.tJ8InKNy.HGO', 'KBv1eBM2he2T/QheS8zPHMejn5fMNRe': '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW', 'XEhidoDX1kz.RFnwWIzMJvtW2aP/k4e': '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW', 'UIzecIiCguM2sPh1F7L9IS3zOtmg5Im': '9f4sA9SRA0scUKcRyC5kce8dao2.GKe', 'Xl2V4mQ7s0zX0b4XXH/b9UkRRCWpq3e': '9f4sA9SRA0scUKcRyC5kce8dao2.GKe', 'tHwehGUs3q0b/Ejn42MsoM4Yv/iA1rq': '9iflVP0Ezo/iaxO0XS74wFglLNeryTS', 'K2KXzhsMefB8v6hJJG3bOyKV.XRf6Qe': 'C3nvb4DotHdnRYgOPcdiK0C4q.DkIDO', 'EuEnx.TyCLYgkTV/uhWL5xJTHV7ZMjG': 'HdhhdUXVgLADnbTYf12kvsasO1gS51C', 'oaE0x.b2rUEITWgPdg.GEcU4ePHLh6m': 'IZEWKJgp.b.KG29zgOadgd2rUt5iV1e', '3cFqlEl7Y0HWaVLHpyyCqG8dBNk1DSm': 'IvJ5WHgSbYKj7g9hhdVsAjyzcnVT/.m', 'gI4g/1M6K/Sz2bsgu9VDeEl6reszuXa': 'J6Y/kPTV/aHj7iJKuDfD5OPjVTvT2BK', 'rz1efvzeJjL4mQ813hrZNg3p1.ivOii': 'MOaOTHB4gEm.rriBjXNwBNh.Oc4mKGG', 'Tp1b9XCEV16BcrQ.0k4xf7V/OGPZLnK': 'N5E4WSTo/R5henexIN1o8xkGwe2V86W', 'CARhc7ugFdgoPjDb7LUG.yQF2lboK6e': 'NjlOVoE5aHHQGtU9zc25wu0VykHnD1G', 'nCdKcLO57oLlc6J6sNnGyfT9FrIawiW': 'PIeeyENZVZmrKLAq5lwBUU9fMRVfV2m', 'DQjlTXDA5PBQ97.qBJY/vsHPQhLJDMe': 'PMOS6ygjFMSbDo.iJJam/G63inGIOBO', '1qOUgfpg30XDHLx/zrbWiMRcWyFhwye': 'QZ7A0p9q1Ag9Utfnfl/xif8NiDtVhO.', 'h0JFRyDXfP0duxAkVxWGr8nMDEDvPca': 'QiT.KUY9PXgIzL2aECMKb0EvVl0Pzw6', 'BvtRGGx3p8o0C5C36uS442Qqnrwofrq': 'Qjdj3GXX7D0sFE9jji6wxSTWIhqI3US', 'UTFLPGm29p.YVzcY6pqejGEql1x8Ccq': 'TYqa73Yp3leHe3D6.ysuJtNLwOma87C', 'YdPam5/ypFIyDUQMyCCEIwzVsTi0Sa6': 'TmFuGBy/Zgc6JVAr667oHeCvGQGyS1q', 'gbhoNOH4mWxoEhRrQNdeI.rpk9XeuZS': 'UPPO3QqmgMIXGHvbOLe2IkNzHLAToY2', 'ZkQGqjbMpqQ9oCsxNZjN8LQJaHFqPMC': 'VTMVcF7YBLV2/O6V1PNcQw0BD3hTN6a', 'RbKkfW2ph8bd8B5yul5E97DxgDw9cT.': 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142', 'WI7ZNXFtzCd9mN1mWoNMQRHEmkDsZnm': 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142', '2WegkGS5Xr/qYNkfEi6JmnR16WVSwcW': 'Xv3TUB0NdnMpyn4cfg4g48oZxRSIrNC', 'LN/CEHXLfFeYdOOxbdxKu8ZqSIKgqAu': 'YdqUOXeMKw7X6zbqBXP6c1xqIKun7Oq', 'VAQY6kySmwStlNY.sut9Y87njVr0mm.': 'Ysbn1VpHCTzInfW/z/8Q3k676rxfmSW', 'qJn4AY9ch/WAR5JXeeJtVGeovjQrhd2': 'ZH9vItRapPbkFKo0iQqU4v71o0e19Mm', '4QrucGf30zIbQA.sO0d1QrU63xBrEYq': 'bqJMLkbvnTFj0OYMu9tPnQXstRzX/e6', 'HD8RnTmGEavoR3LFVfdHvh3xA0QPka2': 'cYbtH8J2lfpMIiBKfF3pKpMno7JlLui', '6WgD2zYQDPgxR2sXlUeEeGKknxt95W.': 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa', 'rPSVExmrZ2WB1xntSbqZ/DRQRlKtVw.': 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa', 'PYLCOpTKZmhFn1CoBM2XNbWgqMX4Jk2': 'cxMAJfIx3T.Fv3O0KjL9VdM/oSSUVRK', 'w8RVl3rh7sNazq544l0944qGq4GUFUq': 'fOj7giyJz5k22FHTKGVo8o1o5zGzPsq', 'KHsCqMFVxOAGJObHwEBR3JaEdKVu1.m': 'fRmxM11/x97bxCrhecMENdkPm7YpRbe', '3wn02pxRJPnFwvlGt75DURDbt4g7om.': 'fY4v5x6.8txtKUKDP86z1xjlXG/GgZO', 'k9Hv17Gha84losGKAq61csCZokj5pyy': 'gLfxf5sydYesf658mrFYb51nLrn/4Sm', 'uTNb9MEHVGI7kd6UnQjYxgRNiKJM01S': 'h.z2vLHB/tYSU5fPXkrYB7TxLHGJnI6', 'heAts1y/8kcTTP0/vD3yeuMX1ihF8dO': 'j72N2Fi2j3pGalOZvTqtyH3bYGotuju', 'SqNATdQiNEckAKLsqgsKbAM5.hZoMCq': 'mjGosqV8OkKEcduYTNz5PKN2scswFya', 'k786rdsOdUP4cRi.dLa3dsYueMj5UnS': 'nQF1kDoMDjBBwXy2wwMni2gJLKqA0ta', 'G6PeIXKiqeNUPUbqFkMJvvI7G9hd51W': 'oBvt6zJCTP5OED1esTYUYPn31cWqwsa', 'TszY8.avBpwJ6xbNjwws3SKBbK6kj6S': 'odAvHZH9azlhi1x4pBLF25.hj08RMFi', 'z4QFggBRTVUeHRGL/CQxlAYHraYPcpa': 'ojiyBkc.4HZ2y5Yh0LxBbI6ZkLiRg0C', 'PPtdI0NcxZ4Txyv/Y5ORfcP1XFriKT2': 'pjce7u/YRnectNa8DXjsSGzRdyH2PSG', 'uIZ1Lgb.jHRDU/Z/LVXfpQCK72fTEHq': 'q5NMeQZ0UTyP/bILj02wdQ.Si5KHU1K', '51cV.PJOQVwmiao4t4lXsb9Cc3Jnuem': 'rB3dV.fJGdSihNlP0vo5PemoaZRp6LS', 's6h1E6A2RzVn2KxXLQXsKosQeRo8bLa': 'sND7G4.cx6Dzn6TqbXfK99bElU0a7P.', 'A96emG/jBf0K1K6vCG.eZGdLkSridom': 'tlD3cmtHgs/TwWAvy5E3F.freZS1bau', 'hWYb0x3Q3zM0aBkB2G1arbzmWxRQS/i': 'whpbcVuyGrJbgveSSM3XQKa8G5alyRm', 'AqM0XavJxJXeVlJ3Te3umGJaPOCYmZi': 'xP2lldc1.10LvZDjJZXNBKLzWqnkbOa', 'UPBzTBMwJb5mKWflQ.5Rid4481RrxVy': 'xSD.pz8Zg3vt0Jiovghl5Dqrs8aw8ni', 'yM59Cq5iVZDB3u45gTNhRSnOgrY1tdG': 'yED5tIjzyeH90te88BUWvTrMFHsWgCi', 'k.qekGiJym3QgfeFCwNhPHg0Zk99KSa': 'yphVralDu2JlxYbCqwwGli/H6wBgBtC', 'iYbzuFNFwSfCgqTGNsUFtSDh8PJAqSe': 'zAUUWh4XGsBGYs6yyUJTSfEgzoLXO6G', } def h_bcrypt_x(phrase, rounds, salt): base = BCRYPT.using( salt=salt, rounds=rounds, ident="2b" ).hash(phrase) base_setting = base[4:-31] base_output = base[-31:] output = bcrypt_x_substitutions.get(base_output, base_output) setting = "$2x$" + base_setting expected = setting + output yield (phrase, setting, expected) # passlib includes an scrypt implementation, but its encoded password # format is not the $7$ format we implement, so instead we use the # stdlib's hashlib.scrypt (this is why 3.6+ is required) and encode # the setting string by hand. This may produce strings encoding N/r/p # combinations that don't normally occur in the wild, but that's OK. # The algorithm implemented by hashlib.scrypt is standardized as # RFC 7914, so it's not an issue where that implementation came from. # Yes, the salt is properly passed to raw_scrypt as-is. from hashlib import scrypt as raw_scrypt from passlib.utils.binary import h64 as hash64 def h_scrypt(phrase, rounds, salt): p = 1 r = 8 log2N = rounds + 7 N = 1 << log2N bytesalt = salt.encode("ascii") setting = (b"$7$" + hash64.encode_int6(log2N) + hash64.encode_int30(r) + hash64.encode_int30(p) + bytesalt) binhash = raw_scrypt(phrase, salt=bytesalt, p=p, r=r, n=N, dklen=32) yield (phrase, setting, setting + b'$' + hash64.encode_bytes(binhash)) # # passlib does not support either yescrypt or gost-yescrypt. In fact, # as far as I can tell, at the time of writing, there exists only one # implementation of yescrypt and gost-yescrypt, by Solar Designer et al # which is the code we use ourselves. However, a test for round- # trippability and API consistency is still worthwhile, as is a test # that the implementation's current behavior is compatible with its # behavior some time ago. Therefore, we encode setting strings by # hand, and ctypes is used to access crypt_ra in the just-built # libcrypt.so. This will only work if the library was configured with # --enable-hashes=yescrypt,gost-yescrypt,[others] and --enable-shared, # which is OK, since it's not run during a normal build. Remove this # once passlib supports these hashes. # # crypt_ra is used because it's thread-safe but doesn't require us to # know how big struct crypt_data is. There is no good way to arrange # for the data object to be deallocated. Oh well. LIBCRYPT = ctypes.cdll.LoadLibrary(os.path.join(os.getcwd(), ".libs", "libcrypt.so")) _xcrypt_crypt_ra = LIBCRYPT.crypt_ra _xcrypt_crypt_ra.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_int)] _xcrypt_crypt_ra.restype = ctypes.c_char_p _xcrypt_crypt_ra_data = ctypes.c_void_p(0) _xcrypt_crypt_ra_datasize = ctypes.c_int(0) def xcrypt_crypt(phrase, setting): global _xcrypt_crypt_ra_data, _xcrypt_crypt_ra_datasize if not isinstance(phrase, bytes): phrase = phrase.encode("utf-8") if not isinstance(setting, bytes): setting = setting.encode("ascii") rv = _xcrypt_crypt_ra(phrase, setting, ctypes.byref(_xcrypt_crypt_ra_data), ctypes.byref(_xcrypt_crypt_ra_datasize)) if not rv: err = ctypes.get_errno() raise OSError(err, os.strerror(err)) return bytes(rv) def yescrypt_gensalt(ident, rounds, salt): if rounds == 1: params = "j75" elif rounds == 2: params = "j85" else: raise RuntimeError("don't know how to encode rounds={}" .format(rounds)) return "${}${}${}".format(ident, params, salt) def h_yescrypt(phrase, rounds, salt): setting = yescrypt_gensalt("y", rounds, salt) yield (phrase, setting, xcrypt_crypt(phrase, setting)) def h_gost_yescrypt(phrase, rounds, salt): setting = yescrypt_gensalt("gy", rounds, salt) yield (phrase, setting, xcrypt_crypt(phrase, setting)) # Each method should contribute a group of parameters to the array # below. Each block has the form # # ('method', [ # (rounds, salt), # (rounds, salt), # ... # ]) # # where 'method' is the method name used in the INCLUDE_ macros, # rounds is a number and salt is a salt string. The appropriate # h_method function will be called with arguments (phrase, *params) # where params is one of the tuples from its block, so it is OK to add # extra arguments after the salt if necessary. # # If the method has a tunable rounds parameter, its array of # (rounds, salt) pairs should have two salts * at least two values of # the rounds parameter. If it does not, it should have two salts and # use 0 for the rounds parameter. # # The point of this test is not to exercise brute force resistance, # so keep cost parameters low. # # Methods should be in alphabetical order by their INCLUDE_macro name. SETTINGS = [ ('bcrypt', [ (5, 'CCCCCCCCCCCCCCCCCCCCC.'), (5, 'abcdefghijklmnopqrstuu'), (4, 'CCCCCCCCCCCCCCCCCCCCC.'), (4, 'abcdefghijklmnopqrstuu'), ]), ('bcrypt_y', [ (5, 'CCCCCCCCCCCCCCCCCCCCC.'), (5, 'abcdefghijklmnopqrstuu'), (4, 'CCCCCCCCCCCCCCCCCCCCC.'), (4, 'abcdefghijklmnopqrstuu'), ]), ('bcrypt_a', [ (5, 'CCCCCCCCCCCCCCCCCCCCC.'), (5, 'abcdefghijklmnopqrstuu'), (4, 'CCCCCCCCCCCCCCCCCCCCC.'), (4, 'abcdefghijklmnopqrstuu'), ]), ('bcrypt_x', [ (5, 'CCCCCCCCCCCCCCCCCCCCC.'), (5, 'abcdefghijklmnopqrstuu'), (4, 'CCCCCCCCCCCCCCCCCCCCC.'), (4, 'abcdefghijklmnopqrstuu'), ]), ('bigcrypt', [ (0, 'CC'), (0, 'ab'), ]), # The bsdicrypt round count is required to be odd. ('bsdicrypt', [ (1, 'CCCC'), (1, 'abcd'), (13, 'CCCC'), (13, 'abcd'), ]), ('descrypt', [ (0, 'CC'), (0, 'ab'), ]), ('gost_yescrypt', [ (1, '.......'), (1, 'LdJMENpBABJJ3hIHjB1Bi.'), (2, '.......'), (2, 'LdJMENpBABJJ3hIHjB1Bi.'), ]), ('md5crypt', [ (0, 'CCCCCCCC'), (0, 'abcdefgh'), ]), ('nt', [ (0, ''), ]), ('scrypt', [ (1, 'SodiumChloride'), (1, 'unUNunUNunUNun'), (2, 'SodiumChloride'), (2, 'unUNunUNunUNun'), ]), ('sha1crypt', [ (12, 'GGXpNqoJvglVTkGU'), (12, 'xSZGpk6Bp4SA3.cR'), (456, 'GGXpNqoJvglVTkGU'), (456, 'xSZGpk6Bp4SA3.cR'), ]), ('sha256crypt', [ (1000, 'saltstring'), (1000, 'short'), (5000, 'saltstring'), (5000, 'short'), ]), ('sha512crypt', [ (1000, 'saltstring'), (1000, 'short'), (5000, 'saltstring'), (5000, 'short'), ]), ('sunmd5', [ (0, '9ZLwtuTO'), (0, '1xMeE.at'), (12, '9ZLwtuTO'), (12, '1xMeE.at'), ]), ('yescrypt', [ (1, '.......'), (1, 'LdJMENpBABJJ3hIHjB1Bi.'), (2, '.......'), (2, 'LdJMENpBABJJ3hIHjB1Bi.'), ]), ] # Normally, we expect that (1) for fixed salt, no two phrases hash to # the same string; (2) for fixed phrase, no two settings produce the # same string. The known exceptions are all due to limitations and/or # bugs in the hashing method. Check the table produced by this # program to ensure that all of the collisions in the ->expected # strings are due to one of the known exceptions. test-crypt-kat.c # itself doesn't need to do this test; as long as all of the hashes # produced by the just-built crypt() match the appropriate ->expected # string, no new collisions can have been introduced. def strneq_7bit (p1, p2, limit): n1 = len(p1) n2 = len(p2) for i in range(limit): if i == n1 and i == n2: # strings are the same length, within the limit, and no # mismatched characters were found return True if i == n1 or i == n2: # one string is longer than the other, within the limit return False if (p1[i] & 0x7F) != (p2[i] & 0x7F): # characters not equal, after masking the 8th bit return False # reached the limit, no mismatches found return True # The bug in bcrypt mode "x" (preserved from the original # implementation of bcrypt) is, at its root, that the code below # sign- rather than zero-extends *p before or-ing it into 'tmp'. # When *p has its 8th bit set, it is therefore or-ed in as # 0xFF_FF_FF_xx rather than 0x00_00_00_xx, and clobbers the other # three bytes in 'tmp'. Depending on its position within the input, # this can erase up to three other characters of the passphrase. # The exact set of strings involved in any one group of collisions is # difficult to describe in words and may depend on the endianness of # the CPU. The test cases in this file have only been verified on # a little-endian CPU. BF_KEY_LEN = 18 def buggy_expand_BF_key(phrase): p = 0 lp = len(phrase) expanded = [0]*BF_KEY_LEN if lp > 0: for i in range(BF_KEY_LEN): tmp = 0 for j in range(4): if p == lp: c = 0 else: c = phrase[p] stmp = ((c & 0x7F) - (c & 0x80)) & 0xFFFFFFFF tmp = ((tmp << 8) | stmp) & 0xFFFFFFFF p += 1 if p == lp + 1: p = 0 expanded[i] = tmp return expanded def sign_extension_collision_p(p1, p2): return buggy_expand_BF_key(p1) == buggy_expand_BF_key(p2) def equivalent_sunmd5_settings_p(s1, s2): if s1[:4] != "$md5": return False if s2[:4] != "$md5": return False l1 = len(s1) l2 = len(s2) if l1 < l2: ll = l1 lh = l2 sl = s1 sh = s2 else: ll = l2 lh = l1 sl = s2 sh = s1 if sl[:ll] != sh[:ll]: return False # The two cases where sunmd5 settings are equivalent: # $md5...$ and $md5...$$ # $md5... and $md5...$x if sl[ll-1] == '$': if ll+1 != lh or sh[ll] != '$': return False else: if ll+2 != lh or sh[ll] != '$' or sh[ll+1] != 'x': return False return True def collision_expected(p1, p2, s1, s2): if not isinstance(p1, bytes): p1 = p1.encode("iso_8859_1") if not isinstance(p2, bytes): p2 = p2.encode("iso_8859_1") if isinstance(s1, bytes): s1 = s1.decode("ascii") if isinstance(s2, bytes): s2 = s2.decode("ascii") # Under no circumstances should two hashes with different settings # collide, except... if s1 != s2: # a descrypt hash can collide with a bigcrypt hash when the phrase # input to bigcrypt was fewer than 8 characters long if ( s1[0] != '$' and s1[0] != '_' and s2[0] != '$' and s2[0] != '_' and ( (len(s1) == 2 and len(s2) > 2 and len(p2) <= 8) or (len(s2) == 2 and len(s1) > 2 and len(p1) <= 8))): return strneq_7bit(p1, p2, 8) # all settings for NTHASH are equivalent if s1[:3] == '$3$' and s2[:3] == '$3$': return p1 == p2 # sunmd5 has pairs of equivalent settings if equivalent_sunmd5_settings_p (s1, s2): return p1 == p2 return False if s1[:2] == '$2': # bcrypt truncates passphrases to 72 characters if p1[:72] == p2[:72]: return True # preserved bcrypt $2x bug? if s1[:3] == '$2x' and sign_extension_collision_p(p1, p2): return True return False if s1[0] != '$' and s1[0] != '_': if len(s1) == 2: # descrypt truncates passphrases to 8 characters and strips the # 8th bit return strneq_7bit(p1, p2, 8) else: # bigcrypt truncates passphrases to 128 characters and strips the # 8th bit return strneq_7bit(p1, p2, 128) if s1[0] == '_': # bsdicrypt does not truncate but does still strip the 8th bit return strneq_7bit(p1, p2, max(len(p1), len(p2))) return False def report_unexpected_collision(p1, p2, s1, s2, expected): sys.stderr.write("UNEXPECTED HASH COLLISION:\n" " hash = {}\n" " p1 = {!r}\n" " p2 = {!r}\n" " s1 = {!r}\n" " s2 = {!r}\n" "\n".format(expected, p1, p2, s1, s2)) # Master control. # # To reduce the painful slowness of this program _somewhat_, # we use a multiprocessing pool to compute all of the hashes. def generate_phrase_setting_combs(): for macro_name, settings in SETTINGS: for phrase in PHRASES: for setting in settings: yield (macro_name, phrase, setting) def worker_compute_one(args): method, phrase, setting = args import __main__ sfunc = getattr(__main__, 'h_' + method) return [(method, case) for case in sfunc(phrase, *setting)] # Python specifies that an \x escape in a string literal consumes # exactly two subsequent hexadecimal digits. C, on the other hand, # specifies that \x in a string literal consumes *any number of* # hexadecimal digits, and if the hexadecimal number is larger than the # range representable by 'unsigned char' the result is # implementation-defined. For instance, "\x303" == "03" in Python, # but in C the string on the left could be anything. The simplest way # to deal with this is to escape the string Python's way and then # replace sequences like '\x303' with '\x30""3'. c_hex_escape_fixup_re_ = re.compile( r"(\\x[0-9a-fA-F]{2})([0-9a-fA-F])") def c_hex_escape(s): if isinstance(s, bytes): s = s.decode("iso_8859_1") s = s.encode("unicode_escape").decode("ascii") return c_hex_escape_fixup_re_.sub(r'\1""\2', s) def format_case(phrase, setting, expected): return (' {{ "{}", "{}", "{}" }},\n' .format(c_hex_escape(setting), c_hex_escape(expected), c_hex_escape(phrase))) def main(): # FIXME: This only detects collisions that actually happen, not # collisions that ought to have happened but didn't. (Detecting # collisions that ought to have happened, but didn't, would be # unavoidably quadratic in the total number of test cases, so I'm # not sure it's worth it.) items = [] collisions = {} collision_error = False with multiprocessing.Pool() as pool: for group in pool.imap(worker_compute_one, generate_phrase_setting_combs(), chunksize=100): for method, (phrase, setting, expected) in group: if expected in collisions: p1, s1 = collisions[expected] if not collision_expected(p1, phrase, s1, setting): report_unexpected_collision(p1, phrase, s1, setting, expected) collision_error = True else: collisions[expected] = (phrase, setting) items.append((method, format_case(phrase, setting, expected))) if collision_error: sys.exit(1) sys.stdout.write( "/* Known-answer tests for passphrase hashes. -*- mode: c -*-\n" " Automatically generated by ka-table-gen.py.\n" " Do not edit this file by hand. */\n\n") prev_method = None for method, case in items: if method != prev_method: if prev_method is not None: sys.stdout.write("#endif // {}\n\n".format(prev_method)) sys.stdout.write("#if INCLUDE_{} && defined TEST_{}\n" .format(method, method)) prev_method = method sys.stdout.write(case) if prev_method is not None: sys.stdout.write("#endif // {}\n".format(prev_method)) if __name__ == '__main__': main()