1#! /usr/bin/python3 2# Compute test cases for ka-* tests. 3# 4# Written by Zack Weinberg <zackw at panix.com> in 2019. 5# To the extent possible under law, Zack Weinberg has waived all 6# copyright and related or neighboring rights to this work. 7# 8# See https://creativecommons.org/publicdomain/zero/1.0/ for further 9# details. 10 11# This program generates ka-table.inc, which defines the set 12# of tests performed by the ka-*.c tests. It is not run automatically 13# during the build for two reasons: it's very slow, and it requires Python 14# 3.6 or greater with Passlib <https://passlib.readthedocs.io/en/stable/> 15# available. 16# 17# If you modify this program, make sure to update ka-table.inc, 18# by running 'make regen-ka-table' (libcrypt.so must already have been 19# built), and then check in the updates to that file in the same 20# commit as your changes to this program. You will need to install 21# Passlib itself, but not any other libraries. 22# 23# This program intentionally uses Passlib's slow pure-Python back 24# ends, rather than accelerated C modules that tend to be, at their 25# core, the same code libxcrypt uses itself, so that we really are 26# testing libxcrypt against known answers generated with a different 27# implementation. 28 29import array 30import ctypes 31import multiprocessing 32import os 33import re 34import sys 35 36# force passlib to allow use of its built-in bcrypt implementation 37os.environ["PASSLIB_BUILTIN_BCRYPT"] = "enabled" 38 39import passlib.hash 40 41# In order to tickle various bugs and limitations in older hashing 42# methods precisely, we need to test several passphrases whose byte 43# sequences are not valid Unicode text in any encoding. We therefore 44# use exclusively byte strings in this array. 45PHRASES = [ 46 # All ASCII printable, various lengths. Most of these were taken 47 # from older known-answer tests for specific hashing methods. 48 b'', 49 b' ', 50 b'a', 51 b'ab', 52 b'abc', 53 b'U*U', 54 b'U*U*', 55 b'U*U*U', 56 b'.....', 57 b'dragon', 58 b'dRaGoN', 59 b'DrAgOn', 60 b'PAROLX', 61 b'U*U***U', 62 b'abcdefg', 63 b'01234567', 64 b'726 even', 65 b'zyxwvuts', 66 b'ab1234567', 67 b'alexander', 68 b'beautiful', 69 b'challenge', 70 b'chocolate', 71 b'cr1234567', 72 b'katherine', 73 b'stephanie', 74 b'sunflower', 75 b'basketball', 76 b'porsche911', 77 b'|_337T`/p3', 78 b'thunderbird', 79 b'Hello world!', 80 b'pleaseletmein', 81 b'a short string', 82 b'zxyDPWgydbQjgq', 83 b'photojournalism', 84 b'ecclesiastically', 85 b'congregationalism', 86 b'dihydrosphingosine', 87 b'semianthropological', 88 b'palaeogeographically', 89 b'electromyographically', 90 b'noninterchangeableness', 91 b'abcdefghijklmnopqrstuvwxyz', 92 b'electroencephalographically', 93 b'antidisestablishmentarianism', 94 b'cyclotrimethylenetrinitramine', 95 b'dichlorodiphenyltrichloroethane', 96 b'multiple words seperated by spaces', 97 b'supercalifragilisticexpialidocious', 98 b'we have a short salt string but not a short password', 99 b'multiple word$ $eperated by $pace$ and $pecial character$', 100 b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', 101 (b'1234567890123456789012345678901234567890' 102 b'1234567890123456789012345678901234567890'), 103 (b'a very much longer text to encrypt. This one even stretches over more' 104 b'than one line.'), 105 106 # ASCII printables with their high bits flipped - DES-based hashes collide. 107 # All of these have an exact counterpart above. 108 b'\xd0\xc1\xd2\xcf\xcc\xd8', # 'PAROLX' 109 b'\xd5\xaa\xd5\xaa\xaa\xaa\xd5\xaa', # 'U*U***U*' 110 b'\xe1\xec\xe5\xf8\xe1\xee\xe4\xe5\xf2', # 'alexander' 111 b'\xf3\xf4\xe5\xf0\xe8\xe1\xee\xe9\xe5', # 'stephanie' 112 # '*U*U*U*U*U*U*U*U*' 113 b'\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa', 114 115 # A few UTF-8 strings and what they will collide with for 116 # DES-based hashes. 117 b'\xC3\xA9tude', b'C)tude', # UTF-8(NFC(étude)) 118 b'Chl\xC3\xB6e', b'ChlC6e', # UTF-8(NFC(Chlöe)) 119 # Eight letters, but 10 bytes: UTF-8(NFC(Ångström)) 120 b'\xC3\x85ngstr\xC3\xB6m', b'C\x05ngstrC6m', b'C\x05ngstrC' 121 122 # descrypt truncates everything to 8 characters. 123 b'U*U***U*', b'U*U***U*ignored', 124 b'U*U*U*U*', b'U*U*U*U*ignored', 125 b'*U*U*U*U', b'*U*U*U*U*', b'*U*U*U*U*U*U*U*U', b'*U*U*U*U*U*U*U*U*', 126 127 # Patterns designed to tickle the bcrypt $2x$ sign-extension bug. 128 b'\xa3', 129 b'\xa3a', 130 b'\xd1\x91', 131 b'\xa3ab', 132 b'\xff\xff\xa3', 133 b'1\xa3345', 134 b'\xff\xa3345', 135 b'\xff\xa334\xff\xff\xff\xa3345', 136 137 (b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' 138 b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' 139 b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' 140 b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' 141 b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff' 142 b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'), 143 144 (b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' 145 b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' 146 b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' 147 b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' 148 b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55' 149 b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'), 150 151 # bcrypt truncates to 72 characters 152 (b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 153 b'0123456789'), 154 (b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 155 b'0123456789chars after 72 are ignored'), 156 157 (b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 158 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 159 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 160 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 161 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 162 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'), 163 164 (b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 165 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 166 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 167 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 168 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 169 b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa' 170 b'chars after 72 are ignored as usual'), 171 172 # bigcrypt truncates to 128 characters 173 # (first sentence of _Twenty Thousand Leagues Under The Sea_) 174 (b'THE YEAR 1866 was marked by a bizarre development, an unexplained and ' 175 b'downright inexplicable phenomenon that surely no one has forgotten.'), 176 177 # first 8 characters of above (des) 178 b'THE YEAR', 179 # first 72 characters (bcrypt) 180 b'THE YEAR 1866 was marked by a bizarre development, an unexplained and do', 181 # first 128 characters (bigcrypt) 182 (b'THE YEAR 1866 was marked by a bizarre development, an unexplained and ' 183 b'downright inexplicable phenomenon that surely no one has f') 184] 185 186# passlib does not support all of the hashing methods we do, no longer 187# supports generation of bare setting strings, and in some cases does 188# not support all of the variants we need to generate. Therefore, 189# there is a shim for each hashing method (variant). Shims must be 190# named 'h_METHOD' where METHOD is the name for the method used by the 191# INCLUDE_method macros. 192# 193# Each shim function takes at least the arguments phrase, rounds, and 194# salt, in that order. Additional optional arguments are allowed. It 195# should do 'yield (phrase, setting, expected)' at least once, where 196# phrase is the phrase argument, setting is a setting string generated 197# from rounds and salt, and expected is the hashed passphrase expected 198# to be generated from that combination of phrase and setting. 199# 200# When implementing new shims, use of passlib's pure-Python "backends" 201# is strongly preferred where possible, because the speed of this 202# program does not matter, and the C backends tend to be based on some 203# incarnation of the same code that libxcrypt uses itself, so it 204# wouldn't be a proper interop test. 205 206# straightforward passlib wrappers, sorted by age of algorithm 207DES_CRYPT = passlib.hash.des_crypt 208DES_CRYPT.set_backend("builtin") 209def h_descrypt(phrase, rounds, salt): 210 expected = DES_CRYPT.using( 211 salt=salt, truncate_error=False 212 ).hash(phrase) 213 setting = expected[:2] 214 yield (phrase, setting, expected) 215 216BIGCRYPT = passlib.hash.bigcrypt 217# BIGCRYPT.set_backend("builtin") # currently p.h.bigcrypt always uses builtin 218def h_bigcrypt(phrase, rounds, salt): 219 # p.h.bigcrypt doesn't truncate to 128 chars. 220 # The bigcrypt implementation in libxcrypt was reverse engineered 221 # from a closed-source original and it's possible that they could 222 # have gotten it wrong, but let's stick to what we have. 223 expected = BIGCRYPT.using( 224 salt=salt 225 ).hash(phrase[:128]) 226 # bigcrypt has no prefix, so our crypt() looks at the length of 227 # the setting string to decide whether it should use bigcrypt or 228 # descrypt. For bigcrypt to be used, the setting must be too long 229 # to be a traditional DES hashed password. 230 setting = expected[:2] + ".............." 231 yield (phrase, setting, expected) 232 233BSDI_CRYPT = passlib.hash.bsdi_crypt 234BSDI_CRYPT.set_backend("builtin") 235def h_bsdicrypt(phrase, rounds, salt): 236 expected = BSDI_CRYPT.using( 237 salt=salt, rounds=rounds 238 ).hash(phrase) 239 setting = expected[:9] 240 yield (phrase, setting, expected) 241 242MD5_CRYPT = passlib.hash.md5_crypt 243MD5_CRYPT.set_backend("builtin") 244def h_md5crypt(phrase, rounds, salt): 245 expected = MD5_CRYPT.using( 246 salt=salt 247 ).hash(phrase) 248 setting = expected[:expected.rfind('$')] 249 yield (phrase, setting, expected) 250 251BSD_NTHASH = passlib.hash.bsd_nthash 252#BSD_NTHASH.set_backend("builtin") # has only the built-in backend 253def h_nt(phrase, rounds, salt): 254 # passlib.hash.bsd_nthash attempts to decode byte strings as UTF-8, which 255 # is Not What We Want. Python's iso_8859_1 is an identity map from 00..FF 256 # to U+0000..U+00FF, which is correct for this application. 257 expected = BSD_NTHASH.hash(phrase.decode("iso_8859_1")) 258 # NTHash doesn't have a salt. 259 # Older versions of libxcrypt generated a fake salt which 260 # we should ensure is ignored. 261 yield (phrase, "$3$", expected) 262 yield (phrase, "$3$__not_used__0123456789abcdef", expected) 263 264SHA1_CRYPT = passlib.hash.sha1_crypt 265SHA1_CRYPT.set_backend("builtin") 266def h_sha1crypt(phrase, rounds, salt): 267 expected = SHA1_CRYPT.using( 268 salt=salt, rounds=rounds 269 ).hash(phrase) 270 setting = expected[:expected.rfind('$')] 271 yield (phrase, setting, expected) 272 273SHA256_CRYPT = passlib.hash.sha256_crypt 274SHA256_CRYPT.set_backend("builtin") 275def h_sha256crypt(phrase, rounds, salt): 276 expected = SHA256_CRYPT.using( 277 salt=salt, rounds=rounds 278 ).hash(phrase) 279 setting = expected[:expected.rfind('$')] 280 yield (phrase, setting, expected) 281 282SHA512_CRYPT = passlib.hash.sha512_crypt 283SHA512_CRYPT.set_backend("builtin") 284def h_sha512crypt(phrase, rounds, salt): 285 expected = SHA512_CRYPT.using( 286 salt=salt, rounds=rounds 287 ).hash(phrase) 288 setting = expected[:expected.rfind('$')] 289 yield (phrase, setting, expected) 290 291# these need to do more work by hand 292 293# We need to test setting strings both with and without the suffix 294# that triggers the off-by-one error in the original Sun hash parser 295# (which must be preserved by all interoperable implementations). 296# passlib.hash.sun_md5_crypt.using(..., bare_salt=True) does not 297# actually work. 298from passlib.handlers.sun_md5_crypt import raw_sun_md5_crypt 299def h_sunmd5(phrase, rounds, salt): 300 301 # sunmd5 is extremely slow in this test, compared to all the other 302 # hashes, because we have to do extra tests of bug-compatibility, 303 # because its round count cannot be reduced below 4096, and 304 # because on approximately half of those rounds it feeds an 305 # additional 1.5k of text to MD5_Update. The only optimization 306 # that wouldn't break compatibility would be to plug in a faster 307 # MD5 core, but that's not worth the engineering effort since it 308 # would only benefit obsolete hashes. Instead, skip most of the 309 # test phrases for this hash. This cuts the wall-clock time for 310 # ka-sunmd5 (on a current-generation x86-64) from fifty to nine 311 # seconds, which we can live with. sunmd5 feeds the phrase 312 # verbatim to MD5_Update, only once, with no length limit, so we 313 # don't need a lot of careful testing of different phrases. We do 314 # still include at least a few of the non-ASCII test phrases, and 315 # one very long phrase. 316 if 6 <= len(phrase) <= 128: 317 return 318 319 if rounds == 0: 320 bare_setting = "$md5$" + salt 321 else: 322 bare_setting = "$md5,rounds={}${}".format(rounds, salt) 323 324 suff_setting = bare_setting + "$" 325 bare_cksum = raw_sun_md5_crypt(phrase, rounds, bare_setting.encode("ascii")) 326 suff_cksum = raw_sun_md5_crypt(phrase, rounds, suff_setting.encode("ascii")) 327 328 bare_cksum = bare_cksum.decode("ascii") 329 suff_cksum = suff_cksum.decode("ascii") 330 331 yield (phrase, bare_setting, bare_setting + "$" + bare_cksum) 332 yield (phrase, bare_setting + "$x", bare_setting + "$" + bare_cksum) 333 yield (phrase, suff_setting, suff_setting + "$" + suff_cksum) 334 yield (phrase, suff_setting + "$", suff_setting + "$" + suff_cksum) 335 336# testing bcrypt $2b$ and $2y$ is easy, but ... 337BCRYPT = passlib.hash.bcrypt 338BCRYPT.set_backend("builtin") 339def h_bcrypt(phrase, rounds, salt): 340 expected = BCRYPT.using( 341 salt=salt, rounds=rounds, ident="2b" 342 ).hash(phrase) 343 setting = expected[:-31] 344 yield (phrase, setting, expected) 345 346def h_bcrypt_y(phrase, rounds, salt): 347 expected = BCRYPT.using( 348 salt=salt, rounds=rounds, ident="2y" 349 ).hash(phrase) 350 setting = expected[:-31] 351 yield (phrase, setting, expected) 352 353# ...passlib doesn't implement the quirks of crypt_blowfish's $2a$ or 354# $2x$, but we really must test them. It is theoretically possible to 355# implement the $2x$ quirk by a transformation on the input 356# passphrase, but it would be hard to get right, and the $2a$ quirk 357# cannot be implemented this way. The path of least resistance is to 358# compute the $2b$ hash and then look up its output in a table of 359# substitutions for each quirk. The collision resistance of the $2b$ 360# hash should protect us from false hits in these tables. (Remember 361# that the $2x$ quirk is a _bug_, preserved for backward compatibility's 362# sake, that causes output collisions; duplicate entries on the 363# right-hand side of the $2x$ substitution table are expected.) 364bcrypt_a_substitutions = { 365 'HdhhdUXVgLADnbTYf12kvsasO1gS51C': '5jlqAXzFdq.3//pJFBa432Pepsclbdu', 366 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa': '7N5c8AaH.dbqz7.2o.V2mRkUDV0TZnO', 367 '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW': 'D8jTC5oJeIumVOhMVpz79BzoGVhCjrW', 368 '9f4sA9SRA0scUKcRyC5kce8dao2.GKe': 'E8Lpo1/qkGPTBDBxEsJjeEzh9nkZ9uW', 369 'MOaOTHB4gEm.rriBjXNwBNh.Oc4mKGG': 'ZQhQBRpiYJCaQFgyHlB.t/F01cqLCIu', 370 'Qjdj3GXX7D0sFE9jji6wxSTWIhqI3US': 'euRNRfAA6e0fjpTfQPPAMU1PCOf9IHq', 371 'PIeeyENZVZmrKLAq5lwBUU9fMRVfV2m': 'h87NWu/js59XXaIj1hDyHxnjw7MJ5K6', 372 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142': 'vrukkCtLqHBLoBDsz6QoBtSwzI9Qxiq', 373} 374def h_bcrypt_a(phrase, rounds, salt): 375 base = BCRYPT.using( 376 salt=salt, rounds=rounds, ident="2b" 377 ).hash(phrase) 378 base_setting = base[4:-31] 379 base_output = base[-31:] 380 output = bcrypt_a_substitutions.get(base_output, base_output) 381 382 setting = "$2a$" + base_setting 383 expected = setting + output 384 yield (phrase, setting, expected) 385 386bcrypt_x_substitutions = { 387 'xGPMyJSPyyeICKolPQ2gecm8rOgHwz.': '.sDifhVkUxvjPx6U4yeM2tC411Wuc.W', 388 'SRMKxjeMqVSDMhSLhOnvtEZ/p5KhUbq': '1Z0zKnHbUU3q/kk//Pknlv19a4/T8.K', 389 'PQInhDOdCKnXeUE.n/L.kQmTKM9ldK2': '25hhqa/GOJGmXui3avNI5MN8lOI2bCW', 390 '7UwHe/ywPmdp.nr.ZLQSxd8hqn7qURW': '2E0h7UFL/4fALemA5ApWrCWllQXSPTu', 391 'FZEYKXyyMgG13MK0uV8dwNotWf4Wm6e': '2M7Vc.sF98e8DDmnxFjRfAmrudbv6y.', 392 'ZHZAnvydiPNiYH2VjRNhEAD6BEiyaWS': '3kVpkKaj1q2TAXm.ptIi98Nj3zVV8A2', 393 'I7vjUzDOKf8XqcK8VSCm9b0bwoSm1Qm': '6He0iAS8JsdM.iB4OQ4fbsKMXPagLhy', 394 'tLCLEXAt3RUjOgs.yvfWSni4j1JX/JS': '7bLwFi3rlVcl.xfhc7LxjqwOExfxki2', 395 'zQPVIDk6wF8XmESji30KDHFabTlu0WK': '8gGm3RkYFflDX50UQs.tJ8InKNy.HGO', 396 'KBv1eBM2he2T/QheS8zPHMejn5fMNRe': '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW', 397 'XEhidoDX1kz.RFnwWIzMJvtW2aP/k4e': '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW', 398 'UIzecIiCguM2sPh1F7L9IS3zOtmg5Im': '9f4sA9SRA0scUKcRyC5kce8dao2.GKe', 399 'Xl2V4mQ7s0zX0b4XXH/b9UkRRCWpq3e': '9f4sA9SRA0scUKcRyC5kce8dao2.GKe', 400 'tHwehGUs3q0b/Ejn42MsoM4Yv/iA1rq': '9iflVP0Ezo/iaxO0XS74wFglLNeryTS', 401 'K2KXzhsMefB8v6hJJG3bOyKV.XRf6Qe': 'C3nvb4DotHdnRYgOPcdiK0C4q.DkIDO', 402 'EuEnx.TyCLYgkTV/uhWL5xJTHV7ZMjG': 'HdhhdUXVgLADnbTYf12kvsasO1gS51C', 403 'oaE0x.b2rUEITWgPdg.GEcU4ePHLh6m': 'IZEWKJgp.b.KG29zgOadgd2rUt5iV1e', 404 '3cFqlEl7Y0HWaVLHpyyCqG8dBNk1DSm': 'IvJ5WHgSbYKj7g9hhdVsAjyzcnVT/.m', 405 'gI4g/1M6K/Sz2bsgu9VDeEl6reszuXa': 'J6Y/kPTV/aHj7iJKuDfD5OPjVTvT2BK', 406 'rz1efvzeJjL4mQ813hrZNg3p1.ivOii': 'MOaOTHB4gEm.rriBjXNwBNh.Oc4mKGG', 407 'Tp1b9XCEV16BcrQ.0k4xf7V/OGPZLnK': 'N5E4WSTo/R5henexIN1o8xkGwe2V86W', 408 'CARhc7ugFdgoPjDb7LUG.yQF2lboK6e': 'NjlOVoE5aHHQGtU9zc25wu0VykHnD1G', 409 'nCdKcLO57oLlc6J6sNnGyfT9FrIawiW': 'PIeeyENZVZmrKLAq5lwBUU9fMRVfV2m', 410 'DQjlTXDA5PBQ97.qBJY/vsHPQhLJDMe': 'PMOS6ygjFMSbDo.iJJam/G63inGIOBO', 411 '1qOUgfpg30XDHLx/zrbWiMRcWyFhwye': 'QZ7A0p9q1Ag9Utfnfl/xif8NiDtVhO.', 412 'h0JFRyDXfP0duxAkVxWGr8nMDEDvPca': 'QiT.KUY9PXgIzL2aECMKb0EvVl0Pzw6', 413 'BvtRGGx3p8o0C5C36uS442Qqnrwofrq': 'Qjdj3GXX7D0sFE9jji6wxSTWIhqI3US', 414 'UTFLPGm29p.YVzcY6pqejGEql1x8Ccq': 'TYqa73Yp3leHe3D6.ysuJtNLwOma87C', 415 'YdPam5/ypFIyDUQMyCCEIwzVsTi0Sa6': 'TmFuGBy/Zgc6JVAr667oHeCvGQGyS1q', 416 'gbhoNOH4mWxoEhRrQNdeI.rpk9XeuZS': 'UPPO3QqmgMIXGHvbOLe2IkNzHLAToY2', 417 'ZkQGqjbMpqQ9oCsxNZjN8LQJaHFqPMC': 'VTMVcF7YBLV2/O6V1PNcQw0BD3hTN6a', 418 'RbKkfW2ph8bd8B5yul5E97DxgDw9cT.': 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142', 419 'WI7ZNXFtzCd9mN1mWoNMQRHEmkDsZnm': 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142', 420 '2WegkGS5Xr/qYNkfEi6JmnR16WVSwcW': 'Xv3TUB0NdnMpyn4cfg4g48oZxRSIrNC', 421 'LN/CEHXLfFeYdOOxbdxKu8ZqSIKgqAu': 'YdqUOXeMKw7X6zbqBXP6c1xqIKun7Oq', 422 'VAQY6kySmwStlNY.sut9Y87njVr0mm.': 'Ysbn1VpHCTzInfW/z/8Q3k676rxfmSW', 423 'qJn4AY9ch/WAR5JXeeJtVGeovjQrhd2': 'ZH9vItRapPbkFKo0iQqU4v71o0e19Mm', 424 '4QrucGf30zIbQA.sO0d1QrU63xBrEYq': 'bqJMLkbvnTFj0OYMu9tPnQXstRzX/e6', 425 'HD8RnTmGEavoR3LFVfdHvh3xA0QPka2': 'cYbtH8J2lfpMIiBKfF3pKpMno7JlLui', 426 '6WgD2zYQDPgxR2sXlUeEeGKknxt95W.': 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa', 427 'rPSVExmrZ2WB1xntSbqZ/DRQRlKtVw.': 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa', 428 'PYLCOpTKZmhFn1CoBM2XNbWgqMX4Jk2': 'cxMAJfIx3T.Fv3O0KjL9VdM/oSSUVRK', 429 'w8RVl3rh7sNazq544l0944qGq4GUFUq': 'fOj7giyJz5k22FHTKGVo8o1o5zGzPsq', 430 'KHsCqMFVxOAGJObHwEBR3JaEdKVu1.m': 'fRmxM11/x97bxCrhecMENdkPm7YpRbe', 431 '3wn02pxRJPnFwvlGt75DURDbt4g7om.': 'fY4v5x6.8txtKUKDP86z1xjlXG/GgZO', 432 'k9Hv17Gha84losGKAq61csCZokj5pyy': 'gLfxf5sydYesf658mrFYb51nLrn/4Sm', 433 'uTNb9MEHVGI7kd6UnQjYxgRNiKJM01S': 'h.z2vLHB/tYSU5fPXkrYB7TxLHGJnI6', 434 'heAts1y/8kcTTP0/vD3yeuMX1ihF8dO': 'j72N2Fi2j3pGalOZvTqtyH3bYGotuju', 435 'SqNATdQiNEckAKLsqgsKbAM5.hZoMCq': 'mjGosqV8OkKEcduYTNz5PKN2scswFya', 436 'k786rdsOdUP4cRi.dLa3dsYueMj5UnS': 'nQF1kDoMDjBBwXy2wwMni2gJLKqA0ta', 437 'G6PeIXKiqeNUPUbqFkMJvvI7G9hd51W': 'oBvt6zJCTP5OED1esTYUYPn31cWqwsa', 438 'TszY8.avBpwJ6xbNjwws3SKBbK6kj6S': 'odAvHZH9azlhi1x4pBLF25.hj08RMFi', 439 'z4QFggBRTVUeHRGL/CQxlAYHraYPcpa': 'ojiyBkc.4HZ2y5Yh0LxBbI6ZkLiRg0C', 440 'PPtdI0NcxZ4Txyv/Y5ORfcP1XFriKT2': 'pjce7u/YRnectNa8DXjsSGzRdyH2PSG', 441 'uIZ1Lgb.jHRDU/Z/LVXfpQCK72fTEHq': 'q5NMeQZ0UTyP/bILj02wdQ.Si5KHU1K', 442 '51cV.PJOQVwmiao4t4lXsb9Cc3Jnuem': 'rB3dV.fJGdSihNlP0vo5PemoaZRp6LS', 443 's6h1E6A2RzVn2KxXLQXsKosQeRo8bLa': 'sND7G4.cx6Dzn6TqbXfK99bElU0a7P.', 444 'A96emG/jBf0K1K6vCG.eZGdLkSridom': 'tlD3cmtHgs/TwWAvy5E3F.freZS1bau', 445 'hWYb0x3Q3zM0aBkB2G1arbzmWxRQS/i': 'whpbcVuyGrJbgveSSM3XQKa8G5alyRm', 446 'AqM0XavJxJXeVlJ3Te3umGJaPOCYmZi': 'xP2lldc1.10LvZDjJZXNBKLzWqnkbOa', 447 'UPBzTBMwJb5mKWflQ.5Rid4481RrxVy': 'xSD.pz8Zg3vt0Jiovghl5Dqrs8aw8ni', 448 'yM59Cq5iVZDB3u45gTNhRSnOgrY1tdG': 'yED5tIjzyeH90te88BUWvTrMFHsWgCi', 449 'k.qekGiJym3QgfeFCwNhPHg0Zk99KSa': 'yphVralDu2JlxYbCqwwGli/H6wBgBtC', 450 'iYbzuFNFwSfCgqTGNsUFtSDh8PJAqSe': 'zAUUWh4XGsBGYs6yyUJTSfEgzoLXO6G', 451} 452def h_bcrypt_x(phrase, rounds, salt): 453 base = BCRYPT.using( 454 salt=salt, rounds=rounds, ident="2b" 455 ).hash(phrase) 456 base_setting = base[4:-31] 457 base_output = base[-31:] 458 output = bcrypt_x_substitutions.get(base_output, base_output) 459 460 setting = "$2x$" + base_setting 461 expected = setting + output 462 yield (phrase, setting, expected) 463 464# passlib includes an scrypt implementation, but its encoded password 465# format is not the $7$ format we implement, so instead we use the 466# stdlib's hashlib.scrypt (this is why 3.6+ is required) and encode 467# the setting string by hand. This may produce strings encoding N/r/p 468# combinations that don't normally occur in the wild, but that's OK. 469# The algorithm implemented by hashlib.scrypt is standardized as 470# RFC 7914, so it's not an issue where that implementation came from. 471# Yes, the salt is properly passed to raw_scrypt as-is. 472from hashlib import scrypt as raw_scrypt 473from passlib.utils.binary import h64 as hash64 474def h_scrypt(phrase, rounds, salt): 475 p = 1 476 r = 8 477 log2N = rounds + 7 478 N = 1 << log2N 479 480 bytesalt = salt.encode("ascii") 481 setting = (b"$7$" + 482 hash64.encode_int6(log2N) + 483 hash64.encode_int30(r) + 484 hash64.encode_int30(p) + 485 bytesalt) 486 487 binhash = raw_scrypt(phrase, salt=bytesalt, p=p, r=r, n=N, dklen=32) 488 489 yield (phrase, setting, setting + b'$' + hash64.encode_bytes(binhash)) 490 491# 492# passlib does not support either yescrypt or gost-yescrypt. In fact, 493# as far as I can tell, at the time of writing, there exists only one 494# implementation of yescrypt and gost-yescrypt, by Solar Designer et al 495# which is the code we use ourselves. However, a test for round- 496# trippability and API consistency is still worthwhile, as is a test 497# that the implementation's current behavior is compatible with its 498# behavior some time ago. Therefore, we encode setting strings by 499# hand, and ctypes is used to access crypt_ra in the just-built 500# libcrypt.so. This will only work if the library was configured with 501# --enable-hashes=yescrypt,gost-yescrypt,[others] and --enable-shared, 502# which is OK, since it's not run during a normal build. Remove this 503# once passlib supports these hashes. 504# 505# crypt_ra is used because it's thread-safe but doesn't require us to 506# know how big struct crypt_data is. There is no good way to arrange 507# for the data object to be deallocated. Oh well. 508LIBCRYPT = ctypes.cdll.LoadLibrary(os.path.join(os.getcwd(), ".libs", 509 "libcrypt.so")) 510_xcrypt_crypt_ra = LIBCRYPT.crypt_ra 511_xcrypt_crypt_ra.argtypes = [ctypes.c_char_p, ctypes.c_char_p, 512 ctypes.POINTER(ctypes.c_void_p), 513 ctypes.POINTER(ctypes.c_int)] 514_xcrypt_crypt_ra.restype = ctypes.c_char_p 515_xcrypt_crypt_ra_data = ctypes.c_void_p(0) 516_xcrypt_crypt_ra_datasize = ctypes.c_int(0) 517 518def xcrypt_crypt(phrase, setting): 519 global _xcrypt_crypt_ra_data, _xcrypt_crypt_ra_datasize 520 if not isinstance(phrase, bytes): phrase = phrase.encode("utf-8") 521 if not isinstance(setting, bytes): setting = setting.encode("ascii") 522 rv = _xcrypt_crypt_ra(phrase, setting, 523 ctypes.byref(_xcrypt_crypt_ra_data), 524 ctypes.byref(_xcrypt_crypt_ra_datasize)) 525 if not rv: 526 err = ctypes.get_errno() 527 raise OSError(err, os.strerror(err)) 528 return bytes(rv) 529 530def yescrypt_gensalt(ident, rounds, salt): 531 if rounds == 1: 532 params = "j75" 533 elif rounds == 2: 534 params = "j85" 535 else: 536 raise RuntimeError("don't know how to encode rounds={}" 537 .format(rounds)) 538 539 return "${}${}${}".format(ident, params, salt) 540 541def h_yescrypt(phrase, rounds, salt): 542 setting = yescrypt_gensalt("y", rounds, salt) 543 yield (phrase, setting, xcrypt_crypt(phrase, setting)) 544 545def h_gost_yescrypt(phrase, rounds, salt): 546 setting = yescrypt_gensalt("gy", rounds, salt) 547 yield (phrase, setting, xcrypt_crypt(phrase, setting)) 548 549# Each method should contribute a group of parameters to the array 550# below. Each block has the form 551# 552# ('method', [ 553# (rounds, salt), 554# (rounds, salt), 555# ... 556# ]) 557# 558# where 'method' is the method name used in the INCLUDE_ macros, 559# rounds is a number and salt is a salt string. The appropriate 560# h_method function will be called with arguments (phrase, *params) 561# where params is one of the tuples from its block, so it is OK to add 562# extra arguments after the salt if necessary. 563# 564# If the method has a tunable rounds parameter, its array of 565# (rounds, salt) pairs should have two salts * at least two values of 566# the rounds parameter. If it does not, it should have two salts and 567# use 0 for the rounds parameter. 568# 569# The point of this test is not to exercise brute force resistance, 570# so keep cost parameters low. 571# 572# Methods should be in alphabetical order by their INCLUDE_macro name. 573 574SETTINGS = [ 575 ('bcrypt', [ 576 (5, 'CCCCCCCCCCCCCCCCCCCCC.'), 577 (5, 'abcdefghijklmnopqrstuu'), 578 (4, 'CCCCCCCCCCCCCCCCCCCCC.'), 579 (4, 'abcdefghijklmnopqrstuu'), 580 ]), 581 582 ('bcrypt_y', [ 583 (5, 'CCCCCCCCCCCCCCCCCCCCC.'), 584 (5, 'abcdefghijklmnopqrstuu'), 585 (4, 'CCCCCCCCCCCCCCCCCCCCC.'), 586 (4, 'abcdefghijklmnopqrstuu'), 587 ]), 588 589 ('bcrypt_a', [ 590 (5, 'CCCCCCCCCCCCCCCCCCCCC.'), 591 (5, 'abcdefghijklmnopqrstuu'), 592 (4, 'CCCCCCCCCCCCCCCCCCCCC.'), 593 (4, 'abcdefghijklmnopqrstuu'), 594 ]), 595 596 ('bcrypt_x', [ 597 (5, 'CCCCCCCCCCCCCCCCCCCCC.'), 598 (5, 'abcdefghijklmnopqrstuu'), 599 (4, 'CCCCCCCCCCCCCCCCCCCCC.'), 600 (4, 'abcdefghijklmnopqrstuu'), 601 ]), 602 603 ('bigcrypt', [ 604 (0, 'CC'), 605 (0, 'ab'), 606 ]), 607 608 # The bsdicrypt round count is required to be odd. 609 ('bsdicrypt', [ 610 (1, 'CCCC'), 611 (1, 'abcd'), 612 (13, 'CCCC'), 613 (13, 'abcd'), 614 ]), 615 616 ('descrypt', [ 617 (0, 'CC'), 618 (0, 'ab'), 619 ]), 620 621 ('gost_yescrypt', [ 622 (1, '.......'), 623 (1, 'LdJMENpBABJJ3hIHjB1Bi.'), 624 (2, '.......'), 625 (2, 'LdJMENpBABJJ3hIHjB1Bi.'), 626 ]), 627 628 ('md5crypt', [ 629 (0, 'CCCCCCCC'), 630 (0, 'abcdefgh'), 631 ]), 632 633 ('nt', [ 634 (0, ''), 635 ]), 636 637 ('scrypt', [ 638 (1, 'SodiumChloride'), 639 (1, 'unUNunUNunUNun'), 640 (2, 'SodiumChloride'), 641 (2, 'unUNunUNunUNun'), 642 ]), 643 644 ('sha1crypt', [ 645 (12, 'GGXpNqoJvglVTkGU'), 646 (12, 'xSZGpk6Bp4SA3.cR'), 647 (456, 'GGXpNqoJvglVTkGU'), 648 (456, 'xSZGpk6Bp4SA3.cR'), 649 ]), 650 651 ('sha256crypt', [ 652 (1000, 'saltstring'), 653 (1000, 'short'), 654 (5000, 'saltstring'), 655 (5000, 'short'), 656 ]), 657 658 ('sha512crypt', [ 659 (1000, 'saltstring'), 660 (1000, 'short'), 661 (5000, 'saltstring'), 662 (5000, 'short'), 663 ]), 664 665 ('sunmd5', [ 666 (0, '9ZLwtuTO'), 667 (0, '1xMeE.at'), 668 (12, '9ZLwtuTO'), 669 (12, '1xMeE.at'), 670 ]), 671 672 ('yescrypt', [ 673 (1, '.......'), 674 (1, 'LdJMENpBABJJ3hIHjB1Bi.'), 675 (2, '.......'), 676 (2, 'LdJMENpBABJJ3hIHjB1Bi.'), 677 ]), 678] 679 680# Normally, we expect that (1) for fixed salt, no two phrases hash to 681# the same string; (2) for fixed phrase, no two settings produce the 682# same string. The known exceptions are all due to limitations and/or 683# bugs in the hashing method. Check the table produced by this 684# program to ensure that all of the collisions in the ->expected 685# strings are due to one of the known exceptions. test-crypt-kat.c 686# itself doesn't need to do this test; as long as all of the hashes 687# produced by the just-built crypt() match the appropriate ->expected 688# string, no new collisions can have been introduced. 689 690def strneq_7bit (p1, p2, limit): 691 n1 = len(p1) 692 n2 = len(p2) 693 for i in range(limit): 694 if i == n1 and i == n2: 695 # strings are the same length, within the limit, and no 696 # mismatched characters were found 697 return True 698 if i == n1 or i == n2: 699 # one string is longer than the other, within the limit 700 return False 701 if (p1[i] & 0x7F) != (p2[i] & 0x7F): 702 # characters not equal, after masking the 8th bit 703 return False 704 # reached the limit, no mismatches found 705 return True 706 707# The bug in bcrypt mode "x" (preserved from the original 708# implementation of bcrypt) is, at its root, that the code below 709# sign- rather than zero-extends *p before or-ing it into 'tmp'. 710# When *p has its 8th bit set, it is therefore or-ed in as 711# 0xFF_FF_FF_xx rather than 0x00_00_00_xx, and clobbers the other 712# three bytes in 'tmp'. Depending on its position within the input, 713# this can erase up to three other characters of the passphrase. 714# The exact set of strings involved in any one group of collisions is 715# difficult to describe in words and may depend on the endianness of 716# the CPU. The test cases in this file have only been verified on 717# a little-endian CPU. 718BF_KEY_LEN = 18 719 720def buggy_expand_BF_key(phrase): 721 p = 0 722 lp = len(phrase) 723 expanded = [0]*BF_KEY_LEN 724 if lp > 0: 725 for i in range(BF_KEY_LEN): 726 tmp = 0 727 for j in range(4): 728 if p == lp: 729 c = 0 730 else: 731 c = phrase[p] 732 stmp = ((c & 0x7F) - (c & 0x80)) & 0xFFFFFFFF 733 tmp = ((tmp << 8) | stmp) & 0xFFFFFFFF 734 p += 1 735 if p == lp + 1: 736 p = 0 737 expanded[i] = tmp 738 return expanded 739 740def sign_extension_collision_p(p1, p2): 741 return buggy_expand_BF_key(p1) == buggy_expand_BF_key(p2) 742 743def equivalent_sunmd5_settings_p(s1, s2): 744 if s1[:4] != "$md5": return False 745 if s2[:4] != "$md5": return False 746 747 l1 = len(s1) 748 l2 = len(s2) 749 if l1 < l2: 750 ll = l1 751 lh = l2 752 sl = s1 753 sh = s2 754 else: 755 ll = l2 756 lh = l1 757 sl = s2 758 sh = s1 759 if sl[:ll] != sh[:ll]: 760 return False 761 762 # The two cases where sunmd5 settings are equivalent: 763 # $md5...$ and $md5...$$ 764 # $md5... and $md5...$x 765 if sl[ll-1] == '$': 766 if ll+1 != lh or sh[ll] != '$': 767 return False 768 else: 769 if ll+2 != lh or sh[ll] != '$' or sh[ll+1] != 'x': 770 return False 771 return True 772 773def collision_expected(p1, p2, s1, s2): 774 if not isinstance(p1, bytes): p1 = p1.encode("iso_8859_1") 775 if not isinstance(p2, bytes): p2 = p2.encode("iso_8859_1") 776 if isinstance(s1, bytes): s1 = s1.decode("ascii") 777 if isinstance(s2, bytes): s2 = s2.decode("ascii") 778 # Under no circumstances should two hashes with different settings 779 # collide, except... 780 if s1 != s2: 781 # a descrypt hash can collide with a bigcrypt hash when the phrase 782 # input to bigcrypt was fewer than 8 characters long 783 if ( s1[0] != '$' and s1[0] != '_' 784 and s2[0] != '$' and s2[0] != '_' 785 and ( (len(s1) == 2 and len(s2) > 2 and len(p2) <= 8) 786 or (len(s2) == 2 and len(s1) > 2 and len(p1) <= 8))): 787 return strneq_7bit(p1, p2, 8) 788 789 # all settings for NTHASH are equivalent 790 if s1[:3] == '$3$' and s2[:3] == '$3$': 791 return p1 == p2 792 793 # sunmd5 has pairs of equivalent settings 794 if equivalent_sunmd5_settings_p (s1, s2): 795 return p1 == p2 796 797 return False 798 799 if s1[:2] == '$2': 800 # bcrypt truncates passphrases to 72 characters 801 if p1[:72] == p2[:72]: 802 return True 803 # preserved bcrypt $2x bug? 804 if s1[:3] == '$2x' and sign_extension_collision_p(p1, p2): 805 return True 806 return False 807 808 if s1[0] != '$' and s1[0] != '_': 809 if len(s1) == 2: 810 # descrypt truncates passphrases to 8 characters and strips the 811 # 8th bit 812 return strneq_7bit(p1, p2, 8) 813 else: 814 # bigcrypt truncates passphrases to 128 characters and strips the 815 # 8th bit 816 return strneq_7bit(p1, p2, 128) 817 818 if s1[0] == '_': 819 # bsdicrypt does not truncate but does still strip the 8th bit 820 return strneq_7bit(p1, p2, max(len(p1), len(p2))) 821 822 return False 823 824def report_unexpected_collision(p1, p2, s1, s2, expected): 825 sys.stderr.write("UNEXPECTED HASH COLLISION:\n" 826 " hash = {}\n" 827 " p1 = {!r}\n" 828 " p2 = {!r}\n" 829 " s1 = {!r}\n" 830 " s2 = {!r}\n" 831 "\n".format(expected, p1, p2, s1, s2)) 832 833# Master control. 834# 835# To reduce the painful slowness of this program _somewhat_, 836# we use a multiprocessing pool to compute all of the hashes. 837 838def generate_phrase_setting_combs(): 839 for macro_name, settings in SETTINGS: 840 for phrase in PHRASES: 841 for setting in settings: 842 yield (macro_name, phrase, setting) 843 844def worker_compute_one(args): 845 method, phrase, setting = args 846 847 import __main__ 848 sfunc = getattr(__main__, 'h_' + method) 849 return [(method, case) for case in sfunc(phrase, *setting)] 850 851# Python specifies that an \x escape in a string literal consumes 852# exactly two subsequent hexadecimal digits. C, on the other hand, 853# specifies that \x in a string literal consumes *any number of* 854# hexadecimal digits, and if the hexadecimal number is larger than the 855# range representable by 'unsigned char' the result is 856# implementation-defined. For instance, "\x303" == "03" in Python, 857# but in C the string on the left could be anything. The simplest way 858# to deal with this is to escape the string Python's way and then 859# replace sequences like '\x303' with '\x30""3'. 860c_hex_escape_fixup_re_ = re.compile( 861 r"(\\x[0-9a-fA-F]{2})([0-9a-fA-F])") 862 863def c_hex_escape(s): 864 if isinstance(s, bytes): 865 s = s.decode("iso_8859_1") 866 s = s.encode("unicode_escape").decode("ascii") 867 868 return c_hex_escape_fixup_re_.sub(r'\1""\2', s) 869 870def format_case(phrase, setting, expected): 871 return (' {{ "{}", "{}", "{}" }},\n' 872 .format(c_hex_escape(setting), 873 c_hex_escape(expected), 874 c_hex_escape(phrase))) 875 876def main(): 877 # FIXME: This only detects collisions that actually happen, not 878 # collisions that ought to have happened but didn't. (Detecting 879 # collisions that ought to have happened, but didn't, would be 880 # unavoidably quadratic in the total number of test cases, so I'm 881 # not sure it's worth it.) 882 items = [] 883 collisions = {} 884 collision_error = False 885 with multiprocessing.Pool() as pool: 886 for group in pool.imap(worker_compute_one, 887 generate_phrase_setting_combs(), 888 chunksize=100): 889 for method, (phrase, setting, expected) in group: 890 if expected in collisions: 891 p1, s1 = collisions[expected] 892 if not collision_expected(p1, phrase, s1, setting): 893 report_unexpected_collision(p1, phrase, s1, setting, 894 expected) 895 collision_error = True 896 else: 897 collisions[expected] = (phrase, setting) 898 items.append((method, format_case(phrase, setting, expected))) 899 900 if collision_error: 901 sys.exit(1) 902 903 sys.stdout.write( 904 "/* Known-answer tests for passphrase hashes. -*- mode: c -*-\n" 905 " Automatically generated by ka-table-gen.py.\n" 906 " Do not edit this file by hand. */\n\n") 907 908 prev_method = None 909 for method, case in items: 910 if method != prev_method: 911 if prev_method is not None: 912 sys.stdout.write("#endif // {}\n\n".format(prev_method)) 913 sys.stdout.write("#if INCLUDE_{} && defined TEST_{}\n" 914 .format(method, method)) 915 prev_method = method 916 sys.stdout.write(case) 917 918 if prev_method is not None: 919 sys.stdout.write("#endif // {}\n".format(prev_method)) 920 921if __name__ == '__main__': 922 main() 923