1#! /usr/bin/python3
2# Compute test cases for ka-* tests.
3#
4# Written by Zack Weinberg <zackw at panix.com> in 2019.
5# To the extent possible under law, Zack Weinberg has waived all
6# copyright and related or neighboring rights to this work.
7#
8# See https://creativecommons.org/publicdomain/zero/1.0/ for further
9# details.
10
11# This program generates ka-table.inc, which defines the set
12# of tests performed by the ka-*.c tests.  It is not run automatically
13# during the build for two reasons: it's very slow, and it requires Python
14# 3.6 or greater with Passlib <https://passlib.readthedocs.io/en/stable/>
15# available.
16#
17# If you modify this program, make sure to update ka-table.inc,
18# by running 'make regen-ka-table' (libcrypt.so must already have been
19# built), and then check in the updates to that file in the same
20# commit as your changes to this program.  You will need to install
21# Passlib itself, but not any other libraries.
22#
23# This program intentionally uses Passlib's slow pure-Python back
24# ends, rather than accelerated C modules that tend to be, at their
25# core, the same code libxcrypt uses itself, so that we really are
26# testing libxcrypt against known answers generated with a different
27# implementation.
28
29import array
30import ctypes
31import multiprocessing
32import os
33import re
34import sys
35
36# force passlib to allow use of its built-in bcrypt implementation
37os.environ["PASSLIB_BUILTIN_BCRYPT"] = "enabled"
38
39import passlib.hash
40
41# In order to tickle various bugs and limitations in older hashing
42# methods precisely, we need to test several passphrases whose byte
43# sequences are not valid Unicode text in any encoding.  We therefore
44# use exclusively byte strings in this array.
45PHRASES = [
46    # All ASCII printable, various lengths.  Most of these were taken
47    # from older known-answer tests for specific hashing methods.
48    b'',
49    b' ',
50    b'a',
51    b'ab',
52    b'abc',
53    b'U*U',
54    b'U*U*',
55    b'U*U*U',
56    b'.....',
57    b'dragon',
58    b'dRaGoN',
59    b'DrAgOn',
60    b'PAROLX',
61    b'U*U***U',
62    b'abcdefg',
63    b'01234567',
64    b'726 even',
65    b'zyxwvuts',
66    b'ab1234567',
67    b'alexander',
68    b'beautiful',
69    b'challenge',
70    b'chocolate',
71    b'cr1234567',
72    b'katherine',
73    b'stephanie',
74    b'sunflower',
75    b'basketball',
76    b'porsche911',
77    b'|_337T`/p3',
78    b'thunderbird',
79    b'Hello world!',
80    b'pleaseletmein',
81    b'a short string',
82    b'zxyDPWgydbQjgq',
83    b'photojournalism',
84    b'ecclesiastically',
85    b'congregationalism',
86    b'dihydrosphingosine',
87    b'semianthropological',
88    b'palaeogeographically',
89    b'electromyographically',
90    b'noninterchangeableness',
91    b'abcdefghijklmnopqrstuvwxyz',
92    b'electroencephalographically',
93    b'antidisestablishmentarianism',
94    b'cyclotrimethylenetrinitramine',
95    b'dichlorodiphenyltrichloroethane',
96    b'multiple words seperated by spaces',
97    b'supercalifragilisticexpialidocious',
98    b'we have a short salt string but not a short password',
99    b'multiple word$ $eperated by $pace$ and $pecial character$',
100    b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
101    (b'1234567890123456789012345678901234567890'
102     b'1234567890123456789012345678901234567890'),
103    (b'a very much longer text to encrypt.  This one even stretches over more'
104     b'than one line.'),
105
106    # ASCII printables with their high bits flipped - DES-based hashes collide.
107    # All of these have an exact counterpart above.
108    b'\xd0\xc1\xd2\xcf\xcc\xd8',              # 'PAROLX'
109    b'\xd5\xaa\xd5\xaa\xaa\xaa\xd5\xaa',      # 'U*U***U*'
110    b'\xe1\xec\xe5\xf8\xe1\xee\xe4\xe5\xf2',  # 'alexander'
111    b'\xf3\xf4\xe5\xf0\xe8\xe1\xee\xe9\xe5',  # 'stephanie'
112    # '*U*U*U*U*U*U*U*U*'
113    b'\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa\xd5\xaa',
114
115    # A few UTF-8 strings and what they will collide with for
116    # DES-based hashes.
117    b'\xC3\xA9tude', b'C)tude', # UTF-8(NFC(étude))
118    b'Chl\xC3\xB6e', b'ChlC6e', # UTF-8(NFC(Chlöe))
119    # Eight letters, but 10 bytes: UTF-8(NFC(Ångström))
120    b'\xC3\x85ngstr\xC3\xB6m', b'C\x05ngstrC6m', b'C\x05ngstrC'
121
122    # descrypt truncates everything to 8 characters.
123    b'U*U***U*', b'U*U***U*ignored',
124    b'U*U*U*U*', b'U*U*U*U*ignored',
125    b'*U*U*U*U', b'*U*U*U*U*', b'*U*U*U*U*U*U*U*U', b'*U*U*U*U*U*U*U*U*',
126
127    # Patterns designed to tickle the bcrypt $2x$ sign-extension bug.
128    b'\xa3',
129    b'\xa3a',
130    b'\xd1\x91',
131    b'\xa3ab',
132    b'\xff\xff\xa3',
133    b'1\xa3345',
134    b'\xff\xa3345',
135    b'\xff\xa334\xff\xff\xff\xa3345',
136
137    (b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'
138     b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'
139     b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'
140     b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'
141     b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'
142     b'\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff'),
143
144    (b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'
145     b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'
146     b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'
147     b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'
148     b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'
149     b'\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55'),
150
151    # bcrypt truncates to 72 characters
152    (b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
153     b'0123456789'),
154    (b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
155     b'0123456789chars after 72 are ignored'),
156
157    (b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
158     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
159     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
160     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
161     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
162     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'),
163
164    (b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
165     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
166     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
167     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
168     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
169     b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'
170     b'chars after 72 are ignored as usual'),
171
172    # bigcrypt truncates to 128 characters
173    # (first sentence of _Twenty Thousand Leagues Under The Sea_)
174    (b'THE YEAR 1866 was marked by a bizarre development, an unexplained and '
175     b'downright inexplicable phenomenon that surely no one has forgotten.'),
176
177    # first 8 characters of above (des)
178    b'THE YEAR',
179    # first 72 characters (bcrypt)
180    b'THE YEAR 1866 was marked by a bizarre development, an unexplained and do',
181    # first 128 characters (bigcrypt)
182    (b'THE YEAR 1866 was marked by a bizarre development, an unexplained and '
183     b'downright inexplicable phenomenon that surely no one has f')
184]
185
186# passlib does not support all of the hashing methods we do, no longer
187# supports generation of bare setting strings, and in some cases does
188# not support all of the variants we need to generate.  Therefore,
189# there is a shim for each hashing method (variant).  Shims must be
190# named 'h_METHOD' where METHOD is the name for the method used by the
191# INCLUDE_method macros.
192#
193# Each shim function takes at least the arguments phrase, rounds, and
194# salt, in that order.  Additional optional arguments are allowed.  It
195# should do 'yield (phrase, setting, expected)' at least once, where
196# phrase is the phrase argument, setting is a setting string generated
197# from rounds and salt, and expected is the hashed passphrase expected
198# to be generated from that combination of phrase and setting.
199#
200# When implementing new shims, use of passlib's pure-Python "backends"
201# is strongly preferred where possible, because the speed of this
202# program does not matter, and the C backends tend to be based on some
203# incarnation of the same code that libxcrypt uses itself, so it
204# wouldn't be a proper interop test.
205
206# straightforward passlib wrappers, sorted by age of algorithm
207DES_CRYPT = passlib.hash.des_crypt
208DES_CRYPT.set_backend("builtin")
209def h_descrypt(phrase, rounds, salt):
210    expected = DES_CRYPT.using(
211        salt=salt, truncate_error=False
212    ).hash(phrase)
213    setting = expected[:2]
214    yield (phrase, setting, expected)
215
216BIGCRYPT = passlib.hash.bigcrypt
217# BIGCRYPT.set_backend("builtin") # currently p.h.bigcrypt always uses builtin
218def h_bigcrypt(phrase, rounds, salt):
219    # p.h.bigcrypt doesn't truncate to 128 chars.
220    # The bigcrypt implementation in libxcrypt was reverse engineered
221    # from a closed-source original and it's possible that they could
222    # have gotten it wrong, but let's stick to what we have.
223    expected = BIGCRYPT.using(
224        salt=salt
225    ).hash(phrase[:128])
226    # bigcrypt has no prefix, so our crypt() looks at the length of
227    # the setting string to decide whether it should use bigcrypt or
228    # descrypt.  For bigcrypt to be used, the setting must be too long
229    # to be a traditional DES hashed password.
230    setting = expected[:2] + ".............."
231    yield (phrase, setting, expected)
232
233BSDI_CRYPT = passlib.hash.bsdi_crypt
234BSDI_CRYPT.set_backend("builtin")
235def h_bsdicrypt(phrase, rounds, salt):
236    expected = BSDI_CRYPT.using(
237        salt=salt, rounds=rounds
238    ).hash(phrase)
239    setting = expected[:9]
240    yield (phrase, setting, expected)
241
242MD5_CRYPT = passlib.hash.md5_crypt
243MD5_CRYPT.set_backend("builtin")
244def h_md5crypt(phrase, rounds, salt):
245    expected = MD5_CRYPT.using(
246        salt=salt
247    ).hash(phrase)
248    setting = expected[:expected.rfind('$')]
249    yield (phrase, setting, expected)
250
251BSD_NTHASH = passlib.hash.bsd_nthash
252#BSD_NTHASH.set_backend("builtin") # has only the built-in backend
253def h_nt(phrase, rounds, salt):
254    # passlib.hash.bsd_nthash attempts to decode byte strings as UTF-8, which
255    # is Not What We Want.  Python's iso_8859_1 is an identity map from 00..FF
256    # to U+0000..U+00FF, which is correct for this application.
257    expected = BSD_NTHASH.hash(phrase.decode("iso_8859_1"))
258    # NTHash doesn't have a salt.
259    # Older versions of libxcrypt generated a fake salt which
260    # we should ensure is ignored.
261    yield (phrase, "$3$", expected)
262    yield (phrase, "$3$__not_used__0123456789abcdef", expected)
263
264SHA1_CRYPT = passlib.hash.sha1_crypt
265SHA1_CRYPT.set_backend("builtin")
266def h_sha1crypt(phrase, rounds, salt):
267    expected = SHA1_CRYPT.using(
268        salt=salt, rounds=rounds
269    ).hash(phrase)
270    setting = expected[:expected.rfind('$')]
271    yield (phrase, setting, expected)
272
273SHA256_CRYPT = passlib.hash.sha256_crypt
274SHA256_CRYPT.set_backend("builtin")
275def h_sha256crypt(phrase, rounds, salt):
276    expected = SHA256_CRYPT.using(
277        salt=salt, rounds=rounds
278    ).hash(phrase)
279    setting = expected[:expected.rfind('$')]
280    yield (phrase, setting, expected)
281
282SHA512_CRYPT = passlib.hash.sha512_crypt
283SHA512_CRYPT.set_backend("builtin")
284def h_sha512crypt(phrase, rounds, salt):
285    expected = SHA512_CRYPT.using(
286        salt=salt, rounds=rounds
287    ).hash(phrase)
288    setting = expected[:expected.rfind('$')]
289    yield (phrase, setting, expected)
290
291# these need to do more work by hand
292
293# We need to test setting strings both with and without the suffix
294# that triggers the off-by-one error in the original Sun hash parser
295# (which must be preserved by all interoperable implementations).
296# passlib.hash.sun_md5_crypt.using(..., bare_salt=True) does not
297# actually work.
298from passlib.handlers.sun_md5_crypt import raw_sun_md5_crypt
299def h_sunmd5(phrase, rounds, salt):
300
301    # sunmd5 is extremely slow in this test, compared to all the other
302    # hashes, because we have to do extra tests of bug-compatibility,
303    # because its round count cannot be reduced below 4096, and
304    # because on approximately half of those rounds it feeds an
305    # additional 1.5k of text to MD5_Update.  The only optimization
306    # that wouldn't break compatibility would be to plug in a faster
307    # MD5 core, but that's not worth the engineering effort since it
308    # would only benefit obsolete hashes.  Instead, skip most of the
309    # test phrases for this hash.  This cuts the wall-clock time for
310    # ka-sunmd5 (on a current-generation x86-64) from fifty to nine
311    # seconds, which we can live with.  sunmd5 feeds the phrase
312    # verbatim to MD5_Update, only once, with no length limit, so we
313    # don't need a lot of careful testing of different phrases.  We do
314    # still include at least a few of the non-ASCII test phrases, and
315    # one very long phrase.
316    if 6 <= len(phrase) <= 128:
317        return
318
319    if rounds == 0:
320        bare_setting = "$md5$" + salt
321    else:
322        bare_setting = "$md5,rounds={}${}".format(rounds, salt)
323
324    suff_setting = bare_setting + "$"
325    bare_cksum = raw_sun_md5_crypt(phrase, rounds, bare_setting.encode("ascii"))
326    suff_cksum = raw_sun_md5_crypt(phrase, rounds, suff_setting.encode("ascii"))
327
328    bare_cksum = bare_cksum.decode("ascii")
329    suff_cksum = suff_cksum.decode("ascii")
330
331    yield (phrase, bare_setting, bare_setting + "$" + bare_cksum)
332    yield (phrase, bare_setting + "$x", bare_setting + "$" + bare_cksum)
333    yield (phrase, suff_setting, suff_setting + "$" + suff_cksum)
334    yield (phrase, suff_setting + "$", suff_setting + "$" + suff_cksum)
335
336# testing bcrypt $2b$ and $2y$ is easy, but ...
337BCRYPT = passlib.hash.bcrypt
338BCRYPT.set_backend("builtin")
339def h_bcrypt(phrase, rounds, salt):
340    expected = BCRYPT.using(
341        salt=salt, rounds=rounds, ident="2b"
342    ).hash(phrase)
343    setting = expected[:-31]
344    yield (phrase, setting, expected)
345
346def h_bcrypt_y(phrase, rounds, salt):
347    expected = BCRYPT.using(
348        salt=salt, rounds=rounds, ident="2y"
349    ).hash(phrase)
350    setting = expected[:-31]
351    yield (phrase, setting, expected)
352
353# ...passlib doesn't implement the quirks of crypt_blowfish's $2a$ or
354# $2x$, but we really must test them.  It is theoretically possible to
355# implement the $2x$ quirk by a transformation on the input
356# passphrase, but it would be hard to get right, and the $2a$ quirk
357# cannot be implemented this way.  The path of least resistance is to
358# compute the $2b$ hash and then look up its output in a table of
359# substitutions for each quirk.  The collision resistance of the $2b$
360# hash should protect us from false hits in these tables.  (Remember
361# that the $2x$ quirk is a _bug_, preserved for backward compatibility's
362# sake, that causes output collisions; duplicate entries on the
363# right-hand side of the $2x$ substitution table are expected.)
364bcrypt_a_substitutions = {
365    'HdhhdUXVgLADnbTYf12kvsasO1gS51C': '5jlqAXzFdq.3//pJFBa432Pepsclbdu',
366    'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa': '7N5c8AaH.dbqz7.2o.V2mRkUDV0TZnO',
367    '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW': 'D8jTC5oJeIumVOhMVpz79BzoGVhCjrW',
368    '9f4sA9SRA0scUKcRyC5kce8dao2.GKe': 'E8Lpo1/qkGPTBDBxEsJjeEzh9nkZ9uW',
369    'MOaOTHB4gEm.rriBjXNwBNh.Oc4mKGG': 'ZQhQBRpiYJCaQFgyHlB.t/F01cqLCIu',
370    'Qjdj3GXX7D0sFE9jji6wxSTWIhqI3US': 'euRNRfAA6e0fjpTfQPPAMU1PCOf9IHq',
371    'PIeeyENZVZmrKLAq5lwBUU9fMRVfV2m': 'h87NWu/js59XXaIj1hDyHxnjw7MJ5K6',
372    'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142': 'vrukkCtLqHBLoBDsz6QoBtSwzI9Qxiq',
373}
374def h_bcrypt_a(phrase, rounds, salt):
375    base = BCRYPT.using(
376        salt=salt, rounds=rounds, ident="2b"
377    ).hash(phrase)
378    base_setting = base[4:-31]
379    base_output  = base[-31:]
380    output = bcrypt_a_substitutions.get(base_output, base_output)
381
382    setting = "$2a$" + base_setting
383    expected = setting + output
384    yield (phrase, setting, expected)
385
386bcrypt_x_substitutions = {
387    'xGPMyJSPyyeICKolPQ2gecm8rOgHwz.': '.sDifhVkUxvjPx6U4yeM2tC411Wuc.W',
388    'SRMKxjeMqVSDMhSLhOnvtEZ/p5KhUbq': '1Z0zKnHbUU3q/kk//Pknlv19a4/T8.K',
389    'PQInhDOdCKnXeUE.n/L.kQmTKM9ldK2': '25hhqa/GOJGmXui3avNI5MN8lOI2bCW',
390    '7UwHe/ywPmdp.nr.ZLQSxd8hqn7qURW': '2E0h7UFL/4fALemA5ApWrCWllQXSPTu',
391    'FZEYKXyyMgG13MK0uV8dwNotWf4Wm6e': '2M7Vc.sF98e8DDmnxFjRfAmrudbv6y.',
392    'ZHZAnvydiPNiYH2VjRNhEAD6BEiyaWS': '3kVpkKaj1q2TAXm.ptIi98Nj3zVV8A2',
393    'I7vjUzDOKf8XqcK8VSCm9b0bwoSm1Qm': '6He0iAS8JsdM.iB4OQ4fbsKMXPagLhy',
394    'tLCLEXAt3RUjOgs.yvfWSni4j1JX/JS': '7bLwFi3rlVcl.xfhc7LxjqwOExfxki2',
395    'zQPVIDk6wF8XmESji30KDHFabTlu0WK': '8gGm3RkYFflDX50UQs.tJ8InKNy.HGO',
396    'KBv1eBM2he2T/QheS8zPHMejn5fMNRe': '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW',
397    'XEhidoDX1kz.RFnwWIzMJvtW2aP/k4e': '8jdeg8QqT4CX3ERA9vZPFZAkxZRpxJW',
398    'UIzecIiCguM2sPh1F7L9IS3zOtmg5Im': '9f4sA9SRA0scUKcRyC5kce8dao2.GKe',
399    'Xl2V4mQ7s0zX0b4XXH/b9UkRRCWpq3e': '9f4sA9SRA0scUKcRyC5kce8dao2.GKe',
400    'tHwehGUs3q0b/Ejn42MsoM4Yv/iA1rq': '9iflVP0Ezo/iaxO0XS74wFglLNeryTS',
401    'K2KXzhsMefB8v6hJJG3bOyKV.XRf6Qe': 'C3nvb4DotHdnRYgOPcdiK0C4q.DkIDO',
402    'EuEnx.TyCLYgkTV/uhWL5xJTHV7ZMjG': 'HdhhdUXVgLADnbTYf12kvsasO1gS51C',
403    'oaE0x.b2rUEITWgPdg.GEcU4ePHLh6m': 'IZEWKJgp.b.KG29zgOadgd2rUt5iV1e',
404    '3cFqlEl7Y0HWaVLHpyyCqG8dBNk1DSm': 'IvJ5WHgSbYKj7g9hhdVsAjyzcnVT/.m',
405    'gI4g/1M6K/Sz2bsgu9VDeEl6reszuXa': 'J6Y/kPTV/aHj7iJKuDfD5OPjVTvT2BK',
406    'rz1efvzeJjL4mQ813hrZNg3p1.ivOii': 'MOaOTHB4gEm.rriBjXNwBNh.Oc4mKGG',
407    'Tp1b9XCEV16BcrQ.0k4xf7V/OGPZLnK': 'N5E4WSTo/R5henexIN1o8xkGwe2V86W',
408    'CARhc7ugFdgoPjDb7LUG.yQF2lboK6e': 'NjlOVoE5aHHQGtU9zc25wu0VykHnD1G',
409    'nCdKcLO57oLlc6J6sNnGyfT9FrIawiW': 'PIeeyENZVZmrKLAq5lwBUU9fMRVfV2m',
410    'DQjlTXDA5PBQ97.qBJY/vsHPQhLJDMe': 'PMOS6ygjFMSbDo.iJJam/G63inGIOBO',
411    '1qOUgfpg30XDHLx/zrbWiMRcWyFhwye': 'QZ7A0p9q1Ag9Utfnfl/xif8NiDtVhO.',
412    'h0JFRyDXfP0duxAkVxWGr8nMDEDvPca': 'QiT.KUY9PXgIzL2aECMKb0EvVl0Pzw6',
413    'BvtRGGx3p8o0C5C36uS442Qqnrwofrq': 'Qjdj3GXX7D0sFE9jji6wxSTWIhqI3US',
414    'UTFLPGm29p.YVzcY6pqejGEql1x8Ccq': 'TYqa73Yp3leHe3D6.ysuJtNLwOma87C',
415    'YdPam5/ypFIyDUQMyCCEIwzVsTi0Sa6': 'TmFuGBy/Zgc6JVAr667oHeCvGQGyS1q',
416    'gbhoNOH4mWxoEhRrQNdeI.rpk9XeuZS': 'UPPO3QqmgMIXGHvbOLe2IkNzHLAToY2',
417    'ZkQGqjbMpqQ9oCsxNZjN8LQJaHFqPMC': 'VTMVcF7YBLV2/O6V1PNcQw0BD3hTN6a',
418    'RbKkfW2ph8bd8B5yul5E97DxgDw9cT.': 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142',
419    'WI7ZNXFtzCd9mN1mWoNMQRHEmkDsZnm': 'VmFQpoXeVuKTzkg2ZRsAf.8PZJZg142',
420    '2WegkGS5Xr/qYNkfEi6JmnR16WVSwcW': 'Xv3TUB0NdnMpyn4cfg4g48oZxRSIrNC',
421    'LN/CEHXLfFeYdOOxbdxKu8ZqSIKgqAu': 'YdqUOXeMKw7X6zbqBXP6c1xqIKun7Oq',
422    'VAQY6kySmwStlNY.sut9Y87njVr0mm.': 'Ysbn1VpHCTzInfW/z/8Q3k676rxfmSW',
423    'qJn4AY9ch/WAR5JXeeJtVGeovjQrhd2': 'ZH9vItRapPbkFKo0iQqU4v71o0e19Mm',
424    '4QrucGf30zIbQA.sO0d1QrU63xBrEYq': 'bqJMLkbvnTFj0OYMu9tPnQXstRzX/e6',
425    'HD8RnTmGEavoR3LFVfdHvh3xA0QPka2': 'cYbtH8J2lfpMIiBKfF3pKpMno7JlLui',
426    '6WgD2zYQDPgxR2sXlUeEeGKknxt95W.': 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa',
427    'rPSVExmrZ2WB1xntSbqZ/DRQRlKtVw.': 'caGU5ROXj4M8Tgsx3s/D5BQIuhazIWa',
428    'PYLCOpTKZmhFn1CoBM2XNbWgqMX4Jk2': 'cxMAJfIx3T.Fv3O0KjL9VdM/oSSUVRK',
429    'w8RVl3rh7sNazq544l0944qGq4GUFUq': 'fOj7giyJz5k22FHTKGVo8o1o5zGzPsq',
430    'KHsCqMFVxOAGJObHwEBR3JaEdKVu1.m': 'fRmxM11/x97bxCrhecMENdkPm7YpRbe',
431    '3wn02pxRJPnFwvlGt75DURDbt4g7om.': 'fY4v5x6.8txtKUKDP86z1xjlXG/GgZO',
432    'k9Hv17Gha84losGKAq61csCZokj5pyy': 'gLfxf5sydYesf658mrFYb51nLrn/4Sm',
433    'uTNb9MEHVGI7kd6UnQjYxgRNiKJM01S': 'h.z2vLHB/tYSU5fPXkrYB7TxLHGJnI6',
434    'heAts1y/8kcTTP0/vD3yeuMX1ihF8dO': 'j72N2Fi2j3pGalOZvTqtyH3bYGotuju',
435    'SqNATdQiNEckAKLsqgsKbAM5.hZoMCq': 'mjGosqV8OkKEcduYTNz5PKN2scswFya',
436    'k786rdsOdUP4cRi.dLa3dsYueMj5UnS': 'nQF1kDoMDjBBwXy2wwMni2gJLKqA0ta',
437    'G6PeIXKiqeNUPUbqFkMJvvI7G9hd51W': 'oBvt6zJCTP5OED1esTYUYPn31cWqwsa',
438    'TszY8.avBpwJ6xbNjwws3SKBbK6kj6S': 'odAvHZH9azlhi1x4pBLF25.hj08RMFi',
439    'z4QFggBRTVUeHRGL/CQxlAYHraYPcpa': 'ojiyBkc.4HZ2y5Yh0LxBbI6ZkLiRg0C',
440    'PPtdI0NcxZ4Txyv/Y5ORfcP1XFriKT2': 'pjce7u/YRnectNa8DXjsSGzRdyH2PSG',
441    'uIZ1Lgb.jHRDU/Z/LVXfpQCK72fTEHq': 'q5NMeQZ0UTyP/bILj02wdQ.Si5KHU1K',
442    '51cV.PJOQVwmiao4t4lXsb9Cc3Jnuem': 'rB3dV.fJGdSihNlP0vo5PemoaZRp6LS',
443    's6h1E6A2RzVn2KxXLQXsKosQeRo8bLa': 'sND7G4.cx6Dzn6TqbXfK99bElU0a7P.',
444    'A96emG/jBf0K1K6vCG.eZGdLkSridom': 'tlD3cmtHgs/TwWAvy5E3F.freZS1bau',
445    'hWYb0x3Q3zM0aBkB2G1arbzmWxRQS/i': 'whpbcVuyGrJbgveSSM3XQKa8G5alyRm',
446    'AqM0XavJxJXeVlJ3Te3umGJaPOCYmZi': 'xP2lldc1.10LvZDjJZXNBKLzWqnkbOa',
447    'UPBzTBMwJb5mKWflQ.5Rid4481RrxVy': 'xSD.pz8Zg3vt0Jiovghl5Dqrs8aw8ni',
448    'yM59Cq5iVZDB3u45gTNhRSnOgrY1tdG': 'yED5tIjzyeH90te88BUWvTrMFHsWgCi',
449    'k.qekGiJym3QgfeFCwNhPHg0Zk99KSa': 'yphVralDu2JlxYbCqwwGli/H6wBgBtC',
450    'iYbzuFNFwSfCgqTGNsUFtSDh8PJAqSe': 'zAUUWh4XGsBGYs6yyUJTSfEgzoLXO6G',
451}
452def h_bcrypt_x(phrase, rounds, salt):
453    base = BCRYPT.using(
454        salt=salt, rounds=rounds, ident="2b"
455    ).hash(phrase)
456    base_setting = base[4:-31]
457    base_output  = base[-31:]
458    output = bcrypt_x_substitutions.get(base_output, base_output)
459
460    setting = "$2x$" + base_setting
461    expected = setting + output
462    yield (phrase, setting, expected)
463
464# passlib includes an scrypt implementation, but its encoded password
465# format is not the $7$ format we implement, so instead we use the
466# stdlib's hashlib.scrypt (this is why 3.6+ is required) and encode
467# the setting string by hand.  This may produce strings encoding N/r/p
468# combinations that don't normally occur in the wild, but that's OK.
469# The algorithm implemented by hashlib.scrypt is standardized as
470# RFC 7914, so it's not an issue where that implementation came from.
471# Yes, the salt is properly passed to raw_scrypt as-is.
472from hashlib import scrypt as raw_scrypt
473from passlib.utils.binary import h64 as hash64
474def h_scrypt(phrase, rounds, salt):
475    p = 1
476    r = 8
477    log2N = rounds + 7
478    N = 1 << log2N
479
480    bytesalt = salt.encode("ascii")
481    setting = (b"$7$" +
482               hash64.encode_int6(log2N) +
483               hash64.encode_int30(r) +
484               hash64.encode_int30(p) +
485               bytesalt)
486
487    binhash = raw_scrypt(phrase, salt=bytesalt, p=p, r=r, n=N, dklen=32)
488
489    yield (phrase, setting, setting + b'$' + hash64.encode_bytes(binhash))
490
491#
492# passlib does not support either yescrypt or gost-yescrypt.  In fact,
493# as far as I can tell, at the time of writing, there exists only one
494# implementation of yescrypt and gost-yescrypt, by Solar Designer et al
495# which is the code we use ourselves.  However, a test for round-
496# trippability and API consistency is still worthwhile, as is a test
497# that the implementation's current behavior is compatible with its
498# behavior some time ago.  Therefore, we encode setting strings by
499# hand, and ctypes is used to access crypt_ra in the just-built
500# libcrypt.so.  This will only work if the library was configured with
501# --enable-hashes=yescrypt,gost-yescrypt,[others] and --enable-shared,
502# which is OK, since it's not run during a normal build.  Remove this
503# once passlib supports these hashes.
504#
505# crypt_ra is used because it's thread-safe but doesn't require us to
506# know how big struct crypt_data is.  There is no good way to arrange
507# for the data object to be deallocated.  Oh well.
508LIBCRYPT = ctypes.cdll.LoadLibrary(os.path.join(os.getcwd(), ".libs",
509                                                "libcrypt.so"))
510_xcrypt_crypt_ra = LIBCRYPT.crypt_ra
511_xcrypt_crypt_ra.argtypes = [ctypes.c_char_p, ctypes.c_char_p,
512                             ctypes.POINTER(ctypes.c_void_p),
513                             ctypes.POINTER(ctypes.c_int)]
514_xcrypt_crypt_ra.restype = ctypes.c_char_p
515_xcrypt_crypt_ra_data = ctypes.c_void_p(0)
516_xcrypt_crypt_ra_datasize = ctypes.c_int(0)
517
518def xcrypt_crypt(phrase, setting):
519    global _xcrypt_crypt_ra_data, _xcrypt_crypt_ra_datasize
520    if not isinstance(phrase, bytes): phrase = phrase.encode("utf-8")
521    if not isinstance(setting, bytes): setting = setting.encode("ascii")
522    rv = _xcrypt_crypt_ra(phrase, setting,
523                          ctypes.byref(_xcrypt_crypt_ra_data),
524                          ctypes.byref(_xcrypt_crypt_ra_datasize))
525    if not rv:
526        err = ctypes.get_errno()
527        raise OSError(err, os.strerror(err))
528    return bytes(rv)
529
530def yescrypt_gensalt(ident, rounds, salt):
531    if rounds == 1:
532        params = "j75"
533    elif rounds == 2:
534        params = "j85"
535    else:
536        raise RuntimeError("don't know how to encode rounds={}"
537                           .format(rounds))
538
539    return "${}${}${}".format(ident, params, salt)
540
541def h_yescrypt(phrase, rounds, salt):
542    setting = yescrypt_gensalt("y", rounds, salt)
543    yield (phrase, setting, xcrypt_crypt(phrase, setting))
544
545def h_gost_yescrypt(phrase, rounds, salt):
546    setting = yescrypt_gensalt("gy", rounds, salt)
547    yield (phrase, setting, xcrypt_crypt(phrase, setting))
548
549# Each method should contribute a group of parameters to the array
550# below.  Each block has the form
551#
552#  ('method', [
553#     (rounds, salt),
554#     (rounds, salt),
555#     ...
556#  ])
557#
558# where 'method' is the method name used in the INCLUDE_ macros,
559# rounds is a number and salt is a salt string.  The appropriate
560# h_method function will be called with arguments (phrase, *params)
561# where params is one of the tuples from its block, so it is OK to add
562# extra arguments after the salt if necessary.
563#
564# If the method has a tunable rounds parameter, its array of
565# (rounds, salt) pairs should have two salts * at least two values of
566# the rounds parameter.  If it does not, it should have two salts and
567# use 0 for the rounds parameter.
568#
569# The point of this test is not to exercise brute force resistance,
570# so keep cost parameters low.
571#
572# Methods should be in alphabetical order by their INCLUDE_macro name.
573
574SETTINGS = [
575    ('bcrypt', [
576        (5, 'CCCCCCCCCCCCCCCCCCCCC.'),
577        (5, 'abcdefghijklmnopqrstuu'),
578        (4, 'CCCCCCCCCCCCCCCCCCCCC.'),
579        (4, 'abcdefghijklmnopqrstuu'),
580    ]),
581
582    ('bcrypt_y', [
583        (5, 'CCCCCCCCCCCCCCCCCCCCC.'),
584        (5, 'abcdefghijklmnopqrstuu'),
585        (4, 'CCCCCCCCCCCCCCCCCCCCC.'),
586        (4, 'abcdefghijklmnopqrstuu'),
587    ]),
588
589    ('bcrypt_a', [
590        (5, 'CCCCCCCCCCCCCCCCCCCCC.'),
591        (5, 'abcdefghijklmnopqrstuu'),
592        (4, 'CCCCCCCCCCCCCCCCCCCCC.'),
593        (4, 'abcdefghijklmnopqrstuu'),
594    ]),
595
596    ('bcrypt_x', [
597        (5, 'CCCCCCCCCCCCCCCCCCCCC.'),
598        (5, 'abcdefghijklmnopqrstuu'),
599        (4, 'CCCCCCCCCCCCCCCCCCCCC.'),
600        (4, 'abcdefghijklmnopqrstuu'),
601    ]),
602
603    ('bigcrypt', [
604        (0, 'CC'),
605        (0, 'ab'),
606    ]),
607
608    # The bsdicrypt round count is required to be odd.
609    ('bsdicrypt', [
610        (1, 'CCCC'),
611        (1, 'abcd'),
612        (13, 'CCCC'),
613        (13, 'abcd'),
614    ]),
615
616    ('descrypt', [
617        (0, 'CC'),
618        (0, 'ab'),
619    ]),
620
621    ('gost_yescrypt', [
622        (1, '.......'),
623        (1, 'LdJMENpBABJJ3hIHjB1Bi.'),
624        (2, '.......'),
625        (2, 'LdJMENpBABJJ3hIHjB1Bi.'),
626    ]),
627
628    ('md5crypt', [
629        (0, 'CCCCCCCC'),
630        (0, 'abcdefgh'),
631    ]),
632
633    ('nt', [
634        (0, ''),
635    ]),
636
637    ('scrypt', [
638        (1, 'SodiumChloride'),
639        (1, 'unUNunUNunUNun'),
640        (2, 'SodiumChloride'),
641        (2, 'unUNunUNunUNun'),
642    ]),
643
644    ('sha1crypt', [
645        (12, 'GGXpNqoJvglVTkGU'),
646        (12, 'xSZGpk6Bp4SA3.cR'),
647        (456, 'GGXpNqoJvglVTkGU'),
648        (456, 'xSZGpk6Bp4SA3.cR'),
649    ]),
650
651    ('sha256crypt', [
652        (1000, 'saltstring'),
653        (1000, 'short'),
654        (5000, 'saltstring'),
655        (5000, 'short'),
656    ]),
657
658    ('sha512crypt', [
659        (1000, 'saltstring'),
660        (1000, 'short'),
661        (5000, 'saltstring'),
662        (5000, 'short'),
663    ]),
664
665    ('sunmd5', [
666        (0, '9ZLwtuTO'),
667        (0, '1xMeE.at'),
668        (12, '9ZLwtuTO'),
669        (12, '1xMeE.at'),
670    ]),
671
672    ('yescrypt', [
673        (1, '.......'),
674        (1, 'LdJMENpBABJJ3hIHjB1Bi.'),
675        (2, '.......'),
676        (2, 'LdJMENpBABJJ3hIHjB1Bi.'),
677    ]),
678]
679
680# Normally, we expect that (1) for fixed salt, no two phrases hash to
681# the same string; (2) for fixed phrase, no two settings produce the
682# same string.  The known exceptions are all due to limitations and/or
683# bugs in the hashing method.  Check the table produced by this
684# program to ensure that all of the collisions in the ->expected
685# strings are due to one of the known exceptions.  test-crypt-kat.c
686# itself doesn't need to do this test; as long as all of the hashes
687# produced by the just-built crypt() match the appropriate ->expected
688# string, no new collisions can have been introduced.
689
690def strneq_7bit (p1, p2, limit):
691    n1 = len(p1)
692    n2 = len(p2)
693    for i in range(limit):
694        if i == n1 and i == n2:
695            # strings are the same length, within the limit, and no
696            # mismatched characters were found
697            return True
698        if i == n1 or i == n2:
699            # one string is longer than the other, within the limit
700            return False
701        if (p1[i] & 0x7F) != (p2[i] & 0x7F):
702            # characters not equal, after masking the 8th bit
703            return False
704    # reached the limit, no mismatches found
705    return True
706
707# The bug in bcrypt mode "x" (preserved from the original
708# implementation of bcrypt) is, at its root, that the code below
709# sign- rather than zero-extends *p before or-ing it into 'tmp'.
710# When *p has its 8th bit set, it is therefore or-ed in as
711# 0xFF_FF_FF_xx rather than 0x00_00_00_xx, and clobbers the other
712# three bytes in 'tmp'.  Depending on its position within the input,
713# this can erase up to three other characters of the passphrase.
714# The exact set of strings involved in any one group of collisions is
715# difficult to describe in words and may depend on the endianness of
716# the CPU.  The test cases in this file have only been verified on
717# a little-endian CPU.
718BF_KEY_LEN  = 18
719
720def buggy_expand_BF_key(phrase):
721    p = 0
722    lp = len(phrase)
723    expanded = [0]*BF_KEY_LEN
724    if lp > 0:
725        for i in range(BF_KEY_LEN):
726            tmp = 0
727            for j in range(4):
728                if p == lp:
729                    c = 0
730                else:
731                    c = phrase[p]
732                stmp = ((c & 0x7F) - (c & 0x80)) & 0xFFFFFFFF
733                tmp = ((tmp << 8) | stmp) & 0xFFFFFFFF
734                p += 1
735                if p == lp + 1:
736                    p = 0
737            expanded[i] = tmp
738    return expanded
739
740def sign_extension_collision_p(p1, p2):
741    return buggy_expand_BF_key(p1) == buggy_expand_BF_key(p2)
742
743def equivalent_sunmd5_settings_p(s1, s2):
744    if s1[:4] != "$md5": return False
745    if s2[:4] != "$md5": return False
746
747    l1 = len(s1)
748    l2 = len(s2)
749    if l1 < l2:
750        ll = l1
751        lh = l2
752        sl = s1
753        sh = s2
754    else:
755        ll = l2
756        lh = l1
757        sl = s2
758        sh = s1
759    if sl[:ll] != sh[:ll]:
760        return False
761
762    # The two cases where sunmd5 settings are equivalent:
763    # $md5...$ and $md5...$$
764    # $md5...  and $md5...$x
765    if sl[ll-1] == '$':
766        if ll+1 != lh or sh[ll] != '$':
767            return False
768    else:
769        if ll+2 != lh or sh[ll] != '$' or sh[ll+1] != 'x':
770            return False
771    return True
772
773def collision_expected(p1, p2, s1, s2):
774    if not isinstance(p1, bytes): p1 = p1.encode("iso_8859_1")
775    if not isinstance(p2, bytes): p2 = p2.encode("iso_8859_1")
776    if isinstance(s1, bytes):     s1 = s1.decode("ascii")
777    if isinstance(s2, bytes):     s2 = s2.decode("ascii")
778    # Under no circumstances should two hashes with different settings
779    # collide, except...
780    if s1 != s2:
781        # a descrypt hash can collide with a bigcrypt hash when the phrase
782        # input to bigcrypt was fewer than 8 characters long
783        if (    s1[0] != '$' and s1[0] != '_'
784            and s2[0] != '$' and s2[0] != '_'
785            and (   (len(s1) == 2 and len(s2) > 2 and len(p2) <= 8)
786                 or (len(s2) == 2 and len(s1) > 2 and len(p1) <= 8))):
787            return strneq_7bit(p1, p2, 8)
788
789        # all settings for NTHASH are equivalent
790        if s1[:3] == '$3$' and s2[:3] == '$3$':
791            return p1 == p2
792
793        # sunmd5 has pairs of equivalent settings
794        if equivalent_sunmd5_settings_p (s1, s2):
795            return p1 == p2
796
797        return False
798
799    if s1[:2] == '$2':
800        # bcrypt truncates passphrases to 72 characters
801        if p1[:72] == p2[:72]:
802            return True
803        # preserved bcrypt $2x bug?
804        if s1[:3] == '$2x' and sign_extension_collision_p(p1, p2):
805            return True
806        return False
807
808    if s1[0] != '$' and s1[0] != '_':
809        if len(s1) == 2:
810            # descrypt truncates passphrases to 8 characters and strips the
811            # 8th bit
812            return strneq_7bit(p1, p2, 8)
813        else:
814            # bigcrypt truncates passphrases to 128 characters and strips the
815            # 8th bit
816            return strneq_7bit(p1, p2, 128)
817
818    if s1[0] == '_':
819        # bsdicrypt does not truncate but does still strip the 8th bit
820        return strneq_7bit(p1, p2, max(len(p1), len(p2)))
821
822    return False
823
824def report_unexpected_collision(p1, p2, s1, s2, expected):
825    sys.stderr.write("UNEXPECTED HASH COLLISION:\n"
826                     "  hash = {}\n"
827                     "    p1 = {!r}\n"
828                     "    p2 = {!r}\n"
829                     "    s1 = {!r}\n"
830                     "    s2 = {!r}\n"
831                     "\n".format(expected, p1, p2, s1, s2))
832
833# Master control.
834#
835# To reduce the painful slowness of this program _somewhat_,
836# we use a multiprocessing pool to compute all of the hashes.
837
838def generate_phrase_setting_combs():
839    for macro_name, settings in SETTINGS:
840        for phrase in PHRASES:
841            for setting in settings:
842                yield (macro_name, phrase, setting)
843
844def worker_compute_one(args):
845    method, phrase, setting = args
846
847    import __main__
848    sfunc = getattr(__main__, 'h_' + method)
849    return [(method, case) for case in sfunc(phrase, *setting)]
850
851# Python specifies that an \x escape in a string literal consumes
852# exactly two subsequent hexadecimal digits.  C, on the other hand,
853# specifies that \x in a string literal consumes *any number of*
854# hexadecimal digits, and if the hexadecimal number is larger than the
855# range representable by 'unsigned char' the result is
856# implementation-defined.  For instance, "\x303" == "03" in Python,
857# but in C the string on the left could be anything.  The simplest way
858# to deal with this is to escape the string Python's way and then
859# replace sequences like '\x303' with '\x30""3'.
860c_hex_escape_fixup_re_ = re.compile(
861    r"(\\x[0-9a-fA-F]{2})([0-9a-fA-F])")
862
863def c_hex_escape(s):
864    if isinstance(s, bytes):
865        s = s.decode("iso_8859_1")
866    s = s.encode("unicode_escape").decode("ascii")
867
868    return c_hex_escape_fixup_re_.sub(r'\1""\2', s)
869
870def format_case(phrase, setting, expected):
871    return ('  {{ "{}", "{}", "{}" }},\n'
872            .format(c_hex_escape(setting),
873                    c_hex_escape(expected),
874                    c_hex_escape(phrase)))
875
876def main():
877    # FIXME: This only detects collisions that actually happen, not
878    # collisions that ought to have happened but didn't.  (Detecting
879    # collisions that ought to have happened, but didn't, would be
880    # unavoidably quadratic in the total number of test cases, so I'm
881    # not sure it's worth it.)
882    items = []
883    collisions = {}
884    collision_error = False
885    with multiprocessing.Pool() as pool:
886        for group in pool.imap(worker_compute_one,
887                               generate_phrase_setting_combs(),
888                               chunksize=100):
889            for method, (phrase, setting, expected) in group:
890                if expected in collisions:
891                    p1, s1 = collisions[expected]
892                    if not collision_expected(p1, phrase, s1, setting):
893                        report_unexpected_collision(p1, phrase, s1, setting,
894                                                    expected)
895                        collision_error = True
896                else:
897                    collisions[expected] = (phrase, setting)
898                items.append((method, format_case(phrase, setting, expected)))
899
900    if collision_error:
901        sys.exit(1)
902
903    sys.stdout.write(
904        "/* Known-answer tests for passphrase hashes.  -*- mode: c -*-\n"
905        "   Automatically generated by ka-table-gen.py.\n"
906        "   Do not edit this file by hand.  */\n\n")
907
908    prev_method = None
909    for method, case in items:
910        if method != prev_method:
911            if prev_method is not None:
912                sys.stdout.write("#endif // {}\n\n".format(prev_method))
913            sys.stdout.write("#if INCLUDE_{} && defined TEST_{}\n"
914                             .format(method, method))
915            prev_method = method
916        sys.stdout.write(case)
917
918    if prev_method is not None:
919        sys.stdout.write("#endif // {}\n".format(prev_method))
920
921if __name__ == '__main__':
922    main()
923