1*8c117293SSascha Wildner /*
2*8c117293SSascha Wildner  * Copyright (c) 2020 by Solar Designer
3*8c117293SSascha Wildner  * See LICENSE
4*8c117293SSascha Wildner  */
5*8c117293SSascha Wildner 
6*8c117293SSascha Wildner #ifndef PASSWDQC_FILTER_H__
7*8c117293SSascha Wildner #define PASSWDQC_FILTER_H__
8*8c117293SSascha Wildner 
9*8c117293SSascha Wildner #include <stdint.h>
10*8c117293SSascha Wildner 
11*8c117293SSascha Wildner /* Higher-level API for use by passwdqc_check.c */
12*8c117293SSascha Wildner 
13*8c117293SSascha Wildner typedef struct {
14*8c117293SSascha Wildner 	uint8_t version[4]; /* PASSWDQC_FILTER_VERSION */
15*8c117293SSascha Wildner 	uint8_t threshold; /* 0 to 4 */
16*8c117293SSascha Wildner 	uint8_t bucket_size; /* 2 to 4 */
17*8c117293SSascha Wildner 	uint8_t hash_id; /* one of PASSWDQC_FILTER_HASH_* */
18*8c117293SSascha Wildner 	uint8_t reserved1;
19*8c117293SSascha Wildner 	uint64_t endianness; /* PASSWDQC_FILTER_ENDIANNESS */
20*8c117293SSascha Wildner 	uint64_t reserved2; /* e.g., for checksum */
21*8c117293SSascha Wildner 	uint64_t capacity, deletes, inserts, dupes, kicks;
22*8c117293SSascha Wildner } passwdqc_filter_header_t;
23*8c117293SSascha Wildner 
24*8c117293SSascha Wildner typedef struct {
25*8c117293SSascha Wildner 	passwdqc_filter_header_t header;
26*8c117293SSascha Wildner 	int fd;
27*8c117293SSascha Wildner } passwdqc_filter_t;
28*8c117293SSascha Wildner 
29*8c117293SSascha Wildner extern int passwdqc_filter_open(passwdqc_filter_t *flt, const char *filename);
30*8c117293SSascha Wildner extern int passwdqc_filter_lookup(const passwdqc_filter_t *flt, const char *plaintext);
31*8c117293SSascha Wildner extern int passwdqc_filter_close(passwdqc_filter_t *flt);
32*8c117293SSascha Wildner 
33*8c117293SSascha Wildner #ifdef PASSWDQC_FILTER_INTERNALS
34*8c117293SSascha Wildner /* Lower-level inlines for shared use by pwqfilter.c and passwdqc_filter.c */
35*8c117293SSascha Wildner 
36*8c117293SSascha Wildner #include <string.h> /* for strcspn() */
37*8c117293SSascha Wildner #ifndef _MSC_VER
38*8c117293SSascha Wildner #include <endian.h>
39*8c117293SSascha Wildner #endif
40*8c117293SSascha Wildner 
41*8c117293SSascha Wildner #include "md4.h"
42*8c117293SSascha Wildner 
43*8c117293SSascha Wildner #define PASSWDQC_FILTER_VERSION "PWQ0"
44*8c117293SSascha Wildner #define PASSWDQC_FILTER_ENDIANNESS 0x0807060504030201ULL
45*8c117293SSascha Wildner 
passwdqc_filter_verify_header(const passwdqc_filter_header_t * header)46*8c117293SSascha Wildner static inline int passwdqc_filter_verify_header(const passwdqc_filter_header_t *header)
47*8c117293SSascha Wildner {
48*8c117293SSascha Wildner 	return (memcmp(header->version, PASSWDQC_FILTER_VERSION, sizeof(header->version)) ||
49*8c117293SSascha Wildner 	    header->threshold > header->bucket_size || header->bucket_size < 2 || header->bucket_size > 4 ||
50*8c117293SSascha Wildner 	    header->endianness != PASSWDQC_FILTER_ENDIANNESS ||
51*8c117293SSascha Wildner 	    (header->capacity & 3) || header->capacity < 4 || header->capacity > ((1ULL << 32) - 1) * 4 ||
52*8c117293SSascha Wildner 	    header->inserts - header->deletes > header->capacity) ? -1 : 0;
53*8c117293SSascha Wildner }
54*8c117293SSascha Wildner 
55*8c117293SSascha Wildner typedef enum {
56*8c117293SSascha Wildner 	PASSWDQC_FILTER_HASH_OPAQUE = 0,
57*8c117293SSascha Wildner 	PASSWDQC_FILTER_HASH_MIN = 1,
58*8c117293SSascha Wildner 	PASSWDQC_FILTER_HASH_MD4 = 1,
59*8c117293SSascha Wildner 	PASSWDQC_FILTER_HASH_NTLM_CP1252 = 2,
60*8c117293SSascha Wildner 	PASSWDQC_FILTER_HASH_MAX = 2
61*8c117293SSascha Wildner } passwdqc_filter_hash_id_t;
62*8c117293SSascha Wildner 
63*8c117293SSascha Wildner typedef struct {
64*8c117293SSascha Wildner 	uint64_t hi, lo; /* we access hi first, so let's also place it first */
65*8c117293SSascha Wildner } passwdqc_filter_packed_t;
66*8c117293SSascha Wildner 
67*8c117293SSascha Wildner typedef uint32_t passwdqc_filter_i_t;
68*8c117293SSascha Wildner typedef uint64_t passwdqc_filter_f_t;
69*8c117293SSascha Wildner 
70*8c117293SSascha Wildner typedef struct {
71*8c117293SSascha Wildner 	passwdqc_filter_f_t slots[4];
72*8c117293SSascha Wildner } passwdqc_filter_unpacked_t;
73*8c117293SSascha Wildner 
74*8c117293SSascha Wildner typedef union {
75*8c117293SSascha Wildner 	unsigned char uc[16];
76*8c117293SSascha Wildner 	uint32_t u32[4];
77*8c117293SSascha Wildner 	uint64_t u64[2];
78*8c117293SSascha Wildner } passwdqc_filter_hash_t;
79*8c117293SSascha Wildner 
80*8c117293SSascha Wildner #ifdef __GNUC__
81*8c117293SSascha Wildner #define force_inline	__attribute__ ((always_inline)) inline
82*8c117293SSascha Wildner #define likely(x)	__builtin_expect(!!(x), 1)
83*8c117293SSascha Wildner #define unlikely(x)	__builtin_expect(!!(x), 0)
84*8c117293SSascha Wildner #else
85*8c117293SSascha Wildner #define force_inline	inline
86*8c117293SSascha Wildner #define likely(x)	(x)
87*8c117293SSascha Wildner #define unlikely(x)	(x)
88*8c117293SSascha Wildner #endif
89*8c117293SSascha Wildner 
passwdqc_filter_wrap(uint32_t what,passwdqc_filter_i_t m)90*8c117293SSascha Wildner static force_inline passwdqc_filter_i_t passwdqc_filter_wrap(uint32_t what, passwdqc_filter_i_t m)
91*8c117293SSascha Wildner {
92*8c117293SSascha Wildner 	return ((uint64_t)what * m) >> 32;
93*8c117293SSascha Wildner }
94*8c117293SSascha Wildner 
passwdqc_filter_h2i(passwdqc_filter_hash_t * h,passwdqc_filter_i_t m)95*8c117293SSascha Wildner static force_inline passwdqc_filter_i_t passwdqc_filter_h2i(passwdqc_filter_hash_t *h, passwdqc_filter_i_t m)
96*8c117293SSascha Wildner {
97*8c117293SSascha Wildner 	uint32_t i;
98*8c117293SSascha Wildner /*
99*8c117293SSascha Wildner  * Controversial optimization: when converting a hash to its hash table index
100*8c117293SSascha Wildner  * for the primary bucket, take its initial portion and swap the nibbles so
101*8c117293SSascha Wildner  * that we process most of the hash table semi-sequentially in case our input
102*8c117293SSascha Wildner  * is an ASCII-sorted list of hex-encoded hashes.  A drawback is that we fail
103*8c117293SSascha Wildner  * to reach high load if our input is a biased fragment from such sorted list.
104*8c117293SSascha Wildner  */
105*8c117293SSascha Wildner #if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN
106*8c117293SSascha Wildner 	i = h->u32[0];
107*8c117293SSascha Wildner #else
108*8c117293SSascha Wildner 	i = (uint32_t)h->uc[0] << 24;
109*8c117293SSascha Wildner 	i |= (uint32_t)h->uc[1] << 16;
110*8c117293SSascha Wildner 	i |= (uint32_t)h->uc[2] << 8;
111*8c117293SSascha Wildner 	i |= (uint32_t)h->uc[3];
112*8c117293SSascha Wildner #endif
113*8c117293SSascha Wildner 	i = ((i & 0x0f0f0f0f) << 4) | ((i >> 4) & 0x0f0f0f0f);
114*8c117293SSascha Wildner 	return passwdqc_filter_wrap(i, m);
115*8c117293SSascha Wildner }
116*8c117293SSascha Wildner 
passwdqc_filter_h2f(passwdqc_filter_hash_t * h)117*8c117293SSascha Wildner static force_inline passwdqc_filter_f_t passwdqc_filter_h2f(passwdqc_filter_hash_t *h)
118*8c117293SSascha Wildner {
119*8c117293SSascha Wildner #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
120*8c117293SSascha Wildner 	return h->u64[1];
121*8c117293SSascha Wildner #else
122*8c117293SSascha Wildner 	uint64_t f;
123*8c117293SSascha Wildner 	f = (uint64_t)h->uc[8];
124*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[9] << 8;
125*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[10] << 16;
126*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[11] << 24;
127*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[12] << 32;
128*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[13] << 40;
129*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[14] << 48;
130*8c117293SSascha Wildner 	f |= (uint64_t)h->uc[15] << 56;
131*8c117293SSascha Wildner 	return f;
132*8c117293SSascha Wildner #endif
133*8c117293SSascha Wildner }
134*8c117293SSascha Wildner 
passwdqc_filter_alti(passwdqc_filter_i_t i,passwdqc_filter_f_t f,passwdqc_filter_i_t m)135*8c117293SSascha Wildner static force_inline passwdqc_filter_i_t passwdqc_filter_alti(passwdqc_filter_i_t i, passwdqc_filter_f_t f, passwdqc_filter_i_t m)
136*8c117293SSascha Wildner {
137*8c117293SSascha Wildner /*
138*8c117293SSascha Wildner  * We must not use more than 33 bits of the fingerprint here for consistent
139*8c117293SSascha Wildner  * behavior in case the fingerprint later gets truncated.
140*8c117293SSascha Wildner  */
141*8c117293SSascha Wildner 	int64_t alti = (int64_t)(m - 1 - i) - passwdqc_filter_wrap((uint32_t)f, m);
142*8c117293SSascha Wildner #if 0
143*8c117293SSascha Wildner /*
144*8c117293SSascha Wildner  * This is how we could have made use of the 33rd bit while staying in range,
145*8c117293SSascha Wildner  * but testing shows that this is unnecessary.
146*8c117293SSascha Wildner  */
147*8c117293SSascha Wildner 	alti -= ((f >> 32) & 1);
148*8c117293SSascha Wildner #endif
149*8c117293SSascha Wildner 	if (alti < 0)
150*8c117293SSascha Wildner 		alti += m;
151*8c117293SSascha Wildner #if 0
152*8c117293SSascha Wildner 	assert((passwdqc_filter_i_t)alti < m);
153*8c117293SSascha Wildner #endif
154*8c117293SSascha Wildner 	return (passwdqc_filter_i_t)alti;
155*8c117293SSascha Wildner }
156*8c117293SSascha Wildner 
passwdqc_filter_ssdecode(unsigned int src)157*8c117293SSascha Wildner static inline unsigned int passwdqc_filter_ssdecode(unsigned int src)
158*8c117293SSascha Wildner {
159*8c117293SSascha Wildner 	/* First 16 tetrahedral numbers (n*(n+1)*(n+2)/3!) in reverse order */
160*8c117293SSascha Wildner 	static const uint16_t tab4[] = {816, 680, 560, 455, 364, 286, 220, 165, 120, 84, 56, 35, 20, 10, 4, 1};
161*8c117293SSascha Wildner 	/* First 16 triangular numbers (n*(n+1)/2!) in reverse order */
162*8c117293SSascha Wildner 	static const uint8_t tab3[] = {136, 120, 105, 91, 78, 66, 55, 45, 36, 28, 21, 15, 10, 6, 3, 1};
163*8c117293SSascha Wildner 
164*8c117293SSascha Wildner 	unsigned int dst, i = 0;
165*8c117293SSascha Wildner 
166*8c117293SSascha Wildner 	while (src >= tab4[i])
167*8c117293SSascha Wildner 		src -= tab4[i++];
168*8c117293SSascha Wildner 	dst = i << 12;
169*8c117293SSascha Wildner 
170*8c117293SSascha Wildner 	while (src >= tab3[i])
171*8c117293SSascha Wildner 		src -= tab3[i++];
172*8c117293SSascha Wildner 	dst |= i << 8;
173*8c117293SSascha Wildner 
174*8c117293SSascha Wildner 	while (src >= 16 - i)
175*8c117293SSascha Wildner 		src -= 16 - i++;
176*8c117293SSascha Wildner 	dst |= i << 4;
177*8c117293SSascha Wildner 
178*8c117293SSascha Wildner 	dst |= i + src;
179*8c117293SSascha Wildner 
180*8c117293SSascha Wildner 	return dst;
181*8c117293SSascha Wildner }
182*8c117293SSascha Wildner 
passwdqc_filter_unpack(passwdqc_filter_unpacked_t * dst,const passwdqc_filter_packed_t * src,const uint16_t * ssdecode)183*8c117293SSascha Wildner static force_inline int passwdqc_filter_unpack(passwdqc_filter_unpacked_t *dst, const passwdqc_filter_packed_t *src,
184*8c117293SSascha Wildner     const uint16_t *ssdecode)
185*8c117293SSascha Wildner {
186*8c117293SSascha Wildner 	uint64_t hi = src->hi, f = src->lo;
187*8c117293SSascha Wildner 	unsigned int ssi = hi >> (64 - 12); /* semi-sort index */
188*8c117293SSascha Wildner 
189*8c117293SSascha Wildner 	if (likely(ssi - 1 < 3876)) {
190*8c117293SSascha Wildner 		passwdqc_filter_f_t ssd = ssdecode ? ssdecode[ssi - 1] : passwdqc_filter_ssdecode(ssi - 1);
191*8c117293SSascha Wildner 		const unsigned int fbits = 33;
192*8c117293SSascha Wildner 		const unsigned int lobits = fbits - 4;
193*8c117293SSascha Wildner 		const passwdqc_filter_f_t lomask = ((passwdqc_filter_f_t)1 << lobits) - 1;
194*8c117293SSascha Wildner 		dst->slots[0] = (f & lomask) | ((ssd & 0x000f) << lobits);
195*8c117293SSascha Wildner 		f >>= lobits;
196*8c117293SSascha Wildner 		dst->slots[1] = (f & lomask) | ((ssd & 0x00f0) << (lobits - 4));
197*8c117293SSascha Wildner 		f >>= lobits;
198*8c117293SSascha Wildner 		f |= hi << (64 - 2 * lobits);
199*8c117293SSascha Wildner 		dst->slots[2] = (f & lomask) | ((ssd & 0x0f00) << (lobits - 8));
200*8c117293SSascha Wildner 		f >>= lobits;
201*8c117293SSascha Wildner 		dst->slots[3] = (f & lomask) | ((ssd & 0xf000) << (lobits - 12));
202*8c117293SSascha Wildner 		return 4;
203*8c117293SSascha Wildner 	}
204*8c117293SSascha Wildner 
205*8c117293SSascha Wildner 	if (likely(hi <= 1)) {
206*8c117293SSascha Wildner 		if (!hi)
207*8c117293SSascha Wildner 			return unlikely(f) ? -1 : 0;
208*8c117293SSascha Wildner 
209*8c117293SSascha Wildner 		dst->slots[0] = f;
210*8c117293SSascha Wildner 		return 1;
211*8c117293SSascha Wildner 	}
212*8c117293SSascha Wildner 
213*8c117293SSascha Wildner 	if (likely((ssi & 0xf80) == 0xf80)) {
214*8c117293SSascha Wildner 		const unsigned int fbits = 41;
215*8c117293SSascha Wildner 		const passwdqc_filter_f_t fmask = ((passwdqc_filter_f_t)1 << fbits) - 1;
216*8c117293SSascha Wildner 		dst->slots[0] = f & fmask;
217*8c117293SSascha Wildner 		f >>= fbits;
218*8c117293SSascha Wildner 		f |= hi << (64 - fbits);
219*8c117293SSascha Wildner 		dst->slots[1] = f & fmask;
220*8c117293SSascha Wildner 		if (unlikely(dst->slots[0] < dst->slots[1]))
221*8c117293SSascha Wildner 			return -1;
222*8c117293SSascha Wildner 		f = hi >> (2 * fbits - 64);
223*8c117293SSascha Wildner 		dst->slots[2] = f & fmask;
224*8c117293SSascha Wildner 		if (unlikely(dst->slots[1] < dst->slots[2]))
225*8c117293SSascha Wildner 			return -1;
226*8c117293SSascha Wildner 		return 3;
227*8c117293SSascha Wildner 	}
228*8c117293SSascha Wildner 
229*8c117293SSascha Wildner 	if (likely((ssi & 0xfc0) == 0xf40)) {
230*8c117293SSascha Wildner 		const unsigned int fbits = 61;
231*8c117293SSascha Wildner 		const passwdqc_filter_f_t fmask = ((passwdqc_filter_f_t)1 << fbits) - 1;
232*8c117293SSascha Wildner 		dst->slots[0] = f & fmask;
233*8c117293SSascha Wildner 		f >>= fbits;
234*8c117293SSascha Wildner 		f |= hi << (64 - fbits);
235*8c117293SSascha Wildner 		dst->slots[1] = f & fmask;
236*8c117293SSascha Wildner 		if (unlikely(dst->slots[0] < dst->slots[1]))
237*8c117293SSascha Wildner 			return -1;
238*8c117293SSascha Wildner 		return 2;
239*8c117293SSascha Wildner 	}
240*8c117293SSascha Wildner 
241*8c117293SSascha Wildner 	return -1;
242*8c117293SSascha Wildner }
243*8c117293SSascha Wildner 
passwdqc_filter_f_eq(passwdqc_filter_f_t stored,passwdqc_filter_f_t full,unsigned int largest_bucket_size)244*8c117293SSascha Wildner static inline int passwdqc_filter_f_eq(passwdqc_filter_f_t stored, passwdqc_filter_f_t full, unsigned int largest_bucket_size)
245*8c117293SSascha Wildner {
246*8c117293SSascha Wildner 	if (likely((uint32_t)stored != (uint32_t)full))
247*8c117293SSascha Wildner 		return 0;
248*8c117293SSascha Wildner /*
249*8c117293SSascha Wildner  * Ignore optional high bits of a stored fingerprint if they're all-zero,
250*8c117293SSascha Wildner  * regardless of whether the fingerprint possibly came from a large enough slot
251*8c117293SSascha Wildner  * for those zeroes to potentially be meaningful.  We have to do this because
252*8c117293SSascha Wildner  * the fingerprint might have been previously stored in a larger (smaller-slot)
253*8c117293SSascha Wildner  * bucket and been kicked from there, in which case the zeroes are meaningless.
254*8c117293SSascha Wildner  * Exception: we don't have to do this if there were no larger buckets so far.
255*8c117293SSascha Wildner  */
256*8c117293SSascha Wildner 	if ((stored >> 33) || largest_bucket_size < 4) {
257*8c117293SSascha Wildner 		if ((stored >> 41) || largest_bucket_size < 3) {
258*8c117293SSascha Wildner 			if (stored >> 61)
259*8c117293SSascha Wildner 				return likely(stored == full);
260*8c117293SSascha Wildner 			else
261*8c117293SSascha Wildner 				return likely(stored == (full & (((passwdqc_filter_f_t)1 << 61) - 1)));
262*8c117293SSascha Wildner 		} else {
263*8c117293SSascha Wildner 			return likely(stored == (full & (((passwdqc_filter_f_t)1 << 41) - 1)));
264*8c117293SSascha Wildner 		}
265*8c117293SSascha Wildner 	} else {
266*8c117293SSascha Wildner 		return likely(stored == (full & (((passwdqc_filter_f_t)1 << 33) - 1)));
267*8c117293SSascha Wildner 	}
268*8c117293SSascha Wildner }
269*8c117293SSascha Wildner 
passwdqc_filter_md4(passwdqc_filter_hash_t * dst,const char * src)270*8c117293SSascha Wildner static inline void passwdqc_filter_md4(passwdqc_filter_hash_t *dst, const char *src)
271*8c117293SSascha Wildner {
272*8c117293SSascha Wildner 	MD4_CTX ctx;
273*8c117293SSascha Wildner 	MD4_Init(&ctx);
274*8c117293SSascha Wildner 	MD4_Update(&ctx, src, strcspn(src, "\n\r"));
275*8c117293SSascha Wildner 	MD4_Final(dst->uc, &ctx);
276*8c117293SSascha Wildner }
277*8c117293SSascha Wildner 
passwdqc_filter_ntlm_cp1252(passwdqc_filter_hash_t * dst,const char * src)278*8c117293SSascha Wildner static inline void passwdqc_filter_ntlm_cp1252(passwdqc_filter_hash_t *dst, const char *src)
279*8c117293SSascha Wildner {
280*8c117293SSascha Wildner /*
281*8c117293SSascha Wildner  * 5 of these codes are undefined in CP1252.  We let the original single-byte
282*8c117293SSascha Wildner  * values for them pass through, which appears to match how the HIBP v7 NTLM
283*8c117293SSascha Wildner  * hashes were generated.
284*8c117293SSascha Wildner  */
285*8c117293SSascha Wildner 	static const uint16_t c1[] = {
286*8c117293SSascha Wildner 		0x20ac, 0x81, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
287*8c117293SSascha Wildner 		0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x8d, 0x017d, 0x8f,
288*8c117293SSascha Wildner 		0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
289*8c117293SSascha Wildner 		0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x9d, 0x017e, 0x0178
290*8c117293SSascha Wildner 	};
291*8c117293SSascha Wildner 
292*8c117293SSascha Wildner 	MD4_CTX ctx;
293*8c117293SSascha Wildner 	MD4_Init(&ctx);
294*8c117293SSascha Wildner 	while (*src != '\n' && *src != '\r' && *src) {
295*8c117293SSascha Wildner 		unsigned int c = *(unsigned char *)src++;
296*8c117293SSascha Wildner 		if (c - 128 < sizeof(c1) / sizeof(c1[0]))
297*8c117293SSascha Wildner 			c = c1[c - 128];
298*8c117293SSascha Wildner #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
299*8c117293SSascha Wildner 		MD4_Update(&ctx, &c, 2);
300*8c117293SSascha Wildner #else
301*8c117293SSascha Wildner 		uint8_t ucs2[2] = {c, c >> 8};
302*8c117293SSascha Wildner 		MD4_Update(&ctx, ucs2, 2);
303*8c117293SSascha Wildner #endif
304*8c117293SSascha Wildner 	}
305*8c117293SSascha Wildner 	MD4_Final(dst->uc, &ctx);
306*8c117293SSascha Wildner }
307*8c117293SSascha Wildner 
308*8c117293SSascha Wildner #endif /* PASSWDQC_FILTER_INTERNALS */
309*8c117293SSascha Wildner #endif /* PASSWDQC_FILTER_H__ */
310