1 /*
2  * Copyright (c) 2000-2002,2010,2013,2016,2020 by Solar Designer.  See LICENSE.
3  */
4 
5 #ifdef _MSC_VER
6 #define _CRT_SECURE_NO_WARNINGS /* we use fopen(), sprintf(), strncat() */
7 #endif
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 
14 #include "passwdqc.h" /* also provides <pwd.h> or equivalent "struct passwd" */
15 #include "passwdqc_filter.h"
16 #include "wordset_4k.h"
17 
18 #include "passwdqc_i18n.h"
19 
20 #define REASON_ERROR \
21 	_("check failed")
22 
23 #define REASON_SAME \
24 	_("is the same as the old one")
25 #define REASON_SIMILAR \
26 	_("is based on the old one")
27 
28 #define REASON_SHORT \
29 	_("too short")
30 #define REASON_LONG \
31 	_("too long")
32 
33 #define REASON_SIMPLESHORT \
34 	_("not enough different characters or classes for this length")
35 #define REASON_SIMPLE \
36 	_("not enough different characters or classes")
37 
38 #define REASON_PERSONAL \
39 	_("based on personal login information")
40 
41 #define REASON_WORD \
42 	_("based on a dictionary word and not a passphrase")
43 
44 #define REASON_SEQ \
45 	_("based on a common sequence of characters and not a passphrase")
46 
47 #define REASON_WORDLIST \
48 	_("based on a word list entry")
49 
50 #define REASON_DENYLIST \
51 	_("is in deny list")
52 
53 #define REASON_FILTER \
54 	_("appears to be in a database")
55 
56 #define FIXED_BITS			15
57 
58 typedef unsigned long fixed;
59 
60 /*
61  * Calculates the expected number of different characters for a random
62  * password of a given length.  The result is rounded down.  We use this
63  * with the _requested_ minimum length (so longer passwords don't have
64  * to meet this strict requirement for their length).
65  */
66 static int expected_different(int charset, int length)
67 {
68 	fixed x, y, z;
69 
70 	x = ((fixed)(charset - 1) << FIXED_BITS) / charset;
71 	y = x;
72 	while (--length > 0)
73 		y = (y * x) >> FIXED_BITS;
74 	z = (fixed)charset * (((fixed)1 << FIXED_BITS) - y);
75 
76 	return (int)(z >> FIXED_BITS);
77 }
78 
79 /*
80  * A password is too simple if it is too short for its class, or doesn't
81  * contain enough different characters for its class, or doesn't contain
82  * enough words for a passphrase.
83  *
84  * The biases are added to the length, and they may be positive or negative.
85  * The passphrase length check uses passphrase_bias instead of bias so that
86  * zero may be passed for this parameter when the (other) bias is non-zero
87  * because of a dictionary word, which is perfectly normal for a passphrase.
88  * The biases do not affect the number of different characters, character
89  * classes, and word count.
90  */
91 static int is_simple(const passwdqc_params_qc_t *params, const char *newpass,
92     int bias, int passphrase_bias)
93 {
94 	int length, classes, words, chars;
95 	int digits, lowers, uppers, others, unknowns;
96 	int c, p;
97 
98 	length = classes = words = chars = 0;
99 	digits = lowers = uppers = others = unknowns = 0;
100 	p = ' ';
101 	while ((c = (unsigned char)newpass[length])) {
102 		length++;
103 
104 		if (!isascii(c))
105 			unknowns++;
106 		else if (isdigit(c))
107 			digits++;
108 		else if (islower(c))
109 			lowers++;
110 		else if (isupper(c))
111 			uppers++;
112 		else
113 			others++;
114 
115 /* A word starts when a letter follows a non-letter or when a non-ASCII
116  * character follows a space character.  We treat all non-ASCII characters
117  * as non-spaces, which is not entirely correct (there's the non-breaking
118  * space character at 0xa0, 0x9a, or 0xff), but it should not hurt. */
119 		if (isascii(p)) {
120 			if (isascii(c)) {
121 				if (isalpha(c) && !isalpha(p))
122 					words++;
123 			} else if (isspace(p))
124 				words++;
125 		}
126 		p = c;
127 
128 /* Count this character just once: when we're not going to see it anymore */
129 		if (!strchr(&newpass[length], c))
130 			chars++;
131 	}
132 
133 	if (!length)
134 		return 1;
135 
136 /* Upper case characters and digits used in common ways don't increase the
137  * strength of a password */
138 	c = (unsigned char)newpass[0];
139 	if (uppers && isascii(c) && isupper(c))
140 		uppers--;
141 	c = (unsigned char)newpass[length - 1];
142 	if (digits && isascii(c) && isdigit(c))
143 		digits--;
144 
145 /* Count the number of different character classes we've seen.  We assume
146  * that there are no non-ASCII characters for digits. */
147 	classes = 0;
148 	if (digits)
149 		classes++;
150 	if (lowers)
151 		classes++;
152 	if (uppers)
153 		classes++;
154 	if (others)
155 		classes++;
156 	if (unknowns && classes <= 1 && (!classes || digits || words >= 2))
157 		classes++;
158 
159 	for (; classes > 0; classes--)
160 	switch (classes) {
161 	case 1:
162 		if (length + bias >= params->min[0] &&
163 		    chars >= expected_different(10, params->min[0]) - 1)
164 			return 0;
165 		return 1;
166 
167 	case 2:
168 		if (length + bias >= params->min[1] &&
169 		    chars >= expected_different(36, params->min[1]) - 1)
170 			return 0;
171 		if (!params->passphrase_words ||
172 		    words < params->passphrase_words)
173 			continue;
174 		if (length + passphrase_bias >= params->min[2] &&
175 		    chars >= expected_different(27, params->min[2]) - 1)
176 			return 0;
177 		continue;
178 
179 	case 3:
180 		if (length + bias >= params->min[3] &&
181 		    chars >= expected_different(62, params->min[3]) - 1)
182 			return 0;
183 		continue;
184 
185 	case 4:
186 		if (length + bias >= params->min[4] &&
187 		    chars >= expected_different(95, params->min[4]) - 1)
188 			return 0;
189 		continue;
190 	}
191 
192 	return 1;
193 }
194 
195 static char *unify(char *dst, const char *src)
196 {
197 	const char *sptr;
198 	char *dptr;
199 	int c;
200 
201 	if (!dst && !(dst = malloc(strlen(src) + 1)))
202 		return NULL;
203 
204 	sptr = src;
205 	dptr = dst;
206 	do {
207 		c = (unsigned char)*sptr;
208 		if (isascii(c) && isupper(c))
209 			c = tolower(c);
210 		switch (c) {
211 		case 'a': case '@':
212 			c = '4'; break;
213 		case 'e':
214 			c = '3'; break;
215 /* Unfortunately, if we translate both 'i' and 'l' to '1', this would
216  * associate these two letters with each other - e.g., "mile" would
217  * match "MLLE", which is undesired.  To solve this, we'd need to test
218  * different translations separately, which is not implemented yet. */
219 		case 'i': case '|':
220 			c = '!'; break;
221 		case 'l':
222 			c = '1'; break;
223 		case 'o':
224 			c = '0'; break;
225 		case 's': case '$':
226 			c = '5'; break;
227 		case 't': case '+':
228 			c = '7'; break;
229 		}
230 		*dptr++ = c;
231 	} while (*sptr++);
232 
233 	return dst;
234 }
235 
236 static char *reverse(const char *src)
237 {
238 	const char *sptr;
239 	char *dst, *dptr;
240 
241 	if (!(dst = malloc(strlen(src) + 1)))
242 		return NULL;
243 
244 	sptr = &src[strlen(src)];
245 	dptr = dst;
246 	while (sptr > src)
247 		*dptr++ = *--sptr;
248 	*dptr = '\0';
249 
250 	return dst;
251 }
252 
253 static void clean(char *dst)
254 {
255 	if (!dst)
256 		return;
257 	_passwdqc_memzero(dst, strlen(dst));
258 	free(dst);
259 }
260 
261 /*
262  * Needle is based on haystack if both contain a long enough common
263  * substring and needle would be too simple for a password with the
264  * substring either removed with partial length credit for it added
265  * or partially discounted for the purpose of the length check.
266  */
267 static int is_based(const passwdqc_params_qc_t *params,
268     const char *haystack, const char *needle, const char *original,
269     int mode)
270 {
271 	char *scratch;
272 	int length;
273 	int i, j;
274 	const char *p;
275 	int worst_bias;
276 
277 	if (!params->match_length)	/* disabled */
278 		return 0;
279 
280 	if (params->match_length < 0)	/* misconfigured */
281 		return 1;
282 
283 	scratch = NULL;
284 	worst_bias = 0;
285 
286 	length = (int)strlen(needle);
287 	for (i = 0; i <= length - params->match_length; i++)
288 	for (j = params->match_length; i + j <= length; j++) {
289 		int bias = 0, j1 = j - 1;
290 		const char q0 = needle[i], *q1 = &needle[i + 1];
291 		for (p = haystack; *p; p++)
292 		if (*p == q0 && !strncmp(p + 1, q1, j1)) { /* or memcmp() */
293 			if ((mode & 0xff) == 0) { /* remove & credit */
294 				if (!scratch) {
295 					if (!(scratch = malloc(length + 1)))
296 						return 1;
297 				}
298 				/* remove j chars */
299 				{
300 					int pos = length - (i + j);
301 					if (!(mode & 0x100)) /* not reversed */
302 						pos = i;
303 					memcpy(scratch, original, pos);
304 					memcpy(&scratch[pos],
305 					    &original[pos + j],
306 					    length + 1 - (pos + j));
307 				}
308 				/* add credit for match_length - 1 chars */
309 				bias = params->match_length - 1;
310 				if (is_simple(params, scratch, bias, bias)) {
311 					clean(scratch);
312 					return 1;
313 				}
314 			} else { /* discount */
315 /* Require a 1 character longer match for substrings containing leetspeak
316  * when matching against dictionary words */
317 				bias = -1;
318 				if ((mode & 0xff) == 1) { /* words */
319 					int pos = i, end = i + j;
320 					if (mode & 0x100) { /* reversed */
321 						pos = length - end;
322 						end = length - i;
323 					}
324 					for (; pos < end; pos++)
325 					if (!isalpha((int)(unsigned char)
326 					    original[pos])) {
327 						if (j == params->match_length)
328 							goto next_match_length;
329 						bias = 0;
330 						break;
331 					}
332 				}
333 
334 				/* discount j - (match_length + bias) chars */
335 				bias += (int)params->match_length - j;
336 				/* bias <= -1 */
337 				if (bias < worst_bias) {
338 					if (is_simple(params, original, bias,
339 					    (mode & 0xff) == 1 ? 0 : bias))
340 						return 1;
341 					worst_bias = bias;
342 				}
343 			}
344 		}
345 /* Zero bias implies that there were no matches for this length.  If so,
346  * there's no reason to try the next substring length (it would result in
347  * no matches as well).  We break out of the substring length loop and
348  * proceed with all substring lengths for the next position in needle. */
349 		if (!bias)
350 			break;
351 next_match_length:
352 		;
353 	}
354 
355 	clean(scratch);
356 
357 	return 0;
358 }
359 
360 #define READ_LINE_MAX 8192
361 #define READ_LINE_SIZE (READ_LINE_MAX + 2)
362 
363 static char *read_line(FILE *f, char *buf)
364 {
365 	buf[READ_LINE_MAX] = '\n';
366 
367 	if (!fgets(buf, READ_LINE_SIZE, f))
368 		return NULL;
369 
370 	if (buf[READ_LINE_MAX] != '\n') {
371 		int c;
372 		do {
373 			c = getc(f);
374 		} while (c != EOF && c != '\n');
375 		if (ferror(f))
376 			return NULL;
377 	}
378 
379 	char *p;
380 	if ((p = strpbrk(buf, "\r\n")))
381 		*p = '\0';
382 
383 	return buf;
384 }
385 
386 /*
387  * Common sequences of characters.
388  * We don't need to list any of the entire strings in reverse order because the
389  * code checks the new password in both "unified" and "unified and reversed"
390  * form against these strings (unifying them first indeed).  We also don't have
391  * to include common repeats of characters (e.g., "777", "!!!", "1000") because
392  * these are often taken care of by the requirement on the number of different
393  * characters.
394  */
395 const char * const seq[] = {
396 	"0123456789",
397 	"`1234567890-=",
398 	"~!@#$%^&*()_+",
399 	"abcdefghijklmnopqrstuvwxyz",
400 	"a1b2c3d4e5f6g7h8i9j0",
401 	"1a2b3c4d5e6f7g8h9i0j",
402 	"abc123",
403 	"qwertyuiop[]\\asdfghjkl;'zxcvbnm,./",
404 	"qwertyuiop{}|asdfghjkl:\"zxcvbnm<>?",
405 	"qwertyuiopasdfghjklzxcvbnm",
406 	"1qaz2wsx3edc4rfv5tgb6yhn7ujm8ik,9ol.0p;/-['=]\\",
407 	"!qaz@wsx#edc$rfv%tgb^yhn&ujm*ik<(ol>)p:?_{\"+}|",
408 	"qazwsxedcrfvtgbyhnujmikolp",
409 	"1q2w3e4r5t6y7u8i9o0p-[=]",
410 	"q1w2e3r4t5y6u7i8o9p0[-]=\\",
411 	"1qaz1qaz",
412 	"1qaz!qaz", /* can't unify '1' and '!' - see comment in unify() */
413 	"1qazzaq1",
414 	"zaq!1qaz",
415 	"zaq!2wsx"
416 };
417 
418 /*
419  * This wordlist check is now the least important given the checks above
420  * and the support for passphrases (which are based on dictionary words,
421  * and checked by other means).  It is still useful to trap simple short
422  * passwords (if short passwords are allowed) that are word-based, but
423  * passed the other checks due to uncommon capitalization, digits, and
424  * special characters.  We (mis)use the same set of words that are used
425  * to generate random passwords.  This list is much smaller than those
426  * used for password crackers, and it doesn't contain common passwords
427  * that aren't short English words.  We also support optional external
428  * wordlist (for inexact matching) and deny list (for exact matching).
429  */
430 static const char *is_word_based(const passwdqc_params_qc_t *params,
431     const char *unified, const char *reversed, const char *original)
432 {
433 	const char *reason = REASON_ERROR;
434 	char word[WORDSET_4K_LENGTH_MAX + 1], *buf = NULL;
435 	FILE *f = NULL;
436 	unsigned int i;
437 
438 	word[WORDSET_4K_LENGTH_MAX] = '\0';
439 	if (params->match_length)
440 	for (i = 0; _passwdqc_wordset_4k[i][0]; i++) {
441 		memcpy(word, _passwdqc_wordset_4k[i], WORDSET_4K_LENGTH_MAX);
442 		int length = (int)strlen(word);
443 		if (length < params->match_length)
444 			continue;
445 		if (!memcmp(word, _passwdqc_wordset_4k[i + 1], length))
446 			continue;
447 		unify(word, word);
448 		if (is_based(params, word, unified, original, 1) ||
449 		    is_based(params, word, reversed, original, 0x101)) {
450 			reason = REASON_WORD;
451 			goto out;
452 		}
453 	}
454 
455 	if (params->match_length)
456 	for (i = 0; i < sizeof(seq) / sizeof(seq[0]); i++) {
457 		char *seq_i = unify(NULL, seq[i]);
458 		if (!seq_i)
459 			goto out;
460 		if (is_based(params, seq_i, unified, original, 2) ||
461 		    is_based(params, seq_i, reversed, original, 0x102)) {
462 			clean(seq_i);
463 			reason = REASON_SEQ;
464 			goto out;
465 		}
466 		clean(seq_i);
467 	}
468 
469 	if (params->match_length && params->match_length <= 4)
470 	for (i = 1900; i <= 2039; i++) {
471 		sprintf(word, "%u", i);
472 		if (is_based(params, word, unified, original, 2) ||
473 		    is_based(params, word, reversed, original, 0x102)) {
474 			reason = REASON_SEQ;
475 			goto out;
476 		}
477 	}
478 
479 	if (params->wordlist || params->denylist)
480 		if (!(buf = malloc(READ_LINE_SIZE)))
481 			goto out;
482 
483 	if (params->wordlist) {
484 		if (!(f = fopen(params->wordlist, "r")))
485 			goto out;
486 		while (read_line(f, buf)) {
487 			unify(buf, buf);
488 			if (!strcmp(buf, unified) || !strcmp(buf, reversed))
489 				goto out_wordlist;
490 			if (!params->match_length ||
491 			    strlen(buf) < (size_t)params->match_length)
492 				continue;
493 			if (is_based(params, buf, unified, original, 1) ||
494 			    is_based(params, buf, reversed, original, 0x101)) {
495 out_wordlist:
496 				reason = REASON_WORDLIST;
497 				goto out;
498 			}
499 		}
500 		if (ferror(f))
501 			goto out;
502 		fclose(f); f = NULL;
503 	}
504 
505 	if (params->denylist) {
506 		if (!(f = fopen(params->denylist, "r")))
507 			goto out;
508 		while (read_line(f, buf)) {
509 			if (!strcmp(buf, original)) {
510 				reason = REASON_DENYLIST;
511 				goto out;
512 			}
513 		}
514 		if (ferror(f))
515 			goto out;
516 	}
517 
518 	reason = NULL;
519 
520 out:
521 	if (f)
522 		fclose(f);
523 	if (buf) {
524 		_passwdqc_memzero(buf, READ_LINE_SIZE);
525 		free(buf);
526 	}
527 	_passwdqc_memzero(word, sizeof(word));
528 	return reason;
529 }
530 
531 const char *passwdqc_check(const passwdqc_params_qc_t *params,
532     const char *newpass, const char *oldpass, const struct passwd *pw)
533 {
534 	char truncated[9];
535 	char *u_newpass = NULL, *u_reversed = NULL;
536 	char *u_oldpass = NULL;
537 	char *u_name = NULL, *u_gecos = NULL, *u_dir = NULL;
538 	const char *reason = REASON_ERROR;
539 
540 	size_t length = strlen(newpass);
541 
542 	if (length < (size_t)params->min[4]) {
543 		reason = REASON_SHORT;
544 		goto out;
545 	}
546 
547 	if (length > 10000) {
548 		reason = REASON_LONG;
549 		goto out;
550 	}
551 
552 	if (length > (size_t)params->max) {
553 		if (params->max == 8) {
554 			truncated[0] = '\0';
555 			strncat(truncated, newpass, 8);
556 			newpass = truncated;
557 			length = 8;
558 			if (oldpass && !strncmp(oldpass, newpass, 8)) {
559 				reason = REASON_SAME;
560 				goto out;
561 			}
562 		} else {
563 			reason = REASON_LONG;
564 			goto out;
565 		}
566 	}
567 
568 	if (oldpass && !strcmp(oldpass, newpass)) {
569 		reason = REASON_SAME;
570 		goto out;
571 	}
572 
573 	if (is_simple(params, newpass, 0, 0)) {
574 		reason = REASON_SIMPLE;
575 		if (length < (size_t)params->min[1] &&
576 		    params->min[1] <= params->max)
577 			reason = REASON_SIMPLESHORT;
578 		goto out;
579 	}
580 
581 	if (!(u_newpass = unify(NULL, newpass)))
582 		goto out; /* REASON_ERROR */
583 	if (!(u_reversed = reverse(u_newpass)))
584 		goto out;
585 	if (oldpass && !(u_oldpass = unify(NULL, oldpass)))
586 		goto out;
587 	if (pw) {
588 		if (!(u_name = unify(NULL, pw->pw_name)) ||
589 		    !(u_gecos = unify(NULL, pw->pw_gecos)) ||
590 		    !(u_dir = unify(NULL, pw->pw_dir)))
591 			goto out;
592 	}
593 
594 	if (oldpass && params->similar_deny &&
595 	    (is_based(params, u_oldpass, u_newpass, newpass, 0) ||
596 	     is_based(params, u_oldpass, u_reversed, newpass, 0x100))) {
597 		reason = REASON_SIMILAR;
598 		goto out;
599 	}
600 
601 	if (pw &&
602 	    (is_based(params, u_name, u_newpass, newpass, 0) ||
603 	     is_based(params, u_name, u_reversed, newpass, 0x100) ||
604 	     is_based(params, u_gecos, u_newpass, newpass, 0) ||
605 	     is_based(params, u_gecos, u_reversed, newpass, 0x100) ||
606 	     is_based(params, u_dir, u_newpass, newpass, 0) ||
607 	     is_based(params, u_dir, u_reversed, newpass, 0x100))) {
608 		reason = REASON_PERSONAL;
609 		goto out;
610 	}
611 
612 	reason = is_word_based(params, u_newpass, u_reversed, newpass);
613 
614 	if (!reason && params->filter) {
615 		passwdqc_filter_t flt;
616 		reason = REASON_ERROR;
617 		if (passwdqc_filter_open(&flt, params->filter))
618 			goto out;
619 		int result = passwdqc_filter_lookup(&flt, newpass);
620 		passwdqc_filter_close(&flt);
621 		if (result < 0)
622 			goto out;
623 		reason = result ? REASON_FILTER : NULL;
624 	}
625 
626 out:
627 	_passwdqc_memzero(truncated, sizeof(truncated));
628 	clean(u_newpass);
629 	clean(u_reversed);
630 	clean(u_oldpass);
631 	clean(u_name);
632 	clean(u_gecos);
633 	clean(u_dir);
634 
635 	return reason;
636 }
637