xref: /dragonfly/sys/kern/kern_nrandom.c (revision 61c0377f)
1 /*
2  * Copyright (c) 2004, 2005, 2006 Robin J Carey. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions, and the following disclaimer,
9  *    without modification, immediately at the beginning of the file.
10  * 2. The name of the author may not be used to endorse or promote products
11  *    derived from this software without specific prior written permission.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
17  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 /*			   --- NOTES ---
26  *
27  * Note: The word "entropy" is often incorrectly used to describe
28  * random data. The word "entropy" originates from the science of
29  * Physics. The correct descriptive definition would be something
30  * along the lines of "seed", "unpredictable numbers" or
31  * "unpredictable data".
32  *
33  * Note: Some /dev/[u]random implementations save "seed" between
34  * boots which represents a security hazard since an adversary
35  * could acquire this data (since it is stored in a file). If
36  * the unpredictable data used in the above routines is only
37  * generated during Kernel operation, then an adversary can only
38  * acquire that data through a Kernel security compromise and/or
39  * a cryptographic algorithm failure/cryptanalysis.
40  *
41  * Note: On FreeBSD-4.11, interrupts have to be manually enabled
42  * using the rndcontrol(8) command.
43  *
44  *		--- DESIGN (FreeBSD-4.11 based) ---
45  *
46  *   The rnddev module automatically initializes itself the first time
47  * it is used (client calls any public rnddev_*() interface routine).
48  * Both CSPRNGs are initially seeded from the precise nano[up]time() routines.
49  * Tests show this method produces good enough results, suitable for intended
50  * use. It is necessary for both CSPRNGs to be completely seeded, initially.
51  *
52  *   After initialization and during Kernel operation the only suitable
53  * unpredictable data available is:
54  *
55  *	(1) Keyboard scan-codes.
56  *	(2) Nanouptime acquired by a Keyboard/Read-Event.
57  *	(3) Suitable interrupt source; hard-disk/ATA-device.
58  *
59  *      (X) Mouse-event (xyz-data unsuitable); NOT IMPLEMENTED.
60  *
61  *   This data is added to both CSPRNGs in real-time as it happens/
62  * becomes-available. Additionally, unpredictable (?) data may be
63  * acquired from a true-random number generator if such a device is
64  * available to the system (not advisable !).
65  *   Nanouptime() acquired by a Read-Event is a very important aspect of
66  * this design, since it ensures that unpredictable data is added to
67  * the CSPRNGs even if there are no other sources.
68  *   The nanouptime() Kernel routine is used since time relative to
69  * boot is less adversary-known than time itself.
70  *
71  *   This design has been thoroughly tested with debug logging
72  * and the output from both /dev/random and /dev/urandom has
73  * been tested with the DIEHARD test-suite; both pass.
74  *
75  * MODIFICATIONS MADE TO ORIGINAL "kern_random.c":
76  *
77  * 6th July 2005:
78  *
79  * o Changed ReadSeed() function to schedule future read-seed-events
80  *   by at least one second. Previous implementation used a randomised
81  *   scheduling { 0, 1, 2, 3 seconds }.
82  * o Changed SEED_NANOUP() function to use a "previous" accumulator
83  *   algorithm similar to ReadSeed(). This ensures that there is no
84  *   way that an adversary can tell what number is being added to the
85  *   CSPRNGs, since the number added to the CSPRNGs at Event-Time is
86  *   the sum of nanouptime()@Event and an unknown/secret number.
87  * o Changed rnddev_add_interrupt() function to schedule future
88  *   interrupt-events by at least one second. Previous implementation
89  *   had no scheduling algorithm which allowed an "interrupt storm"
90  *   to occur resulting in skewed data entering into the CSPRNGs.
91  *
92  *
93  * 9th July 2005:
94  *
95  * o Some small cleanups and change all internal functions to be
96  *   static/private.
97  * o Removed ReadSeed() since its functionality is already performed
98  *   by another function { rnddev_add_interrupt_OR_read() } and remove
 *   the silly rndByte accumulator/feedback-thing (since multiplying by
100  *   rndByte could yield a value of 0).
101  * o Made IBAA/L14 public interface become static/private;
102  *   Local to this file (not changed to that in the original C modules).
103  *
104  * 16th July 2005:
105  *
106  * o SEED_NANOUP() -> NANOUP_EVENT() function rename.
107  * o Make NANOUP_EVENT() handle the time-buffering directly so that all
108  *   time-stamp-events use this single time-buffer (including keyboard).
 *   This removes dependency on "time_second" Kernel variable.
110  * o Removed second-time-buffer code in rnddev_add_interrupt_OR_read (void).
111  * o Rewrote the time-buffering algorithm in NANOUP_EVENT() to use a
112  *   randomised time-delay range.
113  *
114  * 12th Dec 2005:
115  *
116  * o Updated to (hopefully final) L15 algorithm.
117  *
118  * 12th June 2006:
119  *
120  * o Added missing (u_char *) cast in RnddevRead() function.
121  * o Changed copyright to 3-clause BSD license and cleaned up the layout
122  *   of this file.
123  */
124 
125 #include <sys/types.h>
126 #include <sys/kernel.h>
127 #include <sys/systm.h>
128 #include <sys/poll.h>
129 #include <sys/event.h>
130 #include <sys/random.h>
131 #include <sys/systimer.h>
132 #include <sys/time.h>
133 #include <sys/proc.h>
134 #include <sys/lock.h>
135 #include <sys/sysctl.h>
136 #include <sys/spinlock.h>
137 #include <machine/clock.h>
138 
139 #include <sys/thread2.h>
140 #include <sys/spinlock2.h>
141 #include <sys/mplock2.h>
142 
143 /*
144  * Portability note: The u_char/unsigned char type is used where
145  * uint8_t from <stdint.h> or u_int8_t from <sys/types.h> should really
146  * be being used. On FreeBSD, it is safe to make the assumption that these
147  * different types are equivalent (on all architectures).
148  * The FreeBSD <sys/crypto/rc4> module also makes this assumption.
149  */
150 
151 /*------------------------------ IBAA ----------------------------------*/
152 
153 /*-------------------------- IBAA CSPRNG -------------------------------*/
154 
155 /*
156  * NOTE: The original source code from which this source code (IBAA)
157  *       was taken has no copyright/license. The algorithm has no patent
158  *       and is freely/publicly available from:
159  *
160  *           http://www.burtleburtle.net/bob/rand/isaac.html
161  */
162 
163 /*
164  * ^ means XOR, & means bitwise AND, a<<b means shift a by b.
 * barrel(a) shifts a 20 bits to the left, and bits wrap around
166  * ind(x) is (x AND 255), or (x mod 256)
167  */
typedef	u_int32_t	u4;   /* 32-bit unsigned word used throughout IBAA */

#define	ALPHA		(8)		/* log2 of the table size */
#define	SIZE		(1 << ALPHA)	/* number of u4 words per table */
#define MASK		(SIZE - 1)	/* low ALPHA bits set */
#define	ind(x)		((x) & MASK)	/* reduce x to a table index */
/* 32-bit rotate left by 20 (the two shifted halves never overlap) */
#define	barrel(a)	(((a) << 20) ^ ((a) >> 12))
175 
176 static void IBAA
177 (
178 	u4 *m,		/* Memory: array of SIZE ALPHA-bit terms */
179 	u4 *r,		/* Results: the sequence, same size as m */
180 	u4 *aa,		/* Accumulator: a single value */
181 	u4 *bb,		/* the previous result */
182 	u4 *counter	/* counter */
183 )
184 {
185 	u4 a, b, x, y, i;
186 
187 	a = *aa;
188 	b = *bb + *counter;
189 	++*counter;
190 	for (i = 0; i < SIZE; ++i) {
191 		x = m[i];
192 		a = barrel(a) + m[ind(i + (SIZE / 2))];	/* set a */
193 		m[i] = y = m[ind(x)] + a + b;		/* set m */
194 		r[i] = b = m[ind(y >> ALPHA)] + x;	/* set r */
195 	}
196 	*bb = b; *aa = a;
197 }
198 
199 /*-------------------------- IBAA CSPRNG -------------------------------*/
200 
201 
/* State handed to IBAA() on every IBAA_Call(). */
static u4	IBAA_memory[SIZE];	/* "m": seeded by IBAA_Seed() */
static u4	IBAA_results[SIZE];	/* "r": consumed bytewise by IBAA_Byte() */
static u4	IBAA_aa;
static u4	IBAA_bb;
static u4	IBAA_counter;

/*
 * Byte offset of the next unread byte in IBAA_results; the value
 * sizeof(IBAA_results) makes IBAA_Byte() regenerate the results first.
 */
static volatile int IBAA_byte_index;


static void	IBAA_Init(void);
static void	IBAA_Call(void);
static void	IBAA_Seed(const u_int32_t val);
static u_char	IBAA_Byte(void);
215 
216 /*
217  * Initialize IBAA.
218  */
219 static void
220 IBAA_Init(void)
221 {
222 	size_t	i;
223 
224 	for (i = 0; i < SIZE; ++i) {
225 		IBAA_memory[i] = i;
226 	}
227 	IBAA_aa = IBAA_bb = 0;
228 	IBAA_counter = 0;
229 	IBAA_byte_index = sizeof(IBAA_results);	/* force IBAA_Call() */
230 }
231 
/*
 * PRIVATE: Call IBAA to produce 256 32-bit u4 results.
 *
 * Runs one IBAA() pass over the file-scope state and rewinds the byte
 * cursor so IBAA_Byte() starts reading the fresh results from offset 0.
 */
static void
IBAA_Call (void)
{
	IBAA(IBAA_memory, IBAA_results, &IBAA_aa, &IBAA_bb, &IBAA_counter);
	IBAA_byte_index = 0;
}
241 
242 /*
243  * Add a 32-bit u4 seed value into IBAAs memory.  Mix the low 4 bits
244  * with 4 bits of PNG data to reduce the possibility of a seeding-based
245  * attack.
246  */
247 static void
248 IBAA_Seed (const u_int32_t val)
249 {
250 	static int memIndex;
251 	u4 *iptr;
252 
253 	iptr = &IBAA_memory[memIndex & MASK];
254 	*iptr = ((*iptr << 3) | (*iptr >> 29)) + (val ^ (IBAA_Byte() & 15));
255 	++memIndex;
256 }
257 
258 /*
259  * Extract a byte from IBAAs 256 32-bit u4 results array.
260  *
261  * NOTE: This code is designed to prevent MP races from taking
262  * IBAA_byte_index out of bounds.
263  */
264 static u_char
265 IBAA_Byte(void)
266 {
267 	u_char result;
268 	int index;
269 
270 	index = IBAA_byte_index;
271 	if (index == sizeof(IBAA_results)) {
272 		IBAA_Call();
273 		index = 0;
274 	}
275 	result = ((u_char *)IBAA_results)[index];
276 	IBAA_byte_index = index + 1;
277 	return result;
278 }
279 
280 /*------------------------------ IBAA ----------------------------------*/
281 
282 
283 /*------------------------------- L15 ----------------------------------*/
284 
/*
 * IMPORTANT NOTE: LByteType must be exactly 8-bits in size or this software
 * will not function correctly.
 */
typedef unsigned char	LByteType;

/* Number of entries in the L15 state table. */
#define	L15_STATE_SIZE	256

/*
 * L15 generator state.  L15_x and L15_y are the two running indices used
 * by L15_Byte(); L15_start_x is the value L15_Byte() compares L15_x against
 * after each increment; L15_state[] is the byte table being permuted.
 */
static LByteType	L15_x, L15_y;
static LByteType	L15_start_x;
static LByteType	L15_state[L15_STATE_SIZE];

/*
 * PRIVATE FUNCS:
 */

static void		L15_Swap(const LByteType pos1, const LByteType pos2);
static void		L15_InitState(void);
static void		L15_KSA(const LByteType * const key,
				const size_t keyLen);
static void		L15_Discard(const LByteType numCalls);

/*
 * PUBLIC INTERFACE:
 */
static void		L15(const LByteType * const key, const size_t keyLen);
static LByteType	L15_Byte(void);
static void		L15_Vector(const LByteType * const key,
				const size_t keyLen);
314 
315 static __inline void
316 L15_Swap(const LByteType pos1, const LByteType pos2)
317 {
318 	const LByteType	save1 = L15_state[pos1];
319 
320 	L15_state[pos1] = L15_state[pos2];
321 	L15_state[pos2] = save1;
322 }
323 
324 static void
325 L15_InitState (void)
326 {
327 	size_t i;
328 	for (i = 0; i < L15_STATE_SIZE; ++i)
329 		L15_state[i] = i;
330 }
331 
/*
 * One key-schedule pass over the whole state table: for every position i,
 * advance stateIndex by (L15_state[i] + xx) and swap entry i with the entry
 * at the new stateIndex.
 *
 * The macro uses the caller's `i` (loop counter) and `stateIndex` (running
 * swap index) variables; both must be in scope at the point of use.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement and
 * stays correct when used as the body of an unbraced if/else.
 */
#define  L_SCHEDULE(xx)						\
do {								\
	for (i = 0; i < L15_STATE_SIZE; ++i) {			\
		L15_Swap(i, (stateIndex += (L15_state[i] + (xx))));	\
	}							\
} while (0)
337 
338 static void
339 L15_KSA (const LByteType * const key, const size_t keyLen)
340 {
341 	size_t	i, keyIndex;
342 	LByteType stateIndex = 0;
343 
344 	L_SCHEDULE(keyLen);
345 	for (keyIndex = 0; keyIndex < keyLen; ++keyIndex) {
346 		L_SCHEDULE(key[keyIndex]);
347 	}
348 }
349 
350 static void
351 L15_Discard(const LByteType numCalls)
352 {
353 	LByteType i;
354 	for (i = 0; i < numCalls; ++i) {
355 		(void)L15_Byte();
356 	}
357 }
358 
359 
360 /*
361  * PUBLIC INTERFACE:
362  */
363 static void
364 L15(const LByteType * const key, const size_t keyLen)
365 {
366 	L15_x = L15_start_x = 0;
367 	L15_y = L15_STATE_SIZE - 1;
368 	L15_InitState();
369 	L15_KSA(key, keyLen);
370 	L15_Discard(L15_Byte());
371 }
372 
373 static LByteType
374 L15_Byte(void)
375 {
376 	LByteType z;
377 
378 	L15_Swap(L15_state[L15_x], L15_y);
379 	z = (L15_state [L15_x++] + L15_state[L15_y--]);
380 	if (L15_x == L15_start_x) {
381 		--L15_y;
382 	}
383 	return (L15_state[z]);
384 }
385 
/*
 * Mix additional key material into the running L15 state by re-running the
 * key schedule over the current state table (the table and the x/y indices
 * are not reset, unlike L15()).
 */
static void
L15_Vector (const LByteType * const key, const size_t keyLen)
{
	L15_KSA(key, keyLen);
}
391 
392 /*------------------------------- L15 ----------------------------------*/
393 
394 /************************************************************************
395  *				KERNEL INTERFACE			*
396  ************************************************************************
397  *
398  * By Robin J Carey and Matthew Dillon.
399  */
400 
/*
 * Nonzero while a wakeup of the helper thread is pending or in progress;
 * add_interrupt_randomness() only schedules the thread when this is 0, and
 * the helper thread clears it again after its sleep.
 */
static int rand_thread_signal = 1;
static void NANOUP_EVENT(void);
static thread_t rand_td;		/* helper thread, see rand_thread_init() */
static struct spinlock rand_spin;	/* taken around IBAA/L15 state access */

static int sysctl_kern_random(SYSCTL_HANDLER_ARGS);

/* Count of seed events added; exported read-only as kern.nrandevents. */
static int nrandevents;
SYSCTL_INT(_kern, OID_AUTO, nrandevents, CTLFLAG_RD, &nrandevents, 0, "");
SYSCTL_PROC(_kern, OID_AUTO, random, CTLFLAG_RD | CTLFLAG_ANYBODY, 0, 0,
		sysctl_kern_random, "I", "Acquire random data");
412 
413 /*
414  * Called from early boot
415  */
416 void
417 rand_initialize(void)
418 {
419 	struct timespec	now;
420 	int i;
421 
422 	spin_init(&rand_spin);
423 
424 	/* Initialize IBAA. */
425 	IBAA_Init();
426 
427 	/* Initialize L15. */
428 	nanouptime(&now);
429 	L15((const LByteType *)&now.tv_nsec, sizeof(now.tv_nsec));
430 	for (i = 0; i < (SIZE / 2); ++i) {
431 		nanotime(&now);
432 		IBAA_Seed(now.tv_nsec);
433 		L15_Vector((const LByteType *)&now.tv_nsec,
434 			   sizeof(now.tv_nsec));
435 		nanouptime(&now);
436 		IBAA_Seed(now.tv_nsec);
437 		L15_Vector((const LByteType *)&now.tv_nsec,
438 			   sizeof(now.tv_nsec));
439 	}
440 
441 	/*
442 	 * Warm up the generator to get rid of weak initial states.
443 	 */
444 	for (i = 0; i < 10; ++i)
445 		IBAA_Call();
446 }
447 
448 /*
449  * Keyboard events
450  */
451 void
452 add_keyboard_randomness(u_char scancode)
453 {
454 	spin_lock(&rand_spin);
455 	L15_Vector((const LByteType *) &scancode, sizeof (scancode));
456 	spin_unlock(&rand_spin);
457 	add_interrupt_randomness(0);
458 }
459 
460 /*
461  * Interrupt events.  This is SMP safe and allowed to race.
462  */
463 void
464 add_interrupt_randomness(int intr)
465 {
466 	if (rand_thread_signal == 0) {
467 		rand_thread_signal = 1;
468 		lwkt_schedule(rand_td);
469 	}
470 }
471 
472 /*
473  * True random number source
474  */
475 void
476 add_true_randomness(int val)
477 {
478 	spin_lock(&rand_spin);
479 	IBAA_Seed(val);
480 	L15_Vector((const LByteType *) &val, sizeof (val));
481 	++nrandevents;
482 	spin_unlock(&rand_spin);
483 }
484 
485 int
486 add_buffer_randomness(const char *buf, int bytes)
487 {
488 	int i;
489 
490 	while (bytes >= sizeof(int)) {
491 		add_true_randomness(*(const int *)buf);
492 		buf += sizeof(int);
493 		bytes -= sizeof(int);
494 	}
495 
496 	/*
497 	 * Warm up the generator to get rid of weak initial states.
498 	 */
499 	for (i = 0; i < 10; ++i)
500 		IBAA_Call();
501 
502 	return 0;
503 }
504 
/*
 * Kqueue filter (always succeeds)
 *
 * Neither argument is examined; the filter unconditionally returns 1.
 */
int
random_filter_read(struct knote *kn, long hint)
{
	return (1);
}
513 
514 /*
515  * Heavy weight random number generator.  May return less then the
516  * requested number of bytes.
517  */
518 u_int
519 read_random(void *buf, u_int nbytes)
520 {
521 	u_int i;
522 
523 	spin_lock(&rand_spin);
524 	for (i = 0; i < nbytes; ++i)
525 		((u_char *)buf)[i] = IBAA_Byte();
526 	spin_unlock(&rand_spin);
527 	add_interrupt_randomness(0);
528 	return(i);
529 }
530 
531 /*
532  * Lightweight random number generator.  Must return requested number of
533  * bytes.
534  */
535 u_int
536 read_random_unlimited(void *buf, u_int nbytes)
537 {
538 	u_int i;
539 
540 	spin_lock(&rand_spin);
541 	for (i = 0; i < nbytes; ++i)
542 		((u_char *)buf)[i] = L15_Byte();
543 	spin_unlock(&rand_spin);
544 	add_interrupt_randomness(0);
545 	return (i);
546 }
547 
548 /*
549  * Read random data via sysctl().
550  */
551 static
552 int
553 sysctl_kern_random(SYSCTL_HANDLER_ARGS)
554 {
555 	char buf[64];
556 	size_t n;
557 	size_t r;
558 	int error = 0;
559 
560 	n = req->oldlen;
561 	if (n > 1024 * 1024)
562 		n = 1024 * 1024;
563 	while (n > 0) {
564 		if ((r = n) > sizeof(buf))
565 			r = sizeof(buf);
566 		read_random_unlimited(buf, r);
567 		error = SYSCTL_OUT(req, buf, r);
568 		if (error)
569 			break;
570 		n -= r;
571 	}
572 	return(error);
573 }
574 
575 /*
576  * Random number generator helper thread.  This limits code overhead from
577  * high frequency events by delaying the clearing of rand_thread_signal.
578  *
579  * MPSAFE thread
580  */
581 static
582 void
583 rand_thread_loop(void *dummy)
584 {
585 	int count;
586 
587 	for (;;) {
588 		NANOUP_EVENT ();
589 		spin_lock(&rand_spin);
590 		count = (int)(L15_Byte() * hz / (256 * 10) + hz / 10 + 1);
591 		spin_unlock(&rand_spin);
592 		tsleep(rand_td, 0, "rwait", count);
593 		crit_enter();
594 		lwkt_deschedule_self(rand_td);
595 		cpu_sfence();
596 		rand_thread_signal = 0;
597 		crit_exit();
598 		lwkt_switch();
599 	}
600 }
601 
/*
 * Create the helper thread, named "random", with rand_thread_loop() as its
 * entry point.  &rand_td is passed to lwkt_create(), presumably so that the
 * new thread's handle ends up in rand_td (confirm against lwkt_create()).
 */
static
void
rand_thread_init(void)
{
	lwkt_create(rand_thread_loop, NULL, &rand_td, NULL, 0, 0, "random");
}

/* Register rand_thread_init() with SYSINIT at SI_SUB_HELPER_THREADS. */
SYSINIT(rand, SI_SUB_HELPER_THREADS, SI_ORDER_ANY, rand_thread_init, 0);
610 
611 /*
612  * Time-buffered event time-stamping. This is necessary to cutoff higher
613  * event frequencies, e.g. an interrupt occuring at 25Hz. In such cases
614  * the CPU is being chewed and the timestamps are skewed (minimal variation).
615  * Use a nano-second time-delay to limit how many times an Event can occur
616  * in one second; <= 5Hz. Note that this doesn't prevent time-stamp skewing.
617  * This implementation randmoises the time-delay between events, which adds
618  * a layer of security/unpredictability with regard to read-events (a user
619  * controlled input).
620  *
621  * Note: now.tv_nsec should range [ 0 - 1000,000,000 ].
622  * Note: "ACCUM" is a security measure (result = capped-unknown + unknown),
623  *       and also produces an uncapped (>=32-bit) value.
624  */
625 static void
626 NANOUP_EVENT(void)
627 {
628 	static struct timespec	ACCUM = { 0, 0 };
629 	static struct timespec	NEXT  = { 0, 0 };
630 	struct timespec		now;
631 
632 	nanouptime(&now);
633 	spin_lock(&rand_spin);
634 	if ((now.tv_nsec > NEXT.tv_nsec) || (now.tv_sec != NEXT.tv_sec)) {
635 		/*
636 		 * Randomised time-delay: 200e6 - 350e6 ns; 5 - 2.86 Hz.
637 		 */
638 		unsigned long one_mil;
639 		unsigned long timeDelay;
640 
641 		one_mil = 1000000UL;	/* 0.001 s */
642 		timeDelay = (one_mil * 200) +
643 			    (((unsigned long)ACCUM.tv_nsec % 151) * one_mil);
644 		NEXT.tv_nsec = now.tv_nsec + timeDelay;
645 		NEXT.tv_sec = now.tv_sec;
646 		ACCUM.tv_nsec += now.tv_nsec;
647 
648 		/*
649 		 * The TSC, if present, generally has an even higher
650 		 * resolution.  Integrate a portion of it into our seed.
651 		 */
652 		if (tsc_present)
653 			ACCUM.tv_nsec ^= rdtsc() & 255;
654 
655 		IBAA_Seed(ACCUM.tv_nsec);
656 		L15_Vector((const LByteType *)&ACCUM.tv_nsec,
657 			   sizeof(ACCUM.tv_nsec));
658 		++nrandevents;
659 	}
660 	spin_unlock(&rand_spin);
661 }
662 
663