1 /* 2 * Copyright (c) 2004, 2005, 2006 Robin J Carey. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions, and the following disclaimer, 9 * without modification, immediately at the beginning of the file. 10 * 2. The name of the author may not be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 17 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 /* --- NOTES --- 26 * 27 * Note: The word "entropy" is often incorrectly used to describe 28 * random data. The word "entropy" originates from the science of 29 * Physics. The correct descriptive definition would be something 30 * along the lines of "seed", "unpredictable numbers" or 31 * "unpredictable data". 32 * 33 * Note: Some /dev/[u]random implementations save "seed" between 34 * boots which represents a security hazard since an adversary 35 * could acquire this data (since it is stored in a file). 
If 36 * the unpredictable data used in the above routines is only 37 * generated during Kernel operation, then an adversary can only 38 * acquire that data through a Kernel security compromise and/or 39 * a cryptographic algorithm failure/cryptanalysis. 40 * 41 * Note: On FreeBSD-4.11, interrupts have to be manually enabled 42 * using the rndcontrol(8) command. 43 * 44 * --- DESIGN (FreeBSD-4.11 based) --- 45 * 46 * The rnddev module automatically initializes itself the first time 47 * it is used (client calls any public rnddev_*() interface routine). 48 * Both CSPRNGs are initially seeded from the precise nano[up]time() routines. 49 * Tests show this method produces good enough results, suitable for intended 50 * use. It is necessary for both CSPRNGs to be completely seeded, initially. 51 * 52 * After initialization and during Kernel operation the only suitable 53 * unpredictable data available is: 54 * 55 * (1) Keyboard scan-codes. 56 * (2) Nanouptime acquired by a Keyboard/Read-Event. 57 * (3) Suitable interrupt source; hard-disk/ATA-device. 58 * 59 * (X) Mouse-event (xyz-data unsuitable); NOT IMPLEMENTED. 60 * 61 * This data is added to both CSPRNGs in real-time as it happens/ 62 * becomes-available. Additionally, unpredictable (?) data may be 63 * acquired from a true-random number generator if such a device is 64 * available to the system (not advisable !). 65 * Nanouptime() acquired by a Read-Event is a very important aspect of 66 * this design, since it ensures that unpredictable data is added to 67 * the CSPRNGs even if there are no other sources. 68 * The nanouptime() Kernel routine is used since time relative to 69 * boot is less adversary-known than time itself. 70 * 71 * This design has been thoroughly tested with debug logging 72 * and the output from both /dev/random and /dev/urandom has 73 * been tested with the DIEHARD test-suite; both pass. 
74 * 75 * MODIFICATIONS MADE TO ORIGINAL "kern_random.c": 76 * 77 * 6th July 2005: 78 * 79 * o Changed ReadSeed() function to schedule future read-seed-events 80 * by at least one second. Previous implementation used a randomised 81 * scheduling { 0, 1, 2, 3 seconds }. 82 * o Changed SEED_NANOUP() function to use a "previous" accumulator 83 * algorithm similar to ReadSeed(). This ensures that there is no 84 * way that an adversary can tell what number is being added to the 85 * CSPRNGs, since the number added to the CSPRNGs at Event-Time is 86 * the sum of nanouptime()@Event and an unknown/secret number. 87 * o Changed rnddev_add_interrupt() function to schedule future 88 * interrupt-events by at least one second. Previous implementation 89 * had no scheduling algorithm which allowed an "interrupt storm" 90 * to occur resulting in skewed data entering into the CSPRNGs. 91 * 92 * 93 * 9th July 2005: 94 * 95 * o Some small cleanups and change all internal functions to be 96 * static/private. 97 * o Removed ReadSeed() since its functionality is already performed 98 * by another function { rnddev_add_interrupt_OR_read() } and remove 99 * the silly rndByte accumulator/feedback-thing (since multiplying by 100 * rndByte could yield a value of 0). 101 * o Made IBAA/L14 public interface become static/private; 102 * Local to this file (not changed to that in the original C modules). 103 * 104 * 16th July 2005: 105 * 106 * o SEED_NANOUP() -> NANOUP_EVENT() function rename. 107 * o Make NANOUP_EVENT() handle the time-buffering directly so that all 108 * time-stamp-events use this single time-buffer (including keyboard). 109 * This removes dependency on "time_second" Kernel variable. 110 * o Removed second-time-buffer code in rnddev_add_interrupt_OR_read (void). 111 * o Rewrote the time-buffering algorithm in NANOUP_EVENT() to use a 112 * randomised time-delay range. 113 * 114 * 12th Dec 2005: 115 * 116 * o Updated to (hopefully final) L15 algorithm. 
117 * 118 * 12th June 2006: 119 * 120 * o Added missing (u_char *) cast in RnddevRead() function. 121 * o Changed copyright to 3-clause BSD license and cleaned up the layout 122 * of this file. 123 */ 124 125 #include <sys/types.h> 126 #include <sys/kernel.h> 127 #include <sys/systm.h> 128 #include <sys/poll.h> 129 #include <sys/event.h> 130 #include <sys/random.h> 131 #include <sys/systimer.h> 132 #include <sys/time.h> 133 #include <sys/proc.h> 134 #include <sys/lock.h> 135 #include <sys/sysctl.h> 136 #include <sys/spinlock.h> 137 #include <machine/clock.h> 138 139 #include <sys/thread2.h> 140 #include <sys/spinlock2.h> 141 #include <sys/mplock2.h> 142 143 /* 144 * Portability note: The u_char/unsigned char type is used where 145 * uint8_t from <stdint.h> or u_int8_t from <sys/types.h> should really 146 * be being used. On FreeBSD, it is safe to make the assumption that these 147 * different types are equivalent (on all architectures). 148 * The FreeBSD <sys/crypto/rc4> module also makes this assumption. 149 */ 150 151 /*------------------------------ IBAA ----------------------------------*/ 152 153 /*-------------------------- IBAA CSPRNG -------------------------------*/ 154 155 /* 156 * NOTE: The original source code from which this source code (IBAA) 157 * was taken has no copyright/license. The algorithm has no patent 158 * and is freely/publicly available from: 159 * 160 * http://www.burtleburtle.net/bob/rand/isaac.html 161 */ 162 163 /* 164 * ^ means XOR, & means bitwise AND, a<<b means shift a by b. 
165 * barrel(a) shifts a 19 bits to the left, and bits wrap around 166 * ind(x) is (x AND 255), or (x mod 256) 167 */ 168 typedef u_int32_t u4; /* unsigned four bytes, 32 bits */ 169 170 #define ALPHA (8) 171 #define SIZE (1 << ALPHA) 172 #define MASK (SIZE - 1) 173 #define ind(x) ((x) & (SIZE - 1)) 174 #define barrel(a) (((a) << 20) ^ ((a) >> 12)) /* beta=32,shift=20 */ 175 176 static void IBAA 177 ( 178 u4 *m, /* Memory: array of SIZE ALPHA-bit terms */ 179 u4 *r, /* Results: the sequence, same size as m */ 180 u4 *aa, /* Accumulator: a single value */ 181 u4 *bb, /* the previous result */ 182 u4 *counter /* counter */ 183 ) 184 { 185 u4 a, b, x, y, i; 186 187 a = *aa; 188 b = *bb + *counter; 189 ++*counter; 190 for (i = 0; i < SIZE; ++i) { 191 x = m[i]; 192 a = barrel(a) + m[ind(i + (SIZE / 2))]; /* set a */ 193 m[i] = y = m[ind(x)] + a + b; /* set m */ 194 r[i] = b = m[ind(y >> ALPHA)] + x; /* set r */ 195 } 196 *bb = b; *aa = a; 197 } 198 199 /*-------------------------- IBAA CSPRNG -------------------------------*/ 200 201 202 static u4 IBAA_memory[SIZE]; 203 static u4 IBAA_results[SIZE]; 204 static u4 IBAA_aa; 205 static u4 IBAA_bb; 206 static u4 IBAA_counter; 207 208 static volatile int IBAA_byte_index; 209 210 211 static void IBAA_Init(void); 212 static void IBAA_Call(void); 213 static void IBAA_Seed(const u_int32_t val); 214 static u_char IBAA_Byte(void); 215 216 /* 217 * Initialize IBAA. 218 */ 219 static void 220 IBAA_Init(void) 221 { 222 size_t i; 223 224 for (i = 0; i < SIZE; ++i) { 225 IBAA_memory[i] = i; 226 } 227 IBAA_aa = IBAA_bb = 0; 228 IBAA_counter = 0; 229 IBAA_byte_index = sizeof(IBAA_results); /* force IBAA_Call() */ 230 } 231 232 /* 233 * PRIVATE: Call IBAA to produce 256 32-bit u4 results. 234 */ 235 static void 236 IBAA_Call (void) 237 { 238 IBAA(IBAA_memory, IBAA_results, &IBAA_aa, &IBAA_bb, &IBAA_counter); 239 IBAA_byte_index = 0; 240 } 241 242 /* 243 * Add a 32-bit u4 seed value into IBAAs memory. 
Mix the low 4 bits 244 * with 4 bits of PNG data to reduce the possibility of a seeding-based 245 * attack. 246 */ 247 static void 248 IBAA_Seed (const u_int32_t val) 249 { 250 static int memIndex; 251 u4 *iptr; 252 253 iptr = &IBAA_memory[memIndex & MASK]; 254 *iptr = ((*iptr << 3) | (*iptr >> 29)) + (val ^ (IBAA_Byte() & 15)); 255 ++memIndex; 256 } 257 258 /* 259 * Extract a byte from IBAAs 256 32-bit u4 results array. 260 * 261 * NOTE: This code is designed to prevent MP races from taking 262 * IBAA_byte_index out of bounds. 263 */ 264 static u_char 265 IBAA_Byte(void) 266 { 267 u_char result; 268 int index; 269 270 index = IBAA_byte_index; 271 if (index == sizeof(IBAA_results)) { 272 IBAA_Call(); 273 index = 0; 274 } 275 result = ((u_char *)IBAA_results)[index]; 276 IBAA_byte_index = index + 1; 277 return result; 278 } 279 280 /*------------------------------ IBAA ----------------------------------*/ 281 282 283 /*------------------------------- L15 ----------------------------------*/ 284 285 /* 286 * IMPORTANT NOTE: LByteType must be exactly 8-bits in size or this software 287 * will not function correctly. 
288 */ 289 typedef unsigned char LByteType; 290 291 #define L15_STATE_SIZE 256 292 293 static LByteType L15_x, L15_y; 294 static LByteType L15_start_x; 295 static LByteType L15_state[L15_STATE_SIZE]; 296 297 /* 298 * PRIVATE FUNCS: 299 */ 300 301 static void L15_Swap(const LByteType pos1, const LByteType pos2); 302 static void L15_InitState(void); 303 static void L15_KSA(const LByteType * const key, 304 const size_t keyLen); 305 static void L15_Discard(const LByteType numCalls); 306 307 /* 308 * PUBLIC INTERFACE: 309 */ 310 static void L15(const LByteType * const key, const size_t keyLen); 311 static LByteType L15_Byte(void); 312 static void L15_Vector(const LByteType * const key, 313 const size_t keyLen); 314 315 static __inline void 316 L15_Swap(const LByteType pos1, const LByteType pos2) 317 { 318 const LByteType save1 = L15_state[pos1]; 319 320 L15_state[pos1] = L15_state[pos2]; 321 L15_state[pos2] = save1; 322 } 323 324 static void 325 L15_InitState (void) 326 { 327 size_t i; 328 for (i = 0; i < L15_STATE_SIZE; ++i) 329 L15_state[i] = i; 330 } 331 332 #define L_SCHEDULE(xx) \ 333 \ 334 for (i = 0; i < L15_STATE_SIZE; ++i) { \ 335 L15_Swap(i, (stateIndex += (L15_state[i] + (xx)))); \ 336 } 337 338 static void 339 L15_KSA (const LByteType * const key, const size_t keyLen) 340 { 341 size_t i, keyIndex; 342 LByteType stateIndex = 0; 343 344 L_SCHEDULE(keyLen); 345 for (keyIndex = 0; keyIndex < keyLen; ++keyIndex) { 346 L_SCHEDULE(key[keyIndex]); 347 } 348 } 349 350 static void 351 L15_Discard(const LByteType numCalls) 352 { 353 LByteType i; 354 for (i = 0; i < numCalls; ++i) { 355 (void)L15_Byte(); 356 } 357 } 358 359 360 /* 361 * PUBLIC INTERFACE: 362 */ 363 static void 364 L15(const LByteType * const key, const size_t keyLen) 365 { 366 L15_x = L15_start_x = 0; 367 L15_y = L15_STATE_SIZE - 1; 368 L15_InitState(); 369 L15_KSA(key, keyLen); 370 L15_Discard(L15_Byte()); 371 } 372 373 static LByteType 374 L15_Byte(void) 375 { 376 LByteType z; 377 378 
L15_Swap(L15_state[L15_x], L15_y); 379 z = (L15_state [L15_x++] + L15_state[L15_y--]); 380 if (L15_x == L15_start_x) { 381 --L15_y; 382 } 383 return (L15_state[z]); 384 } 385 386 static void 387 L15_Vector (const LByteType * const key, const size_t keyLen) 388 { 389 L15_KSA(key, keyLen); 390 } 391 392 /*------------------------------- L15 ----------------------------------*/ 393 394 /************************************************************************ 395 * KERNEL INTERFACE * 396 ************************************************************************ 397 * 398 * By Robin J Carey and Matthew Dillon. 399 */ 400 401 static int rand_thread_signal = 1; 402 static void NANOUP_EVENT(void); 403 static thread_t rand_td; 404 static struct spinlock rand_spin; 405 406 static int sysctl_kern_random(SYSCTL_HANDLER_ARGS); 407 408 static int nrandevents; 409 SYSCTL_INT(_kern, OID_AUTO, nrandevents, CTLFLAG_RD, &nrandevents, 0, ""); 410 SYSCTL_PROC(_kern, OID_AUTO, random, CTLFLAG_RD | CTLFLAG_ANYBODY, 0, 0, 411 sysctl_kern_random, "I", "Acquire random data"); 412 413 /* 414 * Called from early boot 415 */ 416 void 417 rand_initialize(void) 418 { 419 struct timespec now; 420 int i; 421 422 spin_init(&rand_spin); 423 424 /* Initialize IBAA. */ 425 IBAA_Init(); 426 427 /* Initialize L15. */ 428 nanouptime(&now); 429 L15((const LByteType *)&now.tv_nsec, sizeof(now.tv_nsec)); 430 for (i = 0; i < (SIZE / 2); ++i) { 431 nanotime(&now); 432 IBAA_Seed(now.tv_nsec); 433 L15_Vector((const LByteType *)&now.tv_nsec, 434 sizeof(now.tv_nsec)); 435 nanouptime(&now); 436 IBAA_Seed(now.tv_nsec); 437 L15_Vector((const LByteType *)&now.tv_nsec, 438 sizeof(now.tv_nsec)); 439 } 440 441 /* 442 * Warm up the generator to get rid of weak initial states. 
443 */ 444 for (i = 0; i < 10; ++i) 445 IBAA_Call(); 446 } 447 448 /* 449 * Keyboard events 450 */ 451 void 452 add_keyboard_randomness(u_char scancode) 453 { 454 spin_lock(&rand_spin); 455 L15_Vector((const LByteType *) &scancode, sizeof (scancode)); 456 spin_unlock(&rand_spin); 457 add_interrupt_randomness(0); 458 } 459 460 /* 461 * Interrupt events. This is SMP safe and allowed to race. 462 */ 463 void 464 add_interrupt_randomness(int intr) 465 { 466 if (rand_thread_signal == 0) { 467 rand_thread_signal = 1; 468 lwkt_schedule(rand_td); 469 } 470 } 471 472 /* 473 * True random number source 474 */ 475 void 476 add_true_randomness(int val) 477 { 478 spin_lock(&rand_spin); 479 IBAA_Seed(val); 480 L15_Vector((const LByteType *) &val, sizeof (val)); 481 ++nrandevents; 482 spin_unlock(&rand_spin); 483 } 484 485 int 486 add_buffer_randomness(const char *buf, int bytes) 487 { 488 int i; 489 490 while (bytes >= sizeof(int)) { 491 add_true_randomness(*(const int *)buf); 492 buf += sizeof(int); 493 bytes -= sizeof(int); 494 } 495 496 /* 497 * Warm up the generator to get rid of weak initial states. 498 */ 499 for (i = 0; i < 10; ++i) 500 IBAA_Call(); 501 502 return 0; 503 } 504 505 /* 506 * Kqueue filter (always succeeds) 507 */ 508 int 509 random_filter_read(struct knote *kn, long hint) 510 { 511 return (1); 512 } 513 514 /* 515 * Heavy weight random number generator. May return less then the 516 * requested number of bytes. 517 */ 518 u_int 519 read_random(void *buf, u_int nbytes) 520 { 521 u_int i; 522 523 spin_lock(&rand_spin); 524 for (i = 0; i < nbytes; ++i) 525 ((u_char *)buf)[i] = IBAA_Byte(); 526 spin_unlock(&rand_spin); 527 add_interrupt_randomness(0); 528 return(i); 529 } 530 531 /* 532 * Lightweight random number generator. Must return requested number of 533 * bytes. 
534 */ 535 u_int 536 read_random_unlimited(void *buf, u_int nbytes) 537 { 538 u_int i; 539 540 spin_lock(&rand_spin); 541 for (i = 0; i < nbytes; ++i) 542 ((u_char *)buf)[i] = L15_Byte(); 543 spin_unlock(&rand_spin); 544 add_interrupt_randomness(0); 545 return (i); 546 } 547 548 /* 549 * Read random data via sysctl(). 550 */ 551 static 552 int 553 sysctl_kern_random(SYSCTL_HANDLER_ARGS) 554 { 555 char buf[64]; 556 size_t n; 557 size_t r; 558 int error = 0; 559 560 n = req->oldlen; 561 if (n > 1024 * 1024) 562 n = 1024 * 1024; 563 while (n > 0) { 564 if ((r = n) > sizeof(buf)) 565 r = sizeof(buf); 566 read_random_unlimited(buf, r); 567 error = SYSCTL_OUT(req, buf, r); 568 if (error) 569 break; 570 n -= r; 571 } 572 return(error); 573 } 574 575 /* 576 * Random number generator helper thread. This limits code overhead from 577 * high frequency events by delaying the clearing of rand_thread_signal. 578 * 579 * MPSAFE thread 580 */ 581 static 582 void 583 rand_thread_loop(void *dummy) 584 { 585 int count; 586 587 for (;;) { 588 NANOUP_EVENT (); 589 spin_lock(&rand_spin); 590 count = (int)(L15_Byte() * hz / (256 * 10) + hz / 10 + 1); 591 spin_unlock(&rand_spin); 592 tsleep(rand_td, 0, "rwait", count); 593 crit_enter(); 594 lwkt_deschedule_self(rand_td); 595 cpu_sfence(); 596 rand_thread_signal = 0; 597 crit_exit(); 598 lwkt_switch(); 599 } 600 } 601 602 static 603 void 604 rand_thread_init(void) 605 { 606 lwkt_create(rand_thread_loop, NULL, &rand_td, NULL, 0, 0, "random"); 607 } 608 609 SYSINIT(rand, SI_SUB_HELPER_THREADS, SI_ORDER_ANY, rand_thread_init, 0); 610 611 /* 612 * Time-buffered event time-stamping. This is necessary to cutoff higher 613 * event frequencies, e.g. an interrupt occuring at 25Hz. In such cases 614 * the CPU is being chewed and the timestamps are skewed (minimal variation). 615 * Use a nano-second time-delay to limit how many times an Event can occur 616 * in one second; <= 5Hz. Note that this doesn't prevent time-stamp skewing. 
617 * This implementation randmoises the time-delay between events, which adds 618 * a layer of security/unpredictability with regard to read-events (a user 619 * controlled input). 620 * 621 * Note: now.tv_nsec should range [ 0 - 1000,000,000 ]. 622 * Note: "ACCUM" is a security measure (result = capped-unknown + unknown), 623 * and also produces an uncapped (>=32-bit) value. 624 */ 625 static void 626 NANOUP_EVENT(void) 627 { 628 static struct timespec ACCUM = { 0, 0 }; 629 static struct timespec NEXT = { 0, 0 }; 630 struct timespec now; 631 632 nanouptime(&now); 633 spin_lock(&rand_spin); 634 if ((now.tv_nsec > NEXT.tv_nsec) || (now.tv_sec != NEXT.tv_sec)) { 635 /* 636 * Randomised time-delay: 200e6 - 350e6 ns; 5 - 2.86 Hz. 637 */ 638 unsigned long one_mil; 639 unsigned long timeDelay; 640 641 one_mil = 1000000UL; /* 0.001 s */ 642 timeDelay = (one_mil * 200) + 643 (((unsigned long)ACCUM.tv_nsec % 151) * one_mil); 644 NEXT.tv_nsec = now.tv_nsec + timeDelay; 645 NEXT.tv_sec = now.tv_sec; 646 ACCUM.tv_nsec += now.tv_nsec; 647 648 /* 649 * The TSC, if present, generally has an even higher 650 * resolution. Integrate a portion of it into our seed. 651 */ 652 if (tsc_present) 653 ACCUM.tv_nsec ^= rdtsc() & 255; 654 655 IBAA_Seed(ACCUM.tv_nsec); 656 L15_Vector((const LByteType *)&ACCUM.tv_nsec, 657 sizeof(ACCUM.tv_nsec)); 658 ++nrandevents; 659 } 660 spin_unlock(&rand_spin); 661 } 662 663