1 /* 2 * Copyright (c) 2012 Alex Hornung <alex@alexhornung.com>. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 24 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kernel.h> 32 #include <sys/kobj.h> 33 #include <sys/libkern.h> 34 #include <sys/module.h> 35 #include <sys/bus.h> 36 #include <sys/random.h> 37 #include <sys/malloc.h> 38 #include <sys/sysctl.h> 39 40 #include <machine/specialreg.h> 41 42 /* 43 * WARNING! 44 * 45 * The RDRAND instruction is a very slow instruction, burning approximately 46 * 0.79uS per 64-bit word on a modern ryzen cpu. Intel cpu's run this 47 * instruction far more quickly. The quality of the results are unknown 48 * either way. 49 * 50 * However, possibly an even bigger problem is the cost of calling 51 * add_buffer_randomness(), which takes an enormous amount of time 52 * when handed a large buffer. 53 * 54 * Our code harvests at a 10hz rate on every single core, and also chains 55 * some entropy from core to core so honestly it doesn't take much to really 56 * mix things up. Use a decent size (16 or 32 bytes should be good). 57 * 58 * On a TR3990 going from 512 to 16 gave userland almost 10% additional 59 * performance... a very stark difference. A simple test loop: 60 * 61 * BEFORE: (RDRAND_SIZE 512) 62 * 7.258u 0.000s 0:07.69 94.2% 2+70k 2+0io 0pf+0w 63 * 7.222u 0.000s 0:07.67 94.1% 2+70k 0+0io 0pf+0w 64 * 7.239u 0.000s 0:07.69 94.0% 2+70k 0+0io 0pf+0w 65 * 66 * AFTER: (RDRAND_SIZE 16) (9.3% faster) 67 * 7.019u 0.000s 0:07.02 99.8% 2+66k 0+0io 0pf+0w 68 * 7.019u 0.000s 0:07.02 99.8% 2+66k 0+0io 0pf+0w 69 * 7.028u 0.000s 0:07.02 100.0% 2+66k 0+0io 0pf+0w 70 */ 71 #define RDRAND_ALIGN(p) (void *)(roundup2((uintptr_t)(p), 16)) 72 #define RDRAND_SIZE 16 73 74 static int rdrand_debug; 75 SYSCTL_INT(_debug, OID_AUTO, rdrand, CTLFLAG_RW, &rdrand_debug, 0, 76 "Enable rdrand debugging"); 77 78 struct rdrand_softc { 79 struct callout *sc_rng_co; 80 int32_t sc_rng_ticks; 81 }; 82 83 84 static void rdrand_rng_harvest(void *); 85 int rdrand_rng(uint8_t *out, long limit); 86 87 88 static void 89 rdrand_identify(driver_t *drv, device_t parent) 90 { 91 92 /* NB: order 10 is so we get attached after h/w devices */ 93 if (device_find_child(parent, "rdrand", -1) == NULL && 94 BUS_ADD_CHILD(parent, parent, 10, "rdrand", -1) == 0) 95 panic("rdrand: could not attach"); 96 } 97 98 99 static int 100 rdrand_probe(device_t dev) 101 { 102 103 if ((cpu_feature2 & CPUID2_RDRAND) == 0) { 104 device_printf(dev, "No RdRand support.\n"); 105 return (EINVAL); 106 } 107 108 device_set_desc(dev, "RdRand RNG"); 109 return 0; 110 } 111 112 113 static int 114 rdrand_attach(device_t dev) 115 { 116 struct rdrand_softc *sc; 117 int i; 118 119 sc = device_get_softc(dev); 120 121 if (hz > 10) 122 sc->sc_rng_ticks = hz / 10; 123 else 124 sc->sc_rng_ticks = 1; 125 126 sc->sc_rng_co = kmalloc(ncpus * sizeof(*sc->sc_rng_co), 127 M_TEMP, M_WAITOK | M_ZERO); 128 129 /* 130 * Set an initial offset so we don't pound all cores simultaneously 131 * for no good reason. 132 */ 133 for (i = 0; i < ncpus; ++i) { 134 callout_init_mp(&sc->sc_rng_co[i]); 135 callout_reset_bycpu(&sc->sc_rng_co[i], 136 i, rdrand_rng_harvest, sc, i); 137 } 138 139 return 0; 140 } 141 142 143 static int 144 rdrand_detach(device_t dev) 145 { 146 struct rdrand_softc *sc; 147 int i; 148 149 sc = device_get_softc(dev); 150 151 for (i = 0; i < ncpus; ++i) { 152 callout_terminate(&sc->sc_rng_co[i]); 153 } 154 155 return (0); 156 } 157 158 159 static void 160 rdrand_rng_harvest(void *arg) 161 { 162 struct rdrand_softc *sc = arg; 163 uint8_t randomness[RDRAND_SIZE + 32]; 164 uint8_t *arandomness; /* randomness aligned */ 165 int cnt; 166 167 arandomness = RDRAND_ALIGN(randomness); 168 169 cnt = rdrand_rng(arandomness, RDRAND_SIZE); 170 if (cnt > 0 && cnt < sizeof(randomness)) { 171 add_buffer_randomness_src(arandomness, cnt, 172 RAND_SRC_RDRAND | 173 RAND_SRCF_PCPU); 174 175 if (rdrand_debug > 0) { 176 --rdrand_debug; 177 kprintf("rdrand(%d,cpu=%d): %02x %02x %02x %02x...\n", 178 cnt, mycpu->gd_cpuid, 179 arandomness[0], 180 arandomness[1], 181 arandomness[2], 182 arandomness[3]); 183 } 184 } 185 186 callout_reset(&sc->sc_rng_co[mycpu->gd_cpuid], sc->sc_rng_ticks, 187 rdrand_rng_harvest, sc); 188 } 189 190 191 static device_method_t rdrand_methods[] = { 192 DEVMETHOD(device_identify, rdrand_identify), 193 DEVMETHOD(device_probe, rdrand_probe), 194 DEVMETHOD(device_attach, rdrand_attach), 195 DEVMETHOD(device_detach, rdrand_detach), 196 197 DEVMETHOD_END 198 }; 199 200 201 static driver_t rdrand_driver = { 202 "rdrand", 203 rdrand_methods, 204 sizeof(struct rdrand_softc), 205 }; 206 207 static devclass_t rdrand_devclass; 208 209 DRIVER_MODULE(rdrand, nexus, rdrand_driver, rdrand_devclass, NULL, NULL); 210 MODULE_VERSION(rdrand, 1); 211 MODULE_DEPEND(rdrand, crypto, 1, 1, 1); 212