1 /*
2 * Copyright (c) 2012 Alex Hornung <alex@alexhornung.com>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/kobj.h>
33 #include <sys/libkern.h>
34 #include <sys/module.h>
35 #include <sys/bus.h>
36 #include <sys/random.h>
37 #include <sys/malloc.h>
38 #include <sys/sysctl.h>
39
40 #include <machine/specialreg.h>
41
42 /*
43 * WARNING!
44 *
 * The RDRAND instruction is very slow, burning approximately 0.79uS
 * per 64-bit word on a modern Ryzen CPU.  Intel CPUs run this
 * instruction far more quickly.  The quality of the results is unknown
 * either way.
49 *
50 * However, possibly an even bigger problem is the cost of calling
51 * add_buffer_randomness(), which takes an enormous amount of time
52 * when handed a large buffer.
53 *
54 * Our code harvests at a 10hz rate on every single core, and also chains
55 * some entropy from core to core so honestly it doesn't take much to really
56 * mix things up. Use a decent size (16 or 32 bytes should be good).
57 *
58 * On a TR3990 going from 512 to 16 gave userland almost 10% additional
59 * performance... a very stark difference. A simple test loop:
60 *
61 * BEFORE: (RDRAND_SIZE 512)
62 * 7.258u 0.000s 0:07.69 94.2% 2+70k 2+0io 0pf+0w
63 * 7.222u 0.000s 0:07.67 94.1% 2+70k 0+0io 0pf+0w
64 * 7.239u 0.000s 0:07.69 94.0% 2+70k 0+0io 0pf+0w
65 *
66 * AFTER: (RDRAND_SIZE 16) (9.3% faster)
67 * 7.019u 0.000s 0:07.02 99.8% 2+66k 0+0io 0pf+0w
68 * 7.019u 0.000s 0:07.02 99.8% 2+66k 0+0io 0pf+0w
69 * 7.028u 0.000s 0:07.02 100.0% 2+66k 0+0io 0pf+0w
70 */
/*
 * RDRAND_ALIGN rounds a pointer up to the next 16-byte boundary and yields
 * it as a void pointer.  RDRAND_SIZE is the number of bytes requested from
 * rdrand_rng() on each harvest.
 */
#define RDRAND_ALIGN(p)	((void *)roundup2((uintptr_t)(p), 16))
#define RDRAND_SIZE	16
73
/*
 * Debug knob, exported read/write by the SYSCTL_INT below (node "rdrand"
 * under the _debug tree).  While the value is positive,
 * rdrand_rng_harvest() decrements it once per accepted sample and prints
 * the first four bytes of that sample, so writing N logs about N samples.
 */
static int rdrand_debug;
SYSCTL_INT(_debug, OID_AUTO, rdrand, CTLFLAG_RW, &rdrand_debug, 0,
    "Enable rdrand debugging");
77
/*
 * Per-device driver state, obtained with device_get_softc().
 */
struct rdrand_softc {
	/* Array of ncpus callouts, indexed by CPU; allocated in rdrand_attach(). */
	struct callout *sc_rng_co;
	/* Delay in ticks between harvests: hz / 10 when hz > 10, otherwise 1. */
	int32_t sc_rng_ticks;
};
82
83
/* Callout handler that collects one sample and re-arms itself. */
static void rdrand_rng_harvest(void *);

/*
 * Not defined in this file.  Judging by its use in rdrand_rng_harvest(),
 * it stores up to 'limit' bytes at 'out' and returns how many bytes were
 * produced, with a result <= 0 meaning nothing usable -- TODO confirm
 * against the implementation.
 */
int rdrand_rng(uint8_t *out, long limit);
86
87
88 static void
rdrand_identify(driver_t * drv,device_t parent)89 rdrand_identify(driver_t *drv, device_t parent)
90 {
91
92 /* NB: order 10 is so we get attached after h/w devices */
93 if (device_find_child(parent, "rdrand", -1) == NULL &&
94 BUS_ADD_CHILD(parent, parent, 10, "rdrand", -1) == 0)
95 panic("rdrand: could not attach");
96 }
97
98
99 static int
rdrand_probe(device_t dev)100 rdrand_probe(device_t dev)
101 {
102
103 if ((cpu_feature2 & CPUID2_RDRAND) == 0) {
104 device_printf(dev, "No RdRand support.\n");
105 return (EINVAL);
106 }
107
108 device_set_desc(dev, "RdRand RNG");
109 return 0;
110 }
111
112
113 static int
rdrand_attach(device_t dev)114 rdrand_attach(device_t dev)
115 {
116 struct rdrand_softc *sc;
117 int i;
118
119 sc = device_get_softc(dev);
120
121 if (hz > 10)
122 sc->sc_rng_ticks = hz / 10;
123 else
124 sc->sc_rng_ticks = 1;
125
126 sc->sc_rng_co = kmalloc(ncpus * sizeof(*sc->sc_rng_co),
127 M_TEMP, M_WAITOK | M_ZERO);
128
129 /*
130 * Set an initial offset so we don't pound all cores simultaneously
131 * for no good reason.
132 */
133 for (i = 0; i < ncpus; ++i) {
134 callout_init_mp(&sc->sc_rng_co[i]);
135 callout_reset_bycpu(&sc->sc_rng_co[i],
136 i, rdrand_rng_harvest, sc, i);
137 }
138
139 return 0;
140 }
141
142
143 static int
rdrand_detach(device_t dev)144 rdrand_detach(device_t dev)
145 {
146 struct rdrand_softc *sc;
147 int i;
148
149 sc = device_get_softc(dev);
150
151 for (i = 0; i < ncpus; ++i) {
152 callout_terminate(&sc->sc_rng_co[i]);
153 }
154
155 return (0);
156 }
157
158
159 static void
rdrand_rng_harvest(void * arg)160 rdrand_rng_harvest(void *arg)
161 {
162 struct rdrand_softc *sc = arg;
163 uint8_t randomness[RDRAND_SIZE + 32];
164 uint8_t *arandomness; /* randomness aligned */
165 int cnt;
166
167 arandomness = RDRAND_ALIGN(randomness);
168
169 cnt = rdrand_rng(arandomness, RDRAND_SIZE);
170 if (cnt > 0 && cnt < sizeof(randomness)) {
171 add_buffer_randomness_src(arandomness, cnt,
172 RAND_SRC_RDRAND |
173 RAND_SRCF_PCPU);
174
175 if (rdrand_debug > 0) {
176 --rdrand_debug;
177 kprintf("rdrand(%d,cpu=%d): %02x %02x %02x %02x...\n",
178 cnt, mycpu->gd_cpuid,
179 arandomness[0],
180 arandomness[1],
181 arandomness[2],
182 arandomness[3]);
183 }
184 }
185
186 callout_reset(&sc->sc_rng_co[mycpu->gd_cpuid], sc->sc_rng_ticks,
187 rdrand_rng_harvest, sc);
188 }
189
190
/*
 * Device interface methods implemented by this driver; referenced from
 * rdrand_driver below.
 */
static device_method_t rdrand_methods[] = {
	DEVMETHOD(device_identify, rdrand_identify),
	DEVMETHOD(device_probe, rdrand_probe),
	DEVMETHOD(device_attach, rdrand_attach),
	DEVMETHOD(device_detach, rdrand_detach),

	DEVMETHOD_END
};
199
200
/*
 * Driver description, initialized positionally with the driver name, its
 * method table and the size of struct rdrand_softc.
 */
static driver_t rdrand_driver = {
	"rdrand",
	rdrand_methods,
	sizeof(struct rdrand_softc),
};
206
/* Device class storage handed to DRIVER_MODULE() below. */
static devclass_t rdrand_devclass;

/*
 * Register rdrand_driver with "nexus" as its parent bus, declare the module
 * as version 1 and record its dependency on version 1 of the crypto module.
 */
DRIVER_MODULE(rdrand, nexus, rdrand_driver, rdrand_devclass, NULL, NULL);
MODULE_VERSION(rdrand, 1);
MODULE_DEPEND(rdrand, crypto, 1, 1, 1);
212