xref: /freebsd/sys/dev/nvdimm/nvdimm_e820.c (revision 4b9d6057)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2019 Dell EMC Isilon
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/bio.h>
30 #include <sys/bitstring.h>
31 #include <sys/bus.h>
32 #include <sys/efi.h>
33 #include <sys/kernel.h>
34 #include <sys/linker.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/sbuf.h>
39 #include <sys/uuid.h>
40 
41 #include <vm/vm_param.h>
42 
43 #include <machine/metadata.h>
44 #include <machine/pc/bios.h>
45 
46 #include <contrib/dev/acpica/include/acpi.h>
47 
48 #include <dev/nvdimm/nvdimm_var.h>
49 
/* Per-device state (softc) for the fake e820 NVDIMM bus. */
struct nvdimm_e820_bus {
	/* Singly-linked list of the SPA mappings created from hints. */
	SLIST_HEAD(, SPA_mapping) spas;
};
53 
/* Driver name; also used as the short name of the malloc type below. */
#define	NVDIMM_E820	"nvdimm_e820"

static MALLOC_DEFINE(M_NVDIMM_E820, NVDIMM_E820, "NVDIMM e820 bus memory");

/* BIOS SMAP table found by the identify routine; NULL when none was found. */
static const struct bios_smap *smapbase;
/*
 * PRAM ranges that have not been claimed by a hint.  Filled from the SMAP at
 * attach time, then shrunk or split as hints carve ranges out of it; entries
 * may end up with a size of zero.
 */
static struct {
	vm_paddr_t start;
	vm_paddr_t size;
} pram_segments[VM_PHYSSEG_MAX];
/* Number of pram_segments[] entries currently in use. */
static unsigned pram_nreg;
64 
65 static void
66 nvdimm_e820_dump_prams(device_t dev, const char *func, int hintunit)
67 {
68 	char buffer[256];
69 	struct sbuf sb;
70 	bool printed = false;
71 	unsigned i;
72 
73 	sbuf_new(&sb, buffer, sizeof(buffer), SBUF_FIXEDLEN);
74 	sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
75 
76 	sbuf_printf(&sb, "%s: %s: ", device_get_nameunit(dev), func);
77 	if (hintunit < 0)
78 		sbuf_cat(&sb, "Found BIOS PRAM regions: ");
79 	else
80 		sbuf_printf(&sb, "Remaining unallocated PRAM regions after "
81 		    "hint %d: ", hintunit);
82 
83 	for (i = 0; i < pram_nreg; i++) {
84 		if (pram_segments[i].size == 0)
85 			continue;
86 		if (printed)
87 			sbuf_putc(&sb, ',');
88 		else
89 			printed = true;
90 		sbuf_printf(&sb, "0x%jx-0x%jx",
91 		    (uintmax_t)pram_segments[i].start,
92 		    (uintmax_t)pram_segments[i].start + pram_segments[i].size
93 		    - 1);
94 	}
95 
96 	if (!printed)
97 		sbuf_cat(&sb, "<none>");
98 	sbuf_putc(&sb, '\n');
99 	sbuf_finish(&sb);
100 	sbuf_delete(&sb);
101 }
102 
103 static int
104 nvdimm_e820_create_spas(device_t dev)
105 {
106 	static const vm_size_t HINT_ALL = (vm_size_t)-1;
107 
108 	ACPI_NFIT_SYSTEM_ADDRESS nfit_sa;
109 	struct SPA_mapping *spa_mapping;
110 	enum SPA_mapping_type spa_type;
111 	struct nvdimm_e820_bus *sc;
112 	const char *hinttype;
113 	long hintaddrl, hintsizel;
114 	vm_paddr_t hintaddr;
115 	vm_size_t hintsize;
116 	unsigned i, j;
117 	int error;
118 
119 	sc = device_get_softc(dev);
120 	error = 0;
121 	nfit_sa = (ACPI_NFIT_SYSTEM_ADDRESS) { 0 };
122 
123 	if (bootverbose)
124 		nvdimm_e820_dump_prams(dev, __func__, -1);
125 
126 	for (i = 0;
127 	    resource_long_value("nvdimm_spa", i, "maddr", &hintaddrl) == 0;
128 	    i++) {
129 		if (resource_long_value("nvdimm_spa", i, "msize", &hintsizel)
130 		    != 0) {
131 			device_printf(dev, "hint.nvdimm_spa.%u missing msize\n",
132 			    i);
133 			continue;
134 		}
135 
136 		hintaddr = (vm_paddr_t)hintaddrl;
137 		hintsize = (vm_size_t)hintsizel;
138 		if ((hintaddr & PAGE_MASK) != 0 ||
139 		    ((hintsize & PAGE_MASK) != 0 && hintsize != HINT_ALL)) {
140 			device_printf(dev, "hint.nvdimm_spa.%u addr or size "
141 			    "not page aligned\n", i);
142 			continue;
143 		}
144 
145 		if (resource_string_value("nvdimm_spa", i, "type", &hinttype)
146 		    != 0) {
147 			device_printf(dev, "hint.nvdimm_spa.%u missing type\n",
148 			    i);
149 			continue;
150 		}
151 		spa_type = nvdimm_spa_type_from_name(hinttype);
152 		if (spa_type == SPA_TYPE_UNKNOWN) {
153 			device_printf(dev, "hint.nvdimm_spa%u.type does not "
154 			    "match any known SPA types\n", i);
155 			continue;
156 		}
157 
158 		for (j = 0; j < pram_nreg; j++) {
159 			if (pram_segments[j].start <= hintaddr &&
160 			    (hintsize == HINT_ALL ||
161 			    (pram_segments[j].start + pram_segments[j].size) >=
162 			    (hintaddr + hintsize)))
163 				break;
164 		}
165 
166 		if (j == pram_nreg) {
167 			device_printf(dev, "hint.nvdimm_spa%u hint does not "
168 			    "match any region\n", i);
169 			continue;
170 		}
171 
172 		/* Carve off "SPA" from available regions. */
173 		if (pram_segments[j].start == hintaddr) {
174 			/* Easy case first: beginning of segment. */
175 			if (hintsize == HINT_ALL)
176 				hintsize = pram_segments[j].size;
177 			pram_segments[j].start += hintsize;
178 			pram_segments[j].size -= hintsize;
179 			/* We might leave an empty segment; who cares. */
180 		} else if (hintsize == HINT_ALL ||
181 		    (pram_segments[j].start + pram_segments[j].size) ==
182 		    (hintaddr + hintsize)) {
183 			/* 2nd easy case: end of segment. */
184 			if (hintsize == HINT_ALL)
185 				hintsize = pram_segments[j].size -
186 				    (hintaddr - pram_segments[j].start);
187 			pram_segments[j].size -= hintsize;
188 		} else {
189 			/* Hard case: mid segment. */
190 			if (pram_nreg == nitems(pram_segments)) {
191 				/* Improbable, but handle gracefully. */
192 				device_printf(dev, "Ran out of %zu segments\n",
193 				    nitems(pram_segments));
194 				error = ENOBUFS;
195 				break;
196 			}
197 
198 			if (j != pram_nreg - 1) {
199 				memmove(&pram_segments[j + 2],
200 				    &pram_segments[j + 1],
201 				    (pram_nreg - 1 - j) *
202 				    sizeof(pram_segments[0]));
203 			}
204 			pram_nreg++;
205 
206 			pram_segments[j + 1].start = hintaddr + hintsize;
207 			pram_segments[j + 1].size =
208 			    (pram_segments[j].start + pram_segments[j].size) -
209 			    (hintaddr + hintsize);
210 			pram_segments[j].size = hintaddr -
211 			    pram_segments[j].start;
212 		}
213 
214 		if (bootverbose)
215 			nvdimm_e820_dump_prams(dev, __func__, (int)i);
216 
217 		spa_mapping = malloc(sizeof(*spa_mapping), M_NVDIMM_E820,
218 		    M_WAITOK | M_ZERO);
219 
220 		/* Mock up a super primitive table for nvdimm_spa_init(). */
221 		nfit_sa.RangeIndex = i;
222 		nfit_sa.Flags = 0;
223 		nfit_sa.Address = hintaddr;
224 		nfit_sa.Length = hintsize;
225 		nfit_sa.MemoryMapping = EFI_MD_ATTR_WB | EFI_MD_ATTR_WT |
226 		    EFI_MD_ATTR_UC;
227 
228 		error = nvdimm_spa_init(spa_mapping, &nfit_sa, spa_type);
229 		if (error != 0) {
230 			nvdimm_spa_fini(spa_mapping);
231 			free(spa_mapping, M_NVDIMM_E820);
232 			break;
233 		}
234 
235 		SLIST_INSERT_HEAD(&sc->spas, spa_mapping, link);
236 	}
237 	return (error);
238 }
239 
240 static int
241 nvdimm_e820_remove_spas(device_t dev)
242 {
243 	struct nvdimm_e820_bus *sc;
244 	struct SPA_mapping *spa, *next;
245 
246 	sc = device_get_softc(dev);
247 
248 	SLIST_FOREACH_SAFE(spa, &sc->spas, link, next) {
249 		nvdimm_spa_fini(spa);
250 		SLIST_REMOVE_HEAD(&sc->spas, link);
251 		free(spa, M_NVDIMM_E820);
252 	}
253 	return (0);
254 }
255 
256 static void
257 nvdimm_e820_identify(driver_t *driver, device_t parent)
258 {
259 	device_t child;
260 	caddr_t kmdp;
261 
262 	if (resource_disabled(driver->name, 0))
263 		return;
264 	/* Just create a single instance of the fake bus. */
265 	if (device_find_child(parent, driver->name, -1) != NULL)
266 		return;
267 
268 	kmdp = preload_search_by_type("elf kernel");
269 	if (kmdp == NULL)
270 		kmdp = preload_search_by_type("elf64 kernel");
271 	smapbase = (const void *)preload_search_info(kmdp,
272 	    MODINFO_METADATA | MODINFOMD_SMAP);
273 
274 	/* Only supports BIOS SMAP for now. */
275 	if (smapbase == NULL)
276 		return;
277 
278 	child = BUS_ADD_CHILD(parent, 0, driver->name, -1);
279 	if (child == NULL)
280 		device_printf(parent, "add %s child failed\n", driver->name);
281 }
282 
283 static int
284 nvdimm_e820_probe(device_t dev)
285 {
286 	/*
287 	 * nexus panics if a child doesn't have ivars.  BUS_ADD_CHILD uses
288 	 * nexus_add_child, which creates fuckin ivars.  but sometimes if you
289 	 * unload and reload nvdimm_e820, the device node stays but the ivars
290 	 * are deleted??? avoid trivial panic but this is a kludge.
291 	 */
292 	if (device_get_ivars(dev) == NULL)
293 		return (ENXIO);
294 
295 	device_quiet(dev);
296 	device_set_desc(dev, "Legacy e820 NVDIMM root device");
297 	return (BUS_PROBE_NOWILDCARD);
298 }
299 
300 static int
301 nvdimm_e820_attach(device_t dev)
302 {
303 	const struct bios_smap *smapend, *smap;
304 	uint32_t smapsize;
305 	unsigned nregions;
306 	int error;
307 
308 	smapsize = *((const uint32_t *)smapbase - 1);
309 	smapend = (const void *)((const char *)smapbase + smapsize);
310 
311 	for (nregions = 0, smap = smapbase; smap < smapend; smap++) {
312 		if (smap->type != SMAP_TYPE_PRAM || smap->length == 0)
313 			continue;
314 		pram_segments[nregions].start = smap->base;
315 		pram_segments[nregions].size = smap->length;
316 
317 		device_printf(dev, "Found PRAM 0x%jx +0x%jx\n",
318 		    (uintmax_t)smap->base, (uintmax_t)smap->length);
319 
320 		nregions++;
321 	}
322 
323 	if (nregions == 0) {
324 		device_printf(dev, "No e820 PRAM regions detected\n");
325 		return (ENXIO);
326 	}
327 	pram_nreg = nregions;
328 
329 	error = nvdimm_e820_create_spas(dev);
330 	return (error);
331 }
332 
333 static int
334 nvdimm_e820_detach(device_t dev)
335 {
336 	int error;
337 
338 	error = nvdimm_e820_remove_spas(dev);
339 	return (error);
340 }
341 
/* Device interface methods implemented by this driver. */
static device_method_t nvdimm_e820_methods[] = {
	DEVMETHOD(device_identify, nvdimm_e820_identify),
	DEVMETHOD(device_probe, nvdimm_e820_probe),
	DEVMETHOD(device_attach, nvdimm_e820_attach),
	DEVMETHOD(device_detach, nvdimm_e820_detach),
	DEVMETHOD_END
};

/*
 * Driver description: name, method table, and the size of the per-device
 * softc (struct nvdimm_e820_bus).
 */
static driver_t	nvdimm_e820_driver = {
	NVDIMM_E820,
	nvdimm_e820_methods,
	sizeof(struct nvdimm_e820_bus),
};
355 
356 static int
357 nvdimm_e820_chainevh(struct module *m, int e, void *arg __unused)
358 {
359 	devclass_t dc;
360 	device_t dev, parent;
361 	int i, error, maxunit;
362 
363 	switch (e) {
364 	case MOD_UNLOAD:
365 		dc = devclass_find(nvdimm_e820_driver.name);
366 		maxunit = devclass_get_maxunit(dc);
367 		for (i = 0; i < maxunit; i++) {
368 			dev = devclass_get_device(dc, i);
369 			if (dev == NULL)
370 				continue;
371 			parent = device_get_parent(dev);
372 			if (parent == NULL) {
373 				/* Not sure how this would happen. */
374 				continue;
375 			}
376 			error = device_delete_child(parent, dev);
377 			if (error != 0)
378 				return (error);
379 		}
380 		break;
381 	default:
382 		/* Prevent compiler warning about unhandled cases. */
383 		break;
384 	}
385 	return (0);
386 }
387 
/*
 * Register the driver on the nexus bus, passing nvdimm_e820_chainevh (with a
 * NULL argument) to the module registration.
 */
DRIVER_MODULE(nvdimm_e820, nexus, nvdimm_e820_driver,
    nvdimm_e820_chainevh, NULL);
390