xref: /freebsd/stand/kboot/kboot/main.c (revision 1edb7116)
1 /*-
2  * Copyright (C) 2010-2014 Nathan Whitehorn
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include <sys/cdefs.h>
27 #include <stand.h>
28 #include <sys/param.h>
29 #include <sys/boot.h>
30 #include <fdt_platform.h>
31 
32 #include <machine/cpufunc.h>
33 #include <bootstrap.h>
34 #include "host_syscall.h"
35 #include "kboot.h"
36 #include "stand.h"
37 #include <smbios.h>
38 
39 struct arch_switch	archsw;
40 extern void *_end;
41 
42 int kboot_getdev(void **vdev, const char *devspec, const char **path);
43 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len);
44 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len);
45 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
46 int kboot_autoload(void);
47 static void kboot_zfs_probe(void);
48 
49 extern int command_fdt_internal(int argc, char *argv[]);
50 
51 #define PA_INVAL (vm_offset_t)-1
52 static vm_offset_t pa_start = PA_INVAL;
53 static vm_offset_t padding;
54 static vm_offset_t offset;
55 
56 static uint64_t commit_limit;
57 static uint64_t committed_as;
58 static uint64_t mem_avail;
59 
60 static void
61 memory_limits(void)
62 {
63 	int fd;
64 	char buf[128];
65 
66 	/*
67 	 * To properly size the slabs, we need to find how much memory we can
68 	 * commit to using. commit_limit is the max, while commited_as is the
69 	 * current total. We can use these later to allocate the largetst amount
70 	 * of memory possible so we can support larger ram disks than we could
71 	 * by using fixed segment sizes. We also grab the memory available so
72 	 * we don't use more than 49% of that.
73 	 */
74 	fd = open("host:/proc/meminfo", O_RDONLY);
75 	if (fd != -1) {
76 		while (fgetstr(buf, sizeof(buf), fd) > 0) {
77 			if (strncmp(buf, "MemAvailable:", 13) == 0) {
78 				mem_avail = strtoll(buf + 13, NULL, 0);
79 				mem_avail <<= 10; /* Units are kB */
80 			} else if (strncmp(buf, "CommitLimit:", 12) == 0) {
81 				commit_limit = strtoll(buf + 13, NULL, 0);
82 				commit_limit <<= 10; /* Units are kB */
83 			} else if (strncmp(buf, "Committed_AS:", 13) == 0) {
84 				committed_as = strtoll(buf + 14, NULL, 0);
85 				committed_as <<= 10; /* Units are kB */
86 			}
87 		}
88 	} else {
89 		/* Otherwise, on FreeBSD host, for testing 32GB host: */
90 		mem_avail = 31ul << 30;			/* 31GB free */
91 		commit_limit = mem_avail * 9 / 10;	/* 90% comittable */
92 		committed_as = 20ul << 20;		/* 20MB used */
93 	}
94 	printf("Commit limit: %lld Committed bytes %lld Available %lld\n",
95 	    (long long)commit_limit, (long long)committed_as,
96 	    (long long)mem_avail);
97 	close(fd);
98 }
99 
/*
 * Resolve a device specification for the loader.
 *
 * A devspec containing a ':' is handed to devparse() as is, which also fills
 * in *path. Anything else (including a NULL devspec) is treated as a bare
 * path on the device named by the "currdev" environment variable; in that
 * case *path is set to devspec itself when parsing succeeds and path is not
 * NULL. Returns whatever devparse() returned.
 *
 * NB: getdev should likely be identical to this most places, except maybe
 * we should move to storing the length of the platform devdesc.
 */
int
kboot_getdev(void **vdev, const char *devspec, const char **path)
{
	struct devdesc **devp = (struct devdesc **)vdev;
	int error;

	/* An explicit device name is present: parse it off the front. */
	if (devspec != NULL && strchr(devspec, ':') != NULL)
		return (devparse(devp, devspec, path));

	/* Just a path (or nothing at all): go with the current device. */
	error = devparse(devp, getenv("currdev"), NULL);
	if (error == 0 && path != NULL)
		*path = devspec;
	return (error);
}
126 
/*
 * Feed the command line arguments to boot_parse_arg() and return the OR of
 * its results.
 */
static int
parse_args(int argc, const char **argv)
{
	int flags = 0;

	/*
	 * When run as init, sometimes argv[0] is an EFI-ESP path, other times
	 * it's the name of the init program, and sometimes it's a placeholder
	 * string, so we exclude it here. For the other args, look for DOS-like
	 * and Unix-like absolute paths and skip any we find; everything else is
	 * parsed as a command arg (so looking for '-X', 'foo' or 'foo=bar').
	 * This is a little different than EFI, where argv[0] is often the
	 * first argument passed in. There are cases when linux-booting via EFI
	 * that we have the EFI path we used to run bootXXX.efi as the
	 * arguments to init, so we need to exclude the paths there as well.
	 */
	for (int n = 1; n < argc; n++) {
		const char *arg = argv[n];

		if (arg[0] == '\\' || arg[0] == '/')
			continue;
		flags |= boot_parse_arg(arg);
	}

	return (flags);
}
152 
153 static vm_offset_t rsdp;
154 
155 static vm_offset_t
156 kboot_rsdp_from_efi(void)
157 {
158 	char buffer[512 + 1];
159 	char *walker, *ep;
160 
161 	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
162 		return (0);	/* Not an EFI system */
163 	ep = buffer + strlen(buffer);
164 	walker = buffer;
165 	while (walker < ep) {
166 		if (strncmp("ACPI20=", walker, 7) == 0)
167 			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
168 		if (strncmp("ACPI=", walker, 5) == 0)
169 			return((vm_offset_t)strtoull(walker + 5, NULL, 0));
170 		walker += strcspn(walker, "\n") + 1;
171 	}
172 	return (0);
173 }
174 
175 static void
176 find_acpi(void)
177 {
178 	rsdp = kboot_rsdp_from_efi();
179 #if 0	/* maybe for amd64 */
180 	if (rsdp == 0)
181 		rsdp = find_rsdp_arch();
182 #endif
183 }
184 
185 vm_offset_t
186 acpi_rsdp(void)
187 {
188 	return (rsdp);
189 }
190 
191 bool
192 has_acpi(void)
193 {
194 	return rsdp != 0;
195 }
196 
/*
 * SMBIOS support. We map the physical memory address we get into a VA in this
 * address space with mmap, using 64k pages. Once we're done, we clean up any
 * mappings we made.
 */
202 
203 #define MAX_MAP	10
204 #define PAGE	(64<<10)
205 
206 static struct mapping
207 {
208 	uintptr_t pa;
209 	caddr_t va;
210 } map[MAX_MAP];
211 static int smbios_fd;
212 static int nmap;
213 
214 caddr_t ptov(uintptr_t pa)
215 {
216 	caddr_t va;
217 	uintptr_t pa2;
218 	struct mapping *m = map;
219 
220 	pa2 = rounddown(pa, PAGE);
221 	for (int i = 0; i < nmap; i++, m++) {
222 		if (m->pa == pa2) {
223 			return (m->va + pa - m->pa);
224 		}
225 	}
226 	if (nmap == MAX_MAP)
227 		panic("Too many maps for smbios");
228 
229 	/*
230 	 * host_mmap returns small negative numbers on errors, can't return an
231 	 * error here, so we have to panic. The Linux wrapper will set errno
232 	 * based on this and then return HOST_MAP_FAILED. Since we're calling
233 	 * the raw system call we have to do that ourselves.
234 	 */
235 	va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2);
236 	if ((intptr_t)va < 0 && (intptr_t)va >= -511)
237 		panic("smbios mmap offset %#jx failed", (uintmax_t)pa2);
238 	m = &map[nmap++];
239 	m->pa = pa2;
240 	m->va = va;
241 	return (m->va + pa - m->pa);
242 }
243 
244 static void
245 smbios_cleanup(void)
246 {
247 	for (int i = 0; i < nmap; i++) {
248 		host_munmap(map[i].va, PAGE);
249 	}
250 }
251 
252 static vm_offset_t
253 kboot_find_smbios(void)
254 {
255 	char buffer[512 + 1];
256 	char *walker, *ep;
257 
258 	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
259 		return (0);	/* Not an EFI system */
260 	ep = buffer + strlen(buffer);
261 	walker = buffer;
262 	while (walker <= ep) {
263 		if (strncmp("SMBIOS3=", walker, 8) == 0)
264 			return((vm_offset_t)strtoull(walker + 8, NULL, 0));
265 		if (strncmp("SMBIOS=", walker, 7) == 0)
266 			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
267 		walker += strcspn(walker, "\n") + 1;
268 	}
269 	return (0);
270 }
271 
272 static void
273 find_smbios(void)
274 {
275 	char buf[40];
276 	uintptr_t pa;
277 	caddr_t va;
278 
279 	pa = kboot_find_smbios();
280 	printf("SMBIOS at %#jx\n", (uintmax_t)pa);
281 	if (pa == 0)
282 		return;
283 
284 	snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa);
285 	setenv("hint.smbios.0.mem", buf, 1);
286 	smbios_fd = host_open("/dev/mem", O_RDONLY, 0);
287 	if (smbios_fd < 0) {
288 		printf("Can't open /dev/mem to read smbios\n");
289 		return;
290 	}
291 	va = ptov(pa);
292 	printf("Start of smbios at pa %p va %p\n", (void *)pa, va);
293 	smbios_detect(va);
294 	smbios_cleanup();
295 	host_close(smbios_fd);
296 }
297 
/*
 * Read the whole of file fn and hand its contents, NUL terminated, to
 * boot_parse_cmdline(). Silently does nothing if the file cannot be
 * stat'ed, opened, buffered or read in full.
 */
static void
parse_file(const char *fn)
{
	struct stat sb;
	char *text;
	int fd;

	if (stat(fn, &sb) != 0)
		return;
	if ((fd = open(fn, O_RDONLY)) == -1)
		return;

	/* One extra byte for the terminator. */
	text = malloc(sb.st_size + 1);
	if (text != NULL && read(fd, text, sb.st_size) == sb.st_size) {
		text[sb.st_size] = '\0';
		boot_parse_cmdline(text);
	}

	free(text);
	close(fd);
}
321 
322 
323 int
324 main(int argc, const char **argv)
325 {
326 	void *heapbase;
327 	const size_t heapsize = 64*1024*1024;
328 	const char *bootdev;
329 
330 	archsw.arch_getdev = kboot_getdev;
331 	archsw.arch_copyin = kboot_copyin;
332 	archsw.arch_copyout = kboot_copyout;
333 	archsw.arch_readin = kboot_readin;
334 	archsw.arch_autoload = kboot_autoload;
335 	archsw.arch_zfs_probe = kboot_zfs_probe;
336 
337 	/* Give us a sane world if we're running as init */
338 	do_init();
339 
340 	/*
341 	 * Setup the heap, 64MB is minimum for ZFS booting
342 	 */
343 	heapbase = host_getmem(heapsize);
344 	setheap(heapbase, heapbase + heapsize);
345 
346 	/* Parse the command line args -- ignoring for now the console selection */
347 	parse_args(argc, argv);
348 
349 	parse_file("host:/kboot.conf");
350 
351 	/*
352 	 * Set up console.
353 	 */
354 	cons_probe();
355 
356 	/* Initialize all the devices */
357 	devinit();
358 
359 	bootdev = getenv("bootdev");
360 	if (bootdev == NULL)
361 		bootdev = hostdisk_gen_probe();
362 	hostfs_root = getenv("hostfs_root");
363 	if (hostfs_root == NULL)
364 		hostfs_root = "/";
365 #if defined(LOADER_ZFS_SUPPORT)
366 	if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) {
367 		/*
368 		 * Pseudo device that says go find the right ZFS pool. This will be
369 		 * the first pool that we find that passes the sanity checks (eg looks
370 		 * like it might be vbootable) and sets currdev to the right thing based
371 		 * on active BEs, etc
372 		 */
373 		if (hostdisk_zfs_find_default())
374 			bootdev = getenv("currdev");
375 	}
376 #endif
377 	if (bootdev != NULL) {
378 		/*
379 		 * Otherwise, honor what's on the command line. If we've been
380 		 * given a specific ZFS partition, then we'll honor it w/o BE
381 		 * processing that would otherwise pick a different snapshot to
382 		 * boot than the default one in the pool.
383 		 */
384 		set_currdev(bootdev);
385 	} else {
386 		panic("Bootdev is still NULL");
387 	}
388 
389 	printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root);
390 
391 	printf("\n%s", bootprog_info);
392 
393 	setenv("LINES", "24", 1);
394 
395 	memory_limits();
396 	enumerate_memory_arch();
397 
398 	/*
399 	 * Find acpi, if it exists
400 	 */
401 	find_acpi();
402 
403 	find_smbios();
404 
405 	interact();			/* doesn't return */
406 
407 	return (0);
408 }
409 
/* Terminate the loader by passing code to host_exit(). */
void
exit(int code)
{

	host_exit(code);
	/* Control is not expected to get here. */
	__unreachable();
}
416 
417 void
418 delay(int usecs)
419 {
420 	struct host_timeval tvi, tv;
421 	uint64_t ti, t;
422 	host_gettimeofday(&tvi, NULL);
423 	ti = tvi.tv_sec*1000000 + tvi.tv_usec;
424 	do {
425 		host_gettimeofday(&tv, NULL);
426 		t = tv.tv_sec*1000000 + tv.tv_usec;
427 	} while (t < ti + usecs);
428 }
429 
430 time_t
431 getsecs(void)
432 {
433 	struct host_timeval tv;
434 	host_gettimeofday(&tv, NULL);
435 	return (tv.tv_sec);
436 }
437 
/*
 * time(3) replacement built on getsecs(): returns the current seconds count
 * and also stores it through tloc when tloc is not NULL.
 */
time_t
time(time_t *tloc)
{
	const time_t now = getsecs();

	if (tloc != NULL)
		*tloc = now;
	return (now);
}
449 
450 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX];
451 int nkexec_segments = 0;
452 
453 #define SEGALIGN (1ul<<20)
454 
455 static ssize_t
456 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
457 {
458 	int i = 0;
459 	const size_t segsize = 64*1024*1024;
460 	size_t sz, amt, l;
461 
462 	if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX)
463 		panic("Tried to load too many kexec segments");
464 	for (i = 0; i < nkexec_segments; i++) {
465 		if (dest >= (vm_offset_t)loaded_segments[i].mem &&
466 		    dest < (vm_offset_t)loaded_segments[i].mem +
467 		    loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */
468 			goto out;
469 	}
470 
471 	sz = segsize;
472 	if (nkexec_segments == 0) {
473 		/* how much space does this segment have */
474 		sz = space_avail(dest);
475 		/* Clip to 45% of available memory (need 2 copies) */
476 		sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN));
477 		printf("limit to 45%% of mem_avail %zd\n", sz);
478 		/* And only use 95% of what we can allocate */
479 		sz = MIN(sz,
480 		    rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN));
481 		printf("Allocating %zd MB for first segment\n", sz >> 20);
482 	}
483 
484 	loaded_segments[nkexec_segments].buf = host_getmem(sz);
485 	loaded_segments[nkexec_segments].bufsz = sz;
486 	loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN);
487 	loaded_segments[nkexec_segments].memsz = 0;
488 
489 	i = nkexec_segments;
490 	nkexec_segments++;
491 
492 out:
493 	/*
494 	 * Keep track of the highest amount used in a segment
495 	 */
496 	amt = dest - (vm_offset_t)loaded_segments[i].mem;
497 	l = min(len,loaded_segments[i].bufsz - amt);
498 	*buf = loaded_segments[i].buf + amt;
499 	if (amt + l > loaded_segments[i].memsz)
500 		loaded_segments[i].memsz = amt + l;
501 	return (l);
502 }
503 
504 ssize_t
505 kboot_copyin(const void *src, vm_offset_t dest, const size_t len)
506 {
507 	ssize_t segsize, remainder;
508 	void *destbuf;
509 
510 	if (pa_start == PA_INVAL) {
511 		pa_start = kboot_get_phys_load_segment();
512 //		padding = 2 << 20; /* XXX amd64: revisit this when we make it work */
513 		padding = 0;
514 		offset = dest;
515 		get_phys_buffer(pa_start, len, &destbuf);
516 	}
517 
518 	remainder = len;
519 	do {
520 		segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf);
521 		bcopy(src, destbuf, segsize);
522 		remainder -= segsize;
523 		src += segsize;
524 		dest += segsize;
525 	} while (remainder > 0);
526 
527 	return (len);
528 }
529 
530 ssize_t
531 kboot_copyout(vm_offset_t src, void *dest, const size_t len)
532 {
533 	ssize_t segsize, remainder;
534 	void *srcbuf;
535 
536 	remainder = len;
537 	do {
538 		segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf);
539 		bcopy(srcbuf, dest, segsize);
540 		remainder -= segsize;
541 		src += segsize;
542 		dest += segsize;
543 	} while (remainder > 0);
544 
545 	return (len);
546 }
547 
548 ssize_t
549 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
550 {
551 	void            *buf;
552 	size_t          resid, chunk, get;
553 	ssize_t         got;
554 	vm_offset_t     p;
555 
556 	p = dest;
557 
558 	chunk = min(PAGE_SIZE, len);
559 	buf = malloc(chunk);
560 	if (buf == NULL) {
561 		printf("kboot_readin: buf malloc failed\n");
562 		return (0);
563 	}
564 
565 	for (resid = len; resid > 0; resid -= got, p += got) {
566 		get = min(chunk, resid);
567 		got = VECTX_READ(fd, buf, get);
568 		if (got <= 0) {
569 			if (got < 0)
570 				printf("kboot_readin: read failed\n");
571 			break;
572 		}
573 
574 		kboot_copyin(buf, p, got);
575 	}
576 
577 	free (buf);
578 	return (len - resid);
579 }
580 
/* archsw autoload hook: does no work and always returns 0. */
int
kboot_autoload(void)
{
	return (0);
}
587 
588 void
589 kboot_kseg_get(int *nseg, void **ptr)
590 {
591 	printf("kseg_get: %d segments\n", nkexec_segments);
592 	printf("VA               SZ       PA               MEMSZ\n");
593 	printf("---------------- -------- ---------------- -----\n");
594 	for (int a = 0; a < nkexec_segments; a++) {
595 		/*
596 		 * Truncate each segment to just what we've used in the segment,
597 		 * rounded up to the next page.
598 		 */
599 		loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE);
600 		loaded_segments[a].bufsz = loaded_segments[a].memsz;
601 		printf("%016jx %08jx %016jx %08jx\n",
602 			(uintmax_t)loaded_segments[a].buf,
603 			(uintmax_t)loaded_segments[a].bufsz,
604 			(uintmax_t)loaded_segments[a].mem,
605 			(uintmax_t)loaded_segments[a].memsz);
606 	}
607 
608 	*nseg = nkexec_segments;
609 	*ptr = &loaded_segments[0];
610 }
611 
/*
 * archsw ZFS probe hook. Calls hostdisk_zfs_probe() when the loader is built
 * with LOADER_ZFS_SUPPORT and does nothing otherwise.
 */
static void
kboot_zfs_probe(void)
{
#if defined(LOADER_ZFS_SUPPORT)
	/*
	 * Open all the disks and partitions we can find to see if there are ZFS
	 * pools on them.
	 */
	hostdisk_zfs_probe();
#endif
}
623 
624 /*
625  * Since proper fdt command handling function is defined in fdt_loader_cmd.c,
626  * and declaring it as extern is in contradiction with COMMAND_SET() macro
627  * (which uses static pointer), we're defining wrapper function, which
628  * calls the proper fdt handling routine.
629  */
630 static int
631 command_fdt(int argc, char *argv[])
632 {
633 
634 	return (command_fdt_internal(argc, argv));
635 }
636 
637 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
638 
639