xref: /freebsd/stand/kboot/kboot/main.c (revision 5f757f3f)
1 /*-
2  * Copyright (C) 2010-2014 Nathan Whitehorn
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include <stand.h>
27 #include <sys/param.h>
28 #include <sys/boot.h>
29 #include <fdt_platform.h>
30 
31 #include <machine/cpufunc.h>
32 #include <bootstrap.h>
33 #include "host_syscall.h"
34 #include "kboot.h"
35 #include "stand.h"
36 #include <smbios.h>
37 
38 struct arch_switch	archsw;
39 extern void *_end;
40 
41 int kboot_getdev(void **vdev, const char *devspec, const char **path);
42 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len);
43 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len);
44 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
45 int kboot_autoload(void);
46 static void kboot_zfs_probe(void);
47 
48 extern int command_fdt_internal(int argc, char *argv[]);
49 
50 #define PA_INVAL (vm_offset_t)-1
51 static vm_offset_t pa_start = PA_INVAL;
52 static vm_offset_t padding;
53 static vm_offset_t offset;
54 
55 static uint64_t commit_limit;
56 static uint64_t committed_as;
57 static uint64_t mem_avail;
58 
59 static void
60 memory_limits(void)
61 {
62 	int fd;
63 	char buf[128];
64 
65 	/*
66 	 * To properly size the slabs, we need to find how much memory we can
67 	 * commit to using. commit_limit is the max, while commited_as is the
68 	 * current total. We can use these later to allocate the largetst amount
69 	 * of memory possible so we can support larger ram disks than we could
70 	 * by using fixed segment sizes. We also grab the memory available so
71 	 * we don't use more than 49% of that.
72 	 */
73 	fd = open("host:/proc/meminfo", O_RDONLY);
74 	if (fd != -1) {
75 		while (fgetstr(buf, sizeof(buf), fd) > 0) {
76 			if (strncmp(buf, "MemAvailable:", 13) == 0) {
77 				mem_avail = strtoll(buf + 13, NULL, 0);
78 				mem_avail <<= 10; /* Units are kB */
79 			} else if (strncmp(buf, "CommitLimit:", 12) == 0) {
80 				commit_limit = strtoll(buf + 13, NULL, 0);
81 				commit_limit <<= 10; /* Units are kB */
82 			} else if (strncmp(buf, "Committed_AS:", 13) == 0) {
83 				committed_as = strtoll(buf + 14, NULL, 0);
84 				committed_as <<= 10; /* Units are kB */
85 			}
86 		}
87 	} else {
88 		/* Otherwise, on FreeBSD host, for testing 32GB host: */
89 		mem_avail = 31ul << 30;			/* 31GB free */
90 		commit_limit = mem_avail * 9 / 10;	/* 90% comittable */
91 		committed_as = 20ul << 20;		/* 20MB used */
92 	}
93 	printf("Commit limit: %lld Committed bytes %lld Available %lld\n",
94 	    (long long)commit_limit, (long long)committed_as,
95 	    (long long)mem_avail);
96 	close(fd);
97 }
98 
/*
 * Resolve a device specification into a devdesc and, optionally, the path
 * that follows it.
 *
 * A devspec that is NULL or contains no ':' is treated as a bare path on the
 * device named by the "currdev" environment variable; anything else is handed
 * to devparse() to split into device and path. Returns whatever devparse()
 * returned; *path is only written on success.
 *
 * NB: getdev should likely be identical to this most places, except maybe
 * we should move to storing the length of the platform devdesc.
 */
int
kboot_getdev(void **vdev, const char *devspec, const char **path)
{
	struct devdesc **devp = (struct devdesc **)vdev;
	int error;

	/* An explicit "device:" prefix: let devparse() take it apart. */
	if (devspec != NULL && strchr(devspec, ':') != NULL)
		return (devparse(devp, devspec, path));

	/* Just a path (or nothing at all): go with the current device. */
	error = devparse(devp, getenv("currdev"), NULL);
	if (error == 0 && path != NULL)
		*path = devspec;
	return (error);
}
125 
/*
 * Feed the loader's command line arguments to boot_parse_arg() and return the
 * OR of everything it reported.
 *
 * argv[0] is never parsed: when run as init it may be an EFI-ESP path, the
 * name of the init program, or a placeholder string. Of the remaining
 * arguments, anything that looks like a DOS-like ('\') or Unix-like ('/')
 * absolute path is skipped as well, because when linux-booting via EFI the
 * path used to run bootXXX.efi can show up among init's arguments. Everything
 * else is parsed as a command argument ('-X', 'foo' or 'foo=bar'). This
 * differs from EFI, where argv[0] is often the first real argument.
 */
static int
parse_args(int argc, const char **argv)
{
	int flags = 0;
	int idx;

	for (idx = 1; idx < argc; idx++) {
		const char *arg = argv[idx];

		/* Absolute paths are not loader arguments. */
		if (arg[0] == '\\' || arg[0] == '/')
			continue;
		flags |= boot_parse_arg(arg);
	}

	return (flags);
}
151 
/* ACPI RSDP address recorded by find_acpi(); stays 0 when none was found. */
static vm_offset_t rsdp;
153 
154 static vm_offset_t
155 kboot_rsdp_from_efi(void)
156 {
157 	char buffer[512 + 1];
158 	char *walker, *ep;
159 
160 	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
161 		return (0);	/* Not an EFI system */
162 	ep = buffer + strlen(buffer);
163 	walker = buffer;
164 	while (walker < ep) {
165 		if (strncmp("ACPI20=", walker, 7) == 0)
166 			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
167 		if (strncmp("ACPI=", walker, 5) == 0)
168 			return((vm_offset_t)strtoull(walker + 5, NULL, 0));
169 		walker += strcspn(walker, "\n") + 1;
170 	}
171 	return (0);
172 }
173 
174 static void
175 find_acpi(void)
176 {
177 	rsdp = kboot_rsdp_from_efi();
178 #if 0	/* maybe for amd64 */
179 	if (rsdp == 0)
180 		rsdp = find_rsdp_arch();
181 #endif
182 }
183 
/*
 * Return the RSDP address recorded by find_acpi(), or 0 if none was found
 * (or find_acpi() has not run yet).
 */
vm_offset_t
acpi_rsdp(void)
{
	return (rsdp);
}
189 
190 bool
191 has_acpi(void)
192 {
193 	return rsdp != 0;
194 }
195 
196 /*
197  * SMBIOS support. We map the physical memory address we get into a VA in this
198  * address space with mmap with 64k pages. Once we're done, we cleanup any
199  * mappings we made.
200  */
201 
#define MAX_MAP	10		/* number of slots in map[] */
#define PAGE	(64<<10)	/* size of each mapped window: 64 KiB */

/* One cached window created by ptov(). */
static struct mapping
{
	uintptr_t pa;	/* window start: the requested address rounded down to PAGE */
	caddr_t va;	/* address host_mmap() returned for that window */
} map[MAX_MAP];
static int smbios_fd;	/* descriptor for /dev/mem, opened in find_smbios() */
static int nmap;	/* number of map[] slots in use */
212 
213 caddr_t ptov(uintptr_t pa)
214 {
215 	caddr_t va;
216 	uintptr_t pa2;
217 	struct mapping *m = map;
218 
219 	pa2 = rounddown(pa, PAGE);
220 	for (int i = 0; i < nmap; i++, m++) {
221 		if (m->pa == pa2) {
222 			return (m->va + pa - m->pa);
223 		}
224 	}
225 	if (nmap == MAX_MAP)
226 		panic("Too many maps for smbios");
227 
228 	/*
229 	 * host_mmap returns small negative numbers on errors, can't return an
230 	 * error here, so we have to panic. The Linux wrapper will set errno
231 	 * based on this and then return HOST_MAP_FAILED. Since we're calling
232 	 * the raw system call we have to do that ourselves.
233 	 */
234 	va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2);
235 	if (is_linux_error((long)va))
236 		panic("smbios mmap offset %#jx failed", (uintmax_t)pa2);
237 	m = &map[nmap++];
238 	m->pa = pa2;
239 	m->va = va;
240 	return (m->va + pa - m->pa);
241 }
242 
243 static void
244 smbios_cleanup(void)
245 {
246 	for (int i = 0; i < nmap; i++) {
247 		host_munmap(map[i].va, PAGE);
248 	}
249 }
250 
251 static vm_offset_t
252 kboot_find_smbios(void)
253 {
254 	char buffer[512 + 1];
255 	char *walker, *ep;
256 
257 	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
258 		return (0);	/* Not an EFI system */
259 	ep = buffer + strlen(buffer);
260 	walker = buffer;
261 	while (walker <= ep) {
262 		if (strncmp("SMBIOS3=", walker, 8) == 0)
263 			return((vm_offset_t)strtoull(walker + 8, NULL, 0));
264 		if (strncmp("SMBIOS=", walker, 7) == 0)
265 			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
266 		walker += strcspn(walker, "\n") + 1;
267 	}
268 	return (0);
269 }
270 
271 static void
272 find_smbios(void)
273 {
274 	char buf[40];
275 	uintptr_t pa;
276 	caddr_t va;
277 
278 	pa = kboot_find_smbios();
279 	printf("SMBIOS at %#jx\n", (uintmax_t)pa);
280 	if (pa == 0)
281 		return;
282 
283 	snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa);
284 	setenv("hint.smbios.0.mem", buf, 1);
285 	smbios_fd = host_open("/dev/mem", O_RDONLY, 0);
286 	if (smbios_fd < 0) {
287 		printf("Can't open /dev/mem to read smbios\n");
288 		return;
289 	}
290 	va = ptov(pa);
291 	printf("Start of smbios at pa %p va %p\n", (void *)pa, va);
292 	smbios_detect(va);
293 	smbios_cleanup();
294 	host_close(smbios_fd);
295 }
296 
/*
 * Read the whole of file fn and pass its contents, NUL-terminated, to
 * boot_parse_cmdline(). A file that is missing, cannot be opened, or cannot
 * be read in full is silently ignored.
 */
static void
parse_file(const char *fn)
{
	struct stat sb;
	char *text;
	int fd;

	if (stat(fn, &sb) != 0)
		return;
	if ((fd = open(fn, O_RDONLY)) == -1)
		return;

	/* One extra byte for the terminator. */
	text = malloc(sb.st_size + 1);
	if (text != NULL && read(fd, text, sb.st_size) == sb.st_size) {
		text[sb.st_size] = '\0';
		boot_parse_cmdline(text);
	}

	free(text);
	close(fd);
}
320 
321 
322 int
323 main(int argc, const char **argv)
324 {
325 	void *heapbase;
326 	const size_t heapsize = 64*1024*1024;
327 	const char *bootdev;
328 
329 	archsw.arch_getdev = kboot_getdev;
330 	archsw.arch_copyin = kboot_copyin;
331 	archsw.arch_copyout = kboot_copyout;
332 	archsw.arch_readin = kboot_readin;
333 	archsw.arch_autoload = kboot_autoload;
334 	archsw.arch_zfs_probe = kboot_zfs_probe;
335 
336 	/* Give us a sane world if we're running as init */
337 	do_init();
338 
339 	/*
340 	 * Setup the heap, 64MB is minimum for ZFS booting
341 	 */
342 	heapbase = host_getmem(heapsize);
343 	setheap(heapbase, heapbase + heapsize);
344 
345 	/* Parse the command line args -- ignoring for now the console selection */
346 	parse_args(argc, argv);
347 
348 	parse_file("host:/kboot.conf");
349 
350 	/*
351 	 * Set up console.
352 	 */
353 	cons_probe();
354 
355 	/* Initialize all the devices */
356 	devinit();
357 
358 	bootdev = getenv("bootdev");
359 	if (bootdev == NULL)
360 		bootdev = hostdisk_gen_probe();
361 	hostfs_root = getenv("hostfs_root");
362 	if (hostfs_root == NULL)
363 		hostfs_root = "/";
364 #if defined(LOADER_ZFS_SUPPORT)
365 	if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) {
366 		/*
367 		 * Pseudo device that says go find the right ZFS pool. This will be
368 		 * the first pool that we find that passes the sanity checks (eg looks
369 		 * like it might be vbootable) and sets currdev to the right thing based
370 		 * on active BEs, etc
371 		 */
372 		if (hostdisk_zfs_find_default())
373 			bootdev = getenv("currdev");
374 	}
375 #endif
376 	if (bootdev != NULL) {
377 		/*
378 		 * Otherwise, honor what's on the command line. If we've been
379 		 * given a specific ZFS partition, then we'll honor it w/o BE
380 		 * processing that would otherwise pick a different snapshot to
381 		 * boot than the default one in the pool.
382 		 */
383 		set_currdev(bootdev);
384 	} else {
385 		panic("Bootdev is still NULL");
386 	}
387 
388 	printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root);
389 
390 	printf("\n%s", bootprog_info);
391 
392 	setenv("LINES", "24", 1);
393 
394 	memory_limits();
395 	enumerate_memory_arch();
396 
397 	/*
398 	 * Find acpi, if it exists
399 	 */
400 	find_acpi();
401 
402 	find_smbios();
403 
404 	interact();			/* doesn't return */
405 
406 	return (0);
407 }
408 
/*
 * Terminate the loader by passing code to host_exit(). Control is not
 * expected to come back, which __unreachable() states for the compiler.
 */
void
exit(int code)
{
	host_exit(code);
	__unreachable();
}
415 
416 void
417 delay(int usecs)
418 {
419 	struct host_timeval tvi, tv;
420 	uint64_t ti, t;
421 	host_gettimeofday(&tvi, NULL);
422 	ti = tvi.tv_sec*1000000 + tvi.tv_usec;
423 	do {
424 		host_gettimeofday(&tv, NULL);
425 		t = tv.tv_sec*1000000 + tv.tv_usec;
426 	} while (t < ti + usecs);
427 }
428 
429 time_t
430 getsecs(void)
431 {
432 	struct host_timeval tv;
433 	host_gettimeofday(&tv, NULL);
434 	return (tv.tv_sec);
435 }
436 
/*
 * time(3) replacement built on getsecs(): returns the current seconds value
 * and, when tloc is not NULL, also stores it there.
 */
time_t
time(time_t *tloc)
{
	const time_t now = getsecs();

	if (tloc != NULL)
		*tloc = now;
	return (now);
}
448 
/* Segment table filled in by get_phys_buffer() and handed out by kboot_kseg_get(). */
struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX];
/* Number of loaded_segments[] entries in use. */
int nkexec_segments = 0;

/* A new segment's target address is rounded down to this boundary (1 MiB). */
#define SEGALIGN (1ul<<20)
453 
454 static ssize_t
455 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
456 {
457 	int i = 0;
458 	const size_t segsize = 64*1024*1024;
459 	size_t sz, amt, l;
460 
461 	if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX)
462 		panic("Tried to load too many kexec segments");
463 	for (i = 0; i < nkexec_segments; i++) {
464 		if (dest >= (vm_offset_t)loaded_segments[i].mem &&
465 		    dest < (vm_offset_t)loaded_segments[i].mem +
466 		    loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */
467 			goto out;
468 	}
469 
470 	sz = segsize;
471 	if (nkexec_segments == 0) {
472 		/* how much space does this segment have */
473 		sz = space_avail(dest);
474 		/* Clip to 45% of available memory (need 2 copies) */
475 		sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN));
476 		printf("limit to 45%% of mem_avail %zd\n", sz);
477 		/* And only use 95% of what we can allocate */
478 		sz = MIN(sz,
479 		    rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN));
480 		printf("Allocating %zd MB for first segment\n", sz >> 20);
481 	}
482 
483 	loaded_segments[nkexec_segments].buf = host_getmem(sz);
484 	loaded_segments[nkexec_segments].bufsz = sz;
485 	loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN);
486 	loaded_segments[nkexec_segments].memsz = 0;
487 
488 	i = nkexec_segments;
489 	nkexec_segments++;
490 
491 out:
492 	/*
493 	 * Keep track of the highest amount used in a segment
494 	 */
495 	amt = dest - (vm_offset_t)loaded_segments[i].mem;
496 	l = min(len,loaded_segments[i].bufsz - amt);
497 	*buf = loaded_segments[i].buf + amt;
498 	if (amt + l > loaded_segments[i].memsz)
499 		loaded_segments[i].memsz = amt + l;
500 	return (l);
501 }
502 
503 ssize_t
504 kboot_copyin(const void *src, vm_offset_t dest, const size_t len)
505 {
506 	ssize_t segsize, remainder;
507 	void *destbuf;
508 
509 	if (pa_start == PA_INVAL) {
510 		pa_start = kboot_get_phys_load_segment();
511 //		padding = 2 << 20; /* XXX amd64: revisit this when we make it work */
512 		padding = 0;
513 		offset = dest;
514 		get_phys_buffer(pa_start, len, &destbuf);
515 	}
516 
517 	remainder = len;
518 	do {
519 		segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf);
520 		bcopy(src, destbuf, segsize);
521 		remainder -= segsize;
522 		src += segsize;
523 		dest += segsize;
524 	} while (remainder > 0);
525 
526 	return (len);
527 }
528 
529 ssize_t
530 kboot_copyout(vm_offset_t src, void *dest, const size_t len)
531 {
532 	ssize_t segsize, remainder;
533 	void *srcbuf;
534 
535 	remainder = len;
536 	do {
537 		segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf);
538 		bcopy(srcbuf, dest, segsize);
539 		remainder -= segsize;
540 		src += segsize;
541 		dest += segsize;
542 	} while (remainder > 0);
543 
544 	return (len);
545 }
546 
547 ssize_t
548 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
549 {
550 	void            *buf;
551 	size_t          resid, chunk, get;
552 	ssize_t         got;
553 	vm_offset_t     p;
554 
555 	p = dest;
556 
557 	chunk = min(PAGE_SIZE, len);
558 	buf = malloc(chunk);
559 	if (buf == NULL) {
560 		printf("kboot_readin: buf malloc failed\n");
561 		return (0);
562 	}
563 
564 	for (resid = len; resid > 0; resid -= got, p += got) {
565 		get = min(chunk, resid);
566 		got = VECTX_READ(fd, buf, get);
567 		if (got <= 0) {
568 			if (got < 0)
569 				printf("kboot_readin: read failed\n");
570 			break;
571 		}
572 
573 		kboot_copyin(buf, p, got);
574 	}
575 
576 	free (buf);
577 	return (len - resid);
578 }
579 
/* archsw autoload hook: there is nothing to load automatically; always returns 0. */
int
kboot_autoload(void)
{
	return (0);
}
586 
587 void
588 kboot_kseg_get(int *nseg, void **ptr)
589 {
590 	printf("kseg_get: %d segments\n", nkexec_segments);
591 	printf("VA               SZ       PA               MEMSZ\n");
592 	printf("---------------- -------- ---------------- -----\n");
593 	for (int a = 0; a < nkexec_segments; a++) {
594 		/*
595 		 * Truncate each segment to just what we've used in the segment,
596 		 * rounded up to the next page.
597 		 */
598 		loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE);
599 		loaded_segments[a].bufsz = loaded_segments[a].memsz;
600 		printf("%016jx %08jx %016jx %08jx\n",
601 			(uintmax_t)loaded_segments[a].buf,
602 			(uintmax_t)loaded_segments[a].bufsz,
603 			(uintmax_t)loaded_segments[a].mem,
604 			(uintmax_t)loaded_segments[a].memsz);
605 	}
606 
607 	*nseg = nkexec_segments;
608 	*ptr = &loaded_segments[0];
609 }
610 
/*
 * archsw zfs_probe hook. With LOADER_ZFS_SUPPORT defined it forwards to
 * hostdisk_zfs_probe(); otherwise it does nothing.
 */
static void
kboot_zfs_probe(void)
{
#if defined(LOADER_ZFS_SUPPORT)
	/*
	 * Open all the disks and partitions we can find to see if there are ZFS
	 * pools on them.
	 */
	hostdisk_zfs_probe();
#endif
}
622 
/*
 * The real fdt command handler, command_fdt_internal(), is defined in
 * fdt_loader_cmd.c. Declaring it extern here conflicts with the COMMAND_SET()
 * macro (which uses a static pointer), so the command is registered through
 * this static wrapper, which simply forwards its arguments and returns the
 * handler's result.
 */
static int
command_fdt(int argc, char *argv[])
{

	return (command_fdt_internal(argc, argv));
}

/* Register the wrapper as the "fdt" loader command. */
COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
637 
638