1 /*-
2 * Copyright (C) 2010-2014 Nathan Whitehorn
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include <stand.h>
27 #include <sys/param.h>
28 #include <sys/boot.h>
29 #include <fdt_platform.h>
30
31 #include <machine/cpufunc.h>
32 #include <bootstrap.h>
33 #include "host_syscall.h"
34 #include "kboot.h"
35 #include "stand.h"
36 #include <smbios.h>
37
38 struct arch_switch archsw;
39 extern void *_end;
40
41 int kboot_getdev(void **vdev, const char *devspec, const char **path);
42 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len);
43 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len);
44 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
45 int kboot_autoload(void);
46 static void kboot_zfs_probe(void);
47
48 extern int command_fdt_internal(int argc, char *argv[]);
49
50 #define PA_INVAL (vm_offset_t)-1
51 static vm_offset_t pa_start = PA_INVAL;
52 static vm_offset_t padding;
53 static vm_offset_t offset;
54
55 static uint64_t commit_limit;
56 static uint64_t committed_as;
57 static uint64_t mem_avail;
58
59 static void
memory_limits(void)60 memory_limits(void)
61 {
62 int fd;
63 char buf[128];
64
65 /*
66 * To properly size the slabs, we need to find how much memory we can
67 * commit to using. commit_limit is the max, while commited_as is the
68 * current total. We can use these later to allocate the largetst amount
69 * of memory possible so we can support larger ram disks than we could
70 * by using fixed segment sizes. We also grab the memory available so
71 * we don't use more than 49% of that.
72 */
73 fd = open("host:/proc/meminfo", O_RDONLY);
74 if (fd != -1) {
75 while (fgetstr(buf, sizeof(buf), fd) > 0) {
76 if (strncmp(buf, "MemAvailable:", 13) == 0) {
77 mem_avail = strtoll(buf + 13, NULL, 0);
78 mem_avail <<= 10; /* Units are kB */
79 } else if (strncmp(buf, "CommitLimit:", 12) == 0) {
80 commit_limit = strtoll(buf + 13, NULL, 0);
81 commit_limit <<= 10; /* Units are kB */
82 } else if (strncmp(buf, "Committed_AS:", 13) == 0) {
83 committed_as = strtoll(buf + 14, NULL, 0);
84 committed_as <<= 10; /* Units are kB */
85 }
86 }
87 } else {
88 /* Otherwise, on FreeBSD host, for testing 32GB host: */
89 mem_avail = 31ul << 30; /* 31GB free */
90 commit_limit = mem_avail * 9 / 10; /* 90% comittable */
91 committed_as = 20ul << 20; /* 20MB used */
92 }
93 printf("Commit limit: %lld Committed bytes %lld Available %lld\n",
94 (long long)commit_limit, (long long)committed_as,
95 (long long)mem_avail);
96 close(fd);
97 }
98
/*
 * Resolve a device specification for the common loader code.
 *
 * When devspec contains a ':' it is handed to devparse() together with path.
 * When it is NULL or has no ':' it is treated as a bare path: the device
 * named by the "currdev" environment variable is parsed instead and, on
 * success, *path (if path is non-NULL) is set to devspec itself.
 *
 * Returns whatever devparse() returned.
 *
 * NB: getdev should likely be identical to this most places, except maybe
 * we should move to storing the length of the platform devdesc.
 */
int
kboot_getdev(void **vdev, const char *devspec, const char **path)
{
	struct devdesc **dev = (struct devdesc **)vdev;
	int error;

	/* An explicit device prefix: parse it off the front of devspec. */
	if (devspec != NULL && strchr(devspec, ':') != NULL)
		return (devparse(dev, devspec, path));

	/* Just a path (or nothing at all): go with the current device. */
	error = devparse(dev, getenv("currdev"), NULL);
	if (error == 0 && path != NULL)
		*path = devspec;
	return (error);
}
125
/*
 * Feed the command line arguments to boot_parse_arg() and return the OR of
 * all the values it produced.
 *
 * When run as init, sometimes argv[0] is an EFI-ESP path, other times it's
 * the name of the init program, and sometimes it's a placeholder string, so
 * argv[0] is never looked at. Any later argument that starts with '\' or '/'
 * (a DOS-like or Unix-like absolute path) is skipped as well; everything else
 * is parsed as a command arg (so looking for '-X', 'foo' or 'foo=bar'). This
 * is a little different than EFI, where argv[0] is often the first argument
 * passed in. There are cases when linux-booting via EFI where the EFI path
 * used to run bootXXX.efi shows up among the arguments to init, which is why
 * paths need to be excluded here.
 */
static int
parse_args(int argc, const char **argv)
{
	int howto = 0;

	for (int i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (arg[0] == '\\' || arg[0] == '/')
			continue;
		howto |= boot_parse_arg(arg);
	}

	return (howto);
}
151
152 static vm_offset_t rsdp;
153
154 static vm_offset_t
kboot_rsdp_from_efi(void)155 kboot_rsdp_from_efi(void)
156 {
157 char buffer[512 + 1];
158 char *walker, *ep;
159
160 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
161 return (0); /* Not an EFI system */
162 ep = buffer + strlen(buffer);
163 walker = buffer;
164 while (walker < ep) {
165 if (strncmp("ACPI20=", walker, 7) == 0)
166 return((vm_offset_t)strtoull(walker + 7, NULL, 0));
167 if (strncmp("ACPI=", walker, 5) == 0)
168 return((vm_offset_t)strtoull(walker + 5, NULL, 0));
169 walker += strcspn(walker, "\n") + 1;
170 }
171 return (0);
172 }
173
174 static void
find_acpi(void)175 find_acpi(void)
176 {
177 rsdp = kboot_rsdp_from_efi();
178 #if 0 /* maybe for amd64 */
179 if (rsdp == 0)
180 rsdp = find_rsdp_arch();
181 #endif
182 }
183
184 vm_offset_t
acpi_rsdp(void)185 acpi_rsdp(void)
186 {
187 return (rsdp);
188 }
189
190 bool
has_acpi(void)191 has_acpi(void)
192 {
193 return rsdp != 0;
194 }
195
196 /*
197 * SMBIOS support. We map the physical memory address we get into a VA in this
198 * address space with mmap with 64k pages. Once we're done, we cleanup any
199 * mappings we made.
200 */
201
202 #define MAX_MAP 10
203 #define PAGE (64<<10)
204
205 static struct mapping
206 {
207 uintptr_t pa;
208 caddr_t va;
209 } map[MAX_MAP];
210 static int smbios_fd;
211 static int nmap;
212
ptov(uintptr_t pa)213 caddr_t ptov(uintptr_t pa)
214 {
215 caddr_t va;
216 uintptr_t pa2;
217 struct mapping *m = map;
218
219 pa2 = rounddown(pa, PAGE);
220 for (int i = 0; i < nmap; i++, m++) {
221 if (m->pa == pa2) {
222 return (m->va + pa - m->pa);
223 }
224 }
225 if (nmap == MAX_MAP)
226 panic("Too many maps for smbios");
227
228 /*
229 * host_mmap returns small negative numbers on errors, can't return an
230 * error here, so we have to panic. The Linux wrapper will set errno
231 * based on this and then return HOST_MAP_FAILED. Since we're calling
232 * the raw system call we have to do that ourselves.
233 */
234 va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2);
235 if (is_linux_error((long)va))
236 panic("smbios mmap offset %#jx failed", (uintmax_t)pa2);
237 m = &map[nmap++];
238 m->pa = pa2;
239 m->va = va;
240 return (m->va + pa - m->pa);
241 }
242
243 static void
smbios_cleanup(void)244 smbios_cleanup(void)
245 {
246 for (int i = 0; i < nmap; i++) {
247 host_munmap(map[i].va, PAGE);
248 }
249 }
250
251 static vm_offset_t
kboot_find_smbios(void)252 kboot_find_smbios(void)
253 {
254 char buffer[512 + 1];
255 char *walker, *ep;
256
257 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
258 return (0); /* Not an EFI system */
259 ep = buffer + strlen(buffer);
260 walker = buffer;
261 while (walker <= ep) {
262 if (strncmp("SMBIOS3=", walker, 8) == 0)
263 return((vm_offset_t)strtoull(walker + 8, NULL, 0));
264 if (strncmp("SMBIOS=", walker, 7) == 0)
265 return((vm_offset_t)strtoull(walker + 7, NULL, 0));
266 walker += strcspn(walker, "\n") + 1;
267 }
268 return (0);
269 }
270
271 static void
find_smbios(void)272 find_smbios(void)
273 {
274 char buf[40];
275 uintptr_t pa;
276 caddr_t va;
277
278 pa = kboot_find_smbios();
279 printf("SMBIOS at %#jx\n", (uintmax_t)pa);
280 if (pa == 0)
281 return;
282
283 snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa);
284 setenv("hint.smbios.0.mem", buf, 1);
285 smbios_fd = host_open("/dev/mem", O_RDONLY, 0);
286 if (smbios_fd < 0) {
287 printf("Can't open /dev/mem to read smbios\n");
288 return;
289 }
290 va = ptov(pa);
291 printf("Start of smbios at pa %p va %p\n", (void *)pa, va);
292 smbios_detect(va);
293 smbios_cleanup();
294 host_close(smbios_fd);
295 }
296
/*
 * Read the whole of file fn into memory and pass its NUL-terminated contents
 * to boot_parse_cmdline(). Silently does nothing when the file cannot be
 * stat'ed, opened, allocated for, or read in full.
 */
static void
parse_file(const char *fn)
{
	struct stat st;
	char *text;
	int fd;

	if (stat(fn, &st) != 0)
		return;
	if ((fd = open(fn, O_RDONLY)) == -1)
		return;

	/* One extra byte for the terminator added below. */
	text = malloc(st.st_size + 1);
	if (text != NULL && read(fd, text, st.st_size) == st.st_size) {
		text[st.st_size] = '\0';
		boot_parse_cmdline(text);
	}

	free(text);
	close(fd);
}
320
321
322 int
main(int argc,const char ** argv)323 main(int argc, const char **argv)
324 {
325 void *heapbase;
326 const size_t heapsize = 64*1024*1024;
327 const char *bootdev;
328
329 archsw.arch_getdev = kboot_getdev;
330 archsw.arch_copyin = kboot_copyin;
331 archsw.arch_copyout = kboot_copyout;
332 archsw.arch_readin = kboot_readin;
333 archsw.arch_autoload = kboot_autoload;
334 archsw.arch_zfs_probe = kboot_zfs_probe;
335
336 /* Give us a sane world if we're running as init */
337 do_init();
338
339 /*
340 * Setup the heap, 64MB is minimum for ZFS booting
341 */
342 heapbase = host_getmem(heapsize);
343 setheap(heapbase, heapbase + heapsize);
344
345 /*
346 * Set up console so we get error messages.
347 */
348 cons_probe();
349
350 /*
351 * Find acpi and smbios, if they exists. This allows command line and
352 * later scripts to override if necessary.
353 */
354 find_acpi();
355 find_smbios();
356
357 /* Parse the command line args -- ignoring for now the console selection */
358 parse_args(argc, argv);
359
360 hostfs_root = getenv("hostfs_root");
361 if (hostfs_root == NULL)
362 hostfs_root = "/";
363
364 /* Initialize all the devices */
365 devinit();
366
367 /* Figure out where we're booting from */
368 bootdev = getenv("bootdev");
369 if (bootdev == NULL)
370 bootdev = hostdisk_gen_probe();
371 #if defined(LOADER_ZFS_SUPPORT)
372 if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) {
373 /*
374 * Pseudo device that says go find the right ZFS pool. This will be
375 * the first pool that we find that passes the sanity checks (eg looks
376 * like it might be vbootable) and sets currdev to the right thing based
377 * on active BEs, etc
378 */
379 if (hostdisk_zfs_find_default())
380 bootdev = getenv("currdev");
381 }
382 #endif
383 if (bootdev != NULL) {
384 /*
385 * Otherwise, honor what's on the command line. If we've been
386 * given a specific ZFS partition, then we'll honor it w/o BE
387 * processing that would otherwise pick a different snapshot to
388 * boot than the default one in the pool.
389 */
390 set_currdev(bootdev);
391 } else {
392 panic("Bootdev is still NULL");
393 }
394
395 printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root);
396
397 printf("\n%s", bootprog_info);
398
399 setenv("LINES", "24", 1);
400
401 memory_limits();
402 enumerate_memory_arch();
403
404 interact(); /* doesn't return */
405
406 return (0);
407 }
408
/* Terminate the loader by passing code to host_exit(). */
void
exit(int code)
{

	host_exit(code);
	__unreachable();
}
415
416 void
delay(int usecs)417 delay(int usecs)
418 {
419 struct host_timeval tvi, tv;
420 uint64_t ti, t;
421 host_gettimeofday(&tvi, NULL);
422 ti = tvi.tv_sec*1000000 + tvi.tv_usec;
423 do {
424 host_gettimeofday(&tv, NULL);
425 t = tv.tv_sec*1000000 + tv.tv_usec;
426 } while (t < ti + usecs);
427 }
428
429 time_t
getsecs(void)430 getsecs(void)
431 {
432 struct host_timeval tv;
433 host_gettimeofday(&tv, NULL);
434 return (tv.tv_sec);
435 }
436
/*
 * time(3) replacement built on getsecs(): returns the current seconds value
 * and also stores it in *tloc when tloc is not NULL.
 */
time_t
time(time_t *tloc)
{
	const time_t now = getsecs();

	if (tloc != NULL)
		*tloc = now;
	return (now);
}
448
449 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX];
450 int nkexec_segments = 0;
451
452 #define SEGALIGN (1ul<<20)
453
454 static ssize_t
get_phys_buffer(vm_offset_t dest,const size_t len,void ** buf)455 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
456 {
457 int i = 0;
458 const size_t segsize = 64*1024*1024;
459 size_t sz, amt, l;
460
461 if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX)
462 panic("Tried to load too many kexec segments");
463 for (i = 0; i < nkexec_segments; i++) {
464 if (dest >= (vm_offset_t)loaded_segments[i].mem &&
465 dest < (vm_offset_t)loaded_segments[i].mem +
466 loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */
467 goto out;
468 }
469
470 sz = segsize;
471 if (nkexec_segments == 0) {
472 /* how much space does this segment have */
473 sz = space_avail(dest);
474 /* Clip to 45% of available memory (need 2 copies) */
475 sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN));
476 printf("limit to 45%% of mem_avail %zd\n", sz);
477 /* And only use 95% of what we can allocate */
478 sz = MIN(sz,
479 rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN));
480 printf("Allocating %zd MB for first segment\n", sz >> 20);
481 }
482
483 loaded_segments[nkexec_segments].buf = host_getmem(sz);
484 loaded_segments[nkexec_segments].bufsz = sz;
485 loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN);
486 loaded_segments[nkexec_segments].memsz = 0;
487
488 i = nkexec_segments;
489 nkexec_segments++;
490
491 out:
492 /*
493 * Keep track of the highest amount used in a segment
494 */
495 amt = dest - (vm_offset_t)loaded_segments[i].mem;
496 l = min(len,loaded_segments[i].bufsz - amt);
497 *buf = loaded_segments[i].buf + amt;
498 if (amt + l > loaded_segments[i].memsz)
499 loaded_segments[i].memsz = amt + l;
500 return (l);
501 }
502
503 ssize_t
kboot_copyin(const void * src,vm_offset_t dest,const size_t len)504 kboot_copyin(const void *src, vm_offset_t dest, const size_t len)
505 {
506 ssize_t segsize, remainder;
507 void *destbuf;
508
509 if (pa_start == PA_INVAL) {
510 pa_start = kboot_get_phys_load_segment();
511 // padding = 2 << 20; /* XXX amd64: revisit this when we make it work */
512 padding = 0;
513 offset = dest;
514 get_phys_buffer(pa_start, len, &destbuf);
515 }
516
517 remainder = len;
518 do {
519 segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf);
520 bcopy(src, destbuf, segsize);
521 remainder -= segsize;
522 src += segsize;
523 dest += segsize;
524 } while (remainder > 0);
525
526 return (len);
527 }
528
529 ssize_t
kboot_copyout(vm_offset_t src,void * dest,const size_t len)530 kboot_copyout(vm_offset_t src, void *dest, const size_t len)
531 {
532 ssize_t segsize, remainder;
533 void *srcbuf;
534
535 remainder = len;
536 do {
537 segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf);
538 bcopy(srcbuf, dest, segsize);
539 remainder -= segsize;
540 src += segsize;
541 dest += segsize;
542 } while (remainder > 0);
543
544 return (len);
545 }
546
547 ssize_t
kboot_readin(readin_handle_t fd,vm_offset_t dest,const size_t len)548 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
549 {
550 void *buf;
551 size_t resid, chunk, get;
552 ssize_t got;
553 vm_offset_t p;
554
555 p = dest;
556
557 chunk = min(PAGE_SIZE, len);
558 buf = malloc(chunk);
559 if (buf == NULL) {
560 printf("kboot_readin: buf malloc failed\n");
561 return (0);
562 }
563
564 for (resid = len; resid > 0; resid -= got, p += got) {
565 get = min(chunk, resid);
566 got = VECTX_READ(fd, buf, get);
567 if (got <= 0) {
568 if (got < 0)
569 printf("kboot_readin: read failed\n");
570 break;
571 }
572
573 kboot_copyin(buf, p, got);
574 }
575
576 free (buf);
577 return (len - resid);
578 }
579
/* arch_autoload hook: does nothing and always returns 0. */
int
kboot_autoload(void)
{
	return (0);
}
586
587 void
kboot_kseg_get(int * nseg,void ** ptr)588 kboot_kseg_get(int *nseg, void **ptr)
589 {
590 printf("kseg_get: %d segments\n", nkexec_segments);
591 printf("VA SZ PA MEMSZ\n");
592 printf("---------------- -------- ---------------- -----\n");
593 for (int a = 0; a < nkexec_segments; a++) {
594 /*
595 * Truncate each segment to just what we've used in the segment,
596 * rounded up to the next page.
597 */
598 loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE);
599 loaded_segments[a].bufsz = loaded_segments[a].memsz;
600 printf("%016jx %08jx %016jx %08jx\n",
601 (uintmax_t)loaded_segments[a].buf,
602 (uintmax_t)loaded_segments[a].bufsz,
603 (uintmax_t)loaded_segments[a].mem,
604 (uintmax_t)loaded_segments[a].memsz);
605 }
606
607 *nseg = nkexec_segments;
608 *ptr = &loaded_segments[0];
609 }
610
/*
 * arch_zfs_probe hook. With LOADER_ZFS_SUPPORT it defers to
 * hostdisk_zfs_probe(); without it the function is empty.
 */
static void
kboot_zfs_probe(void)
{
#ifdef LOADER_ZFS_SUPPORT
	/*
	 * Open all the disks and partitions we can find to see if there are
	 * ZFS pools on them.
	 */
	hostdisk_zfs_probe();
#endif
}
622
623 /*
624 * Since proper fdt command handling function is defined in fdt_loader_cmd.c,
625 * and declaring it as extern is in contradiction with COMMAND_SET() macro
626 * (which uses static pointer), we're defining wrapper function, which
627 * calls the proper fdt handling routine.
628 */
629 static int
command_fdt(int argc,char * argv[])630 command_fdt(int argc, char *argv[])
631 {
632
633 return (command_fdt_internal(argc, argv));
634 }
635
636 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
637
638