1 /*- 2 * Copyright (C) 2010-2014 Nathan Whitehorn 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include <sys/cdefs.h> 27 #include <stand.h> 28 #include <sys/param.h> 29 #include <sys/boot.h> 30 #include <fdt_platform.h> 31 32 #include <machine/cpufunc.h> 33 #include <bootstrap.h> 34 #include "host_syscall.h" 35 #include "kboot.h" 36 #include "stand.h" 37 #include <smbios.h> 38 39 struct arch_switch archsw; 40 extern void *_end; 41 42 int kboot_getdev(void **vdev, const char *devspec, const char **path); 43 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len); 44 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len); 45 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len); 46 int kboot_autoload(void); 47 static void kboot_zfs_probe(void); 48 49 extern int command_fdt_internal(int argc, char *argv[]); 50 51 #define PA_INVAL (vm_offset_t)-1 52 static vm_offset_t pa_start = PA_INVAL; 53 static vm_offset_t padding; 54 static vm_offset_t offset; 55 56 static uint64_t commit_limit; 57 static uint64_t committed_as; 58 static uint64_t mem_avail; 59 60 static void 61 memory_limits(void) 62 { 63 int fd; 64 char buf[128]; 65 66 /* 67 * To properly size the slabs, we need to find how much memory we can 68 * commit to using. commit_limit is the max, while commited_as is the 69 * current total. We can use these later to allocate the largetst amount 70 * of memory possible so we can support larger ram disks than we could 71 * by using fixed segment sizes. We also grab the memory available so 72 * we don't use more than 49% of that. 73 */ 74 fd = open("host:/proc/meminfo", O_RDONLY); 75 if (fd != -1) { 76 while (fgetstr(buf, sizeof(buf), fd) > 0) { 77 if (strncmp(buf, "MemAvailable:", 13) == 0) { 78 mem_avail = strtoll(buf + 13, NULL, 0); 79 mem_avail <<= 10; /* Units are kB */ 80 } else if (strncmp(buf, "CommitLimit:", 12) == 0) { 81 commit_limit = strtoll(buf + 13, NULL, 0); 82 commit_limit <<= 10; /* Units are kB */ 83 } else if (strncmp(buf, "Committed_AS:", 13) == 0) { 84 committed_as = strtoll(buf + 14, NULL, 0); 85 committed_as <<= 10; /* Units are kB */ 86 } 87 } 88 } else { 89 /* Otherwise, on FreeBSD host, for testing 32GB host: */ 90 mem_avail = 31ul << 30; /* 31GB free */ 91 commit_limit = mem_avail * 9 / 10; /* 90% comittable */ 92 committed_as = 20ul << 20; /* 20MB used */ 93 } 94 printf("Commit limit: %lld Committed bytes %lld Available %lld\n", 95 (long long)commit_limit, (long long)committed_as, 96 (long long)mem_avail); 97 close(fd); 98 } 99 100 /* 101 * NB: getdev should likely be identical to this most places, except maybe 102 * we should move to storing the length of the platform devdesc. 103 */ 104 int 105 kboot_getdev(void **vdev, const char *devspec, const char **path) 106 { 107 struct devdesc **dev = (struct devdesc **)vdev; 108 int rv; 109 110 /* 111 * If it looks like this is just a path and no device, go with the 112 * current device. 113 */ 114 if (devspec == NULL || strchr(devspec, ':') == NULL) { 115 if (((rv = devparse(dev, getenv("currdev"), NULL)) == 0) && 116 (path != NULL)) 117 *path = devspec; 118 return (rv); 119 } 120 121 /* 122 * Try to parse the device name off the beginning of the devspec 123 */ 124 return (devparse(dev, devspec, path)); 125 } 126 127 static int 128 parse_args(int argc, const char **argv) 129 { 130 int howto = 0; 131 132 /* 133 * When run as init, sometimes argv[0] is a EFI-ESP path, other times 134 * it's the name of the init program, and sometimes it's a placeholder 135 * string, so we exclude it here. For the other args, look for DOS-like 136 * and Unix-like absolte paths and exclude parsing it if we find that, 137 * otherwise parse it as a command arg (so looking for '-X', 'foo' or 138 * 'foo=bar'). This is a little different than EFI where it argv[0] 139 * often times is the first argument passed in. There are cases when 140 * linux-booting via EFI that we have the EFI path we used to run 141 * bootXXX.efi as the arguments to init, so we need to exclude the paths 142 * there as well. 143 */ 144 for (int i = 1; i < argc; i++) { 145 if (argv[i][0] != '\\' && argv[i][0] != '/') { 146 howto |= boot_parse_arg(argv[i]); 147 } 148 } 149 150 return (howto); 151 } 152 153 static vm_offset_t rsdp; 154 155 static vm_offset_t 156 kboot_rsdp_from_efi(void) 157 { 158 char buffer[512 + 1]; 159 char *walker, *ep; 160 161 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) 162 return (0); /* Not an EFI system */ 163 ep = buffer + strlen(buffer); 164 walker = buffer; 165 while (walker < ep) { 166 if (strncmp("ACPI20=", walker, 7) == 0) 167 return((vm_offset_t)strtoull(walker + 7, NULL, 0)); 168 if (strncmp("ACPI=", walker, 5) == 0) 169 return((vm_offset_t)strtoull(walker + 5, NULL, 0)); 170 walker += strcspn(walker, "\n") + 1; 171 } 172 return (0); 173 } 174 175 static void 176 find_acpi(void) 177 { 178 rsdp = kboot_rsdp_from_efi(); 179 #if 0 /* maybe for amd64 */ 180 if (rsdp == 0) 181 rsdp = find_rsdp_arch(); 182 #endif 183 } 184 185 vm_offset_t 186 acpi_rsdp(void) 187 { 188 return (rsdp); 189 } 190 191 bool 192 has_acpi(void) 193 { 194 return rsdp != 0; 195 } 196 197 /* 198 * SMBIOS support. We map the physical memory address we get into a VA in this 199 * address space with mmap with 64k pages. Once we're done, we cleanup any 200 * mappings we made. 201 */ 202 203 #define MAX_MAP 10 204 #define PAGE (64<<10) 205 206 static struct mapping 207 { 208 uintptr_t pa; 209 caddr_t va; 210 } map[MAX_MAP]; 211 static int smbios_fd; 212 static int nmap; 213 214 caddr_t ptov(uintptr_t pa) 215 { 216 caddr_t va; 217 uintptr_t pa2; 218 struct mapping *m = map; 219 220 pa2 = rounddown(pa, PAGE); 221 for (int i = 0; i < nmap; i++, m++) { 222 if (m->pa == pa2) { 223 return (m->va + pa - m->pa); 224 } 225 } 226 if (nmap == MAX_MAP) 227 panic("Too many maps for smbios"); 228 229 /* 230 * host_mmap returns small negative numbers on errors, can't return an 231 * error here, so we have to panic. The Linux wrapper will set errno 232 * based on this and then return HOST_MAP_FAILED. Since we're calling 233 * the raw system call we have to do that ourselves. 234 */ 235 va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2); 236 if ((intptr_t)va < 0 && (intptr_t)va >= -511) 237 panic("smbios mmap offset %#jx failed", (uintmax_t)pa2); 238 m = &map[nmap++]; 239 m->pa = pa2; 240 m->va = va; 241 return (m->va + pa - m->pa); 242 } 243 244 static void 245 smbios_cleanup(void) 246 { 247 for (int i = 0; i < nmap; i++) { 248 host_munmap(map[i].va, PAGE); 249 } 250 } 251 252 static vm_offset_t 253 kboot_find_smbios(void) 254 { 255 char buffer[512 + 1]; 256 char *walker, *ep; 257 258 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) 259 return (0); /* Not an EFI system */ 260 ep = buffer + strlen(buffer); 261 walker = buffer; 262 while (walker <= ep) { 263 if (strncmp("SMBIOS3=", walker, 8) == 0) 264 return((vm_offset_t)strtoull(walker + 8, NULL, 0)); 265 if (strncmp("SMBIOS=", walker, 7) == 0) 266 return((vm_offset_t)strtoull(walker + 7, NULL, 0)); 267 walker += strcspn(walker, "\n") + 1; 268 } 269 return (0); 270 } 271 272 static void 273 find_smbios(void) 274 { 275 char buf[40]; 276 uintptr_t pa; 277 caddr_t va; 278 279 pa = kboot_find_smbios(); 280 printf("SMBIOS at %#jx\n", (uintmax_t)pa); 281 if (pa == 0) 282 return; 283 284 snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa); 285 setenv("hint.smbios.0.mem", buf, 1); 286 smbios_fd = host_open("/dev/mem", O_RDONLY, 0); 287 if (smbios_fd < 0) { 288 printf("Can't open /dev/mem to read smbios\n"); 289 return; 290 } 291 va = ptov(pa); 292 printf("Start of smbios at pa %p va %p\n", (void *)pa, va); 293 smbios_detect(va); 294 smbios_cleanup(); 295 host_close(smbios_fd); 296 } 297 298 static void 299 parse_file(const char *fn) 300 { 301 struct stat st; 302 int fd = -1; 303 char *env = NULL; 304 305 if (stat(fn, &st) != 0) 306 return; 307 fd = open(fn, O_RDONLY); 308 if (fd == -1) 309 return; 310 env = malloc(st.st_size + 1); 311 if (env == NULL) 312 goto out; 313 if (read(fd, env, st.st_size) != st.st_size) 314 goto out; 315 env[st.st_size] = '\0'; 316 boot_parse_cmdline(env); 317 out: 318 free(env); 319 close(fd); 320 } 321 322 323 int 324 main(int argc, const char **argv) 325 { 326 void *heapbase; 327 const size_t heapsize = 64*1024*1024; 328 const char *bootdev; 329 330 archsw.arch_getdev = kboot_getdev; 331 archsw.arch_copyin = kboot_copyin; 332 archsw.arch_copyout = kboot_copyout; 333 archsw.arch_readin = kboot_readin; 334 archsw.arch_autoload = kboot_autoload; 335 archsw.arch_zfs_probe = kboot_zfs_probe; 336 337 /* Give us a sane world if we're running as init */ 338 do_init(); 339 340 /* 341 * Setup the heap, 64MB is minimum for ZFS booting 342 */ 343 heapbase = host_getmem(heapsize); 344 setheap(heapbase, heapbase + heapsize); 345 346 /* Parse the command line args -- ignoring for now the console selection */ 347 parse_args(argc, argv); 348 349 parse_file("host:/kboot.conf"); 350 351 /* 352 * Set up console. 353 */ 354 cons_probe(); 355 356 /* Initialize all the devices */ 357 devinit(); 358 359 bootdev = getenv("bootdev"); 360 if (bootdev == NULL) 361 bootdev = hostdisk_gen_probe(); 362 hostfs_root = getenv("hostfs_root"); 363 if (hostfs_root == NULL) 364 hostfs_root = "/"; 365 #if defined(LOADER_ZFS_SUPPORT) 366 if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) { 367 /* 368 * Pseudo device that says go find the right ZFS pool. This will be 369 * the first pool that we find that passes the sanity checks (eg looks 370 * like it might be vbootable) and sets currdev to the right thing based 371 * on active BEs, etc 372 */ 373 if (hostdisk_zfs_find_default()) 374 bootdev = getenv("currdev"); 375 } 376 #endif 377 if (bootdev != NULL) { 378 /* 379 * Otherwise, honor what's on the command line. If we've been 380 * given a specific ZFS partition, then we'll honor it w/o BE 381 * processing that would otherwise pick a different snapshot to 382 * boot than the default one in the pool. 383 */ 384 set_currdev(bootdev); 385 } else { 386 panic("Bootdev is still NULL"); 387 } 388 389 printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root); 390 391 printf("\n%s", bootprog_info); 392 393 setenv("LINES", "24", 1); 394 395 memory_limits(); 396 enumerate_memory_arch(); 397 398 /* 399 * Find acpi, if it exists 400 */ 401 find_acpi(); 402 403 find_smbios(); 404 405 interact(); /* doesn't return */ 406 407 return (0); 408 } 409 410 void 411 exit(int code) 412 { 413 host_exit(code); 414 __unreachable(); 415 } 416 417 void 418 delay(int usecs) 419 { 420 struct host_timeval tvi, tv; 421 uint64_t ti, t; 422 host_gettimeofday(&tvi, NULL); 423 ti = tvi.tv_sec*1000000 + tvi.tv_usec; 424 do { 425 host_gettimeofday(&tv, NULL); 426 t = tv.tv_sec*1000000 + tv.tv_usec; 427 } while (t < ti + usecs); 428 } 429 430 time_t 431 getsecs(void) 432 { 433 struct host_timeval tv; 434 host_gettimeofday(&tv, NULL); 435 return (tv.tv_sec); 436 } 437 438 time_t 439 time(time_t *tloc) 440 { 441 time_t rv; 442 443 rv = getsecs(); 444 if (tloc != NULL) 445 *tloc = rv; 446 447 return (rv); 448 } 449 450 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX]; 451 int nkexec_segments = 0; 452 453 #define SEGALIGN (1ul<<20) 454 455 static ssize_t 456 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) 457 { 458 int i = 0; 459 const size_t segsize = 64*1024*1024; 460 size_t sz, amt, l; 461 462 if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX) 463 panic("Tried to load too many kexec segments"); 464 for (i = 0; i < nkexec_segments; i++) { 465 if (dest >= (vm_offset_t)loaded_segments[i].mem && 466 dest < (vm_offset_t)loaded_segments[i].mem + 467 loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */ 468 goto out; 469 } 470 471 sz = segsize; 472 if (nkexec_segments == 0) { 473 /* how much space does this segment have */ 474 sz = space_avail(dest); 475 /* Clip to 45% of available memory (need 2 copies) */ 476 sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN)); 477 printf("limit to 45%% of mem_avail %zd\n", sz); 478 /* And only use 95% of what we can allocate */ 479 sz = MIN(sz, 480 rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN)); 481 printf("Allocating %zd MB for first segment\n", sz >> 20); 482 } 483 484 loaded_segments[nkexec_segments].buf = host_getmem(sz); 485 loaded_segments[nkexec_segments].bufsz = sz; 486 loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN); 487 loaded_segments[nkexec_segments].memsz = 0; 488 489 i = nkexec_segments; 490 nkexec_segments++; 491 492 out: 493 /* 494 * Keep track of the highest amount used in a segment 495 */ 496 amt = dest - (vm_offset_t)loaded_segments[i].mem; 497 l = min(len,loaded_segments[i].bufsz - amt); 498 *buf = loaded_segments[i].buf + amt; 499 if (amt + l > loaded_segments[i].memsz) 500 loaded_segments[i].memsz = amt + l; 501 return (l); 502 } 503 504 ssize_t 505 kboot_copyin(const void *src, vm_offset_t dest, const size_t len) 506 { 507 ssize_t segsize, remainder; 508 void *destbuf; 509 510 if (pa_start == PA_INVAL) { 511 pa_start = kboot_get_phys_load_segment(); 512 // padding = 2 << 20; /* XXX amd64: revisit this when we make it work */ 513 padding = 0; 514 offset = dest; 515 get_phys_buffer(pa_start, len, &destbuf); 516 } 517 518 remainder = len; 519 do { 520 segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf); 521 bcopy(src, destbuf, segsize); 522 remainder -= segsize; 523 src += segsize; 524 dest += segsize; 525 } while (remainder > 0); 526 527 return (len); 528 } 529 530 ssize_t 531 kboot_copyout(vm_offset_t src, void *dest, const size_t len) 532 { 533 ssize_t segsize, remainder; 534 void *srcbuf; 535 536 remainder = len; 537 do { 538 segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf); 539 bcopy(srcbuf, dest, segsize); 540 remainder -= segsize; 541 src += segsize; 542 dest += segsize; 543 } while (remainder > 0); 544 545 return (len); 546 } 547 548 ssize_t 549 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 550 { 551 void *buf; 552 size_t resid, chunk, get; 553 ssize_t got; 554 vm_offset_t p; 555 556 p = dest; 557 558 chunk = min(PAGE_SIZE, len); 559 buf = malloc(chunk); 560 if (buf == NULL) { 561 printf("kboot_readin: buf malloc failed\n"); 562 return (0); 563 } 564 565 for (resid = len; resid > 0; resid -= got, p += got) { 566 get = min(chunk, resid); 567 got = VECTX_READ(fd, buf, get); 568 if (got <= 0) { 569 if (got < 0) 570 printf("kboot_readin: read failed\n"); 571 break; 572 } 573 574 kboot_copyin(buf, p, got); 575 } 576 577 free (buf); 578 return (len - resid); 579 } 580 581 int 582 kboot_autoload(void) 583 { 584 585 return (0); 586 } 587 588 void 589 kboot_kseg_get(int *nseg, void **ptr) 590 { 591 printf("kseg_get: %d segments\n", nkexec_segments); 592 printf("VA SZ PA MEMSZ\n"); 593 printf("---------------- -------- ---------------- -----\n"); 594 for (int a = 0; a < nkexec_segments; a++) { 595 /* 596 * Truncate each segment to just what we've used in the segment, 597 * rounded up to the next page. 598 */ 599 loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE); 600 loaded_segments[a].bufsz = loaded_segments[a].memsz; 601 printf("%016jx %08jx %016jx %08jx\n", 602 (uintmax_t)loaded_segments[a].buf, 603 (uintmax_t)loaded_segments[a].bufsz, 604 (uintmax_t)loaded_segments[a].mem, 605 (uintmax_t)loaded_segments[a].memsz); 606 } 607 608 *nseg = nkexec_segments; 609 *ptr = &loaded_segments[0]; 610 } 611 612 static void 613 kboot_zfs_probe(void) 614 { 615 #if defined(LOADER_ZFS_SUPPORT) 616 /* 617 * Open all the disks and partitions we can find to see if there are ZFS 618 * pools on them. 619 */ 620 hostdisk_zfs_probe(); 621 #endif 622 } 623 624 /* 625 * Since proper fdt command handling function is defined in fdt_loader_cmd.c, 626 * and declaring it as extern is in contradiction with COMMAND_SET() macro 627 * (which uses static pointer), we're defining wrapper function, which 628 * calls the proper fdt handling routine. 629 */ 630 static int 631 command_fdt(int argc, char *argv[]) 632 { 633 634 return (command_fdt_internal(argc, argv)); 635 } 636 637 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); 638 639