1 /*- 2 * Copyright (C) 2010-2014 Nathan Whitehorn 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include <stand.h> 27 #include <sys/param.h> 28 #include <sys/boot.h> 29 #include <fdt_platform.h> 30 31 #include <machine/cpufunc.h> 32 #include <bootstrap.h> 33 #include "host_syscall.h" 34 #include "kboot.h" 35 #include "stand.h" 36 #include <smbios.h> 37 38 struct arch_switch archsw; 39 extern void *_end; 40 41 int kboot_getdev(void **vdev, const char *devspec, const char **path); 42 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len); 43 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len); 44 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len); 45 int kboot_autoload(void); 46 static void kboot_zfs_probe(void); 47 48 extern int command_fdt_internal(int argc, char *argv[]); 49 50 #define PA_INVAL (vm_offset_t)-1 51 static vm_offset_t pa_start = PA_INVAL; 52 static vm_offset_t padding; 53 static vm_offset_t offset; 54 55 static uint64_t commit_limit; 56 static uint64_t committed_as; 57 static uint64_t mem_avail; 58 59 static void 60 memory_limits(void) 61 { 62 int fd; 63 char buf[128]; 64 65 /* 66 * To properly size the slabs, we need to find how much memory we can 67 * commit to using. commit_limit is the max, while commited_as is the 68 * current total. We can use these later to allocate the largetst amount 69 * of memory possible so we can support larger ram disks than we could 70 * by using fixed segment sizes. We also grab the memory available so 71 * we don't use more than 49% of that. 72 */ 73 fd = open("host:/proc/meminfo", O_RDONLY); 74 if (fd != -1) { 75 while (fgetstr(buf, sizeof(buf), fd) > 0) { 76 if (strncmp(buf, "MemAvailable:", 13) == 0) { 77 mem_avail = strtoll(buf + 13, NULL, 0); 78 mem_avail <<= 10; /* Units are kB */ 79 } else if (strncmp(buf, "CommitLimit:", 12) == 0) { 80 commit_limit = strtoll(buf + 13, NULL, 0); 81 commit_limit <<= 10; /* Units are kB */ 82 } else if (strncmp(buf, "Committed_AS:", 13) == 0) { 83 committed_as = strtoll(buf + 14, NULL, 0); 84 committed_as <<= 10; /* Units are kB */ 85 } 86 } 87 } else { 88 /* Otherwise, on FreeBSD host, for testing 32GB host: */ 89 mem_avail = 31ul << 30; /* 31GB free */ 90 commit_limit = mem_avail * 9 / 10; /* 90% comittable */ 91 committed_as = 20ul << 20; /* 20MB used */ 92 } 93 printf("Commit limit: %lld Committed bytes %lld Available %lld\n", 94 (long long)commit_limit, (long long)committed_as, 95 (long long)mem_avail); 96 close(fd); 97 } 98 99 /* 100 * NB: getdev should likely be identical to this most places, except maybe 101 * we should move to storing the length of the platform devdesc. 102 */ 103 int 104 kboot_getdev(void **vdev, const char *devspec, const char **path) 105 { 106 struct devdesc **dev = (struct devdesc **)vdev; 107 int rv; 108 109 /* 110 * If it looks like this is just a path and no device, go with the 111 * current device. 112 */ 113 if (devspec == NULL || strchr(devspec, ':') == NULL) { 114 if (((rv = devparse(dev, getenv("currdev"), NULL)) == 0) && 115 (path != NULL)) 116 *path = devspec; 117 return (rv); 118 } 119 120 /* 121 * Try to parse the device name off the beginning of the devspec 122 */ 123 return (devparse(dev, devspec, path)); 124 } 125 126 static int 127 parse_args(int argc, const char **argv) 128 { 129 int howto = 0; 130 131 /* 132 * When run as init, sometimes argv[0] is a EFI-ESP path, other times 133 * it's the name of the init program, and sometimes it's a placeholder 134 * string, so we exclude it here. For the other args, look for DOS-like 135 * and Unix-like absolte paths and exclude parsing it if we find that, 136 * otherwise parse it as a command arg (so looking for '-X', 'foo' or 137 * 'foo=bar'). This is a little different than EFI where it argv[0] 138 * often times is the first argument passed in. There are cases when 139 * linux-booting via EFI that we have the EFI path we used to run 140 * bootXXX.efi as the arguments to init, so we need to exclude the paths 141 * there as well. 142 */ 143 for (int i = 1; i < argc; i++) { 144 if (argv[i][0] != '\\' && argv[i][0] != '/') { 145 howto |= boot_parse_arg(argv[i]); 146 } 147 } 148 149 return (howto); 150 } 151 152 static vm_offset_t rsdp; 153 154 static vm_offset_t 155 kboot_rsdp_from_efi(void) 156 { 157 char buffer[512 + 1]; 158 char *walker, *ep; 159 160 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) 161 return (0); /* Not an EFI system */ 162 ep = buffer + strlen(buffer); 163 walker = buffer; 164 while (walker < ep) { 165 if (strncmp("ACPI20=", walker, 7) == 0) 166 return((vm_offset_t)strtoull(walker + 7, NULL, 0)); 167 if (strncmp("ACPI=", walker, 5) == 0) 168 return((vm_offset_t)strtoull(walker + 5, NULL, 0)); 169 walker += strcspn(walker, "\n") + 1; 170 } 171 return (0); 172 } 173 174 static void 175 find_acpi(void) 176 { 177 rsdp = kboot_rsdp_from_efi(); 178 #if 0 /* maybe for amd64 */ 179 if (rsdp == 0) 180 rsdp = find_rsdp_arch(); 181 #endif 182 } 183 184 vm_offset_t 185 acpi_rsdp(void) 186 { 187 return (rsdp); 188 } 189 190 bool 191 has_acpi(void) 192 { 193 return rsdp != 0; 194 } 195 196 /* 197 * SMBIOS support. We map the physical memory address we get into a VA in this 198 * address space with mmap with 64k pages. Once we're done, we cleanup any 199 * mappings we made. 200 */ 201 202 #define MAX_MAP 10 203 #define PAGE (64<<10) 204 205 static struct mapping 206 { 207 uintptr_t pa; 208 caddr_t va; 209 } map[MAX_MAP]; 210 static int smbios_fd; 211 static int nmap; 212 213 caddr_t ptov(uintptr_t pa) 214 { 215 caddr_t va; 216 uintptr_t pa2; 217 struct mapping *m = map; 218 219 pa2 = rounddown(pa, PAGE); 220 for (int i = 0; i < nmap; i++, m++) { 221 if (m->pa == pa2) { 222 return (m->va + pa - m->pa); 223 } 224 } 225 if (nmap == MAX_MAP) 226 panic("Too many maps for smbios"); 227 228 /* 229 * host_mmap returns small negative numbers on errors, can't return an 230 * error here, so we have to panic. The Linux wrapper will set errno 231 * based on this and then return HOST_MAP_FAILED. Since we're calling 232 * the raw system call we have to do that ourselves. 233 */ 234 va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2); 235 if (is_linux_error((long)va)) 236 panic("smbios mmap offset %#jx failed", (uintmax_t)pa2); 237 m = &map[nmap++]; 238 m->pa = pa2; 239 m->va = va; 240 return (m->va + pa - m->pa); 241 } 242 243 static void 244 smbios_cleanup(void) 245 { 246 for (int i = 0; i < nmap; i++) { 247 host_munmap(map[i].va, PAGE); 248 } 249 } 250 251 static vm_offset_t 252 kboot_find_smbios(void) 253 { 254 char buffer[512 + 1]; 255 char *walker, *ep; 256 257 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) 258 return (0); /* Not an EFI system */ 259 ep = buffer + strlen(buffer); 260 walker = buffer; 261 while (walker <= ep) { 262 if (strncmp("SMBIOS3=", walker, 8) == 0) 263 return((vm_offset_t)strtoull(walker + 8, NULL, 0)); 264 if (strncmp("SMBIOS=", walker, 7) == 0) 265 return((vm_offset_t)strtoull(walker + 7, NULL, 0)); 266 walker += strcspn(walker, "\n") + 1; 267 } 268 return (0); 269 } 270 271 static void 272 find_smbios(void) 273 { 274 char buf[40]; 275 uintptr_t pa; 276 caddr_t va; 277 278 pa = kboot_find_smbios(); 279 printf("SMBIOS at %#jx\n", (uintmax_t)pa); 280 if (pa == 0) 281 return; 282 283 snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa); 284 setenv("hint.smbios.0.mem", buf, 1); 285 smbios_fd = host_open("/dev/mem", O_RDONLY, 0); 286 if (smbios_fd < 0) { 287 printf("Can't open /dev/mem to read smbios\n"); 288 return; 289 } 290 va = ptov(pa); 291 printf("Start of smbios at pa %p va %p\n", (void *)pa, va); 292 smbios_detect(va); 293 smbios_cleanup(); 294 host_close(smbios_fd); 295 } 296 297 static void 298 parse_file(const char *fn) 299 { 300 struct stat st; 301 int fd = -1; 302 char *env = NULL; 303 304 if (stat(fn, &st) != 0) 305 return; 306 fd = open(fn, O_RDONLY); 307 if (fd == -1) 308 return; 309 env = malloc(st.st_size + 1); 310 if (env == NULL) 311 goto out; 312 if (read(fd, env, st.st_size) != st.st_size) 313 goto out; 314 env[st.st_size] = '\0'; 315 boot_parse_cmdline(env); 316 out: 317 free(env); 318 close(fd); 319 } 320 321 322 int 323 main(int argc, const char **argv) 324 { 325 void *heapbase; 326 const size_t heapsize = 64*1024*1024; 327 const char *bootdev; 328 329 archsw.arch_getdev = kboot_getdev; 330 archsw.arch_copyin = kboot_copyin; 331 archsw.arch_copyout = kboot_copyout; 332 archsw.arch_readin = kboot_readin; 333 archsw.arch_autoload = kboot_autoload; 334 archsw.arch_zfs_probe = kboot_zfs_probe; 335 336 /* Give us a sane world if we're running as init */ 337 do_init(); 338 339 /* 340 * Setup the heap, 64MB is minimum for ZFS booting 341 */ 342 heapbase = host_getmem(heapsize); 343 setheap(heapbase, heapbase + heapsize); 344 345 /* Parse the command line args -- ignoring for now the console selection */ 346 parse_args(argc, argv); 347 348 parse_file("host:/kboot.conf"); 349 350 /* 351 * Set up console. 352 */ 353 cons_probe(); 354 355 /* Initialize all the devices */ 356 devinit(); 357 358 bootdev = getenv("bootdev"); 359 if (bootdev == NULL) 360 bootdev = hostdisk_gen_probe(); 361 hostfs_root = getenv("hostfs_root"); 362 if (hostfs_root == NULL) 363 hostfs_root = "/"; 364 #if defined(LOADER_ZFS_SUPPORT) 365 if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) { 366 /* 367 * Pseudo device that says go find the right ZFS pool. This will be 368 * the first pool that we find that passes the sanity checks (eg looks 369 * like it might be vbootable) and sets currdev to the right thing based 370 * on active BEs, etc 371 */ 372 if (hostdisk_zfs_find_default()) 373 bootdev = getenv("currdev"); 374 } 375 #endif 376 if (bootdev != NULL) { 377 /* 378 * Otherwise, honor what's on the command line. If we've been 379 * given a specific ZFS partition, then we'll honor it w/o BE 380 * processing that would otherwise pick a different snapshot to 381 * boot than the default one in the pool. 382 */ 383 set_currdev(bootdev); 384 } else { 385 panic("Bootdev is still NULL"); 386 } 387 388 printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root); 389 390 printf("\n%s", bootprog_info); 391 392 setenv("LINES", "24", 1); 393 394 memory_limits(); 395 enumerate_memory_arch(); 396 397 /* 398 * Find acpi, if it exists 399 */ 400 find_acpi(); 401 402 find_smbios(); 403 404 interact(); /* doesn't return */ 405 406 return (0); 407 } 408 409 void 410 exit(int code) 411 { 412 host_exit(code); 413 __unreachable(); 414 } 415 416 void 417 delay(int usecs) 418 { 419 struct host_timeval tvi, tv; 420 uint64_t ti, t; 421 host_gettimeofday(&tvi, NULL); 422 ti = tvi.tv_sec*1000000 + tvi.tv_usec; 423 do { 424 host_gettimeofday(&tv, NULL); 425 t = tv.tv_sec*1000000 + tv.tv_usec; 426 } while (t < ti + usecs); 427 } 428 429 time_t 430 getsecs(void) 431 { 432 struct host_timeval tv; 433 host_gettimeofday(&tv, NULL); 434 return (tv.tv_sec); 435 } 436 437 time_t 438 time(time_t *tloc) 439 { 440 time_t rv; 441 442 rv = getsecs(); 443 if (tloc != NULL) 444 *tloc = rv; 445 446 return (rv); 447 } 448 449 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX]; 450 int nkexec_segments = 0; 451 452 #define SEGALIGN (1ul<<20) 453 454 static ssize_t 455 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) 456 { 457 int i = 0; 458 const size_t segsize = 64*1024*1024; 459 size_t sz, amt, l; 460 461 if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX) 462 panic("Tried to load too many kexec segments"); 463 for (i = 0; i < nkexec_segments; i++) { 464 if (dest >= (vm_offset_t)loaded_segments[i].mem && 465 dest < (vm_offset_t)loaded_segments[i].mem + 466 loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */ 467 goto out; 468 } 469 470 sz = segsize; 471 if (nkexec_segments == 0) { 472 /* how much space does this segment have */ 473 sz = space_avail(dest); 474 /* Clip to 45% of available memory (need 2 copies) */ 475 sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN)); 476 printf("limit to 45%% of mem_avail %zd\n", sz); 477 /* And only use 95% of what we can allocate */ 478 sz = MIN(sz, 479 rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN)); 480 printf("Allocating %zd MB for first segment\n", sz >> 20); 481 } 482 483 loaded_segments[nkexec_segments].buf = host_getmem(sz); 484 loaded_segments[nkexec_segments].bufsz = sz; 485 loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN); 486 loaded_segments[nkexec_segments].memsz = 0; 487 488 i = nkexec_segments; 489 nkexec_segments++; 490 491 out: 492 /* 493 * Keep track of the highest amount used in a segment 494 */ 495 amt = dest - (vm_offset_t)loaded_segments[i].mem; 496 l = min(len,loaded_segments[i].bufsz - amt); 497 *buf = loaded_segments[i].buf + amt; 498 if (amt + l > loaded_segments[i].memsz) 499 loaded_segments[i].memsz = amt + l; 500 return (l); 501 } 502 503 ssize_t 504 kboot_copyin(const void *src, vm_offset_t dest, const size_t len) 505 { 506 ssize_t segsize, remainder; 507 void *destbuf; 508 509 if (pa_start == PA_INVAL) { 510 pa_start = kboot_get_phys_load_segment(); 511 // padding = 2 << 20; /* XXX amd64: revisit this when we make it work */ 512 padding = 0; 513 offset = dest; 514 get_phys_buffer(pa_start, len, &destbuf); 515 } 516 517 remainder = len; 518 do { 519 segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf); 520 bcopy(src, destbuf, segsize); 521 remainder -= segsize; 522 src += segsize; 523 dest += segsize; 524 } while (remainder > 0); 525 526 return (len); 527 } 528 529 ssize_t 530 kboot_copyout(vm_offset_t src, void *dest, const size_t len) 531 { 532 ssize_t segsize, remainder; 533 void *srcbuf; 534 535 remainder = len; 536 do { 537 segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf); 538 bcopy(srcbuf, dest, segsize); 539 remainder -= segsize; 540 src += segsize; 541 dest += segsize; 542 } while (remainder > 0); 543 544 return (len); 545 } 546 547 ssize_t 548 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 549 { 550 void *buf; 551 size_t resid, chunk, get; 552 ssize_t got; 553 vm_offset_t p; 554 555 p = dest; 556 557 chunk = min(PAGE_SIZE, len); 558 buf = malloc(chunk); 559 if (buf == NULL) { 560 printf("kboot_readin: buf malloc failed\n"); 561 return (0); 562 } 563 564 for (resid = len; resid > 0; resid -= got, p += got) { 565 get = min(chunk, resid); 566 got = VECTX_READ(fd, buf, get); 567 if (got <= 0) { 568 if (got < 0) 569 printf("kboot_readin: read failed\n"); 570 break; 571 } 572 573 kboot_copyin(buf, p, got); 574 } 575 576 free (buf); 577 return (len - resid); 578 } 579 580 int 581 kboot_autoload(void) 582 { 583 584 return (0); 585 } 586 587 void 588 kboot_kseg_get(int *nseg, void **ptr) 589 { 590 printf("kseg_get: %d segments\n", nkexec_segments); 591 printf("VA SZ PA MEMSZ\n"); 592 printf("---------------- -------- ---------------- -----\n"); 593 for (int a = 0; a < nkexec_segments; a++) { 594 /* 595 * Truncate each segment to just what we've used in the segment, 596 * rounded up to the next page. 597 */ 598 loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE); 599 loaded_segments[a].bufsz = loaded_segments[a].memsz; 600 printf("%016jx %08jx %016jx %08jx\n", 601 (uintmax_t)loaded_segments[a].buf, 602 (uintmax_t)loaded_segments[a].bufsz, 603 (uintmax_t)loaded_segments[a].mem, 604 (uintmax_t)loaded_segments[a].memsz); 605 } 606 607 *nseg = nkexec_segments; 608 *ptr = &loaded_segments[0]; 609 } 610 611 static void 612 kboot_zfs_probe(void) 613 { 614 #if defined(LOADER_ZFS_SUPPORT) 615 /* 616 * Open all the disks and partitions we can find to see if there are ZFS 617 * pools on them. 618 */ 619 hostdisk_zfs_probe(); 620 #endif 621 } 622 623 /* 624 * Since proper fdt command handling function is defined in fdt_loader_cmd.c, 625 * and declaring it as extern is in contradiction with COMMAND_SET() macro 626 * (which uses static pointer), we're defining wrapper function, which 627 * calls the proper fdt handling routine. 628 */ 629 static int 630 command_fdt(int argc, char *argv[]) 631 { 632 633 return (command_fdt_internal(argc, argv)); 634 } 635 636 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); 637 638