1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2 /* $OpenBSD: loadfile_elf.c,v 1.35 2019/05/16 21:16:04 claudio Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Ralph Campbell. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)boot.c 8.1 (Berkeley) 6/10/93 66 */ 67 68 /* 69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70 * 71 * Permission to use, copy, modify, and distribute this software for any 72 * purpose with or without fee is hereby granted, provided that the above 73 * copyright notice and this permission notice appear in all copies. 
74 * 75 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 76 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 77 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 78 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 79 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 80 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 81 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 82 */ 83 84 #include <sys/param.h> /* PAGE_SIZE PAGE_MASK roundup */ 85 #include <sys/ioctl.h> 86 #include <sys/reboot.h> 87 #include <sys/exec.h> 88 89 #include <elf.h> 90 #include <stdio.h> 91 #include <string.h> 92 #include <errno.h> 93 #include <stdlib.h> 94 #include <unistd.h> 95 #include <fcntl.h> 96 #include <err.h> 97 #include <errno.h> 98 #include <stddef.h> 99 100 #include <machine/vmmvar.h> 101 #include <machine/biosvar.h> 102 #include <machine/segments.h> 103 #include <machine/specialreg.h> 104 #include <machine/pte.h> 105 106 #include "loadfile.h" 107 #include "vmd.h" 108 109 #define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) 110 111 union { 112 Elf32_Ehdr elf32; 113 Elf64_Ehdr elf64; 114 } hdr; 115 116 static void setsegment(struct mem_segment_descriptor *, uint32_t, 117 size_t, int, int, int, int); 118 static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int); 119 static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int); 120 static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); 121 static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *); 122 static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t); 123 static void push_gdt(void); 124 static void push_pt_32(void); 125 static void push_pt_64(void); 126 static void marc4random_buf(paddr_t, int); 127 static void mbzero(paddr_t, int); 128 static void mbcopy(void *, paddr_t, int); 129 130 extern char *__progname; 131 extern int 
vm_id;

/*
 * setsegment
 *
 * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
 * memory-type segment descriptor support.
 *
 * This function was copied from machdep.c
 *
 * Parameters:
 *  sd: Address of the entry to initialize
 *  base: base of the segment
 *  limit: limit of the segment
 *  type: type of the segment
 *  dpl: privilege level of the segment
 *  def32: default 16/32 bit size of the segment
 *  gran: granularity of the segment (byte/page)
 */
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
{
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;		/* segment present */
	sd->sd_hilimit = (int)limit >> 16;
	sd->sd_avl = 0;
	sd->sd_long = 0;	/* not a 64-bit code segment */
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	sd->sd_hibase = (int)base >> 24;
}

/*
 * push_gdt
 *
 * Allocates and populates a page in the guest phys memory space to hold
 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
 * create the same GDT that a real bootloader would have created.
 * This is loaded into the guest phys RAM space at address GDT_PAGE.
 */
static void
push_gdt(void)
{
	uint8_t gdtpage[PAGE_SIZE];
	struct mem_segment_descriptor *sd;

	memset(&gdtpage, 0, sizeof(gdtpage));

	sd = (struct mem_segment_descriptor *)&gdtpage;

	/*
	 * Create three segment descriptors:
	 *
	 * GDT[0] : null descriptor. "Created" via memset above.
	 * GDT[1] (selector @ 0x8): Executable segment, for CS
	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
	 */
	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);

	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
}

/*
 * push_pt_32
 *
 * Create an identity-mapped page directory hierarchy mapping the first
 * 4GB of physical memory. This is used during bootstrapping i386 VMs on
 * CPUs without unrestricted guest capability.
 */
static void
push_pt_32(void)
{
	uint32_t ptes[1024], i;

	/*
	 * One page directory of 1024 PSE (4MB superpage) entries covers the
	 * full 4GB: entry i maps physical address i * 4MB.
	 */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < 1024; i++) {
		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
	}
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
}

/*
 * push_pt_64
 *
 * Create an identity-mapped page directory hierarchy mapping the first
 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
 * CPUs without unrestricted guest capability.
 */
static void
push_pt_64(void)
{
	uint64_t ptes[512], i;

	/* PDPDE0 - first 1GB */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PML3_PAGE;
	write_mem(PML4_PAGE, ptes, PAGE_SIZE);

	/* PDE0 - first 1GB */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);

	/* First 1GB (in 2MB pages) */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < 512; i++) {
		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
	}
	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
}

/*
 * loadfile_elf
 *
 * Loads an ELF kernel to its defined load address in the guest VM.
 * The kernel is loaded to its defined start point as set in the ELF header.
 *
 * Parameters:
 *  fp: file of a kernel file to load
 *  vcp: the VM create parameters, holding the exact memory map
 *  (out) vrs: register state to set on init for this kernel
 *  bootdev: the optional non-default boot device
 *  howto: optional boot flags for the kernel
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from read(2) or loadelf functions
 */
int
loadfile_elf(FILE *fp, struct vm_create_params *vcp,
    struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto,
    unsigned int bootdevice)
{
	int r, is_i386 = 0;
	uint32_t bootargsz;
	size_t n, stacksize;
	u_long marks[MARK_MAX];
	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
	bios_bootmac_t bm, *bootmac = NULL;

	/* Read the ELF header; the union covers both 32 and 64 bit forms. */
	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
		return 1;

	memset(&marks, 0, sizeof(marks));
	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
		is_i386 = 1;
	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
	} else
		/*
		 * Not an ELF image: 'r' still holds the non-zero byte count
		 * from the fread above, so the 'if (r)' below returns it.
		 */
		errno = ENOEXEC;

	if (r)
		return (r);

	/* Build the boot-time GDT the kernel expects to find. */
	push_gdt();

	if (is_i386) {
		push_pt_32();
		/* Reconfigure the default flat-64 register set for 32 bit */
		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
	}
	else
		push_pt_64();

	/* Network boot: pass the first interface's MAC via the bootargs. */
	if (bootdevice & VMBOOTDEV_NET) {
		bootmac = &bm;
		memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN);
	}
	n = create_bios_memmap(vcp, memmap);
	bootargsz = push_bootargs(memmap, n, bootmac);
	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);

	/* Enter at the ELF entry point with RSP at the prepared boot stack. */
	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_RSP] =
	    (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
	vrs->vrs_gdtr.vsi_base = GDT_PAGE;

	log_debug("%s: loaded ELF kernel", __func__);

	return (0);
}

/*
 * create_bios_memmap
 *
 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
 *
 * Parameters:
 *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
 *  memmap (out): the BIOS memory map
 *
 * Return values:
 *  Number of bios_memmap_t entries, including the terminating nul-entry.
 */
static size_t
create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
{
	size_t i, n = 0, sz;
	paddr_t gpa;
	struct vm_mem_range *vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		gpa = vmr->vmr_gpa;
		sz = vmr->vmr_size;

		/*
		 * Make sure that we do not mark the ROM/video RAM area in the
		 * low memory as physical memory available to the kernel.
		 * Ranges starting below 1MB are truncated (or dropped) at
		 * the LOWMEM_KB boundary.
		 */
		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
			if (gpa >= LOWMEM_KB * 1024)
				sz = 0;
			else
				sz = LOWMEM_KB * 1024 - gpa;
		}

		if (sz != 0) {
			memmap[n].addr = gpa;
			memmap[n].size = sz;
			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
			n++;
		}
	}

	/* Null mem map entry to denote the end of the ranges */
	memmap[n].addr = 0x0;
	memmap[n].size = 0x0;
	memmap[n].type = 0x0;
	n++;

	return (n);
}

/*
 * push_bootargs
 *
 * Creates the boot arguments page in the guest address space.
 * Since vmd(8) is acting as the bootloader, we need to create the same boot
 * arguments page that a real bootloader would have created. This is loaded
 * into the guest phys RAM space at address BOOTARGS_PAGE.
382 * 383 * Parameters: 384 * memmap: the BIOS memory map 385 * n: number of entries in memmap 386 * 387 * Return values: 388 * The size of the bootargs 389 */ 390 static uint32_t 391 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac) 392 { 393 uint32_t memmap_sz, consdev_sz, bootmac_sz, i; 394 bios_consdev_t consdev; 395 uint32_t ba[1024]; 396 397 memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t); 398 ba[0] = 0x0; /* memory map */ 399 ba[1] = memmap_sz; 400 ba[2] = memmap_sz; /* next */ 401 memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t)); 402 i = memmap_sz / sizeof(int); 403 404 /* Serial console device, COM1 @ 0x3f8 */ 405 consdev.consdev = makedev(8, 0); /* com1 @ 0x3f8 */ 406 consdev.conspeed = 115200; 407 consdev.consaddr = 0x3f8; 408 consdev.consfreq = 0; 409 410 consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t); 411 ba[i] = 0x5; /* consdev */ 412 ba[i + 1] = consdev_sz; 413 ba[i + 2] = consdev_sz; 414 memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t)); 415 i += consdev_sz / sizeof(int); 416 417 if (bootmac) { 418 bootmac_sz = 3 * sizeof(int) + (sizeof(bios_bootmac_t) + 3) & ~3; 419 ba[i] = 0x7; /* bootmac */ 420 ba[i + 1] = bootmac_sz; 421 ba[i + 2] = bootmac_sz; 422 memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t)); 423 i += bootmac_sz / sizeof(int); 424 } 425 426 ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */ 427 428 write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE); 429 430 return (i * sizeof(int)); 431 } 432 433 /* 434 * push_stack 435 * 436 * Creates the boot stack page in the guest address space. When using a real 437 * bootloader, the stack will be prepared using the following format before 438 * transitioning to kernel start, so vmd(8) needs to mimic the same stack 439 * layout. The stack content is pushed to the guest phys RAM at address 440 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is 441 * 4 bytes. 
442 * 443 * Stack Layout: (TOS == Top Of Stack) 444 * TOS location of boot arguments page 445 * TOS - 0x4 size of the content in the boot arguments page 446 * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0) 447 * TOS - 0xc size of high memory (biosextmem, not used by kernel at all) 448 * TOS - 0x10 kernel 'end' symbol value 449 * TOS - 0x14 version of bootarg API 450 * 451 * Parameters: 452 * bootargsz: size of boot arguments 453 * end: kernel 'end' symbol value 454 * bootdev: the optional non-default boot device 455 * howto: optional boot flags for the kernel 456 * 457 * Return values: 458 * size of the stack 459 */ 460 static size_t 461 push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto) 462 { 463 uint32_t stack[1024]; 464 uint16_t loc; 465 466 memset(&stack, 0, sizeof(stack)); 467 loc = 1024; 468 469 if (bootdev == 0) 470 bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 471 472 stack[--loc] = BOOTARGS_PAGE; 473 stack[--loc] = bootargsz; 474 stack[--loc] = 0; /* biosbasemem */ 475 stack[--loc] = 0; /* biosextmem */ 476 stack[--loc] = end; 477 stack[--loc] = 0x0e; 478 stack[--loc] = bootdev; 479 stack[--loc] = howto; 480 481 write_mem(STACK_PAGE, &stack, PAGE_SIZE); 482 483 return (1024 - (loc - 1)) * sizeof(uint32_t); 484 } 485 486 /* 487 * mread 488 * 489 * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 490 * into the guest address space at paddr 'addr'. 491 * 492 * Parameters: 493 * fd: file descriptor of the kernel image file to read from. 494 * addr: guest paddr_t to load to 495 * sz: number of bytes to load 496 * 497 * Return values: 498 * returns 'sz' if successful, or 0 otherwise. 
 */
size_t
mread(FILE *fp, paddr_t addr, size_t sz)
{
	size_t ct;
	size_t i, rd, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	rd = 0;
	osz = sz;
	/* Handle an unaligned start address with a partial head chunk. */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		/*
		 * NOTE(review): when sz <= PAGE_SIZE the head chunk is the
		 * whole request and may span a page boundary; this appears
		 * to rely on write_mem() accepting cross-page ranges —
		 * confirm against the write_mem() implementation.
		 */
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Account for the head chunk before the page-at-a-time loop. */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		/* Final chunk may be shorter than a full page. */
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);
	}

	return (osz);
}

/*
 * marc4random_buf
 *
 * load 'sz' bytes of random data into the guest address space at paddr
 * 'addr'.
563 * 564 * Parameters: 565 * addr: guest paddr_t to load random bytes into 566 * sz: number of random bytes to load 567 * 568 * Return values: 569 * nothing 570 */ 571 static void 572 marc4random_buf(paddr_t addr, int sz) 573 { 574 int i, ct; 575 char buf[PAGE_SIZE]; 576 577 /* 578 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 579 * write_mem 580 */ 581 ct = 0; 582 if (addr % PAGE_SIZE != 0) { 583 memset(buf, 0, sizeof(buf)); 584 ct = PAGE_SIZE - (addr % PAGE_SIZE); 585 586 arc4random_buf(buf, ct); 587 588 if (write_mem(addr, buf, ct)) 589 return; 590 591 addr += ct; 592 } 593 594 for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) { 595 memset(buf, 0, sizeof(buf)); 596 if (i + PAGE_SIZE > sz) 597 ct = sz - i; 598 else 599 ct = PAGE_SIZE; 600 601 arc4random_buf(buf, ct); 602 603 if (write_mem(addr, buf, ct)) 604 return; 605 } 606 } 607 608 /* 609 * mbzero 610 * 611 * load 'sz' bytes of zeros into the guest address space at paddr 612 * 'addr'. 613 * 614 * Parameters: 615 * addr: guest paddr_t to zero 616 * sz: number of zero bytes to store 617 * 618 * Return values: 619 * nothing 620 */ 621 static void 622 mbzero(paddr_t addr, int sz) 623 { 624 if (write_mem(addr, NULL, sz)) 625 return; 626 } 627 628 /* 629 * mbcopy 630 * 631 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'. 632 * 633 * Parameters: 634 * src: source buffer to copy from 635 * dst: destination guest paddr_t to copy to 636 * sz: number of bytes to copy 637 * 638 * Return values: 639 * nothing 640 */ 641 static void 642 mbcopy(void *src, paddr_t dst, int sz) 643 { 644 write_mem(dst, src, sz); 645 } 646 647 /* 648 * elf64_exec 649 * 650 * Load the kernel indicated by 'fd' into the guest physical memory 651 * space, at the addresses defined in the ELF header. 652 * 653 * This function is used for 64 bit kernels. 
 *
 * Parameters:
 *  fd: file descriptor of the kernel to load
 *  elf: ELF header of the kernel
 *  marks: array to store the offsets of various kernel structures
 *      (start, bss, etc)
 *  flags: flag value to indicate which section(s) to load (usually
 *      LOAD_ALL)
 *
 * Return values:
 *  0 if successful
 *  1 if unsuccessful
 */
static int
elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
{
	Elf64_Shdr *shp;
	Elf64_Phdr *phdr;
	Elf64_Off off;
	int i;
	size_t sz;
	int first;
	int havesyms, havelines;	/* NOTE(review): havelines is set but never read */
	paddr_t minp = ~0, maxp = 0, pos = 0;
	paddr_t offset = marks[MARK_START], shpp, elfp;

	/* Read the program header table. */
	sz = elf->e_phnum * sizeof(Elf64_Phdr);
	phdr = malloc(sz);	/* NOTE(review): malloc result unchecked */

	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) {
		free(phdr);
		return 1;
	}

	if (fread(phdr, 1, sz, fp) != sz) {
		free(phdr);
		return 1;
	}

	/* Walk the program headers, loading and/or measuring each segment. */
	for (first = 1, i = 0; i < elf->e_phnum; i++) {
		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
			int m;

			/* Fill segment if asked for. */
			if (flags & LOAD_RANDOM) {
				for (pos = 0; pos < phdr[i].p_filesz;
				    pos += m) {
					m = phdr[i].p_filesz - pos;
					marc4random_buf(phdr[i].p_paddr + pos,
					    m);
				}
			}
			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
				marks[MARK_ERANDOM] =
				    marks[MARK_RANDOM] + phdr[i].p_filesz;
			}
			continue;
		}

		if (phdr[i].p_type != PT_LOAD ||
		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
			continue;

#define IS_TEXT(p)	(p.p_flags & PF_X)
#define IS_DATA(p)	((p.p_flags & PF_X) == 0)
#define IS_BSS(p)	(p.p_filesz < p.p_memsz)
		/*
		 * XXX: Assume first address is lowest
		 */
		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {

			/* Read in segment. */
			if (fseeko(fp, (off_t)phdr[i].p_offset,
			    SEEK_SET) == -1) {
				free(phdr);
				return 1;
			}
			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
			    phdr[i].p_filesz) {
				free(phdr);
				return 1;
			}

			first = 0;
		}

		/* Track the lowest/highest physical addresses touched. */
		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
			pos = phdr[i].p_paddr;
			if (minp > pos)
				minp = pos;
			pos += phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}

		/* Zero out BSS. */
		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
			    phdr[i].p_memsz - phdr[i].p_filesz);
		}
		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
			pos += phdr[i].p_memsz - phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}
	}
	free(phdr);

	/*
	 * Copy the ELF and section headers.
	 */
	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
	if (flags & (LOAD_HDR | COUNT_HDR))
		maxp += sizeof(Elf64_Ehdr);

	if (flags & (LOAD_SYM | COUNT_SYM)) {
		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
			warn("lseek section headers");
			return 1;
		}
		sz = elf->e_shnum * sizeof(Elf64_Shdr);
		shp = malloc(sz);	/* NOTE(review): malloc unchecked */

		if (fread(shp, 1, sz, fp) != sz) {
			free(shp);
			return 1;
		}

		shpp = maxp;
		maxp += roundup(sz, sizeof(Elf64_Addr));

		/*
		 * Read the section-name string table so sections can be
		 * matched by name below.
		 * NOTE(review): e_shstrndx is not bounds-checked against
		 * e_shnum, and this malloc is also unchecked.
		 */
		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
		char *shstr = malloc(shstrsz);
		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
		    SEEK_SET) == -1) {
			free(shstr);
			free(shp);
			return 1;
		}
		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
			free(shstr);
			free(shp);
			return 1;
		}

		/*
		 * Now load the symbol sections themselves. Make sure the
		 * sections are aligned. Don't bother with string tables if
		 * there are no symbol sections.
		 */
		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));

		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
			if (shp[i].sh_type == SHT_SYMTAB)
				havesyms = 1;

		for (first = 1, i = 0; i < elf->e_shnum; i++) {
			if (shp[i].sh_type == SHT_SYMTAB ||
			    shp[i].sh_type == SHT_STRTAB ||
			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
				if (havesyms && (flags & LOAD_SYM)) {
					if (fseeko(fp, (off_t)shp[i].sh_offset,
					    SEEK_SET) == -1) {
						free(shstr);
						free(shp);
						return 1;
					}
					if (mread(fp, maxp,
					    shp[i].sh_size) != shp[i].sh_size) {
						free(shstr);
						free(shp);
						return 1;
					}
				}
				/*
				 * Patch the copied section header so its
				 * offset is relative to the in-guest header
				 * blob written at shpp below.
				 */
				maxp += roundup(shp[i].sh_size,
				    sizeof(Elf64_Addr));
				shp[i].sh_offset = off;
				shp[i].sh_flags |= SHF_ALLOC;
				off += roundup(shp[i].sh_size,
				    sizeof(Elf64_Addr));
				first = 0;
			}
		}
		if (flags & LOAD_SYM) {
			mbcopy(shp, shpp, sz);
		}
		free(shstr);
		free(shp);
	}

	/*
	 * Frob the copied ELF header to give information relative
	 * to elfp.
	 */
	if (flags & LOAD_HDR) {
		elf->e_phoff = 0;
		elf->e_shoff = sizeof(Elf64_Ehdr);
		elf->e_phentsize = 0;
		elf->e_phnum = 0;
		mbcopy(elf, elfp, sizeof(*elf));
	}

	marks[MARK_START] = LOADADDR(minp);
	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
	marks[MARK_SYM] = LOADADDR(elfp);
	marks[MARK_END] = LOADADDR(maxp);

	return 0;
}

/*
 * elf32_exec
 *
 * Load the kernel indicated by 'fd' into the guest physical memory
 * space, at the addresses defined in the ELF header.
 *
 * This function is used for 32 bit kernels.
 *
 * Parameters:
 *  fd: file descriptor of the kernel to load
 *  elf: ELF header of the kernel
 *  marks: array to store the offsets of various kernel structures
 *      (start, bss, etc)
 *  flags: flag value to indicate which section(s) to load (usually
 *      LOAD_ALL)
 *
 * Return values:
 *  0 if successful
 *  1 if unsuccessful
 */
static int
elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
{
	Elf32_Shdr *shp;
	Elf32_Phdr *phdr;
	Elf32_Off off;
	int i;
	size_t sz;
	int first;
	int havesyms, havelines;	/* NOTE(review): havelines is set but never read */
	paddr_t minp = ~0, maxp = 0, pos = 0;
	paddr_t offset = marks[MARK_START], shpp, elfp;

	/* Read the program header table. */
	sz = elf->e_phnum * sizeof(Elf32_Phdr);
	phdr = malloc(sz);	/* NOTE(review): malloc result unchecked */

	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) {
		free(phdr);
		return 1;
	}

	if (fread(phdr, 1, sz, fp) != sz) {
		free(phdr);
		return 1;
	}

	/* Walk the program headers, loading and/or measuring each segment. */
	for (first = 1, i = 0; i < elf->e_phnum; i++) {
		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
			int m;

			/* Fill segment if asked for. */
			if (flags & LOAD_RANDOM) {
				for (pos = 0; pos < phdr[i].p_filesz;
				    pos += m) {
					m = phdr[i].p_filesz - pos;
					marc4random_buf(phdr[i].p_paddr + pos,
					    m);
				}
			}
			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
				marks[MARK_ERANDOM] =
				    marks[MARK_RANDOM] + phdr[i].p_filesz;
			}
			continue;
		}

		if (phdr[i].p_type != PT_LOAD ||
		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
			continue;

#define IS_TEXT(p)	(p.p_flags & PF_X)
#define IS_DATA(p)	((p.p_flags & PF_X) == 0)
#define IS_BSS(p)	(p.p_filesz < p.p_memsz)
		/*
		 * XXX: Assume first address is lowest
		 */
		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {

			/* Read in segment. */
			if (fseeko(fp, (off_t)phdr[i].p_offset,
			    SEEK_SET) == -1) {
				free(phdr);
				return 1;
			}
			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
			    phdr[i].p_filesz) {
				free(phdr);
				return 1;
			}

			first = 0;
		}

		/* Track the lowest/highest physical addresses touched. */
		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
			pos = phdr[i].p_paddr;
			if (minp > pos)
				minp = pos;
			pos += phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}

		/* Zero out BSS. */
		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
			    phdr[i].p_memsz - phdr[i].p_filesz);
		}
		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
			pos += phdr[i].p_memsz - phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}
	}
	free(phdr);

	/*
	 * Copy the ELF and section headers.
	 */
	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
	if (flags & (LOAD_HDR | COUNT_HDR))
		maxp += sizeof(Elf32_Ehdr);

	if (flags & (LOAD_SYM | COUNT_SYM)) {
		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
			warn("lseek section headers");
			return 1;
		}
		sz = elf->e_shnum * sizeof(Elf32_Shdr);
		shp = malloc(sz);	/* NOTE(review): malloc unchecked */

		if (fread(shp, 1, sz, fp) != sz) {
			free(shp);
			return 1;
		}

		shpp = maxp;
		maxp += roundup(sz, sizeof(Elf32_Addr));

		/*
		 * Read the section-name string table so sections can be
		 * matched by name below.
		 * NOTE(review): e_shstrndx is not bounds-checked against
		 * e_shnum, and this malloc is also unchecked.
		 */
		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
		char *shstr = malloc(shstrsz);
		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
		    SEEK_SET) == -1) {
			free(shstr);
			free(shp);
			return 1;
		}
		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
			free(shstr);
			free(shp);
			return 1;
		}

		/*
		 * Now load the symbol sections themselves. Make sure the
		 * sections are aligned. Don't bother with string tables if
		 * there are no symbol sections.
		 */
		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));

		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
			if (shp[i].sh_type == SHT_SYMTAB)
				havesyms = 1;

		for (first = 1, i = 0; i < elf->e_shnum; i++) {
			/*
			 * NOTE(review): unlike elf64_exec(), this list does
			 * not include ELF_CTF sections — confirm whether the
			 * asymmetry is intentional.
			 */
			if (shp[i].sh_type == SHT_SYMTAB ||
			    shp[i].sh_type == SHT_STRTAB ||
			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
				if (havesyms && (flags & LOAD_SYM)) {
					if (fseeko(fp, (off_t)shp[i].sh_offset,
					    SEEK_SET) == -1) {
						free(shstr);
						free(shp);
						return 1;
					}
					if (mread(fp, maxp,
					    shp[i].sh_size) != shp[i].sh_size) {
						free(shstr);
						free(shp);
						return 1;
					}
				}
				/*
				 * Patch the copied section header so its
				 * offset is relative to the in-guest header
				 * blob written at shpp below.
				 */
				maxp += roundup(shp[i].sh_size,
				    sizeof(Elf32_Addr));
				shp[i].sh_offset = off;
				shp[i].sh_flags |= SHF_ALLOC;
				off += roundup(shp[i].sh_size,
				    sizeof(Elf32_Addr));
				first = 0;
			}
		}
		if (flags & LOAD_SYM) {
			mbcopy(shp, shpp, sz);
		}
		free(shstr);
		free(shp);
	}

	/*
	 * Frob the copied ELF header to give information relative
	 * to elfp.
	 */
	if (flags & LOAD_HDR) {
		elf->e_phoff = 0;
		elf->e_shoff = sizeof(Elf32_Ehdr);
		elf->e_phentsize = 0;
		elf->e_phnum = 0;
		mbcopy(elf, elfp, sizeof(*elf));
	}

	marks[MARK_START] = LOADADDR(minp);
	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
	marks[MARK_SYM] = LOADADDR(elfp);
	marks[MARK_END] = LOADADDR(maxp);

	return 0;
}