1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.50 2024/09/26 01:45:13 jsg Exp $ */
3
4 /*-
5 * Copyright (c) 1997 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center and by Christos Zoulas.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Copyright (c) 1992, 1993
36 * The Regents of the University of California. All rights reserved.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * Ralph Campbell.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)boot.c 8.1 (Berkeley) 6/10/93
66 */
67
68 /*
69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70 *
71 * Permission to use, copy, modify, and distribute this software for any
72 * purpose with or without fee is hereby granted, provided that the above
73 * copyright notice and this permission notice appear in all copies.
74 *
75 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82 */
83
84 #include <sys/param.h> /* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/reboot.h>
86 #include <sys/exec.h>
87
88 #include <elf.h>
89 #include <string.h>
90 #include <errno.h>
91 #include <stdlib.h>
92 #include <unistd.h>
93 #include <err.h>
94
95 #include <dev/vmm/vmm.h>
96
97 #include <machine/biosvar.h>
98 #include <machine/segments.h>
99 #include <machine/specialreg.h>
100 #include <machine/pte.h>
101
102 #include "loadfile.h"
103 #include "vmd.h"
104
/*
 * Translate a kernel physical address into its guest load address.
 * 'offset' is a local variable (marks[MARK_START]) in each elf*_exec()
 * caller that expands this macro.
 */
#define LOADADDR(a)            ((((u_long)(a)) + offset)&0xfffffff)

/* Scratch ELF executable header; filled by loadfile_elf() before dispatch. */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;

/* SEV C-bit for page table entries; stays 0 when SEV is not enabled. */
uint64_t pg_crypt = 0;
130
131 /*
132 * setsegment
133 *
134 * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
136 * memory-type segment descriptor support.
137 *
138 * This function was copied from machdep.c
139 *
140 * Parameters:
141 * sd: Address of the entry to initialize
142 * base: base of the segment
143 * limit: limit of the segment
144 * type: type of the segment
 *  dpl: privilege level of the segment
146 * def32: default 16/32 bit size of the segment
147 * gran: granularity of the segment (byte/page)
148 */
149 static void
setsegment(struct mem_segment_descriptor * sd,uint32_t base,size_t limit,int type,int dpl,int def32,int gran)150 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
151 int type, int dpl, int def32, int gran)
152 {
153 sd->sd_lolimit = (int)limit;
154 sd->sd_lobase = (int)base;
155 sd->sd_type = type;
156 sd->sd_dpl = dpl;
157 sd->sd_p = 1;
158 sd->sd_hilimit = (int)limit >> 16;
159 sd->sd_avl = 0;
160 sd->sd_long = 0;
161 sd->sd_def32 = def32;
162 sd->sd_gran = gran;
163 sd->sd_hibase = (int)base >> 24;
164 }
165
166 /*
167 * push_gdt
168 *
169 * Allocates and populates a page in the guest phys memory space to hold
170 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
171 * create the same GDT that a real bootloader would have created.
172 * This is loaded into the guest phys RAM space at address GDT_PAGE.
173 */
174 static void
push_gdt(void)175 push_gdt(void)
176 {
177 uint8_t gdtpage[PAGE_SIZE];
178 struct mem_segment_descriptor *sd;
179
180 memset(&gdtpage, 0, sizeof(gdtpage));
181
182 sd = (struct mem_segment_descriptor *)&gdtpage;
183
184 /*
185 * Create three segment descriptors:
186 *
187 * GDT[0] : null descriptor. "Created" via memset above.
188 * GDT[1] (selector @ 0x8): Executable segment, for CS
189 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
190 */
191 setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
192 setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
193
194 write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
195 sev_register_encryption(GDT_PAGE, PAGE_SIZE);
196 }
197
198 /*
199 * push_pt_32
200 *
201 * Create an identity-mapped page directory hierarchy mapping the first
202 * 4GB of physical memory. This is used during bootstrapping i386 VMs on
203 * CPUs without unrestricted guest capability.
204 */
205 static void
push_pt_32(void)206 push_pt_32(void)
207 {
208 uint32_t ptes[1024], i;
209
210 memset(ptes, 0, sizeof(ptes));
211 for (i = 0 ; i < 1024; i++) {
212 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
213 }
214 write_mem(PML3_PAGE, ptes, PAGE_SIZE);
215 }
216
217 /*
218 * push_pt_64
219 *
220 * Create an identity-mapped page directory hierarchy mapping the first
221 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
222 * CPUs without unrestricted guest capability.
223 */
224 static void
push_pt_64(void)225 push_pt_64(void)
226 {
227 uint64_t ptes[512], i;
228
229 /* PDPDE0 - first 1GB */
230 memset(ptes, 0, sizeof(ptes));
231 ptes[0] = pg_crypt | PG_V | PML3_PAGE;
232 write_mem(PML4_PAGE, ptes, PAGE_SIZE);
233 sev_register_encryption(PML4_PAGE, PAGE_SIZE);
234
235 /* PDE0 - first 1GB */
236 memset(ptes, 0, sizeof(ptes));
237 ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE;
238 write_mem(PML3_PAGE, ptes, PAGE_SIZE);
239 sev_register_encryption(PML3_PAGE, PAGE_SIZE);
240
241 /* First 1GB (in 2MB pages) */
242 memset(ptes, 0, sizeof(ptes));
243 for (i = 0 ; i < 512; i++) {
244 ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS |
245 ((2048 * 1024) * i);
246 }
247 write_mem(PML2_PAGE, ptes, PAGE_SIZE);
248 sev_register_encryption(PML2_PAGE, PAGE_SIZE);
249 }
250
251 /*
252 * loadfile_elf
253 *
254 * Loads an ELF kernel to its defined load address in the guest VM.
255 * The kernel is loaded to its defined start point as set in the ELF header.
256 *
257 * Parameters:
 *  fp: gzip file handle of the kernel image to load
 *  vm: the VM being created; its create parameters hold the exact
 *      memory map
 *  (out) vrs: register state to set on init for this kernel
 *  bootdevice: the optional non-default boot device
263 *
264 * Return values:
265 * 0 if successful
266 * various error codes returned from gzread(3) or loadelf functions
267 */
int
loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
    unsigned int bootdevice)
{
	int r, is_i386 = 0;
	uint32_t bootargsz;
	size_t n, stacksize;
	u_long marks[MARK_MAX];
	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
	bios_bootmac_t bm, *bootmac = NULL;
	struct vm_create_params *vcp = &vm->vm_params.vmc_params;

	/* Read the ELF executable header into the shared 'hdr' union. */
	if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr))
		return 1;

	memset(&marks, 0, sizeof(marks));
	/* Dispatch on ELF class; e_ident layout is identical in both. */
	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
		is_i386 = 1;
	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
	} else
		errno = ENOEXEC;

	/* NB: on the ENOEXEC path r still holds the (nonzero) gzread count. */
	if (r)
		return (r);

	push_gdt();

	if (is_i386) {
		push_pt_32();
		/* Reconfigure the default flat-64 register set for 32 bit */
		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
	}
	else {
		if (vcp->vcp_sev) {
			/* pg_crypt must be set before building page tables. */
			if (vcp->vcp_poscbit == 0) {
				log_warnx("SEV enabled but no C-bit reported");
				return 1;
			}
			pg_crypt = (1ULL << vcp->vcp_poscbit);
			log_debug("%s: poscbit %d pg_crypt 0x%016llx",
			    __func__, vcp->vcp_poscbit, pg_crypt);
		}
		push_pt_64();
	}

	/* PXE boot: pass the first interface's MAC via the bootargs page. */
	if (bootdevice == VMBOOTDEV_NET) {
		bootmac = &bm;
		memcpy(bootmac, vm->vm_params.vmc_macs[0], ETHER_ADDR_LEN);
	}
	n = create_bios_memmap(vcp, memmap);
	bootargsz = push_bootargs(memmap, n, bootmac);
	stacksize = push_stack(bootargsz, marks[MARK_END]);

	/* Hand the vCPU the kernel entry point and prepared boot stack. */
	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
	vrs->vrs_gdtr.vsi_base = GDT_PAGE;

	log_debug("%s: loaded ELF kernel", __func__);

	return (0);
}
335
336 /*
337 * create_bios_memmap
338 *
339 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
340 *
341 * Parameters:
342 * vcp: the VM create parameters, containing the memory map passed to vmm(4)
343 * memmap (out): the BIOS memory map
344 *
345 * Return values:
346 * Number of bios_memmap_t entries, including the terminating nul-entry.
347 */
348 static size_t
create_bios_memmap(struct vm_create_params * vcp,bios_memmap_t * memmap)349 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
350 {
351 size_t i, n = 0;
352 struct vm_mem_range *vmr;
353
354 for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
355 vmr = &vcp->vcp_memranges[i];
356 memmap[n].addr = vmr->vmr_gpa;
357 memmap[n].size = vmr->vmr_size;
358 if (vmr->vmr_type == VM_MEM_RAM)
359 memmap[n].type = BIOS_MAP_FREE;
360 else
361 memmap[n].type = BIOS_MAP_RES;
362 }
363
364 /* Null mem map entry to denote the end of the ranges */
365 memmap[n].addr = 0x0;
366 memmap[n].size = 0x0;
367 memmap[n].type = BIOS_MAP_END;
368 n++;
369
370 return (n);
371 }
372
373 /*
374 * push_bootargs
375 *
376 * Creates the boot arguments page in the guest address space.
377 * Since vmd(8) is acting as the bootloader, we need to create the same boot
378 * arguments page that a real bootloader would have created. This is loaded
379 * into the guest phys RAM space at address BOOTARGS_PAGE.
380 *
381 * Parameters:
382 * memmap: the BIOS memory map
383 * n: number of entries in memmap
384 * bootmac: optional PXE boot MAC address
385 *
386 * Return values:
387 * The size of the bootargs in bytes
388 */
389 static uint32_t
push_bootargs(bios_memmap_t * memmap,size_t n,bios_bootmac_t * bootmac)390 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
391 {
392 uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
393 bios_consdev_t consdev;
394 uint32_t ba[1024];
395
396 memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t);
397 ba[0] = BOOTARG_MEMMAP;
398 ba[1] = memmap_sz;
399 ba[2] = memmap_sz;
400 memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
401 i = memmap_sz / sizeof(uint32_t);
402
403 /* Serial console device, COM1 @ 0x3f8 */
404 memset(&consdev, 0, sizeof(consdev));
405 consdev.consdev = makedev(8, 0);
406 consdev.conspeed = 115200;
407 consdev.consaddr = 0x3f8;
408
409 consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t);
410 ba[i] = BOOTARG_CONSDEV;
411 ba[i + 1] = consdev_sz;
412 ba[i + 2] = consdev_sz;
413 memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
414 i += consdev_sz / sizeof(uint32_t);
415
416 if (bootmac) {
417 bootmac_sz = 3 * sizeof(uint32_t) +
418 (sizeof(bios_bootmac_t) + 3) & ~3;
419 ba[i] = BOOTARG_BOOTMAC;
420 ba[i + 1] = bootmac_sz;
421 ba[i + 2] = bootmac_sz;
422 memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
423 i += bootmac_sz / sizeof(uint32_t);
424 }
425
426 ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
427
428 write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
429 sev_register_encryption(BOOTARGS_PAGE, PAGE_SIZE);
430
431 return (i * sizeof(uint32_t));
432 }
433
434 /*
435 * push_stack
436 *
437 * Creates the boot stack page in the guest address space. When using a real
438 * bootloader, the stack will be prepared using the following format before
439 * transitioning to kernel start, so vmd(8) needs to mimic the same stack
440 * layout. The stack content is pushed to the guest phys RAM at address
441 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
442 * 4 bytes.
443 *
444 * Stack Layout: (TOS == Top Of Stack)
445 * TOS location of boot arguments page
446 * TOS - 0x4 size of the content in the boot arguments page
447 * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0)
448 * TOS - 0xc size of high memory (biosextmem, not used by kernel at all)
449 * TOS - 0x10 kernel 'end' symbol value
450 * TOS - 0x14 version of bootarg API
451 *
452 * Parameters:
 *  bootargsz: size of boot arguments
 *  end: kernel 'end' symbol value
457 *
458 * Return values:
459 * size of the stack
460 */
461 static size_t
push_stack(uint32_t bootargsz,uint32_t end)462 push_stack(uint32_t bootargsz, uint32_t end)
463 {
464 uint32_t stack[1024];
465 uint16_t loc;
466
467 memset(&stack, 0, sizeof(stack));
468 loc = 1024;
469
470 stack[--loc] = BOOTARGS_PAGE;
471 stack[--loc] = bootargsz;
472 stack[--loc] = 0; /* biosbasemem */
473 stack[--loc] = 0; /* biosextmem */
474 stack[--loc] = end;
475 stack[--loc] = 0x0e;
476 stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
477 stack[--loc] = 0;
478
479 write_mem(STACK_PAGE, &stack, PAGE_SIZE);
480 sev_register_encryption(STACK_PAGE, PAGE_SIZE);
481
482 return (1024 - (loc - 1)) * sizeof(uint32_t);
483 }
484
485 /*
486 * mread
487 *
 * Reads 'sz' bytes from the gzip file handle 'fp'
489 * into the guest address space at paddr 'addr'.
490 *
491 * Parameters:
492 * fp: kernel image file to read from.
493 * addr: guest paddr_t to load to
494 * sz: number of bytes to load
495 *
496 * Return values:
497 * returns 'sz' if successful, or 0 otherwise.
498 */
size_t
mread(gzFile fp, paddr_t addr, size_t sz)
{
	const char *errstr = NULL;
	int errnum = 0;
	size_t ct;
	size_t i, osz;
	char buf[PAGE_SIZE];

	/* Mark the destination range as encrypted for SEV guests. */
	sev_register_encryption(addr, sz);

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	osz = sz;
	/* Leading partial chunk: align 'addr' up to the next page boundary. */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;

		if ((size_t)gzread(fp, buf, ct) != ct) {
			/* Z_ERRNO means a system error; report errno then. */
			errstr = gzerror(fp, &errnum);
			if (errnum == Z_ERRNO)
				errnum = errno;
			log_warnx("%s: error %d in mread, %s", __progname,
			    errnum, errstr);
			return (0);
		}

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Remaining bytes after the (possibly empty) leading chunk. */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	/* Copy the rest in page-sized chunks; 'addr' is now page-aligned. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;	/* final short chunk */
		else
			ct = PAGE_SIZE;

		if ((size_t)gzread(fp, buf, ct) != ct) {
			errstr = gzerror(fp, &errnum);
			if (errnum == Z_ERRNO)
				errnum = errno;
			log_warnx("%s: error %d in mread, %s", __progname,
			    errnum, errstr);
			return (0);
		}

		if (write_mem(addr, buf, ct))
			return (0);
	}

	/* Success: report the originally requested byte count. */
	return (osz);
}
565
566 /*
567 * marc4random_buf
568 *
569 * load 'sz' bytes of random data into the guest address space at paddr
570 * 'addr'.
571 *
572 * Parameters:
573 * addr: guest paddr_t to load random bytes into
574 * sz: number of random bytes to load
575 *
576 * Return values:
577 * nothing
578 */
static void
marc4random_buf(paddr_t addr, int sz)
{
	int i, ct;
	char buf[PAGE_SIZE];

	/* Mark the destination range as encrypted for SEV guests. */
	sev_register_encryption(addr, sz);

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	/* Leading partial chunk: align 'addr' up to the next page boundary. */
	if (addr % PAGE_SIZE != 0) {
		memset(buf, 0, sizeof(buf));
		ct = PAGE_SIZE - (addr % PAGE_SIZE);

		arc4random_buf(buf, ct);

		if (write_mem(addr, buf, ct))
			return;

		addr += ct;
	}

	/*
	 * NOTE(review): unlike mread(), the leading chunk 'ct' is not
	 * subtracted from 'sz' before this loop, so up to a page of extra
	 * random data may be written past addr + sz.  Harmless for a
	 * randomize segment, but worth confirming against callers.
	 */
	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;	/* final short chunk */
		else
			ct = PAGE_SIZE;

		arc4random_buf(buf, ct);

		if (write_mem(addr, buf, ct))
			return;
	}
}
617
618 /*
619 * mbzero
620 *
621 * load 'sz' bytes of zeros into the guest address space at paddr
622 * 'addr'.
623 *
624 * Parameters:
625 * addr: guest paddr_t to zero
626 * sz: number of zero bytes to store
627 *
628 * Return values:
629 * nothing
630 */
static void
mbzero(paddr_t addr, int sz)
{
	/* NOTE(review): presumably write_mem() zero-fills when handed a
	 * NULL buffer — confirm against its implementation. */
	if (write_mem(addr, NULL, sz))
		return;
	sev_register_encryption(addr, sz);
}
638
639 /*
640 * mbcopy
641 *
642 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
643 *
644 * Parameters:
645 * src: source buffer to copy from
646 * dst: destination guest paddr_t to copy to
647 * sz: number of bytes to copy
648 *
649 * Return values:
650 * nothing
651 */
static void
mbcopy(void *src, paddr_t dst, int sz)
{
	/* NOTE(review): write_mem() failure is not checked here, unlike
	 * in mbzero() — confirm whether that is intentional. */
	write_mem(dst, src, sz);
	sev_register_encryption(dst, sz);
}
658
659 /*
660 * elf64_exec
661 *
662 * Load the kernel indicated by 'fp' into the guest physical memory
663 * space, at the addresses defined in the ELF header.
664 *
665 * This function is used for 64 bit kernels.
666 *
667 * Parameters:
668 * fp: kernel image file to load
669 * elf: ELF header of the kernel
670 * marks: array to store the offsets of various kernel structures
671 * (start, bss, etc)
672 * flags: flag value to indicate which section(s) to load (usually
673 * LOAD_ALL)
674 *
675 * Return values:
676 * 0 if successful
677 * 1 if unsuccessful
678 */
679 static int
elf64_exec(gzFile fp,Elf64_Ehdr * elf,u_long * marks,int flags)680 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags)
681 {
682 Elf64_Shdr *shp;
683 Elf64_Phdr *phdr;
684 Elf64_Off off;
685 int i;
686 size_t sz;
687 int havesyms;
688 paddr_t minp = ~0, maxp = 0, pos = 0;
689 paddr_t offset = marks[MARK_START], shpp, elfp;
690
691 sz = elf->e_phnum * sizeof(Elf64_Phdr);
692 phdr = malloc(sz);
693
694 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) {
695 free(phdr);
696 return 1;
697 }
698
699 if ((size_t)gzread(fp, phdr, sz) != sz) {
700 free(phdr);
701 return 1;
702 }
703
704 for (i = 0; i < elf->e_phnum; i++) {
705 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
706 int m;
707
708 /* Fill segment if asked for. */
709 if (flags & LOAD_RANDOM) {
710 for (pos = 0; pos < phdr[i].p_filesz;
711 pos += m) {
712 m = phdr[i].p_filesz - pos;
713 marc4random_buf(phdr[i].p_paddr + pos,
714 m);
715 }
716 }
717 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
718 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
719 marks[MARK_ERANDOM] =
720 marks[MARK_RANDOM] + phdr[i].p_filesz;
721 }
722 continue;
723 }
724
725 if (phdr[i].p_type != PT_LOAD ||
726 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
727 continue;
728
729 #define IS_TEXT(p) (p.p_flags & PF_X)
730 #define IS_DATA(p) ((p.p_flags & PF_X) == 0)
731 #define IS_BSS(p) (p.p_filesz < p.p_memsz)
732 /*
733 * XXX: Assume first address is lowest
734 */
735 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
736 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
737
738 /* Read in segment. */
739 if (gzseek(fp, (off_t)phdr[i].p_offset,
740 SEEK_SET) == -1) {
741 free(phdr);
742 return 1;
743 }
744 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
745 phdr[i].p_filesz) {
746 free(phdr);
747 return 1;
748 }
749 }
750
751 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
752 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
753 pos = phdr[i].p_paddr;
754 if (minp > pos)
755 minp = pos;
756 pos += phdr[i].p_filesz;
757 if (maxp < pos)
758 maxp = pos;
759 }
760
761 /* Zero out BSS. */
762 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
763 mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
764 phdr[i].p_memsz - phdr[i].p_filesz);
765 }
766 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
767 pos += phdr[i].p_memsz - phdr[i].p_filesz;
768 if (maxp < pos)
769 maxp = pos;
770 }
771 }
772 free(phdr);
773
774 /*
775 * Copy the ELF and section headers.
776 */
777 elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
778 if (flags & (LOAD_HDR | COUNT_HDR))
779 maxp += sizeof(Elf64_Ehdr);
780
781 if (flags & (LOAD_SYM | COUNT_SYM)) {
782 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
783 warn("gzseek section headers");
784 return 1;
785 }
786 sz = elf->e_shnum * sizeof(Elf64_Shdr);
787 shp = malloc(sz);
788
789 if ((size_t)gzread(fp, shp, sz) != sz) {
790 free(shp);
791 return 1;
792 }
793
794 shpp = maxp;
795 maxp += roundup(sz, sizeof(Elf64_Addr));
796
797 size_t shstrsz = shp[elf->e_shstrndx].sh_size;
798 char *shstr = malloc(shstrsz);
799 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
800 SEEK_SET) == -1) {
801 free(shstr);
802 free(shp);
803 return 1;
804 }
805 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
806 free(shstr);
807 free(shp);
808 return 1;
809 }
810
811 /*
812 * Now load the symbol sections themselves. Make sure the
813 * sections are aligned. Don't bother with string tables if
814 * there are no symbol sections.
815 */
816 off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
817
818 for (havesyms = i = 0; i < elf->e_shnum; i++)
819 if (shp[i].sh_type == SHT_SYMTAB)
820 havesyms = 1;
821
822 for (i = 0; i < elf->e_shnum; i++) {
823 if (shp[i].sh_type == SHT_SYMTAB ||
824 shp[i].sh_type == SHT_STRTAB ||
825 !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
826 !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
827 if (havesyms && (flags & LOAD_SYM)) {
828 if (gzseek(fp, (off_t)shp[i].sh_offset,
829 SEEK_SET) == -1) {
830 free(shstr);
831 free(shp);
832 return 1;
833 }
834 if (mread(fp, maxp,
835 shp[i].sh_size) != shp[i].sh_size) {
836 free(shstr);
837 free(shp);
838 return 1;
839 }
840 }
841 maxp += roundup(shp[i].sh_size,
842 sizeof(Elf64_Addr));
843 shp[i].sh_offset = off;
844 shp[i].sh_flags |= SHF_ALLOC;
845 off += roundup(shp[i].sh_size,
846 sizeof(Elf64_Addr));
847 }
848 }
849 if (flags & LOAD_SYM) {
850 mbcopy(shp, shpp, sz);
851 }
852 free(shstr);
853 free(shp);
854 }
855
856 /*
857 * Frob the copied ELF header to give information relative
858 * to elfp.
859 */
860 if (flags & LOAD_HDR) {
861 elf->e_phoff = 0;
862 elf->e_shoff = sizeof(Elf64_Ehdr);
863 elf->e_phentsize = 0;
864 elf->e_phnum = 0;
865 mbcopy(elf, elfp, sizeof(*elf));
866 }
867
868 marks[MARK_START] = LOADADDR(minp);
869 marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
870 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */
871 marks[MARK_SYM] = LOADADDR(elfp);
872 marks[MARK_END] = LOADADDR(maxp);
873
874 return 0;
875 }
876
877 /*
878 * elf32_exec
879 *
880 * Load the kernel indicated by 'fp' into the guest physical memory
881 * space, at the addresses defined in the ELF header.
882 *
883 * This function is used for 32 bit kernels.
884 *
885 * Parameters:
886 * fp: kernel image file to load
887 * elf: ELF header of the kernel
888 * marks: array to store the offsets of various kernel structures
889 * (start, bss, etc)
890 * flags: flag value to indicate which section(s) to load (usually
891 * LOAD_ALL)
892 *
893 * Return values:
894 * 0 if successful
895 * 1 if unsuccessful
896 */
897 static int
elf32_exec(gzFile fp,Elf32_Ehdr * elf,u_long * marks,int flags)898 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags)
899 {
900 Elf32_Shdr *shp;
901 Elf32_Phdr *phdr;
902 Elf32_Off off;
903 int i;
904 size_t sz;
905 int havesyms;
906 paddr_t minp = ~0, maxp = 0, pos = 0;
907 paddr_t offset = marks[MARK_START], shpp, elfp;
908
909 sz = elf->e_phnum * sizeof(Elf32_Phdr);
910 phdr = malloc(sz);
911
912 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) {
913 free(phdr);
914 return 1;
915 }
916
917 if ((size_t)gzread(fp, phdr, sz) != sz) {
918 free(phdr);
919 return 1;
920 }
921
922 for (i = 0; i < elf->e_phnum; i++) {
923 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
924 int m;
925
926 /* Fill segment if asked for. */
927 if (flags & LOAD_RANDOM) {
928 for (pos = 0; pos < phdr[i].p_filesz;
929 pos += m) {
930 m = phdr[i].p_filesz - pos;
931 marc4random_buf(phdr[i].p_paddr + pos,
932 m);
933 }
934 }
935 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
936 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
937 marks[MARK_ERANDOM] =
938 marks[MARK_RANDOM] + phdr[i].p_filesz;
939 }
940 continue;
941 }
942
943 if (phdr[i].p_type != PT_LOAD ||
944 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
945 continue;
946
947 #define IS_TEXT(p) (p.p_flags & PF_X)
948 #define IS_DATA(p) ((p.p_flags & PF_X) == 0)
949 #define IS_BSS(p) (p.p_filesz < p.p_memsz)
950 /*
951 * XXX: Assume first address is lowest
952 */
953 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
954 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
955
956 /* Read in segment. */
957 if (gzseek(fp, (off_t)phdr[i].p_offset,
958 SEEK_SET) == -1) {
959 free(phdr);
960 return 1;
961 }
962 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
963 phdr[i].p_filesz) {
964 free(phdr);
965 return 1;
966 }
967 }
968
969 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
970 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
971 pos = phdr[i].p_paddr;
972 if (minp > pos)
973 minp = pos;
974 pos += phdr[i].p_filesz;
975 if (maxp < pos)
976 maxp = pos;
977 }
978
979 /* Zero out BSS. */
980 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
981 mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
982 phdr[i].p_memsz - phdr[i].p_filesz);
983 }
984 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
985 pos += phdr[i].p_memsz - phdr[i].p_filesz;
986 if (maxp < pos)
987 maxp = pos;
988 }
989 }
990 free(phdr);
991
992 /*
993 * Copy the ELF and section headers.
994 */
995 elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
996 if (flags & (LOAD_HDR | COUNT_HDR))
997 maxp += sizeof(Elf32_Ehdr);
998
999 if (flags & (LOAD_SYM | COUNT_SYM)) {
1000 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
1001 warn("lseek section headers");
1002 return 1;
1003 }
1004 sz = elf->e_shnum * sizeof(Elf32_Shdr);
1005 shp = malloc(sz);
1006
1007 if ((size_t)gzread(fp, shp, sz) != sz) {
1008 free(shp);
1009 return 1;
1010 }
1011
1012 shpp = maxp;
1013 maxp += roundup(sz, sizeof(Elf32_Addr));
1014
1015 size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1016 char *shstr = malloc(shstrsz);
1017 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1018 SEEK_SET) == -1) {
1019 free(shstr);
1020 free(shp);
1021 return 1;
1022 }
1023 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
1024 free(shstr);
1025 free(shp);
1026 return 1;
1027 }
1028
1029 /*
1030 * Now load the symbol sections themselves. Make sure the
1031 * sections are aligned. Don't bother with string tables if
1032 * there are no symbol sections.
1033 */
1034 off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1035
1036 for (havesyms = i = 0; i < elf->e_shnum; i++)
1037 if (shp[i].sh_type == SHT_SYMTAB)
1038 havesyms = 1;
1039
1040 for (i = 0; i < elf->e_shnum; i++) {
1041 if (shp[i].sh_type == SHT_SYMTAB ||
1042 shp[i].sh_type == SHT_STRTAB ||
1043 !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1044 if (havesyms && (flags & LOAD_SYM)) {
1045 if (gzseek(fp, (off_t)shp[i].sh_offset,
1046 SEEK_SET) == -1) {
1047 free(shstr);
1048 free(shp);
1049 return 1;
1050 }
1051 if (mread(fp, maxp,
1052 shp[i].sh_size) != shp[i].sh_size) {
1053 free(shstr);
1054 free(shp);
1055 return 1;
1056 }
1057 }
1058 maxp += roundup(shp[i].sh_size,
1059 sizeof(Elf32_Addr));
1060 shp[i].sh_offset = off;
1061 shp[i].sh_flags |= SHF_ALLOC;
1062 off += roundup(shp[i].sh_size,
1063 sizeof(Elf32_Addr));
1064 }
1065 }
1066 if (flags & LOAD_SYM) {
1067 mbcopy(shp, shpp, sz);
1068 }
1069 free(shstr);
1070 free(shp);
1071 }
1072
1073 /*
1074 * Frob the copied ELF header to give information relative
1075 * to elfp.
1076 */
1077 if (flags & LOAD_HDR) {
1078 elf->e_phoff = 0;
1079 elf->e_shoff = sizeof(Elf32_Ehdr);
1080 elf->e_phentsize = 0;
1081 elf->e_phnum = 0;
1082 mbcopy(elf, elfp, sizeof(*elf));
1083 }
1084
1085 marks[MARK_START] = LOADADDR(minp);
1086 marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1087 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */
1088 marks[MARK_SYM] = LOADADDR(elfp);
1089 marks[MARK_END] = LOADADDR(maxp);
1090
1091 return 0;
1092 }
1093