xref: /openbsd/usr.sbin/vmd/loadfile_elf.c (revision 65bbee46)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.50 2024/09/26 01:45:13 jsg Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/reboot.h>
86 #include <sys/exec.h>
87 
88 #include <elf.h>
89 #include <string.h>
90 #include <errno.h>
91 #include <stdlib.h>
92 #include <unistd.h>
93 #include <err.h>
94 
95 #include <dev/vmm/vmm.h>
96 
97 #include <machine/biosvar.h>
98 #include <machine/segments.h>
99 #include <machine/specialreg.h>
100 #include <machine/pte.h>
101 
102 #include "loadfile.h"
103 #include "vmd.h"
104 
/*
 * LOADADDR: convert a kernel link/physical address into the guest load
 * address by adding the caller-local 'offset' and masking into the low
 * 256MB (0x0fffffff) window.
 */
#define LOADADDR(a)            ((((u_long)(a)) + offset)&0xfffffff)

/* Scratch ELF executable header, shared by the 32- and 64-bit loaders. */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;

/* SEV C-bit mask, OR'ed into page table entries when SEV is enabled. */
uint64_t pg_crypt = 0;
130 
131 /*
132  * setsegment
133  *
134  * Initializes a segment selector entry with the provided descriptor.
135  * For the purposes of the bootloader mimiced by vmd(8), we only need
136  * memory-type segment descriptor support.
137  *
138  * This function was copied from machdep.c
139  *
140  * Parameters:
141  *  sd: Address of the entry to initialize
142  *  base: base of the segment
143  *  limit: limit of the segment
144  *  type: type of the segment
145  *  dpl: privilege level of the egment
146  *  def32: default 16/32 bit size of the segment
147  *  gran: granularity of the segment (byte/page)
148  */
149 static void
setsegment(struct mem_segment_descriptor * sd,uint32_t base,size_t limit,int type,int dpl,int def32,int gran)150 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
151     int type, int dpl, int def32, int gran)
152 {
153 	sd->sd_lolimit = (int)limit;
154 	sd->sd_lobase = (int)base;
155 	sd->sd_type = type;
156 	sd->sd_dpl = dpl;
157 	sd->sd_p = 1;
158 	sd->sd_hilimit = (int)limit >> 16;
159 	sd->sd_avl = 0;
160 	sd->sd_long = 0;
161 	sd->sd_def32 = def32;
162 	sd->sd_gran = gran;
163 	sd->sd_hibase = (int)base >> 24;
164 }
165 
166 /*
167  * push_gdt
168  *
169  * Allocates and populates a page in the guest phys memory space to hold
170  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
171  * create the same GDT that a real bootloader would have created.
172  * This is loaded into the guest phys RAM space at address GDT_PAGE.
173  */
174 static void
push_gdt(void)175 push_gdt(void)
176 {
177 	uint8_t gdtpage[PAGE_SIZE];
178 	struct mem_segment_descriptor *sd;
179 
180 	memset(&gdtpage, 0, sizeof(gdtpage));
181 
182 	sd = (struct mem_segment_descriptor *)&gdtpage;
183 
184 	/*
185 	 * Create three segment descriptors:
186 	 *
187 	 * GDT[0] : null descriptor. "Created" via memset above.
188 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
189 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
190 	 */
191 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
192 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
193 
194 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
195 	sev_register_encryption(GDT_PAGE, PAGE_SIZE);
196 }
197 
198 /*
199  * push_pt_32
200  *
201  * Create an identity-mapped page directory hierarchy mapping the first
202  * 4GB of physical memory. This is used during bootstrapping i386 VMs on
203  * CPUs without unrestricted guest capability.
204  */
205 static void
push_pt_32(void)206 push_pt_32(void)
207 {
208 	uint32_t ptes[1024], i;
209 
210 	memset(ptes, 0, sizeof(ptes));
211 	for (i = 0 ; i < 1024; i++) {
212 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
213 	}
214 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
215 }
216 
217 /*
218  * push_pt_64
219  *
220  * Create an identity-mapped page directory hierarchy mapping the first
221  * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
222  * CPUs without unrestricted guest capability.
223  */
224 static void
push_pt_64(void)225 push_pt_64(void)
226 {
227 	uint64_t ptes[512], i;
228 
229 	/* PDPDE0 - first 1GB */
230 	memset(ptes, 0, sizeof(ptes));
231 	ptes[0] = pg_crypt | PG_V | PML3_PAGE;
232 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
233 	sev_register_encryption(PML4_PAGE, PAGE_SIZE);
234 
235 	/* PDE0 - first 1GB */
236 	memset(ptes, 0, sizeof(ptes));
237 	ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE;
238 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
239 	sev_register_encryption(PML3_PAGE, PAGE_SIZE);
240 
241 	/* First 1GB (in 2MB pages) */
242 	memset(ptes, 0, sizeof(ptes));
243 	for (i = 0 ; i < 512; i++) {
244 		ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS |
245 		    ((2048 * 1024) * i);
246 	}
247 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
248 	sev_register_encryption(PML2_PAGE, PAGE_SIZE);
249 }
250 
251 /*
252  * loadfile_elf
253  *
254  * Loads an ELF kernel to its defined load address in the guest VM.
255  * The kernel is loaded to its defined start point as set in the ELF header.
256  *
257  * Parameters:
258  *  fp: file of a kernel file to load
259  *  vcp: the VM create parameters, holding the exact memory map
260  *  (out) vrs: register state to set on init for this kernel
261  *  bootdev: the optional non-default boot device
262  *  howto: optional boot flags for the kernel
263  *
264  * Return values:
265  *  0 if successful
266  *  various error codes returned from gzread(3) or loadelf functions
267  */
268 int
loadfile_elf(gzFile fp,struct vmd_vm * vm,struct vcpu_reg_state * vrs,unsigned int bootdevice)269 loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
270     unsigned int bootdevice)
271 {
272 	int r, is_i386 = 0;
273 	uint32_t bootargsz;
274 	size_t n, stacksize;
275 	u_long marks[MARK_MAX];
276 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
277 	bios_bootmac_t bm, *bootmac = NULL;
278 	struct vm_create_params *vcp = &vm->vm_params.vmc_params;
279 
280 	if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr))
281 		return 1;
282 
283 	memset(&marks, 0, sizeof(marks));
284 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
285 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
286 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
287 		is_i386 = 1;
288 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
289 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
290 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
291 	} else
292 		errno = ENOEXEC;
293 
294 	if (r)
295 		return (r);
296 
297 	push_gdt();
298 
299 	if (is_i386) {
300 		push_pt_32();
301 		/* Reconfigure the default flat-64 register set for 32 bit */
302 		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
303 		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
304 		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
305 	}
306 	else {
307 		if (vcp->vcp_sev) {
308 			if (vcp->vcp_poscbit == 0) {
309 				log_warnx("SEV enabled but no C-bit reported");
310 				return 1;
311 			}
312 			pg_crypt = (1ULL << vcp->vcp_poscbit);
313 			log_debug("%s: poscbit %d pg_crypt 0x%016llx",
314 			    __func__, vcp->vcp_poscbit, pg_crypt);
315 		}
316 		push_pt_64();
317 	}
318 
319 	if (bootdevice == VMBOOTDEV_NET) {
320 		bootmac = &bm;
321 		memcpy(bootmac, vm->vm_params.vmc_macs[0], ETHER_ADDR_LEN);
322 	}
323 	n = create_bios_memmap(vcp, memmap);
324 	bootargsz = push_bootargs(memmap, n, bootmac);
325 	stacksize = push_stack(bootargsz, marks[MARK_END]);
326 
327 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
328 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
329 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
330 
331 	log_debug("%s: loaded ELF kernel", __func__);
332 
333 	return (0);
334 }
335 
336 /*
337  * create_bios_memmap
338  *
339  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
340  *
341  * Parameters:
342  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
343  *   memmap (out): the BIOS memory map
344  *
345  * Return values:
346  * Number of bios_memmap_t entries, including the terminating nul-entry.
347  */
348 static size_t
create_bios_memmap(struct vm_create_params * vcp,bios_memmap_t * memmap)349 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
350 {
351 	size_t i, n = 0;
352 	struct vm_mem_range *vmr;
353 
354 	for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
355 		vmr = &vcp->vcp_memranges[i];
356 		memmap[n].addr = vmr->vmr_gpa;
357 		memmap[n].size = vmr->vmr_size;
358 		if (vmr->vmr_type == VM_MEM_RAM)
359 			memmap[n].type = BIOS_MAP_FREE;
360 		else
361 			memmap[n].type = BIOS_MAP_RES;
362 	}
363 
364 	/* Null mem map entry to denote the end of the ranges */
365 	memmap[n].addr = 0x0;
366 	memmap[n].size = 0x0;
367 	memmap[n].type = BIOS_MAP_END;
368 	n++;
369 
370 	return (n);
371 }
372 
373 /*
374  * push_bootargs
375  *
376  * Creates the boot arguments page in the guest address space.
377  * Since vmd(8) is acting as the bootloader, we need to create the same boot
378  * arguments page that a real bootloader would have created. This is loaded
379  * into the guest phys RAM space at address BOOTARGS_PAGE.
380  *
381  * Parameters:
382  *  memmap: the BIOS memory map
383  *  n: number of entries in memmap
384  *  bootmac: optional PXE boot MAC address
385  *
386  * Return values:
387  *  The size of the bootargs in bytes
388  */
389 static uint32_t
push_bootargs(bios_memmap_t * memmap,size_t n,bios_bootmac_t * bootmac)390 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
391 {
392 	uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
393 	bios_consdev_t consdev;
394 	uint32_t ba[1024];
395 
396 	memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t);
397 	ba[0] = BOOTARG_MEMMAP;
398 	ba[1] = memmap_sz;
399 	ba[2] = memmap_sz;
400 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
401 	i = memmap_sz / sizeof(uint32_t);
402 
403 	/* Serial console device, COM1 @ 0x3f8 */
404 	memset(&consdev, 0, sizeof(consdev));
405 	consdev.consdev = makedev(8, 0);
406 	consdev.conspeed = 115200;
407 	consdev.consaddr = 0x3f8;
408 
409 	consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t);
410 	ba[i] = BOOTARG_CONSDEV;
411 	ba[i + 1] = consdev_sz;
412 	ba[i + 2] = consdev_sz;
413 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
414 	i += consdev_sz / sizeof(uint32_t);
415 
416 	if (bootmac) {
417 		bootmac_sz = 3 * sizeof(uint32_t) +
418 		    (sizeof(bios_bootmac_t) + 3) & ~3;
419 		ba[i] = BOOTARG_BOOTMAC;
420 		ba[i + 1] = bootmac_sz;
421 		ba[i + 2] = bootmac_sz;
422 		memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
423 		i += bootmac_sz / sizeof(uint32_t);
424 	}
425 
426 	ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
427 
428 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
429 	sev_register_encryption(BOOTARGS_PAGE, PAGE_SIZE);
430 
431 	return (i * sizeof(uint32_t));
432 }
433 
434 /*
435  * push_stack
436  *
437  * Creates the boot stack page in the guest address space. When using a real
438  * bootloader, the stack will be prepared using the following format before
439  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
440  * layout. The stack content is pushed to the guest phys RAM at address
441  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
442  * 4 bytes.
443  *
444  * Stack Layout: (TOS == Top Of Stack)
445  *  TOS		location of boot arguments page
446  *  TOS - 0x4	size of the content in the boot arguments page
447  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
448  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
449  *  TOS - 0x10	kernel 'end' symbol value
450  *  TOS - 0x14	version of bootarg API
451  *
452  * Parameters:
453  *  bootargsz: size of boot arguments
454  *  end: kernel 'end' symbol value
455  *  bootdev: the optional non-default boot device
456  *  howto: optional boot flags for the kernel
457  *
458  * Return values:
459  *  size of the stack
460  */
461 static size_t
push_stack(uint32_t bootargsz,uint32_t end)462 push_stack(uint32_t bootargsz, uint32_t end)
463 {
464 	uint32_t stack[1024];
465 	uint16_t loc;
466 
467 	memset(&stack, 0, sizeof(stack));
468 	loc = 1024;
469 
470 	stack[--loc] = BOOTARGS_PAGE;
471 	stack[--loc] = bootargsz;
472 	stack[--loc] = 0; /* biosbasemem */
473 	stack[--loc] = 0; /* biosextmem */
474 	stack[--loc] = end;
475 	stack[--loc] = 0x0e;
476 	stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
477 	stack[--loc] = 0;
478 
479 	write_mem(STACK_PAGE, &stack, PAGE_SIZE);
480 	sev_register_encryption(STACK_PAGE, PAGE_SIZE);
481 
482 	return (1024 - (loc - 1)) * sizeof(uint32_t);
483 }
484 
485 /*
486  * mread
487  *
488  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
489  * into the guest address space at paddr 'addr'.
490  *
491  * Parameters:
492  *  fp: kernel image file to read from.
493  *  addr: guest paddr_t to load to
494  *  sz: number of bytes to load
495  *
496  * Return values:
497  *  returns 'sz' if successful, or 0 otherwise.
498  */
499 size_t
mread(gzFile fp,paddr_t addr,size_t sz)500 mread(gzFile fp, paddr_t addr, size_t sz)
501 {
502 	const char *errstr = NULL;
503 	int errnum = 0;
504 	size_t ct;
505 	size_t i, osz;
506 	char buf[PAGE_SIZE];
507 
508 	sev_register_encryption(addr, sz);
509 
510 	/*
511 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
512 	 * write_mem
513 	 */
514 	ct = 0;
515 	osz = sz;
516 	if ((addr & PAGE_MASK) != 0) {
517 		memset(buf, 0, sizeof(buf));
518 		if (sz > PAGE_SIZE)
519 			ct = PAGE_SIZE - (addr & PAGE_MASK);
520 		else
521 			ct = sz;
522 
523 		if ((size_t)gzread(fp, buf, ct) != ct) {
524 			errstr = gzerror(fp, &errnum);
525 			if (errnum == Z_ERRNO)
526 				errnum = errno;
527 			log_warnx("%s: error %d in mread, %s", __progname,
528 			    errnum, errstr);
529 			return (0);
530 		}
531 
532 		if (write_mem(addr, buf, ct))
533 			return (0);
534 
535 		addr += ct;
536 	}
537 
538 	sz = sz - ct;
539 
540 	if (sz == 0)
541 		return (osz);
542 
543 	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
544 		memset(buf, 0, sizeof(buf));
545 		if (i + PAGE_SIZE > sz)
546 			ct = sz - i;
547 		else
548 			ct = PAGE_SIZE;
549 
550 		if ((size_t)gzread(fp, buf, ct) != ct) {
551 			errstr = gzerror(fp, &errnum);
552 			if (errnum == Z_ERRNO)
553 				errnum = errno;
554 			log_warnx("%s: error %d in mread, %s", __progname,
555 			    errnum, errstr);
556 			return (0);
557 		}
558 
559 		if (write_mem(addr, buf, ct))
560 			return (0);
561 	}
562 
563 	return (osz);
564 }
565 
566 /*
567  * marc4random_buf
568  *
569  * load 'sz' bytes of random data into the guest address space at paddr
570  * 'addr'.
571  *
572  * Parameters:
573  *  addr: guest paddr_t to load random bytes into
574  *  sz: number of random bytes to load
575  *
576  * Return values:
577  *  nothing
578  */
579 static void
marc4random_buf(paddr_t addr,int sz)580 marc4random_buf(paddr_t addr, int sz)
581 {
582 	int i, ct;
583 	char buf[PAGE_SIZE];
584 
585 	sev_register_encryption(addr, sz);
586 
587 	/*
588 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
589 	 * write_mem
590 	 */
591 	ct = 0;
592 	if (addr % PAGE_SIZE != 0) {
593 		memset(buf, 0, sizeof(buf));
594 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
595 
596 		arc4random_buf(buf, ct);
597 
598 		if (write_mem(addr, buf, ct))
599 			return;
600 
601 		addr += ct;
602 	}
603 
604 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
605 		memset(buf, 0, sizeof(buf));
606 		if (i + PAGE_SIZE > sz)
607 			ct = sz - i;
608 		else
609 			ct = PAGE_SIZE;
610 
611 		arc4random_buf(buf, ct);
612 
613 		if (write_mem(addr, buf, ct))
614 			return;
615 	}
616 }
617 
618 /*
619  * mbzero
620  *
621  * load 'sz' bytes of zeros into the guest address space at paddr
622  * 'addr'.
623  *
624  * Parameters:
625  *  addr: guest paddr_t to zero
626  *  sz: number of zero bytes to store
627  *
628  * Return values:
629  *  nothing
630  */
631 static void
mbzero(paddr_t addr,int sz)632 mbzero(paddr_t addr, int sz)
633 {
634 	if (write_mem(addr, NULL, sz))
635 		return;
636 	sev_register_encryption(addr, sz);
637 }
638 
639 /*
640  * mbcopy
641  *
642  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
643  *
644  * Parameters:
645  *  src: source buffer to copy from
646  *  dst: destination guest paddr_t to copy to
647  *  sz: number of bytes to copy
648  *
649  * Return values:
650  *  nothing
651  */
652 static void
mbcopy(void * src,paddr_t dst,int sz)653 mbcopy(void *src, paddr_t dst, int sz)
654 {
655 	write_mem(dst, src, sz);
656 	sev_register_encryption(dst, sz);
657 }
658 
659 /*
660  * elf64_exec
661  *
662  * Load the kernel indicated by 'fp' into the guest physical memory
663  * space, at the addresses defined in the ELF header.
664  *
665  * This function is used for 64 bit kernels.
666  *
667  * Parameters:
668  *  fp: kernel image file to load
669  *  elf: ELF header of the kernel
670  *  marks: array to store the offsets of various kernel structures
671  *      (start, bss, etc)
672  *  flags: flag value to indicate which section(s) to load (usually
673  *      LOAD_ALL)
674  *
675  * Return values:
676  *  0 if successful
677  *  1 if unsuccessful
678  */
679 static int
elf64_exec(gzFile fp,Elf64_Ehdr * elf,u_long * marks,int flags)680 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags)
681 {
682 	Elf64_Shdr *shp;
683 	Elf64_Phdr *phdr;
684 	Elf64_Off off;
685 	int i;
686 	size_t sz;
687 	int havesyms;
688 	paddr_t minp = ~0, maxp = 0, pos = 0;
689 	paddr_t offset = marks[MARK_START], shpp, elfp;
690 
691 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
692 	phdr = malloc(sz);
693 
694 	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
695 		free(phdr);
696 		return 1;
697 	}
698 
699 	if ((size_t)gzread(fp, phdr, sz) != sz) {
700 		free(phdr);
701 		return 1;
702 	}
703 
704 	for (i = 0; i < elf->e_phnum; i++) {
705 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
706 			int m;
707 
708 			/* Fill segment if asked for. */
709 			if (flags & LOAD_RANDOM) {
710 				for (pos = 0; pos < phdr[i].p_filesz;
711 				    pos += m) {
712 					m = phdr[i].p_filesz - pos;
713 					marc4random_buf(phdr[i].p_paddr + pos,
714 					    m);
715 				}
716 			}
717 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
718 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
719 				marks[MARK_ERANDOM] =
720 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
721 			}
722 			continue;
723 		}
724 
725 		if (phdr[i].p_type != PT_LOAD ||
726 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
727 			continue;
728 
729 #define IS_TEXT(p)	(p.p_flags & PF_X)
730 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
731 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
732 		/*
733 		 * XXX: Assume first address is lowest
734 		 */
735 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
736 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
737 
738 			/* Read in segment. */
739 			if (gzseek(fp, (off_t)phdr[i].p_offset,
740 			    SEEK_SET) == -1) {
741 				free(phdr);
742 				return 1;
743 			}
744 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
745 			    phdr[i].p_filesz) {
746 				free(phdr);
747 				return 1;
748 			}
749 		}
750 
751 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
752 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
753 			pos = phdr[i].p_paddr;
754 			if (minp > pos)
755 				minp = pos;
756 			pos += phdr[i].p_filesz;
757 			if (maxp < pos)
758 				maxp = pos;
759 		}
760 
761 		/* Zero out BSS. */
762 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
763 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
764 			    phdr[i].p_memsz - phdr[i].p_filesz);
765 		}
766 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
767 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
768 			if (maxp < pos)
769 				maxp = pos;
770 		}
771 	}
772 	free(phdr);
773 
774 	/*
775 	 * Copy the ELF and section headers.
776 	 */
777 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
778 	if (flags & (LOAD_HDR | COUNT_HDR))
779 		maxp += sizeof(Elf64_Ehdr);
780 
781 	if (flags & (LOAD_SYM | COUNT_SYM)) {
782 		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
783 			warn("gzseek section headers");
784 			return 1;
785 		}
786 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
787 		shp = malloc(sz);
788 
789 		if ((size_t)gzread(fp, shp, sz) != sz) {
790 			free(shp);
791 			return 1;
792 		}
793 
794 		shpp = maxp;
795 		maxp += roundup(sz, sizeof(Elf64_Addr));
796 
797 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
798 		char *shstr = malloc(shstrsz);
799 		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
800 		    SEEK_SET) == -1) {
801 			free(shstr);
802 			free(shp);
803 			return 1;
804 		}
805 		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
806 			free(shstr);
807 			free(shp);
808 			return 1;
809 		}
810 
811 		/*
812 		 * Now load the symbol sections themselves. Make sure the
813 		 * sections are aligned. Don't bother with string tables if
814 		 * there are no symbol sections.
815 		 */
816 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
817 
818 		for (havesyms = i = 0; i < elf->e_shnum; i++)
819 			if (shp[i].sh_type == SHT_SYMTAB)
820 				havesyms = 1;
821 
822 		for (i = 0; i < elf->e_shnum; i++) {
823 			if (shp[i].sh_type == SHT_SYMTAB ||
824 			    shp[i].sh_type == SHT_STRTAB ||
825 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
826 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
827 				if (havesyms && (flags & LOAD_SYM)) {
828 					if (gzseek(fp, (off_t)shp[i].sh_offset,
829 					    SEEK_SET) == -1) {
830 						free(shstr);
831 						free(shp);
832 						return 1;
833 					}
834 					if (mread(fp, maxp,
835 					    shp[i].sh_size) != shp[i].sh_size) {
836 						free(shstr);
837 						free(shp);
838 						return 1;
839 					}
840 				}
841 				maxp += roundup(shp[i].sh_size,
842 				    sizeof(Elf64_Addr));
843 				shp[i].sh_offset = off;
844 				shp[i].sh_flags |= SHF_ALLOC;
845 				off += roundup(shp[i].sh_size,
846 				    sizeof(Elf64_Addr));
847 			}
848 		}
849 		if (flags & LOAD_SYM) {
850 			mbcopy(shp, shpp, sz);
851 		}
852 		free(shstr);
853 		free(shp);
854 	}
855 
856 	/*
857 	 * Frob the copied ELF header to give information relative
858 	 * to elfp.
859 	 */
860 	if (flags & LOAD_HDR) {
861 		elf->e_phoff = 0;
862 		elf->e_shoff = sizeof(Elf64_Ehdr);
863 		elf->e_phentsize = 0;
864 		elf->e_phnum = 0;
865 		mbcopy(elf, elfp, sizeof(*elf));
866 	}
867 
868 	marks[MARK_START] = LOADADDR(minp);
869 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
870 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
871 	marks[MARK_SYM] = LOADADDR(elfp);
872 	marks[MARK_END] = LOADADDR(maxp);
873 
874 	return 0;
875 }
876 
877 /*
878  * elf32_exec
879  *
880  * Load the kernel indicated by 'fp' into the guest physical memory
881  * space, at the addresses defined in the ELF header.
882  *
883  * This function is used for 32 bit kernels.
884  *
885  * Parameters:
886  *  fp: kernel image file to load
887  *  elf: ELF header of the kernel
888  *  marks: array to store the offsets of various kernel structures
889  *      (start, bss, etc)
890  *  flags: flag value to indicate which section(s) to load (usually
891  *      LOAD_ALL)
892  *
893  * Return values:
894  *  0 if successful
895  *  1 if unsuccessful
896  */
897 static int
elf32_exec(gzFile fp,Elf32_Ehdr * elf,u_long * marks,int flags)898 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags)
899 {
900 	Elf32_Shdr *shp;
901 	Elf32_Phdr *phdr;
902 	Elf32_Off off;
903 	int i;
904 	size_t sz;
905 	int havesyms;
906 	paddr_t minp = ~0, maxp = 0, pos = 0;
907 	paddr_t offset = marks[MARK_START], shpp, elfp;
908 
909 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
910 	phdr = malloc(sz);
911 
912 	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
913 		free(phdr);
914 		return 1;
915 	}
916 
917 	if ((size_t)gzread(fp, phdr, sz) != sz) {
918 		free(phdr);
919 		return 1;
920 	}
921 
922 	for (i = 0; i < elf->e_phnum; i++) {
923 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
924 			int m;
925 
926 			/* Fill segment if asked for. */
927 			if (flags & LOAD_RANDOM) {
928 				for (pos = 0; pos < phdr[i].p_filesz;
929 				    pos += m) {
930 					m = phdr[i].p_filesz - pos;
931 					marc4random_buf(phdr[i].p_paddr + pos,
932 					    m);
933 				}
934 			}
935 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
936 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
937 				marks[MARK_ERANDOM] =
938 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
939 			}
940 			continue;
941 		}
942 
943 		if (phdr[i].p_type != PT_LOAD ||
944 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
945 			continue;
946 
947 #define IS_TEXT(p)	(p.p_flags & PF_X)
948 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
949 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
950 		/*
951 		 * XXX: Assume first address is lowest
952 		 */
953 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
954 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
955 
956 			/* Read in segment. */
957 			if (gzseek(fp, (off_t)phdr[i].p_offset,
958 			    SEEK_SET) == -1) {
959 				free(phdr);
960 				return 1;
961 			}
962 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
963 			    phdr[i].p_filesz) {
964 				free(phdr);
965 				return 1;
966 			}
967 		}
968 
969 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
970 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
971 			pos = phdr[i].p_paddr;
972 			if (minp > pos)
973 				minp = pos;
974 			pos += phdr[i].p_filesz;
975 			if (maxp < pos)
976 				maxp = pos;
977 		}
978 
979 		/* Zero out BSS. */
980 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
981 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
982 			    phdr[i].p_memsz - phdr[i].p_filesz);
983 		}
984 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
985 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
986 			if (maxp < pos)
987 				maxp = pos;
988 		}
989 	}
990 	free(phdr);
991 
992 	/*
993 	 * Copy the ELF and section headers.
994 	 */
995 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
996 	if (flags & (LOAD_HDR | COUNT_HDR))
997 		maxp += sizeof(Elf32_Ehdr);
998 
999 	if (flags & (LOAD_SYM | COUNT_SYM)) {
1000 		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
1001 			warn("lseek section headers");
1002 			return 1;
1003 		}
1004 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1005 		shp = malloc(sz);
1006 
1007 		if ((size_t)gzread(fp, shp, sz) != sz) {
1008 			free(shp);
1009 			return 1;
1010 		}
1011 
1012 		shpp = maxp;
1013 		maxp += roundup(sz, sizeof(Elf32_Addr));
1014 
1015 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1016 		char *shstr = malloc(shstrsz);
1017 		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1018 		    SEEK_SET) == -1) {
1019 			free(shstr);
1020 			free(shp);
1021 			return 1;
1022 		}
1023 		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
1024 			free(shstr);
1025 			free(shp);
1026 			return 1;
1027 		}
1028 
1029 		/*
1030 		 * Now load the symbol sections themselves. Make sure the
1031 		 * sections are aligned. Don't bother with string tables if
1032 		 * there are no symbol sections.
1033 		 */
1034 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1035 
1036 		for (havesyms = i = 0; i < elf->e_shnum; i++)
1037 			if (shp[i].sh_type == SHT_SYMTAB)
1038 				havesyms = 1;
1039 
1040 		for (i = 0; i < elf->e_shnum; i++) {
1041 			if (shp[i].sh_type == SHT_SYMTAB ||
1042 			    shp[i].sh_type == SHT_STRTAB ||
1043 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1044 				if (havesyms && (flags & LOAD_SYM)) {
1045 					if (gzseek(fp, (off_t)shp[i].sh_offset,
1046 					    SEEK_SET) == -1) {
1047 						free(shstr);
1048 						free(shp);
1049 						return 1;
1050 					}
1051 					if (mread(fp, maxp,
1052 					    shp[i].sh_size) != shp[i].sh_size) {
1053 						free(shstr);
1054 						free(shp);
1055 						return 1;
1056 					}
1057 				}
1058 				maxp += roundup(shp[i].sh_size,
1059 				    sizeof(Elf32_Addr));
1060 				shp[i].sh_offset = off;
1061 				shp[i].sh_flags |= SHF_ALLOC;
1062 				off += roundup(shp[i].sh_size,
1063 				    sizeof(Elf32_Addr));
1064 			}
1065 		}
1066 		if (flags & LOAD_SYM) {
1067 			mbcopy(shp, shpp, sz);
1068 		}
1069 		free(shstr);
1070 		free(shp);
1071 	}
1072 
1073 	/*
1074 	 * Frob the copied ELF header to give information relative
1075 	 * to elfp.
1076 	 */
1077 	if (flags & LOAD_HDR) {
1078 		elf->e_phoff = 0;
1079 		elf->e_shoff = sizeof(Elf32_Ehdr);
1080 		elf->e_phentsize = 0;
1081 		elf->e_phnum = 0;
1082 		mbcopy(elf, elfp, sizeof(*elf));
1083 	}
1084 
1085 	marks[MARK_START] = LOADADDR(minp);
1086 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1087 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1088 	marks[MARK_SYM] = LOADADDR(elfp);
1089 	marks[MARK_END] = LOADADDR(maxp);
1090 
1091 	return 0;
1092 }
1093