xref: /openbsd/usr.sbin/vmd/loadfile_elf.c (revision 09467b48)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.35 2019/05/16 21:16:04 claudio Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 
89 #include <elf.h>
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/specialreg.h>
104 #include <machine/pte.h>
105 
106 #include "loadfile.h"
107 #include "vmd.h"
108 
/*
 * Relocate address 'a' by the caller-local 'offset' and mask the result
 * into the low 256MB (28-bit) window.
 */
#define LOADADDR(a)            ((((u_long)(a)) + offset)&0xfffffff)

/* Scratch ELF header, filled by loadfile_elf() and reused by the helpers. */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

/* Forward declarations for the file-local bootstrap helpers below. */
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
132 
133 /*
134  * setsegment
135  *
136  * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
138  * memory-type segment descriptor support.
139  *
140  * This function was copied from machdep.c
141  *
142  * Parameters:
143  *  sd: Address of the entry to initialize
144  *  base: base of the segment
145  *  limit: limit of the segment
146  *  type: type of the segment
 *  dpl: privilege level of the segment
148  *  def32: default 16/32 bit size of the segment
149  *  gran: granularity of the segment (byte/page)
150  */
151 static void
152 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
153     int type, int dpl, int def32, int gran)
154 {
155 	sd->sd_lolimit = (int)limit;
156 	sd->sd_lobase = (int)base;
157 	sd->sd_type = type;
158 	sd->sd_dpl = dpl;
159 	sd->sd_p = 1;
160 	sd->sd_hilimit = (int)limit >> 16;
161 	sd->sd_avl = 0;
162 	sd->sd_long = 0;
163 	sd->sd_def32 = def32;
164 	sd->sd_gran = gran;
165 	sd->sd_hibase = (int)base >> 24;
166 }
167 
168 /*
169  * push_gdt
170  *
171  * Allocates and populates a page in the guest phys memory space to hold
172  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
173  * create the same GDT that a real bootloader would have created.
174  * This is loaded into the guest phys RAM space at address GDT_PAGE.
175  */
176 static void
177 push_gdt(void)
178 {
179 	uint8_t gdtpage[PAGE_SIZE];
180 	struct mem_segment_descriptor *sd;
181 
182 	memset(&gdtpage, 0, sizeof(gdtpage));
183 
184 	sd = (struct mem_segment_descriptor *)&gdtpage;
185 
186 	/*
187 	 * Create three segment descriptors:
188 	 *
189 	 * GDT[0] : null desriptor. "Created" via memset above.
190 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
191 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
192 	 */
193 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
194 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
195 
196 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
197 }
198 
199 /*
200  * push_pt_32
201  *
202  * Create an identity-mapped page directory hierarchy mapping the first
203  * 4GB of physical memory. This is used during bootstrapping i386 VMs on
204  * CPUs without unrestricted guest capability.
205  */
206 static void
207 push_pt_32(void)
208 {
209 	uint32_t ptes[1024], i;
210 
211 	memset(ptes, 0, sizeof(ptes));
212 	for (i = 0 ; i < 1024; i++) {
213 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
214 	}
215 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
216 }
217 
218 /*
219  * push_pt_64
220  *
221  * Create an identity-mapped page directory hierarchy mapping the first
222  * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
223  * CPUs without unrestricted guest capability.
224  */
225 static void
226 push_pt_64(void)
227 {
228 	uint64_t ptes[512], i;
229 
230 	/* PDPDE0 - first 1GB */
231 	memset(ptes, 0, sizeof(ptes));
232 	ptes[0] = PG_V | PML3_PAGE;
233 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
234 
235 	/* PDE0 - first 1GB */
236 	memset(ptes, 0, sizeof(ptes));
237 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
238 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
239 
240 	/* First 1GB (in 2MB pages) */
241 	memset(ptes, 0, sizeof(ptes));
242 	for (i = 0 ; i < 512; i++) {
243 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
244 	}
245 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
246 }
247 
248 /*
249  * loadfile_elf
250  *
 * Loads an ELF kernel to its defined load address in the guest VM.
252  * The kernel is loaded to its defined start point as set in the ELF header.
253  *
254  * Parameters:
255  *  fp: file of a kernel file to load
256  *  vcp: the VM create parameters, holding the exact memory map
257  *  (out) vrs: register state to set on init for this kernel
258  *  bootdev: the optional non-default boot device
259  *  howto: optional boot flags for the kernel
260  *
261  * Return values:
262  *  0 if successful
263  *  various error codes returned from read(2) or loadelf functions
264  */
265 int
266 loadfile_elf(FILE *fp, struct vm_create_params *vcp,
267     struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto,
268     unsigned int bootdevice)
269 {
270 	int r, is_i386 = 0;
271 	uint32_t bootargsz;
272 	size_t n, stacksize;
273 	u_long marks[MARK_MAX];
274 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
275 	bios_bootmac_t bm, *bootmac = NULL;
276 
277 	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
278 		return 1;
279 
280 	memset(&marks, 0, sizeof(marks));
281 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
282 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
283 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
284 		is_i386 = 1;
285 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
286 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
287 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
288 	} else
289 		errno = ENOEXEC;
290 
291 	if (r)
292 		return (r);
293 
294 	push_gdt();
295 
296 	if (is_i386) {
297 		push_pt_32();
298 		/* Reconfigure the default flat-64 register set for 32 bit */
299 		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
300 		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
301 		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
302 	}
303 	else
304 		push_pt_64();
305 
306 	if (bootdevice & VMBOOTDEV_NET) {
307 		bootmac = &bm;
308 		memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN);
309 	}
310 	n = create_bios_memmap(vcp, memmap);
311 	bootargsz = push_bootargs(memmap, n, bootmac);
312 	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);
313 
314 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
315 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
316 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
317 
318 	log_debug("%s: loaded ELF kernel", __func__);
319 
320 	return (0);
321 }
322 
323 /*
324  * create_bios_memmap
325  *
326  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
327  *
328  * Parameters:
329  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
330  *   memmap (out): the BIOS memory map
331  *
332  * Return values:
333  * Number of bios_memmap_t entries, including the terminating nul-entry.
334  */
335 static size_t
336 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
337 {
338 	size_t i, n = 0, sz;
339 	paddr_t gpa;
340 	struct vm_mem_range *vmr;
341 
342 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
343 		vmr = &vcp->vcp_memranges[i];
344 		gpa = vmr->vmr_gpa;
345 		sz = vmr->vmr_size;
346 
347 		/*
348 		 * Make sure that we do not mark the ROM/video RAM area in the
349 		 * low memory as physcal memory available to the kernel.
350 		 */
351 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
352 			if (gpa >= LOWMEM_KB * 1024)
353 				sz = 0;
354 			else
355 				sz = LOWMEM_KB * 1024 - gpa;
356 		}
357 
358 		if (sz != 0) {
359 			memmap[n].addr = gpa;
360 			memmap[n].size = sz;
361 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
362 			n++;
363 		}
364 	}
365 
366 	/* Null mem map entry to denote the end of the ranges */
367 	memmap[n].addr = 0x0;
368 	memmap[n].size = 0x0;
369 	memmap[n].type = 0x0;
370 	n++;
371 
372 	return (n);
373 }
374 
375 /*
376  * push_bootargs
377  *
378  * Creates the boot arguments page in the guest address space.
379  * Since vmd(8) is acting as the bootloader, we need to create the same boot
380  * arguments page that a real bootloader would have created. This is loaded
381  * into the guest phys RAM space at address BOOTARGS_PAGE.
382  *
383  * Parameters:
384  *  memmap: the BIOS memory map
385  *  n: number of entries in memmap
386  *
387  * Return values:
388  *  The size of the bootargs
389  */
390 static uint32_t
391 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
392 {
393 	uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
394 	bios_consdev_t consdev;
395 	uint32_t ba[1024];
396 
397 	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
398 	ba[0] = 0x0;    /* memory map */
399 	ba[1] = memmap_sz;
400 	ba[2] = memmap_sz;	/* next */
401 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
402 	i = memmap_sz / sizeof(int);
403 
404 	/* Serial console device, COM1 @ 0x3f8 */
405 	consdev.consdev = makedev(8, 0);	/* com1 @ 0x3f8 */
406 	consdev.conspeed = 115200;
407 	consdev.consaddr = 0x3f8;
408 	consdev.consfreq = 0;
409 
410 	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
411 	ba[i] = 0x5;   /* consdev */
412 	ba[i + 1] = consdev_sz;
413 	ba[i + 2] = consdev_sz;
414 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
415 	i += consdev_sz / sizeof(int);
416 
417 	if (bootmac) {
418 		bootmac_sz = 3 * sizeof(int) + (sizeof(bios_bootmac_t) + 3) & ~3;
419 		ba[i] = 0x7;   /* bootmac */
420 		ba[i + 1] = bootmac_sz;
421 		ba[i + 2] = bootmac_sz;
422 		memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
423 		i += bootmac_sz / sizeof(int);
424 	}
425 
426 	ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
427 
428 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
429 
430 	return (i * sizeof(int));
431 }
432 
433 /*
434  * push_stack
435  *
436  * Creates the boot stack page in the guest address space. When using a real
437  * bootloader, the stack will be prepared using the following format before
438  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
439  * layout. The stack content is pushed to the guest phys RAM at address
440  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
441  * 4 bytes.
442  *
443  * Stack Layout: (TOS == Top Of Stack)
444  *  TOS		location of boot arguments page
445  *  TOS - 0x4	size of the content in the boot arguments page
446  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
447  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
448  *  TOS - 0x10	kernel 'end' symbol value
449  *  TOS - 0x14	version of bootarg API
450  *
451  * Parameters:
452  *  bootargsz: size of boot arguments
453  *  end: kernel 'end' symbol value
454  *  bootdev: the optional non-default boot device
455  *  howto: optional boot flags for the kernel
456  *
457  * Return values:
458  *  size of the stack
459  */
static size_t
push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto)
{
	uint32_t stack[1024];
	uint16_t loc;

	memset(&stack, 0, sizeof(stack));
	loc = 1024;

	/* Default boot device when the caller passed none. */
	if (bootdev == 0)
		bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */

	/* Push entries top-down; see the stack layout comment above. */
	stack[--loc] = BOOTARGS_PAGE;	/* TOS: bootargs page address */
	stack[--loc] = bootargsz;	/* size of the bootargs content */
	stack[--loc] = 0; /* biosbasemem */
	stack[--loc] = 0; /* biosextmem */
	stack[--loc] = end;	/* kernel 'end' symbol value */
	stack[--loc] = 0x0e;	/* bootarg API version */
	stack[--loc] = bootdev;
	stack[--loc] = howto;

	write_mem(STACK_PAGE, &stack, PAGE_SIZE);

	/*
	 * (loc - 1) counts one extra zeroed slot below the last push, so
	 * RSP ends up one word beneath 'howto' — presumably the position a
	 * real 'call' into the kernel would leave its return address in.
	 * NOTE(review): confirm against the kernel's locore entry code.
	 */
	return (1024 - (loc - 1)) * sizeof(uint32_t);
}
485 
486 /*
487  * mread
488  *
 * Reads 'sz' bytes from the stream 'fp'
 * into the guest address space at paddr 'addr'.
 *
 * Parameters:
 *  fp: kernel image file to read from
494  *  addr: guest paddr_t to load to
495  *  sz: number of bytes to load
496  *
497  * Return values:
498  *  returns 'sz' if successful, or 0 otherwise.
499  */
size_t
mread(FILE *fp, paddr_t addr, size_t sz)
{
	size_t ct;
	size_t i, rd, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	rd = 0;
	osz = sz;	/* remember the requested total for the return value */
	/*
	 * If 'addr' is not page aligned, transfer the sub-page leading
	 * chunk first so the loop below can run page-at-a-time.
	 */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Account for the leading chunk (ct is 0 if addr was aligned). */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;	/* final partial page */
		else
			ct = PAGE_SIZE;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);
	}

	return (osz);
}
557 
558 /*
559  * marc4random_buf
560  *
561  * load 'sz' bytes of random data into the guest address space at paddr
562  * 'addr'.
563  *
564  * Parameters:
565  *  addr: guest paddr_t to load random bytes into
566  *  sz: number of random bytes to load
567  *
568  * Return values:
569  *  nothing
570  */
571 static void
572 marc4random_buf(paddr_t addr, int sz)
573 {
574 	int i, ct;
575 	char buf[PAGE_SIZE];
576 
577 	/*
578 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
579 	 * write_mem
580 	 */
581 	ct = 0;
582 	if (addr % PAGE_SIZE != 0) {
583 		memset(buf, 0, sizeof(buf));
584 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
585 
586 		arc4random_buf(buf, ct);
587 
588 		if (write_mem(addr, buf, ct))
589 			return;
590 
591 		addr += ct;
592 	}
593 
594 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
595 		memset(buf, 0, sizeof(buf));
596 		if (i + PAGE_SIZE > sz)
597 			ct = sz - i;
598 		else
599 			ct = PAGE_SIZE;
600 
601 		arc4random_buf(buf, ct);
602 
603 		if (write_mem(addr, buf, ct))
604 			return;
605 	}
606 }
607 
608 /*
609  * mbzero
610  *
611  * load 'sz' bytes of zeros into the guest address space at paddr
612  * 'addr'.
613  *
614  * Parameters:
615  *  addr: guest paddr_t to zero
616  *  sz: number of zero bytes to store
617  *
618  * Return values:
619  *  nothing
620  */
621 static void
622 mbzero(paddr_t addr, int sz)
623 {
624 	if (write_mem(addr, NULL, sz))
625 		return;
626 }
627 
628 /*
629  * mbcopy
630  *
631  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
632  *
633  * Parameters:
634  *  src: source buffer to copy from
635  *  dst: destination guest paddr_t to copy to
636  *  sz: number of bytes to copy
637  *
638  * Return values:
639  *  nothing
640  */
641 static void
642 mbcopy(void *src, paddr_t dst, int sz)
643 {
644 	write_mem(dst, src, sz);
645 }
646 
647 /*
648  * elf64_exec
649  *
 * Load the kernel indicated by 'fp' into the guest physical memory
 * space, at the addresses defined in the ELF header.
 *
 * This function is used for 64 bit kernels.
 *
 * Parameters:
 *  fp: file of the kernel to load
657  *  elf: ELF header of the kernel
658  *  marks: array to store the offsets of various kernel structures
659  *      (start, bss, etc)
660  *  flags: flag value to indicate which section(s) to load (usually
661  *      LOAD_ALL)
662  *
663  * Return values:
664  *  0 if successful
665  *  1 if unsuccessful
666  */
667 static int
668 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
669 {
670 	Elf64_Shdr *shp;
671 	Elf64_Phdr *phdr;
672 	Elf64_Off off;
673 	int i;
674 	size_t sz;
675 	int first;
676 	int havesyms, havelines;
677 	paddr_t minp = ~0, maxp = 0, pos = 0;
678 	paddr_t offset = marks[MARK_START], shpp, elfp;
679 
680 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
681 	phdr = malloc(sz);
682 
683 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
684 		free(phdr);
685 		return 1;
686 	}
687 
688 	if (fread(phdr, 1, sz, fp) != sz) {
689 		free(phdr);
690 		return 1;
691 	}
692 
693 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
694 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
695 			int m;
696 
697 			/* Fill segment if asked for. */
698 			if (flags & LOAD_RANDOM) {
699 				for (pos = 0; pos < phdr[i].p_filesz;
700 				    pos += m) {
701 					m = phdr[i].p_filesz - pos;
702 					marc4random_buf(phdr[i].p_paddr + pos,
703 					    m);
704 				}
705 			}
706 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
707 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
708 				marks[MARK_ERANDOM] =
709 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
710 			}
711 			continue;
712 		}
713 
714 		if (phdr[i].p_type != PT_LOAD ||
715 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
716 			continue;
717 
718 #define IS_TEXT(p)	(p.p_flags & PF_X)
719 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
720 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
721 		/*
722 		 * XXX: Assume first address is lowest
723 		 */
724 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
725 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
726 
727 			/* Read in segment. */
728 			if (fseeko(fp, (off_t)phdr[i].p_offset,
729 			    SEEK_SET) == -1) {
730 				free(phdr);
731 				return 1;
732 			}
733 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
734 			    phdr[i].p_filesz) {
735 				free(phdr);
736 				return 1;
737 			}
738 
739 			first = 0;
740 		}
741 
742 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
743 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
744 			pos = phdr[i].p_paddr;
745 			if (minp > pos)
746 				minp = pos;
747 			pos += phdr[i].p_filesz;
748 			if (maxp < pos)
749 				maxp = pos;
750 		}
751 
752 		/* Zero out BSS. */
753 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
754 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
755 			    phdr[i].p_memsz - phdr[i].p_filesz);
756 		}
757 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
758 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
759 			if (maxp < pos)
760 				maxp = pos;
761 		}
762 	}
763 	free(phdr);
764 
765 	/*
766 	 * Copy the ELF and section headers.
767 	 */
768 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
769 	if (flags & (LOAD_HDR | COUNT_HDR))
770 		maxp += sizeof(Elf64_Ehdr);
771 
772 	if (flags & (LOAD_SYM | COUNT_SYM)) {
773 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
774 			warn("lseek section headers");
775 			return 1;
776 		}
777 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
778 		shp = malloc(sz);
779 
780 		if (fread(shp, 1, sz, fp) != sz) {
781 			free(shp);
782 			return 1;
783 		}
784 
785 		shpp = maxp;
786 		maxp += roundup(sz, sizeof(Elf64_Addr));
787 
788 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
789 		char *shstr = malloc(shstrsz);
790 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
791 		    SEEK_SET) == -1) {
792 			free(shstr);
793 			free(shp);
794 			return 1;
795 		}
796 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
797 			free(shstr);
798 			free(shp);
799 			return 1;
800 		}
801 
802 		/*
803 		 * Now load the symbol sections themselves. Make sure the
804 		 * sections are aligned. Don't bother with string tables if
805 		 * there are no symbol sections.
806 		 */
807 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
808 
809 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
810 			if (shp[i].sh_type == SHT_SYMTAB)
811 				havesyms = 1;
812 
813 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
814 			if (shp[i].sh_type == SHT_SYMTAB ||
815 			    shp[i].sh_type == SHT_STRTAB ||
816 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
817 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
818 				if (havesyms && (flags & LOAD_SYM)) {
819 					if (fseeko(fp, (off_t)shp[i].sh_offset,
820 					    SEEK_SET) == -1) {
821 						free(shstr);
822 						free(shp);
823 						return 1;
824 					}
825 					if (mread(fp, maxp,
826 					    shp[i].sh_size) != shp[i].sh_size) {
827 						free(shstr);
828 						free(shp);
829 						return 1;
830 					}
831 				}
832 				maxp += roundup(shp[i].sh_size,
833 				    sizeof(Elf64_Addr));
834 				shp[i].sh_offset = off;
835 				shp[i].sh_flags |= SHF_ALLOC;
836 				off += roundup(shp[i].sh_size,
837 				    sizeof(Elf64_Addr));
838 				first = 0;
839 			}
840 		}
841 		if (flags & LOAD_SYM) {
842 			mbcopy(shp, shpp, sz);
843 		}
844 		free(shstr);
845 		free(shp);
846 	}
847 
848 	/*
849 	 * Frob the copied ELF header to give information relative
850 	 * to elfp.
851 	 */
852 	if (flags & LOAD_HDR) {
853 		elf->e_phoff = 0;
854 		elf->e_shoff = sizeof(Elf64_Ehdr);
855 		elf->e_phentsize = 0;
856 		elf->e_phnum = 0;
857 		mbcopy(elf, elfp, sizeof(*elf));
858 	}
859 
860 	marks[MARK_START] = LOADADDR(minp);
861 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
862 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
863 	marks[MARK_SYM] = LOADADDR(elfp);
864 	marks[MARK_END] = LOADADDR(maxp);
865 
866 	return 0;
867 }
868 
869 /*
870  * elf32_exec
871  *
 * Load the kernel indicated by 'fp' into the guest physical memory
 * space, at the addresses defined in the ELF header.
 *
 * This function is used for 32 bit kernels.
 *
 * Parameters:
 *  fp: file of the kernel to load
879  *  elf: ELF header of the kernel
880  *  marks: array to store the offsets of various kernel structures
881  *      (start, bss, etc)
882  *  flags: flag value to indicate which section(s) to load (usually
883  *      LOAD_ALL)
884  *
885  * Return values:
886  *  0 if successful
887  *  1 if unsuccessful
888  */
889 static int
890 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
891 {
892 	Elf32_Shdr *shp;
893 	Elf32_Phdr *phdr;
894 	Elf32_Off off;
895 	int i;
896 	size_t sz;
897 	int first;
898 	int havesyms, havelines;
899 	paddr_t minp = ~0, maxp = 0, pos = 0;
900 	paddr_t offset = marks[MARK_START], shpp, elfp;
901 
902 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
903 	phdr = malloc(sz);
904 
905 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
906 		free(phdr);
907 		return 1;
908 	}
909 
910 	if (fread(phdr, 1, sz, fp) != sz) {
911 		free(phdr);
912 		return 1;
913 	}
914 
915 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
916 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
917 			int m;
918 
919 			/* Fill segment if asked for. */
920 			if (flags & LOAD_RANDOM) {
921 				for (pos = 0; pos < phdr[i].p_filesz;
922 				    pos += m) {
923 					m = phdr[i].p_filesz - pos;
924 					marc4random_buf(phdr[i].p_paddr + pos,
925 					    m);
926 				}
927 			}
928 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
929 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
930 				marks[MARK_ERANDOM] =
931 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
932 			}
933 			continue;
934 		}
935 
936 		if (phdr[i].p_type != PT_LOAD ||
937 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
938 			continue;
939 
940 #define IS_TEXT(p)	(p.p_flags & PF_X)
941 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
942 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
943 		/*
944 		 * XXX: Assume first address is lowest
945 		 */
946 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
947 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
948 
949 			/* Read in segment. */
950 			if (fseeko(fp, (off_t)phdr[i].p_offset,
951 			    SEEK_SET) == -1) {
952 				free(phdr);
953 				return 1;
954 			}
955 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
956 			    phdr[i].p_filesz) {
957 				free(phdr);
958 				return 1;
959 			}
960 
961 			first = 0;
962 		}
963 
964 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
965 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
966 			pos = phdr[i].p_paddr;
967 			if (minp > pos)
968 				minp = pos;
969 			pos += phdr[i].p_filesz;
970 			if (maxp < pos)
971 				maxp = pos;
972 		}
973 
974 		/* Zero out BSS. */
975 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
976 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
977 			    phdr[i].p_memsz - phdr[i].p_filesz);
978 		}
979 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
980 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
981 			if (maxp < pos)
982 				maxp = pos;
983 		}
984 	}
985 	free(phdr);
986 
987 	/*
988 	 * Copy the ELF and section headers.
989 	 */
990 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
991 	if (flags & (LOAD_HDR | COUNT_HDR))
992 		maxp += sizeof(Elf32_Ehdr);
993 
994 	if (flags & (LOAD_SYM | COUNT_SYM)) {
995 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
996 			warn("lseek section headers");
997 			return 1;
998 		}
999 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1000 		shp = malloc(sz);
1001 
1002 		if (fread(shp, 1, sz, fp) != sz) {
1003 			free(shp);
1004 			return 1;
1005 		}
1006 
1007 		shpp = maxp;
1008 		maxp += roundup(sz, sizeof(Elf32_Addr));
1009 
1010 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1011 		char *shstr = malloc(shstrsz);
1012 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1013 		    SEEK_SET) == -1) {
1014 			free(shstr);
1015 			free(shp);
1016 			return 1;
1017 		}
1018 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
1019 			free(shstr);
1020 			free(shp);
1021 			return 1;
1022 		}
1023 
1024 		/*
1025 		 * Now load the symbol sections themselves. Make sure the
1026 		 * sections are aligned. Don't bother with string tables if
1027 		 * there are no symbol sections.
1028 		 */
1029 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1030 
1031 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
1032 			if (shp[i].sh_type == SHT_SYMTAB)
1033 				havesyms = 1;
1034 
1035 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1036 			if (shp[i].sh_type == SHT_SYMTAB ||
1037 			    shp[i].sh_type == SHT_STRTAB ||
1038 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1039 				if (havesyms && (flags & LOAD_SYM)) {
1040 					if (fseeko(fp, (off_t)shp[i].sh_offset,
1041 					    SEEK_SET) == -1) {
1042 						free(shstr);
1043 						free(shp);
1044 						return 1;
1045 					}
1046 					if (mread(fp, maxp,
1047 					    shp[i].sh_size) != shp[i].sh_size) {
1048 						free(shstr);
1049 						free(shp);
1050 						return 1;
1051 					}
1052 				}
1053 				maxp += roundup(shp[i].sh_size,
1054 				    sizeof(Elf32_Addr));
1055 				shp[i].sh_offset = off;
1056 				shp[i].sh_flags |= SHF_ALLOC;
1057 				off += roundup(shp[i].sh_size,
1058 				    sizeof(Elf32_Addr));
1059 				first = 0;
1060 			}
1061 		}
1062 		if (flags & LOAD_SYM) {
1063 			mbcopy(shp, shpp, sz);
1064 		}
1065 		free(shstr);
1066 		free(shp);
1067 	}
1068 
1069 	/*
1070 	 * Frob the copied ELF header to give information relative
1071 	 * to elfp.
1072 	 */
1073 	if (flags & LOAD_HDR) {
1074 		elf->e_phoff = 0;
1075 		elf->e_shoff = sizeof(Elf32_Ehdr);
1076 		elf->e_phentsize = 0;
1077 		elf->e_phnum = 0;
1078 		mbcopy(elf, elfp, sizeof(*elf));
1079 	}
1080 
1081 	marks[MARK_START] = LOADADDR(minp);
1082 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1083 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1084 	marks[MARK_SYM] = LOADADDR(elfp);
1085 	marks[MARK_END] = LOADADDR(maxp);
1086 
1087 	return 0;
1088 }
1089