1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * KVM backend for hypervisor domain dumps.  We don't use libkvm for
28  * such dumps, since they do not have a namelist file or the typical
29  * dump structures we expect to aid bootstrapping.  Instead, we
30  * bootstrap based upon a debug_info structure at a known VA, using the
31  * guest's own page tables to resolve to physical addresses, and
32  * construct the namelist in a manner similar to ksyms_snapshot().
33  *
34  * Note that there are two formats understood by this module: the older,
35  * ad hoc format, which we call 'core' within this file, and an
36  * ELF-based format, known as 'elf'.
37  *
38  * We only support the older format generated on Solaris dom0: before we
39  * fixed it, core dump files were broken whenever a PFN didn't map a
40  * real MFN (!).
41  */
42 
43 #include <strings.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <stddef.h>
47 #include <stdarg.h>
48 #include <unistd.h>
49 #include <fcntl.h>
50 #include <gelf.h>
51 #include <errno.h>
52 
53 #include <sys/mman.h>
54 #include <sys/stat.h>
55 #include <sys/debug_info.h>
56 #include <sys/xen_mmu.h>
57 #include <sys/elf.h>
58 #include <sys/machelf.h>
59 #include <sys/modctl.h>
60 #include <sys/kobj.h>
61 #include <sys/kobj_impl.h>
62 #include <sys/sysmacros.h>
63 #include <sys/privmregs.h>
64 #include <vm/as.h>
65 
66 #include <mdb/mdb_io.h>
67 #include <mdb/mdb_kb.h>
68 #include <mdb/mdb_target_impl.h>
69 
70 #include <xen/public/xen.h>
71 #include <xen/public/version.h>
72 #include <xen/public/elfnote.h>
73 
74 #define	XKB_SHDR_NULL 0
75 #define	XKB_SHDR_SYMTAB 1
76 #define	XKB_SHDR_STRTAB 2
77 #define	XKB_SHDR_SHSTRTAB 3
78 #define	XKB_SHDR_NUM 4
79 
80 #define	XKB_WALK_LOCAL 0x1
81 #define	XKB_WALK_GLOBAL 0x2
82 #define	XKB_WALK_STR 0x4
83 #define	XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
84 
85 #if defined(__i386)
86 #define	DEBUG_INFO 0xf4bff000
87 #define	DEBUG_INFO_HVM 0xfe7ff000
88 #elif defined(__amd64)
89 #define	DEBUG_INFO 0xfffffffffb7ff000
90 #define	DEBUG_INFO_HVM 0xfffffffffb7ff000
91 #endif
92 
93 #define	PAGE_SIZE 0x1000
94 #define	PAGE_SHIFT 12
95 #define	PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
96 #define	PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
97 #define	PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
98 #define	PT_PADDR_LGPG 0x000fffffffffe000ull
99 #define	PT_PADDR 0x000ffffffffff000ull
100 #define	PT_VALID 0x1
101 #define	PT_PAGESIZE 0x080
102 #define	PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))
103 
104 #define	XC_CORE_MAGIC 0xF00FEBED
105 #define	XC_CORE_MAGIC_HVM 0xF00FEBEE
106 
107 #define	VGCF_HVM_GUEST (1<<1)
108 
/*
 * Header of the old-style ('core') xc_core dump format: fixed-size
 * counts followed by file offsets to the VCPU contexts, the p2m index
 * and the page data.
 */
typedef struct xc_core_header {
	unsigned int xch_magic;		/* XC_CORE_MAGIC or XC_CORE_MAGIC_HVM */
	unsigned int xch_nr_vcpus;	/* number of VCPU contexts in dump */
	unsigned int xch_nr_pages;	/* number of data pages in dump */
	unsigned int xch_ctxt_offset;	/* file offset of VCPU contexts */
	unsigned int xch_index_offset;	/* file offset of p2m table */
	unsigned int xch_pages_offset;	/* file offset of page data */
} xc_core_header_t;
117 
/*
 * Contents of the XEN_ELFNOTE_DUMPCORE_HEADER note in the ELF format.
 */
struct xc_elf_header {
	uint64_t xeh_magic;		/* XC_CORE_MAGIC or XC_CORE_MAGIC_HVM */
	uint64_t xeh_nr_vcpus;		/* number of VCPU contexts */
	uint64_t xeh_nr_pages;		/* number of data pages */
	uint64_t xeh_page_size;		/* page size used by the dump */
};
124 
/*
 * Contents of the XEN_ELFNOTE_DUMPCORE_XEN_VERSION note; describes the
 * hypervisor that produced the dump.  Note xkb_identify()'s caveat: the
 * layout differs between 32- and 64-bit past xev_capabilities.
 */
struct xc_elf_version {
	uint64_t xev_major;		/* Xen major version */
	uint64_t xev_minor;		/* Xen minor version */
	xen_extraversion_t xev_extra;
	xen_compile_info_t xev_compile_info;
	xen_capabilities_info_t xev_capabilities; /* e.g. "xen-3.0-x86_64" */
	xen_changeset_info_t xev_changeset;
	xen_platform_parameters_t xev_platform_parameters;
	uint64_t xev_pagesize;
};
135 
136 /*
137  * Either an old-style (3.0.4) core format, or the ELF format.
138  */
typedef enum {
	XKB_FORMAT_UNKNOWN = 0,
	XKB_FORMAT_CORE = 1,	/* old ad hoc xc_core format (pre-ELF) */
	XKB_FORMAT_ELF = 2	/* newer ELF-based dump format */
} xkb_type_t;
144 
/*
 * One-page mapping cache: mm_map holds the mapping of page mm_mfn.
 * See xkb_map_mfn().
 */
typedef struct mfn_map {
	mfn_t mm_mfn;		/* MFN currently mapped (cache key) */
	char *mm_map;		/* mapped page, MAP_FAILED/NULL if none */
} mfn_map_t;
149 
/*
 * Shape of the guest's page-table hierarchy, used by xkb_va_to_mfn()
 * to walk the tables.
 */
typedef struct mmu_info {
	size_t mi_max;		/* top page-table level (e.g. 3 for 4-level) */
	size_t mi_shift[4];	/* VA shift for each level's index */
	size_t mi_ptes;		/* number of entries per table */
	size_t mi_ptesize;	/* PTE size in bytes: 8 (PAE/64) or 4 */
} mmu_info_t;
156 
/* State specific to XKB_FORMAT_CORE dumps. */
typedef struct xkb_core {
	xc_core_header_t xc_hdr;	/* dump file header */
	void *xc_p2m_buf;		/* mmap()ed p2m table (page-aligned) */
} xkb_core_t;
161 
/* State specific to XKB_FORMAT_ELF dumps. */
typedef struct xkb_elf {
	mdb_gelf_file_t *xe_gelf;	/* gelf handle on the dump */
	size_t *xe_off;			/* pfn -> page index in dump file */
	struct xc_elf_header xe_hdr;	/* DUMPCORE_HEADER note contents */
	struct xc_elf_version xe_version; /* DUMPCORE_XEN_VERSION contents */
} xkb_elf_t;
168 
/*
 * Per-dump state for this KVM backend.  Built up by xkb_open_core() /
 * xkb_open_elf() and torn down by xkb_close().
 */
typedef struct xkb {
	char *xkb_path;			/* path of the dump file */
	int xkb_fd;			/* fd onto the dump file */
	int xkb_is_hvm;			/* HVM (ELF format only) */

	xkb_type_t xkb_type;		/* which dump format */
	xkb_core_t xkb_core;		/* FORMAT_CORE state */
	xkb_elf_t xkb_elf;		/* FORMAT_ELF state */

	size_t xkb_nr_vcpus;
	size_t xkb_nr_pages;		/* number of data pages in the dump */
	size_t xkb_pages_off;		/* file offset of page data */
	xen_pfn_t xkb_max_pfn;		/* highest PFN seen */
	mfn_t xkb_max_mfn;		/* highest MFN seen */
	int xkb_is_pae;

	mmu_info_t xkb_mmu;		/* guest page-table geometry */
	debug_info_t xkb_info;		/* guest debug_info (bootstrap data) */

	void *xkb_vcpu_data;		/* raw VCPU context area */
	size_t xkb_vcpu_data_sz;
	struct vcpu_guest_context **xkb_vcpus;	/* per-VCPU pointers into it */

	char *xkb_pages;		/* all page data, if mmap() worked */
	mfn_t *xkb_p2m;			/* pfn -> mfn */
	xen_pfn_t *xkb_m2p;		/* mfn -> pfn (built by xkb_build_m2p) */
	mfn_map_t xkb_pt_map[4];	/* per-level page-table map cache */
	mfn_map_t xkb_map;		/* general single-page map cache */

	char *xkb_namelist;		/* fabricated ksyms image */
	size_t xkb_namesize;		/* size of xkb_namelist */
} xkb_t;
201 
202 static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
203 
/*
 * Layout of the fabricated namelist (see xkb_build_ksyms()): an ELF
 * header, two PT_LOAD phdrs, the section headers, and the shstrtab;
 * the symtab and strtab data follow this struct in memory.
 */
typedef struct xkb_namelist {
	Ehdr	kh_elf_hdr;		/* copied from the kernel's 'unix' */
	Phdr	kh_text_phdr;
	Phdr	kh_data_phdr;
	Shdr	kh_shdr[XKB_SHDR_NUM];
	char	shstrings[sizeof (xkb_shstrtab)];
} xkb_namelist_t;
211 
212 static int xkb_build_ksyms(xkb_t *);
213 static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
214 static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
215 static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
216 static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
217 static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
218 static int xkb_close(xkb_t *);
219 
220 /*
221  * Jump through the hoops we need to to correctly identify a core file
222  * of either the old or new format.
223  */
int
xkb_identify(const char *file, int *longmode)
{
	xc_core_header_t header;
	mdb_gelf_file_t *gf = NULL;
	mdb_gelf_sect_t *sect = NULL;
	mdb_io_t *io = NULL;
	char *notes = NULL;
	char *pos;
	int ret = 0;
	size_t sz;
	int fd;

	/*
	 * Returns 1 if 'file' is a recognized Xen dump (setting *longmode),
	 * 0 if it is not, and -1 on open/setup error.
	 */
	if ((fd = open64(file, O_RDONLY)) == -1)
		return (-1);

	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
		(void) close(fd);
		return (0);
	}

	(void) close(fd);

	if (header.xch_magic == XC_CORE_MAGIC) {
		*longmode = 0;

		/*
		 * Old-style core.  The header doesn't record the guest's
		 * word size, so infer it: the context area (between
		 * xch_ctxt_offset and xch_index_offset) holds one
		 * vcpu_guest_context per VCPU, and that structure's size
		 * differs between 32- and 64-bit.  If the area matches
		 * our native context size, the dump matches our ISA;
		 * otherwise it's the other one.
		 */
		sz = header.xch_index_offset - header.xch_ctxt_offset;
#ifdef _LP64
		if (sizeof (struct vcpu_guest_context) *
		    header.xch_nr_vcpus == sz)
			*longmode = 1;
#else
		if (sizeof (struct vcpu_guest_context) *
		    header.xch_nr_vcpus != sz)
			*longmode = 1;
#endif /* _LP64 */

		return (1);
	}

	/*
	 * Not the old format: look for the ELF format's .note.Xen section
	 * and derive the word size from the Xen version note.
	 */
	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
		return (-1);

	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
		goto out;

	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
		goto out;

	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
		goto out;

	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
		struct xc_elf_version *vers;
		/* LINTED - alignment */
		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
		char *desc;
		char *name;

		/* Note layout: nhdr, name (padded to 4 bytes), then desc. */
		name = pos + sizeof (*nhdr);
		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);

		pos = desc + nhdr->n_descsz;

		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
			continue;

		/*
		 * The contents of this struct differ between 32 and 64
		 * bit; however, not until past the 'xev_capabilities'
		 * member, so we can just about get away with this.
		 */

		/* LINTED - alignment */
		vers = (struct xc_elf_version *)desc;

		if (strstr(vers->xev_capabilities, "x86_64")) {
			/*
			 * 64-bit hypervisor, but it can still be
			 * a 32-bit domain core. 32-bit domain cores
			 * are also dumped in Elf64 format, but they
			 * have e_machine set to EM_386, not EM_AMD64.
			 */
			if (gf->gf_ehdr.e_machine == EM_386)
				*longmode = 0;
			else
				*longmode = 1;
		} else if (strstr(vers->xev_capabilities, "x86_32") ||
		    strstr(vers->xev_capabilities, "x86_32p")) {
			/*
			 * 32-bit hypervisor, can only be a 32-bit core.
			 * NOTE(review): the "x86_32p" test is redundant,
			 * since "x86_32" is a substring of "x86_32p" and
			 * matches first; harmless, but dead code.
			 */
			*longmode = 0;
		} else {
			mdb_warn("couldn't derive word size of dump; "
			    "assuming 64-bit");
			*longmode = 1;
		}
	}

	ret = 1;

out:
	/* mdb_gelf_destroy() also destroys the io it owns. */
	if (gf != NULL)
		mdb_gelf_destroy(gf);
	else if (io != NULL)
		mdb_io_destroy(io);
	return (ret);
}
336 
337 static void *
338 xkb_fail(xkb_t *xkb, const char *msg, ...)
339 {
340 	va_list args;
341 
342 	va_start(args, msg);
343 	if (xkb != NULL)
344 		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
345 	(void) vfprintf(stderr, msg, args);
346 	(void) fprintf(stderr, "\n");
347 	va_end(args);
348 	if (xkb != NULL)
349 		(void) xkb_close(xkb);
350 
351 	errno = ENOEXEC;
352 
353 	return (NULL);
354 }
355 
356 static int
357 xkb_build_m2p(xkb_t *xkb)
358 {
359 	size_t i;
360 
361 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
362 		if (xkb->xkb_p2m[i] != MFN_INVALID &&
363 		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
364 			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
365 	}
366 
367 	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
368 	    UM_SLEEP);
369 
370 	for (i = 0; i <= xkb->xkb_max_mfn; i++)
371 		xkb->xkb_m2p[i] = PFN_INVALID;
372 
373 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
374 		if (xkb->xkb_p2m[i] != MFN_INVALID)
375 			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
376 	}
377 
378 	return (1);
379 }
380 
381 /*
382  * With FORMAT_CORE, we can use the table in the dump file directly.
383  * Just to make things fun, they've not page-aligned the p2m table.
384  */
static int
xkb_map_p2m(xkb_t *xkb)
{
	offset_t off;
	size_t size;
	xkb_core_t *xc = &xkb->xkb_core;
	size_t count = xkb->xkb_nr_pages;
	size_t boff = xc->xc_hdr.xch_index_offset;

	/*
	 * mmap() requires a page-aligned file offset, but the table in
	 * the dump is not page-aligned.  Round the offset down, and pad
	 * the length by two pages before rounding it down too: one page
	 * covers the offset fudge, the other the size round-down.
	 */
	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
	size = PAGE_MASK(size);
	off = PAGE_MASK(boff);

	/* LINTED - alignment */
	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
	    MAP_SHARED, xkb->xkb_fd, off);

	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
		(void) xkb_fail(xkb, "cannot map p2m table");
		return (0);
	}

	/* Step back up to the table's true (unaligned) start. */
	/* LINTED - alignment */
	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
	    PAGE_OFFSET(boff));

	return (1);
}
413 
414 /*
415  * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
416  * into a linear array indexed by pfn for convenience.  We also need to
417  * track the mapping between mfn and the offset in the file: a pfn with
418  * no mfn will not appear in the core file.
419  */
static int
xkb_build_p2m(xkb_t *xkb)
{
	xkb_elf_t *xe = &xkb->xkb_elf;
	mdb_gelf_sect_t *sect;
	size_t size;
	size_t i;

	/* On-disk layout of a .xen_p2m entry: one pair per dump page. */
	struct elf_p2m {
		uint64_t pfn;
		uint64_t gmfn;
	} *p2m;

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");

	if (sect == NULL) {
		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
		return (0);
	}

	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
		return (0);
	}

	/* Pass 1: find the largest PFN so we can size the linear arrays. */
	for (i = 0; i < xkb->xkb_nr_pages; i++) {
		if (p2m[i].pfn > xkb->xkb_max_pfn)
			xkb->xkb_max_pfn = p2m[i].pfn;
	}

	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
	xe->xe_off = mdb_alloc(size, UM_SLEEP);

	/* Mark every PFN absent until proven present. */
	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
		xkb->xkb_p2m[i] = PFN_INVALID;
		xe->xe_off[i] = (size_t)-1;
	}

	/*
	 * Pass 2: record each PFN's MFN, and the page's index within the
	 * dump file (xe_off), used later to locate the page data.
	 */
	for (i = 0; i < xkb->xkb_nr_pages; i++) {
		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
		xe->xe_off[p2m[i].pfn] = i;
	}

	return (1);
}
467 
468 /*
469  * For HVM images, we don't have the corresponding MFN list; the table
470  * is just a mapping from page index in the dump to the corresponding
471  * PFN.  To simplify the other code, we'll pretend that these PFNs are
472  * really MFNs as well, by populating xkb_p2m.
473  */
474 static int
475 xkb_build_fake_p2m(xkb_t *xkb)
476 {
477 	xkb_elf_t *xe = &xkb->xkb_elf;
478 	mdb_gelf_sect_t *sect;
479 	size_t size;
480 	size_t i;
481 
482 	uint64_t *p2pfn;
483 
484 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
485 
486 	if (sect == NULL) {
487 		(void) xkb_fail(xkb, "cannot find section .xen_pfn");
488 		return (0);
489 	}
490 
491 	if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
492 		(void) xkb_fail(xkb, "couldn't read .xen_pfn");
493 		return (0);
494 	}
495 
496 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
497 		if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
498 			xkb->xkb_max_pfn = p2pfn[i];
499 	}
500 
501 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
502 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
503 
504 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
505 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
506 
507 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
508 		xkb->xkb_p2m[i] = PFN_INVALID;
509 		xe->xe_off[i] = (size_t)-1;
510 	}
511 
512 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
513 		if (p2pfn[i] == PFN_INVALID)
514 			continue;
515 		xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
516 		xe->xe_off[p2pfn[i]] = i;
517 	}
518 
519 	return (1);
520 }
521 
522 /*
523  * Return the MFN of the top-level page table for the given as.
524  */
static mfn_t
xkb_as_to_mfn(xkb_t *xkb, struct as *as)
{
	uintptr_t asp = (uintptr_t)as;
	uintptr_t hatp;
	uintptr_t htablep;
	uintptr_t pfn;

	/*
	 * Chase as->a_hat, then the hat's top-level htable and its pfn,
	 * using the structure offsets recorded in the guest's debug_info
	 * (the guest's struct layouts may differ from ours).
	 */
	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
		return (MFN_INVALID);
	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
	    &htablep))
		return (MFN_INVALID);
	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
	    &pfn))
		return (MFN_INVALID);

	if (pfn > xkb->xkb_max_pfn)
		return (MFN_INVALID);

	return (xkb->xkb_p2m[pfn]);
}
547 
548 static mfn_t
549 xkb_cr3_to_pfn(xkb_t *xkb)
550 {
551 	uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
552 	if (xkb->xkb_is_hvm)
553 		return (cr3 >> PAGE_SHIFT);
554 	return (xen_cr3_to_pfn(cr3));
555 }
556 
557 static ssize_t
558 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
559     void *buf, size_t size)
560 {
561 	size_t left = size;
562 	int windowed = (xkb->xkb_pages == NULL);
563 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
564 
565 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
566 		return (-1);
567 
568 	while (left) {
569 		uint64_t pos = addr + (size - left);
570 		char *outpos = (char *)buf + (size - left);
571 		size_t pageoff = PAGE_OFFSET(pos);
572 		size_t sz = MIN(left, PAGE_SIZE - pageoff);
573 		mfn_t mfn;
574 
575 		if (!phys) {
576 			mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
577 			if (mfn == MFN_INVALID)
578 				return (-1);
579 		} else {
580 			xen_pfn_t pfn = pos >> PAGE_SHIFT;
581 			if (pfn > xkb->xkb_max_pfn)
582 				return (-1);
583 			mfn = xkb->xkb_p2m[pfn];
584 			if (mfn == MFN_INVALID)
585 				return (-1);
586 		}
587 
588 		/*
589 		 * If we're windowed then pread() is much faster.
590 		 */
591 		if (windowed) {
592 			offset_t off = xkb_mfn_to_offset(xkb, mfn);
593 			int ret;
594 
595 			if (off == ~1ULL)
596 				return (-1);
597 
598 			off += pageoff;
599 
600 			ret = pread64(xkb->xkb_fd, outpos, sz, off);
601 			if (ret == -1)
602 				return (-1);
603 			if (ret != sz)
604 				return ((size - left) + ret);
605 
606 			left -= ret;
607 		} else {
608 			if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
609 				return (-1);
610 
611 			bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
612 
613 			left -= sz;
614 		}
615 	}
616 
617 	return (size);
618 }
619 
/* Read from a guest physical address. */
static ssize_t
xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
{
	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
}
625 
/* Read from a VA, resolved through the page tables of 'as' (or VCPU 0). */
static ssize_t
xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
{
	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
}
631 
/* Read from a kernel VA (VCPU 0's address space). */
static ssize_t
xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
{
	return (xkb_aread(xkb, addr, buf, size, NULL));
}
637 
638 static int
639 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
640 {
641 	if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
642 	    sizeof (uintptr_t))
643 		return (0);
644 	return (1);
645 }
646 
647 static char *
648 xkb_readstr(xkb_t *xkb, uintptr_t addr)
649 {
650 	char *str = mdb_alloc(1024, UM_SLEEP);
651 	size_t i;
652 
653 	for (i = 0; i < 1024; i++) {
654 		if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
655 			mdb_free(str, 1024);
656 			return (NULL);
657 		}
658 
659 		if (str[i] == '\0')
660 			break;
661 	}
662 
663 	if (i == 1024) {
664 		mdb_free(str, 1024);
665 		return (NULL);
666 	}
667 
668 	return (str);
669 }
670 
/*
 * Convert a PFN into a byte offset within the dump's page-data area.
 * For FORMAT_CORE every page is present in PFN order; for FORMAT_ELF we
 * index via the per-PFN dump-page table built by xkb_build_p2m() /
 * xkb_build_fake_p2m().  Returns -1ULL for an invalid/out-of-range PFN.
 */
static offset_t
xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
{
	if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
		return (-1ULL);

	if (xkb->xkb_type == XKB_FORMAT_CORE)
		return (PAGE_SIZE * pfn);

	return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
}
682 
/*
 * Convert an MFN into its absolute byte offset in the dump file, or
 * -1ULL if the MFN is unknown (no PFN maps to it).
 */
static offset_t
xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
{
	xen_pfn_t pfn;

	if (mfn > xkb->xkb_max_mfn)
		return (-1ULL);

	pfn = xkb->xkb_m2p[mfn];

	if (pfn == PFN_INVALID)
		return (-1ULL);

	/*
	 * pfn came from the m2p table, so it is valid and present in the
	 * dump: xkb_pfn_to_off() cannot fail here.
	 */
	return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
}
698 
/*
 * Return a mapping of the page for the given MFN, caching a single page
 * in 'mm'.  A repeated request for the cached MFN is free.  In windowed
 * mode we mmap() a one-page window from the file (unmapping the previous
 * window); otherwise we simply point into the fully-mapped page data.
 * Returns NULL if the MFN cannot be located in the dump.
 */
static char *
xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
{
	int windowed = (xkb->xkb_pages == NULL);
	offset_t off;

	if (mm->mm_mfn == mfn)
		return (mm->mm_map);

	/* Update the cache key first; mm_map below reflects the result. */
	mm->mm_mfn = mfn;

	if (windowed) {
		if (mm->mm_map != (char *)MAP_FAILED) {
			(void) munmap(mm->mm_map, PAGE_SIZE);
			mm->mm_map = (void *)MAP_FAILED;
		}

		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
			return (NULL);

		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
		    xkb->xkb_fd, off);

		if (mm->mm_map == (char *)MAP_FAILED)
			return (NULL);
	} else {
		xen_pfn_t pfn;

		mm->mm_map = NULL;

		if (mfn > xkb->xkb_max_mfn)
			return (NULL);

		pfn = xkb->xkb_m2p[mfn];

		if (pfn == PFN_INVALID)
			return (NULL);

		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
	}

	return (mm->mm_map);
}
742 
743 static uint64_t
744 xkb_get_pte(mmu_info_t *mmu, char *ptep)
745 {
746 	uint64_t pte = 0;
747 
748 	if (mmu->mi_ptesize == 8) {
749 		/* LINTED - alignment */
750 		pte = *((uint64_t *)ptep);
751 	} else {
752 		/* LINTED - alignment */
753 		pte = *((uint32_t *)ptep);
754 	}
755 
756 	return (pte);
757 }
758 
759 static mfn_t
760 xkb_pte_to_base_mfn(uint64_t pte, size_t level)
761 {
762 	if (PTE_IS_LGPG(pte, level)) {
763 		pte &= PT_PADDR_LGPG;
764 	} else {
765 		pte &= PT_PADDR;
766 	}
767 
768 	return (pte >> PAGE_SHIFT);
769 }
770 
771 /*
772  * Resolve the given VA into an MFN, using the provided mfn as a top-level page
773  * table.
774  */
static mfn_t
xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
{
	mmu_info_t *mmu = &xkb->xkb_mmu;
	uint64_t pte;
	size_t level;

	/* Walk down from the top level, one table per iteration. */
	for (level = mmu->mi_max; ; --level) {
		size_t entry;

		/* Map the current level's table (cached per level). */
		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
			return (MFN_INVALID);

		/* Index of this VA's entry within the table. */
		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);

		pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
		    entry * mmu->mi_ptesize);

		if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
			return (MFN_INVALID);

		if (level == 0)
			break;

		/*
		 * Currently 'mfn' refers to the base MFN of the
		 * large-page mapping.  Add on the 4K-sized index into
		 * the large-page mapping to get the right MFN within
		 * the mapping.
		 */
		if (PTE_IS_LGPG(pte, level)) {
			mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
			    PAGE_SHIFT;
			break;
		}
	}

	return (mfn);
}
814 
/*
 * Read a krtld 'struct module' from the dump, and pull out the VAs of
 * its symbol table and string table (via the sh_addr of its symtab and
 * strtab section headers) plus the symbol count (symtab size divided by
 * entry size).  Returns 1 on success, 0 on any read failure.
 */
static int
xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
    uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
{
	if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
	    sizeof (struct module))
		return (0);

	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
	    offsetof(Shdr, sh_addr), sym_addr))
		return (0);

	if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
	    offsetof(Shdr, sh_addr), str_addr))
		return (0);

	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
	    offsetof(Shdr, sh_size), sym_count))
		return (0);
	*sym_count /= sizeof (Sym);

	return (1);
}
838 
839 static int
840 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
841     uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
842 {
843 	size_t i;
844 
845 	for (i = 0; i < sym_count; i++) {
846 		Sym sym;
847 		char *name;
848 		size_t sz;
849 		int type = XKB_WALK_GLOBAL;
850 
851 		if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
852 		    sizeof (sym)) != sizeof (sym))
853 			return (0);
854 
855 		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
856 			type = XKB_WALK_LOCAL;
857 
858 		name = xkb_readstr(xkb, str_addr + sym.st_name);
859 
860 		sym.st_shndx = SHN_ABS;
861 		sym.st_name = sizes[XKB_WALK_STR];
862 
863 		sizes[type] += sizeof (sym);
864 		sz = strlen(name) + 1;
865 		sizes[XKB_WALK_STR] += sz;
866 
867 		if (buf != NULL) {
868 			if (types & type) {
869 				bcopy(&sym, *buf, sizeof (sym));
870 				*buf += sizeof (sym);
871 			}
872 			if (types & XKB_WALK_STR) {
873 				bcopy(name, *buf, sz);
874 				*buf += sz;
875 			}
876 		}
877 
878 		mdb_free(name, 1024);
879 	}
880 
881 	return (1);
882 }
883 
/*
 * Walk the circular modctl list starting at 'modhead', accumulating
 * symbol/string sizes into 'sizes' and, when 'buf' is non-NULL, copying
 * out the data selected by 'types'.  Called once with buf == NULL to
 * size the namelist, then again per class to fill it (see
 * xkb_build_ksyms()).  Returns 1 on success, 0 on a read failure.
 */
static int
xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
    size_t *sizes, int types)
{
	uintptr_t modctl = modhead;
	uintptr_t modulep;
	struct module module;
	uintptr_t sym_count;
	uintptr_t sym_addr;
	uintptr_t str_addr;
	size_t max_iter = 500;

	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));

	/*
	 * empty first symbol
	 */
	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
	sizes[XKB_WALK_STR] += 1;

	if (buf != NULL) {
		if (types & XKB_WALK_LOCAL) {
			Sym tmp;
			bzero(&tmp, sizeof (tmp));
			bcopy(&tmp, *buf, sizeof (tmp));
			*buf += sizeof (tmp);
		}
		if (types & XKB_WALK_STR) {
			**buf = '\0';
			(*buf)++;
		}
	}

	for (;;) {
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_mp), &modulep))
			return (0);

		/* Module not loaded: nothing to contribute. */
		if (modulep == 0)
			goto next;

		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
		    &sym_count, &str_addr))
			return (0);

		/* Modules loaded with no symbol info are skipped. */
		if ((module.flags & KOBJ_NOKSYMS))
			goto next;

		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
		    str_addr, sym_count))
			return (0);

next:
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_next), &modctl))
			return (0);

		/* The list is circular: back at the head means done. */
		if (modctl == modhead)
			break;
		/*
		 * Try and prevent us looping forever if we have a broken list.
		 */
		if (--max_iter == 0)
			break;
	}

	return (1);
}
952 
953 /*
954  * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
955  * file for hypervisor images, we fabricate one here using code similar
956  * to that of /dev/ksyms.
957  */
static int
xkb_build_ksyms(xkb_t *xkb)
{
	debug_info_t *info = &xkb->xkb_info;
	size_t sizes[XKB_WALK_STR + 1];
	xkb_namelist_t *hdr;
	char *buf;
	struct modctl modules;
	uintptr_t module;
	Shdr *shp;

	/* di_modules is the guest VA of the head of the modctl list. */
	if (xkb_read(xkb, info->di_modules, &modules,
	    sizeof (struct modctl)) != sizeof (struct modctl))
		return (0);

	module = (uintptr_t)modules.mod_mp;

	/* Sizing pass: walk everything without copying. */
	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
		return (0);

	xkb->xkb_namesize = sizeof (xkb_namelist_t);
	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
	xkb->xkb_namesize += sizes[XKB_WALK_STR];

	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
	    == NULL)
		return (0);

	/* LINTED - alignment */
	hdr = (xkb_namelist_t *)xkb->xkb_namelist;

	/* Borrow the ELF header of the guest's 'unix' module as a base. */
	if (xkb_read(xkb, module + offsetof(struct module, hdr),
	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
		return (0);

	/* Then point its phdr/shdr tables at the ones we fabricate here. */
	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
	hdr->kh_elf_hdr.e_phnum = 2;
	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;

	hdr->kh_text_phdr.p_type = PT_LOAD;
	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
	hdr->kh_text_phdr.p_flags = PF_R | PF_X;

	hdr->kh_data_phdr.p_type = PT_LOAD;
	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;

	/* .symtab: locals first (sh_info = index of first global). */
	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
	shp->sh_type = SHT_SYMTAB;
	shp->sh_offset = sizeof (xkb_namelist_t);
	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
	shp->sh_link = XKB_SHDR_STRTAB;
	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
	shp->sh_addralign = sizeof (Addr);
	shp->sh_entsize = sizeof (Sym);
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);


	/* .strtab immediately follows the symbols. */
	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
	shp->sh_type = SHT_STRTAB;
	shp->sh_offset = sizeof (xkb_namelist_t) +
	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
	shp->sh_size = sizes[XKB_WALK_STR];
	shp->sh_addralign = 1;
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);


	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
	shp->sh_type = SHT_STRTAB;
	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
	shp->sh_size = sizeof (xkb_shstrtab);
	shp->sh_addralign = 1;
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);

	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));

	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);

	/*
	 * Fill pass: three more walks, emitting locals, then globals,
	 * then strings, to match the section layout above.  Each walk
	 * recomputes 'sizes' identically, so the offsets stay valid.
	 */
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_LOCAL))
		return (0);
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_GLOBAL))
		return (0);
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_STR))
		return (0);

	return (1);
}
1057 
/*
 * Open an old-format ('core') dump: validate the header, read the VCPU
 * contexts, map (or arrange to pread()) the page data, and map the p2m
 * table.  Returns xkb on success, NULL (via xkb_fail()) on failure.
 */
static xkb_t *
xkb_open_core(xkb_t *xkb)
{
	xkb_core_t *xc = &xkb->xkb_core;
	size_t sz;
	int i;
	struct vcpu_guest_context *vcp;

	xkb->xkb_type = XKB_FORMAT_CORE;

	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));

	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
	    sizeof (xc->xc_hdr))
		return (xkb_fail(xkb, "invalid dump file"));

	/* Old-format HVM dumps are unsupported (see file comment). */
	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
		return (xkb_fail(xkb, "cannot process HVM images"));

	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xc->xc_hdr.xch_magic));
	}

	/*
	 * With FORMAT_CORE, all pages are in the dump (non-existing
	 * ones are zeroed out).
	 */
	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;

	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context);
	xkb->xkb_vcpu_data_sz = sz;
	xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP);

	if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz,
	    xc->xc_hdr.xch_ctxt_offset) != sz)
		return (xkb_fail(xkb, "cannot read VCPU contexts"));

	/* Build the per-VCPU pointer array over the raw context area. */
	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);

	vcp = xkb->xkb_vcpu_data;
	for (i = 0; i < xkb->xkb_nr_vcpus; i++)
		xkb->xkb_vcpus[i] = &vcp[i];

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/*
	 * We'd like to adapt for correctness' sake, but we have no way of
	 * detecting a PAE guest, since cr4 writes are disallowed.
	 */
	xkb->xkb_is_pae = 1;

	if (!xkb_map_p2m(xkb))
		return (NULL);

	return (xkb);
}
1128 
/*
 * Attempt to interpret the dump as the ELF-based xc_core format.  If
 * the file is not ELF at all, or is ELF but lacks a ".note.Xen"
 * section, we return with xkb_type still XKB_FORMAT_UNKNOWN so the
 * caller can fall back to the older ad hoc 'core' format.  Once the
 * Xen note section has loaded, any further inconsistency is a hard
 * failure (xkb_fail() returns NULL).
 */
static xkb_t *
xkb_open_elf(xkb_t *xkb)
{
	xkb_elf_t *xe = &xkb->xkb_elf;
	mdb_gelf_sect_t *sect;
	char *notes;
	char *pos;
	mdb_io_t *io;
	size_t sz;
	int i;
	void *dp;

	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
	    O_RDONLY, 0)) == NULL)
		return (xkb_fail(xkb, "failed to open"));

	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);

	/* Not an ELF file: let the caller try the old core format. */
	if (xe->xe_gelf == NULL) {
		mdb_io_destroy(io);
		return (xkb);
	}

	xkb->xkb_fd = mdb_fdio_fileno(io);

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");

	/* ELF, but not a Xen dump: again, fall back to the core format. */
	if (sect == NULL)
		return (xkb);

	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
		return (xkb);

	/*
	 * Now we know this is indeed a hypervisor core dump, even if
	 * it's corrupted.
	 */
	xkb->xkb_type = XKB_FORMAT_ELF;

	/*
	 * Walk the notes in .note.Xen.  Each note is an Elf64_Nhdr
	 * followed by the name (padded to a 4-byte boundary) and then
	 * the descriptor.  NOTE(review): the advance of 'pos' below
	 * does not round n_descsz up to a 4-byte boundary; this assumes
	 * the Xen note descriptors are already multiple-of-4 sized --
	 * confirm against the dump writer.
	 */
	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
		/* LINTED - alignment */
		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
		uint64_t vers;
		char *desc;
		char *name;

		name = pos + sizeof (*nhdr);
		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);

		pos = desc + nhdr->n_descsz;

		switch (nhdr->n_type) {
		case XEN_ELFNOTE_DUMPCORE_NONE:
			break;

		case XEN_ELFNOTE_DUMPCORE_HEADER:
			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
				return (xkb_fail(xkb, "invalid ELF note "
				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
			}

			bcopy(desc, &xe->xe_hdr,
			    sizeof (struct xc_elf_header));
			break;

		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
			if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
				return (xkb_fail(xkb, "invalid ELF note "
				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
			}

			bcopy(desc, &xe->xe_version,
			    sizeof (struct xc_elf_version));
			break;

		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
			/*
			 * Major version mismatch is fatal; a minor
			 * version mismatch only draws a warning.
			 */
			/* LINTED - alignment */
			vers = *((uint64_t *)desc);
			if ((vers >> 32) != 0) {
				return (xkb_fail(xkb, "unknown major "
				    "version %d (expected 0)\n",
				    (int)(vers >> 32)));
			}

			if ((vers & 0xffffffff) != 1) {
				mdb_warn("unexpected dump minor number "
				    "version %d (expected 1)\n",
				    (int)(vers & 0xffffffff));
			}
			break;

		default:
			mdb_warn("unknown ELF note %d(%s)\n",
			    nhdr->n_type, name);
			break;
		}
	}

	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;

	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xe->xe_hdr.xeh_magic));
	}

	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
	/* A 32-bit PAE guest advertises "x86_32p" in its capabilities. */
	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
	    "x86_32p") != NULL);

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");

	if (sect == NULL)
		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));

	if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
		return (xkb_fail(xkb, "invalid section .xen_prstatus"));

	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;

	xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
	if (xkb->xkb_vcpu_data == NULL)
		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
	xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;

	/*
	 * The vcpu_guest_context structures saved in the core file
	 * are actually unions of the 64-bit and 32-bit versions.
	 * Don't rely on the entry size to match the size of
	 * the structure, but set up an array of pointers.
	 */
	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
	for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
		dp = ((char *)xkb->xkb_vcpu_data +
		    i * sect->gs_shdr.sh_entsize);
		xkb->xkb_vcpus[i] = dp;
	}

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");

	if (sect == NULL)
		return (xkb_fail(xkb, "cannot find section .xen_pages"));

	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
		return (xkb_fail(xkb, ".xen_pages is not page aligned"));

	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
		return (xkb_fail(xkb, "invalid section .xen_pages"));

	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/* Build the PFN->MFN lookup; HVM dumps use the fake variant. */
	if (xkb->xkb_is_hvm) {
		if (!xkb_build_fake_p2m(xkb))
			return (NULL);
	} else {
		if (!xkb_build_p2m(xkb))
			return (NULL);
	}

	return (xkb);
}
1301 
1302 static void
1303 xkb_init_mmu(xkb_t *xkb)
1304 {
1305 #if defined(__amd64)
1306 	xkb->xkb_mmu.mi_max = 3;
1307 	xkb->xkb_mmu.mi_shift[0] = 12;
1308 	xkb->xkb_mmu.mi_shift[1] = 21;
1309 	xkb->xkb_mmu.mi_shift[2] = 30;
1310 	xkb->xkb_mmu.mi_shift[3] = 39;
1311 	xkb->xkb_mmu.mi_ptes = 512;
1312 	xkb->xkb_mmu.mi_ptesize = 8;
1313 #elif defined(__i386)
1314 	if (xkb->xkb_is_pae) {
1315 		xkb->xkb_mmu.mi_max = 2;
1316 		xkb->xkb_mmu.mi_shift[0] = 12;
1317 		xkb->xkb_mmu.mi_shift[1] = 21;
1318 		xkb->xkb_mmu.mi_shift[2] = 30;
1319 		xkb->xkb_mmu.mi_ptes = 512;
1320 		xkb->xkb_mmu.mi_ptesize = 8;
1321 	} else {
1322 		xkb->xkb_mmu.mi_max = 1;
1323 		xkb->xkb_mmu.mi_shift[0] = 12;
1324 		xkb->xkb_mmu.mi_shift[1] = 22;
1325 		xkb->xkb_mmu.mi_ptes = 1024;
1326 		xkb->xkb_mmu.mi_ptesize = 4;
1327 	}
1328 #endif
1329 }
1330 
/*
 * Open a hypervisor dump for debugging.  The namelist and swapfile
 * arguments are unused: the namelist is constructed from the dump
 * itself (see xkb_build_ksyms()), and there is no swap.  Only
 * O_RDONLY opens are supported.  Returns NULL on any failure.
 */
/*ARGSUSED*/
xkb_t *
xkb_open(const char *namelist, const char *corefile, const char *swapfile,
    int flag, const char *err)
{
	uintptr_t debug_info = DEBUG_INFO;
	struct stat64 corestat;
	xkb_t *xkb = NULL;
	size_t i;

	if (stat64(corefile, &corestat) == -1)
		return (xkb_fail(xkb, "cannot stat %s", corefile));

	if (flag != O_RDONLY)
		return (xkb_fail(xkb, "invalid open flags"));

	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);

	/*
	 * Pre-poison every mapping slot with its "never acquired"
	 * sentinel so xkb_close() can tell what needs releasing.
	 */
	for (i = 0; i < 4; i++) {
		xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
	}

	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
	xkb->xkb_map.mm_mfn = MFN_INVALID;
	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
	xkb->xkb_fd = -1;

	/* NOTE(review): strdup() result is not checked for NULL. */
	xkb->xkb_path = strdup(corefile);

	/*
	 * Try the ELF xc_core format first; if the file isn't ELF,
	 * xkb_type remains XKB_FORMAT_UNKNOWN and we fall back to the
	 * older ad hoc core format.
	 */
	if ((xkb = xkb_open_elf(xkb)) == NULL)
		return (NULL);

	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
		if (!xkb_open_core(xkb))
			return (NULL);
	}

	xkb_init_mmu(xkb);

	if (!xkb_build_m2p(xkb))
		return (NULL);

	/* HVM dumps keep debug_info at a different well-known VA. */
	if (xkb->xkb_is_hvm)
		debug_info = DEBUG_INFO_HVM;

	/*
	 * Bootstrap from the debug_info structure at its well-known
	 * virtual address, validating magic and version.
	 */
	if (xkb_read(xkb, debug_info, &xkb->xkb_info,
	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
		return (xkb_fail(xkb, "cannot read debug_info"));

	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
		return (xkb_fail(xkb, "invalid debug info magic %d",
		    xkb->xkb_info.di_magic));
	}

	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
		return (xkb_fail(xkb, "unknown debug info version %d",
		    xkb->xkb_info.di_version));
	}

	if (!xkb_build_ksyms(xkb))
		return (xkb_fail(xkb, "cannot construct namelist"));

	return (xkb);
}
1397 
/*
 * Release all resources held by a dump handle.  The handle may be only
 * partially constructed, so each field is checked against its "never
 * acquired" sentinel (NULL, -1, or MAP_FAILED) before being released.
 * Always returns 0.
 */
int
xkb_close(xkb_t *xkb)
{
	size_t i, sz;

	if (xkb == NULL)
		return (0);

	if (xkb->xkb_m2p != NULL) {
		mdb_free(xkb->xkb_m2p,
		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
	}

	/*
	 * Either all data pages were mapped at once (xkb_pages), or we
	 * were using the per-page window mappings; unmap whichever set
	 * was in use.
	 */
	if (xkb->xkb_pages != NULL) {
		(void) munmap((void *)xkb->xkb_pages,
		    PAGE_SIZE * xkb->xkb_nr_pages);
	} else {
		for (i = 0; i < 4; i++) {
			char *addr = xkb->xkb_pt_map[i].mm_map;
			if (addr != (char *)MAP_FAILED)
				(void) munmap((void *)addr, PAGE_SIZE);
		}
		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
			(void) munmap((void *)xkb->xkb_map.mm_map,
			    PAGE_SIZE);
		}
	}

	if (xkb->xkb_namelist != NULL)
		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);

	if (xkb->xkb_type == XKB_FORMAT_ELF) {
		xkb_elf_t *xe = &xkb->xkb_elf;

		/*
		 * NOTE(review): xkb_fd is not closed on this path;
		 * presumably mdb_gelf_destroy() tears down the mdb_io_t
		 * that owns the descriptor -- confirm.
		 */
		if (xe->xe_gelf != NULL)
			mdb_gelf_destroy(xe->xe_gelf);

		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);

		if (xkb->xkb_p2m != NULL)
			mdb_free(xkb->xkb_p2m, sz);

		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);

		if (xe->xe_off != NULL)
			mdb_free(xe->xe_off, sz);

	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
		xkb_core_t *xc = &xkb->xkb_core;

		if (xkb->xkb_fd != -1)
			(void) close(xkb->xkb_fd);

		/* Must match the size used when the p2m buffer was mapped. */
		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
		sz = PAGE_MASK(sz);

		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
			(void) munmap(xc->xc_p2m_buf, sz);

		if (xkb->xkb_vcpu_data != NULL)
			mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
	}

	if (xkb->xkb_vcpus != NULL) {
		sz = sizeof (struct vcpu_guest_context *) *
		    xkb->xkb_nr_vcpus;
		mdb_free(xkb->xkb_vcpus, sz);
	}

	free(xkb->xkb_path);

	mdb_free(xkb, sizeof (*xkb));
	return (0);
}
1472 
1473 /*ARGSUSED*/
1474 static mdb_io_t *
1475 xkb_sym_io(xkb_t *xkb, const char *symfile)
1476 {
1477 	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
1478 
1479 	if (io == NULL)
1480 		mdb_warn("failed to create namelist from %s", xkb->xkb_path);
1481 
1482 	return (io);
1483 }
1484 
1485 uint64_t
1486 xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
1487 {
1488 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
1489 	mfn_t mfn;
1490 
1491 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
1492 		return (-1ULL);
1493 
1494 	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
1495 
1496 	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
1497 		return (-1ULL);
1498 
1499 	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
1500 	    | PAGE_OFFSET(addr));
1501 }
1502 
/*
 * Fill in 'mregs' with the saved machine registers of virtual CPU
 * 'cpu', translating Xen's cpu_user_regs layout into the privmregs
 * layout.  Returns 0 on success, or -1 with errno set to EINVAL if
 * 'cpu' is out of range.
 */
static int
xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
{
	struct vcpu_guest_context *vcpu;
	struct cpu_user_regs *ur;
	struct regs *regs;

	if (cpu >= xkb->xkb_nr_vcpus) {
		errno = EINVAL;
		return (-1);
	}

	bzero(mregs, sizeof (*mregs));

	vcpu = xkb->xkb_vcpus[cpu];
	ur = &vcpu->user_regs;
	regs = &mregs->pm_gregs;

	/* Segment selectors and trap state are common to both ISAs. */
	regs->r_ss = ur->ss;
	regs->r_cs = ur->cs;
	regs->r_ds = ur->ds;
	regs->r_es = ur->es;
	regs->r_fs = ur->fs;
	regs->r_gs = ur->gs;
	regs->r_trapno = ur->entry_vector;
	regs->r_err = ur->error_code;
#ifdef __amd64
	/* r_savfp/r_savpc mirror rbp/rip for stack backtracing. */
	regs->r_savfp = ur->rbp;
	regs->r_savpc = ur->rip;
	regs->r_rdi = ur->rdi;
	regs->r_rsi = ur->rsi;
	regs->r_rdx = ur->rdx;
	regs->r_rcx = ur->rcx;
	regs->r_r8 = ur->r8;
	regs->r_r9 = ur->r9;
	regs->r_rax = ur->rax;
	regs->r_rbx = ur->rbx;
	regs->r_rbp = ur->rbp;
	regs->r_r10 = ur->r10;
	regs->r_r11 = ur->r11;
	regs->r_r12 = ur->r12;
	regs->r_r13 = ur->r13;
	regs->r_r14 = ur->r14;
	regs->r_r15 = ur->r15;
	regs->r_rip = ur->rip;
	regs->r_rfl = ur->rflags;
	regs->r_rsp = ur->rsp;
#else
	regs->r_savfp = ur->ebp;
	regs->r_savpc = ur->eip;
	regs->r_edi = ur->edi;
	regs->r_esi = ur->esi;
	regs->r_ebp = ur->ebp;
	regs->r_esp = ur->esp;
	regs->r_ebx = ur->ebx;
	regs->r_edx = ur->edx;
	regs->r_ecx = ur->ecx;
	regs->r_eax = ur->eax;
	regs->r_eip = ur->eip;
	regs->r_efl = ur->eflags;
	regs->r_uesp = 0;
#endif

	/* Copy the eight control and eight debug registers wholesale. */
	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));

	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;

	return (0);
}
1573 
/*
 * Operations vector exported via mdb_kb_ops().  Dumps are read-only,
 * so every write entry point is mdb_tgt_notsup.
 */
static mdb_kb_ops_t xpv_kb_ops = {
	.kb_open = (void *(*)())xkb_open,
	.kb_close = (int (*)())xkb_close,
	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
	.kb_kread = (ssize_t (*)())xkb_read,
	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_aread = (ssize_t (*)())xkb_aread,
	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_pread = (ssize_t (*)())xkb_pread,
	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_vtop = (uint64_t (*)())xkb_vtop,
	.kb_getmregs = (int (*)())xkb_getmregs
};
1587 
/*
 * Return this backend's operations vector.
 */
mdb_kb_ops_t *
mdb_kb_ops(void)
{
	return (&xpv_kb_ops);
}
1593 
/* This backend module provides no dcmds or walkers of its own. */
static const mdb_dcmd_t dcmds[] = { NULL, };
static const mdb_walker_t walkers[] = { NULL, };
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1597 
/* Module load entry point: report our (empty) dcmd/walker tables. */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}
1603 
/* Module unload entry point: nothing module-global to tear down. */
void
_mdb_fini(void)
{
}
1608