1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * KVM backend for hypervisor domain dumps.  We don't use libkvm for
28  * such dumps, since they do not have a namelist file or the typical
29  * dump structures we expect to aid bootstrapping.  Instead, we
30  * bootstrap based upon a debug_info structure at a known VA, using the
31  * guest's own page tables to resolve to physical addresses, and
32  * construct the namelist in a manner similar to ksyms_snapshot().
33  *
34  * Note that there are two formats understood by this module: the older,
35  * ad hoc format, which we call 'core' within this file, and an
36  * ELF-based format, known as 'elf'.
37  *
38  * We only support the older format generated on Solaris dom0: before we
39  * fixed it, core dump files were broken whenever a PFN didn't map a
40  * real MFN (!).
41  */
42 
43 #pragma ident	"%Z%%M%	%I%	%E% SMI"
44 
45 #include <strings.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 #include <stdarg.h>
50 #include <unistd.h>
51 #include <fcntl.h>
52 #include <gelf.h>
53 #include <errno.h>
54 
55 #include <sys/mman.h>
56 #include <sys/stat.h>
57 #include <sys/debug_info.h>
58 #include <sys/xen_mmu.h>
59 #include <sys/elf.h>
60 #include <sys/machelf.h>
61 #include <sys/modctl.h>
62 #include <sys/kobj.h>
63 #include <sys/kobj_impl.h>
64 #include <sys/sysmacros.h>
65 #include <sys/privmregs.h>
66 #include <vm/as.h>
67 
68 #include <mdb/mdb_io.h>
69 #include <mdb/mdb_kb.h>
70 #include <mdb/mdb_target_impl.h>
71 
72 #include <xen/public/xen.h>
73 #include <xen/public/version.h>
74 #include <xen/public/elfnote.h>
75 
76 #define	XKB_SHDR_NULL 0
77 #define	XKB_SHDR_SYMTAB 1
78 #define	XKB_SHDR_STRTAB 2
79 #define	XKB_SHDR_SHSTRTAB 3
80 #define	XKB_SHDR_NUM 4
81 
82 #define	XKB_WALK_LOCAL 0x1
83 #define	XKB_WALK_GLOBAL 0x2
84 #define	XKB_WALK_STR 0x4
85 #define	XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
86 
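/*
 * Fixed kernel VA at which the guest publishes its debug_info structure
 * (see <sys/debug_info.h>); this is what we bootstrap from.
 */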
87 #if defined(__i386)
88 #define	DEBUG_INFO 0xf4bff000
89 #elif defined(__amd64)
90 #define	DEBUG_INFO 0xfffffffffb7ff000
91 #endif
92 
93 #define	PAGE_SIZE 0x1000
94 #define	PAGE_SHIFT 12
95 #define	PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
96 #define	PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
97 #define	PAGE_ALIGNED(a) (((a) & (PAGE_SIZE - 1)) == 0)
98 #define	PT_PADDR 0x000ffffffffff000ull
99 #define	PT_VALID 0x1
100 
101 #define	XC_CORE_MAGIC 0xF00FEBED
102 #define	XC_CORE_MAGIC_HVM 0xF00FEBEE
103 
104 #define	VGCF_HVM_GUEST (1<<1)
105 
106 typedef struct xc_core_header {
107 	unsigned int xch_magic;
108 	unsigned int xch_nr_vcpus;
109 	unsigned int xch_nr_pages;
110 	unsigned int xch_ctxt_offset;
111 	unsigned int xch_index_offset;
112 	unsigned int xch_pages_offset;
113 } xc_core_header_t;
114 
115 struct xc_elf_header {
116 	uint64_t xeh_magic;
117 	uint64_t xeh_nr_vcpus;
118 	uint64_t xeh_nr_pages;
119 	uint64_t xeh_page_size;
120 };
121 
122 struct xc_elf_version {
123 	uint64_t xev_major;
124 	uint64_t xev_minor;
125 	xen_extraversion_t xev_extra;
126 	xen_compile_info_t xev_compile_info;
127 	xen_capabilities_info_t xev_capabilities;
128 	xen_changeset_info_t xev_changeset;
129 	xen_platform_parameters_t xev_platform_parameters;
130 	uint64_t xev_pagesize;
131 };
132 
133 /*
134  * Either an old-style (3.0.4) core format, or the ELF format.
135  */
136 typedef enum {
137 	XKB_FORMAT_UNKNOWN = 0,
138 	XKB_FORMAT_CORE = 1,
139 	XKB_FORMAT_ELF = 2
140 } xkb_type_t;
141 
142 typedef struct mfn_map {
143 	mfn_t mm_mfn;
144 	char *mm_map;
145 } mfn_map_t;
146 
147 typedef struct mmu_info {
148 	size_t mi_max;
149 	size_t mi_shift[4];
150 	size_t mi_ptes;
151 	size_t mi_ptesize;
152 } mmu_info_t;
153 
154 typedef struct xkb_core {
155 	xc_core_header_t xc_hdr;
156 	void *xc_p2m_buf;
157 } xkb_core_t;
158 
159 typedef struct xkb_elf {
160 	mdb_gelf_file_t *xe_gelf;
161 	size_t *xe_off;
162 	struct xc_elf_header xe_hdr;
163 	struct xc_elf_version xe_version;
164 } xkb_elf_t;
165 
166 typedef struct xkb {
167 	char *xkb_path;
168 	int xkb_fd;
169 
170 	xkb_type_t xkb_type;
171 	xkb_core_t xkb_core;
172 	xkb_elf_t xkb_elf;
173 
174 	size_t xkb_nr_vcpus;
175 	size_t xkb_nr_pages;
176 	size_t xkb_pages_off;
177 	xen_pfn_t xkb_max_pfn;
178 	mfn_t xkb_max_mfn;
179 	int xkb_is_pae;
180 
181 	mmu_info_t xkb_mmu;
182 	debug_info_t xkb_info;
183 
184 	struct vcpu_guest_context *xkb_vcpus;
185 
186 	char *xkb_pages;
187 	mfn_t *xkb_p2m;
188 	xen_pfn_t *xkb_m2p;
189 	mfn_map_t xkb_pt_map[4];
190 	mfn_map_t xkb_map;
191 
192 	char *xkb_namelist;
193 	size_t xkb_namesize;
194 } xkb_t;
195 
196 static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
197 
198 typedef struct xkb_namelist {
199 	Ehdr	kh_elf_hdr;
200 	Phdr	kh_text_phdr;
201 	Phdr	kh_data_phdr;
202 	Shdr	kh_shdr[XKB_SHDR_NUM];
203 	char	shstrings[sizeof (xkb_shstrtab)];
204 } xkb_namelist_t;
205 
206 static int xkb_build_ksyms(xkb_t *);
207 static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
208 static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
209 static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
210 static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
211 static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
212 static int xkb_close(xkb_t *);
213 
214 /*
215  * Jump through the hoops we need to correctly identify a core file
216  * of either the old or new format.
217  */
218 int
219 xkb_identify(const char *file, int *longmode)
220 {
221 	xc_core_header_t header;
222 	mdb_gelf_file_t *gf = NULL;
223 	mdb_gelf_sect_t *sect = NULL;
224 	mdb_io_t *io = NULL;
225 	char *notes = NULL;
226 	char *pos;
227 	int ret = 0;
228 	size_t sz;
229 	int fd;
230 
231 	if ((fd = open64(file, O_RDONLY)) == -1)
232 		return (-1);
233 
234 	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
235 		(void) close(fd);
236 		return (0);
237 	}
238 
239 	(void) close(fd);
240 
241 	if (header.xch_magic == XC_CORE_MAGIC) {
242 		*longmode = 0;
243 
244 		/*
245 		 * Infer word size from the size of the saved VCPU contexts.
246 		 */
247 		sz = header.xch_index_offset - header.xch_ctxt_offset;
248 #ifdef _LP64
249 		if (sizeof (struct vcpu_guest_context) *
250 		    header.xch_nr_vcpus == sz)
251 			*longmode = 1;
252 #else
253 		if (sizeof (struct vcpu_guest_context) *
254 		    header.xch_nr_vcpus != sz)
255 			*longmode = 1;
256 #endif /* _LP64 */
257 
258 		return (1);
259 	}
260 
261 	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
262 		return (-1);
263 
264 	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
265 		goto out;
266 
267 	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
268 		goto out;
269 
270 	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
271 		goto out;
272 
273 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
274 		struct xc_elf_version *vers;
275 		/* LINTED - alignment */
276 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
277 		char *desc;
278 		char *name;
279 
280 		name = pos + sizeof (*nhdr);
281 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
282 
283 		pos = desc + nhdr->n_descsz;
284 
285 		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
286 			continue;
287 
288 		/*
289 		 * The contents of this struct differ between 32 and 64
290 		 * bit; however, not until past the 'xev_capabilities'
291 		 * member, so we can just about get away with this.
292 		 */
293 
294 		/* LINTED - alignment */
295 		vers = (struct xc_elf_version *)desc;
296 
297 		if (strstr(vers->xev_capabilities, "x86_64")) {
298 			*longmode = 1;
299 		} else if (strstr(vers->xev_capabilities, "x86_32") ||
300 		    strstr(vers->xev_capabilities, "x86_32p")) {
301 			*longmode = 0;
302 		} else {
303 			mdb_warn("couldn't derive word size of dump; "
304 			    "assuming 64-bit\n");
305 			*longmode = 1;
306 		}
307 	}
308 
309 	ret = 1;
310 
311 out:
312 	if (gf != NULL)
313 		mdb_gelf_destroy(gf);
314 	else if (io != NULL)
315 		mdb_io_destroy(io);
316 	return (ret);
317 }
318 
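/*
 * Report an error, tear down any partially-constructed state, and return
 * NULL so that callers can simply propagate the failure.
 */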
319 static void *
320 xkb_fail(xkb_t *xkb, const char *msg, ...)
321 {
322 	va_list args;
323 
324 	va_start(args, msg);
325 	if (xkb != NULL)
326 		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
327 	(void) vfprintf(stderr, msg, args);
328 	(void) fprintf(stderr, "\n");
329 	va_end(args);
330 	if (xkb != NULL)
331 		(void) xkb_close(xkb);
332 
333 	errno = ENOEXEC;
334 
335 	return (NULL);
336 }
337 
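/*
 * Build the reverse (MFN->PFN) table: find the largest MFN in the p2m
 * table, allocate an m2p array to match, and invert the p2m entries
 * into it.
 */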
338 static int
339 xkb_build_m2p(xkb_t *xkb)
340 {
341 	size_t i;
342 
343 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
344 		if (xkb->xkb_p2m[i] != MFN_INVALID &&
345 		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
346 			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
347 	}
348 
349 	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
350 	    UM_SLEEP);
351 
352 	for (i = 0; i <= xkb->xkb_max_mfn; i++)
353 		xkb->xkb_m2p[i] = PFN_INVALID;
354 
355 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
356 		if (xkb->xkb_p2m[i] != MFN_INVALID)
357 			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
358 	}
359 
360 	return (1);
361 }
362 
363 /*
364  * With FORMAT_CORE, we can use the table in the dump file directly.
365  * Just to make things fun, they've not page-aligned the p2m table.
366  */
367 static int
368 xkb_map_p2m(xkb_t *xkb)
369 {
370 	offset_t off;
371 	size_t size;
372 	xkb_core_t *xc = &xkb->xkb_core;
373 	size_t count = xkb->xkb_nr_pages;
374 	size_t boff = xc->xc_hdr.xch_index_offset;
375 
376 	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
377 	size = PAGE_MASK(size);
378 	off = PAGE_MASK(boff);
379 
380 	/* LINTED - alignment */
381 	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
382 	    MAP_SHARED, xkb->xkb_fd, off);
383 
384 	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
385 		(void) xkb_fail(xkb, "cannot map p2m table");
386 		return (0);
387 	}
388 
389 	/* LINTED - alignment */
390 	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
391 	    PAGE_OFFSET(boff));
392 
393 	return (1);
394 }
395 
396 /*
397  * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
398  * into a linear array indexed by pfn for convenience.  We also need to
399  * track the mapping between mfn and the offset in the file: a pfn with
400  * no mfn will not appear in the core file.
401  */
402 static int
403 xkb_build_p2m(xkb_t *xkb)
404 {
405 	xkb_elf_t *xe = &xkb->xkb_elf;
406 	mdb_gelf_sect_t *sect;
407 	size_t size;
408 	size_t i;
409 
410 	struct elf_p2m {
411 		uint64_t pfn;
412 		uint64_t gmfn;
413 	} *p2m;
414 
415 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
416 
417 	if (sect == NULL) {
418 		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
419 		return (0);
420 	}
421 
422 	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
423 		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
424 		return (0);
425 	}
426 
427 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
428 		if (p2m[i].pfn > xkb->xkb_max_pfn)
429 			xkb->xkb_max_pfn = p2m[i].pfn;
430 	}
431 
432 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
433 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
434 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
435 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
436 
437 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
438 		xkb->xkb_p2m[i] = PFN_INVALID;
439 		xe->xe_off[i] = (size_t)-1;
440 	}
441 
442 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
443 		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
444 		xe->xe_off[p2m[i].pfn] = i;
445 	}
446 
447 	return (1);
448 }
449 
450 /*
451  * Return the MFN of the top-level page table for the given as.
452  */
453 static mfn_t
454 xkb_as_to_mfn(xkb_t *xkb, struct as *as)
455 {
456 	uintptr_t asp = (uintptr_t)as;
457 	uintptr_t hatp;
458 	uintptr_t htablep;
459 	uintptr_t pfn;
460 
461 	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
462 		return (MFN_INVALID);
463 	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
464 	    &htablep))
465 		return (MFN_INVALID);
466 	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
467 	    &pfn))
468 		return (MFN_INVALID);
469 
470 	if (pfn > xkb->xkb_max_pfn)
471 		return (MFN_INVALID);
472 
473 	return (xkb->xkb_p2m[pfn]);
474 }
475 
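/*
 * Common read routine.  We process the request a page at a time: a
 * pseudo-physical address is translated via the p2m table, while a
 * virtual address is resolved through the page tables rooted at either
 * the given address space or VCPU 0's cr3.  If we couldn't map the page
 * data wholesale, we fall back to pread() of the page's file offset.
 */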
476 static ssize_t
477 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
478     void *buf, size_t size)
479 {
480 	size_t left = size;
481 	int windowed = (xkb->xkb_pages == NULL);
482 	mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_vcpus[0].ctrlreg[3]);
483 
484 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
485 		return (-1);
486 
487 	while (left) {
488 		uint64_t pos = addr + (size - left);
489 		char *outpos = (char *)buf + (size - left);
490 		size_t pageoff = PAGE_OFFSET(pos);
491 		size_t sz = MIN(left, PAGE_SIZE - pageoff);
492 		mfn_t mfn;
493 
494 		if (!phys) {
495 			mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
496 			if (mfn == MFN_INVALID)
497 				return (-1);
498 		} else {
499 			xen_pfn_t pfn = pos >> PAGE_SHIFT;
500 			if (pfn > xkb->xkb_max_pfn)
501 				return (-1);
502 			mfn = xkb->xkb_p2m[pfn];
503 			if (mfn == MFN_INVALID)
504 				return (-1);
505 		}
506 
507 		/*
508 		 * If we're windowed then pread() is much faster.
509 		 */
510 		if (windowed) {
511 			offset_t off = xkb_mfn_to_offset(xkb, mfn);
512 			int ret;
513 
514 			if (off == -1ULL)
515 				return (-1);
516 
517 			off += pageoff;
518 
519 			ret = pread64(xkb->xkb_fd, outpos, sz, off);
520 			if (ret == -1)
521 				return (-1);
522 			if (ret != sz)
523 				return ((size - left) + ret);
524 
525 			left -= ret;
526 		} else {
527 			if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
528 				return (-1);
529 
530 			bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
531 
532 			left -= sz;
533 		}
534 	}
535 
536 	return (size);
537 }
538 
539 static ssize_t
540 xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
541 {
542 	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
543 }
544 
545 static ssize_t
546 xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
547 {
548 	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
549 }
550 
551 static ssize_t
552 xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
553 {
554 	return (xkb_aread(xkb, addr, buf, size, NULL));
555 }
556 
557 static int
558 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
559 {
560 	if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
561 	    sizeof (uintptr_t))
562 		return (0);
563 	return (1);
564 }
565 
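/*
 * Read a NUL-terminated string (of at most 1024 bytes) from the dump;
 * returns NULL on a read failure or an unterminated string.  The caller
 * frees the result with mdb_free(..., 1024).
 */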
566 static char *
567 xkb_readstr(xkb_t *xkb, uintptr_t addr)
568 {
569 	char *str = mdb_alloc(1024, UM_SLEEP);
570 	size_t i;
571 
572 	for (i = 0; i < 1024; i++) {
573 		if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
574 			mdb_free(str, 1024);
575 			return (NULL);
576 		}
577 
578 		if (str[i] == '\0')
579 			break;
580 	}
581 
582 	if (i == 1024) {
583 		mdb_free(str, 1024);
584 		return (NULL);
585 	}
586 
587 	return (str);
588 }
589 
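/*
 * Convert a PFN into a byte offset within the dump's page data.  With
 * the core format, pages are stored in PFN order; with the ELF format,
 * we use the per-PFN file index built by xkb_build_p2m().
 */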
590 static offset_t
591 xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
592 {
593 	if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
594 		return (-1ULL);
595 
596 	if (xkb->xkb_type == XKB_FORMAT_CORE)
597 		return (PAGE_SIZE * pfn);
598 
599 	return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
600 }
601 
602 static offset_t
603 xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
604 {
605 	xen_pfn_t pfn;
606 
607 	if (mfn > xkb->xkb_max_mfn)
608 		return (-1ULL);
609 
610 	pfn = xkb->xkb_m2p[mfn];
611 
612 	if (pfn == PFN_INVALID)
613 		return (-1ULL);
614 
615 	return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
616 }
617 
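/*
 * Return a mapping of the page backing the given MFN, caching the most
 * recent mapping in 'mm'.  If we couldn't map the page data wholesale,
 * we mmap() a single-page window; otherwise we just point into the
 * existing mapping.
 */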
618 static char *
619 xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
620 {
621 	int windowed = (xkb->xkb_pages == NULL);
622 	offset_t off;
623 
624 	if (mm->mm_mfn == mfn)
625 		return (mm->mm_map);
626 
627 	mm->mm_mfn = mfn;
628 
629 	if (windowed) {
630 		if (mm->mm_map != (char *)MAP_FAILED) {
631 			(void) munmap(mm->mm_map, PAGE_SIZE);
632 			mm->mm_map = (void *)MAP_FAILED;
633 		}
634 
635 		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
636 			return (NULL);
637 
638 		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
639 		    xkb->xkb_fd, off);
640 
641 		if (mm->mm_map == (char *)MAP_FAILED)
642 			return (NULL);
643 	} else {
644 		xen_pfn_t pfn;
645 
646 		mm->mm_map = NULL;
647 
648 		if (mfn > xkb->xkb_max_mfn)
649 			return (NULL);
650 
651 		pfn = xkb->xkb_m2p[mfn];
652 
653 		if (pfn == PFN_INVALID)
654 			return (NULL);
655 
656 		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
657 	}
658 
659 	return (mm->mm_map);
660 }
661 
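/*
 * Extract the MFN from a page table entry (8 bytes wide on amd64 or
 * with PAE, 4 bytes otherwise).  Returns MFN_INVALID if the entry is
 * not marked present.
 */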
662 static mfn_t
663 xkb_pte_to_mfn(mmu_info_t *mmu, char *ptep)
664 {
665 	uint64_t pte = 0;
666 
667 	if (mmu->mi_ptesize == 8) {
668 		/* LINTED - alignment */
669 		pte = *((uint64_t *)ptep);
670 	} else {
671 		/* LINTED - alignment */
672 		pte = *((uint32_t *)ptep);
673 	}
674 
675 	if (!(pte & PT_VALID))
676 		return (MFN_INVALID);
677 
678 	/* XXX: doesn't do large pages */
679 	pte &= PT_PADDR;
680 
681 	return (pte >> PAGE_SHIFT);
682 }
683 
684 /*
685  * Resolve the given VA into an MFN, using the provided mfn as a top-level page
686  * table.
687  */
688 static mfn_t
689 xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
690 {
691 	mmu_info_t *mmu = &xkb->xkb_mmu;
692 	size_t level;
693 
694 	for (level = mmu->mi_max; ; --level) {
695 		size_t entry;
696 		char *tmp;
697 
698 		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
699 			return (MFN_INVALID);
700 
701 		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);
702 
703 		tmp = (char *)xkb->xkb_pt_map[level].mm_map +
704 		    entry * mmu->mi_ptesize;
705 
706 		if ((mfn = xkb_pte_to_mfn(mmu, tmp)) == MFN_INVALID)
707 			return (MFN_INVALID);
708 
709 		if (level == 0)
710 			break;
711 	}
712 
713 	return (mfn);
714 }
715 
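/*
 * Read the given struct module, and extract the address and size of its
 * symbol table along with the address of its string table from the
 * module's section headers.
 */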
716 static int
717 xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
718     uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
719 {
720 	if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
721 	    sizeof (struct module))
722 		return (0);
723 
724 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
725 	    offsetof(Shdr, sh_addr), sym_addr))
726 		return (0);
727 
728 	if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
729 	    offsetof(Shdr, sh_addr), str_addr))
730 		return (0);
731 
732 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
733 	    offsetof(Shdr, sh_size), sym_count))
734 		return (0);
735 	*sym_count /= sizeof (Sym);
736 
737 	return (1);
738 }
739 
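/*
 * Walk a module's symbol table.  On the sizing pass (buf == NULL), we
 * only accumulate symbol and string sizes; on a copy pass, we also emit
 * the requested symbol types and their names, with each symbol made
 * absolute and its st_name pointing at its new string table offset.
 */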
740 static int
741 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
742     uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
743 {
744 	size_t i;
745 
746 	for (i = 0; i < sym_count; i++) {
747 		Sym sym;
748 		char *name;
749 		size_t sz;
750 		int type = XKB_WALK_GLOBAL;
751 
752 		if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
753 		    sizeof (sym)) != sizeof (sym))
754 			return (0);
755 
756 		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
757 			type = XKB_WALK_LOCAL;
758 
759 		if ((name = xkb_readstr(xkb, str_addr + sym.st_name)) == NULL)
			return (0);
760 
761 		sym.st_shndx = SHN_ABS;
762 		sym.st_name = sizes[XKB_WALK_STR];
763 
764 		sizes[type] += sizeof (sym);
765 		sz = strlen(name) + 1;
766 		sizes[XKB_WALK_STR] += sz;
767 
768 		if (buf != NULL) {
769 			if (types & type) {
770 				bcopy(&sym, *buf, sizeof (sym));
771 				*buf += sizeof (sym);
772 			}
773 			if (types & XKB_WALK_STR) {
774 				bcopy(name, *buf, sz);
775 				*buf += sz;
776 			}
777 		}
778 
779 		mdb_free(name, 1024);
780 	}
781 
782 	return (1);
783 }
784 
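/*
 * Walk the modctl list, gathering symbols from each loaded module.
 * This is called once with a NULL buffer to size the tables, then once
 * per output section to copy them out, always starting with an empty
 * initial symbol and string.
 */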
785 static int
786 xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
787     size_t *sizes, int types)
788 {
789 	uintptr_t modctl = modhead;
790 	uintptr_t modulep;
791 	struct module module;
792 	uintptr_t sym_count;
793 	uintptr_t sym_addr;
794 	uintptr_t str_addr;
795 	size_t max_iter = 500;
796 
797 	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));
798 
799 	/*
800 	 * empty first symbol
801 	 */
802 	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
803 	sizes[XKB_WALK_STR] += 1;
804 
805 	if (buf != NULL) {
806 		if (types & XKB_WALK_LOCAL) {
807 			Sym tmp;
808 			bzero(&tmp, sizeof (tmp));
809 			bcopy(&tmp, *buf, sizeof (tmp));
810 			*buf += sizeof (tmp);
811 		}
812 		if (types & XKB_WALK_STR) {
813 			**buf = '\0';
814 			(*buf)++;
815 		}
816 	}
817 
818 	for (;;) {
819 		if (!xkb_read_word(xkb,
820 		    modctl + offsetof(struct modctl, mod_mp), &modulep))
821 			return (0);
822 
823 		if (modulep == NULL)
824 			goto next;
825 
826 		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
827 		    &sym_count, &str_addr))
828 			return (0);
829 
830 		if ((module.flags & KOBJ_NOKSYMS))
831 			goto next;
832 
833 		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
834 		    str_addr, sym_count))
835 			return (0);
836 
837 next:
838 		if (!xkb_read_word(xkb,
839 		    modctl + offsetof(struct modctl, mod_next), &modctl))
840 			return (0);
841 
842 		if (modctl == modhead)
843 			break;
844 		/*
845 		 * Try to prevent us from looping forever on a broken list.
846 		 */
847 		if (--max_iter == 0)
848 			break;
849 	}
850 
851 	return (1);
852 }
853 
854 /*
855  * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
856  * file for hypervisor images, we fabricate one here using code similar
857  * to that of /dev/ksyms.
858  */
859 static int
860 xkb_build_ksyms(xkb_t *xkb)
861 {
862 	debug_info_t *info = &xkb->xkb_info;
863 	size_t sizes[XKB_WALK_STR + 1];
864 	xkb_namelist_t *hdr;
865 	char *buf;
866 	struct modctl modules;
867 	uintptr_t module;
868 	Shdr *shp;
869 
870 	if (xkb_read(xkb, info->di_modules, &modules,
871 	    sizeof (struct modctl)) != sizeof (struct modctl))
872 		return (0);
873 
874 	module = (uintptr_t)modules.mod_mp;
875 
876 	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
877 	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
878 		return (0);
879 
880 	xkb->xkb_namesize = sizeof (xkb_namelist_t);
881 	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
882 	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
883 	xkb->xkb_namesize += sizes[XKB_WALK_STR];
884 
885 	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
886 	    == NULL)
887 		return (0);
888 
889 	/* LINTED - alignment */
890 	hdr = (xkb_namelist_t *)xkb->xkb_namelist;
891 
892 	if (xkb_read(xkb, module + offsetof(struct module, hdr),
893 	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
894 		return (0);
895 
896 	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
897 	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
898 	hdr->kh_elf_hdr.e_phnum = 2;
899 	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
900 	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;
901 
902 	hdr->kh_text_phdr.p_type = PT_LOAD;
903 	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
904 	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
905 	hdr->kh_text_phdr.p_flags = PF_R | PF_X;
906 
907 	hdr->kh_data_phdr.p_type = PT_LOAD;
908 	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
909 	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
910 	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;
911 
912 	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
913 	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
914 	shp->sh_type = SHT_SYMTAB;
915 	shp->sh_offset = sizeof (xkb_namelist_t);
916 	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
917 	shp->sh_link = XKB_SHDR_STRTAB;
918 	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
919 	shp->sh_addralign = sizeof (Addr);
920 	shp->sh_entsize = sizeof (Sym);
921 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
922 
923 
924 	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
925 	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
926 	shp->sh_type = SHT_STRTAB;
927 	shp->sh_offset = sizeof (xkb_namelist_t) +
928 	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
929 	shp->sh_size = sizes[XKB_WALK_STR];
930 	shp->sh_addralign = 1;
931 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
932 
933 
934 	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
935 	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
936 	shp->sh_type = SHT_STRTAB;
937 	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
938 	shp->sh_size = sizeof (xkb_shstrtab);
939 	shp->sh_addralign = 1;
940 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
941 
942 	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));
943 
944 	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);
945 
946 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
947 	    XKB_WALK_LOCAL))
948 		return (0);
949 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
950 	    XKB_WALK_GLOBAL))
951 		return (0);
952 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
953 	    XKB_WALK_STR))
954 		return (0);
955 
956 	return (1);
957 }
958 
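/*
 * Open a dump in the old 'core' format: read the header and the VCPU
 * contexts directly, map the page data if possible, and map the p2m
 * table in place.
 */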
959 static xkb_t *
960 xkb_open_core(xkb_t *xkb)
961 {
962 	xkb_core_t *xc = &xkb->xkb_core;
963 	size_t sz;
964 
965 	xkb->xkb_type = XKB_FORMAT_CORE;
966 
967 	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
968 		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));
969 
970 	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
971 	    sizeof (xc->xc_hdr))
972 		return (xkb_fail(xkb, "invalid dump file"));
973 
974 	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
975 		return (xkb_fail(xkb, "cannot process HVM images"));
976 
977 	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
978 		return (xkb_fail(xkb, "invalid magic %d",
979 		    xc->xc_hdr.xch_magic));
980 	}
981 
982 	/*
983 	 * With FORMAT_CORE, all pages are in the dump (non-existing
984 	 * ones are zeroed out).
985 	 */
986 	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
987 	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
988 	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
989 	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;
990 
991 	sz = xkb->xkb_nr_vcpus * sizeof (*xkb->xkb_vcpus);
992 
993 	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
994 
995 	if (pread64(xkb->xkb_fd, xkb->xkb_vcpus, sz,
996 	    xc->xc_hdr.xch_ctxt_offset) != sz)
997 		return (xkb_fail(xkb, "cannot read VCPU contexts"));
998 
999 	if (xkb->xkb_vcpus[0].flags & VGCF_HVM_GUEST)
1000 		return (xkb_fail(xkb, "cannot process HVM images"));
1001 
1002 	/*
1003 	 * Try to map all the data pages. If we can't, fall back to the
1004 	 * window/pread() approach, which is significantly slower.
1005 	 */
1006 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
1007 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);
1008 
1009 	if (xkb->xkb_pages == (char *)MAP_FAILED)
1010 		xkb->xkb_pages = NULL;
1011 
1012 	/*
1013 	 * We'd like to adapt for correctness' sake, but we have no way of
1014 	 * detecting a PAE guest, since cr4 writes are disallowed.
1015 	 */
1016 	xkb->xkb_is_pae = 1;
1017 
1018 	if (!xkb_map_p2m(xkb))
1019 		return (NULL);
1020 
1021 	return (xkb);
1022 }
1023 
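/*
 * Open an ELF-format dump: parse the .note.Xen notes for the header and
 * Xen version, find the VCPU contexts in .xen_prstatus and the page
 * data in .xen_pages, then build the p2m table from .xen_p2m.  If the
 * file doesn't look like an ELF dump at all, we return with xkb_type
 * still XKB_FORMAT_UNKNOWN so the caller can fall back to the core
 * format.
 */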
1024 static xkb_t *
1025 xkb_open_elf(xkb_t *xkb)
1026 {
1027 	xkb_elf_t *xe = &xkb->xkb_elf;
1028 	mdb_gelf_sect_t *sect;
1029 	char *notes;
1030 	char *pos;
1031 	mdb_io_t *io;
1032 
1033 	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
1034 	    O_RDONLY, 0)) == NULL)
1035 		return (xkb_fail(xkb, "failed to open"));
1036 
1037 	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
1038 
1039 	if (xe->xe_gelf == NULL) {
1040 		mdb_io_destroy(io);
1041 		return (xkb);
1042 	}
1043 
1044 	xkb->xkb_fd = mdb_fdio_fileno(io);
1045 
1046 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
1047 
1048 	if (sect == NULL)
1049 		return (xkb);
1050 
1051 	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
1052 		return (xkb);
1053 
1054 	/*
1055 	 * Now we know this is indeed a hypervisor core dump, even if
1056 	 * it's corrupted.
1057 	 */
1058 	xkb->xkb_type = XKB_FORMAT_ELF;
1059 
1060 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
1061 		/* LINTED - alignment */
1062 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
1063 		uint64_t vers;
1064 		char *desc;
1065 		char *name;
1066 
1067 		name = pos + sizeof (*nhdr);
1068 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
1069 
1070 		pos = desc + nhdr->n_descsz;
1071 
1072 		switch (nhdr->n_type) {
1073 		case XEN_ELFNOTE_DUMPCORE_NONE:
1074 			break;
1075 
1076 		case XEN_ELFNOTE_DUMPCORE_HEADER:
1077 			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
1078 				return (xkb_fail(xkb, "invalid ELF note "
1079 				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
1080 			}
1081 
1082 			bcopy(desc, &xe->xe_hdr,
1083 			    sizeof (struct xc_elf_header));
1084 			break;
1085 
1086 		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
1087 			if (nhdr->n_descsz != sizeof (struct xc_elf_version)) {
1088 				return (xkb_fail(xkb, "invalid ELF note "
1089 				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
1090 			}
1091 
1092 			bcopy(desc, &xe->xe_version,
1093 			    sizeof (struct xc_elf_version));
1094 			break;
1095 
1096 		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
1097 			/* LINTED - alignment */
1098 			vers = *((uint64_t *)desc);
1099 			if ((vers >> 32) != 0) {
1100 				return (xkb_fail(xkb, "unknown major "
1101 				    "version %d (expected 0)\n",
1102 				    (int)(vers >> 32)));
1103 			}
1104 
1105 			if ((vers & 0xffffffff) != 1) {
1106 				mdb_warn("unexpected dump minor number "
1107 				    "version %d (expected 1)\n",
1108 				    (int)(vers & 0xffffffff));
1109 			}
1110 			break;
1111 
1112 		default:
1113 			mdb_warn("unknown ELF note %d(%s)\n",
1114 			    nhdr->n_type, name);
1115 			break;
1116 		}
1117 	}
1118 
1119 	if (xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM)
1120 		return (xkb_fail(xkb, "cannot process HVM images"));
1121 
1122 	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC) {
1123 		return (xkb_fail(xkb, "invalid magic %d",
1124 		    xe->xe_hdr.xeh_magic));
1125 	}
1126 
1127 	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
1128 	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
1129 	    "x86_32p") != NULL);
1130 
1131 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
1132 
1133 	if (sect == NULL)
1134 		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
1135 
1136 	if (sect->gs_shdr.sh_entsize != sizeof (vcpu_guest_context_t))
1137 		return (xkb_fail(xkb, "invalid section .xen_prstatus"));
1138 
1139 	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
1140 
1141 	if ((xkb->xkb_vcpus = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
1142 		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
1143 
1144 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
1145 
1146 	if (sect == NULL)
1147 		return (xkb_fail(xkb, "cannot find section .xen_pages"));
1148 
1149 	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
1150 		return (xkb_fail(xkb, ".xen_pages is not page aligned"));
1151 
1152 	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
1153 		return (xkb_fail(xkb, "invalid section .xen_pages"));
1154 
1155 	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
1156 
1157 	/*
1158 	 * Try to map all the data pages. If we can't, fall back to the
1159 	 * window/pread() approach, which is significantly slower.
1160 	 */
1161 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
1162 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
1163 
1164 	if (xkb->xkb_pages == (char *)MAP_FAILED)
1165 		xkb->xkb_pages = NULL;
1166 
1167 	if (!xkb_build_p2m(xkb))
1168 		return (NULL);
1169 
1170 	return (xkb);
1171 }
1172 
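/*
 * Set up the page table geometry (number of levels, per-level shifts,
 * and PTE count/size) for the dump's paging mode.
 */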
1173 static void
1174 xkb_init_mmu(xkb_t *xkb)
1175 {
1176 #if defined(__amd64)
1177 	xkb->xkb_mmu.mi_max = 3;
1178 	xkb->xkb_mmu.mi_shift[0] = 12;
1179 	xkb->xkb_mmu.mi_shift[1] = 21;
1180 	xkb->xkb_mmu.mi_shift[2] = 30;
1181 	xkb->xkb_mmu.mi_shift[3] = 39;
1182 	xkb->xkb_mmu.mi_ptes = 512;
1183 	xkb->xkb_mmu.mi_ptesize = 8;
1184 #elif defined(__i386)
1185 	if (xkb->xkb_is_pae) {
1186 		xkb->xkb_mmu.mi_max = 2;
1187 		xkb->xkb_mmu.mi_shift[0] = 12;
1188 		xkb->xkb_mmu.mi_shift[1] = 21;
1189 		xkb->xkb_mmu.mi_shift[2] = 30;
1190 		xkb->xkb_mmu.mi_ptes = 512;
1191 		xkb->xkb_mmu.mi_ptesize = 8;
1192 	} else {
1193 		xkb->xkb_mmu.mi_max = 1;
1194 		xkb->xkb_mmu.mi_shift[0] = 12;
1195 		xkb->xkb_mmu.mi_shift[1] = 22;
1196 		xkb->xkb_mmu.mi_ptes = 1024;
1197 		xkb->xkb_mmu.mi_ptesize = 4;
1198 	}
1199 #endif
1200 }
1201 
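/*
 * Main entry point: open the dump (trying the ELF format first, then
 * the old core format), build the m2p table, read the kernel's
 * debug_info structure from its well-known VA, and fabricate a namelist
 * from the guest's module list.
 */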
1202 /*ARGSUSED*/
1203 xkb_t *
1204 xkb_open(const char *namelist, const char *corefile, const char *swapfile,
1205     int flag, const char *err)
1206 {
1207 	struct stat64 corestat;
1208 	xkb_t *xkb = NULL;
1209 	size_t i;
1210 
1211 	if (stat64(corefile, &corestat) == -1)
1212 		return (xkb_fail(xkb, "cannot stat %s", corefile));
1213 
1214 	if (flag != O_RDONLY)
1215 		return (xkb_fail(xkb, "invalid open flags"));
1216 
1217 	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
1218 
1219 	for (i = 0; i < 4; i++)
1220 		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
1221 
1222 	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
1223 	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
1224 	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
1225 	xkb->xkb_fd = -1;
1226 
1227 	xkb->xkb_path = strdup(corefile);
1228 
1229 	if ((xkb = xkb_open_elf(xkb)) == NULL)
1230 		return (NULL);
1231 
1232 	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
1233 		if (!xkb_open_core(xkb))
1234 			return (NULL);
1235 	}
1236 
1237 	xkb_init_mmu(xkb);
1238 
1239 	if (!xkb_build_m2p(xkb))
1240 		return (NULL);
1241 
1242 	if (xkb_read(xkb, DEBUG_INFO, &xkb->xkb_info,
1243 	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
1244 		return (xkb_fail(xkb, "cannot read debug_info"));
1245 
1246 	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
1247 		return (xkb_fail(xkb, "invalid debug info magic %d",
1248 		    xkb->xkb_info.di_magic));
1249 	}
1250 
1251 	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
1252 		return (xkb_fail(xkb, "unknown debug info version %d",
1253 		    xkb->xkb_info.di_version));
1254 	}
1255 
1256 	if (!xkb_build_ksyms(xkb))
1257 		return (xkb_fail(xkb, "cannot construct namelist"));
1258 
1259 	return (xkb);
1260 }
1261 
1262 int
1263 xkb_close(xkb_t *xkb)
1264 {
1265 	size_t i;
1266 
1267 	if (xkb == NULL)
1268 		return (0);
1269 
1270 	if (xkb->xkb_m2p != NULL) {
1271 		mdb_free(xkb->xkb_m2p,
1272 		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
1273 	}
1274 
1275 	if (xkb->xkb_pages != NULL) {
1276 		(void) munmap((void *)xkb->xkb_pages,
1277 		    PAGE_SIZE * xkb->xkb_nr_pages);
1278 	} else {
1279 		for (i = 0; i < 4; i++) {
1280 			char *addr = xkb->xkb_pt_map[i].mm_map;
1281 			if (addr != (char *)MAP_FAILED)
1282 				(void) munmap((void *)addr, PAGE_SIZE);
1283 		}
1284 		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
1285 			(void) munmap((void *)xkb->xkb_map.mm_map,
1286 			    PAGE_SIZE);
1287 		}
1288 	}
1289 
1290 	if (xkb->xkb_namelist != NULL)
1291 		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);
1292 
1293 	if (xkb->xkb_type == XKB_FORMAT_ELF) {
1294 		xkb_elf_t *xe = &xkb->xkb_elf;
1295 		size_t sz;
1296 
1297 		if (xe->xe_gelf != NULL)
1298 			mdb_gelf_destroy(xe->xe_gelf);
1299 
1300 		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
1301 
1302 		if (xkb->xkb_p2m != NULL)
1303 			mdb_free(xkb->xkb_p2m, sz);
1304 
1305 		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
1306 
1307 		if (xe->xe_off != NULL)
1308 			mdb_free(xe->xe_off, sz);
1309 	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
1310 		xkb_core_t *xc = &xkb->xkb_core;
1311 		size_t sz;
1312 
1313 		if (xkb->xkb_fd != -1)
1314 			(void) close(xkb->xkb_fd);
1315 
1316 		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
1317 		sz = PAGE_MASK(sz);
1318 
1319 		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
1320 			(void) munmap(xc->xc_p2m_buf, sz);
1321 
1322 		if (xkb->xkb_vcpus != NULL) {
1323 			sz = sizeof (struct vcpu_guest_context) *
1324 			    xkb->xkb_nr_vcpus;
1325 			mdb_free(xkb->xkb_vcpus, sz);
1326 		}
1327 	}
1328 
1329 	free(xkb->xkb_path);
1330 
1331 	mdb_free(xkb, sizeof (*xkb));
1332 	return (0);
1333 }
1334 
1335 /*ARGSUSED*/
1336 static mdb_io_t *
1337 xkb_sym_io(xkb_t *xkb, const char *symfile)
1338 {
1339 	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
1340 
1341 	if (io == NULL)
1342 		mdb_warn("failed to create namelist from %s", xkb->xkb_path);
1343 
1344 	return (io);
1345 }
1346 
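/*
 * Translate a virtual address into a guest pseudo-physical address by
 * walking the page tables, then mapping the resulting MFN back to its
 * PFN.
 */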
1347 uint64_t
1348 xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
1349 {
1350 	mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_vcpus[0].ctrlreg[3]);
1351 	mfn_t mfn;
1352 
1353 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
1354 		return (-1ULL);
1355 
1356 	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
1357 
1358 	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
1359 		return (-1ULL);
1360 
1361 	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
1362 	    | PAGE_OFFSET(addr));
1363 }
1364 
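/*
 * Fill in the privileged machine registers for the given VCPU from the
 * guest context saved in the dump.
 */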
1365 static int
1366 xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
1367 {
1368 	struct vcpu_guest_context *vcpu;
1369 	struct cpu_user_regs *ur;
1370 	struct regs *regs;
1371 
1372 	if (cpu >= xkb->xkb_nr_vcpus) {
1373 		errno = EINVAL;
1374 		return (-1);
1375 	}
1376 
1377 	bzero(mregs, sizeof (*mregs));
1378 
1379 	vcpu = &xkb->xkb_vcpus[cpu];
1380 	ur = &vcpu->user_regs;
1381 	regs = &mregs->pm_gregs;
1382 
1383 	regs->r_ss = ur->ss;
1384 	regs->r_cs = ur->cs;
1385 	regs->r_ds = ur->ds;
1386 	regs->r_es = ur->es;
1387 	regs->r_fs = ur->fs;
1388 	regs->r_gs = ur->gs;
1389 	regs->r_trapno = ur->entry_vector;
1390 	regs->r_err = ur->error_code;
1391 #ifdef __amd64
1392 	regs->r_savfp = ur->rbp;
1393 	regs->r_savpc = ur->rip;
1394 	regs->r_rdi = ur->rdi;
1395 	regs->r_rsi = ur->rsi;
1396 	regs->r_rdx = ur->rdx;
1397 	regs->r_rcx = ur->rcx;
1398 	regs->r_r8 = ur->r8;
1399 	regs->r_r9 = ur->r9;
1400 	regs->r_rax = ur->rax;
1401 	regs->r_rbx = ur->rbx;
1402 	regs->r_rbp = ur->rbp;
1403 	regs->r_r10 = ur->r10;
1404 	regs->r_r11 = ur->r11;
1405 	regs->r_r12 = ur->r12;
1406 	regs->r_r13 = ur->r13;
1407 	regs->r_r14 = ur->r14;
1408 	regs->r_r15 = ur->r15;
1409 	regs->r_rip = ur->rip;
1410 	regs->r_rfl = ur->rflags;
1411 	regs->r_rsp = ur->rsp;
1412 #else
1413 	regs->r_savfp = ur->ebp;
1414 	regs->r_savpc = ur->eip;
1415 	regs->r_edi = ur->edi;
1416 	regs->r_esi = ur->esi;
1417 	regs->r_ebp = ur->ebp;
1418 	regs->r_esp = ur->esp;
1419 	regs->r_ebx = ur->ebx;
1420 	regs->r_edx = ur->edx;
1421 	regs->r_ecx = ur->ecx;
1422 	regs->r_eax = ur->eax;
1423 	regs->r_eip = ur->eip;
1424 	regs->r_efl = ur->eflags;
1425 	regs->r_uesp = 0;
1426 #endif
1427 
1428 	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
1429 	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));
1430 
1431 	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;
1432 
1433 	return (0);
1434 }
1435 
1436 static mdb_kb_ops_t xpv_kb_ops = {
1437 	.kb_open = (void *(*)())xkb_open,
1438 	.kb_close = (int (*)())xkb_close,
1439 	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
1440 	.kb_kread = (ssize_t (*)())xkb_read,
1441 	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
1442 	.kb_aread = (ssize_t (*)())xkb_aread,
1443 	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
1444 	.kb_pread = (ssize_t (*)())xkb_pread,
1445 	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
1446 	.kb_vtop = (uint64_t (*)())xkb_vtop,
1447 	.kb_getmregs = (int (*)())xkb_getmregs
1448 };
1449 
1450 mdb_kb_ops_t *
1451 mdb_kb_ops(void)
1452 {
1453 	return (&xpv_kb_ops);
1454 }
1455 
1456 static const mdb_dcmd_t dcmds[] = { NULL, };
1457 static const mdb_walker_t walkers[] = { NULL, };
1458 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1459 
1460 const mdb_modinfo_t *
1461 _mdb_init(void)
1462 {
1463 	return (&modinfo);
1464 }
1465 
1466 void
1467 _mdb_fini(void)
1468 {
1469 }
1470