1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * KVM backend for hypervisor domain dumps.  We don't use libkvm for
28  * such dumps, since they do not have a namelist file or the typical
29  * dump structures we expect to aid bootstrapping.  Instead, we
30  * bootstrap based upon a debug_info structure at a known VA, using the
31  * guest's own page tables to resolve to physical addresses, and
32  * construct the namelist in a manner similar to ksyms_snapshot().
33  *
34  * Note that there are two formats understood by this module: the older,
35  * ad hoc format, which we call 'core' within this file, and an
36  * ELF-based format, known as 'elf'.
37  *
38  * We only support the older format generated on Solaris dom0: before we
39  * fixed it, core dump files were broken whenever a PFN didn't map a
40  * real MFN (!).
41  */
42 
43 #include <strings.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <stddef.h>
47 #include <stdarg.h>
48 #include <unistd.h>
49 #include <fcntl.h>
50 #include <gelf.h>
51 #include <errno.h>
52 
53 #include <sys/mman.h>
54 #include <sys/stat.h>
55 #include <sys/debug_info.h>
56 #include <sys/xen_mmu.h>
57 #include <sys/elf.h>
58 #include <sys/machelf.h>
59 #include <sys/modctl.h>
60 #include <sys/kobj.h>
61 #include <sys/kobj_impl.h>
62 #include <sys/sysmacros.h>
63 #include <sys/privmregs.h>
64 #include <vm/as.h>
65 
66 #include <mdb/mdb_io.h>
67 #include <mdb/mdb_kb.h>
68 #include <mdb/mdb_target_impl.h>
69 
70 #include <xen/public/xen.h>
71 #include <xen/public/version.h>
72 #include <xen/public/elfnote.h>
73 
74 #define	XKB_SHDR_NULL 0
75 #define	XKB_SHDR_SYMTAB 1
76 #define	XKB_SHDR_STRTAB 2
77 #define	XKB_SHDR_SHSTRTAB 3
78 #define	XKB_SHDR_NUM 4
79 
80 #define	XKB_WALK_LOCAL 0x1
81 #define	XKB_WALK_GLOBAL 0x2
82 #define	XKB_WALK_STR 0x4
83 #define	XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
84 
85 #if defined(__i386)
86 #define	DEBUG_INFO 0xf4bff000
87 #define	DEBUG_INFO_HVM 0xfe7ff000
88 #elif defined(__amd64)
89 #define	DEBUG_INFO 0xfffffffffb7ff000
90 #define	DEBUG_INFO_HVM 0xfffffffffb7ff000
91 #endif
92 
93 #define	PAGE_SIZE 0x1000
94 #define	PAGE_SHIFT 12
95 #define	PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
96 #define	PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
97 #define	PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
98 #define	PT_PADDR_LGPG 0x000fffffffffe000ull
99 #define	PT_PADDR 0x000ffffffffff000ull
100 #define	PT_VALID 0x1
101 #define	PT_PAGESIZE 0x080
102 #define	PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))
103 
104 #define	XC_CORE_MAGIC 0xF00FEBED
105 #define	XC_CORE_MAGIC_HVM 0xF00FEBEE
106 
107 #define	VGCF_HVM_GUEST (1<<1)
108 
/*
 * Header of the old-style ("core") dump format.  The offsets locate
 * the VCPU contexts, the p2m index table, and the page data within
 * the dump file.
 */
typedef struct xc_core_header {
	unsigned int xch_magic;		/* XC_CORE_MAGIC{,_HVM} */
	unsigned int xch_nr_vcpus;	/* number of VCPU contexts */
	unsigned int xch_nr_pages;	/* number of pages of data */
	unsigned int xch_ctxt_offset;	/* file offset of VCPU contexts */
	unsigned int xch_index_offset;	/* file offset of p2m table */
	unsigned int xch_pages_offset;	/* file offset of page data */
} xc_core_header_t;
117 
/*
 * Contents of the XEN_ELFNOTE_DUMPCORE_HEADER note in an ELF-format
 * dump (see xkb_open_elf()).
 */
struct xc_elf_header {
	uint64_t xeh_magic;		/* XC_CORE_MAGIC{,_HVM} */
	uint64_t xeh_nr_vcpus;		/* number of VCPU contexts */
	uint64_t xeh_nr_pages;		/* number of pages of data */
	uint64_t xeh_page_size;		/* dump's page size in bytes */
};
124 
/*
 * Contents of the XEN_ELFNOTE_DUMPCORE_XEN_VERSION note: the
 * hypervisor version/build description.  xev_capabilities is used to
 * derive the guest's word size and PAE-ness.
 */
struct xc_elf_version {
	uint64_t xev_major;
	uint64_t xev_minor;
	xen_extraversion_t xev_extra;
	xen_compile_info_t xev_compile_info;
	xen_capabilities_info_t xev_capabilities;	/* e.g. "x86_64", "x86_32p" */
	xen_changeset_info_t xev_changeset;
	xen_platform_parameters_t xev_platform_parameters;
	uint64_t xev_pagesize;
};
135 
136 /*
137  * Either an old-style (3.0.4) core format, or the ELF format.
138  */
139 typedef enum {
140 	XKB_FORMAT_UNKNOWN = 0,
141 	XKB_FORMAT_CORE = 1,
142 	XKB_FORMAT_ELF = 2
143 } xkb_type_t;
144 
/*
 * One-entry cache of a mapped page of dump data; see xkb_map_mfn().
 */
typedef struct mfn_map {
	mfn_t mm_mfn;	/* MFN currently mapped (the cache key) */
	char *mm_map;	/* PAGE_SIZE mapping of that MFN's data */
} mfn_map_t;
149 
/*
 * Geometry of the guest's page tables, used by xkb_va_to_mfn().
 */
typedef struct mmu_info {
	size_t mi_max;		/* index of the top page-table level */
	size_t mi_shift[4];	/* VA shift for each level's index field */
	size_t mi_ptes;		/* number of entries per page table */
	size_t mi_ptesize;	/* bytes per PTE (4 or 8) */
} mmu_info_t;
156 
/*
 * State specific to XKB_FORMAT_CORE dumps.
 */
typedef struct xkb_core {
	xc_core_header_t xc_hdr;	/* on-disk dump header */
	void *xc_p2m_buf;	/* mmap()ed window holding the p2m table */
} xkb_core_t;
161 
/*
 * State specific to XKB_FORMAT_ELF dumps.
 */
typedef struct xkb_elf {
	mdb_gelf_file_t *xe_gelf;	/* gelf handle onto the dump */
	size_t *xe_off;		/* PFN -> page index within .xen_pages */
	struct xc_elf_header xe_hdr;	/* DUMPCORE_HEADER note contents */
	struct xc_elf_version xe_version;	/* DUMPCORE_XEN_VERSION note */
} xkb_elf_t;
168 
/*
 * Per-dump state for this backend.
 */
typedef struct xkb {
	char *xkb_path;		/* path of the dump file */
	int xkb_fd;		/* fd onto the dump file */
	int xkb_is_hvm;		/* dump is of an HVM guest */

	xkb_type_t xkb_type;	/* XKB_FORMAT_CORE or XKB_FORMAT_ELF */
	xkb_core_t xkb_core;	/* core-format-specific state */
	xkb_elf_t xkb_elf;	/* ELF-format-specific state */

	size_t xkb_nr_vcpus;	/* number of VCPU contexts */
	size_t xkb_nr_pages;	/* number of pages in the dump */
	size_t xkb_pages_off;	/* file offset of the page data */
	xen_pfn_t xkb_max_pfn;	/* largest PFN seen */
	mfn_t xkb_max_mfn;	/* largest MFN seen */
	int xkb_is_pae;		/* guest uses PAE paging */

	mmu_info_t xkb_mmu;	/* guest page-table geometry */
	debug_info_t xkb_info;	/* bootstrap debug_info (see file comment) */

	struct vcpu_guest_context *xkb_vcpus;	/* VCPU register state */

	char *xkb_pages;	/* all pages mmap()ed, or NULL if windowed */
	mfn_t *xkb_p2m;		/* PFN -> MFN */
	xen_pfn_t *xkb_m2p;	/* MFN -> PFN */
	mfn_map_t xkb_pt_map[4];	/* per-level page-table map cache */
	mfn_map_t xkb_map;	/* general single-page map cache */

	char *xkb_namelist;	/* fabricated ksyms ELF image */
	size_t xkb_namesize;	/* size of xkb_namelist */
} xkb_t;
199 
/*
 * Section-name strings for the fabricated namelist; the sh_name
 * offsets 1, 9 and 17 used in xkb_build_ksyms() index into this.
 */
static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
201 
/*
 * Fixed-size prefix of the fabricated ksyms image: an ELF header,
 * text/data program headers, four section headers, and the section
 * name strings.  The symbol and string tables follow it in the
 * allocated buffer (see xkb_build_ksyms()).
 */
typedef struct xkb_namelist {
	Ehdr	kh_elf_hdr;	/* seeded from the kernel's own Ehdr */
	Phdr	kh_text_phdr;
	Phdr	kh_data_phdr;
	Shdr	kh_shdr[XKB_SHDR_NUM];
	char	shstrings[sizeof (xkb_shstrtab)];
} xkb_namelist_t;
209 
210 static int xkb_build_ksyms(xkb_t *);
211 static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
212 static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
213 static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
214 static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
215 static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
216 static int xkb_close(xkb_t *);
217 
218 /*
219  * Jump through the hoops we need to to correctly identify a core file
220  * of either the old or new format.
221  */
222 int
223 xkb_identify(const char *file, int *longmode)
224 {
225 	xc_core_header_t header;
226 	mdb_gelf_file_t *gf = NULL;
227 	mdb_gelf_sect_t *sect = NULL;
228 	mdb_io_t *io = NULL;
229 	char *notes = NULL;
230 	char *pos;
231 	int ret = 0;
232 	size_t sz;
233 	int fd;
234 
235 	if ((fd = open64(file, O_RDONLY)) == -1)
236 		return (-1);
237 
238 	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
239 		(void) close(fd);
240 		return (0);
241 	}
242 
243 	(void) close(fd);
244 
245 	if (header.xch_magic == XC_CORE_MAGIC) {
246 		*longmode = 0;
247 
248 		/*
249 		 * Indeed.
250 		 */
251 		sz = header.xch_index_offset - header.xch_ctxt_offset;
252 #ifdef _LP64
253 		if (sizeof (struct vcpu_guest_context) *
254 		    header.xch_nr_vcpus == sz)
255 			*longmode = 1;
256 #else
257 		if (sizeof (struct vcpu_guest_context) *
258 		    header.xch_nr_vcpus != sz)
259 			*longmode = 1;
260 #endif /* _LP64 */
261 
262 		return (1);
263 	}
264 
265 	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
266 		return (-1);
267 
268 	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
269 		goto out;
270 
271 	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
272 		goto out;
273 
274 	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
275 		goto out;
276 
277 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
278 		struct xc_elf_version *vers;
279 		/* LINTED - alignment */
280 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
281 		char *desc;
282 		char *name;
283 
284 		name = pos + sizeof (*nhdr);
285 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
286 
287 		pos = desc + nhdr->n_descsz;
288 
289 		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
290 			continue;
291 
292 		/*
293 		 * The contents of this struct differ between 32 and 64
294 		 * bit; however, not until past the 'xev_capabilities'
295 		 * member, so we can just about get away with this.
296 		 */
297 
298 		/* LINTED - alignment */
299 		vers = (struct xc_elf_version *)desc;
300 
301 		if (strstr(vers->xev_capabilities, "x86_64")) {
302 			*longmode = 1;
303 		} else if (strstr(vers->xev_capabilities, "x86_32") ||
304 		    strstr(vers->xev_capabilities, "x86_32p")) {
305 			*longmode = 0;
306 		} else {
307 			mdb_warn("couldn't derive word size of dump; "
308 			    "assuming 64-bit");
309 			*longmode = 1;
310 		}
311 	}
312 
313 	ret = 1;
314 
315 out:
316 	if (gf != NULL)
317 		mdb_gelf_destroy(gf);
318 	else if (io != NULL)
319 		mdb_io_destroy(io);
320 	return (ret);
321 }
322 
323 static void *
324 xkb_fail(xkb_t *xkb, const char *msg, ...)
325 {
326 	va_list args;
327 
328 	va_start(args, msg);
329 	if (xkb != NULL)
330 		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
331 	(void) vfprintf(stderr, msg, args);
332 	(void) fprintf(stderr, "\n");
333 	va_end(args);
334 	if (xkb != NULL)
335 		(void) xkb_close(xkb);
336 
337 	errno = ENOEXEC;
338 
339 	return (NULL);
340 }
341 
342 static int
343 xkb_build_m2p(xkb_t *xkb)
344 {
345 	size_t i;
346 
347 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
348 		if (xkb->xkb_p2m[i] != MFN_INVALID &&
349 		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
350 			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
351 	}
352 
353 	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
354 	    UM_SLEEP);
355 
356 	for (i = 0; i <= xkb->xkb_max_mfn; i++)
357 		xkb->xkb_m2p[i] = PFN_INVALID;
358 
359 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
360 		if (xkb->xkb_p2m[i] != MFN_INVALID)
361 			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
362 	}
363 
364 	return (1);
365 }
366 
367 /*
368  * With FORMAT_CORE, we can use the table in the dump file directly.
369  * Just to make things fun, they've not page-aligned the p2m table.
370  */
371 static int
372 xkb_map_p2m(xkb_t *xkb)
373 {
374 	offset_t off;
375 	size_t size;
376 	xkb_core_t *xc = &xkb->xkb_core;
377 	size_t count = xkb->xkb_nr_pages;
378 	size_t boff = xc->xc_hdr.xch_index_offset;
379 
380 	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
381 	size = PAGE_MASK(size);
382 	off = PAGE_MASK(boff);
383 
384 	/* LINTED - alignment */
385 	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
386 	    MAP_SHARED, xkb->xkb_fd, off);
387 
388 	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
389 		(void) xkb_fail(xkb, "cannot map p2m table");
390 		return (0);
391 	}
392 
393 	/* LINTED - alignment */
394 	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
395 	    PAGE_OFFSET(boff));
396 
397 	return (1);
398 }
399 
400 /*
401  * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
402  * into a linear array indexed by pfn for convenience.  We also need to
403  * track the mapping between mfn and the offset in the file: a pfn with
404  * no mfn will not appear in the core file.
405  */
406 static int
407 xkb_build_p2m(xkb_t *xkb)
408 {
409 	xkb_elf_t *xe = &xkb->xkb_elf;
410 	mdb_gelf_sect_t *sect;
411 	size_t size;
412 	size_t i;
413 
414 	struct elf_p2m {
415 		uint64_t pfn;
416 		uint64_t gmfn;
417 	} *p2m;
418 
419 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
420 
421 	if (sect == NULL) {
422 		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
423 		return (0);
424 	}
425 
426 	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
427 		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
428 		return (0);
429 	}
430 
431 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
432 		if (p2m[i].pfn > xkb->xkb_max_pfn)
433 			xkb->xkb_max_pfn = p2m[i].pfn;
434 	}
435 
436 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
437 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
438 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
439 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
440 
441 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
442 		xkb->xkb_p2m[i] = PFN_INVALID;
443 		xe->xe_off[i] = (size_t)-1;
444 	}
445 
446 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
447 		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
448 		xe->xe_off[p2m[i].pfn] = i;
449 	}
450 
451 	return (1);
452 }
453 
454 /*
455  * For HVM images, we don't have the corresponding MFN list; the table
456  * is just a mapping from page index in the dump to the corresponding
457  * PFN.  To simplify the other code, we'll pretend that these PFNs are
458  * really MFNs as well, by populating xkb_p2m.
459  */
460 static int
461 xkb_build_fake_p2m(xkb_t *xkb)
462 {
463 	xkb_elf_t *xe = &xkb->xkb_elf;
464 	mdb_gelf_sect_t *sect;
465 	size_t size;
466 	size_t i;
467 
468 	uint64_t *p2pfn;
469 
470 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
471 
472 	if (sect == NULL) {
473 		(void) xkb_fail(xkb, "cannot find section .xen_pfn");
474 		return (0);
475 	}
476 
477 	if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
478 		(void) xkb_fail(xkb, "couldn't read .xen_pfn");
479 		return (0);
480 	}
481 
482 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
483 		if (p2pfn[i] > xkb->xkb_max_pfn)
484 			xkb->xkb_max_pfn = p2pfn[i];
485 	}
486 
487 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
488 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
489 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
490 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
491 
492 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
493 		xkb->xkb_p2m[i] = PFN_INVALID;
494 		xe->xe_off[i] = (size_t)-1;
495 	}
496 
497 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
498 		xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
499 		xe->xe_off[p2pfn[i]] = i;
500 	}
501 
502 	return (1);
503 }
504 
505 /*
506  * Return the MFN of the top-level page table for the given as.
507  */
508 static mfn_t
509 xkb_as_to_mfn(xkb_t *xkb, struct as *as)
510 {
511 	uintptr_t asp = (uintptr_t)as;
512 	uintptr_t hatp;
513 	uintptr_t htablep;
514 	uintptr_t pfn;
515 
516 	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
517 		return (MFN_INVALID);
518 	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
519 	    &htablep))
520 		return (MFN_INVALID);
521 	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
522 	    &pfn))
523 		return (MFN_INVALID);
524 
525 	if (pfn > xkb->xkb_max_pfn)
526 		return (MFN_INVALID);
527 
528 	return (xkb->xkb_p2m[pfn]);
529 }
530 
531 static mfn_t
532 xkb_cr3_to_pfn(xkb_t *xkb)
533 {
534 	uint64_t cr3 = xkb->xkb_vcpus[0].ctrlreg[3];
535 	if (xkb->xkb_is_hvm)
536 		return (cr3 >> PAGE_SHIFT);
537 	return (xen_cr3_to_pfn(cr3));
538 }
539 
540 static ssize_t
541 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
542     void *buf, size_t size)
543 {
544 	size_t left = size;
545 	int windowed = (xkb->xkb_pages == NULL);
546 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
547 
548 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
549 		return (-1);
550 
551 	while (left) {
552 		uint64_t pos = addr + (size - left);
553 		char *outpos = (char *)buf + (size - left);
554 		size_t pageoff = PAGE_OFFSET(pos);
555 		size_t sz = MIN(left, PAGE_SIZE - pageoff);
556 		mfn_t mfn;
557 
558 		if (!phys) {
559 			mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
560 			if (mfn == MFN_INVALID)
561 				return (-1);
562 		} else {
563 			xen_pfn_t pfn = pos >> PAGE_SHIFT;
564 			if (pfn > xkb->xkb_max_pfn)
565 				return (-1);
566 			mfn = xkb->xkb_p2m[pfn];
567 			if (mfn == MFN_INVALID)
568 				return (-1);
569 		}
570 
571 		/*
572 		 * If we're windowed then pread() is much faster.
573 		 */
574 		if (windowed) {
575 			offset_t off = xkb_mfn_to_offset(xkb, mfn);
576 			int ret;
577 
578 			if (off == ~1ULL)
579 				return (-1);
580 
581 			off += pageoff;
582 
583 			ret = pread64(xkb->xkb_fd, outpos, sz, off);
584 			if (ret == -1)
585 				return (-1);
586 			if (ret != sz)
587 				return ((size - left) + ret);
588 
589 			left -= ret;
590 		} else {
591 			if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
592 				return (-1);
593 
594 			bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
595 
596 			left -= sz;
597 		}
598 	}
599 
600 	return (size);
601 }
602 
/*
 * Read from the dump at a guest-physical address.
 */
static ssize_t
xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
{
	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
}
608 
/*
 * Read from the dump at a virtual address within the given address
 * space (NULL means CPU0's current address space).
 */
static ssize_t
xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
{
	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
}
614 
/*
 * Read from the dump at a kernel virtual address.
 */
static ssize_t
xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
{
	return (xkb_aread(xkb, addr, buf, size, NULL));
}
620 
621 static int
622 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
623 {
624 	if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
625 	    sizeof (uintptr_t))
626 		return (0);
627 	return (1);
628 }
629 
630 static char *
631 xkb_readstr(xkb_t *xkb, uintptr_t addr)
632 {
633 	char *str = mdb_alloc(1024, UM_SLEEP);
634 	size_t i;
635 
636 	for (i = 0; i < 1024; i++) {
637 		if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
638 			mdb_free(str, 1024);
639 			return (NULL);
640 		}
641 
642 		if (str[i] == '\0')
643 			break;
644 	}
645 
646 	if (i == 1024) {
647 		mdb_free(str, 1024);
648 		return (NULL);
649 	}
650 
651 	return (str);
652 }
653 
654 static offset_t
655 xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
656 {
657 	if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
658 		return (-1ULL);
659 
660 	if (xkb->xkb_type == XKB_FORMAT_CORE)
661 		return (PAGE_SIZE * pfn);
662 
663 	return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
664 }
665 
666 static offset_t
667 xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
668 {
669 	xen_pfn_t pfn;
670 
671 	if (mfn > xkb->xkb_max_mfn)
672 		return (-1ULL);
673 
674 	pfn = xkb->xkb_m2p[mfn];
675 
676 	if (pfn == PFN_INVALID)
677 		return (-1ULL);
678 
679 	return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
680 }
681 
682 static char *
683 xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
684 {
685 	int windowed = (xkb->xkb_pages == NULL);
686 	offset_t off;
687 
688 	if (mm->mm_mfn == mfn)
689 		return (mm->mm_map);
690 
691 	mm->mm_mfn = mfn;
692 
693 	if (windowed) {
694 		if (mm->mm_map != (char *)MAP_FAILED) {
695 			(void) munmap(mm->mm_map, PAGE_SIZE);
696 			mm->mm_map = (void *)MAP_FAILED;
697 		}
698 
699 		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
700 			return (NULL);
701 
702 		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
703 		    xkb->xkb_fd, off);
704 
705 		if (mm->mm_map == (char *)MAP_FAILED)
706 			return (NULL);
707 	} else {
708 		xen_pfn_t pfn;
709 
710 		mm->mm_map = NULL;
711 
712 		if (mfn > xkb->xkb_max_mfn)
713 			return (NULL);
714 
715 		pfn = xkb->xkb_m2p[mfn];
716 
717 		if (pfn == PFN_INVALID)
718 			return (NULL);
719 
720 		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
721 	}
722 
723 	return (mm->mm_map);
724 }
725 
726 static uint64_t
727 xkb_get_pte(mmu_info_t *mmu, char *ptep)
728 {
729 	uint64_t pte = 0;
730 
731 	if (mmu->mi_ptesize == 8) {
732 		/* LINTED - alignment */
733 		pte = *((uint64_t *)ptep);
734 	} else {
735 		/* LINTED - alignment */
736 		pte = *((uint32_t *)ptep);
737 	}
738 
739 	return (pte);
740 }
741 
742 static mfn_t
743 xkb_pte_to_base_mfn(uint64_t pte, size_t level)
744 {
745 	if (PTE_IS_LGPG(pte, level)) {
746 		pte &= PT_PADDR_LGPG;
747 	} else {
748 		pte &= PT_PADDR;
749 	}
750 
751 	return (pte >> PAGE_SHIFT);
752 }
753 
754 /*
755  * Resolve the given VA into an MFN, using the provided mfn as a top-level page
756  * table.
757  */
758 static mfn_t
759 xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
760 {
761 	mmu_info_t *mmu = &xkb->xkb_mmu;
762 	uint64_t pte;
763 	size_t level;
764 
765 	for (level = mmu->mi_max; ; --level) {
766 		size_t entry;
767 
768 		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
769 			return (MFN_INVALID);
770 
771 		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);
772 
773 		pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
774 		    entry * mmu->mi_ptesize);
775 
776 		if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
777 			return (MFN_INVALID);
778 
779 		if (level == 0)
780 			break;
781 
782 		/*
783 		 * Currently 'mfn' refers to the base MFN of the
784 		 * large-page mapping.  Add on the 4K-sized index into
785 		 * the large-page mapping to get the right MFN within
786 		 * the mapping.
787 		 */
788 		if (PTE_IS_LGPG(pte, level)) {
789 			mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
790 			    PAGE_SHIFT;
791 			break;
792 		}
793 	}
794 
795 	return (mfn);
796 }
797 
798 static int
799 xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
800     uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
801 {
802 	if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
803 	    sizeof (struct module))
804 		return (0);
805 
806 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
807 	    offsetof(Shdr, sh_addr), sym_addr))
808 		return (0);
809 
810 	if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
811 	    offsetof(Shdr, sh_addr), str_addr))
812 		return (0);
813 
814 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
815 	    offsetof(Shdr, sh_size), sym_count))
816 		return (0);
817 	*sym_count /= sizeof (Sym);
818 
819 	return (1);
820 }
821 
822 static int
823 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
824     uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
825 {
826 	size_t i;
827 
828 	for (i = 0; i < sym_count; i++) {
829 		Sym sym;
830 		char *name;
831 		size_t sz;
832 		int type = XKB_WALK_GLOBAL;
833 
834 		if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
835 		    sizeof (sym)) != sizeof (sym))
836 			return (0);
837 
838 		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
839 			type = XKB_WALK_LOCAL;
840 
841 		name = xkb_readstr(xkb, str_addr + sym.st_name);
842 
843 		sym.st_shndx = SHN_ABS;
844 		sym.st_name = sizes[XKB_WALK_STR];
845 
846 		sizes[type] += sizeof (sym);
847 		sz = strlen(name) + 1;
848 		sizes[XKB_WALK_STR] += sz;
849 
850 		if (buf != NULL) {
851 			if (types & type) {
852 				bcopy(&sym, *buf, sizeof (sym));
853 				*buf += sizeof (sym);
854 			}
855 			if (types & XKB_WALK_STR) {
856 				bcopy(name, *buf, sz);
857 				*buf += sz;
858 			}
859 		}
860 
861 		mdb_free(name, 1024);
862 	}
863 
864 	return (1);
865 }
866 
/*
 * Walk the circular modctl list starting (and ending) at 'modhead',
 * visiting each loaded module's symbols via xkb_read_modsyms().  On
 * the sizing pass 'buf' is NULL and only 'sizes' is filled in; on
 * output passes 'buf' points into the namelist buffer and is advanced
 * as the symbols/strings selected by 'types' are emitted.  Returns 1
 * on success, 0 on a failed read.
 */
static int
xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
    size_t *sizes, int types)
{
	uintptr_t modctl = modhead;
	uintptr_t modulep;
	struct module module;
	uintptr_t sym_count;
	uintptr_t sym_addr;
	uintptr_t str_addr;
	size_t max_iter = 500;

	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));

	/*
	 * empty first symbol
	 */
	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
	sizes[XKB_WALK_STR] += 1;

	if (buf != NULL) {
		if (types & XKB_WALK_LOCAL) {
			Sym tmp;
			bzero(&tmp, sizeof (tmp));
			bcopy(&tmp, *buf, sizeof (tmp));
			*buf += sizeof (tmp);
		}
		if (types & XKB_WALK_STR) {
			**buf = '\0';
			(*buf)++;
		}
	}

	for (;;) {
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_mp), &modulep))
			return (0);

		/* A modctl with no module loaded: skip to the next. */
		if (modulep == NULL)
			goto next;

		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
		    &sym_count, &str_addr))
			return (0);

		/* Modules loaded with no symbols contribute nothing. */
		if ((module.flags & KOBJ_NOKSYMS))
			goto next;

		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
		    str_addr, sym_count))
			return (0);

next:
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_next), &modctl))
			return (0);

		/* The list is circular; we're done when we wrap around. */
		if (modctl == modhead)
			break;
		/*
		 * Try and prevent us looping forever if we have a broken list.
		 */
		if (--max_iter == 0)
			break;
	}

	return (1);
}
935 
936 /*
937  * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
938  * file for hypervisor images, we fabricate one here using code similar
939  * to that of /dev/ksyms.
940  */
941 static int
942 xkb_build_ksyms(xkb_t *xkb)
943 {
944 	debug_info_t *info = &xkb->xkb_info;
945 	size_t sizes[XKB_WALK_STR + 1];
946 	xkb_namelist_t *hdr;
947 	char *buf;
948 	struct modctl modules;
949 	uintptr_t module;
950 	Shdr *shp;
951 
952 	if (xkb_read(xkb, info->di_modules, &modules,
953 	    sizeof (struct modctl)) != sizeof (struct modctl))
954 		return (0);
955 
956 	module = (uintptr_t)modules.mod_mp;
957 
958 	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
959 	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
960 		return (0);
961 
962 	xkb->xkb_namesize = sizeof (xkb_namelist_t);
963 	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
964 	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
965 	xkb->xkb_namesize += sizes[XKB_WALK_STR];
966 
967 	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
968 	    == NULL)
969 		return (0);
970 
971 	/* LINTED - alignment */
972 	hdr = (xkb_namelist_t *)xkb->xkb_namelist;
973 
974 	if (xkb_read(xkb, module + offsetof(struct module, hdr),
975 	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
976 		return (0);
977 
978 	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
979 	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
980 	hdr->kh_elf_hdr.e_phnum = 2;
981 	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
982 	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;
983 
984 	hdr->kh_text_phdr.p_type = PT_LOAD;
985 	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
986 	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
987 	hdr->kh_text_phdr.p_flags = PF_R | PF_X;
988 
989 	hdr->kh_data_phdr.p_type = PT_LOAD;
990 	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
991 	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
992 	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;
993 
994 	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
995 	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
996 	shp->sh_type = SHT_SYMTAB;
997 	shp->sh_offset = sizeof (xkb_namelist_t);
998 	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
999 	shp->sh_link = XKB_SHDR_STRTAB;
1000 	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
1001 	shp->sh_addralign = sizeof (Addr);
1002 	shp->sh_entsize = sizeof (Sym);
1003 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1004 
1005 
1006 	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
1007 	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
1008 	shp->sh_type = SHT_STRTAB;
1009 	shp->sh_offset = sizeof (xkb_namelist_t) +
1010 	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
1011 	shp->sh_size = sizes[XKB_WALK_STR];
1012 	shp->sh_addralign = 1;
1013 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1014 
1015 
1016 	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
1017 	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
1018 	shp->sh_type = SHT_STRTAB;
1019 	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
1020 	shp->sh_size = sizeof (xkb_shstrtab);
1021 	shp->sh_addralign = 1;
1022 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1023 
1024 	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));
1025 
1026 	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);
1027 
1028 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1029 	    XKB_WALK_LOCAL))
1030 		return (0);
1031 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1032 	    XKB_WALK_GLOBAL))
1033 		return (0);
1034 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1035 	    XKB_WALK_STR))
1036 		return (0);
1037 
1038 	return (1);
1039 }
1040 
/*
 * Bootstrap an old-format (FORMAT_CORE) dump: validate the header,
 * read the VCPU contexts, try to map the page data, and hook up the
 * p2m table.  Returns 'xkb' on success, NULL (via xkb_fail()) on
 * failure.
 */
static xkb_t *
xkb_open_core(xkb_t *xkb)
{
	xkb_core_t *xc = &xkb->xkb_core;
	size_t sz;

	xkb->xkb_type = XKB_FORMAT_CORE;

	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));

	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
	    sizeof (xc->xc_hdr))
		return (xkb_fail(xkb, "invalid dump file"));

	/* Old-format HVM dumps are explicitly unsupported. */
	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
		return (xkb_fail(xkb, "cannot process HVM images"));

	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xc->xc_hdr.xch_magic));
	}

	/*
	 * With FORMAT_CORE, all pages are in the dump (non-existing
	 * ones are zeroed out).
	 */
	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;

	sz = xkb->xkb_nr_vcpus * sizeof (*xkb->xkb_vcpus);

	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);

	if (pread64(xkb->xkb_fd, xkb->xkb_vcpus, sz,
	    xc->xc_hdr.xch_ctxt_offset) != sz)
		return (xkb_fail(xkb, "cannot read VCPU contexts"));

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/*
	 * We'd like to adapt for correctness' sake, but we have no way of
	 * detecting a PAE guest, since cr4 writes are disallowed.
	 */
	xkb->xkb_is_pae = 1;

	if (!xkb_map_p2m(xkb))
		return (NULL);

	return (xkb);
}
1102 
1103 static xkb_t *
1104 xkb_open_elf(xkb_t *xkb)
1105 {
1106 	xkb_elf_t *xe = &xkb->xkb_elf;
1107 	mdb_gelf_sect_t *sect;
1108 	char *notes;
1109 	char *pos;
1110 	mdb_io_t *io;
1111 
1112 	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
1113 	    O_RDONLY, 0)) == NULL)
1114 		return (xkb_fail(xkb, "failed to open"));
1115 
1116 	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
1117 
1118 	if (xe->xe_gelf == NULL) {
1119 		mdb_io_destroy(io);
1120 		return (xkb);
1121 	}
1122 
1123 	xkb->xkb_fd = mdb_fdio_fileno(io);
1124 
1125 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
1126 
1127 	if (sect == NULL)
1128 		return (xkb);
1129 
1130 	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
1131 		return (xkb);
1132 
1133 	/*
1134 	 * Now we know this is indeed a hypervisor core dump, even if
1135 	 * it's corrupted.
1136 	 */
1137 	xkb->xkb_type = XKB_FORMAT_ELF;
1138 
1139 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
1140 		/* LINTED - alignment */
1141 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
1142 		uint64_t vers;
1143 		char *desc;
1144 		char *name;
1145 
1146 		name = pos + sizeof (*nhdr);
1147 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
1148 
1149 		pos = desc + nhdr->n_descsz;
1150 
1151 		switch (nhdr->n_type) {
1152 		case XEN_ELFNOTE_DUMPCORE_NONE:
1153 			break;
1154 
1155 		case XEN_ELFNOTE_DUMPCORE_HEADER:
1156 			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
1157 				return (xkb_fail(xkb, "invalid ELF note "
1158 				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
1159 			}
1160 
1161 			bcopy(desc, &xe->xe_hdr,
1162 			    sizeof (struct xc_elf_header));
1163 			break;
1164 
1165 		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
1166 			if (nhdr->n_descsz != sizeof (struct xc_elf_version)) {
1167 				return (xkb_fail(xkb, "invalid ELF note "
1168 				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
1169 			}
1170 
1171 			bcopy(desc, &xe->xe_version,
1172 			    sizeof (struct xc_elf_version));
1173 			break;
1174 
1175 		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
1176 			/* LINTED - alignment */
1177 			vers = *((uint64_t *)desc);
1178 			if ((vers >> 32) != 0) {
1179 				return (xkb_fail(xkb, "unknown major "
1180 				    "version %d (expected 0)\n",
1181 				    (int)(vers >> 32)));
1182 			}
1183 
1184 			if ((vers & 0xffffffff) != 1) {
1185 				mdb_warn("unexpected dump minor number "
1186 				    "version %d (expected 1)\n",
1187 				    (int)(vers & 0xffffffff));
1188 			}
1189 			break;
1190 
1191 		default:
1192 			mdb_warn("unknown ELF note %d(%s)\n",
1193 			    nhdr->n_type, name);
1194 			break;
1195 		}
1196 	}
1197 
1198 	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;
1199 
1200 	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
1201 	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
1202 		return (xkb_fail(xkb, "invalid magic %d",
1203 		    xe->xe_hdr.xeh_magic));
1204 	}
1205 
1206 	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
1207 	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
1208 	    "x86_32p") != NULL);
1209 
1210 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
1211 
1212 	if (sect == NULL)
1213 		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
1214 
1215 	if (sect->gs_shdr.sh_entsize != sizeof (vcpu_guest_context_t))
1216 		return (xkb_fail(xkb, "invalid section .xen_prstatus"));
1217 
1218 	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
1219 
1220 	if ((xkb->xkb_vcpus = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
1221 		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
1222 
1223 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
1224 
1225 	if (sect == NULL)
1226 		return (xkb_fail(xkb, "cannot find section .xen_pages"));
1227 
1228 	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
1229 		return (xkb_fail(xkb, ".xen_pages is not page aligned"));
1230 
1231 	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
1232 		return (xkb_fail(xkb, "invalid section .xen_pages"));
1233 
1234 	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
1235 
1236 	/*
1237 	 * Try to map all the data pages. If we can't, fall back to the
1238 	 * window/pread() approach, which is significantly slower.
1239 	 */
1240 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
1241 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
1242 
1243 	if (xkb->xkb_pages == (char *)MAP_FAILED)
1244 		xkb->xkb_pages = NULL;
1245 
1246 	if (xkb->xkb_is_hvm) {
1247 		if (!xkb_build_fake_p2m(xkb))
1248 			return (NULL);
1249 	} else {
1250 		if (!xkb_build_p2m(xkb))
1251 			return (NULL);
1252 	}
1253 
1254 	return (xkb);
1255 }
1256 
1257 static void
1258 xkb_init_mmu(xkb_t *xkb)
1259 {
1260 #if defined(__amd64)
1261 	xkb->xkb_mmu.mi_max = 3;
1262 	xkb->xkb_mmu.mi_shift[0] = 12;
1263 	xkb->xkb_mmu.mi_shift[1] = 21;
1264 	xkb->xkb_mmu.mi_shift[2] = 30;
1265 	xkb->xkb_mmu.mi_shift[3] = 39;
1266 	xkb->xkb_mmu.mi_ptes = 512;
1267 	xkb->xkb_mmu.mi_ptesize = 8;
1268 #elif defined(__i386)
1269 	if (xkb->xkb_is_pae) {
1270 		xkb->xkb_mmu.mi_max = 2;
1271 		xkb->xkb_mmu.mi_shift[0] = 12;
1272 		xkb->xkb_mmu.mi_shift[1] = 21;
1273 		xkb->xkb_mmu.mi_shift[2] = 30;
1274 		xkb->xkb_mmu.mi_ptes = 512;
1275 		xkb->xkb_mmu.mi_ptesize = 8;
1276 	} else {
1277 		xkb->xkb_mmu.mi_max = 1;
1278 		xkb->xkb_mmu.mi_shift[0] = 12;
1279 		xkb->xkb_mmu.mi_shift[1] = 22;
1280 		xkb->xkb_mmu.mi_ptes = 1024;
1281 		xkb->xkb_mmu.mi_ptesize = 4;
1282 	}
1283 #endif
1284 }
1285 
1286 /*ARGSUSED*/
1287 xkb_t *
1288 xkb_open(const char *namelist, const char *corefile, const char *swapfile,
1289     int flag, const char *err)
1290 {
1291 	uintptr_t debug_info = DEBUG_INFO;
1292 	struct stat64 corestat;
1293 	xkb_t *xkb = NULL;
1294 	size_t i;
1295 
1296 	if (stat64(corefile, &corestat) == -1)
1297 		return (xkb_fail(xkb, "cannot stat %s", corefile));
1298 
1299 	if (flag != O_RDONLY)
1300 		return (xkb_fail(xkb, "invalid open flags"));
1301 
1302 	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
1303 
1304 	for (i = 0; i < 4; i++) {
1305 		xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
1306 		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
1307 	}
1308 
1309 	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
1310 	xkb->xkb_map.mm_mfn = MFN_INVALID;
1311 	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
1312 	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
1313 	xkb->xkb_fd = -1;
1314 
1315 	xkb->xkb_path = strdup(corefile);
1316 
1317 	if ((xkb = xkb_open_elf(xkb)) == NULL)
1318 		return (NULL);
1319 
1320 	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
1321 		if (!xkb_open_core(xkb))
1322 			return (NULL);
1323 	}
1324 
1325 	xkb_init_mmu(xkb);
1326 
1327 	if (!xkb_build_m2p(xkb))
1328 		return (NULL);
1329 
1330 	if (xkb->xkb_is_hvm)
1331 		debug_info = DEBUG_INFO_HVM;
1332 
1333 	if (xkb_read(xkb, debug_info, &xkb->xkb_info,
1334 	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
1335 		return (xkb_fail(xkb, "cannot read debug_info"));
1336 
1337 	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
1338 		return (xkb_fail(xkb, "invalid debug info magic %d",
1339 		    xkb->xkb_info.di_magic));
1340 	}
1341 
1342 	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
1343 		return (xkb_fail(xkb, "unknown debug info version %d",
1344 		    xkb->xkb_info.di_version));
1345 	}
1346 
1347 	if (!xkb_build_ksyms(xkb))
1348 		return (xkb_fail(xkb, "cannot construct namelist"));
1349 
1350 	return (xkb);
1351 }
1352 
/*
 * Tear down an xkb handle, releasing everything acquired during open.
 * Tolerates a NULL or partially-constructed handle (this is what
 * xkb_fail() relies on for error-path cleanup).  Always returns 0.
 *
 * Note: the sizes passed to mdb_free()/munmap() below must mirror the
 * corresponding allocation sites exactly.
 */
int
xkb_close(xkb_t *xkb)
{
	size_t i;

	if (xkb == NULL)
		return (0);

	/* M2P table built by xkb_build_m2p(). */
	if (xkb->xkb_m2p != NULL) {
		mdb_free(xkb->xkb_m2p,
		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
	}

	/*
	 * Either all data pages were mapped in one mmap() (fast path),
	 * or we fell back to per-page mapping windows -- unmap
	 * whichever was in use.
	 */
	if (xkb->xkb_pages != NULL) {
		(void) munmap((void *)xkb->xkb_pages,
		    PAGE_SIZE * xkb->xkb_nr_pages);
	} else {
		for (i = 0; i < 4; i++) {
			char *addr = xkb->xkb_pt_map[i].mm_map;
			if (addr != (char *)MAP_FAILED)
				(void) munmap((void *)addr, PAGE_SIZE);
		}
		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
			(void) munmap((void *)xkb->xkb_map.mm_map,
			    PAGE_SIZE);
		}
	}

	if (xkb->xkb_namelist != NULL)
		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);

	if (xkb->xkb_type == XKB_FORMAT_ELF) {
		xkb_elf_t *xe = &xkb->xkb_elf;
		size_t sz;

		/*
		 * Destroying the gelf handle also destroys the mdb_io
		 * that owns xkb_fd, so we don't close() it here.
		 */
		if (xe->xe_gelf != NULL)
			mdb_gelf_destroy(xe->xe_gelf);

		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);

		if (xkb->xkb_p2m != NULL)
			mdb_free(xkb->xkb_p2m, sz);

		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);

		if (xe->xe_off != NULL)
			mdb_free(xe->xe_off, sz);
	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
		xkb_core_t *xc = &xkb->xkb_core;
		size_t sz;

		/* The core path owns the fd directly. */
		if (xkb->xkb_fd != -1)
			(void) close(xkb->xkb_fd);

		/* Must match the mapping size used when loading the P2M. */
		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
		sz = PAGE_MASK(sz);

		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
			(void) munmap(xc->xc_p2m_buf, sz);

		if (xkb->xkb_vcpus != NULL) {
			sz = sizeof (struct vcpu_guest_context) *
			    xkb->xkb_nr_vcpus;
			mdb_free(xkb->xkb_vcpus, sz);
		}
	}

	/* Allocated with strdup() in xkb_open(); free(NULL) is a no-op. */
	free(xkb->xkb_path);

	mdb_free(xkb, sizeof (*xkb));
	return (0);
}
1425 
1426 /*ARGSUSED*/
1427 static mdb_io_t *
1428 xkb_sym_io(xkb_t *xkb, const char *symfile)
1429 {
1430 	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
1431 
1432 	if (io == NULL)
1433 		mdb_warn("failed to create namelist from %s", xkb->xkb_path);
1434 
1435 	return (io);
1436 }
1437 
1438 uint64_t
1439 xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
1440 {
1441 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
1442 	mfn_t mfn;
1443 
1444 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
1445 		return (-1ULL);
1446 
1447 	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
1448 
1449 	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
1450 		return (-1ULL);
1451 
1452 	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
1453 	    | PAGE_OFFSET(addr));
1454 }
1455 
/*
 * Fill in 'mregs' with the saved machine register state of virtual
 * CPU 'cpu', translated from the Xen vcpu_guest_context captured in
 * the dump.  Returns 0 on success, or -1 with errno set to EINVAL if
 * 'cpu' is out of range.
 */
static int
xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
{
	struct vcpu_guest_context *vcpu;
	struct cpu_user_regs *ur;
	struct regs *regs;

	if (cpu >= xkb->xkb_nr_vcpus) {
		errno = EINVAL;
		return (-1);
	}

	bzero(mregs, sizeof (*mregs));

	vcpu = &xkb->xkb_vcpus[cpu];
	ur = &vcpu->user_regs;
	regs = &mregs->pm_gregs;

	/* Segment selectors and trap state are common to both ISAs. */
	regs->r_ss = ur->ss;
	regs->r_cs = ur->cs;
	regs->r_ds = ur->ds;
	regs->r_es = ur->es;
	regs->r_fs = ur->fs;
	regs->r_gs = ur->gs;
	regs->r_trapno = ur->entry_vector;
	regs->r_err = ur->error_code;
#ifdef __amd64
	/* Saved frame pointer/PC mirror rbp/rip for stack tracing. */
	regs->r_savfp = ur->rbp;
	regs->r_savpc = ur->rip;
	regs->r_rdi = ur->rdi;
	regs->r_rsi = ur->rsi;
	regs->r_rdx = ur->rdx;
	regs->r_rcx = ur->rcx;
	regs->r_r8 = ur->r8;
	regs->r_r9 = ur->r9;
	regs->r_rax = ur->rax;
	regs->r_rbx = ur->rbx;
	regs->r_rbp = ur->rbp;
	regs->r_r10 = ur->r10;
	regs->r_r11 = ur->r11;
	regs->r_r12 = ur->r12;
	regs->r_r13 = ur->r13;
	regs->r_r14 = ur->r14;
	regs->r_r15 = ur->r15;
	regs->r_rip = ur->rip;
	regs->r_rfl = ur->rflags;
	regs->r_rsp = ur->rsp;
#else
	/* Saved frame pointer/PC mirror ebp/eip for stack tracing. */
	regs->r_savfp = ur->ebp;
	regs->r_savpc = ur->eip;
	regs->r_edi = ur->edi;
	regs->r_esi = ur->esi;
	regs->r_ebp = ur->ebp;
	regs->r_esp = ur->esp;
	regs->r_ebx = ur->ebx;
	regs->r_edx = ur->edx;
	regs->r_ecx = ur->ecx;
	regs->r_eax = ur->eax;
	regs->r_eip = ur->eip;
	regs->r_efl = ur->eflags;
	regs->r_uesp = 0;
#endif

	/* Copy the 8 control and 8 debug registers wholesale. */
	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));

	/* Advertise which register groups above are valid. */
	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;

	return (0);
}
1526 
/*
 * KVM backend operations vector returned to mdb via mdb_kb_ops().
 * All write operations are unsupported (dumps are read-only); the
 * casts adapt our typed entry points to the generic function-pointer
 * types of mdb_kb_ops_t.
 */
static mdb_kb_ops_t xpv_kb_ops = {
	.kb_open = (void *(*)())xkb_open,
	.kb_close = (int (*)())xkb_close,
	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
	.kb_kread = (ssize_t (*)())xkb_read,
	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_aread = (ssize_t (*)())xkb_aread,
	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_pread = (ssize_t (*)())xkb_pread,
	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_vtop = (uint64_t (*)())xkb_vtop,
	.kb_getmregs = (int (*)())xkb_getmregs
};
1540 
/* Entry point by which mdb's kvm target obtains our backend ops. */
mdb_kb_ops_t *
mdb_kb_ops(void)
{
	return (&xpv_kb_ops);
}
1546 
/*
 * This module contributes no dcmds or walkers of its own; it exists
 * solely to provide the KVM backend ops vector above.
 */
static const mdb_dcmd_t dcmds[] = { NULL, };
static const mdb_walker_t walkers[] = { NULL, };
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1550 
/* Standard mdb module load hook: report our (empty) dcmd/walker tables. */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}
1556 
/* Standard mdb module unload hook: nothing to tear down. */
void
_mdb_fini(void)
{
}
1561