1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2018 Joyent, Inc.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ctf.h>
28 #include <sys/cpuvar.h>
29 #include <sys/systm.h>
30 #include <sys/traptrace.h>
31 #include <sys/x_call.h>
32 #include <sys/xc_levels.h>
33 #include <sys/avintr.h>
34 #include <sys/systm.h>
35 #include <sys/trap.h>
36 #include <sys/mutex.h>
37 #include <sys/mutex_impl.h>
38 #include "i86mmu.h"
39 #include "unix_sup.h"
40 #include <sys/apix.h>
41 #include <sys/x86_archext.h>
42 #include <sys/bitmap.h>
43 #include <sys/controlregs.h>
44 
/* Field width used for the HANDLER column of ::ttrace output. */
#define	TT_HDLR_WIDTH	17


/*
 * apix only: the ::ttrace dcmd reads the target's "apix_enable" variable
 * into use_apix and, when it is non-zero, the target's "apixs" variable
 * into d_apixs.  ttrace_apix_interrupt() indexes d_apixs by CPU id.
 */
static apix_impl_t *d_apixs[NCPU];
static int use_apix = 0;
51 
52 static int
53 ttrace_ttr_size_check(void)
54 {
55 	mdb_ctf_id_t ttrtid;
56 	ssize_t ttr_size;
57 
58 	if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
59 	    mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
60 		mdb_warn("failed to determine size of trap_trace_rec_t; "
61 		    "non-TRAPTRACE kernel?\n");
62 		return (0);
63 	}
64 
65 	if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
66 	    sizeof (trap_trace_rec_t)) {
67 		/*
68 		 * On Intel machines, this will happen when TTR_STACK_DEPTH
69 		 * is changed.  This code could be smarter, and could
70 		 * dynamically adapt to different depths, but not until a
71 		 * need for such adaptation is demonstrated.
72 		 */
73 		mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
74 		    "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
75 		return (0);
76 	}
77 
78 	return (1);
79 }
80 
81 int
82 ttrace_walk_init(mdb_walk_state_t *wsp)
83 {
84 	trap_trace_ctl_t *ttcp;
85 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
86 	int i;
87 
88 	if (!ttrace_ttr_size_check())
89 		return (WALK_ERR);
90 
91 	ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
92 
93 	if (wsp->walk_addr != NULL) {
94 		mdb_warn("ttrace only supports global walks\n");
95 		return (WALK_ERR);
96 	}
97 
98 	if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
99 		mdb_warn("symbol 'trap_trace_ctl' not found; "
100 		    "non-TRAPTRACE kernel?\n");
101 		mdb_free(ttcp, ttc_size);
102 		return (WALK_ERR);
103 	}
104 
105 	/*
106 	 * We'll poach the ttc_current pointer (which isn't used for
107 	 * anything) to store a pointer to our current TRAPTRACE record.
108 	 * This allows us to only keep the array of trap_trace_ctl structures
109 	 * as our walker state (ttc_current may be the only kernel data
110 	 * structure member added exclusively to make writing the mdb walker
111 	 * a little easier).
112 	 */
113 	for (i = 0; i < NCPU; i++) {
114 		trap_trace_ctl_t *ttc = &ttcp[i];
115 
116 		if (ttc->ttc_first == NULL)
117 			continue;
118 
119 		/*
120 		 * Assign ttc_current to be the last completed record.
121 		 * Note that the error checking (i.e. in the ttc_next ==
122 		 * ttc_first case) is performed in the step function.
123 		 */
124 		ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
125 	}
126 
127 	wsp->walk_data = ttcp;
128 	return (WALK_NEXT);
129 }
130 
131 int
132 ttrace_walk_step(mdb_walk_state_t *wsp)
133 {
134 	trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
135 	trap_trace_rec_t rec;
136 	int rval, i, recsize = sizeof (trap_trace_rec_t);
137 	hrtime_t latest = 0;
138 
139 	/*
140 	 * Loop through the CPUs, looking for the latest trap trace record
141 	 * (we want to walk through the trap trace records in reverse
142 	 * chronological order).
143 	 */
144 	for (i = 0; i < NCPU; i++) {
145 		ttc = &ttcp[i];
146 
147 		if (ttc->ttc_current == NULL)
148 			continue;
149 
150 		if (ttc->ttc_current < ttc->ttc_first)
151 			ttc->ttc_current = ttc->ttc_limit - recsize;
152 
153 		if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
154 			mdb_warn("couldn't read rec at %p", ttc->ttc_current);
155 			return (WALK_ERR);
156 		}
157 
158 		if (rec.ttr_stamp > latest) {
159 			latest = rec.ttr_stamp;
160 			latest_ttc = ttc;
161 		}
162 	}
163 
164 	if (latest == 0)
165 		return (WALK_DONE);
166 
167 	ttc = latest_ttc;
168 
169 	if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
170 		mdb_warn("couldn't read rec at %p", ttc->ttc_current);
171 		return (WALK_ERR);
172 	}
173 
174 	rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
175 
176 	if (ttc->ttc_current == ttc->ttc_next)
177 		ttc->ttc_current = NULL;
178 	else
179 		ttc->ttc_current -= sizeof (trap_trace_rec_t);
180 
181 	return (rval);
182 }
183 
184 void
185 ttrace_walk_fini(mdb_walk_state_t *wsp)
186 {
187 	mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
188 }
189 
190 static int
191 ttrace_syscall(trap_trace_rec_t *rec)
192 {
193 	GElf_Sym sym;
194 	int sysnum = rec->ttr_sysnum;
195 	uintptr_t addr;
196 	struct sysent sys;
197 
198 	mdb_printf("%-3x", sysnum);
199 
200 	if (rec->ttr_sysnum > NSYSCALL) {
201 		mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
202 		return (0);
203 	}
204 
205 	if (mdb_lookup_by_name("sysent", &sym) == -1) {
206 		mdb_warn("\ncouldn't find 'sysent'");
207 		return (-1);
208 	}
209 
210 	addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
211 
212 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
213 		mdb_warn("\nsysnum %d out-of-range\n", sysnum);
214 		return (-1);
215 	}
216 
217 	if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
218 		mdb_warn("\nfailed to read sysent at %p", addr);
219 		return (-1);
220 	}
221 
222 	mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
223 
224 	return (0);
225 }
226 
227 static int
228 ttrace_interrupt(trap_trace_rec_t *rec)
229 {
230 	GElf_Sym sym;
231 	uintptr_t addr;
232 	struct av_head hd;
233 	struct autovec av;
234 
235 	switch (rec->ttr_regs.r_trapno) {
236 	case T_SOFTINT:
237 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
238 		return (0);
239 	default:
240 		break;
241 	}
242 
243 	mdb_printf("%-3x ", rec->ttr_vector);
244 
245 	if (mdb_lookup_by_name("autovect", &sym) == -1) {
246 		mdb_warn("\ncouldn't find 'autovect'");
247 		return (-1);
248 	}
249 
250 	addr = (uintptr_t)sym.st_value +
251 	    rec->ttr_vector * sizeof (struct av_head);
252 
253 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
254 		mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
255 		return (-1);
256 	}
257 
258 	if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
259 		mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
260 		return (-1);
261 	}
262 
263 	if (hd.avh_link == NULL) {
264 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
265 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
266 		else
267 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
268 	} else {
269 		if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
270 			mdb_warn("couldn't read autovec at %p",
271 			    (uintptr_t)hd.avh_link);
272 		}
273 
274 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
275 	}
276 
277 	return (0);
278 }
279 
280 static int
281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
282 {
283 	struct autovec av;
284 	apix_impl_t apix;
285 	apix_vector_t apix_vector;
286 
287 	switch (rec->ttr_regs.r_trapno) {
288 	case T_SOFTINT:
289 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
290 		return (0);
291 	default:
292 		break;
293 	}
294 
295 	mdb_printf("%-3x ", rec->ttr_vector);
296 
297 	/* Read the per CPU apix entry */
298 	if (mdb_vread(&apix, sizeof (apix_impl_t),
299 	    (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
300 		mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
301 		return (-1);
302 	}
303 	if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
304 	    (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
305 		mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
306 		return (-1);
307 	}
308 	if (apix_vector.v_share == 0) {
309 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
310 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
311 		else
312 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
313 	} else {
314 		if (mdb_vread(&av, sizeof (struct autovec),
315 		    (uintptr_t)(apix_vector.v_autovect)) == -1) {
316 			mdb_warn("couldn't read autovec at %p",
317 			    (uintptr_t)apix_vector.v_autovect);
318 		}
319 
320 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
321 	}
322 
323 	return (0);
324 }
325 
326 
327 static struct {
328 	int tt_trapno;
329 	char *tt_name;
330 } ttrace_traps[] = {
331 	{ T_ZERODIV,	"divide-error" },
332 	{ T_SGLSTP,	"debug-exception" },
333 	{ T_NMIFLT,	"nmi-interrupt" },
334 	{ T_BPTFLT,	"breakpoint" },
335 	{ T_OVFLW,	"into-overflow" },
336 	{ T_BOUNDFLT,	"bound-exceeded" },
337 	{ T_ILLINST,	"invalid-opcode" },
338 	{ T_NOEXTFLT,	"device-not-avail" },
339 	{ T_DBLFLT,	"double-fault" },
340 	{ T_EXTOVRFLT,	"segment-overrun" },
341 	{ T_TSSFLT,	"invalid-tss" },
342 	{ T_SEGFLT,	"segment-not-pres" },
343 	{ T_STKFLT,	"stack-fault" },
344 	{ T_GPFLT,	"general-protectn" },
345 	{ T_PGFLT,	"page-fault" },
346 	{ T_EXTERRFLT,	"error-fault" },
347 	{ T_ALIGNMENT,	"alignment-check" },
348 	{ T_MCE,	"machine-check" },
349 	{ T_SIMDFPE,	"sse-exception" },
350 
351 	{ T_DBGENTR,	"debug-enter" },
352 	{ T_FASTTRAP,	"fasttrap-0xd2" },
353 	{ T_SYSCALLINT,	"syscall-0x91" },
354 	{ T_DTRACE_RET,	"dtrace-ret" },
355 	{ T_SOFTINT,	"softint" },
356 	{ T_INTERRUPT,	"interrupt" },
357 	{ T_FAULT,	"fault" },
358 	{ T_AST,	"ast" },
359 	{ T_SYSCALL,	"syscall" },
360 
361 	{ 0,		NULL }
362 };
363 
364 static int
365 ttrace_trap(trap_trace_rec_t *rec)
366 {
367 	int i;
368 
369 	if (rec->ttr_regs.r_trapno == T_AST)
370 		mdb_printf("%-3s ", "-");
371 	else
372 		mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
373 
374 	for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
375 		if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
376 			break;
377 	}
378 
379 	if (ttrace_traps[i].tt_name == NULL)
380 		mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
381 	else
382 		mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
383 
384 	return (0);
385 }
386 
/*
 * Extended (-x) output for an interrupt record: print the record's vector,
 * ipl, pri and spl fields on one indented line.
 */
static void
ttrace_intr_detail(trap_trace_rec_t *rec)
{
	mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
	    rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
}
393 
/*
 * Dispatch table used by ttrace_walk(): for a record whose ttr_marker equals
 * t_marker, t_name is printed in the TYPE column and t_hdlr is called to
 * print the vector/handler columns.  The table is terminated by an entry
 * whose t_hdlr is NULL.
 *
 * Note that the table is not const, and that entry order matters: the
 * ::ttrace dcmd overwrites the t_hdlr of the entry at index 4 (TT_INTERRUPT)
 * with ttrace_apix_interrupt when the target has apix enabled.
 */
static struct {
	uchar_t t_marker;
	char *t_name;
	int (*t_hdlr)(trap_trace_rec_t *);
} ttrace_hdlr[] = {
	{ TT_SYSCALL, "sysc", ttrace_syscall },
	{ TT_SYSENTER, "syse", ttrace_syscall },
	{ TT_SYSC, "asys", ttrace_syscall },
	{ TT_SYSC64, "sc64", ttrace_syscall },
	{ TT_INTERRUPT, "intr", ttrace_interrupt },
	{ TT_TRAP, "trap", ttrace_trap },
	{ TT_EVENT, "evnt", ttrace_trap },
	{ 0, NULL, NULL }
};
408 
/*
 * State passed from the ::ttrace dcmd to its per-record callback,
 * ttrace_walk().
 */
typedef struct ttrace_dcmd {
	processorid_t ttd_cpu;		/* CPU to show, or -1 for all CPUs */
	uint_t ttd_extended;		/* non-zero if -x was given */
	uintptr_t ttd_kthread;		/* -t thread filter; 0 for none */
	trap_trace_ctl_t ttd_ttc[NCPU];	/* copy of target's trap_trace_ctl */
} ttrace_dcmd_t;
415 
/*
 * ttrace_dumpregs() prints the saved registers of a trap trace record for
 * extended (-x) output.  There is one implementation per architecture; both
 * rely on the DUMP() macro, which expands to a register's name as a string
 * followed by its value taken from the local variable 'regs'.
 */
#if defined(__amd64)

#define	DUMP(reg) #reg, regs->r_##reg
/* Format for one output line holding three name/value pairs. */
#define	THREEREGS	"         %3s: %16lx %3s: %16lx %3s: %16lx\n"

/*
 * amd64 variant: three registers per line, followed by a final line with
 * the fs and gs base values and a blank line.
 */
static void
ttrace_dumpregs(trap_trace_rec_t *rec)
{
	struct regs *regs = &rec->ttr_regs;

	mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
	mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
	mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
	mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
	mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
	mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
	/* The trap number is labelled "trp" rather than by its field name. */
	mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
	mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
	/* %cr2 is kept in the record itself, not in the regs structure. */
	mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
	mdb_printf("         %3s: %16lx %3s: %16lx\n",
	    "fsb", regs->__r_fsbase,
	    "gsb", regs->__r_gsbase);
	mdb_printf("\n");
}

#else

#define	DUMP(reg) #reg, regs->r_##reg
/* Format for one output line holding four name/value pairs. */
#define	FOURREGS	"         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"

/*
 * Non-amd64 variant: four registers per line, followed by a blank line.
 */
static void
ttrace_dumpregs(trap_trace_rec_t *rec)
{
	struct regs *regs = &rec->ttr_regs;

	mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
	mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
	mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
	mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
	    DUMP(pc), DUMP(cs));
	/* %cr2 is kept in the record itself, not in the regs structure. */
	mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
	    "cr2", rec->ttr_cr2);
	mdb_printf("\n");
}

#endif	/* __amd64 */
462 
463 int
464 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
465 {
466 	struct regs *regs = &rec->ttr_regs;
467 	processorid_t cpu = -1, i;
468 
469 	for (i = 0; i < NCPU; i++) {
470 		if (addr >= dcmd->ttd_ttc[i].ttc_first &&
471 		    addr < dcmd->ttd_ttc[i].ttc_limit) {
472 			cpu = i;
473 			break;
474 		}
475 	}
476 
477 	if (cpu == -1) {
478 		mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
479 		return (WALK_ERR);
480 	}
481 
482 	if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
483 		return (WALK_NEXT);
484 
485 	if (dcmd->ttd_kthread != 0 &&
486 	    dcmd->ttd_kthread != rec->ttr_curthread)
487 		return (WALK_NEXT);
488 
489 	mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
490 
491 	for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
492 		if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
493 			continue;
494 		mdb_printf("%4s ", ttrace_hdlr[i].t_name);
495 		if (ttrace_hdlr[i].t_hdlr(rec) == -1)
496 			return (WALK_ERR);
497 	}
498 
499 	mdb_printf(" %a\n", regs->r_pc);
500 
501 	if (dcmd->ttd_extended == FALSE)
502 		return (WALK_NEXT);
503 
504 	if (rec->ttr_marker == TT_INTERRUPT)
505 		ttrace_intr_detail(rec);
506 	else
507 		ttrace_dumpregs(rec);
508 
509 	if (rec->ttr_sdepth > 0) {
510 		for (i = 0; i < rec->ttr_sdepth; i++) {
511 			if (i >= TTR_STACK_DEPTH) {
512 				mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
513 				    "should be <= %d)\n", " ", rec->ttr_sdepth,
514 				    TTR_STACK_DEPTH);
515 				break;
516 			}
517 
518 			mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
519 		}
520 		mdb_printf("\n");
521 	}
522 
523 	return (WALK_NEXT);
524 }
525 
526 int
527 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
528 {
529 	ttrace_dcmd_t dcmd;
530 	trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
531 	trap_trace_rec_t rec;
532 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
533 
534 	if (!ttrace_ttr_size_check())
535 		return (WALK_ERR);
536 
537 	bzero(&dcmd, sizeof (dcmd));
538 	dcmd.ttd_cpu = -1;
539 	dcmd.ttd_extended = FALSE;
540 
541 	if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
542 		mdb_warn("symbol 'trap_trace_ctl' not found; "
543 		    "non-TRAPTRACE kernel?\n");
544 		return (DCMD_ERR);
545 	}
546 
547 	if (mdb_getopts(argc, argv,
548 	    'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
549 	    't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
550 		return (DCMD_USAGE);
551 
552 	if (DCMD_HDRSPEC(flags)) {
553 		mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
554 		    "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
555 		    " EIP");
556 	}
557 
558 	if (flags & DCMD_ADDRSPEC) {
559 		if (addr >= NCPU) {
560 			if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
561 				mdb_warn("couldn't read trap trace record "
562 				    "at %p", addr);
563 				return (DCMD_ERR);
564 			}
565 
566 			if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
567 				return (DCMD_ERR);
568 
569 			return (DCMD_OK);
570 		}
571 		dcmd.ttd_cpu = addr;
572 	}
573 
574 	if (mdb_readvar(&use_apix, "apix_enable") == -1) {
575 		mdb_warn("failed to read apix_enable");
576 		use_apix = 0;
577 	}
578 
579 	if (use_apix) {
580 		if (mdb_readvar(&d_apixs, "apixs") == -1) {
581 			mdb_warn("\nfailed to read apixs.");
582 			return (DCMD_ERR);
583 		}
584 		/* change to apix ttrace interrupt handler */
585 		ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
586 	}
587 
588 	if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
589 		mdb_warn("couldn't walk 'ttrace'");
590 		return (DCMD_ERR);
591 	}
592 
593 	return (DCMD_OK);
594 }
595 
/*
 * Walker init for "mutex_owner".  No state is needed; all of the work is
 * done by mutex_owner_step().
 */
/*ARGSUSED*/
int
mutex_owner_init(mdb_walk_state_t *wsp)
{
	return (WALK_NEXT);
}
602 
603 int
604 mutex_owner_step(mdb_walk_state_t *wsp)
605 {
606 	uintptr_t addr = wsp->walk_addr;
607 	mutex_impl_t mtx;
608 	uintptr_t owner;
609 	kthread_t thr;
610 
611 	if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
612 		return (WALK_ERR);
613 
614 	if (!MUTEX_TYPE_ADAPTIVE(&mtx))
615 		return (WALK_DONE);
616 
617 	if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == NULL)
618 		return (WALK_DONE);
619 
620 	if (mdb_vread(&thr, sizeof (thr), owner) != -1)
621 		(void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
622 
623 	return (WALK_DONE);
624 }
625 
626 static void
627 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
628 {
629 	const char *lastnm;
630 	uint_t lastval;
631 	char type[4];
632 
633 	switch (gate->sgd_type) {
634 	case SDT_SYSIGT:
635 		strcpy(type, "int");
636 		break;
637 	case SDT_SYSTGT:
638 		strcpy(type, "trp");
639 		break;
640 	case SDT_SYSTASKGT:
641 		strcpy(type, "tsk");
642 		break;
643 	default:
644 		(void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
645 	}
646 
647 #if defined(__amd64)
648 	lastnm = "IST";
649 	lastval = gate->sgd_ist;
650 #else
651 	lastnm = "STK";
652 	lastval = gate->sgd_stkcpy;
653 #endif
654 
655 	if (header) {
656 		mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
657 		    "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
658 		    "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
659 	}
660 
661 	mdb_printf("%s", label);
662 
663 	if (gate->sgd_type == SDT_SYSTASKGT)
664 		mdb_printf("%-30s ", "-");
665 	else
666 		mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
667 
668 	mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
669 	    gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
670 }
671 
672 /*ARGSUSED*/
673 static int
674 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
675 {
676 	gate_desc_t gate;
677 
678 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
679 		return (DCMD_USAGE);
680 
681 	if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
682 	    sizeof (gate_desc_t)) {
683 		mdb_warn("failed to read gate descriptor at %p\n", addr);
684 		return (DCMD_ERR);
685 	}
686 
687 	gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
688 
689 	return (DCMD_OK);
690 }
691 
692 /*ARGSUSED*/
693 static int
694 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
695 {
696 	int i;
697 
698 	if (!(flags & DCMD_ADDRSPEC)) {
699 		GElf_Sym idt0_va;
700 		gate_desc_t *idt0;
701 
702 		if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
703 			mdb_warn("failed to find VA of idt0");
704 			return (DCMD_ERR);
705 		}
706 
707 		addr = idt0_va.st_value;
708 		if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
709 			mdb_warn("failed to read idt0 at %p\n", addr);
710 			return (DCMD_ERR);
711 		}
712 
713 		addr = (uintptr_t)idt0;
714 	}
715 
716 	for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
717 		gate_desc_t gate;
718 		char label[6];
719 
720 		if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
721 		    sizeof (gate_desc_t)) {
722 			mdb_warn("failed to read gate descriptor at %p\n",
723 			    addr);
724 			return (DCMD_ERR);
725 		}
726 
727 		(void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
728 		gate_desc_dump(&gate, label, i == 0);
729 	}
730 
731 	return (DCMD_OK);
732 }
733 
/* Help text for the ::htables dcmd. */
static void
htables_help(void)
{
	mdb_printf(
	    "Given a (hat_t *), generates the list of all (htable_t *)s\n"
	    "that correspond to that address space\n");
}
741 
/* Help text for the ::report_maps dcmd. */
static void
report_maps_help(void)
{
	mdb_printf(
	    "Given a PFN, report HAT structures that map the page, or use\n"
	    "the page as a pagetable.\n"
	    "\n"
	    "-m Interpret the PFN as an MFN (machine frame number)\n");
}
751 
/* Help text for the ::ptable dcmd. */
static void
ptable_help(void)
{
	mdb_printf(
	    "Given a PFN holding a page table, print its contents, and\n"
	    "the address of the corresponding htable structure.\n"
	    "\n"
	    "-m Interpret the PFN as an MFN (machine frame number)\n"
	    "-l force page table level (3 is top)\n");
}
762 
/* Help text for the ::ptmap dcmd. */
static void
ptmap_help(void)
{
	mdb_printf(
	    "Report all mappings represented by the page table hierarchy\n"
	    "rooted at the given cr3 value / physical address.\n"
	    "\n"
	    "-w run ::whatis on mapping start addresses\n");
}
772 
773 /*
774  * NSEC_SHIFT is replicated here (it is not defined in a header file),
775  * but for amusement, the reader is directed to the comment that explains
776  * the rationale for this particular value on x86.  Spoiler:  the value is
777  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
778  * in that comment sounds too familiar, it's because your author also wrote
779  * that code -- some fifteen years prior to this writing in 2011...)
780  */
781 #define	NSEC_SHIFT 5
782 
783 /*ARGSUSED*/
784 static int
785 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
786 {
787 	uint32_t nsec_scale;
788 	hrtime_t tsc = addr, hrt;
789 	unsigned int *tscp = (unsigned int *)&tsc;
790 	uintptr_t scalehrtimef;
791 	uint64_t scale;
792 	GElf_Sym sym;
793 
794 	if (!(flags & DCMD_ADDRSPEC)) {
795 		if (argc != 1)
796 			return (DCMD_USAGE);
797 
798 		switch (argv[0].a_type) {
799 		case MDB_TYPE_STRING:
800 			tsc = mdb_strtoull(argv[0].a_un.a_str);
801 			break;
802 		case MDB_TYPE_IMMEDIATE:
803 			tsc = argv[0].a_un.a_val;
804 			break;
805 		default:
806 			return (DCMD_USAGE);
807 		}
808 	}
809 
810 	if (mdb_readsym(&scalehrtimef,
811 	    sizeof (scalehrtimef), "scalehrtimef") == -1) {
812 		mdb_warn("couldn't read 'scalehrtimef'");
813 		return (DCMD_ERR);
814 	}
815 
816 	if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
817 		mdb_warn("couldn't find 'tsc_scalehrtime'");
818 		return (DCMD_ERR);
819 	}
820 
821 	if (sym.st_value != scalehrtimef) {
822 		mdb_warn("::scalehrtime requires that scalehrtimef "
823 		    "be set to tsc_scalehrtime\n");
824 		return (DCMD_ERR);
825 	}
826 
827 	if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
828 		mdb_warn("couldn't read 'nsec_scale'");
829 		return (DCMD_ERR);
830 	}
831 
832 	scale = (uint64_t)nsec_scale;
833 
834 	hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
835 	hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
836 
837 	mdb_printf("0x%llx\n", hrt);
838 
839 	return (DCMD_OK);
840 }
841 
842 /*
843  * The x86 feature set is implemented as a bitmap array. That bitmap array is
844  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
845  * macro. We have the names for each of these features in unix's text segment
846  * so we do not have to duplicate them and instead just look them up.
847  */
848 /*ARGSUSED*/
849 static int
850 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
851     const mdb_arg_t *argv)
852 {
853 	void *fset;
854 	GElf_Sym sym;
855 	uintptr_t nptr;
856 	char name[128];
857 	int ii;
858 
859 	size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
860 
861 	if (argc != 0)
862 		return (DCMD_USAGE);
863 
864 	if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
865 		mdb_warn("couldn't find x86_feature_names");
866 		return (DCMD_ERR);
867 	}
868 
869 	fset = mdb_zalloc(sz, UM_NOSLEEP);
870 	if (fset == NULL) {
871 		mdb_warn("failed to allocate memory for x86_featureset");
872 		return (DCMD_ERR);
873 	}
874 
875 	if (mdb_readvar(fset, "x86_featureset") != sz) {
876 		mdb_warn("failed to read x86_featureset");
877 		mdb_free(fset, sz);
878 		return (DCMD_ERR);
879 	}
880 
881 	for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
882 		if (!BT_TEST((ulong_t *)fset, ii))
883 			continue;
884 
885 		if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
886 		    sizeof (void *) * ii) != sizeof (char *)) {
887 			mdb_warn("failed to read feature array %d", ii);
888 			mdb_free(fset, sz);
889 			return (DCMD_ERR);
890 		}
891 
892 		if (mdb_readstr(name, sizeof (name), nptr) == -1) {
893 			mdb_warn("failed to read feature %d", ii);
894 			mdb_free(fset, sz);
895 			return (DCMD_ERR);
896 		}
897 		mdb_printf("%s\n", name);
898 	}
899 
900 	mdb_free(fset, sz);
901 	return (DCMD_OK);
902 }
903 
#ifdef _KMDB
/*
 * ::crregs dcmd (kmdb only): print %cr0, %cr2, %cr3 and %cr4 as obtained
 * from the kmdb_unix_getcr*() routines, decoding the flag bits of %cr0 and
 * %cr4.  %cr3 is shown with its PCID when CR4_PCIDE is set in %cr4 and with
 * its PCD/PWT flags otherwise.  Always returns DCMD_OK.
 */
/* ARGSUSED */
static int
crregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	static const mdb_bitmask_t cr0_flag_bits[] = {
		{ "PE",		CR0_PE,		CR0_PE },
		{ "MP",		CR0_MP,		CR0_MP },
		{ "EM",		CR0_EM,		CR0_EM },
		{ "TS",		CR0_TS,		CR0_TS },
		{ "ET",		CR0_ET,		CR0_ET },
		{ "NE",		CR0_NE,		CR0_NE },
		{ "WP",		CR0_WP,		CR0_WP },
		{ "AM",		CR0_AM,		CR0_AM },
		{ "NW",		CR0_NW,		CR0_NW },
		{ "CD",		CR0_CD,		CR0_CD },
		{ "PG",		CR0_PG,		CR0_PG },
		{ NULL,		0,		0 }
	};

	static const mdb_bitmask_t cr3_flag_bits[] = {
		{ "PCD",	CR3_PCD,	CR3_PCD },
		{ "PWT",	CR3_PWT,	CR3_PWT },
		{ NULL,		0,		0, }
	};

	static const mdb_bitmask_t cr4_flag_bits[] = {
		{ "VME",	CR4_VME,	CR4_VME },
		{ "PVI",	CR4_PVI,	CR4_PVI },
		{ "TSD",	CR4_TSD,	CR4_TSD },
		{ "DE",		CR4_DE,		CR4_DE },
		{ "PSE",	CR4_PSE,	CR4_PSE },
		{ "PAE",	CR4_PAE,	CR4_PAE },
		{ "MCE",	CR4_MCE,	CR4_MCE },
		{ "PGE",	CR4_PGE,	CR4_PGE },
		{ "PCE",	CR4_PCE,	CR4_PCE },
		{ "OSFXSR",	CR4_OSFXSR,	CR4_OSFXSR },
		{ "OSXMMEXCPT",	CR4_OSXMMEXCPT,	CR4_OSXMMEXCPT },
		{ "VMXE",	CR4_VMXE,	CR4_VMXE },
		{ "SMXE",	CR4_SMXE,	CR4_SMXE },
		{ "PCIDE",	CR4_PCIDE,	CR4_PCIDE },
		{ "OSXSAVE",	CR4_OSXSAVE,	CR4_OSXSAVE },
		{ "SMEP",	CR4_SMEP,	CR4_SMEP },
		{ "SMAP",	CR4_SMAP,	CR4_SMAP },
		{ NULL,		0,		0 }
	};

	ulong_t cr0 = kmdb_unix_getcr0();
	ulong_t cr2 = kmdb_unix_getcr2();
	ulong_t cr3 = kmdb_unix_getcr3();
	ulong_t cr4 = kmdb_unix_getcr4();
	ulong_t cr3_pfn = cr3 >> MMU_PAGESHIFT;

	mdb_printf("%%cr0 = 0x%lx <%b>\n", cr0, cr0, cr0_flag_bits);
	mdb_printf("%%cr2 = 0x%lx <%a>\n", cr2, cr2);

	/* The low bits of %cr3 are a PCID only when CR4_PCIDE is set. */
	if ((cr4 & CR4_PCIDE) != 0) {
		mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx pcid:%lu>\n", cr3,
		    cr3_pfn, cr3 & MMU_PAGEOFFSET);
	} else {
		mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx flags:%b>\n", cr3,
		    cr3_pfn, cr3, cr3_flag_bits);
	}

	mdb_printf("%%cr4 = 0x%lx <%b>\n", cr4, cr4, cr4_flag_bits);

	return (DCMD_OK);
}
#endif
972 
/*
 * Dcmds exported by this module.  Each entry gives the dcmd name, its usage
 * string, a one-line description, the implementing function and, for some
 * entries, a help function.  Several of the implementing functions are not
 * defined in this file.  The crregs dcmd is only provided when built for
 * kmdb.  The table is terminated by an entry whose name is NULL.
 */
static const mdb_dcmd_t dcmds[] = {
	{ "gate_desc", ":", "dump a gate descriptor", gate_desc },
	{ "idt", ":[-v]", "dump an IDT", idt },
	{ "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
	{ "vatopfn", ":[-a as]", "translate address to physical page",
	    va2pfn_dcmd },
	{ "report_maps", ":[-m]",
	    "Given PFN, report mappings / page table usage",
	    report_maps_dcmd, report_maps_help },
	{ "htables", "", "Given hat_t *, lists all its htable_t * values",
	    htables_dcmd, htables_help },
	{ "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
	    ptable_dcmd, ptable_help },
	{ "ptmap", ":", "Given a cr3 value, dump all mappings",
	    ptmap_dcmd, ptmap_help },
	{ "pte", ":[-l N]", "print human readable page table entry",
	    pte_dcmd },
	{ "pfntomfn", ":", "convert physical page to hypervisor machine page",
	    pfntomfn_dcmd },
	{ "mfntopfn", ":", "convert hypervisor machine page to physical page",
	    mfntopfn_dcmd },
	{ "memseg_list", ":", "show memseg list", memseg_list },
	{ "scalehrtime", ":",
	    "scale an unscaled high-res time", scalehrtime_cmd },
	{ "x86_featureset", NULL, "dump the x86_featureset vector",
		x86_featureset_cmd },
#ifdef _KMDB
	{ "crregs", NULL, "dump control registers", crregs_dcmd },
#endif
	{ NULL }
};
1004 
/*
 * Walkers exported by this module.  Each entry gives the walker name, a
 * description, and its init, step and (optionally) fini functions; the
 * mutex_owner walker has no fini function.  The memseg walker functions are
 * not defined in this file.  The table is terminated by an entry whose name
 * is NULL.
 */
static const mdb_walker_t walkers[] = {
	{ "ttrace", "walks trap trace buffers in reverse chronological order",
		ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
	{ "mutex_owner", "walks the owner of a mutex",
		mutex_owner_init, mutex_owner_step },
	{ "memseg", "walk the memseg structures",
		memseg_walk_init, memseg_walk_step, memseg_walk_fini },
	{ NULL }
};
1014 
/* Module description: API version plus the dcmd and walker tables above. */
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };

/*
 * Module entry point: returns the module description so that the dcmds and
 * walkers listed in it can be registered.
 */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}
1022 
/*
 * Module teardown: calls free_mmu(), which is defined outside this file
 * (presumably it releases state held by the MMU-related dcmds -- confirm
 * against its definition).
 */
void
_mdb_fini(void)
{
	free_mmu();
}
1028