1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <sys/types.h>
40 #include <sys/modctl.h>
41 #include <sys/stat.h>
42 #include <sys/wait.h>
43 #include <dtrace.h>
44 #include <sys/lockstat.h>
45 #include <alloca.h>
46 #include <signal.h>
47 #include <assert.h>
48 
/*
 * Platform shim.  On illumos only GETOPT_EOF needs defining.  Elsewhere we
 * pull in two extra system headers, route the name mergesort() to the local
 * lsmergesort() implementation defined later in this file, and supply the
 * pc_t type that lsrec_fill() uses to read stack frames.
 */
#ifdef illumos
#define	GETOPT_EOF	EOF
#else
#include <sys/time.h>
#include <sys/resource.h>

#define	mergesort(a, b, c, d)	lsmergesort(a, b, c, d)
#define	GETOPT_EOF		(-1)

typedef	uintptr_t	pc_t;
#endif

/* Option letters; NOTE(review): presumably the getopt(3C) string -- confirm. */
#define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"

#define	LS_MAX_STACK_DEPTH	50	/* entries in lsrec_t's ls_stack[] */
#define	LS_MAX_EVENTS		64	/* entries in the per-event tables */
65 
/*
 * One lock-statistics record.
 *
 * The member order is significant: the LS_BASIC, LS_TIME, LS_HIST and
 * LS_STACK() macros are offsetof() expressions over this structure, and
 * records are stored back to back at a stride of g_recsize bytes, so only
 * the leading g_recsize bytes of any given record are valid.
 */
typedef struct lsrec {
	struct lsrec	*ls_next;	/* next in hash chain */
#ifdef illumos
	uintptr_t	ls_lock;	/* lock address */
#else
	char		*ls_lock;	/* lock name */
#endif
	uintptr_t	ls_caller;	/* caller address */
	uint32_t	ls_count;	/* cumulative event count */
	uint32_t	ls_event;	/* type of event */
	uintptr_t	ls_refcnt;	/* cumulative reference count */
	uint64_t	ls_time;	/* cumulative event duration */
	uint32_t	ls_hist[64];	/* log2(duration) histogram */
	uintptr_t	ls_stack[LS_MAX_STACK_DEPTH];
} lsrec_t;
81 
/*
 * Fill cursor handed to the process_trace() and process_aggregate()
 * callbacks: where the next record goes and how many have been stored.
 */
typedef struct lsdata {
	struct lsrec	*lsd_next;	/* next available */
	int		lsd_count;	/* number of records */
} lsdata_t;
86 
/*
 * Definitions for the types of experiments which can be run.  They are
 * listed in increasing order of memory cost and processing time cost.
 * The numerical value of each type is the number of bytes needed per record.
 * Each value is the offset of the first lsrec_t member that the experiment
 * does NOT need, so comparisons against g_recsize tell which members of a
 * record are present.
 */
#define	LS_BASIC	offsetof(lsrec_t, ls_time)
#define	LS_TIME		offsetof(lsrec_t, ls_hist[0])
#define	LS_HIST		offsetof(lsrec_t, ls_stack[0])
#define	LS_STACK(depth)	offsetof(lsrec_t, ls_stack[depth])

/* Report generators; their definitions are not in this part of the file. */
static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
static void report_trace(FILE *, lsrec_t **);

/* Symbol-table helpers and strtok_r(), declared here as externals. */
extern int symtab_init(void);
extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
extern uintptr_t sym_to_addr(char *name);
extern size_t sym_size(char *name);
extern char *strtok_r(char *, const char *, char **);

/* DEFAULT_NRECS and DEFAULT_HZ are the defaults printed by usage(). */
#define	DEFAULT_NRECS	10000
#define	DEFAULT_HZ	97
#define	MAX_HZ		1000
#define	MIN_AGGSIZE	(16 * 1024)
#define	MAX_AGGSIZE	(32 * 1024 * 1024)
111 
/*
 * Program-wide state.
 *
 * NOTE(review): the single-letter g_?flag variables presumably mirror the
 * command-line options of the same letter described in usage(); the option
 * parsing is not visible from here, so confirm before relying on that.
 */
static int g_stkdepth;		/* ls_stack[] entries the comparators examine */
static int g_topn = INT_MAX;
static hrtime_t g_elapsed;
static int g_rates = 0;
static int g_pflag = 0;
static int g_Pflag = 0;
static int g_wflag = 0;
static int g_Wflag = 0;
static int g_cflag = 0;
static int g_kflag = 0;
static int g_gflag = 0;
static int g_Vflag = 0;		/* dprog_compile() dumps the D program */
static int g_tracing = 0;	/* emit trace() records, not aggregations */
static size_t g_recsize;	/* bytes per record; compare with LS_* */
static size_t g_nrecs;		/* capacity of the record buffer */
static int g_nrecs_used;
static uchar_t g_enabled[LS_MAX_EVENTS];
static hrtime_t g_min_duration[LS_MAX_EVENTS];	/* 0 = no ">=" predicate */
static dtrace_hdl_t *g_dtp;	/* libdtrace handle used by every dtrace_*() */
static char *g_predicate;	/* extra predicate for non-'I' events */
static char *g_ipredicate;	/* extra predicate for 'I' events */
static char *g_prog;		/* D program text built by dprog_add() */
static int g_proglen;		/* bytes in g_prog, including the NUL */
static int g_dropped;
136 
/*
 * Static description of one event number; see g_event_info[].
 */
typedef struct ls_event_info {
	char	ev_type;		/* event class: 'C', 'H', 'I' or 'E' */
	char	ev_lhdr[20];		/* e.g. "Lock"; presumably a column header */
	char	ev_desc[80];		/* description listed by show_events() */
	char	ev_units[10];		/* units string, e.g. "nsec" */
	char	ev_name[DTRACE_NAMELEN]; /* probe spec; empty = no D clause */
	char	*ev_predicate;		/* D predicate for the probe, or NULL */
	char	*ev_acquire;		/* probe for the paired acquire clause */
} ls_event_info_t;
146 
/*
 * Event table, indexed by event number (the numbers show_events() prints
 * and that dprog_addevent() receives).  Initializers are positional:
 * type, left header, description, units, probe name, predicate, acquire
 * probe; trailing members that are omitted are zero/NULL.
 *
 *	 0 - 31		'C' contention events
 *	32 - 55		'H' hold events
 *	56 - 59		'I' interrupt events
 *	60 - 63		'E' error events
 *
 * "Unknown event" entries are placeholders: show_events() skips them, and
 * because they have no probe name dprog_addevent() emits nothing for them.
 */
static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
	{ 'C',	"Lock",	"Adaptive mutex spin",			"nsec",
	    "lockstat:::adaptive-spin" },
	{ 'C',	"Lock",	"Adaptive mutex block",			"nsec",
	    "lockstat:::adaptive-block" },
	{ 'C',	"Lock",	"Spin lock spin",			"nsec",
	    "lockstat:::spin-spin" },
	{ 'C',	"Lock",	"Thread lock spin",			"nsec",
	    "lockstat:::thread-spin" },
	{ 'C',	"Lock",	"R/W writer blocked by writer",		"nsec",
	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
	{ 'C',	"Lock",	"R/W writer blocked by readers",	"nsec",
	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"R/W reader blocked by writer",		"nsec",
	    "lockstat:::rw-block", "arg2 == 1 && arg3 == 1" },
	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
	    "lockstat:::rw-block", "arg2 == 1 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"R/W writer spin on writer",		"nsec",
	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" },
	{ 'C',	"Lock",	"R/W writer spin on readers",		"nsec",
	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"R/W reader spin on writer",		"nsec",
	    "lockstat:::rw-spin", "arg2 == 1 && arg3 == 1" },
	{ 'C',	"Lock",	"R/W reader spin on write wanted",	"nsec",
	    "lockstat:::rw-spin", "arg2 == 1 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"SX exclusive block",			"nsec",
	    "lockstat:::sx-block", "arg2 == 0" },
	{ 'C',	"Lock",	"SX shared block",			"nsec",
	    "lockstat:::sx-block", "arg2 == 1" },
	{ 'C',	"Lock",	"SX exclusive spin",			"nsec",
	    "lockstat:::sx-spin", "arg2 == 0" },
	{ 'C',	"Lock",	"SX shared spin",			"nsec",
	    "lockstat:::sx-spin", "arg2 == 1" },
	{ 'C',	"Lock",	"lockmgr writer blocked by writer",	"nsec",
	    "lockstat:::lockmgr-block", "arg2 == 0 && arg3 == 1" },
	{ 'C',	"Lock",	"lockmgr writer blocked by readers",	"nsec",
	    "lockstat:::lockmgr-block", "arg2 == 0 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"lockmgr reader blocked by writer",	"nsec",
	    "lockstat:::lockmgr-block", "arg2 == 1 && arg3 == 1" },
	{ 'C',	"Lock",	"lockmgr reader blocked by write wanted", "nsec",
	    "lockstat:::lockmgr-block", "arg2 == 1 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"Unknown event (type 20)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 21)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 22)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 23)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 24)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 25)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 26)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 27)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 28)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 29)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 30)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 31)",		"units"	},
	{ 'H',	"Lock",	"Adaptive mutex hold",			"nsec",
	    "lockstat:::adaptive-release", NULL,
	    "lockstat:::adaptive-acquire" },
	{ 'H',	"Lock",	"Spin lock hold",			"nsec",
	    "lockstat:::spin-release", NULL,
	    "lockstat:::spin-acquire" },
	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
	    "lockstat:::rw-release", "arg1 == 0",
	    "lockstat:::rw-acquire" },
	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
	    "lockstat:::rw-release", "arg1 == 1",
	    "lockstat:::rw-acquire" },
	{ 'H',	"Lock",	"SX shared hold",			"nsec",
	    "lockstat:::sx-release", "arg1 == 1",
	    "lockstat:::sx-acquire" },
	{ 'H',	"Lock",	"SX exclusive hold",			"nsec",
	    "lockstat:::sx-release", "arg1 == 0",
	    "lockstat:::sx-acquire" },
	{ 'H',	"Lock",	"lockmgr shared hold",			"nsec",
	    "lockstat:::lockmgr-release", "arg1 == 1",
	    "lockstat:::lockmgr-acquire" },
	{ 'H',	"Lock",	"lockmgr exclusive hold",		"nsec",
	    "lockstat:::lockmgr-release,lockstat:::lockmgr-disown", "arg1 == 0",
	    "lockstat:::lockmgr-acquire" },
	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 41)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 42)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 43)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 44)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 45)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 46)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 47)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 48)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 49)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 50)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 51)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 52)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
#ifdef illumos
	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
#else
	{ 'I',	"CPU+Pri_Class", "Profiling interrupt",		"nsec",
#endif
	    "profile:::profile-97", NULL },
	{ 'I',	"Lock",	"Unknown event (type 57)",		"units"	},
	{ 'I',	"Lock",	"Unknown event (type 58)",		"units"	},
	{ 'I',	"Lock",	"Unknown event (type 59)",		"units"	},
	{ 'E',	"Lock",	"Recursive lock entry detected",	"(N/A)",
	    "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
	{ 'E',	"Lock",	"Lockstat enter failure",		"(N/A)"	},
	{ 'E',	"Lock",	"Lockstat exit failure",		"nsec"	},
	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
};
255 
#ifndef illumos
/*
 * Short labels for scheduling priority classes (non-illumos builds only).
 * NOTE(review): presumably indexed by the td_pri_class value that
 * dprog_addevent() folds into the 'I' event key -- confirm against the
 * report code, which is not visible here.
 */
static char *g_pri_class[] = {
	"",
	"Intr",
	"RealT",
	"TShar",
	"Idle"
};
#endif
265 
/*
 * Report a fatal error and exit with status 2.
 *
 * Writes "lockstat: " followed by the printf-style message to stderr.  When
 * do_perror is non-zero, ": " and the text for the errno value seen on entry
 * are appended.  errno is captured first so that the stdio calls made here
 * cannot change what gets reported.
 */
static void
fail(int do_perror, const char *message, ...)
{
	const int saved = errno;
	va_list ap;

	(void) fputs("lockstat: ", stderr);

	va_start(ap, message);
	(void) vfprintf(stderr, message, ap);
	va_end(ap);

	if (do_perror != 0)
		(void) fprintf(stderr, ": %s", strerror(saved));

	(void) fputc('\n', stderr);
	exit(2);
}
281 
282 static void
283 dfail(const char *message, ...)
284 {
285 	va_list args;
286 
287 	va_start(args, message);
288 	(void) fprintf(stderr, "lockstat: ");
289 	(void) vfprintf(stderr, message, args);
290 	va_end(args);
291 	(void) fprintf(stderr, ": %s\n",
292 	    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
293 
294 	exit(2);
295 }
296 
297 static void
298 show_events(char event_type, char *desc)
299 {
300 	int i, first = -1, last;
301 
302 	for (i = 0; i < LS_MAX_EVENTS; i++) {
303 		ls_event_info_t *evp = &g_event_info[i];
304 		if (evp->ev_type != event_type ||
305 		    strncmp(evp->ev_desc, "Unknown event", 13) == 0)
306 			continue;
307 		if (first == -1)
308 			first = i;
309 		last = i;
310 	}
311 
312 	(void) fprintf(stderr,
313 	    "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
314 	    desc, event_type, first, last);
315 
316 	for (i = first; i <= last; i++)
317 		(void) fprintf(stderr,
318 		    "%4d = %s\n", i, g_event_info[i].ev_desc);
319 }
320 
321 static void
322 usage(void)
323 {
324 	(void) fprintf(stderr,
325 	    "Usage: lockstat [options] command [args]\n"
326 	    "\nGeneral options:\n\n"
327 	    "  -V              print the corresponding D program\n"
328 	    "\nEvent selection options:\n\n"
329 	    "  -C              watch contention events [on by default]\n"
330 	    "  -E              watch error events [off by default]\n"
331 	    "  -H              watch hold events [off by default]\n"
332 	    "  -I              watch interrupt events [off by default]\n"
333 	    "  -A              watch all lock events [equivalent to -CH]\n"
334 	    "  -e event_list   only watch the specified events (shown below);\n"
335 	    "                  <event_list> is a comma-separated list of\n"
336 	    "                  events or ranges of events, e.g. 1,4-7,35\n"
337 	    "  -i rate         interrupt rate for -I [default: %d Hz]\n"
338 	    "\nData gathering options:\n\n"
339 	    "  -b              basic statistics (lock, caller, event count)\n"
340 	    "  -t              timing for all events [default]\n"
341 	    "  -h              histograms for event times\n"
342 	    "  -s depth        stack traces <depth> deep\n"
343 	    "  -x opt[=val]    enable or modify DTrace options\n"
344 	    "\nData filtering options:\n\n"
345 	    "  -n nrecords     maximum number of data records [default: %d]\n"
346 	    "  -l lock[,size]  only watch <lock>, which can be specified as a\n"
347 	    "                  symbolic name or hex address; <size> defaults\n"
348 	    "                  to the ELF symbol size if available, 1 if not\n"
349 	    "  -f func[,size]  only watch events generated by <func>\n"
350 	    "  -d duration     only watch events longer than <duration>\n"
351 	    "  -T              trace (rather than sample) events\n"
352 	    "\nData reporting options:\n\n"
353 #ifdef illumos
354 	    "  -c              coalesce lock data for arrays like pse_mutex[]\n"
355 #endif
356 	    "  -k              coalesce PCs within functions\n"
357 	    "  -g              show total events generated by function\n"
358 	    "  -w              wherever: don't distinguish events by caller\n"
359 	    "  -W              whichever: don't distinguish events by lock\n"
360 	    "  -R              display rates rather than counts\n"
361 	    "  -p              parsable output format (awk(1)-friendly)\n"
362 	    "  -P              sort lock data by (count * avg_time) product\n"
363 	    "  -D n            only display top <n> events of each type\n"
364 	    "  -o filename     send output to <filename>\n",
365 	    DEFAULT_HZ, DEFAULT_NRECS);
366 
367 	show_events('C', "Contention");
368 	show_events('H', "Hold-time");
369 	show_events('I', "Interrupt");
370 	show_events('E', "Error");
371 	(void) fprintf(stderr, "\n");
372 
373 	exit(1);
374 }
375 
376 static int
377 lockcmp(lsrec_t *a, lsrec_t *b)
378 {
379 	int i;
380 
381 	if (a->ls_event < b->ls_event)
382 		return (-1);
383 	if (a->ls_event > b->ls_event)
384 		return (1);
385 
386 	for (i = g_stkdepth - 1; i >= 0; i--) {
387 		if (a->ls_stack[i] < b->ls_stack[i])
388 			return (-1);
389 		if (a->ls_stack[i] > b->ls_stack[i])
390 			return (1);
391 	}
392 
393 	if (a->ls_caller < b->ls_caller)
394 		return (-1);
395 	if (a->ls_caller > b->ls_caller)
396 		return (1);
397 
398 #ifdef illumos
399 	if (a->ls_lock < b->ls_lock)
400 		return (-1);
401 	if (a->ls_lock > b->ls_lock)
402 		return (1);
403 
404 	return (0);
405 #else
406 	return (strcmp(a->ls_lock, b->ls_lock));
407 #endif
408 }
409 
410 static int
411 countcmp(lsrec_t *a, lsrec_t *b)
412 {
413 	if (a->ls_event < b->ls_event)
414 		return (-1);
415 	if (a->ls_event > b->ls_event)
416 		return (1);
417 
418 	return (b->ls_count - a->ls_count);
419 }
420 
421 static int
422 timecmp(lsrec_t *a, lsrec_t *b)
423 {
424 	if (a->ls_event < b->ls_event)
425 		return (-1);
426 	if (a->ls_event > b->ls_event)
427 		return (1);
428 
429 	if (a->ls_time < b->ls_time)
430 		return (1);
431 	if (a->ls_time > b->ls_time)
432 		return (-1);
433 
434 	return (0);
435 }
436 
437 static int
438 lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
439 {
440 	if (a->ls_event < b->ls_event)
441 		return (-1);
442 	if (a->ls_event > b->ls_event)
443 		return (1);
444 
445 #ifdef illumos
446 	if (a->ls_lock < b->ls_lock)
447 		return (-1);
448 	if (a->ls_lock > b->ls_lock)
449 		return (1);
450 
451 	return (0);
452 #else
453 	return (strcmp(a->ls_lock, b->ls_lock));
454 #endif
455 }
456 
457 static int
458 lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
459 {
460 #ifndef illumos
461 	int cmp;
462 #endif
463 
464 	if (a->ls_event < b->ls_event)
465 		return (-1);
466 	if (a->ls_event > b->ls_event)
467 		return (1);
468 
469 #ifdef illumos
470 	if (a->ls_lock < b->ls_lock)
471 		return (-1);
472 	if (a->ls_lock > b->ls_lock)
473 		return (1);
474 #else
475 	cmp = strcmp(a->ls_lock, b->ls_lock);
476 	if (cmp != 0)
477 		return (cmp);
478 #endif
479 
480 	return (b->ls_count - a->ls_count);
481 }
482 
483 static int
484 sitecmp_anylock(lsrec_t *a, lsrec_t *b)
485 {
486 	int i;
487 
488 	if (a->ls_event < b->ls_event)
489 		return (-1);
490 	if (a->ls_event > b->ls_event)
491 		return (1);
492 
493 	for (i = g_stkdepth - 1; i >= 0; i--) {
494 		if (a->ls_stack[i] < b->ls_stack[i])
495 			return (-1);
496 		if (a->ls_stack[i] > b->ls_stack[i])
497 			return (1);
498 	}
499 
500 	if (a->ls_caller < b->ls_caller)
501 		return (-1);
502 	if (a->ls_caller > b->ls_caller)
503 		return (1);
504 
505 	return (0);
506 }
507 
508 static int
509 site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
510 {
511 	int i;
512 
513 	if (a->ls_event < b->ls_event)
514 		return (-1);
515 	if (a->ls_event > b->ls_event)
516 		return (1);
517 
518 	for (i = g_stkdepth - 1; i >= 0; i--) {
519 		if (a->ls_stack[i] < b->ls_stack[i])
520 			return (-1);
521 		if (a->ls_stack[i] > b->ls_stack[i])
522 			return (1);
523 	}
524 
525 	if (a->ls_caller < b->ls_caller)
526 		return (-1);
527 	if (a->ls_caller > b->ls_caller)
528 		return (1);
529 
530 	return (b->ls_count - a->ls_count);
531 }
532 
/*
 * Merge sort of the n record pointers in a[] into ascending order as
 * defined by cmp; b[] is scratch space of at least n entries.
 *
 * Each half is sorted recursively (halves of a single element are already
 * sorted).  The first half is then copied into b[0 .. m-1] in order and the
 * second half into b[m .. n-1] in REVERSE order, so the smallest remaining
 * candidates sit at the two ends of b.  The merge walks inward from both
 * ends, which removes the need for separate end-of-run checks.
 *
 * NOTE(review): with n == 0 the final store still reads b[0] and writes
 * a[0], so callers are expected to pass n >= 1.
 */
static void
lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
{
	int m = n / 2;
	int i, j;

	if (m > 1)
		lsmergesort(cmp, a, b, m);
	if (n - m > 1)
		lsmergesort(cmp, a + m, b + m, n - m);
	/* First half, forward; leaves i == 0. */
	for (i = m; i > 0; i--)
		b[i - 1] = a[i - 1];
	/* Second half, reversed: a[m] -> b[n-1] ... ; leaves j == n - 1. */
	for (j = m - 1; j < n - 1; j++)
		b[n + m - j - 2] = a[j + 1];
	/* Take the smaller end each time; on a tie the right end is taken. */
	while (i < j)
		*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
	/* i == j: exactly one element is left. */
	*a = b[i];
}
551 
552 static void
553 coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
554 {
555 	int i, j;
556 	lsrec_t *target, *current;
557 
558 	target = lock[0];
559 
560 	for (i = 1; i < n; i++) {
561 		current = lock[i];
562 		if (cmp(current, target) != 0) {
563 			target = current;
564 			continue;
565 		}
566 		current->ls_event = LS_MAX_EVENTS;
567 		target->ls_count += current->ls_count;
568 		target->ls_refcnt += current->ls_refcnt;
569 		if (g_recsize < LS_TIME)
570 			continue;
571 		target->ls_time += current->ls_time;
572 		if (g_recsize < LS_HIST)
573 			continue;
574 		for (j = 0; j < 64; j++)
575 			target->ls_hist[j] += current->ls_hist[j];
576 	}
577 }
578 
/*
 * If *addrp falls inside a symbol known to addr_to_sym(), rewrite it to the
 * symbol's base address; otherwise leave it untouched.
 */
static void
coalesce_symbol(uintptr_t *addrp)
{
	uintptr_t off;
	size_t size;

	if (addr_to_sym(*addrp, &off, &size) == NULL)
		return;

	if (off < size)
		*addrp -= off;
}
588 
/*
 * AND a new term onto the D predicate string *pred.
 *
 *	pred	in/out: heap-allocated predicate, or NULL for "none yet".
 *		On return it points to a freshly allocated string and the
 *		old one has been freed.
 *	what	left-hand expression of the term; NULL makes this a no-op.
 *	cmp	comparison operator, or NULL to add `what' on its own.
 *	value	right-hand operand, used only when cmp is non-NULL.
 *
 * An empty or absent predicate becomes just the new term; otherwise the
 * result is "(<old>) && (<term>)".
 *
 * The operand is written as 0x-prefixed hex with an explicit conversion
 * instead of "%p", whose output is implementation-defined.  The buffer size
 * accounts for every component including cmp, and all writes are bounded.
 * Allocation failure is fatal and reported in the same
 * "lockstat: <msg>: <strerror>" form, with exit status 2, that fail() uses.
 */
static void
predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
{
	const char *old;
	char *new;
	size_t newlen;

	if (what == NULL)
		return;

	old = (*pred != NULL) ? *pred : "";

	/*
	 * Worst case is "(<old>) && (<what> <cmp> 0x<hex>)": the literal
	 * below holds the fixed characters plus the terminating NUL, and an
	 * unsigned long needs at most two hex digits per byte.
	 */
	newlen = strlen(old) + strlen(what) +
	    (cmp != NULL ? strlen(cmp) : 0) +
	    sizeof ("() && (  0x)") + 2 * sizeof (unsigned long);

	if ((new = malloc(newlen)) == NULL) {
		perror("lockstat: failed to allocate predicate");
		exit(2);
	}

	if (old[0] != '\0') {
		if (cmp != NULL) {
			(void) snprintf(new, newlen, "(%s) && (%s %s 0x%lx)",
			    old, what, cmp, (unsigned long)value);
		} else {
			(void) snprintf(new, newlen, "(%s) && (%s)",
			    old, what);
		}
	} else {
		if (cmp != NULL) {
			(void) snprintf(new, newlen, "%s %s 0x%lx",
			    what, cmp, (unsigned long)value);
		} else {
			(void) snprintf(new, newlen, "%s", what);
		}
	}

	free(*pred);
	*pred = new;
}
626 
/*
 * Release the predicate string owned by *pred and reset *pred to NULL.
 * A NULL *pred is accepted.
 */
static void
predicate_destroy(char **pred)
{
	char *doomed = *pred;

	*pred = NULL;
	free(doomed);
}
633 
/*
 * OR an address-range test onto the D filter string *filt.
 *
 *	filt	in/out: heap-allocated filter, or NULL for "none yet".  On
 *		return it points to a freshly allocated string and the old
 *		one has been freed.
 *	what	D expression to test (written twice into the new term).
 *	base	inclusive lower bound of the range.
 *	size	length of the range; the upper bound base + size is
 *		exclusive.
 *
 * The term appended is "(<what> >= 0x<base> && <what> < 0x<base+size>)",
 * preceded by " || " when the filter already has content.
 *
 * The result is built directly in a buffer sized for it, so the length of
 * `what' is not limited by a fixed scratch buffer.  Addresses are written
 * as 0x-prefixed hex with an explicit conversion instead of "%p", whose
 * output is implementation-defined; this gives the same text on every
 * platform without a per-platform format string.  Allocation failure is
 * fatal and reported in the same "lockstat: <msg>: <strerror>" form, with
 * exit status 2, that fail() uses.
 */
static void
filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
{
	const char *old = (*filt != NULL) ? *filt : "";
	const char *sep = (old[0] != '\0') ? " || " : "";
	size_t newlen;
	char *new;

	/*
	 * The literal below holds the fixed characters of the term plus the
	 * terminating NUL; each of the two addresses needs at most two hex
	 * digits per byte of an unsigned long.
	 */
	newlen = strlen(old) + strlen(sep) + 2 * strlen(what) +
	    sizeof ("( >= 0x &&  < 0x)") + 4 * sizeof (unsigned long);

	if ((new = malloc(newlen)) == NULL) {
		perror("lockstat: failed to allocate filter");
		exit(2);
	}

	(void) snprintf(new, newlen, "%s%s(%s >= 0x%lx && %s < 0x%lx)",
	    old, sep, what, (unsigned long)base, what,
	    (unsigned long)(base + size));

	free(*filt);
	*filt = new;
}
660 
/*
 * Release the filter string owned by *filt and reset *filt to NULL.
 * A NULL *filt is accepted.
 */
static void
filter_destroy(char **filt)
{
	char *doomed = *filt;

	*filt = NULL;
	free(doomed);
}
667 
668 static void
669 dprog_add(const char *fmt, ...)
670 {
671 	va_list args;
672 	int size, offs;
673 	char c;
674 
675 	va_start(args, fmt);
676 	size = vsnprintf(&c, 1, fmt, args) + 1;
677 	va_end(args);
678 
679 	if (g_proglen == 0) {
680 		offs = 0;
681 	} else {
682 		offs = g_proglen - 1;
683 	}
684 
685 	g_proglen = offs + size;
686 
687 	if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
688 		fail(1, "failed to reallocate program text");
689 
690 	va_start(args, fmt);
691 	(void) vsnprintf(&g_prog[offs], size, fmt, args);
692 	va_end(args);
693 }
694 
695 /*
696  * This function may read like an open sewer, but keep in mind that programs
697  * that generate other programs are rarely pretty.  If one has the unenviable
698  * task of maintaining or -- worse -- extending this code, use the -V option
699  * to examine the D program as generated by this function.
700  */
701 static void
702 dprog_addevent(int event)
703 {
704 	ls_event_info_t *info = &g_event_info[event];
705 	char *pred = NULL;
706 	char stack[20];
707 	const char *arg0, *caller;
708 	char *arg1 = "arg1";
709 	char buf[80];
710 	hrtime_t dur;
711 	int depth;
712 
713 	if (info->ev_name[0] == '\0')
714 		return;
715 
716 	if (info->ev_type == 'I') {
717 		/*
718 		 * For interrupt events, arg0 (normally the lock pointer) is
719 		 * the CPU address plus the current pil, and arg1 (normally
720 		 * the number of nanoseconds) is the number of nanoseconds
721 		 * late -- and it's stored in arg2.
722 		 */
723 #ifdef illumos
724 		arg0 = "(uintptr_t)curthread->t_cpu + \n"
725 		    "\t    curthread->t_cpu->cpu_profile_pil";
726 #else
727 		arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n"
728 		    "\t    0x01000000 + curthread->td_pri_class";
729 #endif
730 		caller = "(uintptr_t)arg0";
731 		arg1 = "arg2";
732 	} else {
733 #ifdef illumos
734 		arg0 = "(uintptr_t)arg0";
735 #else
736 		arg0 = "stringof(args[0]->lock_object.lo_name)";
737 #endif
738 		caller = "caller";
739 	}
740 
741 	if (g_recsize > LS_HIST) {
742 		for (depth = 0; g_recsize > LS_STACK(depth); depth++)
743 			continue;
744 
745 		if (g_tracing) {
746 			(void) sprintf(stack, "\tstack(%d);\n", depth);
747 		} else {
748 			(void) sprintf(stack, ", stack(%d)", depth);
749 		}
750 	} else {
751 		(void) sprintf(stack, "");
752 	}
753 
754 	if (info->ev_acquire != NULL) {
755 		/*
756 		 * If this is a hold event, we need to generate an additional
757 		 * clause for the acquire; the clause for the release will be
758 		 * generated with the aggregating statement, below.
759 		 */
760 		dprog_add("%s\n", info->ev_acquire);
761 		predicate_add(&pred, info->ev_predicate, NULL, 0);
762 		predicate_add(&pred, g_predicate, NULL, 0);
763 		if (pred != NULL)
764 			dprog_add("/%s/\n", pred);
765 
766 		dprog_add("{\n");
767 		(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
768 
769 		if (info->ev_type == 'H') {
770 			dprog_add("\t%s = timestamp;\n", buf);
771 		} else {
772 			/*
773 			 * If this isn't a hold event, it's the recursive
774 			 * error event.  For this, we simply bump the
775 			 * thread-local, per-lock count.
776 			 */
777 			dprog_add("\t%s++;\n", buf);
778 		}
779 
780 		dprog_add("}\n\n");
781 		predicate_destroy(&pred);
782 		pred = NULL;
783 
784 		if (info->ev_type == 'E') {
785 			/*
786 			 * If this is the recursive lock error event, we need
787 			 * to generate an additional clause to decrement the
788 			 * thread-local, per-lock count.  This assures that we
789 			 * only execute the aggregating clause if we have
790 			 * recursive entry.
791 			 */
792 			dprog_add("%s\n", info->ev_name);
793 			dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
794 		}
795 
796 		predicate_add(&pred, buf, NULL, 0);
797 
798 		if (info->ev_type == 'H') {
799 			(void) sprintf(buf, "timestamp -\n\t    "
800 			    "self->ev%d[(uintptr_t)arg0]", event);
801 		}
802 
803 		arg1 = buf;
804 	} else {
805 		predicate_add(&pred, info->ev_predicate, NULL, 0);
806 		if (info->ev_type != 'I')
807 			predicate_add(&pred, g_predicate, NULL, 0);
808 		else
809 			predicate_add(&pred, g_ipredicate, NULL, 0);
810 	}
811 
812 	if ((dur = g_min_duration[event]) != 0)
813 		predicate_add(&pred, arg1, ">=", dur);
814 
815 	dprog_add("%s\n", info->ev_name);
816 
817 	if (pred != NULL)
818 		dprog_add("/%s/\n", pred);
819 	predicate_destroy(&pred);
820 
821 	dprog_add("{\n");
822 
823 	if (g_tracing) {
824 		dprog_add("\ttrace(%dULL);\n", event);
825 		dprog_add("\ttrace(%s);\n", arg0);
826 		dprog_add("\ttrace(%s);\n", caller);
827 		dprog_add(stack);
828 	} else {
829 		/*
830 		 * The ordering here is important:  when we process the
831 		 * aggregate, we count on the fact that @avg appears before
832 		 * @hist in program order to assure that @avg is assigned the
833 		 * first aggregation variable ID and @hist assigned the
834 		 * second; see the comment in process_aggregate() for details.
835 		 */
836 		dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
837 		    event, arg0, caller, stack, arg1);
838 
839 		if (g_recsize >= LS_HIST) {
840 			dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
841 			    "(%s);\n", event, arg0, caller, stack, arg1);
842 		}
843 	}
844 
845 	if (info->ev_acquire != NULL)
846 		dprog_add("\tself->ev%d[arg0] = 0;\n", event);
847 
848 	dprog_add("}\n\n");
849 }
850 
851 static void
852 dprog_compile()
853 {
854 	dtrace_prog_t *prog;
855 	dtrace_proginfo_t info;
856 
857 	if (g_Vflag) {
858 		(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
859 		(void) fputs(g_prog, stderr);
860 		(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
861 	}
862 
863 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
864 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
865 		dfail("failed to compile program");
866 
867 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
868 		dfail("failed to enable probes");
869 
870 	if (dtrace_go(g_dtp) != 0)
871 		dfail("couldn't start tracing");
872 }
873 
/*
 * Handler that status_init() installs for SIGUSR1, the signal raised by its
 * periodic timer.  The body is intentionally empty.
 * NOTE(review): presumably the signal exists only to interrupt a blocking
 * call in the main loop so that status_check() runs -- that loop is not
 * visible here; confirm.
 */
static void
#ifdef illumos
status_fire(void)
#else
status_fire(int i)
#endif
{}
881 
882 static void
883 status_init(void)
884 {
885 	dtrace_optval_t val, status, agg;
886 	struct sigaction act;
887 	struct itimerspec ts;
888 	struct sigevent ev;
889 	timer_t tid;
890 
891 	if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
892 		dfail("failed to get 'statusrate'");
893 
894 	if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
895 		dfail("failed to get 'statusrate'");
896 
897 	/*
898 	 * We would want to awaken at a rate that is the GCD of the statusrate
899 	 * and the aggrate -- but that seems a bit absurd.  Instead, we'll
900 	 * simply awaken at a rate that is the more frequent of the two, which
901 	 * assures that we're never later than the interval implied by the
902 	 * more frequent rate.
903 	 */
904 	val = status < agg ? status : agg;
905 
906 	(void) sigemptyset(&act.sa_mask);
907 	act.sa_flags = 0;
908 	act.sa_handler = status_fire;
909 	(void) sigaction(SIGUSR1, &act, NULL);
910 
911 	ev.sigev_notify = SIGEV_SIGNAL;
912 	ev.sigev_signo = SIGUSR1;
913 
914 	if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
915 		dfail("cannot create CLOCK_REALTIME timer");
916 
917 	ts.it_value.tv_sec = val / NANOSEC;
918 	ts.it_value.tv_nsec = val % NANOSEC;
919 	ts.it_interval = ts.it_value;
920 
921 	if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
922 		dfail("cannot set time on CLOCK_REALTIME timer");
923 }
924 
925 static void
926 status_check(void)
927 {
928 	if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
929 		dfail("failed to snap aggregate");
930 
931 	if (dtrace_status(g_dtp) == -1)
932 		dfail("dtrace_status()");
933 }
934 
935 static void
936 lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
937 {
938 	bzero(lsrec, g_recsize);
939 	lsrec->ls_count = 1;
940 
941 	if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
942 		fail(0, "truncated DTrace record");
943 
944 	if (rec->dtrd_size != sizeof (uint64_t))
945 		fail(0, "bad event size in first record");
946 
947 	/* LINTED - alignment */
948 	lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
949 	rec++;
950 
951 #ifdef illumos
952 	if (rec->dtrd_size != sizeof (uintptr_t))
953 		fail(0, "bad lock address size in second record");
954 
955 	/* LINTED - alignment */
956 	lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
957 	rec++;
958 #else
959 	lsrec->ls_lock = strdup((const char *)(data + rec->dtrd_offset));
960 	rec++;
961 #endif
962 
963 	if (rec->dtrd_size != sizeof (uintptr_t))
964 		fail(0, "bad caller size in third record");
965 
966 	/* LINTED - alignment */
967 	lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
968 	rec++;
969 
970 	if (g_recsize > LS_HIST) {
971 		int frames, i;
972 		pc_t *stack;
973 
974 		frames = rec->dtrd_size / sizeof (pc_t);
975 		/* LINTED - alignment */
976 		stack = (pc_t *)(data + rec->dtrd_offset);
977 
978 		for (i = 1; i < frames; i++)
979 			lsrec->ls_stack[i - 1] = stack[i];
980 	}
981 }
982 
983 /*ARGSUSED*/
984 static int
985 count_aggregate(const dtrace_aggdata_t *agg, void *arg)
986 {
987 	*((size_t *)arg) += 1;
988 
989 	return (DTRACE_AGGWALK_NEXT);
990 }
991 
992 static int
993 process_aggregate(const dtrace_aggdata_t *agg, void *arg)
994 {
995 	const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
996 	caddr_t data = agg->dtada_data;
997 	lsdata_t *lsdata = arg;
998 	lsrec_t *lsrec = lsdata->lsd_next;
999 	const dtrace_recdesc_t *rec;
1000 	uint64_t *avg, *quantized;
1001 	int i, j;
1002 
1003 	assert(lsdata->lsd_count < g_nrecs);
1004 
1005 	/*
1006 	 * Aggregation variable IDs are guaranteed to be generated in program
1007 	 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
1008 	 * plus one.  As "avg" appears before "hist" in program order, we know
1009 	 * that "avg" will be allocated the first aggregation variable ID, and
1010 	 * "hist" will be allocated the second aggregation variable ID -- and
1011 	 * we therefore use the aggregation variable ID to differentiate the
1012 	 * cases.
1013 	 */
1014 	if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
1015 		/*
1016 		 * If this is the histogram entry.  We'll copy the quantized
1017 		 * data into lc_hist, and jump over the rest.
1018 		 */
1019 		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1020 
1021 		if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
1022 			fail(0, "bad variable ID in aggregation record");
1023 
1024 		if (rec->dtrd_size !=
1025 		    DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
1026 			fail(0, "bad quantize size in aggregation record");
1027 
1028 		/* LINTED - alignment */
1029 		quantized = (uint64_t *)(data + rec->dtrd_offset);
1030 
1031 		for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
1032 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
1033 			lsrec->ls_hist[j] = quantized[i];
1034 
1035 		goto out;
1036 	}
1037 
1038 	lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
1039 	    aggdesc->dtagd_nrecs - 1, data);
1040 
1041 	rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1042 
1043 	if (rec->dtrd_size != 2 * sizeof (uint64_t))
1044 		fail(0, "bad avg size in aggregation record");
1045 
1046 	/* LINTED - alignment */
1047 	avg = (uint64_t *)(data + rec->dtrd_offset);
1048 	lsrec->ls_count = (uint32_t)avg[0];
1049 	lsrec->ls_time = (uintptr_t)avg[1];
1050 
1051 	if (g_recsize >= LS_HIST)
1052 		return (DTRACE_AGGWALK_NEXT);
1053 
1054 out:
1055 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1056 	lsdata->lsd_count++;
1057 
1058 	return (DTRACE_AGGWALK_NEXT);
1059 }
1060 
1061 static int
1062 process_trace(const dtrace_probedata_t *pdata, void *arg)
1063 {
1064 	lsdata_t *lsdata = arg;
1065 	lsrec_t *lsrec = lsdata->lsd_next;
1066 	dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
1067 	caddr_t data = pdata->dtpda_data;
1068 
1069 	if (lsdata->lsd_count >= g_nrecs)
1070 		return (DTRACE_CONSUME_NEXT);
1071 
1072 	lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
1073 
1074 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1075 	lsdata->lsd_count++;
1076 
1077 	return (DTRACE_CONSUME_NEXT);
1078 }
1079 
1080 static int
1081 process_data(FILE *out, char *data)
1082 {
1083 	lsdata_t lsdata;
1084 
1085 	/* LINTED - alignment */
1086 	lsdata.lsd_next = (lsrec_t *)data;
1087 	lsdata.lsd_count = 0;
1088 
1089 	if (g_tracing) {
1090 		if (dtrace_consume(g_dtp, out,
1091 		    process_trace, NULL, &lsdata) != 0)
1092 			dfail("failed to consume buffer");
1093 
1094 		return (lsdata.lsd_count);
1095 	}
1096 
1097 	if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
1098 	    process_aggregate, &lsdata) != 0)
1099 		dfail("failed to walk aggregate");
1100 
1101 	return (lsdata.lsd_count);
1102 }
1103 
1104 /*ARGSUSED*/
1105 static int
1106 drophandler(const dtrace_dropdata_t *data, void *arg)
1107 {
1108 	g_dropped++;
1109 	(void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
1110 	return (DTRACE_HANDLE_OK);
1111 }
1112 
1113 int
1114 main(int argc, char **argv)
1115 {
1116 	char *data_buf;
1117 	lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
1118 	FILE *out = stdout;
1119 	int c;
1120 	pid_t child;
1121 	int status;
1122 	int i, j;
1123 	hrtime_t duration;
1124 	char *addrp, *offp, *sizep, *evp, *lastp, *p;
1125 	uintptr_t addr;
1126 	size_t size, off;
1127 	int events_specified = 0;
1128 	int exec_errno = 0;
1129 	uint32_t event;
1130 	char *filt = NULL, *ifilt = NULL;
1131 	static uint64_t ev_count[LS_MAX_EVENTS + 1];
1132 	static uint64_t ev_time[LS_MAX_EVENTS + 1];
1133 	dtrace_optval_t aggsize;
1134 	char aggstr[10];
1135 	long ncpus;
1136 	int dynvar = 0;
1137 	int err;
1138 
1139 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
1140 		fail(0, "cannot open dtrace library: %s",
1141 		    dtrace_errmsg(NULL, err));
1142 	}
1143 
1144 	if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
1145 		dfail("couldn't establish drop handler");
1146 
1147 	if (symtab_init() == -1)
1148 		fail(1, "can't load kernel symbols");
1149 
1150 	g_nrecs = DEFAULT_NRECS;
1151 
1152 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1153 		switch (c) {
1154 		case 'b':
1155 			g_recsize = LS_BASIC;
1156 			break;
1157 
1158 		case 't':
1159 			g_recsize = LS_TIME;
1160 			break;
1161 
1162 		case 'h':
1163 			g_recsize = LS_HIST;
1164 			break;
1165 
1166 		case 's':
1167 			if (!isdigit(optarg[0]))
1168 				usage();
1169 			g_stkdepth = atoi(optarg);
1170 			if (g_stkdepth > LS_MAX_STACK_DEPTH)
1171 				fail(0, "max stack depth is %d",
1172 				    LS_MAX_STACK_DEPTH);
1173 			g_recsize = LS_STACK(g_stkdepth);
1174 			break;
1175 
1176 		case 'n':
1177 			if (!isdigit(optarg[0]))
1178 				usage();
1179 			g_nrecs = atoi(optarg);
1180 			break;
1181 
1182 		case 'd':
1183 			if (!isdigit(optarg[0]))
1184 				usage();
1185 			duration = atoll(optarg);
1186 
1187 			/*
1188 			 * XXX -- durations really should be per event
1189 			 * since the units are different, but it's hard
1190 			 * to express this nicely in the interface.
1191 			 * Not clear yet what the cleanest solution is.
1192 			 */
1193 			for (i = 0; i < LS_MAX_EVENTS; i++)
1194 				if (g_event_info[i].ev_type != 'E')
1195 					g_min_duration[i] = duration;
1196 
1197 			break;
1198 
1199 		case 'i':
1200 			if (!isdigit(optarg[0]))
1201 				usage();
1202 			i = atoi(optarg);
1203 			if (i <= 0)
1204 				usage();
1205 			if (i > MAX_HZ)
1206 				fail(0, "max interrupt rate is %d Hz", MAX_HZ);
1207 
1208 			for (j = 0; j < LS_MAX_EVENTS; j++)
1209 				if (strcmp(g_event_info[j].ev_desc,
1210 				    "Profiling interrupt") == 0)
1211 					break;
1212 
1213 			(void) sprintf(g_event_info[j].ev_name,
1214 			    "profile:::profile-%d", i);
1215 			break;
1216 
1217 		case 'l':
1218 		case 'f':
1219 			addrp = strtok(optarg, ",");
1220 			sizep = strtok(NULL, ",");
1221 			addrp = strtok(optarg, ",+");
1222 			offp = strtok(NULL, ",");
1223 
1224 			size = sizep ? strtoul(sizep, NULL, 0) : 1;
1225 			off = offp ? strtoul(offp, NULL, 0) : 0;
1226 
1227 			if (addrp[0] == '0') {
1228 				addr = strtoul(addrp, NULL, 16) + off;
1229 			} else {
1230 				addr = sym_to_addr(addrp) + off;
1231 				if (sizep == NULL)
1232 					size = sym_size(addrp) - off;
1233 				if (addr - off == 0)
1234 					fail(0, "symbol '%s' not found", addrp);
1235 				if (size == 0)
1236 					size = 1;
1237 			}
1238 
1239 
1240 			if (c == 'l') {
1241 				filter_add(&filt, "arg0", addr, size);
1242 			} else {
1243 				filter_add(&filt, "caller", addr, size);
1244 				filter_add(&ifilt, "arg0", addr, size);
1245 			}
1246 			break;
1247 
1248 		case 'e':
1249 			evp = strtok_r(optarg, ",", &lastp);
1250 			while (evp) {
1251 				int ev1, ev2;
1252 				char *evp2;
1253 
1254 				(void) strtok(evp, "-");
1255 				evp2 = strtok(NULL, "-");
1256 				ev1 = atoi(evp);
1257 				ev2 = evp2 ? atoi(evp2) : ev1;
1258 				if ((uint_t)ev1 >= LS_MAX_EVENTS ||
1259 				    (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
1260 					fail(0, "-e events out of range");
1261 				for (i = ev1; i <= ev2; i++)
1262 					g_enabled[i] = 1;
1263 				evp = strtok_r(NULL, ",", &lastp);
1264 			}
1265 			events_specified = 1;
1266 			break;
1267 
1268 #ifdef illumos
1269 		case 'c':
1270 			g_cflag = 1;
1271 			break;
1272 #endif
1273 
1274 		case 'k':
1275 			g_kflag = 1;
1276 			break;
1277 
1278 		case 'w':
1279 			g_wflag = 1;
1280 			break;
1281 
1282 		case 'W':
1283 			g_Wflag = 1;
1284 			break;
1285 
1286 		case 'g':
1287 			g_gflag = 1;
1288 			break;
1289 
1290 		case 'C':
1291 		case 'E':
1292 		case 'H':
1293 		case 'I':
1294 			for (i = 0; i < LS_MAX_EVENTS; i++)
1295 				if (g_event_info[i].ev_type == c)
1296 					g_enabled[i] = 1;
1297 			events_specified = 1;
1298 			break;
1299 
1300 		case 'A':
1301 			for (i = 0; i < LS_MAX_EVENTS; i++)
1302 				if (strchr("CH", g_event_info[i].ev_type))
1303 					g_enabled[i] = 1;
1304 			events_specified = 1;
1305 			break;
1306 
1307 		case 'T':
1308 			g_tracing = 1;
1309 			break;
1310 
1311 		case 'D':
1312 			if (!isdigit(optarg[0]))
1313 				usage();
1314 			g_topn = atoi(optarg);
1315 			break;
1316 
1317 		case 'R':
1318 			g_rates = 1;
1319 			break;
1320 
1321 		case 'p':
1322 			g_pflag = 1;
1323 			break;
1324 
1325 		case 'P':
1326 			g_Pflag = 1;
1327 			break;
1328 
1329 		case 'o':
1330 			if ((out = fopen(optarg, "w")) == NULL)
1331 				fail(1, "error opening file");
1332 			break;
1333 
1334 		case 'V':
1335 			g_Vflag = 1;
1336 			break;
1337 
1338 		default:
1339 			if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
1340 				usage();
1341 		}
1342 	}
1343 
1344 	if (filt != NULL) {
1345 		predicate_add(&g_predicate, filt, NULL, 0);
1346 		filter_destroy(&filt);
1347 	}
1348 
1349 	if (ifilt != NULL) {
1350 		predicate_add(&g_ipredicate, ifilt, NULL, 0);
1351 		filter_destroy(&ifilt);
1352 	}
1353 
1354 	if (g_recsize == 0) {
1355 		if (g_gflag) {
1356 			g_stkdepth = LS_MAX_STACK_DEPTH;
1357 			g_recsize = LS_STACK(g_stkdepth);
1358 		} else {
1359 			g_recsize = LS_TIME;
1360 		}
1361 	}
1362 
1363 	if (g_gflag && g_recsize <= LS_STACK(0))
1364 		fail(0, "'-g' requires at least '-s 1' data gathering");
1365 
1366 	/*
1367 	 * Make sure the alignment is reasonable
1368 	 */
1369 	g_recsize = -(-g_recsize & -sizeof (uint64_t));
1370 
1371 	for (i = 0; i < LS_MAX_EVENTS; i++) {
1372 		/*
1373 		 * If no events were specified, enable -C.
1374 		 */
1375 		if (!events_specified && g_event_info[i].ev_type == 'C')
1376 			g_enabled[i] = 1;
1377 	}
1378 
1379 	for (i = 0; i < LS_MAX_EVENTS; i++) {
1380 		if (!g_enabled[i])
1381 			continue;
1382 
1383 		if (g_event_info[i].ev_acquire != NULL) {
1384 			/*
1385 			 * If we've enabled a hold event, we must explicitly
1386 			 * allocate dynamic variable space.
1387 			 */
1388 			dynvar = 1;
1389 		}
1390 
1391 		dprog_addevent(i);
1392 	}
1393 
1394 	/*
1395 	 * Make sure there are remaining arguments to specify a child command
1396 	 * to execute.
1397 	 */
1398 	if (argc <= optind)
1399 		usage();
1400 
1401 	if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
1402 		dfail("couldn't determine number of online CPUs");
1403 
1404 	/*
1405 	 * By default, we set our data buffer size to be the number of records
1406 	 * multiplied by the size of the record, doubled to account for some
1407 	 * DTrace slop and divided by the number of CPUs.  We silently clamp
1408 	 * the aggregation size at both a minimum and a maximum to prevent
1409 	 * absurdly low or high values.
1410 	 */
1411 	if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
1412 		aggsize = MIN_AGGSIZE;
1413 
1414 	if (aggsize > MAX_AGGSIZE)
1415 		aggsize = MAX_AGGSIZE;
1416 
1417 	(void) sprintf(aggstr, "%lld", (long long)aggsize);
1418 
1419 	if (!g_tracing) {
1420 		if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
1421 			dfail("failed to set 'bufsize'");
1422 
1423 		if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
1424 			dfail("failed to set 'aggsize'");
1425 
1426 		if (dynvar) {
1427 			/*
1428 			 * If we're using dynamic variables, we set our
1429 			 * dynamic variable size to be one megabyte per CPU,
1430 			 * with a hard-limit of 32 megabytes.  This may still
1431 			 * be too small in some cases, but it can be tuned
1432 			 * manually via -x if need be.
1433 			 */
1434 			(void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
1435 
1436 			if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
1437 				dfail("failed to set 'dynvarsize'");
1438 		}
1439 	} else {
1440 		if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
1441 			dfail("failed to set 'bufsize'");
1442 	}
1443 
1444 	if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
1445 		dfail("failed to set 'statusrate'");
1446 
1447 	optind = 1;
1448 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1449 		switch (c) {
1450 		case 'x':
1451 			if ((p = strchr(optarg, '=')) != NULL)
1452 				*p++ = '\0';
1453 
1454 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
1455 				dfail("failed to set -x %s", optarg);
1456 			break;
1457 		}
1458 	}
1459 
1460 	argc -= optind;
1461 	argv += optind;
1462 
1463 	dprog_compile();
1464 	status_init();
1465 
1466 	g_elapsed = -gethrtime();
1467 
1468 	/*
1469 	 * Spawn the specified command and wait for it to complete.
1470 	 */
1471 	child = fork();
1472 	if (child == -1)
1473 		fail(1, "cannot fork");
1474 	if (child == 0) {
1475 		(void) dtrace_close(g_dtp);
1476 		(void) execvp(argv[0], &argv[0]);
1477 		exec_errno = errno;
1478 		exit(127);
1479 	}
1480 
1481 #ifdef illumos
1482 	while (waitpid(child, &status, WEXITED) != child)
1483 #else
1484 	while (waitpid(child, &status, 0) != child)
1485 #endif
1486 		status_check();
1487 
1488 	g_elapsed += gethrtime();
1489 
1490 	if (WIFEXITED(status)) {
1491 		if (WEXITSTATUS(status) != 0) {
1492 			if (exec_errno != 0) {
1493 				errno = exec_errno;
1494 				fail(1, "could not execute %s", argv[0]);
1495 			}
1496 			(void) fprintf(stderr,
1497 			    "lockstat: warning: %s exited with code %d\n",
1498 			    argv[0], WEXITSTATUS(status));
1499 		}
1500 	} else {
1501 		(void) fprintf(stderr,
1502 		    "lockstat: warning: %s died on signal %d\n",
1503 		    argv[0], WTERMSIG(status));
1504 	}
1505 
1506 	if (dtrace_stop(g_dtp) == -1)
1507 		dfail("failed to stop dtrace");
1508 
1509 	/*
1510 	 * Before we read out the results, we need to allocate our buffer.
1511 	 * If we're tracing, then we'll just use the precalculated size.  If
1512 	 * we're not, then we'll take a snapshot of the aggregate, and walk
1513 	 * it to count the number of records.
1514 	 */
1515 	if (!g_tracing) {
1516 		if (dtrace_aggregate_snap(g_dtp) != 0)
1517 			dfail("failed to snap aggregate");
1518 
1519 		g_nrecs = 0;
1520 
1521 		if (dtrace_aggregate_walk(g_dtp,
1522 		    count_aggregate, &g_nrecs) != 0)
1523 			dfail("failed to walk aggregate");
1524 	}
1525 
1526 #ifdef illumos
1527 	if ((data_buf = memalign(sizeof (uint64_t),
1528 	    (g_nrecs + 1) * g_recsize)) == NULL)
1529 #else
1530 	if (posix_memalign((void **)&data_buf, sizeof (uint64_t),
1531 	    (g_nrecs + 1) * g_recsize) )
1532 #endif
1533 		fail(1, "Memory allocation failed");
1534 
1535 	/*
1536 	 * Read out the DTrace data.
1537 	 */
1538 	g_nrecs_used = process_data(out, data_buf);
1539 
1540 	if (g_nrecs_used > g_nrecs || g_dropped)
1541 		(void) fprintf(stderr, "lockstat: warning: "
1542 		    "ran out of data records (use -n for more)\n");
1543 
1544 	/* LINTED - alignment */
1545 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1546 	    /* LINTED - alignment */
1547 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1548 		ev_count[lsp->ls_event] += lsp->ls_count;
1549 		ev_time[lsp->ls_event] += lsp->ls_time;
1550 	}
1551 
1552 	/*
1553 	 * If -g was specified, convert stacks into individual records.
1554 	 */
1555 	if (g_gflag) {
1556 		lsrec_t *newlsp, *oldlsp;
1557 
1558 #ifdef illumos
1559 		newlsp = memalign(sizeof (uint64_t),
1560 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1561 #else
1562 		posix_memalign((void **)&newlsp, sizeof (uint64_t),
1563 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1564 #endif
1565 		if (newlsp == NULL)
1566 			fail(1, "Cannot allocate space for -g processing");
1567 		lsp = newlsp;
1568 		/* LINTED - alignment */
1569 		for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1570 		    /* LINTED - alignment */
1571 		    oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
1572 			int fr;
1573 			int caller_in_stack = 0;
1574 
1575 			if (oldlsp->ls_count == 0)
1576 				continue;
1577 
1578 			for (fr = 0; fr < g_stkdepth; fr++) {
1579 				if (oldlsp->ls_stack[fr] == 0)
1580 					break;
1581 				if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
1582 					caller_in_stack = 1;
1583 				bcopy(oldlsp, lsp, LS_TIME);
1584 				lsp->ls_caller = oldlsp->ls_stack[fr];
1585 #ifndef illumos
1586 				lsp->ls_lock = strdup(oldlsp->ls_lock);
1587 #endif
1588 				/* LINTED - alignment */
1589 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1590 			}
1591 			if (!caller_in_stack) {
1592 				bcopy(oldlsp, lsp, LS_TIME);
1593 				/* LINTED - alignment */
1594 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1595 			}
1596 #ifndef illumos
1597 			free(oldlsp->ls_lock);
1598 #endif
1599 		}
1600 		g_nrecs = g_nrecs_used =
1601 		    ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
1602 		g_recsize = LS_TIME;
1603 		g_stkdepth = 0;
1604 		free(data_buf);
1605 		data_buf = (char *)newlsp;
1606 	}
1607 
1608 	if ((sort_buf = calloc(2 * (g_nrecs + 1),
1609 	    sizeof (void *))) == NULL)
1610 		fail(1, "Sort buffer allocation failed");
1611 	merge_buf = sort_buf + (g_nrecs + 1);
1612 
1613 	/*
1614 	 * Build the sort buffer, discarding zero-count records along the way.
1615 	 */
1616 	/* LINTED - alignment */
1617 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1618 	    /* LINTED - alignment */
1619 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1620 		if (lsp->ls_count == 0)
1621 			lsp->ls_event = LS_MAX_EVENTS;
1622 		sort_buf[i] = lsp;
1623 	}
1624 
1625 	if (g_nrecs_used == 0)
1626 		exit(0);
1627 
1628 	/*
1629 	 * Add a sentinel after the last record
1630 	 */
1631 	sort_buf[i] = lsp;
1632 	lsp->ls_event = LS_MAX_EVENTS;
1633 
1634 	if (g_tracing) {
1635 		report_trace(out, sort_buf);
1636 		return (0);
1637 	}
1638 
1639 	/*
1640 	 * Application of -g may have resulted in multiple records
1641 	 * with the same signature; coalesce them.
1642 	 */
1643 	if (g_gflag) {
1644 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1645 		coalesce(lockcmp, sort_buf, g_nrecs_used);
1646 	}
1647 
1648 	/*
1649 	 * Coalesce locks within the same symbol if -c option specified.
1650 	 * Coalesce PCs within the same function if -k option specified.
1651 	 */
1652 	if (g_cflag || g_kflag) {
1653 		for (i = 0; i < g_nrecs_used; i++) {
1654 			int fr;
1655 			lsp = sort_buf[i];
1656 #ifdef illumos
1657 			if (g_cflag)
1658 				coalesce_symbol(&lsp->ls_lock);
1659 #endif
1660 			if (g_kflag) {
1661 				for (fr = 0; fr < g_stkdepth; fr++)
1662 					coalesce_symbol(&lsp->ls_stack[fr]);
1663 				coalesce_symbol(&lsp->ls_caller);
1664 			}
1665 		}
1666 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1667 		coalesce(lockcmp, sort_buf, g_nrecs_used);
1668 	}
1669 
1670 	/*
1671 	 * Coalesce callers if -w option specified
1672 	 */
1673 	if (g_wflag) {
1674 		mergesort(lock_and_count_cmp_anywhere,
1675 		    sort_buf, merge_buf, g_nrecs_used);
1676 		coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
1677 	}
1678 
1679 	/*
1680 	 * Coalesce locks if -W option specified
1681 	 */
1682 	if (g_Wflag) {
1683 		mergesort(site_and_count_cmp_anylock,
1684 		    sort_buf, merge_buf, g_nrecs_used);
1685 		coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
1686 	}
1687 
1688 	/*
1689 	 * Sort data by contention count (ls_count) or total time (ls_time),
1690 	 * depending on g_Pflag.  Override g_Pflag if time wasn't measured.
1691 	 */
1692 	if (g_recsize < LS_TIME)
1693 		g_Pflag = 0;
1694 
1695 	if (g_Pflag)
1696 		mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
1697 	else
1698 		mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
1699 
1700 	/*
1701 	 * Display data by event type
1702 	 */
1703 	first = &sort_buf[0];
1704 	while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
1705 		current = first;
1706 		while ((lsp = *current)->ls_event == event)
1707 			current++;
1708 		report_stats(out, first, current - first, ev_count[event],
1709 		    ev_time[event]);
1710 		first = current;
1711 	}
1712 
1713 #ifndef illumos
1714 	/*
1715 	 * Free lock name buffers
1716 	 */
1717 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1718 	    lsp = (lsrec_t *)((char *)lsp + g_recsize))
1719 		free(lsp->ls_lock);
1720 #endif
1721 
1722 	return (0);
1723 }
1724 
1725 static char *
1726 format_symbol(char *buf, uintptr_t addr, int show_size)
1727 {
1728 	uintptr_t symoff;
1729 	char *symname;
1730 	size_t symsize;
1731 
1732 	symname = addr_to_sym(addr, &symoff, &symsize);
1733 
1734 	if (show_size && symoff == 0)
1735 		(void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
1736 	else if (symoff == 0)
1737 		(void) sprintf(buf, "%s", symname);
1738 	else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0)	/* CPU+PIL */
1739 #ifdef illumos
1740 		(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
1741 #else
1742 		(void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);
1743 #endif
1744 	else if (symoff <= symsize || (symoff < 256 && addr != symoff))
1745 		(void) sprintf(buf, "%s+0x%llx", symname,
1746 		    (unsigned long long)symoff);
1747 	else
1748 		(void) sprintf(buf, "0x%llx", (unsigned long long)addr);
1749 	return (buf);
1750 }
1751 
1752 static void
1753 report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
1754 	uint64_t total_time)
1755 {
1756 	uint32_t event = sort_buf[0]->ls_event;
1757 	lsrec_t *lsp;
1758 	double ptotal = 0.0;
1759 	double percent;
1760 	int i, j, fr;
1761 	int displayed;
1762 	int first_bin, last_bin, max_bin_count, total_bin_count;
1763 	int rectype;
1764 	char buf[256];
1765 	char lhdr[80], chdr[80];
1766 
1767 	rectype = g_recsize;
1768 
1769 	if (g_topn == 0) {
1770 		(void) fprintf(out, "%20llu %s\n",
1771 		    g_rates == 0 ? total_count :
1772 		    ((unsigned long long)total_count * NANOSEC) / g_elapsed,
1773 		    g_event_info[event].ev_desc);
1774 		return;
1775 	}
1776 
1777 	(void) sprintf(lhdr, "%s%s",
1778 	    g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
1779 	(void) sprintf(chdr, "%s%s",
1780 	    g_wflag ? "Hottest " : "", "Caller");
1781 
1782 	if (!g_pflag)
1783 		(void) fprintf(out,
1784 		    "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
1785 		    g_event_info[event].ev_desc, (double)total_count,
1786 		    (double)g_elapsed / NANOSEC,
1787 		    (double)total_count * NANOSEC / g_elapsed);
1788 
1789 	if (!g_pflag && rectype < LS_HIST) {
1790 		(void) sprintf(buf, "%s", g_event_info[event].ev_units);
1791 		(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1792 		    g_rates ? "ops/s" : "Count",
1793 		    g_gflag ? "genr" : "indv",
1794 		    "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
1795 		(void) fprintf(out, "---------------------------------"
1796 		    "----------------------------------------------\n");
1797 	}
1798 
1799 	displayed = 0;
1800 	for (i = 0; i < nrecs; i++) {
1801 		lsp = sort_buf[i];
1802 
1803 		if (displayed++ >= g_topn)
1804 			break;
1805 
1806 		if (g_pflag) {
1807 			int j;
1808 
1809 			(void) fprintf(out, "%u %u",
1810 			    lsp->ls_event, lsp->ls_count);
1811 #ifdef illumos
1812 			(void) fprintf(out, " %s",
1813 			    format_symbol(buf, lsp->ls_lock, g_cflag));
1814 #else
1815 			(void) fprintf(out, " %s", lsp->ls_lock);
1816 #endif
1817 			(void) fprintf(out, " %s",
1818 			    format_symbol(buf, lsp->ls_caller, 0));
1819 			(void) fprintf(out, " %f",
1820 			    (double)lsp->ls_refcnt / lsp->ls_count);
1821 			if (rectype >= LS_TIME)
1822 				(void) fprintf(out, " %llu",
1823 				    (unsigned long long)lsp->ls_time);
1824 			if (rectype >= LS_HIST) {
1825 				for (j = 0; j < 64; j++)
1826 					(void) fprintf(out, " %u",
1827 					    lsp->ls_hist[j]);
1828 			}
1829 			for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
1830 				if (rectype <= LS_STACK(j) ||
1831 				    lsp->ls_stack[j] == 0)
1832 					break;
1833 				(void) fprintf(out, " %s",
1834 				    format_symbol(buf, lsp->ls_stack[j], 0));
1835 			}
1836 			(void) fprintf(out, "\n");
1837 			continue;
1838 		}
1839 
1840 		if (rectype >= LS_HIST) {
1841 			(void) fprintf(out, "---------------------------------"
1842 			    "----------------------------------------------\n");
1843 			(void) sprintf(buf, "%s",
1844 			    g_event_info[event].ev_units);
1845 			(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1846 			    g_rates ? "ops/s" : "Count",
1847 			    g_gflag ? "genr" : "indv",
1848 			    "cuml", "rcnt", buf, lhdr, chdr);
1849 		}
1850 
1851 		if (g_Pflag && total_time != 0)
1852 			percent = (lsp->ls_time * 100.00) / total_time;
1853 		else
1854 			percent = (lsp->ls_count * 100.00) / total_count;
1855 
1856 		ptotal += percent;
1857 
1858 		if (rectype >= LS_TIME)
1859 			(void) sprintf(buf, "%llu",
1860 			    (unsigned long long)(lsp->ls_time / lsp->ls_count));
1861 		else
1862 			buf[0] = '\0';
1863 
1864 		(void) fprintf(out, "%5llu ",
1865 		    g_rates == 0 ? lsp->ls_count :
1866 		    ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
1867 
1868 		(void) fprintf(out, "%3.0f%% ", percent);
1869 
1870 		if (g_gflag)
1871 			(void) fprintf(out, "---- ");
1872 		else
1873 			(void) fprintf(out, "%3.0f%% ", ptotal);
1874 
1875 		(void) fprintf(out, "%4.2f %8s ",
1876 		    (double)lsp->ls_refcnt / lsp->ls_count, buf);
1877 
1878 #ifdef illumos
1879 		(void) fprintf(out, "%-22s ",
1880 		    format_symbol(buf, lsp->ls_lock, g_cflag));
1881 #else
1882 		(void) fprintf(out, "%-22s ", lsp->ls_lock);
1883 #endif
1884 
1885 		(void) fprintf(out, "%-24s\n",
1886 		    format_symbol(buf, lsp->ls_caller, 0));
1887 
1888 		if (rectype < LS_HIST)
1889 			continue;
1890 
1891 		(void) fprintf(out, "\n");
1892 		(void) fprintf(out, "%10s %31s %-9s %-24s\n",
1893 		    g_event_info[event].ev_units,
1894 		    "------ Time Distribution ------",
1895 		    g_rates ? "ops/s" : "count",
1896 		    rectype > LS_STACK(0) ? "Stack" : "");
1897 
1898 		first_bin = 0;
1899 		while (lsp->ls_hist[first_bin] == 0)
1900 			first_bin++;
1901 
1902 		last_bin = 63;
1903 		while (lsp->ls_hist[last_bin] == 0)
1904 			last_bin--;
1905 
1906 		max_bin_count = 0;
1907 		total_bin_count = 0;
1908 		for (j = first_bin; j <= last_bin; j++) {
1909 			total_bin_count += lsp->ls_hist[j];
1910 			if (lsp->ls_hist[j] > max_bin_count)
1911 				max_bin_count = lsp->ls_hist[j];
1912 		}
1913 
1914 		/*
1915 		 * If we went a few frames below the caller, ignore them
1916 		 */
1917 		for (fr = 3; fr > 0; fr--)
1918 			if (lsp->ls_stack[fr] == lsp->ls_caller)
1919 				break;
1920 
1921 		for (j = first_bin; j <= last_bin; j++) {
1922 			uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
1923 			(void) fprintf(out, "%10llu |%s%s %-9u ",
1924 			    1ULL << j,
1925 			    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
1926 			    "                              " + depth,
1927 			    g_rates == 0 ? lsp->ls_hist[j] :
1928 			    (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
1929 			    g_elapsed));
1930 			if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
1931 				(void) fprintf(out, "\n");
1932 				continue;
1933 			}
1934 			(void) fprintf(out, "%-24s\n",
1935 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1936 			fr++;
1937 		}
1938 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1939 			(void) fprintf(out, "%15s %-36s %-24s\n", "", "",
1940 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1941 			fr++;
1942 		}
1943 	}
1944 
1945 	if (!g_pflag)
1946 		(void) fprintf(out, "---------------------------------"
1947 		    "----------------------------------------------\n");
1948 
1949 	(void) fflush(out);
1950 }
1951 
1952 static void
1953 report_trace(FILE *out, lsrec_t **sort_buf)
1954 {
1955 	lsrec_t *lsp;
1956 	int i, fr;
1957 	int rectype;
1958 	char buf[256], buf2[256];
1959 
1960 	rectype = g_recsize;
1961 
1962 	if (!g_pflag) {
1963 		(void) fprintf(out, "%5s  %7s  %11s  %-24s  %-24s\n",
1964 		    "Event", "Time", "Owner", "Lock", "Caller");
1965 		(void) fprintf(out, "---------------------------------"
1966 		    "----------------------------------------------\n");
1967 	}
1968 
1969 	for (i = 0; i < g_nrecs_used; i++) {
1970 
1971 		lsp = sort_buf[i];
1972 
1973 		if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
1974 			continue;
1975 
1976 		(void) fprintf(out, "%2d  %10llu  %11p  %-24s  %-24s\n",
1977 		    lsp->ls_event, (unsigned long long)lsp->ls_time,
1978 		    (void *)lsp->ls_next,
1979 #ifdef illumos
1980 		    format_symbol(buf, lsp->ls_lock, 0),
1981 #else
1982 		    lsp->ls_lock,
1983 #endif
1984 		    format_symbol(buf2, lsp->ls_caller, 0));
1985 
1986 		if (rectype <= LS_STACK(0))
1987 			continue;
1988 
1989 		/*
1990 		 * If we went a few frames below the caller, ignore them
1991 		 */
1992 		for (fr = 3; fr > 0; fr--)
1993 			if (lsp->ls_stack[fr] == lsp->ls_caller)
1994 				break;
1995 
1996 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1997 			(void) fprintf(out, "%53s  %-24s\n", "",
1998 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1999 			fr++;
2000 		}
2001 		(void) fprintf(out, "\n");
2002 	}
2003 
2004 	(void) fflush(out);
2005 }
2006