1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
28  * Copyright (c) 2011 by Delphix. All rights reserved.
29  */
30 
31 #include <stdlib.h>
32 #include <strings.h>
33 #include <errno.h>
34 #include <unistd.h>
35 #include <limits.h>
36 #include <assert.h>
37 #include <ctype.h>
38 #include <alloca.h>
39 #include <dt_impl.h>
40 
41 #define	DT_MASK_LO 0x00000000FFFFFFFFULL
42 
/*
 * Absolute value of a long double.  This is defined locally so that
 * libdtrace does not pick up a dependency on libm.
 */
static long double
dt_fabsl(long double x)
{
	return (x < 0 ? -x : x);
}
55 
/*
 * 128-bit arithmetic functions needed to support the stddev() aggregating
 * action.  A 128-bit value is held in a two-element array: element 0 is
 * the low-order 64 bits and element 1 is the high-order 64 bits.
 */

/*
 * Return 1 if the 128-bit value a is strictly greater than b, else 0.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	/* The high words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] > b[0]);
}
65 
/*
 * Return 1 if the 128-bit value a (a[0] low word, a[1] high word) is
 * greater than or equal to b, else 0.
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	/* The high words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] >= b[0]);
}
71 
/*
 * Return 1 if the 128-bit value a (a[0] low word, a[1] high word) is
 * less than or equal to b, else 0.
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	/* The high words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] < b[1]);

	return (a[0] <= b[0]);
}
77 
/*
 * Shift the 128-bit value in a by b. If b is positive, shift left.
 * If b is negative, shift right.  The value is stored with a[0] as the
 * low-order 64 bits and a[1] as the high-order 64 bits; bits shifted out
 * of the 128-bit range are discarded and vacated bits are filled with
 * zeroes.  A shift of 128 bits or more in either direction yields zero.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	unsigned int n;

	if (b == 0)
		return;

	if (b < 0) {
		/*
		 * Take the magnitude in unsigned arithmetic so that even
		 * INT_MIN is handled without signed overflow.
		 */
		n = 0U - (unsigned int)b;

		if (n >= 128) {
			a[0] = 0;
			a[1] = 0;
		} else if (n >= 64) {
			a[0] = a[1] >> (n - 64);
			a[1] = 0;
		} else {
			/*
			 * The low n bits of the high word become the top n
			 * bits of the low word.  The unsigned left shift
			 * discards everything else, so no mask is needed
			 * (and n is in 1..63, so both shift counts are
			 * within range).
			 */
			a[0] = (a[0] >> n) | (a[1] << (64 - n));
			a[1] >>= n;
		}
	} else {
		n = (unsigned int)b;

		if (n >= 128) {
			a[0] = 0;
			a[1] = 0;
		} else if (n >= 64) {
			a[1] = a[0] << (n - 64);
			a[0] = 0;
		} else {
			/*
			 * The top n bits of the low word carry into the
			 * bottom of the high word.
			 */
			a[1] = (a[1] << n) | (a[0] >> (64 - n));
			a[0] <<= n;
		}
	}
}
114 
/*
 * Count how many single-bit right shifts, beyond the first, are needed to
 * reduce the 128-bit value a to zero.  For a non-zero value this is the
 * zero-based position of its most significant set bit; for 0 and 1 the
 * result is 0.  The caller's value is not modified.
 */
static int
dt_nbits_128(uint64_t *a)
{
	uint64_t none[2] = { 0, 0 };
	uint64_t work[2];
	int count = 0;

	work[0] = a[0];
	work[1] = a[1];

	for (dt_shift_128(work, -1); dt_gt_128(work, none);
	    dt_shift_128(work, -1)) {
		count++;
	}

	return (count);
}
133 
/*
 * Compute difference = minuend - subtrahend on 128-bit values (element 0
 * is the low word, element 1 the high word), wrapping modulo 2^128.  All
 * inputs are read before anything is stored, so difference may alias
 * either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	/* A borrow is needed when the low-word subtraction wraps. */
	uint64_t borrow = (minuend[0] < subtrahend[0]) ? 1 : 0;
	uint64_t lo = minuend[0] - subtrahend[0];
	uint64_t hi = minuend[1] - subtrahend[1] - borrow;

	difference[0] = lo;
	difference[1] = hi;
}
146 
/*
 * Compute sum = addend1 + addend2 on 128-bit values (element 0 is the low
 * word, element 1 the high word), wrapping modulo 2^128.  All inputs are
 * read before anything is stored, so sum may alias either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	/*
	 * If the low-word addition wrapped, the result is smaller than
	 * both operands; checking one of them is therefore sufficient.
	 */
	uint64_t carry = (lo < addend1[0]) ? 1 : 0;
	uint64_t hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
159 
160 /*
161  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
162  * use native multiplication on those, and then re-combine into the
163  * resulting 128-bit value.
164  *
165  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
166  *     hi1 * hi2 << 64 +
167  *     hi1 * lo2 << 32 +
168  *     hi2 * lo1 << 32 +
169  *     lo1 * lo2
170  */
171 static void
172 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
173 {
174 	uint64_t hi1, hi2, lo1, lo2;
175 	uint64_t tmp[2];
176 
177 	hi1 = factor1 >> 32;
178 	hi2 = factor2 >> 32;
179 
180 	lo1 = factor1 & DT_MASK_LO;
181 	lo2 = factor2 & DT_MASK_LO;
182 
183 	product[0] = lo1 * lo2;
184 	product[1] = hi1 * hi2;
185 
186 	tmp[0] = hi1 * lo2;
187 	tmp[1] = 0;
188 	dt_shift_128(tmp, 32);
189 	dt_add_128(product, tmp, product);
190 
191 	tmp[0] = hi2 * lo1;
192 	tmp[1] = 0;
193 	dt_shift_128(tmp, 32);
194 	dt_add_128(product, tmp, product);
195 }
196 
/*
 * Divide a 128-bit dividend by a 64-bit divisor, storing the 128-bit
 * quotient (the remainder is discarded).  The divisor must be non-zero.
 * The quotient is accumulated in a local, so quotient may alias dividend.
 *
 * This is long-hand division: the divisor is first shifted left until its
 * top set bit reaches bit 127, with a single-bit marker shifted by the
 * same amount.  While the running remainder is still at least the original
 * divisor, we subtract the shifted divisor whenever it fits -- setting the
 * marker's bit in the quotient -- and then move both one bit to the right.
 */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t bit[2] = { 1, 0 };
	uint64_t rem[2];
	uint64_t sub[2];
	uint64_t wide[2];
	uint64_t scan;
	int width = 0;

	assert(divisor != 0);

	wide[0] = divisor;
	wide[1] = 0;

	sub[0] = divisor;
	sub[1] = 0;

	rem[0] = dividend[0];
	rem[1] = dividend[1];

	/* Number of significant bits in the divisor. */
	for (scan = divisor; scan > 0; scan >>= 1)
		width++;

	dt_shift_128(sub, 128 - width);
	dt_shift_128(bit, 128 - width);

	while (dt_ge_128(rem, wide)) {
		if (dt_ge_128(rem, sub)) {
			dt_subtract_128(rem, sub, rem);
			quot[0] |= bit[0];
			quot[1] |= bit[1];
		}

		dt_shift_128(sub, -1);
		dt_shift_128(bit, -1);
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
248 
249 /*
250  * This is the long-hand method of calculating a square root.
251  * The algorithm is as follows:
252  *
253  * 1. Group the digits by 2 from the right.
254  * 2. Over the leftmost group, find the largest single-digit number
255  *    whose square is less than that group.
256  * 3. Subtract the result of the previous step (2 or 4, depending) and
257  *    bring down the next two-digit group.
258  * 4. For the result R we have so far, find the largest single-digit number
259  *    x such that 2 * R * 10 * x + x^2 is less than the result from step 3.
260  *    (Note that this is doubling R and performing a decimal left-shift by 1
261  *    and searching for the appropriate decimal to fill the one's place.)
262  *    The value x is the next digit in the square root.
263  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
264  * dealing with integers, so the above is sufficient.)
265  *
266  * In decimal, the square root of 582,734 would be calculated as so:
267  *
268  *     __7__6__3
269  *    | 58 27 34
270  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
271  *      --
272  *       9 27    (Subtract and bring down the next group.)
273  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
274  *      -----     the square root)
275  *         51 34 (Subtract and bring down the next group.)
276  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
277  *         -----  the square root)
278  *          5 65 (remainder)
279  *
280  * The above algorithm applies similarly in binary, but note that the
281  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
282  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
283  * preceding difference?
284  *
285  * In binary, the square root of 11011011 would be calculated as so:
286  *
287  *     __1__1__1__0
288  *    | 11 01 10 11
289  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
290  *      --
291  *      10 01 10 11
292  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
293  *      -----
294  *       1 00 10 11
295  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
296  *       -------
297  *          1 01 11
298  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
299  *
300  */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	uint64_t root[2] = { 0, 0 };
	uint64_t rem[2] = { 0, 0 };
	uint64_t unit[2] = { 1, 0 };
	uint64_t pair[2];
	uint64_t trial[2];
	int shift;

	/*
	 * Walk the bit pairs of the input from the most significant pair
	 * that can contain a set bit down to the pair at bits 1..0.
	 */
	for (shift = (dt_nbits_128(square) / 2) * 2; shift >= 0; shift -= 2) {
		/*
		 * Bring down the next pair of bits into the remainder.
		 */
		pair[0] = square[0];
		pair[1] = square[1];
		dt_shift_128(pair, -shift);
		pair[0] &= 0x3;
		pair[1] = 0;

		dt_shift_128(rem, 2);
		dt_add_128(rem, pair, rem);

		/*
		 * trial = R << 2 + 1, computed from the root so far.
		 */
		trial[0] = root[0];
		trial[1] = root[1];
		dt_shift_128(trial, 2);
		dt_add_128(trial, unit, trial);

		/*
		 * Either way the root gains one more bit; that bit is set
		 * only when the trial value fits in the remainder.
		 */
		dt_shift_128(root, 1);

		if (dt_le_128(trial, rem)) {
			dt_subtract_128(rem, trial, rem);
			dt_add_128(root, unit, root);
		}
	}

	assert(root[1] == 0);

	return (root[0]);
}
351 
/*
 * Compute the integer standard deviation from stddev() aggregation data,
 * using the approximation sqrt(average(x**2) - average(x)**2).
 *
 * As used here, data[0] is the divisor for both averages (the number of
 * samples), data[1] is the running sum interpreted as a signed value, and
 * data[2..3] is the 128-bit running sum of squares.  Both averages are
 * additionally divided by normal.  Neither data[0] nor normal may be zero.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
	uint64_t mean_of_squares[2];
	uint64_t square_of_mean[2];
	uint64_t variance[2];
	uint64_t magnitude;
	int64_t mean;

	/* average(x**2), normalized. */
	dt_divide_128(&data[2], normal, mean_of_squares);
	dt_divide_128(mean_of_squares, data[0], mean_of_squares);

	/*
	 * average(x), normalized.  Only its magnitude matters because it
	 * is about to be squared.
	 */
	mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];
	magnitude = (uint64_t)(mean < 0 ? -mean : mean);

	dt_multiply_128(magnitude, magnitude, square_of_mean);
	dt_subtract_128(mean_of_squares, square_of_mean, variance);

	return (dt_sqrt_128(variance));
}
379 
380 static int
381 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
382     dtrace_bufdesc_t *buf, size_t offs)
383 {
384 	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
385 	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
386 	char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
387 	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
388 	const char *str = NULL;
389 	static const char *e_str[2] = { " -> ", " => " };
390 	static const char *r_str[2] = { " <- ", " <= " };
391 	static const char *ent = "entry", *ret = "return";
392 	static int entlen = 0, retlen = 0;
393 	dtrace_epid_t next, id = epd->dtepd_epid;
394 	int rval;
395 
396 	if (entlen == 0) {
397 		assert(retlen == 0);
398 		entlen = strlen(ent);
399 		retlen = strlen(ret);
400 	}
401 
402 	/*
403 	 * If the name of the probe is "entry" or ends with "-entry", we
404 	 * treat it as an entry; if it is "return" or ends with "-return",
405 	 * we treat it as a return.  (This allows application-provided probes
406 	 * like "method-entry" or "function-entry" to participate in flow
407 	 * indentation -- without accidentally misinterpreting popular probe
408 	 * names like "carpentry", "gentry" or "Coventry".)
409 	 */
410 	if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
411 	    (sub == n || sub[-1] == '-')) {
412 		flow = DTRACEFLOW_ENTRY;
413 		str = e_str[strcmp(p, "syscall") == 0];
414 	} else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
415 	    (sub == n || sub[-1] == '-')) {
416 		flow = DTRACEFLOW_RETURN;
417 		str = r_str[strcmp(p, "syscall") == 0];
418 	}
419 
420 	/*
421 	 * If we're going to indent this, we need to check the ID of our last
422 	 * call.  If we're looking at the same probe ID but a different EPID,
423 	 * we _don't_ want to indent.  (Yes, there are some minor holes in
424 	 * this scheme -- it's a heuristic.)
425 	 */
426 	if (flow == DTRACEFLOW_ENTRY) {
427 		if ((last != DTRACE_EPIDNONE && id != last &&
428 		    pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
429 			flow = DTRACEFLOW_NONE;
430 	}
431 
432 	/*
433 	 * If we're going to unindent this, it's more difficult to see if
434 	 * we don't actually want to unindent it -- we need to look at the
435 	 * _next_ EPID.
436 	 */
437 	if (flow == DTRACEFLOW_RETURN) {
438 		offs += epd->dtepd_size;
439 
440 		do {
441 			if (offs >= buf->dtbd_size) {
442 				/*
443 				 * We're at the end -- maybe.  If the oldest
444 				 * record is non-zero, we need to wrap.
445 				 */
446 				if (buf->dtbd_oldest != 0) {
447 					offs = 0;
448 				} else {
449 					goto out;
450 				}
451 			}
452 
453 			next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
454 
455 			if (next == DTRACE_EPIDNONE)
456 				offs += sizeof (id);
457 		} while (next == DTRACE_EPIDNONE);
458 
459 		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
460 			return (rval);
461 
462 		if (next != id && npd->dtpd_id == pd->dtpd_id)
463 			flow = DTRACEFLOW_NONE;
464 	}
465 
466 out:
467 	if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
468 		data->dtpda_prefix = str;
469 	} else {
470 		data->dtpda_prefix = "| ";
471 	}
472 
473 	if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
474 		data->dtpda_indent -= 2;
475 
476 	data->dtpda_flow = flow;
477 
478 	return (0);
479 }
480 
481 static int
482 dt_nullprobe()
483 {
484 	return (DTRACE_CONSUME_THIS);
485 }
486 
487 static int
488 dt_nullrec()
489 {
490 	return (DTRACE_CONSUME_NEXT);
491 }
492 
493 int
494 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
495     uint64_t normal, long double total, char positives, char negatives)
496 {
497 	long double f;
498 	uint_t depth, len = 40;
499 
500 	const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
501 	const char *spaces = "                                        ";
502 
503 	assert(strlen(ats) == len && strlen(spaces) == len);
504 	assert(!(total == 0 && (positives || negatives)));
505 	assert(!(val < 0 && !negatives));
506 	assert(!(val > 0 && !positives));
507 	assert(!(val != 0 && total == 0));
508 
509 	if (!negatives) {
510 		if (positives) {
511 			f = (dt_fabsl((long double)val) * len) / total;
512 			depth = (uint_t)(f + 0.5);
513 		} else {
514 			depth = 0;
515 		}
516 
517 		return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
518 		    spaces + depth, (long long)val / normal));
519 	}
520 
521 	if (!positives) {
522 		f = (dt_fabsl((long double)val) * len) / total;
523 		depth = (uint_t)(f + 0.5);
524 
525 		return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
526 		    ats + len - depth, (long long)val / normal));
527 	}
528 
529 	/*
530 	 * If we're here, we have both positive and negative bucket values.
531 	 * To express this graphically, we're going to generate both positive
532 	 * and negative bars separated by a centerline.  These bars are half
533 	 * the size of normal quantize()/lquantize() bars, so we divide the
534 	 * length in half before calculating the bar length.
535 	 */
536 	len /= 2;
537 	ats = &ats[len];
538 	spaces = &spaces[len];
539 
540 	f = (dt_fabsl((long double)val) * len) / total;
541 	depth = (uint_t)(f + 0.5);
542 
543 	if (val <= 0) {
544 		return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
545 		    ats + len - depth, len, "", (long long)val / normal));
546 	} else {
547 		return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
548 		    ats + len - depth, spaces + depth,
549 		    (long long)val / normal));
550 	}
551 }
552 
553 int
554 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
555     size_t size, uint64_t normal)
556 {
557 	const int64_t *data = addr;
558 	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
559 	long double total = 0;
560 	char positives = 0, negatives = 0;
561 
562 	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
563 		return (dt_set_errno(dtp, EDT_DMISMATCH));
564 
565 	while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
566 		first_bin++;
567 
568 	if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
569 		/*
570 		 * There isn't any data.  This is possible if (and only if)
571 		 * negative increment values have been used.  In this case,
572 		 * we'll print the buckets around 0.
573 		 */
574 		first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
575 		last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
576 	} else {
577 		if (first_bin > 0)
578 			first_bin--;
579 
580 		while (last_bin > 0 && data[last_bin] == 0)
581 			last_bin--;
582 
583 		if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
584 			last_bin++;
585 	}
586 
587 	for (i = first_bin; i <= last_bin; i++) {
588 		positives |= (data[i] > 0);
589 		negatives |= (data[i] < 0);
590 		total += dt_fabsl((long double)data[i]);
591 	}
592 
593 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
594 	    "------------- Distribution -------------", "count") < 0)
595 		return (-1);
596 
597 	for (i = first_bin; i <= last_bin; i++) {
598 		if (dt_printf(dtp, fp, "%16lld ",
599 		    (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
600 			return (-1);
601 
602 		if (dt_print_quantline(dtp, fp, data[i], normal, total,
603 		    positives, negatives) < 0)
604 			return (-1);
605 	}
606 
607 	return (0);
608 }
609 
610 int
611 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
612     size_t size, uint64_t normal)
613 {
614 	const int64_t *data = addr;
615 	int i, first_bin, last_bin, base;
616 	uint64_t arg;
617 	long double total = 0;
618 	uint16_t step, levels;
619 	char positives = 0, negatives = 0;
620 
621 	if (size < sizeof (uint64_t))
622 		return (dt_set_errno(dtp, EDT_DMISMATCH));
623 
624 	arg = *data++;
625 	size -= sizeof (uint64_t);
626 
627 	base = DTRACE_LQUANTIZE_BASE(arg);
628 	step = DTRACE_LQUANTIZE_STEP(arg);
629 	levels = DTRACE_LQUANTIZE_LEVELS(arg);
630 
631 	first_bin = 0;
632 	last_bin = levels + 1;
633 
634 	if (size != sizeof (uint64_t) * (levels + 2))
635 		return (dt_set_errno(dtp, EDT_DMISMATCH));
636 
637 	while (first_bin <= levels + 1 && data[first_bin] == 0)
638 		first_bin++;
639 
640 	if (first_bin > levels + 1) {
641 		first_bin = 0;
642 		last_bin = 2;
643 	} else {
644 		if (first_bin > 0)
645 			first_bin--;
646 
647 		while (last_bin > 0 && data[last_bin] == 0)
648 			last_bin--;
649 
650 		if (last_bin < levels + 1)
651 			last_bin++;
652 	}
653 
654 	for (i = first_bin; i <= last_bin; i++) {
655 		positives |= (data[i] > 0);
656 		negatives |= (data[i] < 0);
657 		total += dt_fabsl((long double)data[i]);
658 	}
659 
660 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
661 	    "------------- Distribution -------------", "count") < 0)
662 		return (-1);
663 
664 	for (i = first_bin; i <= last_bin; i++) {
665 		char c[32];
666 		int err;
667 
668 		if (i == 0) {
669 			(void) snprintf(c, sizeof (c), "< %d",
670 			    base / (uint32_t)normal);
671 			err = dt_printf(dtp, fp, "%16s ", c);
672 		} else if (i == levels + 1) {
673 			(void) snprintf(c, sizeof (c), ">= %d",
674 			    base + (levels * step));
675 			err = dt_printf(dtp, fp, "%16s ", c);
676 		} else {
677 			err = dt_printf(dtp, fp, "%16d ",
678 			    base + (i - 1) * step);
679 		}
680 
681 		if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
682 		    total, positives, negatives) < 0)
683 			return (-1);
684 	}
685 
686 	return (0);
687 }
688 
689 int
690 dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
691     size_t size, uint64_t normal)
692 {
693 	int i, first_bin, last_bin, bin = 1, order, levels;
694 	uint16_t factor, low, high, nsteps;
695 	const int64_t *data = addr;
696 	int64_t value = 1, next, step;
697 	char positives = 0, negatives = 0;
698 	long double total = 0;
699 	uint64_t arg;
700 	char c[32];
701 
702 	if (size < sizeof (uint64_t))
703 		return (dt_set_errno(dtp, EDT_DMISMATCH));
704 
705 	arg = *data++;
706 	size -= sizeof (uint64_t);
707 
708 	factor = DTRACE_LLQUANTIZE_FACTOR(arg);
709 	low = DTRACE_LLQUANTIZE_LOW(arg);
710 	high = DTRACE_LLQUANTIZE_HIGH(arg);
711 	nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
712 
713 	/*
714 	 * We don't expect to be handed invalid llquantize() parameters here,
715 	 * but sanity check them (to a degree) nonetheless.
716 	 */
717 	if (size > INT32_MAX || factor < 2 || low >= high ||
718 	    nsteps == 0 || factor > nsteps)
719 		return (dt_set_errno(dtp, EDT_DMISMATCH));
720 
721 	levels = (int)size / sizeof (uint64_t);
722 
723 	first_bin = 0;
724 	last_bin = levels - 1;
725 
726 	while (first_bin < levels && data[first_bin] == 0)
727 		first_bin++;
728 
729 	if (first_bin == levels) {
730 		first_bin = 0;
731 		last_bin = 1;
732 	} else {
733 		if (first_bin > 0)
734 			first_bin--;
735 
736 		while (last_bin > 0 && data[last_bin] == 0)
737 			last_bin--;
738 
739 		if (last_bin < levels - 1)
740 			last_bin++;
741 	}
742 
743 	for (i = first_bin; i <= last_bin; i++) {
744 		positives |= (data[i] > 0);
745 		negatives |= (data[i] < 0);
746 		total += dt_fabsl((long double)data[i]);
747 	}
748 
749 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
750 	    "------------- Distribution -------------", "count") < 0)
751 		return (-1);
752 
753 	for (order = 0; order < low; order++)
754 		value *= factor;
755 
756 	next = value * factor;
757 	step = next > nsteps ? next / nsteps : 1;
758 
759 	if (first_bin == 0) {
760 		(void) snprintf(c, sizeof (c), "< %lld", value);
761 
762 		if (dt_printf(dtp, fp, "%16s ", c) < 0)
763 			return (-1);
764 
765 		if (dt_print_quantline(dtp, fp, data[0], normal,
766 		    total, positives, negatives) < 0)
767 			return (-1);
768 	}
769 
770 	while (order <= high) {
771 		if (bin >= first_bin && bin <= last_bin) {
772 			if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
773 				return (-1);
774 
775 			if (dt_print_quantline(dtp, fp, data[bin],
776 			    normal, total, positives, negatives) < 0)
777 				return (-1);
778 		}
779 
780 		assert(value < next);
781 		bin++;
782 
783 		if ((value += step) != next)
784 			continue;
785 
786 		next = value * factor;
787 		step = next > nsteps ? next / nsteps : 1;
788 		order++;
789 	}
790 
791 	if (last_bin < bin)
792 		return (0);
793 
794 	assert(last_bin == bin);
795 	(void) snprintf(c, sizeof (c), ">= %lld", value);
796 
797 	if (dt_printf(dtp, fp, "%16s ", c) < 0)
798 		return (-1);
799 
800 	return (dt_print_quantline(dtp, fp, data[bin], normal,
801 	    total, positives, negatives));
802 }
803 
804 /*ARGSUSED*/
805 static int
806 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
807     size_t size, uint64_t normal)
808 {
809 	/* LINTED - alignment */
810 	int64_t *data = (int64_t *)addr;
811 
812 	return (dt_printf(dtp, fp, " %16lld", data[0] ?
813 	    (long long)(data[1] / (int64_t)normal / data[0]) : 0));
814 }
815 
816 /*ARGSUSED*/
817 static int
818 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
819     size_t size, uint64_t normal)
820 {
821 	/* LINTED - alignment */
822 	uint64_t *data = (uint64_t *)addr;
823 
824 	return (dt_printf(dtp, fp, " %16llu", data[0] ?
825 	    (unsigned long long) dt_stddev(data, normal) : 0));
826 }
827 
828 /*ARGSUSED*/
829 int
830 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
831     size_t nbytes, int width, int quiet, int forceraw)
832 {
833 	/*
834 	 * If the byte stream is a series of printable characters, followed by
835 	 * a terminating byte, we print it out as a string.  Otherwise, we
836 	 * assume that it's something else and just print the bytes.
837 	 */
838 	int i, j, margin = 5;
839 	char *c = (char *)addr;
840 
841 	if (nbytes == 0)
842 		return (0);
843 
844 	if (forceraw)
845 		goto raw;
846 
847 	if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
848 		goto raw;
849 
850 	for (i = 0; i < nbytes; i++) {
851 		/*
852 		 * We define a "printable character" to be one for which
853 		 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
854 		 * or a character which is either backspace or the bell.
855 		 * Backspace and the bell are regrettably special because
856 		 * they fail the first two tests -- and yet they are entirely
857 		 * printable.  These are the only two control characters that
858 		 * have meaning for the terminal and for which isprint(3C) and
859 		 * isspace(3C) return 0.
860 		 */
861 		if (isprint(c[i]) || isspace(c[i]) ||
862 		    c[i] == '\b' || c[i] == '\a')
863 			continue;
864 
865 		if (c[i] == '\0' && i > 0) {
866 			/*
867 			 * This looks like it might be a string.  Before we
868 			 * assume that it is indeed a string, check the
869 			 * remainder of the byte range; if it contains
870 			 * additional non-nul characters, we'll assume that
871 			 * it's a binary stream that just happens to look like
872 			 * a string, and we'll print out the individual bytes.
873 			 */
874 			for (j = i + 1; j < nbytes; j++) {
875 				if (c[j] != '\0')
876 					break;
877 			}
878 
879 			if (j != nbytes)
880 				break;
881 
882 			if (quiet)
883 				return (dt_printf(dtp, fp, "%s", c));
884 			else
885 				return (dt_printf(dtp, fp, "  %-*s", width, c));
886 		}
887 
888 		break;
889 	}
890 
891 	if (i == nbytes) {
892 		/*
893 		 * The byte range is all printable characters, but there is
894 		 * no trailing nul byte.  We'll assume that it's a string and
895 		 * print it as such.
896 		 */
897 		char *s = alloca(nbytes + 1);
898 		bcopy(c, s, nbytes);
899 		s[nbytes] = '\0';
900 		return (dt_printf(dtp, fp, "  %-*s", width, s));
901 	}
902 
903 raw:
904 	if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
905 		return (-1);
906 
907 	for (i = 0; i < 16; i++)
908 		if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
909 			return (-1);
910 
911 	if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
912 		return (-1);
913 
914 
915 	for (i = 0; i < nbytes; i += 16) {
916 		if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
917 			return (-1);
918 
919 		for (j = i; j < i + 16 && j < nbytes; j++) {
920 			if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
921 				return (-1);
922 		}
923 
924 		while (j++ % 16) {
925 			if (dt_printf(dtp, fp, "   ") < 0)
926 				return (-1);
927 		}
928 
929 		if (dt_printf(dtp, fp, "  ") < 0)
930 			return (-1);
931 
932 		for (j = i; j < i + 16 && j < nbytes; j++) {
933 			if (dt_printf(dtp, fp, "%c",
934 			    c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
935 				return (-1);
936 		}
937 
938 		if (dt_printf(dtp, fp, "\n") < 0)
939 			return (-1);
940 	}
941 
942 	return (0);
943 }
944 
945 int
946 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
947     caddr_t addr, int depth, int size)
948 {
949 	dtrace_syminfo_t dts;
950 	GElf_Sym sym;
951 	int i, indent;
952 	char c[PATH_MAX * 2];
953 	uint64_t pc;
954 
955 	if (dt_printf(dtp, fp, "\n") < 0)
956 		return (-1);
957 
958 	if (format == NULL)
959 		format = "%s";
960 
961 	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
962 		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
963 	else
964 		indent = _dtrace_stkindent;
965 
966 	for (i = 0; i < depth; i++) {
967 		switch (size) {
968 		case sizeof (uint32_t):
969 			/* LINTED - alignment */
970 			pc = *((uint32_t *)addr);
971 			break;
972 
973 		case sizeof (uint64_t):
974 			/* LINTED - alignment */
975 			pc = *((uint64_t *)addr);
976 			break;
977 
978 		default:
979 			return (dt_set_errno(dtp, EDT_BADSTACKPC));
980 		}
981 
982 		if (pc == NULL)
983 			break;
984 
985 		addr += size;
986 
987 		if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
988 			return (-1);
989 
990 		if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
991 			if (pc > sym.st_value) {
992 				(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
993 				    dts.dts_object, dts.dts_name,
994 				    pc - sym.st_value);
995 			} else {
996 				(void) snprintf(c, sizeof (c), "%s`%s",
997 				    dts.dts_object, dts.dts_name);
998 			}
999 		} else {
1000 			/*
1001 			 * We'll repeat the lookup, but this time we'll specify
1002 			 * a NULL GElf_Sym -- indicating that we're only
1003 			 * interested in the containing module.
1004 			 */
1005 			if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1006 				(void) snprintf(c, sizeof (c), "%s`0x%llx",
1007 				    dts.dts_object, pc);
1008 			} else {
1009 				(void) snprintf(c, sizeof (c), "0x%llx", pc);
1010 			}
1011 		}
1012 
1013 		if (dt_printf(dtp, fp, format, c) < 0)
1014 			return (-1);
1015 
1016 		if (dt_printf(dtp, fp, "\n") < 0)
1017 			return (-1);
1018 	}
1019 
1020 	return (0);
1021 }
1022 
1023 int
1024 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1025     caddr_t addr, uint64_t arg)
1026 {
1027 	/* LINTED - alignment */
1028 	uint64_t *pc = (uint64_t *)addr;
1029 	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
1030 	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
1031 	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
1032 	const char *str = strsize ? strbase : NULL;
1033 	int err = 0;
1034 
1035 	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
1036 	struct ps_prochandle *P;
1037 	GElf_Sym sym;
1038 	int i, indent;
1039 	pid_t pid;
1040 
1041 	if (depth == 0)
1042 		return (0);
1043 
1044 	pid = (pid_t)*pc++;
1045 
1046 	if (dt_printf(dtp, fp, "\n") < 0)
1047 		return (-1);
1048 
1049 	if (format == NULL)
1050 		format = "%s";
1051 
1052 	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1053 		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1054 	else
1055 		indent = _dtrace_stkindent;
1056 
1057 	/*
1058 	 * Ultimately, we need to add an entry point in the library vector for
1059 	 * determining <symbol, offset> from <pid, address>.  For now, if
1060 	 * this is a vector open, we just print the raw address or string.
1061 	 */
1062 	if (dtp->dt_vector == NULL)
1063 		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1064 	else
1065 		P = NULL;
1066 
1067 	if (P != NULL)
1068 		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1069 
1070 	for (i = 0; i < depth && pc[i] != NULL; i++) {
1071 		const prmap_t *map;
1072 
1073 		if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1074 			break;
1075 
1076 		if (P != NULL && Plookup_by_addr(P, pc[i],
1077 		    name, sizeof (name), &sym) == 0) {
1078 			(void) Pobjname(P, pc[i], objname, sizeof (objname));
1079 
1080 			if (pc[i] > sym.st_value) {
1081 				(void) snprintf(c, sizeof (c),
1082 				    "%s`%s+0x%llx", dt_basename(objname), name,
1083 				    (u_longlong_t)(pc[i] - sym.st_value));
1084 			} else {
1085 				(void) snprintf(c, sizeof (c),
1086 				    "%s`%s", dt_basename(objname), name);
1087 			}
1088 		} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
1089 		    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
1090 		    (map->pr_mflags & MA_WRITE)))) {
1091 			/*
1092 			 * If the current string pointer in the string table
1093 			 * does not point to an empty string _and_ the program
1094 			 * counter falls in a writable region, we'll use the
1095 			 * string from the string table instead of the raw
1096 			 * address.  This last condition is necessary because
1097 			 * some (broken) ustack helpers will return a string
1098 			 * even for a program counter that they can't
1099 			 * identify.  If we have a string for a program
1100 			 * counter that falls in a segment that isn't
1101 			 * writable, we assume that we have fallen into this
1102 			 * case and we refuse to use the string.
1103 			 */
1104 			(void) snprintf(c, sizeof (c), "%s", str);
1105 		} else {
1106 			if (P != NULL && Pobjname(P, pc[i], objname,
1107 			    sizeof (objname)) != NULL) {
1108 				(void) snprintf(c, sizeof (c), "%s`0x%llx",
1109 				    dt_basename(objname), (u_longlong_t)pc[i]);
1110 			} else {
1111 				(void) snprintf(c, sizeof (c), "0x%llx",
1112 				    (u_longlong_t)pc[i]);
1113 			}
1114 		}
1115 
1116 		if ((err = dt_printf(dtp, fp, format, c)) < 0)
1117 			break;
1118 
1119 		if ((err = dt_printf(dtp, fp, "\n")) < 0)
1120 			break;
1121 
1122 		if (str != NULL && str[0] == '@') {
1123 			/*
1124 			 * If the first character of the string is an "at" sign,
1125 			 * then the string is inferred to be an annotation --
1126 			 * and it is printed out beneath the frame and offset
1127 			 * with brackets.
1128 			 */
1129 			if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1130 				break;
1131 
1132 			(void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
1133 
1134 			if ((err = dt_printf(dtp, fp, format, c)) < 0)
1135 				break;
1136 
1137 			if ((err = dt_printf(dtp, fp, "\n")) < 0)
1138 				break;
1139 		}
1140 
1141 		if (str != NULL) {
1142 			str += strlen(str) + 1;
1143 			if (str - strbase >= strsize)
1144 				str = NULL;
1145 		}
1146 	}
1147 
1148 	if (P != NULL) {
1149 		dt_proc_unlock(dtp, P);
1150 		dt_proc_release(dtp, P);
1151 	}
1152 
1153 	return (err);
1154 }
1155 
1156 static int
1157 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1158 {
1159 	/* LINTED - alignment */
1160 	uint64_t pid = ((uint64_t *)addr)[0];
1161 	/* LINTED - alignment */
1162 	uint64_t pc = ((uint64_t *)addr)[1];
1163 	const char *format = "  %-50s";
1164 	char *s;
1165 	int n, len = 256;
1166 
1167 	if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1168 		struct ps_prochandle *P;
1169 
1170 		if ((P = dt_proc_grab(dtp, pid,
1171 		    PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1172 			GElf_Sym sym;
1173 
1174 			dt_proc_lock(dtp, P);
1175 
1176 			if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1177 				pc = sym.st_value;
1178 
1179 			dt_proc_unlock(dtp, P);
1180 			dt_proc_release(dtp, P);
1181 		}
1182 	}
1183 
1184 	do {
1185 		n = len;
1186 		s = alloca(n);
1187 	} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1188 
1189 	return (dt_printf(dtp, fp, format, s));
1190 }
1191 
1192 int
1193 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1194 {
1195 	/* LINTED - alignment */
1196 	uint64_t pid = ((uint64_t *)addr)[0];
1197 	/* LINTED - alignment */
1198 	uint64_t pc = ((uint64_t *)addr)[1];
1199 	int err = 0;
1200 
1201 	char objname[PATH_MAX], c[PATH_MAX * 2];
1202 	struct ps_prochandle *P;
1203 
1204 	if (format == NULL)
1205 		format = "  %-50s";
1206 
1207 	/*
1208 	 * See the comment in dt_print_ustack() for the rationale for
1209 	 * printing raw addresses in the vectored case.
1210 	 */
1211 	if (dtp->dt_vector == NULL)
1212 		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1213 	else
1214 		P = NULL;
1215 
1216 	if (P != NULL)
1217 		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1218 
1219 	if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != NULL) {
1220 		(void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1221 	} else {
1222 		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1223 	}
1224 
1225 	err = dt_printf(dtp, fp, format, c);
1226 
1227 	if (P != NULL) {
1228 		dt_proc_unlock(dtp, P);
1229 		dt_proc_release(dtp, P);
1230 	}
1231 
1232 	return (err);
1233 }
1234 
1235 static int
1236 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1237 {
1238 	/* LINTED - alignment */
1239 	uint64_t pc = *((uint64_t *)addr);
1240 	dtrace_syminfo_t dts;
1241 	GElf_Sym sym;
1242 	char c[PATH_MAX * 2];
1243 
1244 	if (format == NULL)
1245 		format = "  %-50s";
1246 
1247 	if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1248 		(void) snprintf(c, sizeof (c), "%s`%s",
1249 		    dts.dts_object, dts.dts_name);
1250 	} else {
1251 		/*
1252 		 * We'll repeat the lookup, but this time we'll specify a
1253 		 * NULL GElf_Sym -- indicating that we're only interested in
1254 		 * the containing module.
1255 		 */
1256 		if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1257 			(void) snprintf(c, sizeof (c), "%s`0x%llx",
1258 			    dts.dts_object, (u_longlong_t)pc);
1259 		} else {
1260 			(void) snprintf(c, sizeof (c), "0x%llx",
1261 			    (u_longlong_t)pc);
1262 		}
1263 	}
1264 
1265 	if (dt_printf(dtp, fp, format, c) < 0)
1266 		return (-1);
1267 
1268 	return (0);
1269 }
1270 
1271 int
1272 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1273 {
1274 	/* LINTED - alignment */
1275 	uint64_t pc = *((uint64_t *)addr);
1276 	dtrace_syminfo_t dts;
1277 	char c[PATH_MAX * 2];
1278 
1279 	if (format == NULL)
1280 		format = "  %-50s";
1281 
1282 	if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1283 		(void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1284 	} else {
1285 		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1286 	}
1287 
1288 	if (dt_printf(dtp, fp, format, c) < 0)
1289 		return (-1);
1290 
1291 	return (0);
1292 }
1293 
/*
 * Argument passed by dt_normalize() to the dt_normalize_agg() aggregation
 * walker:  the aggregation variable to act upon and the normalization value
 * to store in each matching aggregation datum.
 */
typedef struct dt_normal {
	dtrace_aggvarid_t dtnd_id;	/* aggregation variable ID to match */
	uint64_t dtnd_normal;		/* value stored into dtada_normal */
} dt_normal_t;
1298 
1299 static int
1300 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1301 {
1302 	dt_normal_t *normal = arg;
1303 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1304 	dtrace_aggvarid_t id = normal->dtnd_id;
1305 
1306 	if (agg->dtagd_nrecs == 0)
1307 		return (DTRACE_AGGWALK_NEXT);
1308 
1309 	if (agg->dtagd_varid != id)
1310 		return (DTRACE_AGGWALK_NEXT);
1311 
1312 	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1313 	return (DTRACE_AGGWALK_NORMALIZE);
1314 }
1315 
1316 static int
1317 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1318 {
1319 	dt_normal_t normal;
1320 	caddr_t addr;
1321 
1322 	/*
1323 	 * We (should) have two records:  the aggregation ID followed by the
1324 	 * normalization value.
1325 	 */
1326 	addr = base + rec->dtrd_offset;
1327 
1328 	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1329 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1330 
1331 	/* LINTED - alignment */
1332 	normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1333 	rec++;
1334 
1335 	if (rec->dtrd_action != DTRACEACT_LIBACT)
1336 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1337 
1338 	if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1339 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1340 
1341 	addr = base + rec->dtrd_offset;
1342 
1343 	switch (rec->dtrd_size) {
1344 	case sizeof (uint64_t):
1345 		/* LINTED - alignment */
1346 		normal.dtnd_normal = *((uint64_t *)addr);
1347 		break;
1348 	case sizeof (uint32_t):
1349 		/* LINTED - alignment */
1350 		normal.dtnd_normal = *((uint32_t *)addr);
1351 		break;
1352 	case sizeof (uint16_t):
1353 		/* LINTED - alignment */
1354 		normal.dtnd_normal = *((uint16_t *)addr);
1355 		break;
1356 	case sizeof (uint8_t):
1357 		normal.dtnd_normal = *((uint8_t *)addr);
1358 		break;
1359 	default:
1360 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1361 	}
1362 
1363 	(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1364 
1365 	return (0);
1366 }
1367 
1368 static int
1369 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1370 {
1371 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1372 	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1373 
1374 	if (agg->dtagd_nrecs == 0)
1375 		return (DTRACE_AGGWALK_NEXT);
1376 
1377 	if (agg->dtagd_varid != id)
1378 		return (DTRACE_AGGWALK_NEXT);
1379 
1380 	return (DTRACE_AGGWALK_DENORMALIZE);
1381 }
1382 
1383 static int
1384 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
1385 {
1386 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1387 	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1388 
1389 	if (agg->dtagd_nrecs == 0)
1390 		return (DTRACE_AGGWALK_NEXT);
1391 
1392 	if (agg->dtagd_varid != id)
1393 		return (DTRACE_AGGWALK_NEXT);
1394 
1395 	return (DTRACE_AGGWALK_CLEAR);
1396 }
1397 
/*
 * Argument passed by dt_trunc() to the dt_trunc_agg() aggregation walker:
 * the aggregation variable to act upon and a running count of how many more
 * matching entries are to be passed over before entries start being removed.
 */
typedef struct dt_trunc {
	dtrace_aggvarid_t dttd_id;	/* aggregation variable ID to match */
	uint64_t dttd_remaining;	/* matching entries still to be kept */
} dt_trunc_t;
1402 
1403 static int
1404 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
1405 {
1406 	dt_trunc_t *trunc = arg;
1407 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1408 	dtrace_aggvarid_t id = trunc->dttd_id;
1409 
1410 	if (agg->dtagd_nrecs == 0)
1411 		return (DTRACE_AGGWALK_NEXT);
1412 
1413 	if (agg->dtagd_varid != id)
1414 		return (DTRACE_AGGWALK_NEXT);
1415 
1416 	if (trunc->dttd_remaining == 0)
1417 		return (DTRACE_AGGWALK_REMOVE);
1418 
1419 	trunc->dttd_remaining--;
1420 	return (DTRACE_AGGWALK_NEXT);
1421 }
1422 
1423 static int
1424 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1425 {
1426 	dt_trunc_t trunc;
1427 	caddr_t addr;
1428 	int64_t remaining;
1429 	int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
1430 
1431 	/*
1432 	 * We (should) have two records:  the aggregation ID followed by the
1433 	 * number of aggregation entries after which the aggregation is to be
1434 	 * truncated.
1435 	 */
1436 	addr = base + rec->dtrd_offset;
1437 
1438 	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1439 		return (dt_set_errno(dtp, EDT_BADTRUNC));
1440 
1441 	/* LINTED - alignment */
1442 	trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
1443 	rec++;
1444 
1445 	if (rec->dtrd_action != DTRACEACT_LIBACT)
1446 		return (dt_set_errno(dtp, EDT_BADTRUNC));
1447 
1448 	if (rec->dtrd_arg != DT_ACT_TRUNC)
1449 		return (dt_set_errno(dtp, EDT_BADTRUNC));
1450 
1451 	addr = base + rec->dtrd_offset;
1452 
1453 	switch (rec->dtrd_size) {
1454 	case sizeof (uint64_t):
1455 		/* LINTED - alignment */
1456 		remaining = *((int64_t *)addr);
1457 		break;
1458 	case sizeof (uint32_t):
1459 		/* LINTED - alignment */
1460 		remaining = *((int32_t *)addr);
1461 		break;
1462 	case sizeof (uint16_t):
1463 		/* LINTED - alignment */
1464 		remaining = *((int16_t *)addr);
1465 		break;
1466 	case sizeof (uint8_t):
1467 		remaining = *((int8_t *)addr);
1468 		break;
1469 	default:
1470 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1471 	}
1472 
1473 	if (remaining < 0) {
1474 		func = dtrace_aggregate_walk_valsorted;
1475 		remaining = -remaining;
1476 	} else {
1477 		func = dtrace_aggregate_walk_valrevsorted;
1478 	}
1479 
1480 	assert(remaining >= 0);
1481 	trunc.dttd_remaining = remaining;
1482 
1483 	(void) func(dtp, dt_trunc_agg, &trunc);
1484 
1485 	return (0);
1486 }
1487 
1488 static int
1489 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
1490     caddr_t addr, size_t size, uint64_t normal)
1491 {
1492 	int err;
1493 	dtrace_actkind_t act = rec->dtrd_action;
1494 
1495 	switch (act) {
1496 	case DTRACEACT_STACK:
1497 		return (dt_print_stack(dtp, fp, NULL, addr,
1498 		    rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
1499 
1500 	case DTRACEACT_USTACK:
1501 	case DTRACEACT_JSTACK:
1502 		return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
1503 
1504 	case DTRACEACT_USYM:
1505 	case DTRACEACT_UADDR:
1506 		return (dt_print_usym(dtp, fp, addr, act));
1507 
1508 	case DTRACEACT_UMOD:
1509 		return (dt_print_umod(dtp, fp, NULL, addr));
1510 
1511 	case DTRACEACT_SYM:
1512 		return (dt_print_sym(dtp, fp, NULL, addr));
1513 
1514 	case DTRACEACT_MOD:
1515 		return (dt_print_mod(dtp, fp, NULL, addr));
1516 
1517 	case DTRACEAGG_QUANTIZE:
1518 		return (dt_print_quantize(dtp, fp, addr, size, normal));
1519 
1520 	case DTRACEAGG_LQUANTIZE:
1521 		return (dt_print_lquantize(dtp, fp, addr, size, normal));
1522 
1523 	case DTRACEAGG_LLQUANTIZE:
1524 		return (dt_print_llquantize(dtp, fp, addr, size, normal));
1525 
1526 	case DTRACEAGG_AVG:
1527 		return (dt_print_average(dtp, fp, addr, size, normal));
1528 
1529 	case DTRACEAGG_STDDEV:
1530 		return (dt_print_stddev(dtp, fp, addr, size, normal));
1531 
1532 	default:
1533 		break;
1534 	}
1535 
1536 	switch (size) {
1537 	case sizeof (uint64_t):
1538 		err = dt_printf(dtp, fp, " %16lld",
1539 		    /* LINTED - alignment */
1540 		    (long long)*((uint64_t *)addr) / normal);
1541 		break;
1542 	case sizeof (uint32_t):
1543 		/* LINTED - alignment */
1544 		err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
1545 		    (uint32_t)normal);
1546 		break;
1547 	case sizeof (uint16_t):
1548 		/* LINTED - alignment */
1549 		err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
1550 		    (uint32_t)normal);
1551 		break;
1552 	case sizeof (uint8_t):
1553 		err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
1554 		    (uint32_t)normal);
1555 		break;
1556 	default:
1557 		err = dt_print_bytes(dtp, fp, addr, size, 50, 0, 0);
1558 		break;
1559 	}
1560 
1561 	return (err);
1562 }
1563 
1564 int
1565 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
1566 {
1567 	int i, aggact = 0;
1568 	dt_print_aggdata_t *pd = arg;
1569 	const dtrace_aggdata_t *aggdata = aggsdata[0];
1570 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1571 	FILE *fp = pd->dtpa_fp;
1572 	dtrace_hdl_t *dtp = pd->dtpa_dtp;
1573 	dtrace_recdesc_t *rec;
1574 	dtrace_actkind_t act;
1575 	caddr_t addr;
1576 	size_t size;
1577 
1578 	/*
1579 	 * Iterate over each record description in the key, printing the traced
1580 	 * data, skipping the first datum (the tuple member created by the
1581 	 * compiler).
1582 	 */
1583 	for (i = 1; i < agg->dtagd_nrecs; i++) {
1584 		rec = &agg->dtagd_rec[i];
1585 		act = rec->dtrd_action;
1586 		addr = aggdata->dtada_data + rec->dtrd_offset;
1587 		size = rec->dtrd_size;
1588 
1589 		if (DTRACEACT_ISAGG(act)) {
1590 			aggact = i;
1591 			break;
1592 		}
1593 
1594 		if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
1595 			return (-1);
1596 
1597 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1598 		    DTRACE_BUFDATA_AGGKEY) < 0)
1599 			return (-1);
1600 	}
1601 
1602 	assert(aggact != 0);
1603 
1604 	for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
1605 		uint64_t normal;
1606 
1607 		aggdata = aggsdata[i];
1608 		agg = aggdata->dtada_desc;
1609 		rec = &agg->dtagd_rec[aggact];
1610 		act = rec->dtrd_action;
1611 		addr = aggdata->dtada_data + rec->dtrd_offset;
1612 		size = rec->dtrd_size;
1613 
1614 		assert(DTRACEACT_ISAGG(act));
1615 		normal = aggdata->dtada_normal;
1616 
1617 		if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
1618 			return (-1);
1619 
1620 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1621 		    DTRACE_BUFDATA_AGGVAL) < 0)
1622 			return (-1);
1623 
1624 		if (!pd->dtpa_allunprint)
1625 			agg->dtagd_flags |= DTRACE_AGD_PRINTED;
1626 	}
1627 
1628 	if (dt_printf(dtp, fp, "\n") < 0)
1629 		return (-1);
1630 
1631 	if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
1632 	    DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
1633 		return (-1);
1634 
1635 	return (0);
1636 }
1637 
1638 int
1639 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
1640 {
1641 	dt_print_aggdata_t *pd = arg;
1642 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1643 	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
1644 
1645 	if (pd->dtpa_allunprint) {
1646 		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
1647 			return (0);
1648 	} else {
1649 		/*
1650 		 * If we're not printing all unprinted aggregations, then the
1651 		 * aggregation variable ID denotes a specific aggregation
1652 		 * variable that we should print -- skip any other aggregations
1653 		 * that we encounter.
1654 		 */
1655 		if (agg->dtagd_nrecs == 0)
1656 			return (0);
1657 
1658 		if (aggvarid != agg->dtagd_varid)
1659 			return (0);
1660 	}
1661 
1662 	return (dt_print_aggs(&aggdata, 1, arg));
1663 }
1664 
1665 int
1666 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
1667     const char *option, const char *value)
1668 {
1669 	int len, rval;
1670 	char *msg;
1671 	const char *errstr;
1672 	dtrace_setoptdata_t optdata;
1673 
1674 	bzero(&optdata, sizeof (optdata));
1675 	(void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
1676 
1677 	if (dtrace_setopt(dtp, option, value) == 0) {
1678 		(void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
1679 		optdata.dtsda_probe = data;
1680 		optdata.dtsda_option = option;
1681 		optdata.dtsda_handle = dtp;
1682 
1683 		if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
1684 			return (rval);
1685 
1686 		return (0);
1687 	}
1688 
1689 	errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
1690 	len = strlen(option) + strlen(value) + strlen(errstr) + 80;
1691 	msg = alloca(len);
1692 
1693 	(void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
1694 	    option, value, errstr);
1695 
1696 	if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
1697 		return (0);
1698 
1699 	return (rval);
1700 }
1701 
1702 static int
1703 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
1704     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
1705 {
1706 	dtrace_epid_t id;
1707 	size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
1708 	int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
1709 	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1710 	int rval, i, n;
1711 	dtrace_epid_t last = DTRACE_EPIDNONE;
1712 	uint64_t tracememsize = 0;
1713 	dtrace_probedata_t data;
1714 	uint64_t drops;
1715 	caddr_t addr;
1716 
1717 	bzero(&data, sizeof (data));
1718 	data.dtpda_handle = dtp;
1719 	data.dtpda_cpu = cpu;
1720 
1721 again:
1722 	for (offs = start; offs < end; ) {
1723 		dtrace_eprobedesc_t *epd;
1724 
1725 		/*
1726 		 * We're guaranteed to have an ID.
1727 		 */
1728 		id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
1729 
1730 		if (id == DTRACE_EPIDNONE) {
1731 			/*
1732 			 * This is filler to assure proper alignment of the
1733 			 * next record; we simply ignore it.
1734 			 */
1735 			offs += sizeof (id);
1736 			continue;
1737 		}
1738 
1739 		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
1740 		    &data.dtpda_pdesc)) != 0)
1741 			return (rval);
1742 
1743 		epd = data.dtpda_edesc;
1744 		data.dtpda_data = buf->dtbd_data + offs;
1745 
1746 		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
1747 			rval = dt_handle(dtp, &data);
1748 
1749 			if (rval == DTRACE_CONSUME_NEXT)
1750 				goto nextepid;
1751 
1752 			if (rval == DTRACE_CONSUME_ERROR)
1753 				return (-1);
1754 		}
1755 
1756 		if (flow)
1757 			(void) dt_flowindent(dtp, &data, last, buf, offs);
1758 
1759 		rval = (*efunc)(&data, arg);
1760 
1761 		if (flow) {
1762 			if (data.dtpda_flow == DTRACEFLOW_ENTRY)
1763 				data.dtpda_indent += 2;
1764 		}
1765 
1766 		if (rval == DTRACE_CONSUME_NEXT)
1767 			goto nextepid;
1768 
1769 		if (rval == DTRACE_CONSUME_ABORT)
1770 			return (dt_set_errno(dtp, EDT_DIRABORT));
1771 
1772 		if (rval != DTRACE_CONSUME_THIS)
1773 			return (dt_set_errno(dtp, EDT_BADRVAL));
1774 
1775 		for (i = 0; i < epd->dtepd_nrecs; i++) {
1776 			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
1777 			dtrace_actkind_t act = rec->dtrd_action;
1778 
1779 			data.dtpda_data = buf->dtbd_data + offs +
1780 			    rec->dtrd_offset;
1781 			addr = data.dtpda_data;
1782 
1783 			if (act == DTRACEACT_LIBACT) {
1784 				uint64_t arg = rec->dtrd_arg;
1785 				dtrace_aggvarid_t id;
1786 
1787 				switch (arg) {
1788 				case DT_ACT_CLEAR:
1789 					/* LINTED - alignment */
1790 					id = *((dtrace_aggvarid_t *)addr);
1791 					(void) dtrace_aggregate_walk(dtp,
1792 					    dt_clear_agg, &id);
1793 					continue;
1794 
1795 				case DT_ACT_DENORMALIZE:
1796 					/* LINTED - alignment */
1797 					id = *((dtrace_aggvarid_t *)addr);
1798 					(void) dtrace_aggregate_walk(dtp,
1799 					    dt_denormalize_agg, &id);
1800 					continue;
1801 
1802 				case DT_ACT_FTRUNCATE:
1803 					if (fp == NULL)
1804 						continue;
1805 
1806 					(void) fflush(fp);
1807 					(void) ftruncate(fileno(fp), 0);
1808 					(void) fseeko(fp, 0, SEEK_SET);
1809 					continue;
1810 
1811 				case DT_ACT_NORMALIZE:
1812 					if (i == epd->dtepd_nrecs - 1)
1813 						return (dt_set_errno(dtp,
1814 						    EDT_BADNORMAL));
1815 
1816 					if (dt_normalize(dtp,
1817 					    buf->dtbd_data + offs, rec) != 0)
1818 						return (-1);
1819 
1820 					i++;
1821 					continue;
1822 
1823 				case DT_ACT_SETOPT: {
1824 					uint64_t *opts = dtp->dt_options;
1825 					dtrace_recdesc_t *valrec;
1826 					uint32_t valsize;
1827 					caddr_t val;
1828 					int rv;
1829 
1830 					if (i == epd->dtepd_nrecs - 1) {
1831 						return (dt_set_errno(dtp,
1832 						    EDT_BADSETOPT));
1833 					}
1834 
1835 					valrec = &epd->dtepd_rec[++i];
1836 					valsize = valrec->dtrd_size;
1837 
1838 					if (valrec->dtrd_action != act ||
1839 					    valrec->dtrd_arg != arg) {
1840 						return (dt_set_errno(dtp,
1841 						    EDT_BADSETOPT));
1842 					}
1843 
1844 					if (valsize > sizeof (uint64_t)) {
1845 						val = buf->dtbd_data + offs +
1846 						    valrec->dtrd_offset;
1847 					} else {
1848 						val = "1";
1849 					}
1850 
1851 					rv = dt_setopt(dtp, &data, addr, val);
1852 
1853 					if (rv != 0)
1854 						return (-1);
1855 
1856 					flow = (opts[DTRACEOPT_FLOWINDENT] !=
1857 					    DTRACEOPT_UNSET);
1858 					quiet = (opts[DTRACEOPT_QUIET] !=
1859 					    DTRACEOPT_UNSET);
1860 
1861 					continue;
1862 				}
1863 
1864 				case DT_ACT_TRUNC:
1865 					if (i == epd->dtepd_nrecs - 1)
1866 						return (dt_set_errno(dtp,
1867 						    EDT_BADTRUNC));
1868 
1869 					if (dt_trunc(dtp,
1870 					    buf->dtbd_data + offs, rec) != 0)
1871 						return (-1);
1872 
1873 					i++;
1874 					continue;
1875 
1876 				default:
1877 					continue;
1878 				}
1879 			}
1880 
1881 			if (act == DTRACEACT_TRACEMEM_DYNSIZE &&
1882 			    rec->dtrd_size == sizeof (uint64_t)) {
1883 				/* LINTED - alignment */
1884 				tracememsize = *((unsigned long long *)addr);
1885 				continue;
1886 			}
1887 
1888 			rval = (*rfunc)(&data, rec, arg);
1889 
1890 			if (rval == DTRACE_CONSUME_NEXT)
1891 				continue;
1892 
1893 			if (rval == DTRACE_CONSUME_ABORT)
1894 				return (dt_set_errno(dtp, EDT_DIRABORT));
1895 
1896 			if (rval != DTRACE_CONSUME_THIS)
1897 				return (dt_set_errno(dtp, EDT_BADRVAL));
1898 
1899 			if (act == DTRACEACT_STACK) {
1900 				int depth = rec->dtrd_arg;
1901 
1902 				if (dt_print_stack(dtp, fp, NULL, addr, depth,
1903 				    rec->dtrd_size / depth) < 0)
1904 					return (-1);
1905 				goto nextrec;
1906 			}
1907 
1908 			if (act == DTRACEACT_USTACK ||
1909 			    act == DTRACEACT_JSTACK) {
1910 				if (dt_print_ustack(dtp, fp, NULL,
1911 				    addr, rec->dtrd_arg) < 0)
1912 					return (-1);
1913 				goto nextrec;
1914 			}
1915 
1916 			if (act == DTRACEACT_SYM) {
1917 				if (dt_print_sym(dtp, fp, NULL, addr) < 0)
1918 					return (-1);
1919 				goto nextrec;
1920 			}
1921 
1922 			if (act == DTRACEACT_MOD) {
1923 				if (dt_print_mod(dtp, fp, NULL, addr) < 0)
1924 					return (-1);
1925 				goto nextrec;
1926 			}
1927 
1928 			if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
1929 				if (dt_print_usym(dtp, fp, addr, act) < 0)
1930 					return (-1);
1931 				goto nextrec;
1932 			}
1933 
1934 			if (act == DTRACEACT_UMOD) {
1935 				if (dt_print_umod(dtp, fp, NULL, addr) < 0)
1936 					return (-1);
1937 				goto nextrec;
1938 			}
1939 
1940 			if (DTRACEACT_ISPRINTFLIKE(act)) {
1941 				void *fmtdata;
1942 				int (*func)(dtrace_hdl_t *, FILE *, void *,
1943 				    const dtrace_probedata_t *,
1944 				    const dtrace_recdesc_t *, uint_t,
1945 				    const void *buf, size_t);
1946 
1947 				if ((fmtdata = dt_format_lookup(dtp,
1948 				    rec->dtrd_format)) == NULL)
1949 					goto nofmt;
1950 
1951 				switch (act) {
1952 				case DTRACEACT_PRINTF:
1953 					func = dtrace_fprintf;
1954 					break;
1955 				case DTRACEACT_PRINTA:
1956 					func = dtrace_fprinta;
1957 					break;
1958 				case DTRACEACT_SYSTEM:
1959 					func = dtrace_system;
1960 					break;
1961 				case DTRACEACT_FREOPEN:
1962 					func = dtrace_freopen;
1963 					break;
1964 				}
1965 
1966 				n = (*func)(dtp, fp, fmtdata, &data,
1967 				    rec, epd->dtepd_nrecs - i,
1968 				    (uchar_t *)buf->dtbd_data + offs,
1969 				    buf->dtbd_size - offs);
1970 
1971 				if (n < 0)
1972 					return (-1); /* errno is set for us */
1973 
1974 				if (n > 0)
1975 					i += n - 1;
1976 				goto nextrec;
1977 			}
1978 
1979 			/*
1980 			 * If this is a DIF expression, and the record has a
1981 			 * format set, this indicates we have a CTF type name
1982 			 * associated with the data and we should try to print
1983 			 * it out by type.
1984 			 */
1985 			if (act == DTRACEACT_DIFEXPR) {
1986 				const char *strdata = dt_strdata_lookup(dtp,
1987 				    rec->dtrd_format);
1988 				if (strdata != NULL) {
1989 					n = dtrace_print(dtp, fp, strdata,
1990 					    addr, rec->dtrd_size);
1991 
1992 					/*
1993 					 * dtrace_print() will return -1 on
1994 					 * error, or return the number of bytes
1995 					 * consumed.  It will return 0 if the
1996 					 * type couldn't be determined, and we
1997 					 * should fall through to the normal
1998 					 * trace method.
1999 					 */
2000 					if (n < 0)
2001 						return (-1);
2002 
2003 					if (n > 0)
2004 						goto nextrec;
2005 				}
2006 			}
2007 
2008 nofmt:
2009 			if (act == DTRACEACT_PRINTA) {
2010 				dt_print_aggdata_t pd;
2011 				dtrace_aggvarid_t *aggvars;
2012 				int j, naggvars = 0;
2013 				size_t size = ((epd->dtepd_nrecs - i) *
2014 				    sizeof (dtrace_aggvarid_t));
2015 
2016 				if ((aggvars = dt_alloc(dtp, size)) == NULL)
2017 					return (-1);
2018 
2019 				/*
2020 				 * This might be a printa() with multiple
2021 				 * aggregation variables.  We need to scan
2022 				 * forward through the records until we find
2023 				 * a record from a different statement.
2024 				 */
2025 				for (j = i; j < epd->dtepd_nrecs; j++) {
2026 					dtrace_recdesc_t *nrec;
2027 					caddr_t naddr;
2028 
2029 					nrec = &epd->dtepd_rec[j];
2030 
2031 					if (nrec->dtrd_uarg != rec->dtrd_uarg)
2032 						break;
2033 
2034 					if (nrec->dtrd_action != act) {
2035 						return (dt_set_errno(dtp,
2036 						    EDT_BADAGG));
2037 					}
2038 
2039 					naddr = buf->dtbd_data + offs +
2040 					    nrec->dtrd_offset;
2041 
2042 					aggvars[naggvars++] =
2043 					    /* LINTED - alignment */
2044 					    *((dtrace_aggvarid_t *)naddr);
2045 				}
2046 
2047 				i = j - 1;
2048 				bzero(&pd, sizeof (pd));
2049 				pd.dtpa_dtp = dtp;
2050 				pd.dtpa_fp = fp;
2051 
2052 				assert(naggvars >= 1);
2053 
2054 				if (naggvars == 1) {
2055 					pd.dtpa_id = aggvars[0];
2056 					dt_free(dtp, aggvars);
2057 
2058 					if (dt_printf(dtp, fp, "\n") < 0 ||
2059 					    dtrace_aggregate_walk_sorted(dtp,
2060 					    dt_print_agg, &pd) < 0)
2061 						return (-1);
2062 					goto nextrec;
2063 				}
2064 
2065 				if (dt_printf(dtp, fp, "\n") < 0 ||
2066 				    dtrace_aggregate_walk_joined(dtp, aggvars,
2067 				    naggvars, dt_print_aggs, &pd) < 0) {
2068 					dt_free(dtp, aggvars);
2069 					return (-1);
2070 				}
2071 
2072 				dt_free(dtp, aggvars);
2073 				goto nextrec;
2074 			}
2075 
2076 			if (act == DTRACEACT_TRACEMEM) {
2077 				if (tracememsize == 0 ||
2078 				    tracememsize > rec->dtrd_size) {
2079 					tracememsize = rec->dtrd_size;
2080 				}
2081 
2082 				n = dt_print_bytes(dtp, fp, addr,
2083 				    tracememsize, 33, quiet, 1);
2084 
2085 				tracememsize = 0;
2086 
2087 				if (n < 0)
2088 					return (-1);
2089 
2090 				goto nextrec;
2091 			}
2092 
2093 			switch (rec->dtrd_size) {
2094 			case sizeof (uint64_t):
2095 				n = dt_printf(dtp, fp,
2096 				    quiet ? "%lld" : " %16lld",
2097 				    /* LINTED - alignment */
2098 				    *((unsigned long long *)addr));
2099 				break;
2100 			case sizeof (uint32_t):
2101 				n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
2102 				    /* LINTED - alignment */
2103 				    *((uint32_t *)addr));
2104 				break;
2105 			case sizeof (uint16_t):
2106 				n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
2107 				    /* LINTED - alignment */
2108 				    *((uint16_t *)addr));
2109 				break;
2110 			case sizeof (uint8_t):
2111 				n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
2112 				    *((uint8_t *)addr));
2113 				break;
2114 			default:
2115 				n = dt_print_bytes(dtp, fp, addr,
2116 				    rec->dtrd_size, 33, quiet, 0);
2117 				break;
2118 			}
2119 
2120 			if (n < 0)
2121 				return (-1); /* errno is set for us */
2122 
2123 nextrec:
2124 			if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
2125 				return (-1); /* errno is set for us */
2126 		}
2127 
2128 		/*
2129 		 * Call the record callback with a NULL record to indicate
2130 		 * that we're done processing this EPID.
2131 		 */
2132 		rval = (*rfunc)(&data, NULL, arg);
2133 nextepid:
2134 		offs += epd->dtepd_size;
2135 		last = id;
2136 	}
2137 
2138 	if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
2139 		end = buf->dtbd_oldest;
2140 		start = 0;
2141 		goto again;
2142 	}
2143 
2144 	if ((drops = buf->dtbd_drops) == 0)
2145 		return (0);
2146 
2147 	/*
2148 	 * Explicitly zero the drops to prevent us from processing them again.
2149 	 */
2150 	buf->dtbd_drops = 0;
2151 
2152 	return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
2153 }
2154 
/*
 * State used to interpose on the consumer's probe, record and error
 * callbacks so that a buffer can be consumed in two passes:  one that sees
 * only dtrace:::BEGIN, and one that sees everything else.
 */
typedef struct dt_begin {
	dtrace_consume_probe_f *dtbgn_probefunc; /* underlying probe callback */
	dtrace_consume_rec_f *dtbgn_recfunc;	/* underlying record callback */
	void *dtbgn_arg;			/* argument for the above two */
	dtrace_handle_err_f *dtbgn_errhdlr;	/* underlying error handler */
	void *dtbgn_errarg;			/* argument for error handler */
	int dtbgn_beginonly;	/* non-zero: pass only BEGIN; zero: all else */
} dt_begin_t;
2163 
2164 static int
2165 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
2166 {
2167 	dt_begin_t *begin = (dt_begin_t *)arg;
2168 	dtrace_probedesc_t *pd = data->dtpda_pdesc;
2169 
2170 	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2171 	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2172 
2173 	if (begin->dtbgn_beginonly) {
2174 		if (!(r1 && r2))
2175 			return (DTRACE_CONSUME_NEXT);
2176 	} else {
2177 		if (r1 && r2)
2178 			return (DTRACE_CONSUME_NEXT);
2179 	}
2180 
2181 	/*
2182 	 * We have a record that we're interested in.  Now call the underlying
2183 	 * probe function...
2184 	 */
2185 	return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
2186 }
2187 
2188 static int
2189 dt_consume_begin_record(const dtrace_probedata_t *data,
2190     const dtrace_recdesc_t *rec, void *arg)
2191 {
2192 	dt_begin_t *begin = (dt_begin_t *)arg;
2193 
2194 	return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
2195 }
2196 
2197 static int
2198 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
2199 {
2200 	dt_begin_t *begin = (dt_begin_t *)arg;
2201 	dtrace_probedesc_t *pd = data->dteda_pdesc;
2202 
2203 	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2204 	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2205 
2206 	if (begin->dtbgn_beginonly) {
2207 		if (!(r1 && r2))
2208 			return (DTRACE_HANDLE_OK);
2209 	} else {
2210 		if (r1 && r2)
2211 			return (DTRACE_HANDLE_OK);
2212 	}
2213 
2214 	return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
2215 }
2216 
2217 static int
2218 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
2219     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2220 {
2221 	/*
2222 	 * There's this idea that the BEGIN probe should be processed before
2223 	 * everything else, and that the END probe should be processed after
2224 	 * anything else.  In the common case, this is pretty easy to deal
2225 	 * with.  However, a situation may arise where the BEGIN enabling and
2226 	 * END enabling are on the same CPU, and some enabling in the middle
2227 	 * occurred on a different CPU.  To deal with this (blech!) we need to
2228 	 * consume the BEGIN buffer up until the end of the BEGIN probe, and
2229 	 * then set it aside.  We will then process every other CPU, and then
2230 	 * we'll return to the BEGIN CPU and process the rest of the data
2231 	 * (which will inevitably include the END probe, if any).  Making this
2232 	 * even more complicated (!) is the library's ERROR enabling.  Because
2233 	 * this enabling is processed before we even get into the consume call
2234 	 * back, any ERROR firing would result in the library's ERROR enabling
2235 	 * being processed twice -- once in our first pass (for BEGIN probes),
2236 	 * and again in our second pass (for everything but BEGIN probes).  To
2237 	 * deal with this, we interpose on the ERROR handler to assure that we
2238 	 * only process ERROR enablings induced by BEGIN enablings in the
2239 	 * first pass, and that we only process ERROR enablings _not_ induced
2240 	 * by BEGIN enablings in the second pass.
2241 	 */
2242 	dt_begin_t begin;
2243 	processorid_t cpu = dtp->dt_beganon;
2244 	dtrace_bufdesc_t nbuf;
2245 	int rval, i;
2246 	static int max_ncpus;
2247 	dtrace_optval_t size;
2248 
2249 	dtp->dt_beganon = -1;
2250 
2251 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2252 		/*
2253 		 * We really don't expect this to fail, but it is at least
2254 		 * technically possible for this to fail with ENOENT.  In this
2255 		 * case, we just drive on...
2256 		 */
2257 		if (errno == ENOENT)
2258 			return (0);
2259 
2260 		return (dt_set_errno(dtp, errno));
2261 	}
2262 
2263 	if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
2264 		/*
2265 		 * This is the simple case.  We're either not stopped, or if
2266 		 * we are, we actually processed any END probes on another
2267 		 * CPU.  We can simply consume this buffer and return.
2268 		 */
2269 		return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
2270 	}
2271 
2272 	begin.dtbgn_probefunc = pf;
2273 	begin.dtbgn_recfunc = rf;
2274 	begin.dtbgn_arg = arg;
2275 	begin.dtbgn_beginonly = 1;
2276 
2277 	/*
2278 	 * We need to interpose on the ERROR handler to be sure that we
2279 	 * only process ERRORs induced by BEGIN.
2280 	 */
2281 	begin.dtbgn_errhdlr = dtp->dt_errhdlr;
2282 	begin.dtbgn_errarg = dtp->dt_errarg;
2283 	dtp->dt_errhdlr = dt_consume_begin_error;
2284 	dtp->dt_errarg = &begin;
2285 
2286 	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2287 	    dt_consume_begin_record, &begin);
2288 
2289 	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2290 	dtp->dt_errarg = begin.dtbgn_errarg;
2291 
2292 	if (rval != 0)
2293 		return (rval);
2294 
2295 	/*
2296 	 * Now allocate a new buffer.  We'll use this to deal with every other
2297 	 * CPU.
2298 	 */
2299 	bzero(&nbuf, sizeof (dtrace_bufdesc_t));
2300 	(void) dtrace_getopt(dtp, "bufsize", &size);
2301 	if ((nbuf.dtbd_data = malloc(size)) == NULL)
2302 		return (dt_set_errno(dtp, EDT_NOMEM));
2303 
2304 	if (max_ncpus == 0)
2305 		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2306 
2307 	for (i = 0; i < max_ncpus; i++) {
2308 		nbuf.dtbd_cpu = i;
2309 
2310 		if (i == cpu)
2311 			continue;
2312 
2313 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
2314 			/*
2315 			 * If we failed with ENOENT, it may be because the
2316 			 * CPU was unconfigured -- this is okay.  Any other
2317 			 * error, however, is unexpected.
2318 			 */
2319 			if (errno == ENOENT)
2320 				continue;
2321 
2322 			free(nbuf.dtbd_data);
2323 
2324 			return (dt_set_errno(dtp, errno));
2325 		}
2326 
2327 		if ((rval = dt_consume_cpu(dtp, fp,
2328 		    i, &nbuf, pf, rf, arg)) != 0) {
2329 			free(nbuf.dtbd_data);
2330 			return (rval);
2331 		}
2332 	}
2333 
2334 	free(nbuf.dtbd_data);
2335 
2336 	/*
2337 	 * Okay -- we're done with the other buffers.  Now we want to
2338 	 * reconsume the first buffer -- but this time we're looking for
2339 	 * everything _but_ BEGIN.  And of course, in order to only consume
2340 	 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
2341 	 * ERROR interposition function...
2342 	 */
2343 	begin.dtbgn_beginonly = 0;
2344 
2345 	assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
2346 	assert(begin.dtbgn_errarg == dtp->dt_errarg);
2347 	dtp->dt_errhdlr = dt_consume_begin_error;
2348 	dtp->dt_errarg = &begin;
2349 
2350 	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2351 	    dt_consume_begin_record, &begin);
2352 
2353 	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2354 	dtp->dt_errarg = begin.dtbgn_errarg;
2355 
2356 	return (rval);
2357 }
2358 
2359 int
2360 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
2361     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2362 {
2363 	dtrace_bufdesc_t *buf = &dtp->dt_buf;
2364 	dtrace_optval_t size;
2365 	static int max_ncpus;
2366 	int i, rval;
2367 	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
2368 	hrtime_t now = gethrtime();
2369 
2370 	if (dtp->dt_lastswitch != 0) {
2371 		if (now - dtp->dt_lastswitch < interval)
2372 			return (0);
2373 
2374 		dtp->dt_lastswitch += interval;
2375 	} else {
2376 		dtp->dt_lastswitch = now;
2377 	}
2378 
2379 	if (!dtp->dt_active)
2380 		return (dt_set_errno(dtp, EINVAL));
2381 
2382 	if (max_ncpus == 0)
2383 		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2384 
2385 	if (pf == NULL)
2386 		pf = (dtrace_consume_probe_f *)dt_nullprobe;
2387 
2388 	if (rf == NULL)
2389 		rf = (dtrace_consume_rec_f *)dt_nullrec;
2390 
2391 	if (buf->dtbd_data == NULL) {
2392 		(void) dtrace_getopt(dtp, "bufsize", &size);
2393 		if ((buf->dtbd_data = malloc(size)) == NULL)
2394 			return (dt_set_errno(dtp, EDT_NOMEM));
2395 
2396 		buf->dtbd_size = size;
2397 	}
2398 
2399 	/*
2400 	 * If we have just begun, we want to first process the CPU that
2401 	 * executed the BEGIN probe (if any).
2402 	 */
2403 	if (dtp->dt_active && dtp->dt_beganon != -1) {
2404 		buf->dtbd_cpu = dtp->dt_beganon;
2405 		if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
2406 			return (rval);
2407 	}
2408 
2409 	for (i = 0; i < max_ncpus; i++) {
2410 		buf->dtbd_cpu = i;
2411 
2412 		/*
2413 		 * If we have stopped, we want to process the CPU on which the
2414 		 * END probe was processed only _after_ we have processed
2415 		 * everything else.
2416 		 */
2417 		if (dtp->dt_stopped && (i == dtp->dt_endedon))
2418 			continue;
2419 
2420 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2421 			/*
2422 			 * If we failed with ENOENT, it may be because the
2423 			 * CPU was unconfigured -- this is okay.  Any other
2424 			 * error, however, is unexpected.
2425 			 */
2426 			if (errno == ENOENT)
2427 				continue;
2428 
2429 			return (dt_set_errno(dtp, errno));
2430 		}
2431 
2432 		if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
2433 			return (rval);
2434 	}
2435 
2436 	if (!dtp->dt_stopped)
2437 		return (0);
2438 
2439 	buf->dtbd_cpu = dtp->dt_endedon;
2440 
2441 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2442 		/*
2443 		 * This _really_ shouldn't fail, but it is strictly speaking
2444 		 * possible for this to return ENOENT if the CPU that called
2445 		 * the END enabling somehow managed to become unconfigured.
2446 		 * It's unclear how the user can possibly expect anything
2447 		 * rational to happen in this case -- the state has been thrown
2448 		 * out along with the unconfigured CPU -- so we'll just drive
2449 		 * on...
2450 		 */
2451 		if (errno == ENOENT)
2452 			return (0);
2453 
2454 		return (dt_set_errno(dtp, errno));
2455 	}
2456 
2457 	return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
2458 }
2459